Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add vidscraper-cmd. #8

Merged
merged 3 commits into from
Apr 5, 2012
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions bin/vidscraper-cmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env python

# Copyright 2012 - Participatory Culture Foundation
#
# This file is part of vidscraper.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import sys
import vidscraper


if __name__ == "__main__":
    # Run the vidscraper command handler and use whatever main() returns
    # as the process exit status.
    command_handler = vidscraper.VidscraperCommandHandler()
    sys.exit(command_handler.main())
44 changes: 44 additions & 0 deletions docs/topics/vidscraper_cmd.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
.. Copyright 2012 - Participatory Culture Foundation

This file is part of vidscraper.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS`` AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Command Line
============

vidscraper comes with a command line utility, ``vidscraper-cmd``, that
lets you scrape video metadata directly from the command line.

Want to get the metadata for a YouTube video?

::

$ vidscraper-cmd video http://www.youtube.com/watch?v=J_DV9b0x7v4


Want just the title and embed code?

::

$ vidscraper-cmd video --fields=title,embed_code \
http://www.youtube.com/watch?v=J_DV9b0x7v4
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
url='https://github.com/pculture/vidscraper',
license='BSD',
packages=find_packages(),
scripts=['bin/vidscraper-cmd'],
install_requires=[
'lxml>=2.3.4',
'oauth2>=1.5.211',
Expand Down
91 changes: 91 additions & 0 deletions vidscraper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys
from optparse import OptionParser

from vidscraper.suites import registry
from vidscraper.videos import Video, VideoSearch, VideoFeed

Expand Down Expand Up @@ -110,3 +113,91 @@ def auto_search(query, fields=None, order_by=None, crawl=False,
suites[suite] = search

return suites


# CLI mapping: the "video" subcommand -> auto_scrape(url, fields, api_keys)


class VidscraperCommandHandler(object):
    """Command line handler for vidscraper.

    Exposes functions from this module as subcommands of the vidscraper
    command line utility.

    Subcommands are implemented in ``handle_SUBCOMMAND`` methods, each of
    which returns the process exit status.  See ``handle_video`` and
    ``handle_help`` for examples.
    """

    usage = "%prog [command] [options]"

    # Prefix that marks a method as a subcommand handler.
    _handler_prefix = "handle_"

    def get_commands(self):
        """Returns a list of subcommands implemented."""
        prefix = self._handler_prefix
        # Slice off only the leading prefix; str.replace() would also
        # remove any later "handle_" inside a command name.
        return [mem[len(prefix):]
                for mem in dir(self)
                if mem.startswith(prefix)]

    def build_parser(self, usage):
        """Builds the parser with universal bits.

        :param usage: usage string handed to ``OptionParser``.
        :returns: an ``OptionParser`` that knows ``--version``.
        """
        parser = OptionParser(usage=usage, version=__version__)
        return parser

    def _parse_api_keys(self, parser, raw):
        """Turns ``key:val,key2:val`` into a ``{key: val}`` dict.

        An entry without a ``:`` is reported through ``parser.error``
        (usage message, exit status 2) instead of an uncaught ValueError.
        """
        api_keys = {}
        for mem in raw.split(","):
            key, sep, value = mem.partition(":")
            if not sep:
                parser.error("invalid --apikeys entry %r; expected key:val"
                             % mem)
            api_keys[key] = value
        return api_keys

    def handle_video(self):
        """Handler for auto_scrape.

        Scrapes every URL given on the command line and prints each
        result as JSON.

        :returns: 0 on success; argument errors exit via ``parser.error``.
        """
        parser = self.build_parser("%prog video [options] URL")
        parser.add_option("--fields", dest="fields",
                          help="comma-separated list of fields to retrieve. "
                          "e.g. --fields=a,b,c")
        parser.add_option("--apikeys", dest="api_keys",
                          help="api keys comma separated. "
                          "e.g. --apikeys=key:val,key2:val")
        (options, args) = parser.parse_args()

        if not args:
            parser.error("URL needed.")

        fields = options.fields.split(",") if options.fields else None

        if options.api_keys:
            api_keys = self._parse_api_keys(parser, options.api_keys)
        else:
            api_keys = None

        # Single-argument print(...) behaves the same as a statement on
        # Python 2 and as a function call on Python 3.
        for arg in args:
            print("Scraping %s" % arg)
            video = auto_scrape(arg, fields=fields, api_keys=api_keys)
            print(video.to_json(indent=2, sort_keys=True))

        return 0

    def handle_help(self, error=None):
        """Handles help.

        Prints the usage text, the optional error message and the list
        of available commands.

        :param error: message to show when help is displayed because of
                      a mistake on the command line.
        :returns: 0 for a plain help request, 1 when ``error`` is given
                  so that callers can detect the failure.
        """
        parser = self.build_parser("%prog [command]")
        parser.print_help()
        if error:
            print("")
            print("Error: " + error)
        print("")
        print("Commands:")
        for cmd in self.get_commands():
            print("   %s" % cmd)
        return 1 if error else 0

    def main(self):
        """Dispatches ``sys.argv`` to the matching ``handle_*`` method.

        :returns: the exit status produced by the handler that ran.
        """
        if len(sys.argv) <= 1 or sys.argv[1] in ("-h", "--help"):
            return self.handle_help()

        # Drop the subcommand from sys.argv so the handler's
        # parser.parse_args() only sees the remaining arguments.
        cmd = sys.argv.pop(1)
        # Only alphabetic characters take part in the attribute lookup.
        cmd = "".join(c for c in cmd if c.isalpha())
        handler = getattr(self, self._handler_prefix + cmd, None)
        if handler is None:
            return self.handle_help(error='%s is not a valid command.' % cmd)

        return handler()

if __name__ == "__main__":
    # Allow running the module directly; main()'s return value becomes
    # the process exit status.
    command_handler = VidscraperCommandHandler()
    sys.exit(command_handler.main())
56 changes: 56 additions & 0 deletions vidscraper/tests/unit/test_video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright 2012 - Participatory Culture Foundation
#
# This file is part of vidscraper.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import unittest
import json

from vidscraper.videos import Video


class VideoTestCase(unittest.TestCase):
    """Unit tests for ``Video.items()`` and ``Video.to_json()``."""

    def test_items(self):
        """items() yields one pair per field, in ``_all_fields`` order."""
        video = Video("http://www.youtube.com/watch?v=J_DV9b0x7v4")

        # Compare whole lists: unlike walking the items with enumerate(),
        # this also fails when items() yields too few (or no) entries.
        names = [name for name, _value in video.items()]
        self.assertEqual(names, list(Video._all_fields))

    def test_items_with_fields(self):
        """items() is limited to the requested fields, in order."""
        fields = ['title', 'user']
        video = Video("http://www.youtube.com/watch?v=J_DV9b0x7v4",
                      fields=fields)

        # Same whole-list comparison so a short or empty result fails.
        names = [name for name, _value in video.items()]
        self.assertEqual(names, fields)

    def test_to_json(self):
        """to_json() produces JSON that decodes back into a dict."""
        video = Video("http://www.youtube.com/watch?v=J_DV9b0x7v4")

        data_json = video.to_json()
        # Verify that we can load the json back into Python.
        data = json.loads(data_json)
        self.assertTrue(isinstance(data, dict))
26 changes: 25 additions & 1 deletion vidscraper/videos.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import datetime

from vidscraper.exceptions import UnhandledURL, VideoDeleted
from vidscraper.utils.search import (search_string_from_terms,
Expand Down Expand Up @@ -155,6 +157,28 @@ def _apply(self, data):
def is_loaded(self):
return self._loaded

def items(self):
    """Yield a ``(field, value)`` pair for each requested field.

    Fields are visited in the order of ``self.fields``; each value is
    read from the attribute of the same name.
    """
    for field_name in self.fields:
        field_value = getattr(self, field_name)
        yield field_name, field_value

def to_json(self, **kw):
    """Returns the video JSON-ified.

    Takes keyword arguments and passes them to json.dumps().

    Dates and datetimes are serialized with ``isoformat()``.  A
    ``default`` callable supplied by the caller is still honoured: it is
    consulted for every other object json cannot serialize on its own.

    Raises TypeError for an unserializable value when no caller
    ``default`` handles it.

    Example:

    >>> v.to_json(indent=2, sort_keys=True)
    """
    # Keep the caller's hook instead of silently overwriting it below.
    caller_default = kw.pop('default', None)

    def json_dump_helper(obj):
        # datetime.datetime is a subclass of datetime.date, so this one
        # check covers both.
        if isinstance(obj, datetime.date):
            return obj.isoformat()
        if caller_default is not None:
            return caller_default(obj)
        raise TypeError("%r is not serializable" % (obj,))

    kw['default'] = json_dump_helper
    return json.dumps(dict(self.items()), **kw)


class BaseVideoIterator(object):
"""
Expand Down Expand Up @@ -457,4 +481,4 @@ def get_item_data(self, item):
return self.suite.parse_search_result(self, item)

# Delegates to ``self.suite.get_next_search_page_url(self, response)`` and
# returns its result.
# NOTE(review): the return statement below appears twice; presumably this is
# the removed/added line pair of the diff hunk this text was taken from
# rather than real duplicated code -- confirm against vidscraper/videos.py.
def get_next_url(self, response):
return self.suite.get_next_search_page_url(self, response)
return self.suite.get_next_search_page_url(self, response)