Permalink
Browse files

download support for Vimeo

  • Loading branch information...
paulswartz committed Nov 29, 2011
1 parent dccb719 commit 3e78bbcb354a0870959e3f94d9be03acbd100e1b
Showing with 165 additions and 7 deletions.
  1. +44 −0 vidscraper/suites/vimeo.py
  2. +92 −0 vidscraper/tests/data/vimeo/scrape.xml
  3. +29 −7 vidscraper/tests/unit/test_vimeo.py
View
@@ -23,7 +23,9 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import time
from datetime import datetime
from xml.dom import minidom
import re
import urllib
import urllib2
@@ -37,6 +39,7 @@
from vidscraper.compat import json
from vidscraper.suites import BaseSuite, registry
from vidscraper.utils.feedparser import struct_time_to_datetime
class VimeoSuite(BaseSuite):
"""
@@ -54,6 +57,9 @@ class VimeoSuite(BaseSuite):
api_fields = set(['link', 'title', 'description', 'tags', 'guid',
'publish_datetime', 'thumbnail_url', 'user', 'user_url',
'flash_enclosure_url', 'embed_code'])
scrape_fields = set(['link', 'title', 'user', 'user_url', 'thumbnail_url',
'embed_code', 'file_url', 'file_url_mimetype',
'file_url_expires', 'file_url_is_flaky'])
oembed_endpoint = u"http://vimeo.com/api/oembed.json"
def _embed_code_from_id(self, video_id):
@@ -96,6 +102,44 @@ def _data_from_api_video(self, video):
}
return data
def get_scrape_url(self, video):
    """
    Return the moogaloop ``load/clip`` URL for ``video``.

    The video id is the ``video_id`` group captured by matching
    ``self.video_regex`` against ``video.url``.

    """
    match = self.video_regex.match(video.url)
    clip_id = match.group('video_id')
    return u"http://www.vimeo.com/moogaloop/load/clip:%s" % clip_id
def parse_scrape_response(self, response_text):
    """
    Parse the XML returned by the moogaloop ``load/clip`` URL.

    :param response_text: the XML document as a string.
    :returns: a dict with the keys ``link``, ``user``, ``user_url``,
              ``title``, ``thumbnail_url``, ``embed_code``, ``file_url``,
              ``file_url_mimetype``, ``file_url_expires`` and
              ``file_url_is_flaky``.
    :raises AttributeError: if one of the required elements is missing
                            from the document or has no content.

    """
    doc = minidom.parseString(response_text)
    try:
        xml_data = {}
        for key in ('url', 'caption', 'thumbnail', 'uploader_url',
                    'uploader_display_name', 'isHD', 'embed_code',
                    'request_signature', 'request_signature_expires',
                    'nodeId'):
            # Only the first element carrying each tag name is read, so
            # the values come from whichever such element appears first
            # in the document.
            text = doc.getElementsByTagName(key).item(0).firstChild.data
            # minidom normally hands back unicode already.  Calling
            # ``decode`` on unicode makes Python 2 encode it to ASCII
            # first, which fails on any non-ASCII title or user name, so
            # only byte strings are decoded.
            if not isinstance(text, type(u'')):
                text = text.decode('utf8')
            xml_data[key] = text
    finally:
        # Break the DOM's internal reference cycles so the tree can be
        # reclaimed promptly; the extracted strings are unaffected.
        doc.unlink()

    # The signature expiry is a Unix timestamp in the response.
    expires = struct_time_to_datetime(time.gmtime(
        int(xml_data['request_signature_expires'])))

    data = {
        'link': xml_data['url'],
        'user': xml_data['uploader_display_name'],
        'user_url': xml_data['uploader_url'],
        'title': xml_data['caption'],
        'thumbnail_url': xml_data['thumbnail'],
        'embed_code': xml_data['embed_code'],
        'file_url_is_flaky': True,
        'file_url_expires': expires,
        'file_url_mimetype': u'video/x-flv',
    }

    # The play URL embeds the request signature and its expiry, and ends
    # with the quality selector.
    base_file_url = (
        'http://www.vimeo.com/moogaloop/play/clip:%(nodeId)s/'
        '%(request_signature)s/%(request_signature_expires)s'
        '/?q=' % xml_data)
    if xml_data['isHD'] == '1':
        data['file_url'] = base_file_url + 'hd'
    else:
        data['file_url'] = base_file_url + 'sd'

    return data
def _get_user_api_url(self, user, type):
return 'http://vimeo.com/api/v2/%s/%s.json' % (user, type)
@@ -0,0 +1,92 @@
<?xml version="1.0" encoding="utf-8"?>
<xml>
<timestamp>1322593900</timestamp>
<locked>0</locked>
<video>
<caption>Good morning, universe</caption>
<width>320</width>
<height>240</height>
<duration>29</duration>
<thumbnail>http://b.vimeocdn.com/ts/228/979/22897998_640.jpg</thumbnail>
<totalComments>47</totalComments>
<totalLikes>68</totalLikes>
<totalPlays>25170</totalPlays>
<url_clean>http://vimeo.com/2</url_clean>
<url>http://vimeo.com/2</url>
<uploader_url>http://vimeo.com/jakob</uploader_url>
<uploader_portrait>http://b.vimeocdn.com/ps/137/734/1377340_75.jpg</uploader_portrait>
<uploader_display_name>Jake Lodwick</uploader_display_name>
<nodeId>2</nodeId>
<isHD>0</isHD>
<privacy>anybody</privacy>
<isPrivate>0</isPrivate>
<isPassword>0</isPassword>
<isNobody>0</isNobody>
<embed_code>&lt;object width=&quot;400&quot; height=&quot;300&quot;&gt;&lt;param name=&quot;allowfullscreen&quot; value=&quot;true&quot; /&gt;&lt;param name=&quot;allowscriptaccess&quot; value=&quot;always&quot; /&gt;&lt;param name=&quot;movie&quot; value=&quot;http://vimeo.com/moogaloop.swf?clip_id=2&amp;amp;server=vimeo.com&amp;amp;show_title=1&amp;amp;show_byline=1&amp;amp;show_portrait=1&amp;amp;color=00adef&amp;amp;fullscreen=1&amp;amp;autoplay=0&amp;amp;loop=0&quot; /&gt;&lt;embed src=&quot;http://vimeo.com/moogaloop.swf?clip_id=2&amp;amp;server=vimeo.com&amp;amp;show_title=1&amp;amp;show_byline=1&amp;amp;show_portrait=1&amp;amp;color=00adef&amp;amp;fullscreen=1&amp;amp;autoplay=0&amp;amp;loop=0&quot; type=&quot;application/x-shockwave-flash&quot; allowfullscreen=&quot;true&quot; allowscriptaccess=&quot;always&quot; width=&quot;400&quot; height=&quot;300&quot;&gt;&lt;/embed&gt;&lt;/object&gt;&lt;p&gt;&lt;a href=&quot;http://vimeo.com/2&quot;&gt;Good morning, universe&lt;/a&gt; from &lt;a href=&quot;http://vimeo.com/jakob&quot;&gt;Jake Lodwick&lt;/a&gt; on &lt;a href=&quot;http://vimeo.com&quot;&gt;Vimeo&lt;/a&gt;.&lt;/p&gt;</embed_code>
<isLoggedIn>1</isLoggedIn>
<isOwner>0</isOwner>
<likeIt>0</likeIt>
<watch_later>0</watch_later>
<default_volume>48</default_volume>
<hd_preference>1</hd_preference>
<scaling_preference>1</scaling_preference>
<embed_permission>public</embed_permission>
<allow_hd_embed />
</video>
<stream_clips>
<video>
<caption>The green balloon</caption>
<thumbnail>http://b.vimeocdn.com/ts/455/416/45541667_100.jpg</thumbnail>
<url>http://vimeo.com/8</url>
<nodeId>8</nodeId>
</video>
<video>
<caption>We need more employees.</caption>
<thumbnail>http://b.vimeocdn.com/ts/455/406/45540662_100.jpg</thumbnail>
<url>http://vimeo.com/6</url>
<nodeId>6</nodeId>
</video>
<video>
<caption>Good morning, universe</caption>
<thumbnail>http://b.vimeocdn.com/ts/228/979/22897998_100.jpg</thumbnail>
<url>http://vimeo.com/2</url>
<nodeId>2</nodeId>
</video>
</stream_clips>
<source>cache</source>
<embed_settings>
<byline_badge>1</byline_badge>
<like_button>1</like_button>
<watch_later_button>1</watch_later_button>
<share_button>1</share_button>
<embed_button>1</embed_button>
<playbar>1</playbar>
<volume>1</volume>
<fullscreen_button>1</fullscreen_button>
<scaling_button>1</scaling_button>
<vimeo_logo>1</vimeo_logo>
<custom_logo>0</custom_logo>
<sticky_custom_logo>0</sticky_custom_logo>
<autoplay>0</autoplay>
<loop>0</loop>
<color>00adef</color>
<outro>text</outro>
<outro_videos_type>uploaded_videos</outro_videos_type>
<outro_videos_id />
<outro_link_url />
<outro_link_name />
<outro_text />
<outro_clip_ids />
<stream_clips>
<video />
</stream_clips>
<clipinfo_title>1</clipinfo_title>
<clipinfo_portrait>1</clipinfo_portrait>
<clipinfo_byline>1</clipinfo_byline>
</embed_settings>
<referrer />
<cached_timestamp>1322593278</cached_timestamp>
<is_mod>0</is_mod>
<request_signature>e82cb5d075e82a8cd790a1710e8b1d2f</request_signature>
<request_signature_expires>1322593900</request_signature_expires>
</xml>
@@ -35,6 +35,8 @@
class VimeoTestCase(unittest.TestCase):
def setUp(self):
    """Create the suite under test and a video object for the test URL."""
    suite = VimeoSuite()
    url = "http://vimeo.com/2"
    self.suite = suite
    self.base_url = url
    self.video = suite.get_video(url)
@property
def data_file_dir(self):
@@ -44,13 +46,7 @@ def data_file_dir(self):
self._data_file_dir = os.path.join(test_dir, 'data', 'vimeo')
return self._data_file_dir
class VimeoApiTestCase(VimeoTestCase):
def setUp(self):
VimeoTestCase.setUp(self)
self.base_url = "http://vimeo.com/2"
self.video = self.suite.get_video(self.base_url)
class VimeoOembedTestCase(VimeoTestCase):
def test_get_oembed_url(self):
    """The oEmbed URL carries the percent-encoded video URL as ``url``."""
    expected = ("http://vimeo.com/api/oembed.json"
                "?url=http%3A%2F%2Fvimeo.com%2F2")
    self.assertEqual(self.suite.get_oembed_url(self.video), expected)
@@ -73,6 +69,8 @@ def test_parse_oembed_response(self):
self.assertTrue(key in data)
self.assertEqual(data[key], expected_data[key])
class VimeoApiTestCase(VimeoTestCase):
def test_get_api_url(self):
    """The API URL for the test video is its v2 ``video`` JSON endpoint."""
    expected = 'http://vimeo.com/api/v2/video/2.json'
    self.assertEqual(self.suite.get_api_url(self.video), expected)
@@ -100,6 +98,30 @@ def test_parse_api_response(self):
}
self.assertEqual(data, expected_data)
class VimeoScrapeTestCase(VimeoTestCase):
    """Tests for building the scrape URL and parsing the scrape response."""

    def test_get_scrape_url(self):
        """The scrape URL is the moogaloop ``load/clip`` URL for the video."""
        # The method name needs the ``test`` prefix or unittest never
        # collects it.  The expected value includes ``www.`` because that
        # is the host the suite's get_scrape_url builds.
        scrape_url = self.suite.get_scrape_url(self.video)
        self.assertEqual(scrape_url,
                         'http://www.vimeo.com/moogaloop/load/clip:2')

    def test_parse_scrape_response(self):
        """Parsing ``scrape.xml`` yields exactly the suite's scrape fields."""
        scrape_file = open(os.path.join(self.data_file_dir, 'scrape.xml'))
        # Close the fixture even if parsing raises.
        try:
            data = self.suite.parse_scrape_response(scrape_file.read())
        finally:
            scrape_file.close()
        self.assertTrue(isinstance(data, dict))
        self.assertEqual(set(data), self.suite.scrape_fields)
        # NOTE(review): ``datetime.datetime`` below needs a module-level
        # ``import datetime`` in this test file -- confirm.
        expected_data = {
            'title': u'Good morning, universe',
            'thumbnail_url': u'http://b.vimeocdn.com/ts/228/979/22897998_640.jpg',
            'link': u'http://vimeo.com/2',
            'user': u'Jake Lodwick',
            'user_url': u'http://vimeo.com/jakob',
            'embed_code': '<object width="400" height="300"><param name="allowfullscreen" value="true" /><param name="allowscriptaccess" value="always" /><param name="movie" value="http://vimeo.com/moogaloop.swf?clip_id=2&amp;server=vimeo.com&amp;show_title=1&amp;show_byline=1&amp;show_portrait=1&amp;color=00adef&amp;fullscreen=1&amp;autoplay=0&amp;loop=0" /><embed src="http://vimeo.com/moogaloop.swf?clip_id=2&amp;server=vimeo.com&amp;show_title=1&amp;show_byline=1&amp;show_portrait=1&amp;color=00adef&amp;fullscreen=1&amp;autoplay=0&amp;loop=0" type="application/x-shockwave-flash" allowfullscreen="true" allowscriptaccess="always" width="400" height="300"></embed></object><p><a href="http://vimeo.com/2">Good morning, universe</a> from <a href="http://vimeo.com/jakob">Jake Lodwick</a> on <a href="http://vimeo.com">Vimeo</a>.</p>',
            'file_url_expires': datetime.datetime(2011, 11, 29, 19, 11, 40),
            'file_url_is_flaky': True,
            'file_url_mimetype': u'video/x-flv',
            'file_url': 'http://www.vimeo.com/moogaloop/play/clip:2/e82cb5d075e82a8cd790a1710e8b1d2f/1322593900/?q=sd'
        }
        # Compare key by key so a failure names the mismatching field's
        # values rather than dumping both dicts.
        for key in data:
            self.assertEqual(data[key], expected_data[key])
class VimeoFeedTestCase(VimeoTestCase):
def setUp(self):

0 comments on commit 3e78bbc

Please sign in to comment.