
Completely rewritten

Refactored.
Now downloads videos. Saves videos with meaningful names in hierarchical folders.
Uses youtube_dl code to download videos.
commit fd0f89f54edca2c2758dee3dce930b87e509366f 1 parent 24b4b4a
Alexander Bessonov authored October 25, 2012
7  README.md
@@ -1,9 +1,10 @@
-##Generate list of course videos from eudcation.10gen.com.
+##Download course videos from education.10gen.com.
 
 File `config.py` should be populated with login/password.
 
-After completion, the script will create text file with youtube links, named after the course.
-This file could then be used to download videos with [youtube-dl](https://github.com/rg3/youtube-dl/).
+This script uses code from the [youtube-dl](https://github.com/rg3/youtube-dl/) project to download videos.
+
+The script skips videos that have already been downloaded, although it still looks up the video links on 10gen's site.
 
 ###Dependencies:
 * Python 2.7
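For reference, `config.py` itself is not committed. A minimal sketch of what the script appears to expect (the names `EMAIL`, `PASSWORD` and the optional `TARGETDIR` are inferred from `edu_10gen.py`, so treat this as an assumption):

    # config.py -- hypothetical example, not part of the repository
    EMAIL = 'student@example.com'   # education.10gen.com login
    PASSWORD = 'secret'
    TARGETDIR = ''                  # optional; the script falls back to '' when it is missing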
137  edu_10gen.py
@@ -11,6 +11,11 @@
 
 from urllib import urlencode
 
+YDL_PARAMS_FILE = 'ydl_params.json'
+from youtube_dl.FileDownloader import FileDownloader
+from youtube_dl.InfoExtractors import YoutubeIE
+from youtube_dl.utils import sanitize_filename
+
 try:
     from bs4 import BeautifulSoup
     import mechanize
@@ -30,7 +35,7 @@
 except ImportError:
     TARGETDIR = ''
 
-site_url = 'https://education.10gen.com'
+SITE_URL = 'https://education.10gen.com'
 login_url = '/login'
 dashboard_url = '/dashboard'
 youtube_url = 'http://www.youtube.com/watch?v='
@@ -61,63 +66,93 @@ def csrfCookie(csrftoken):
             comment=None, comment_url=None,
             rest={'HttpOnly': None}, rfc2109=False)
 
-
-br = mechanize.Browser()
-cj = mechanize.LWPCookieJar()
-csrftoken = makeCsrf()
-cj.set_cookie(csrfCookie(csrftoken))
-br.set_handle_robots(False)
-br.set_cookiejar(cj)
-br.addheaders.append(('X-CSRFToken',csrftoken))
-br.addheaders.append(('Referer','https://education.10gen.com'))
-try:
-    login_resp = br.open(site_url + login_url, urlencode({'email':EMAIL, 'password':PASSWORD}))
-except mechanize.HTTPError, e:
-    print "Unexpected error:", e.code
-    exit()
-login_state = json.loads(login_resp.read())
-
-if not login_state.get('success'):
-    print login_state.get('value')
-    exit()
-
-dashboard = br.open(site_url + dashboard_url)
-dashboard_soup = BeautifulSoup(dashboard.read())
-username = dashboard_soup.find('section', 'user-info').findAll('span')[1].text
-print 'Logged as %s\n\n' % username
-
-my_courses = dashboard_soup.findAll('article', 'my-course')
-for my_course in my_courses:
-    course_url = my_course.a['href']
-    course_name = my_course.h3.text
-    f = open(course_name + '.txt', 'w')
-    print '%s' % course_name
-    courseware_url = re.sub(r'\/info$','/courseware',course_url)
-    courseware = br.open(site_url+courseware_url)
-    courseware_soup = BeautifulSoup(courseware.read())
-    chapters = courseware_soup.findAll('div','chapter')
-    for chapter in chapters:
-        chapter_title = chapter.find('h3').find('a').text
-        print '\t%s' % chapter_title
-        paragraphs = chapter.find('ul').findAll('li')
-        for paragraph in paragraphs:
-            par_name = paragraph.p.text
-            par_url = paragraph.a['href']
-            par = br.open(site_url + par_url)
+class TenGenBrowser(object):
+    def __init__(self):
+        self._br = mechanize.Browser()
+        self._cj = mechanize.LWPCookieJar()
+        csrftoken = makeCsrf()
+        self._cj.set_cookie(csrfCookie(csrftoken))
+        self._br.set_handle_robots(False)
+        self._br.set_cookiejar(self._cj)
+        self._br.addheaders.append(('X-CSRFToken',csrftoken))
+        self._br.addheaders.append(('Referer',SITE_URL))
+        self._logged_in = False
+        with open(YDL_PARAMS_FILE) as fydl:
+            self._fd = FileDownloader(json.load(fydl))
+            self._fd.add_info_extractor(YoutubeIE())
+    def login(self, email, password):
+        try:
+            login_resp = self._br.open(SITE_URL + login_url, urlencode({'email':email, 'password':password}))
+            login_state = json.loads(login_resp.read())
+            self._logged_in = login_state.get('success')
+            if not self._logged_in:
+                print login_state.get('value')
+            return self._logged_in
+        except mechanize.HTTPError, e:
+            sys.exit('Can\'t sign in')
+    def list_courses(self):
+        self.courses = []
+        if self._logged_in:
+            dashboard = self._br.open(SITE_URL + dashboard_url)
+            dashboard_soup = BeautifulSoup(dashboard.read())
+            my_courses = dashboard_soup.findAll('article', 'my-course')
+            i = 0
+            for my_course in my_courses:
+                i += 1
+                course_url = my_course.a['href']
+                courseware_url = re.sub(r'\/info$','/courseware',course_url)
+                course_name = my_course.h3.text
+                self.courses.append({'name':course_name, 'url':courseware_url})
+                print '[%02i] %s' % (i, course_name)
+    def list_chapters(self, course_i):
+        self.paragraphs = []
+        if 1 <= course_i <= len(self.courses):
+            course = self.courses[course_i - 1]
+            course_name = course['name']
+            courseware = self._br.open(SITE_URL+course['url'])
+            courseware_soup = BeautifulSoup(courseware.read())
+            chapters = courseware_soup.findAll('div','chapter')
+            i = 0
+            for chapter in chapters:
+                i += 1
+                chapter_name = chapter.find('h3').find('a').text
+                print '\t[%02i] %s' % (i, chapter_name)
+                paragraphs = chapter.find('ul').findAll('li')
+                j = 0
+                for paragraph in paragraphs:
+                    j += 1
+                    par_name = paragraph.p.text
+                    par_url = paragraph.a['href']
+                    self.paragraphs.append((course_name, i, chapter_name, par_name, par_url))
+                    print '\t[%02i.%02i] %s' % (i, j, par_name)
+    def download(self):
+        j = 0
+        for (cn, i, chn, pn, url) in self.paragraphs:
+            j += 1
+            par = self._br.open(SITE_URL + url)
             par_soup = BeautifulSoup(par.read())
             contents = par_soup.findAll('div','seq_contents')
-            par_part = 0
+            k = 0
             for content in contents:
                 content_soup = BeautifulSoup(content.text)
                 try:
+                    video_type = content_soup.h2.text.strip()
                     video_stream = content_soup.find('div','video')['data-streams']
                     video_id = video_stream.split(':')[1]
                     video_url = youtube_url + video_id
-                    video_type = content_soup.h2.text.strip()
-                    par_part += 1
-                    print '\t\t%s - %i-%s: %s' % (par_name, par_part, video_type, video_url)
-                    f.writelines(video_url+'\n')
+                    k += 1
+                    print '[%02i.%02i.%i] %s (%s)' % (i, j, k, pn, video_type)
+                    #f.writelines(video_url+'\n')
+                    outtmpl = sanitize_filename(cn) + '\\' + sanitize_filename(chn) + '\\' + '%02i.%02i.%i ' % (i,j,k) + sanitize_filename('%s (%s)' % (pn, video_type)) + '.%(ext)s'
+                    self._fd.params['outtmpl'] = outtmpl
+                    self._fd.download([video_url])
                 except:
                     pass
-    f.close()
-    print '\nYou can now downlaod lecture videos with the following command:\n    youtube-dl -a "%s.txt" -A -t\n' % course_name
+
+
+tgb = TenGenBrowser()
+tgb.login(EMAIL, PASSWORD)
+tgb.list_courses()
+for c in range(1, len(tgb.courses) + 1):  # course indices are 1-based, as printed by list_courses()
+    tgb.list_chapters(c)
+    tgb.download()
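The `outtmpl` assembled in `download()` joins the course and chapter folders with a literal `'\\'`, which assumes Windows path separators. A hedged sketch of a cross-platform variant using `os.path.join` (same layout and variables, just a different join):

    import os
    # hypothetical cross-platform replacement for the outtmpl string built in download()
    outtmpl = os.path.join(
        sanitize_filename(cn),    # course folder
        sanitize_filename(chn),   # chapter folder
        '%02i.%02i.%i ' % (i, j, k) + sanitize_filename('%s (%s)' % (pn, video_type)) + '.%(ext)s')
    self._fd.params['outtmpl'] = outtmpl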
1  ydl_params.json
@@ -0,0 +1 @@
+{"username": null, "listformats": null, "skip_download": false, "usenetrc": false, "max_downloads": null, "noprogress": false, "forcethumbnail": false, "forceformat": false, "format_limit": null, "ratelimit": null, "nooverwrites": true, "forceurl": false, "writeinfojson": false, "simulate": false, "playliststart": 1, "continuedl": true, "password": null, "prefer_free_formats": false, "nopart": false, "retries": 10, "updatetime": true, "consoletitle": false, "verbose": true, "forcefilename": false, "ignoreerrors": false, "logtostderr": false, "format": null, "subtitleslang": null, "quiet": false, "outtmpl": "%(id)s.%(ext)s", "rejecttitle": null, "playlistend": -1, "writedescription": false, "forcetitle": false, "forcedescription": false, "writesubtitles": false, "matchtitle": null}
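These options are passed verbatim to `FileDownloader`; `"nooverwrites": true` is what lets re-runs skip files that already exist, and `"outtmpl"` is overwritten per video by edu_10gen.py. A small hedged sketch of loading the file and tweaking one option before constructing the downloader (the 500 KB/s rate limit is a hypothetical value):

    import json
    from youtube_dl.FileDownloader import FileDownloader

    with open('ydl_params.json') as fydl:
        params = json.load(fydl)
    params['ratelimit'] = 500 * 1024   # bytes per second; None leaves downloads unthrottled
    fd = FileDownloader(params)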
689  youtube_dl/FileDownloader.py
@@ -0,0 +1,689 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import httplib
+import math
+import os
+import re
+import socket
+import subprocess
+import sys
+import time
+import urllib2
+
+if os.name == 'nt':
+	import ctypes
+
+from utils import *
+
+
+class FileDownloader(object):
+	"""File Downloader class.
+
+	File downloader objects are the ones responsible of downloading the
+	actual video file and writing it to disk if the user has requested
+	it, among some other tasks. In most cases there should be one per
+	program. As, given a video URL, the downloader doesn't know how to
+	extract all the needed information, task that InfoExtractors do, it
+	has to pass the URL to one of them.
+
+	For this, file downloader objects have a method that allows
+	InfoExtractors to be registered in a given order. When it is passed
+	a URL, the file downloader handles it to the first InfoExtractor it
+	finds that reports being able to handle it. The InfoExtractor extracts
+	all the information about the video or videos the URL refers to, and
+	asks the FileDownloader to process the video information, possibly
+	downloading the video.
+
+	File downloaders accept a lot of parameters. In order not to saturate
+	the object constructor with arguments, it receives a dictionary of
+	options instead. These options are available through the params
+	attribute for the InfoExtractors to use. The FileDownloader also
+	registers itself as the downloader in charge for the InfoExtractors
+	that are added to it, so this is a "mutual registration".
+
+	Available options:
+
+	username:         Username for authentication purposes.
+	password:         Password for authentication purposes.
+	usenetrc:         Use netrc for authentication instead.
+	quiet:            Do not print messages to stdout.
+	forceurl:         Force printing final URL.
+	forcetitle:       Force printing title.
+	forcethumbnail:   Force printing thumbnail URL.
+	forcedescription: Force printing description.
+	forcefilename:    Force printing final filename.
+	simulate:         Do not download the video files.
+	format:           Video format code.
+	format_limit:     Highest quality format to try.
+	outtmpl:          Template for output names.
+	ignoreerrors:     Do not stop on download errors.
+	ratelimit:        Download speed limit, in bytes/sec.
+	nooverwrites:     Prevent overwriting files.
+	retries:          Number of times to retry for HTTP error 5xx
+	continuedl:       Try to continue downloads if possible.
+	noprogress:       Do not print the progress bar.
+	playliststart:    Playlist item to start at.
+	playlistend:      Playlist item to end at.
+	matchtitle:       Download only matching titles.
+	rejecttitle:      Reject downloads for matching titles.
+	logtostderr:      Log messages to stderr instead of stdout.
+	consoletitle:     Display progress in console window's titlebar.
+	nopart:           Do not use temporary .part files.
+	updatetime:       Use the Last-modified header to set output file timestamps.
+	writedescription: Write the video description to a .description file
+	writeinfojson:    Write the video description to a .info.json file
+	writesubtitles:   Write the video subtitles to a .srt file
+	subtitleslang:    Language of the subtitles to download
+	"""
+
+	params = None
+	_ies = []
+	_pps = []
+	_download_retcode = None
+	_num_downloads = None
+	_screen_file = None
+
+	def __init__(self, params):
+		"""Create a FileDownloader object with the given options."""
+		self._ies = []
+		self._pps = []
+		self._download_retcode = 0
+		self._num_downloads = 0
+		self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
+		self.params = params
+
+	@staticmethod
+	def format_bytes(bytes):
+		if bytes is None:
+			return 'N/A'
+		if type(bytes) is str:
+			bytes = float(bytes)
+		if bytes == 0.0:
+			exponent = 0
+		else:
+			exponent = long(math.log(bytes, 1024.0))
+		suffix = 'bkMGTPEZY'[exponent]
+		converted = float(bytes) / float(1024 ** exponent)
+		return '%.2f%s' % (converted, suffix)
+
+	@staticmethod
+	def calc_percent(byte_counter, data_len):
+		if data_len is None:
+			return '---.-%'
+		return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
+
+	@staticmethod
+	def calc_eta(start, now, total, current):
+		if total is None:
+			return '--:--'
+		dif = now - start
+		if current == 0 or dif < 0.001: # One millisecond
+			return '--:--'
+		rate = float(current) / dif
+		eta = long((float(total) - float(current)) / rate)
+		(eta_mins, eta_secs) = divmod(eta, 60)
+		if eta_mins > 99:
+			return '--:--'
+		return '%02d:%02d' % (eta_mins, eta_secs)
+
+	@staticmethod
+	def calc_speed(start, now, bytes):
+		dif = now - start
+		if bytes == 0 or dif < 0.001: # One millisecond
+			return '%10s' % '---b/s'
+		return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
+
+	@staticmethod
+	def best_block_size(elapsed_time, bytes):
+		new_min = max(bytes / 2.0, 1.0)
+		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
+		if elapsed_time < 0.001:
+			return long(new_max)
+		rate = bytes / elapsed_time
+		if rate > new_max:
+			return long(new_max)
+		if rate < new_min:
+			return long(new_min)
+		return long(rate)
+
+	@staticmethod
+	def parse_bytes(bytestr):
+		"""Parse a string indicating a byte quantity into a long integer."""
+		matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
+		if matchobj is None:
+			return None
+		number = float(matchobj.group(1))
+		multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
+		return long(round(number * multiplier))
+
+	def add_info_extractor(self, ie):
+		"""Add an InfoExtractor object to the end of the list."""
+		self._ies.append(ie)
+		ie.set_downloader(self)
+
+	def add_post_processor(self, pp):
+		"""Add a PostProcessor object to the end of the chain."""
+		self._pps.append(pp)
+		pp.set_downloader(self)
+
+	def to_screen(self, message, skip_eol=False):
+		"""Print message to stdout if not in quiet mode."""
+		assert type(message) == type(u'')
+		if not self.params.get('quiet', False):
+			terminator = [u'\n', u''][skip_eol]
+			output = message + terminator
+			if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
+				output = output.encode(preferredencoding(), 'ignore')
+			self._screen_file.write(output)
+			self._screen_file.flush()
+
+	def to_stderr(self, message):
+		"""Print message to stderr."""
+		print >>sys.stderr, message.encode(preferredencoding())
+
+	def to_cons_title(self, message):
+		"""Set console/terminal window title to message."""
+		if not self.params.get('consoletitle', False):
+			return
+		if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
+			# c_wchar_p() might not be necessary if `message` is
+			# already of type unicode()
+			ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
+		elif 'TERM' in os.environ:
+			sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
+
+	def fixed_template(self):
+		"""Checks if the output template is fixed."""
+		return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
+
+	def trouble(self, message=None):
+		"""Determine action to take when a download problem appears.
+
+		Depending on if the downloader has been configured to ignore
+		download errors or not, this method may throw an exception or
+		not when errors are found, after printing the message.
+		"""
+		if message is not None:
+			self.to_stderr(message)
+		if not self.params.get('ignoreerrors', False):
+			raise DownloadError(message)
+		self._download_retcode = 1
+
+	def slow_down(self, start_time, byte_counter):
+		"""Sleep if the download speed is over the rate limit."""
+		rate_limit = self.params.get('ratelimit', None)
+		if rate_limit is None or byte_counter == 0:
+			return
+		now = time.time()
+		elapsed = now - start_time
+		if elapsed <= 0.0:
+			return
+		speed = float(byte_counter) / elapsed
+		if speed > rate_limit:
+			time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
+
+	def temp_name(self, filename):
+		"""Returns a temporary filename for the given filename."""
+		if self.params.get('nopart', False) or filename == u'-' or \
+				(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
+			return filename
+		return filename + u'.part'
+
+	def undo_temp_name(self, filename):
+		if filename.endswith(u'.part'):
+			return filename[:-len(u'.part')]
+		return filename
+
+	def try_rename(self, old_filename, new_filename):
+		try:
+			if old_filename == new_filename:
+				return
+			os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
+		except (IOError, OSError), err:
+			self.trouble(u'ERROR: unable to rename file')
+
+	def try_utime(self, filename, last_modified_hdr):
+		"""Try to set the last-modified time of the given file."""
+		if last_modified_hdr is None:
+			return
+		if not os.path.isfile(encodeFilename(filename)):
+			return
+		timestr = last_modified_hdr
+		if timestr is None:
+			return
+		filetime = timeconvert(timestr)
+		if filetime is None:
+			return filetime
+		try:
+			os.utime(filename, (time.time(), filetime))
+		except:
+			pass
+		return filetime
+
+	def report_writedescription(self, descfn):
+		""" Report that the description file is being written """
+		self.to_screen(u'[info] Writing video description to: ' + descfn)
+
+	def report_writesubtitles(self, srtfn):
+		""" Report that the subtitles file is being written """
+		self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
+
+	def report_writeinfojson(self, infofn):
+		""" Report that the metadata file has been written """
+		self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
+
+	def report_destination(self, filename):
+		"""Report destination filename."""
+		self.to_screen(u'[download] Destination: ' + filename)
+
+	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
+		"""Report download progress."""
+		if self.params.get('noprogress', False):
+			return
+		self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
+				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
+		self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
+				(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
+
+	def report_resuming_byte(self, resume_len):
+		"""Report attempt to resume at given byte."""
+		self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
+
+	def report_retry(self, count, retries):
+		"""Report retry in case of HTTP error 5xx"""
+		self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
+
+	def report_file_already_downloaded(self, file_name):
+		"""Report file has already been fully downloaded."""
+		try:
+			self.to_screen(u'[download] %s has already been downloaded' % file_name)
+		except (UnicodeEncodeError), err:
+			self.to_screen(u'[download] The file has already been downloaded')
+
+	def report_unable_to_resume(self):
+		"""Report it was impossible to resume download."""
+		self.to_screen(u'[download] Unable to resume')
+
+	def report_finish(self):
+		"""Report download finished."""
+		if self.params.get('noprogress', False):
+			self.to_screen(u'[download] Download completed')
+		else:
+			self.to_screen(u'')
+
+	def increment_downloads(self):
+		"""Increment the ordinal that assigns a number to each file."""
+		self._num_downloads += 1
+
+	def prepare_filename(self, info_dict):
+		"""Generate the output filename."""
+		try:
+			template_dict = dict(info_dict)
+			template_dict['epoch'] = unicode(long(time.time()))
+			template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
+			filename = self.params['outtmpl'] % template_dict
+			return filename
+		except (ValueError, KeyError), err:
+			self.trouble(u'ERROR: invalid system charset or erroneous output template')
+			return None
+
+	def _match_entry(self, info_dict):
+		""" Returns None iff the file should be downloaded """
+
+		title = info_dict['title']
+		matchtitle = self.params.get('matchtitle', False)
+		if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
+			return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
+		rejecttitle = self.params.get('rejecttitle', False)
+		if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
+			return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+		return None
+
+	def process_info(self, info_dict):
+		"""Process a single dictionary returned by an InfoExtractor."""
+
+		info_dict['stitle'] = sanitize_filename(info_dict['title'])
+
+		reason = self._match_entry(info_dict)
+		if reason is not None:
+			self.to_screen(u'[download] ' + reason)
+			return
+
+		max_downloads = self.params.get('max_downloads')
+		if max_downloads is not None:
+			if self._num_downloads > int(max_downloads):
+				raise MaxDownloadsReached()
+
+		filename = self.prepare_filename(info_dict)
+
+		# Forced printings
+		if self.params.get('forcetitle', False):
+			print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
+		if self.params.get('forceurl', False):
+			print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
+		if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
+			print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
+		if self.params.get('forcedescription', False) and 'description' in info_dict:
+			print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
+		if self.params.get('forcefilename', False) and filename is not None:
+			print filename.encode(preferredencoding(), 'xmlcharrefreplace')
+		if self.params.get('forceformat', False):
+			print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')
+
+		# Do nothing else if in simulate mode
+		if self.params.get('simulate', False):
+			return
+
+		if filename is None:
+			return
+
+		try:
+			dn = os.path.dirname(encodeFilename(filename))
+			if dn != '' and not os.path.exists(dn): # dn is already encoded
+				os.makedirs(dn)
+		except (OSError, IOError), err:
+			self.trouble(u'ERROR: unable to create directory ' + unicode(err))
+			return
+
+		if self.params.get('writedescription', False):
+			try:
+				descfn = filename + u'.description'
+				self.report_writedescription(descfn)
+				descfile = open(encodeFilename(descfn), 'wb')
+				try:
+					descfile.write(info_dict['description'].encode('utf-8'))
+				finally:
+					descfile.close()
+			except (OSError, IOError):
+				self.trouble(u'ERROR: Cannot write description file ' + descfn)
+				return
+
+		if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
+			# subtitles download errors are already managed as troubles in relevant IE
+			# that way it will silently go on when used with unsupporting IE
+			try:
+				srtfn = filename.rsplit('.', 1)[0] + u'.srt'
+				self.report_writesubtitles(srtfn)
+				srtfile = open(encodeFilename(srtfn), 'wb')
+				try:
+					srtfile.write(info_dict['subtitles'].encode('utf-8'))
+				finally:
+					srtfile.close()
+			except (OSError, IOError):
+				self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
+				return
+
+		if self.params.get('writeinfojson', False):
+			infofn = filename + u'.info.json'
+			self.report_writeinfojson(infofn)
+			try:
+				json.dump
+			except (NameError,AttributeError):
+				self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
+				return
+			try:
+				infof = open(encodeFilename(infofn), 'wb')
+				try:
+					json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
+					json.dump(json_info_dict, infof)
+				finally:
+					infof.close()
+			except (OSError, IOError):
+				self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
+				return
+
+		if not self.params.get('skip_download', False):
+			if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
+				success = True
+			else:
+				try:
+					success = self._do_download(filename, info_dict)
+				except (OSError, IOError), err:
+					raise UnavailableVideoError
+				except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+					self.trouble(u'ERROR: unable to download video data: %s' % str(err))
+					return
+				except (ContentTooShortError, ), err:
+					self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+					return
+
+			if success:
+				try:
+					self.post_process(filename, info_dict)
+				except (PostProcessingError), err:
+					self.trouble(u'ERROR: postprocessing: %s' % str(err))
+					return
+
+	def download(self, url_list):
+		"""Download a given list of URLs."""
+		if len(url_list) > 1 and self.fixed_template():
+			raise SameFileError(self.params['outtmpl'])
+
+		for url in url_list:
+			suitable_found = False
+			for ie in self._ies:
+				# Go to next InfoExtractor if not suitable
+				if not ie.suitable(url):
+					continue
+
+				# Suitable InfoExtractor found
+				suitable_found = True
+
+				# Extract information from URL and process it
+				videos = ie.extract(url)
+				for video in videos or []:
+					video['extractor'] = ie.IE_NAME
+					try:
+						self.increment_downloads()
+						self.process_info(video)
+					except UnavailableVideoError:
+						self.trouble(u'\nERROR: unable to download video')
+
+				# Suitable InfoExtractor had been found; go to next URL
+				break
+
+			if not suitable_found:
+				self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
+
+		return self._download_retcode
+
+	def post_process(self, filename, ie_info):
+		"""Run the postprocessing chain on the given file."""
+		info = dict(ie_info)
+		info['filepath'] = filename
+		for pp in self._pps:
+			info = pp.run(info)
+			if info is None:
+				break
+
+	def _download_with_rtmpdump(self, filename, url, player_url):
+		self.report_destination(filename)
+		tmpfilename = self.temp_name(filename)
+
+		# Check for rtmpdump first
+		try:
+			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
+		except (OSError, IOError):
+			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
+			return False
+
+		# Download using rtmpdump. rtmpdump returns exit code 2 when
+		# the connection was interrumpted and resuming appears to be
+		# possible. This is part of rtmpdump's normal usage, AFAIK.
+		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
+		args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
+		if self.params.get('verbose', False):
+			try:
+				import pipes
+				shell_quote = lambda args: ' '.join(map(pipes.quote, args))
+			except ImportError:
+				shell_quote = repr
+			self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
+		retval = subprocess.call(args)
+		while retval == 2 or retval == 1:
+			prevsize = os.path.getsize(encodeFilename(tmpfilename))
+			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
+			time.sleep(5.0) # This seems to be needed
+			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
+			cursize = os.path.getsize(encodeFilename(tmpfilename))
+			if prevsize == cursize and retval == 1:
+				break
+			# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
+			if prevsize == cursize and retval == 2 and cursize > 1024:
+				self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
+				retval = 0
+				break
+		if retval == 0:
+			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
+			self.try_rename(tmpfilename, filename)
+			return True
+		else:
+			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
+			return False
+
+	def _do_download(self, filename, info_dict):
+		url = info_dict['url']
+		player_url = info_dict.get('player_url', None)
+
+		# Check file already present
+		if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
+			self.report_file_already_downloaded(filename)
+			return True
+
+		# Attempt to download using rtmpdump
+		if url.startswith('rtmp'):
+			return self._download_with_rtmpdump(filename, url, player_url)
+
+		tmpfilename = self.temp_name(filename)
+		stream = None
+
+		# Do not include the Accept-Encoding header
+		headers = {'Youtubedl-no-compression': 'True'}
+		basic_request = urllib2.Request(url, None, headers)
+		request = urllib2.Request(url, None, headers)
+
+		# Establish possible resume length
+		if os.path.isfile(encodeFilename(tmpfilename)):
+			resume_len = os.path.getsize(encodeFilename(tmpfilename))
+		else:
+			resume_len = 0
+
+		open_mode = 'wb'
+		if resume_len != 0:
+			if self.params.get('continuedl', False):
+				self.report_resuming_byte(resume_len)
+				request.add_header('Range','bytes=%d-' % resume_len)
+				open_mode = 'ab'
+			else:
+				resume_len = 0
+
+		count = 0
+		retries = self.params.get('retries', 0)
+		while count <= retries:
+			# Establish connection
+			try:
+				if count == 0 and 'urlhandle' in info_dict:
+					data = info_dict['urlhandle']
+				data = urllib2.urlopen(request)
+				break
+			except (urllib2.HTTPError, ), err:
+				if (err.code < 500 or err.code >= 600) and err.code != 416:
+					# Unexpected HTTP error
+					raise
+				elif err.code == 416:
+					# Unable to resume (requested range not satisfiable)
+					try:
+						# Open the connection again without the range header
+						data = urllib2.urlopen(basic_request)
+						content_length = data.info()['Content-Length']
+					except (urllib2.HTTPError, ), err:
+						if err.code < 500 or err.code >= 600:
+							raise
+					else:
+						# Examine the reported length
+						if (content_length is not None and
+								(resume_len - 100 < long(content_length) < resume_len + 100)):
+							# The file had already been fully downloaded.
+							# Explanation to the above condition: in issue #175 it was revealed that
+							# YouTube sometimes adds or removes a few bytes from the end of the file,
+							# changing the file size slightly and causing problems for some users. So
+							# I decided to implement a suggested change and consider the file
+							# completely downloaded if the file size differs less than 100 bytes from
+							# the one in the hard drive.
+							self.report_file_already_downloaded(filename)
+							self.try_rename(tmpfilename, filename)
+							return True
+						else:
+							# The length does not match, we start the download over
+							self.report_unable_to_resume()
+							open_mode = 'wb'
+							break
+			# Retry
+			count += 1
+			if count <= retries:
+				self.report_retry(count, retries)
+
+		if count > retries:
+			self.trouble(u'ERROR: giving up after %s retries' % retries)
+			return False
+
+		data_len = data.info().get('Content-length', None)
+		if data_len is not None:
+			data_len = long(data_len) + resume_len
+		data_len_str = self.format_bytes(data_len)
+		byte_counter = 0 + resume_len
+		block_size = 1024
+		start = time.time()
+		while True:
+			# Download and write
+			before = time.time()
+			data_block = data.read(block_size)
+			after = time.time()
+			if len(data_block) == 0:
+				break
+			byte_counter += len(data_block)
+
+			# Open file just in time
+			if stream is None:
+				try:
+					(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
+					assert stream is not None
+					filename = self.undo_temp_name(tmpfilename)
+					self.report_destination(filename)
+				except (OSError, IOError), err:
+					self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
+					return False
+			try:
+				stream.write(data_block)
+			except (IOError, OSError), err:
+				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
+				return False
+			block_size = self.best_block_size(after - before, len(data_block))
+
+			# Progress message
+			speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
+			if data_len is None:
+				self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
+			else:
+				percent_str = self.calc_percent(byte_counter, data_len)
+				eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
+				self.report_progress(percent_str, data_len_str, speed_str, eta_str)
+
+			# Apply rate limit
+			self.slow_down(start, byte_counter - resume_len)
+
+		if stream is None:
+			self.trouble(u'\nERROR: Did not get any data blocks')
+			return False
+		stream.close()
+		self.report_finish()
+		if data_len is not None and byte_counter != data_len:
+			raise ContentTooShortError(byte_counter, long(data_len))
+		self.try_rename(tmpfilename, filename)
+
+		# Update file modification time
+		if self.params.get('updatetime', True):
+			info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
+
+		return True
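As the docstring above describes, the downloader and its InfoExtractors register with each other, and `download()` hands each URL to the first extractor whose `suitable()` accepts it. A minimal stand-alone sketch of that flow, assuming the vendored modules are importable and using a placeholder video id:

    from youtube_dl.FileDownloader import FileDownloader
    from youtube_dl.InfoExtractors import YoutubeIE

    fd = FileDownloader({
        'outtmpl': u'%(stitle)s.%(ext)s',   # sanitized title, filled in by process_info()
        'continuedl': True,
        'nooverwrites': True,
        'retries': 10,
    })
    fd.add_info_extractor(YoutubeIE())       # mutual registration: the IE gets set_downloader(fd)
    retcode = fd.download([u'http://www.youtube.com/watch?v=XXXXXXXXXXX'])  # placeholder id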
3,365  youtube_dl/InfoExtractors.py
3365 additions, 0 deletions not shown
198  youtube_dl/PostProcessor.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import subprocess
+import sys
+import time
+
+from utils import *
+
+
+class PostProcessor(object):
+	"""Post Processor class.
+
+	PostProcessor objects can be added to downloaders with their
+	add_post_processor() method. When the downloader has finished a
+	successful download, it will take its internal chain of PostProcessors
+	and start calling the run() method on each one of them, first with
+	an initial argument and then with the returned value of the previous
+	PostProcessor.
+
+	The chain will be stopped if one of them ever returns None or the end
+	of the chain is reached.
+
+	PostProcessor objects follow a "mutual registration" process similar
+	to InfoExtractor objects.
+	"""
+
+	_downloader = None
+
+	def __init__(self, downloader=None):
+		self._downloader = downloader
+
+	def set_downloader(self, downloader):
+		"""Sets the downloader for this PP."""
+		self._downloader = downloader
+
+	def run(self, information):
+		"""Run the PostProcessor.
+
+		The "information" argument is a dictionary like the ones
+		composed by InfoExtractors. The only difference is that this
+		one has an extra field called "filepath" that points to the
+		downloaded file.
+
+		When this method returns None, the postprocessing chain is
+		stopped. However, this method may return an information
+		dictionary that will be passed to the next postprocessing
+		object in the chain. It can be the one it received after
+		changing some fields.
+
+		In addition, this method may raise a PostProcessingError
+		exception that will be taken into account by the downloader
+		it was called from.
+		"""
+		return information # by default, do nothing
+
+class AudioConversionError(BaseException):
+	def __init__(self, message):
+		self.message = message
+
+class FFmpegExtractAudioPP(PostProcessor):
+	def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
+		PostProcessor.__init__(self, downloader)
+		if preferredcodec is None:
+			preferredcodec = 'best'
+		self._preferredcodec = preferredcodec
+		self._preferredquality = preferredquality
+		self._keepvideo = keepvideo
+		self._exes = self.detect_executables()
+
+	@staticmethod
+	def detect_executables():
+		def executable(exe):
+			try:
+				subprocess.Popen([exe, '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+			except OSError:
+				return False
+			return exe
+		programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
+		return dict((program, executable(program)) for program in programs)
+
+	def get_audio_codec(self, path):
+		if not self._exes['ffprobe'] and not self._exes['avprobe']: return None
+		try:
+			cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', '--', encodeFilename(path)]
+			handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
+			output = handle.communicate()[0]
+			if handle.wait() != 0:
+				return None
+		except (IOError, OSError):
+			return None
+		audio_codec = None
+		for line in output.split('\n'):
+			if line.startswith('codec_name='):
+				audio_codec = line.split('=')[1].strip()
+			elif line.strip() == 'codec_type=audio' and audio_codec is not None:
+				return audio_codec
+		return None
+
+	def run_ffmpeg(self, path, out_path, codec, more_opts):
+		if not self._exes['ffmpeg'] and not self._exes['avconv']:
+			raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
+		if codec is None:
+			acodec_opts = []
+		else:
+			acodec_opts = ['-acodec', codec]
+		cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path), '-vn']
+			   + acodec_opts + more_opts +
+			   ['--', encodeFilename(out_path)])
+		p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+		stdout,stderr = p.communicate()
+		if p.returncode != 0:
+			msg = stderr.strip().split('\n')[-1]
+			raise AudioConversionError(msg)
+
+	def run(self, information):
+		path = information['filepath']
+
+		filecodec = self.get_audio_codec(path)
+		if filecodec is None:
+			self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
+			return None
+
+		more_opts = []
+		if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
+			if self._preferredcodec == 'm4a' and filecodec == 'aac':
+				# Lossless, but in another container
+				acodec = 'copy'
+				extension = self._preferredcodec
+				more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
+			elif filecodec in ['aac', 'mp3', 'vorbis']:
+				# Lossless if possible
+				acodec = 'copy'
+				extension = filecodec
+				if filecodec == 'aac':
+					more_opts = ['-f', 'adts']
+				if filecodec == 'vorbis':
+					extension = 'ogg'
+			else:
+				# MP3 otherwise.
+				acodec = 'libmp3lame'
+				extension = 'mp3'
+				more_opts = []
+				if self._preferredquality is not None:
+					if int(self._preferredquality) < 10:
+						more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
+					else:
+						more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality]
+		else:
+			# We convert the audio (lossy)
+			acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
+			extension = self._preferredcodec
+			more_opts = []
+			if self._preferredquality is not None:
+				if int(self._preferredquality) < 10:
+					more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
+				else:
+					more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality]
+			if self._preferredcodec == 'aac':
+				more_opts += ['-f', 'adts']
+			if self._preferredcodec == 'm4a':
+				more_opts += [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
+			if self._preferredcodec == 'vorbis':
+				extension = 'ogg'
+			if self._preferredcodec == 'wav':
+				extension = 'wav'
+				more_opts += ['-f', 'wav']
+
+		prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
+		new_path = prefix + sep + extension
+		self._downloader.to_screen(u'[' + (self._exes['avconv'] and 'avconv' or 'ffmpeg') + '] Destination: ' + new_path)
+		try:
+			self.run_ffmpeg(path, new_path, acodec, more_opts)
+		except:
+			etype,e,tb = sys.exc_info()
+			if isinstance(e, AudioConversionError):
+				self._downloader.to_stderr(u'ERROR: audio conversion failed: ' + e.message)
+			else:
+				self._downloader.to_stderr(u'ERROR: error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg'))
+			return None
+
+		# Try to update the date time for extracted audio file.
+		if information.get('filetime') is not None:
+			try:
+				os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
+			except:
+				self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
+
+		if not self._keepvideo:
+			try:
+				os.remove(encodeFilename(path))
+			except (IOError, OSError):
+				self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
+				return None
+
+		information['filepath'] = new_path
+		return information
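The post-processing chain plugs into the same downloader through `add_post_processor()`; edu_10gen.py does not use it, but a hedged sketch of extracting MP3 audio after each download (assumes ffmpeg or avconv is on the PATH) could look like:

    from youtube_dl.FileDownloader import FileDownloader
    from youtube_dl.InfoExtractors import YoutubeIE
    from youtube_dl.PostProcessor import FFmpegExtractAudioPP

    fd = FileDownloader({'outtmpl': u'%(stitle)s.%(ext)s'})
    fd.add_info_extractor(YoutubeIE())
    # run() receives the info dict of the finished download and converts the file to mp3
    fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec='mp3', keepvideo=True))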
557  youtube_dl/__init__.py
@@ -0,0 +1,557 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from __future__ import with_statement
+
+__authors__  = (
+	'Ricardo Garcia Gonzalez',
+	'Danny Colligan',
+	'Benjamin Johnson',
+	'Vasyl\' Vavrychuk',
+	'Witold Baryluk',
+	'Paweł Paprota',
+	'Gergely Imreh',
+	'Rogério Brito',
+	'Philipp Hagemeister',
+	'Sören Schulze',
+	'Kevin Ngo',
+	'Ori Avtalion',
+	'shizeeg',
+	'Filippo Valsorda',
+	)
+
+__license__ = 'Public Domain'
+__version__ = '2012.10.09'
+
+UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
+UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION'
+UPDATE_URL_EXE = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl.exe'
+
+
+import cookielib
+import getpass
+import optparse
+import os
+import re
+import shlex
+import socket
+import subprocess
+import sys
+import urllib2
+import warnings
+
+from utils import *
+from FileDownloader import *
+from InfoExtractors import *
+from PostProcessor import *
+
+def updateSelf(downloader, filename):
+	''' Update the program file with the latest version from the repository '''
+	# Note: downloader only used for options
+
+	if not os.access(filename, os.W_OK):
+		sys.exit('ERROR: no write permissions on %s' % filename)
+
+	downloader.to_screen(u'Updating to latest version...')
+
+	urlv = urllib2.urlopen(UPDATE_URL_VERSION)
+	newversion = urlv.read().strip()
+	if newversion == __version__:
+		downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
+		return
+	urlv.close()
+
+	if hasattr(sys, "frozen"): #py2exe
+		exe = os.path.abspath(filename)
+		directory = os.path.dirname(exe)
+		if not os.access(directory, os.W_OK):
+			sys.exit('ERROR: no write permissions on %s' % directory)
+
+		try:
+			urlh = urllib2.urlopen(UPDATE_URL_EXE)
+			newcontent = urlh.read()
+			urlh.close()
+			with open(exe + '.new', 'wb') as outf:
+				outf.write(newcontent)
+		except (IOError, OSError), err:
+			sys.exit('ERROR: unable to download latest version')