Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

Support download of multipart videos #995

Closed
wants to merge 8 commits into from

6 participants

@jaimeMF
Collaborator

This is my solution for the issue I posted in #991

A parts field can be set in the info_dict containing the URLs of the different parts. When all the parts have been downloaded, they are joined using the FFmpegJoinVideos postprocessor

I have tested it with a Tudou video: http://www.tudou.com/programs/view/J-P3o-chBp4/ (It's the only IE I have found with multiple parts videos)

@jaimeMF jaimeMF referenced this pull request
Closed

Add flv file merger #1012

@jaimeMF
Collaborator

I've changed the parts list to a list of dictionaries, since some videos may require specifying rtmpdump parameters and it's more flexible.

@jaimeMF
Collaborator

I have made the class for joining videos behave like a normal postprocessor. I think this is a better approach, so if you find it OK, I'll squash it into fewer commits for merging.

@FiloSottile
Collaborator

Does this work with Youku and solve #991?

@FiloSottile
Collaborator

(yes, I'm going through PRs, we have too much of a backlog here)

@jaimeMF
Collaborator

I haven't implemented it for Youku; if you post a URL that's split into parts I'll look into it.
It would be a good approach for #991 (I'm not sure if just concatenating the fragments would be enough). But we would have to do it automatically, otherwise you can end up with a hundred *.ts fragments of just a few seconds (on Tudou you get parts of a few minutes, so it's not that bad). There's one problem: the fragments could be encrypted (we currently use ffmpeg for downloading with the manifest and it handles them). I would (personally) write a custom downloader for m3u8 manifests that could handle all cases; it would probably call the postprocessor to join the fragments when it has finished.

@FiloSottile
Collaborator

Ok! I'll look for a Youku test vector.

Please reopen an issue for m3u8.

@jaimeMF
Collaborator

Do you mean writing a custom downloader? We already support it with ffmpeg.

@FiloSottile
Collaborator

Uh, ok sorry, I don't know that part of the code. Anyway yes, just not to lose the discussion here and in #991

@phihag
Collaborator

Sorry for the delay. I'd like to merge this now, but the conflicts are kind of scary. Can you rebase or merge onto the current master?

@jaimeMF
Collaborator

I'll try to rebase it tonight.

@jaimeMF
Collaborator

It should be ready for merge now; I have tested it again (only with the first 3 parts of the video, since the connection is ultra-slow) and it works fine.

@malept

Out of curiosity, any particular reason why this hasn't been merged yet? FWIW, I have an updated branch of this, merged with master as of 3/26: malept@197e662

It also contains modifications to the comedycentral and mtv extractors to merge videos in the playlist if --join-parts is specified.

@naglis naglis referenced this pull request
Open

Parleys Support #1876

@mcepl

Out of curiosity, any particular reason why this hasn't been merged yet?

Well, I would think failing tests could have something to do with it? ;)

@yan12125
Collaborator

Merge conflicts are now severe again. Could you post a new version?

@jaimeMF
Collaborator

Since I don't really need this feature and I don't have time right now, I'm closing the PR. If someone wants to update it, feel free to do so and open a new PR.

@jaimeMF jaimeMF closed this
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
This page is out of date. Refresh to see the latest.
View
3  test/test_download.py
@@ -151,8 +151,9 @@ def try_rm_tcs_files():
sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=2) + u'\n')
# Check for the presence of mandatory fields
- for key in ('id', 'url', 'title', 'ext'):
+ for key in ('id', 'title', 'ext'):
self.assertTrue(key in info_dict.keys() and info_dict[key])
+ self.assertTrue(any(key in info_dict.keys() and info_dict[key] for key in ('url', 'parts')))
# Check for mandatory fields that are automatically set by YoutubeDL
for key in ['webpage_url', 'extractor', 'extractor_key']:
self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
View
36 youtube_dl/PostProcessor.py
@@ -2,6 +2,7 @@
import subprocess
import sys
import time
+import io
from .utils import (
@@ -10,6 +11,7 @@
PostProcessingError,
shell_quote,
subtitles_filename,
+ build_part_filename,
)
@@ -78,15 +80,15 @@ def executable(exe):
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
return dict((program, executable(program)) for program in programs)
- def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
+ def run_ffmpeg_multiple_files(self, input_paths, out_path, opts, input_opts=[]):
if not self._exes['ffmpeg'] and not self._exes['avconv']:
raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
files_cmd = []
for path in input_paths:
files_cmd.extend(['-i', encodeFilename(path)])
- cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd
- + opts +
+ cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] +
+ input_opts + files_cmd + opts +
[encodeFilename(self._ffmpeg_filename_argument(out_path))])
if self._downloader.params.get('verbose', False):
@@ -98,8 +100,8 @@ def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
msg = stderr.strip().split('\n')[-1]
raise FFmpegPostProcessorError(msg)
- def run_ffmpeg(self, path, out_path, opts):
- self.run_ffmpeg_multiple_files([path], out_path, opts)
+ def run_ffmpeg(self, path, out_path, opts, input_opts=[]):
+ self.run_ffmpeg_multiple_files([path], out_path, opts, input_opts)
def _ffmpeg_filename_argument(self, fn):
# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
@@ -509,3 +511,27 @@ def run(self, info):
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
return True, info
+
+
+class FFmpegJoinVideosPP(FFmpegPostProcessor):
+ def run(self, information):
+ filename = information['filepath']
+ parts = information.get('parts')
+ if parts is None or len(parts) == 1:
+ return (True, information)
+ parts_files = [build_part_filename(filename, i) for (i, _) in enumerate(parts)]
+ files_file = u'%s.videos' % filename
+ with io.open(encodeFilename(files_file), 'w', encoding='utf-8') as f:
+ for video in parts_files:
+ f.write(u'file \'%s\'\n' % video)
+ self._downloader.to_screen(u'[ffmpeg] Joining video parts, destination: %s' % filename)
+ try:
+ self.run_ffmpeg(files_file, filename, ['-c', 'copy'], ['-f', 'concat'])
+ except FFmpegPostProcessorError:
+ return False, information
+ os.remove(encodeFilename(files_file))
+ # We have to manually remove the parts if requested
+ if not self._downloader.params.get('keepvideo', False):
+ for part_file in parts_files:
+ os.remove(encodeFilename(part_file))
+ return (True, information)
View
22 youtube_dl/YoutubeDL.py
@@ -43,6 +43,7 @@
SameFileError,
sanitize_filename,
subtitles_filename,
+ build_part_filename,
takewhile_inclusive,
UnavailableVideoError,
write_json_file,
@@ -773,7 +774,26 @@ def process_info(self, info_dict):
success = True
else:
try:
- success = self.fd._do_download(filename, info_dict)
+ parts = info_dict.get('parts',[])
+ if not parts:
+ success = self.fd._do_download(filename, info_dict)
+ elif len(parts) == 1:
+ info_dict.update(parts[0])
+ success = self.fd._do_download(filename, info_dict)
+ else:
+ # We check if the final video has already been downloaded
+ if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)):
+ self.fd.report_file_already_downloaded(filename)
+ success = True
+ else:
+ parts_success = []
+ self.to_screen(u'[info] Downloading %s parts' % len(parts))
+ for (i, part) in enumerate(parts):
+ part_info = dict(info_dict)
+ part_info.update(part)
+ part_filename = build_part_filename(filename, i)
+ parts_success.append(self.fd._do_download(part_filename, part_info))
+ success = all(parts_success)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_error(u'unable to download video data: %s' % str(err))
return
View
5 youtube_dl/__init__.py
@@ -76,6 +76,7 @@
FFmpegVideoConvertor,
FFmpegExtractAudioPP,
FFmpegEmbedSubtitlePP,
+ FFmpegJoinVideosPP,
)
@@ -390,6 +391,8 @@ def _hide_login_info(opts):
help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None,
help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm)')
+ postproc.add_option('--join-parts', action='store_true', dest='joinparts', default=False,
+ help='Join the video parts if the video is splitted in different parts.')
postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
help='keeps the video file on disk after the post-processing; the video is erased by default')
postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
@@ -658,6 +661,8 @@ def _real_main(argv=None):
ydl.add_default_info_extractors()
# PostProcessors
+ if opts.joinparts:
+ ydl.add_post_processor(FFmpegJoinVideosPP())
# Add the metadata pp first, the other pps will copy it
if opts.addmetadata:
ydl.add_post_processor(FFmpegMetadataPP())
View
3  youtube_dl/extractor/common.py
@@ -55,6 +55,9 @@ class InfoExtractor(object):
subtitles: The subtitle file contents as a dictionary in the format
{language: subtitles}.
view_count: How many users have watched the video on the platform.
+ parts: A list of info_dicts for each of the parts of the video,
+ it must include the url field, if it's a rtmp download it
+ can contain additional fields for rtmpdump.
urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen
age_limit: Age restriction for the video, as an integer (years)
View
29 youtube_dl/extractor/tudou.py
@@ -10,7 +10,7 @@ class TudouIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
_TESTS = [{
u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
- u'file': u'159448201.f4v',
+ u'file': u'2xN2duXMxmw.f4v',
u'md5': u'140a49ed444bd22f93330985d8475fcb',
u'info_dict': {
u"title": u"卡马乔国足开大脚长传冲吊集锦"
@@ -58,21 +58,20 @@ def _real_extract(self, url):
# It looks like the keys are the arguments that have to be passed as
# the hd field in the request url, we pick the higher
quality = sorted(segments.keys())[-1]
- parts = segments[quality]
- result = []
- len_parts = len(parts)
- if len_parts > 1:
- self.to_screen(u'%s: found %s parts' % (video_id, len_parts))
- for part in parts:
+ segs = segments[quality]
+ parts = []
+ len_segs = len(segs)
+ if len_segs > 1:
+ self.to_screen(u'%s: found %s parts' % (video_id, len_segs))
+ for part in segs:
part_id = part['k']
final_url = self._url_for_id(part_id, quality)
ext = (final_url.split('?')[0]).split('.')[-1]
- part_info = {'id': part_id,
- 'url': final_url,
- 'ext': ext,
- 'title': title,
- 'thumbnail': thumbnail_url,
- }
- result.append(part_info)
+ parts.append({'url': final_url})
- return result
+ return {'id': video_id,
+ 'ext': ext,
+ 'title': title,
+ 'thumbnail': thumbnail_url,
+ 'parts': parts,
+ }
View
4 youtube_dl/utils.py
@@ -775,6 +775,10 @@ def determine_ext(url, default_ext=u'unknown_video'):
def subtitles_filename(filename, sub_lang, sub_format):
return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
+def build_part_filename(final_filename, part_index):
+ (name, ext) = os.path.splitext(final_filename)
+ return '%s.%d%s' % (name, part_index, ext)
+
def date_from_str(date_str):
"""
Return a datetime object from a string in the format YYYYMMDD or
Something went wrong with that request. Please try again.