Skip to content

Commit

Permalink
[f4m] Prefer baseURL for relative URLs (closes #14660)
Browse files: browse the repository at this point in the history
  • Loading branch information
dstftw committed Nov 4, 2017
1 parent cd670be commit 48107c1
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 15 deletions.
25 changes: 17 additions & 8 deletions youtube_dl/downloader/f4m.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,17 @@ def remove_encrypted_media(media):
media))


def _add_ns(prop):
return '{http://ns.adobe.com/f4m/1.0}%s' % prop
def _add_ns(prop, ver=1):
return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)


def get_base_url(manifest):
    """Return the text of the manifest's ``baseURL`` element, stripped.

    The element is looked up under both the F4M 1.0 and F4M 2.0
    namespaces via ``xpath_text``.

    :param manifest: parsed F4M manifest node to search.
    :returns: the base URL with surrounding whitespace removed when a
        non-empty value is found; otherwise the falsy value produced by
        ``xpath_text`` is returned unchanged (presumably ``None``, given
        ``default=None`` -- confirm against ``xpath_text``).
    """
    base_url = xpath_text(
        manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
        'base URL', default=None)
    # Only strip when there is something to strip; a missing element must
    # stay falsy so callers can fall back with ``get_base_url(...) or ...``.
    if base_url:
        base_url = base_url.strip()
    return base_url


class F4mFD(FragmentFD):
Expand Down Expand Up @@ -330,13 +339,13 @@ def real_download(self, filename, info_dict):
rate, media = list(filter(
lambda f: int(f[0]) == requested_bitrate, formats))[0]

base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
man_base_url = get_base_url(doc) or man_url

base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
# From Adobe F4M 3.0 spec:
# The <baseURL> element SHALL be the base URL for all relative
# (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
# URLs should be relative to the location of the containing document.
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
boot_info, bootstrap_url = self._parse_bootstrap_node(
bootstrap_node, man_base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
Expand Down
14 changes: 7 additions & 7 deletions youtube_dl/extractor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@
compat_urlparse,
compat_xml_parse_error,
)
from ..downloader.f4m import remove_encrypted_media
from ..downloader.f4m import (
get_base_url,
remove_encrypted_media,
)
from ..utils import (
NO_DEFAULT,
age_restricted,
Expand Down Expand Up @@ -1239,11 +1242,8 @@ def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None,
media_nodes = remove_encrypted_media(media_nodes)
if not media_nodes:
return formats
base_url = xpath_text(
manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
'base URL', default=None)
if base_url:
base_url = base_url.strip()

manifest_base_url = get_base_url(manifest)

bootstrap_info = xpath_element(
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
Expand Down Expand Up @@ -1275,7 +1275,7 @@ def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None,
continue
manifest_url = (
media_url if media_url.startswith('http://') or media_url.startswith('https://')
else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
# If media_url is itself a f4m manifest do the recursive extraction
# since bitrates in parent manifest (this one) and media_url manifest
# may differ leading to inability to resolve the format by requested
Expand Down

0 comments on commit 48107c1

Please sign in to comment.