Skip to content

Commit

Permalink
Fix for #763 (#767)
Browse files Browse the repository at this point in the history
* Fix KeyError: 'assets', and minor refactor
* Fixed tests. Changes to YT broke a single test, and updating the mock had cascading effects on other tests. This updates the tests to reflect a more recent YT page, and introduces OS-agnostic file paths for testing on Windows.
* Fixed flake8 issues.
  • Loading branch information
tfdahlin committed Oct 26, 2020
1 parent 8464dc1 commit eec6f64
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 97 deletions.
2 changes: 1 addition & 1 deletion pytube/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ def description(self) -> str:
"""
return self.player_response.get("videoDetails", {}).get(
"shortDescription"
) or extract._get_vid_descr(self.watch_html)
)

@property
def rating(self) -> float:
Expand Down
71 changes: 28 additions & 43 deletions pytube/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@
import logging
import re
from collections import OrderedDict
from html.parser import HTMLParser
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple
from urllib.parse import parse_qs
from urllib.parse import parse_qsl
Expand All @@ -17,44 +15,13 @@
from urllib.parse import urlencode

from pytube.cipher import Cipher
from pytube.exceptions import HTMLParseError
from pytube.exceptions import LiveStreamError
from pytube.exceptions import RegexMatchError
from pytube.helpers import regex_search

logger = logging.getLogger(__name__)


class PytubeHTMLParser(HTMLParser):
    """HTML parser that accumulates the text of the video description.

    Text is collected while the parser is inside a ``<p>`` element whose
    ``id`` attribute is ``eow-description``. A self-closing ``<br/>`` seen
    inside that element causes the next text chunk to be prefixed with a
    newline. The collected text is available as ``vid_descr``.
    """

    # True while the parser is inside the description paragraph.
    in_vid_descr = False
    # True when a <br/> was seen and the next text chunk needs a newline.
    in_vid_descr_br = False
    # Description text gathered so far.
    vid_descr = ""

    def handle_starttag(self, tag, attrs):
        """Enter description mode on ``<p id="eow-description">``."""
        if tag != "p":
            return
        is_description = any(
            pair[0] == "id" and pair[1] == "eow-description"
            for pair in attrs
        )
        if is_description:
            self.in_vid_descr = True

    def handle_endtag(self, tag):
        """Leave description mode when the enclosing ``<p>`` closes."""
        if tag == "p" and self.in_vid_descr:
            self.in_vid_descr = False

    def handle_startendtag(self, tag, attrs):
        """Remember a self-closing ``<br/>`` seen inside the description."""
        if tag == "br" and self.in_vid_descr:
            self.in_vid_descr_br = True

    def handle_data(self, data):
        """Append a text chunk to the description when applicable."""
        # The pending-break flag is checked first, so the chunk that follows
        # a <br/> is appended (with a leading newline) regardless of the
        # current value of in_vid_descr.
        if self.in_vid_descr_br:
            self.vid_descr = f"{self.vid_descr}\n{data}"
            self.in_vid_descr_br = False
            return
        if self.in_vid_descr:
            self.vid_descr = self.vid_descr + data

    def error(self, message):
        """Report a parser error as an ``HTMLParseError``."""
        raise HTMLParseError(message)


def is_age_restricted(watch_html: str) -> bool:
"""Check if content is age restricted.
Expand Down Expand Up @@ -151,7 +118,7 @@ def js_url(html: str) -> str:
:param str html:
The html contents of the watch page.
"""
base_js = get_ytplayer_config(html)["assets"]["js"]
base_js = get_ytplayer_js(html)
return "https://youtube.com" + base_js


Expand Down Expand Up @@ -182,6 +149,31 @@ def mime_type_codec(mime_type_codec: str) -> Tuple[str, List[str]]:
return mime_type, [c.strip() for c in codecs.split(",")]


def get_ytplayer_js(html: str) -> Any:
    """Get the YouTube player base JavaScript path.

    :param str html:
        The html contents of the watch page.
    :rtype: str
    :returns:
        Path to YouTube's base.js file, taken from the first capture group
        of the first pattern that matches.
    :raises RegexMatchError:
        If none of the known patterns is found in ``html``.
    """
    js_url_patterns = [r"\"jsUrl\":\"([^\"]*)\""]

    for pattern in js_url_patterns:
        found = re.search(pattern, html)
        if found is None:
            # Try the next candidate pattern.
            continue
        logger.debug("finished regex search, matched: %s", pattern)
        return found.group(1)

    raise RegexMatchError(
        caller="get_ytplayer_js", pattern="js_url_patterns"
    )


def get_ytplayer_config(html: str) -> Any:
"""Get the YouTube player configuration data from the watch html.
Expand Down Expand Up @@ -215,13 +207,6 @@ def get_ytplayer_config(html: str) -> Any:
)


def _get_vid_descr(html: Optional[str]) -> str:
    """Extract the video description text from the watch page html.

    :param html:
        The html contents of the watch page; may be ``None`` or empty.
    :rtype: str
    :returns:
        The text collected by :class:`PytubeHTMLParser`, or the parser's
        initial empty description when ``html`` is falsy.
    """
    parser = PytubeHTMLParser()
    if not html:
        # Nothing to parse; the parser still holds its initial description.
        return parser.vid_descr
    parser.feed(html)
    return parser.vid_descr


def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
"""Apply the decrypted signature to the stream manifest.
Expand Down Expand Up @@ -316,11 +301,11 @@ def apply_descrambler(stream_data: Dict, key: str) -> None:
except KeyError:
cipher_url = [
parse_qs(
formats[i][
data[
"cipher" if "cipher" in data.keys() else "signatureCipher"
]
)
for i, data in enumerate(formats)
for data in formats
]
stream_data[key] = [
{
Expand Down
Binary file modified tests/mocks/yt-video-9bZkp7q19f0.json.gz
Binary file not shown.
9 changes: 5 additions & 4 deletions tests/test_captions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
import os
from unittest import mock
from unittest.mock import MagicMock
from unittest.mock import mock_open
Expand Down Expand Up @@ -71,7 +72,7 @@ def test_download(srt):
)
caption.download("title")
assert (
open_mock.call_args_list[0][0][0].split("/")[-1] == "title (en).srt"
open_mock.call_args_list[0][0][0].split(os.path.sep)[-1] == "title (en).srt"
)


Expand All @@ -89,7 +90,7 @@ def test_download_with_prefix(srt):
)
caption.download("title", filename_prefix="1 ")
assert (
open_mock.call_args_list[0][0][0].split("/")[-1]
open_mock.call_args_list[0][0][0].split(os.path.sep)[-1]
== "1 title (en).srt"
)

Expand All @@ -108,7 +109,7 @@ def test_download_with_output_path(srt):
}
)
file_path = caption.download("title", output_path="blah")
assert file_path == "/target/title (en).srt"
assert file_path == os.path.join("/target","title (en).srt")
captions.target_directory.assert_called_with("blah")


Expand All @@ -126,7 +127,7 @@ def test_download_xml_and_trim_extension(xml):
)
caption.download("title.xml", srt=False)
assert (
open_mock.call_args_list[0][0][0].split("/")[-1] == "title (en).xml"
open_mock.call_args_list[0][0][0].split(os.path.sep)[-1] == "title (en).xml"
)


Expand Down
21 changes: 1 addition & 20 deletions tests/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def test_info_url_age_restricted(cipher_signature):

def test_js_url(cipher_signature):
expected = (
"https://youtube.com/yts/jsbin/player_ias-vflWQEEag/en_US/base.js"
"https://youtube.com/s/player/4a1799bd/player_ias.vflset/en_US/base.js"
)
result = extract.js_url(cipher_signature.watch_html)
assert expected == result
Expand All @@ -52,25 +52,6 @@ def test_non_age_restricted(cipher_signature):
assert not extract.is_age_restricted(cipher_signature.watch_html)


def test_get_vid_desc(cipher_signature):
expected = (
"PSY - ‘I LUV IT’ M/V @ https://youtu.be/Xvjnoagk6GU\n"
"PSY - ‘New Face’ M/V @https://youtu.be/OwJPPaEyqhI\n"
"PSY - 8TH ALBUM '4X2=8' on iTunes @\n"
"https://smarturl.it/PSY_8thAlbum\n"
"PSY - GANGNAM STYLE(강남스타일) on iTunes @ http://smarturl.it/PsyGangnam\n"
"#PSY #싸이 #GANGNAMSTYLE #강남스타일\n"
"More about PSY@\nhttp://www.youtube.com/officialpsy\n"
"http://www.facebook.com/officialpsy\n"
"http://twitter.com/psy_oppa\n"
"https://www.instagram.com/42psy42\n"
"http://iTunes.com/PSY\n"
"http://sptfy.com/PSY\n"
"http://weibo.com/psyoppa"
)
assert extract._get_vid_descr(cipher_signature.watch_html) == expected


def test_mime_type_codec():
mime_type, mime_subtype = extract.mime_type_codec(
'audio/webm; codecs="opus"'
Expand Down
3 changes: 2 additions & 1 deletion tests/test_helpers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
import os
from unittest import mock

import pytest
Expand Down Expand Up @@ -61,7 +62,7 @@ def cached_func(stuff):
@mock.patch("os.getcwd", return_value="/cwd")
@mock.patch("os.makedirs")
def test_target_directory_with_relative_path(_, __, makedirs): # noqa: PT019
assert target_directory("test") == "/cwd/test"
assert target_directory("test") == os.path.join("/cwd", "test")
makedirs.assert_called()


Expand Down
6 changes: 3 additions & 3 deletions tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def test_get_highest_resolution(cipher_signature):
def test_filter_is_dash(cipher_signature):
streams = cipher_signature.streams.filter(is_dash=False)
itags = [s.itag for s in streams]
assert itags == [18, 398, 397, 396, 395, 394]
assert itags == [18, 399, 398, 397, 396, 395, 394]


def test_get_audio_only(cipher_signature):
Expand All @@ -156,13 +156,13 @@ def test_get_audio_only_with_subtype(cipher_signature):


def test_sequence(cipher_signature):
assert len(cipher_signature.streams) == 22
assert len(cipher_signature.streams) == 23
assert cipher_signature.streams[0] is not None


def test_otf(cipher_signature):
non_otf = cipher_signature.streams.otf()
assert len(non_otf) == 22
assert len(non_otf) == 23

otf = cipher_signature.streams.otf(True)
assert len(otf) == 0
Expand Down
44 changes: 19 additions & 25 deletions tests/test_streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def test_title(cipher_signature):


def test_expiration(cipher_signature):
assert cipher_signature.streams[0].expiration == datetime(2020, 1, 16, 5, 12, 5)
assert cipher_signature.streams[0].expiration == datetime(2020, 10, 24, 11, 7, 41)


def test_caption_tracks(presigned_video):
Expand Down Expand Up @@ -93,35 +93,17 @@ def test_description(cipher_signature):
)
assert cipher_signature.description == expected

cipher_signature.player_response = {}
expected = (
"PSY - ‘I LUV IT’ M/V @ https://youtu.be/Xvjnoagk6GU\n"
"PSY - ‘New Face’ M/V @https://youtu.be/OwJPPaEyqhI\n"
"PSY - 8TH ALBUM '4X2=8' on iTunes @\n"
"https://smarturl.it/PSY_8thAlbum\n"
"PSY - GANGNAM STYLE(강남스타일) on iTunes @ http://smarturl.it/PsyGangnam\n"
"#PSY #싸이 #GANGNAMSTYLE #강남스타일\n"
"More about PSY@\nhttp://www.youtube.com/officialpsy\n"
"http://www.facebook.com/officialpsy\n"
"http://twitter.com/psy_oppa\n"
"https://www.instagram.com/42psy42\n"
"http://iTunes.com/PSY\n"
"http://sptfy.com/PSY\n"
"http://weibo.com/psyoppa"
)
assert cipher_signature.description == expected


def test_rating(cipher_signature):
assert cipher_signature.rating == 4.522203
assert cipher_signature.rating == 4.5375643


def test_length(cipher_signature):
assert cipher_signature.length == 252


def test_views(cipher_signature):
assert cipher_signature.views == 3494704859
assert cipher_signature.views == 3830838693


@mock.patch(
Expand Down Expand Up @@ -149,7 +131,10 @@ def test_download_with_prefix(cipher_signature):
with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
stream = cipher_signature.streams[0]
file_path = stream.download(filename_prefix="prefix")
assert file_path == "/target/prefixPSY - GANGNAM STYLE(강남스타일) MV.mp4"
assert file_path == os.path.join(
"/target",
"prefixPSY - GANGNAM STYLE(강남스타일) MV.mp4"
)


@mock.patch(
Expand All @@ -164,7 +149,10 @@ def test_download_with_filename(cipher_signature):
with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
stream = cipher_signature.streams[0]
file_path = stream.download(filename="cool name bro")
assert file_path == "/target/cool name bro.mp4"
assert file_path == os.path.join(
"/target",
"cool name bro.mp4"
)


@mock.patch(
Expand All @@ -181,7 +169,10 @@ def test_download_with_existing(cipher_signature):
stream = cipher_signature.streams[0]
os.path.getsize = Mock(return_value=stream.filesize)
file_path = stream.download()
assert file_path == "/target/PSY - GANGNAM STYLE(강남스타일) MV.mp4"
assert file_path == os.path.join(
"/target",
"PSY - GANGNAM STYLE(강남스타일) MV.mp4"
)
assert not request.stream.called


Expand All @@ -199,7 +190,10 @@ def test_download_with_existing_no_skip(cipher_signature):
stream = cipher_signature.streams[0]
os.path.getsize = Mock(return_value=stream.filesize)
file_path = stream.download(skip_existing=False)
assert file_path == "/target/PSY - GANGNAM STYLE(강남스타일) MV.mp4"
assert file_path == os.path.join(
"/target",
"PSY - GANGNAM STYLE(강남스타일) MV.mp4"
)
assert request.stream.called


Expand Down

1 comment on commit eec6f64

@SadeghSaket
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, I receive this error message now:

Traceback (most recent call last):
  File "C:\x\YoutubeDownloader\main.py", line 5, in <module>
    yt = YouTube(link)
  File "C:\Users\x\AppData\Local\Programs\Python\Python39\lib\site-packages\pytube\__main__.py", line 92, in __init__
    self.descramble()
  File "C:\Users\x\AppData\Local\Programs\Python\Python39\lib\site-packages\pytube\__main__.py", line 140, in descramble
    apply_signature(self.player_config_args, fmt, self.js)
  File "C:\Users\x\AppData\Local\Programs\Python\Python39\lib\site-packages\pytube\extract.py", line 209, in apply_signature
    cipher = Cipher(js=js)
  File "C:\Users\x\AppData\Local\Programs\Python\Python39\lib\site-packages\pytube\cipher.py", line 30, in __init__
    self.transform_plan: List[str] = get_transform_plan(js)
  File "C:\Users\x\AppData\Local\Programs\Python\Python39\lib\site-packages\pytube\cipher.py", line 149, in get_transform_plan
    return regex_search(pattern, js, group=1).split(";")
  File "C:\Users\x\AppData\Local\Programs\Python\Python39\lib\site-packages\pytube\helpers.py", line 34, in regex_search
    raise RegexMatchError(caller="regex_search", pattern=pattern)
pytube.exceptions.RegexMatchError: regex_search: could not find match for Rda=function\(\w\){[a-z=\.\(\"\)]*;(.*);(?:.+)}

Please sign in to comment.