diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 8fe6c2dea..9dbc5cfad 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 15.0.0 +current_version = 15.0.6 commit = True tag = True diff --git a/README.md b/README.md index 6a6e3f2f0..e99559ac1 100644 --- a/README.md +++ b/README.md @@ -2,11 +2,6 @@

pytube logo

-

- pypi - - -

### Actively soliciting contributors! @@ -52,16 +47,10 @@ This guide covers the most basic usage of the library. For more detailed informa Pytube requires an installation of Python 3.6 or greater, as well as pip. (Pip is typically bundled with Python [installations](https://python.org/downloads).) -To install from PyPI with pip: - -```bash -$ python -m pip install pytube -``` - -Sometimes, the PyPI release becomes slightly outdated. To install from the source with pip: +To install from the source with pip: ```bash -$ python -m pip install git+https://github.com/pytube/pytube +$ python -m pip install git+https://github.com/MemoKing34/pytube ``` ### Using pytube in a Python script diff --git a/pytube/__main__.py b/pytube/__main__.py index 60451d36c..2bbf75f69 100644 --- a/pytube/__main__.py +++ b/pytube/__main__.py @@ -7,7 +7,7 @@ """ import logging -from typing import Any, Callable, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional, Union import pytube import pytube.exceptions as exceptions @@ -61,6 +61,9 @@ def __init__( self._embed_html: Optional[str] = None self._player_config_args: Optional[Dict] = None # inline js in the html containing self._age_restricted: Optional[bool] = None + self._has_multiple_audiotrack: Optional[bool] = None + self._language: Optional[str] = None # These are for multiple audio track + self._language_code: Optional[str] = None # These are for multiple audio track self._fmt_streams: Optional[List[Stream]] = None @@ -96,26 +99,52 @@ def __eq__(self, o: object) -> bool: return type(o) == type(self) and o.watch_url == self.watch_url @property - def watch_html(self): + def watch_html(self) -> Optional[str]: if self._watch_html: return self._watch_html self._watch_html = request.get(url=self.watch_url) return self._watch_html @property - def embed_html(self): + def embed_html(self) -> Optional[str]: if self._embed_html: return self._embed_html self._embed_html = request.get(url=self.embed_url) return self._embed_html @property - def age_restricted(self): + def age_restricted(self) -> Optional[bool]: if self._age_restricted: return self._age_restricted self._age_restricted = extract.is_age_restricted(self.watch_html) return self._age_restricted + @property + def has_multiple_audiotrack(self) -> Optional[bool]: + if self._has_multiple_audiotrack: + return self._has_multiple_audiotrack + self._has_multiple_audiotrack = extract.has_multiple_audiotrack(self.watch_html) + return self._has_multiple_audiotrack + + @property + def language(self) -> Optional[str]: + if (not self.has_multiple_audiotrack) or self._language: + return self._language + tracks: List[Dict[str, Union[str, bool]]] = [format.get("audioTrack") for format in self.vid_info.get("streamingData", {}).get("adaptiveFormats", []) if format.get("audioTrack", False)] + for track in tracks: + if track.get("audioIsDefault", False): + self._language = track.get("displayName") + self._language_code = track.get("id").split(".")[0] + return self._language + + @property + def language_code(self) -> Optional[str]: + if self.language or self._language_code: + return self._language_code + self.language + return self._language_code + + @property def js_url(self): if self._js_url: @@ -466,14 +495,14 @@ def register_on_complete_callback(self, func: Callable[[Any, Optional[str]], Non """ self.stream_monostate.on_complete = func - @staticmethod - def from_id(video_id: str) -> "YouTube": + @classmethod + def from_id(cls, video_id: str) -> "YouTube": """Construct a :class:`YouTube ` object from a video id. :param str video_id: The video id of the YouTube video. :rtype: :class:`YouTube ` - + """ - return YouTube(f"https://www.youtube.com/watch?v={video_id}") + return cls(f"https://www.youtube.com/watch?v={video_id}") diff --git a/pytube/captions.py b/pytube/captions.py index fe84bec3f..774f90fdc 100644 --- a/pytube/captions.py +++ b/pytube/captions.py @@ -82,24 +82,27 @@ def xml_caption_to_srt(self, xml_captions: str) -> str: XML formatted caption tracks. """ segments = [] - root = ElementTree.fromstring(xml_captions) - for i, child in enumerate(list(root)): - text = child.text or "" - caption = unescape(text.replace("\n", " ").replace(" ", " "),) - try: - duration = float(child.attrib["dur"]) - except KeyError: - duration = 0.0 - start = float(child.attrib["start"]) - end = start + duration - sequence_number = i + 1 # convert from 0-indexed to 1. - line = "{seq}\n{start} --> {end}\n{text}\n".format( - seq=sequence_number, - start=self.float_to_srt_time_format(start), - end=self.float_to_srt_time_format(end), - text=caption, - ) - segments.append(line) + root = ElementTree.fromstring(xml_captions)[0] + i=0 + for child in list(root): + if child.tag == 'p': + caption = child.text + caption = unescape(caption.replace("\n", " ").replace(" ", " "),) + try: + duration = float(child.attrib["d"])/1000.0 + except KeyError: + duration = 0.0 + start = float(child.attrib["t"])/1000.0 + end = start + duration + sequence_number = i + 1 # convert from 0-indexed to 1. + line = "{seq}\n{start} --> {end}\n{text}\n".format( + seq=sequence_number, + start=self.float_to_srt_time_format(start), + end=self.float_to_srt_time_format(end), + text=caption, + ) + segments.append(line) + i += 1 return "\n".join(segments).strip() def download( diff --git a/pytube/extract.py b/pytube/extract.py index d08321408..87ab48213 100644 --- a/pytube/extract.py +++ b/pytube/extract.py @@ -89,6 +89,22 @@ def is_age_restricted(watch_html: str) -> bool: return True +def has_multiple_audiotrack(watch_html: str) -> bool: + """Check if content has multiple audio tracks + + :param str watch_html: + The html contents of the watch page. + :rtype: bool + :returns: + Whether or not the content has multiple audio track + """ + try: + regex_search(r"displayName", watch_html, group=0) + except RegexMatchError: + return False + return True + + def playability_status(watch_html: str) -> (str, str): """Return the playability status and status explanation of a video. diff --git a/pytube/query.py b/pytube/query.py index 72d23911b..57ebed72e 100644 --- a/pytube/query.py +++ b/pytube/query.py @@ -36,6 +36,7 @@ def filter( progressive=None, adaptive=None, is_dash=None, + lang_code=None, custom_filter_functions=None, ): """Apply the given filtering criterion. @@ -113,6 +114,11 @@ def filter( :param bool only_video: Excludes streams with audio tracks. + + :param lang_code: + Includes only given language code. + :type lang_code: + str or None :param custom_filter_functions: (optional) Interface for defining complex filters without @@ -168,6 +174,9 @@ def filter( if adaptive: filters.append(lambda s: s.is_adaptive) + + if lang_code: + filters.append(lambda s: s.language_code == lang_code) if custom_filter_functions: filters.extend(custom_filter_functions) diff --git a/pytube/streams.py b/pytube/streams.py index 179c1aec8..03d8351b4 100644 --- a/pytube/streams.py +++ b/pytube/streams.py @@ -58,18 +58,24 @@ def __init__( # streams return NoneType for audio/video depending. self.video_codec, self.audio_codec = self.parse_codecs() + # This is only for multiple audio track + self.language: Optional[str] = stream.get("audioTrack", {}).get("displayName") + self.audio_id: Optional[str] = stream.get("audioTrack", {}).get("id") + self.language_code: Optional[str] = self.audio_id.split(".")[0] if self.language else None + self.is_default_audio: Optional[bool] = stream.get("audioTrack", {}).get("audioIsDefault", False) + self.is_otf: bool = stream["is_otf"] self.bitrate: Optional[int] = stream["bitrate"] # filesize in bytes self._filesize: Optional[int] = int(stream.get('contentLength', 0)) - + # filesize in kilobytes self._filesize_kb: Optional[float] = float(ceil(float(stream.get('contentLength', 0)) / 1024 * 1000) / 1000) - + # filesize in megabytes self._filesize_mb: Optional[float] = float(ceil(float(stream.get('contentLength', 0)) / 1024 / 1024 * 1000) / 1000) - + # filesize in gigabytes(fingers crossed we don't need terabytes going forward though) self._filesize_gb: Optional[float] = float(ceil(float(stream.get('contentLength', 0)) / 1024 / 1024 / 1024 * 1000) / 1000) @@ -121,6 +127,14 @@ def includes_video_track(self) -> bool: """ return self.is_progressive or self.type == "video" + @property + def is_original_language(self) -> bool: + """Whether the stream is original language. + + :rtype: bool + """ + return self.is_default_audio + def parse_codecs(self) -> Tuple[Optional[str], Optional[str]]: """Get the video/audio codecs from list of codecs. @@ -160,7 +174,7 @@ def filesize(self) -> int: raise self._filesize = request.seq_filesize(self.url) return self._filesize - + @property def filesize_kb(self) -> float: """File size of the media stream in kilobytes. @@ -177,7 +191,7 @@ def filesize_kb(self) -> float: raise self._filesize_kb = float(ceil(request.seq_filesize(self.url)/1024 * 1000) / 1000) return self._filesize_kb - + @property def filesize_mb(self) -> float: """File size of the media stream in megabytes. @@ -211,7 +225,7 @@ def filesize_gb(self) -> float: raise self._filesize_gb = float(ceil(request.seq_filesize(self.url)/1024/1024/1024 * 1000) / 1000) return self._filesize_gb - + @property def title(self) -> str: """Get title of video @@ -432,5 +446,7 @@ def __repr__(self) -> str: parts.extend(['vcodec="{s.video_codec}"']) else: parts.extend(['abr="{s.abr}"', 'acodec="{s.audio_codec}"']) + if self.language_code: + parts.extend(['lang="{s.language}"', 'code="{s.language_code}"']) parts.extend(['progressive="{s.is_progressive}"', 'type="{s.type}"']) return f"" diff --git a/pytube/version.py b/pytube/version.py index e2a3cbc7c..97ce67824 100644 --- a/pytube/version.py +++ b/pytube/version.py @@ -1,4 +1,4 @@ -__version__ = "15.0.0" +__version__ = "15.0.6" if __name__ == "__main__": print(__version__)