pytube · MemoKing34 · Dec 2, 2023 · Dec 17, 2023 · Dec 17, 2023 · Dec 17, 2023
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 15.0.0
+current_version = 15.0.6
 commit = True
 tag = True
 

diff --git a/README.md b/README.md
@@ -2,11 +2,6 @@
   <p>
     <a href="#"><img src="https://assets.nickficano.com/gh-pytube.min.svg" width="456" height="143" alt="pytube logo" /></a>
   </p>
-  <p align="center">
-	<a href="https://pypi.org/project/pytube/"><img src="https://img.shields.io/pypi/dm/pytube?style=flat-square" alt="pypi"/></a>
-	<a href="https://pytube.io/en/latest/"><img src="https://readthedocs.org/projects/python-pytube/badge/?version=latest&style=flat-square" /></a>
-	<a href="https://pypi.org/project/pytube/"><img src="https://img.shields.io/pypi/v/pytube?style=flat-square" /></a>
-  </p>
 </div>
 
 ### Actively soliciting contributors!
@@ -52,16 +47,10 @@ This guide covers the most basic usage of the library. For more detailed informa
 
 Pytube requires an installation of Python 3.6 or greater, as well as pip. (Pip is typically bundled with Python [installations](https://python.org/downloads).)
 
-To install from PyPI with pip:
-
-```bash
-$ python -m pip install pytube
-```
-
-Sometimes, the PyPI release becomes slightly outdated. To install from the source with pip:
+To install from the source with pip:
 
 ```bash
-$ python -m pip install git+https://github.com/pytube/pytube
+$ python -m pip install git+https://github.com/MemoKing34/pytube
 ```
 
 ### Using pytube in a Python script

diff --git a/pytube/__main__.py b/pytube/__main__.py
@@ -7,7 +7,7 @@
 
 """
 import logging
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union
 
 import pytube
 import pytube.exceptions as exceptions
@@ -61,6 +61,9 @@ def __init__(
         self._embed_html: Optional[str] = None
         self._player_config_args: Optional[Dict] = None  # inline js in the html containing
         self._age_restricted: Optional[bool] = None
+        self._has_multiple_audiotrack: Optional[bool] = None
+        self._language: Optional[str] = None       # These are for multiple audio track
+        self._language_code: Optional[str] = None  # These are for multiple audio track
 
         self._fmt_streams: Optional[List[Stream]] = None
 
@@ -96,26 +99,52 @@ def __eq__(self, o: object) -> bool:
         return type(o) == type(self) and o.watch_url == self.watch_url
 
     @property
-    def watch_html(self):
+    def watch_html(self) -> Optional[str]:  # fetched with request.get on first access, then cached
         if self._watch_html:
             return self._watch_html
         self._watch_html = request.get(url=self.watch_url)
         return self._watch_html
 
     @property
-    def embed_html(self):
+    def embed_html(self) -> Optional[str]:  # fetched with request.get on first access, then cached
         if self._embed_html:
             return self._embed_html
         self._embed_html = request.get(url=self.embed_url)
         return self._embed_html
 
     @property
-    def age_restricted(self):
+    def age_restricted(self) -> Optional[bool]:  # NOTE: only a truthy result is cached; False is re-extracted on each access
         if self._age_restricted:
             return self._age_restricted
         self._age_restricted = extract.is_age_restricted(self.watch_html)
         return self._age_restricted
 
+    @property
+    def has_multiple_audiotrack(self) -> Optional[bool]:
+        """Cached result of ``extract.has_multiple_audiotrack`` for the watch page."""
+        if self._has_multiple_audiotrack is None:  # memoize False as well as True
+            self._has_multiple_audiotrack = extract.has_multiple_audiotrack(self.watch_html)
+        return self._has_multiple_audiotrack
+
+    @property
+    def language(self) -> Optional[str]:
+        """Display name of the default audio track, or ``None`` when no default track is found."""
+        if self._language or not self.has_multiple_audiotrack:
+            return self._language
+        formats = self.vid_info.get("streamingData", {}).get("adaptiveFormats", [])
+        default = next((f["audioTrack"] for f in formats if (f.get("audioTrack") or {}).get("audioIsDefault")), {})
+        self._language = default.get("displayName")
+        self._language_code = (default.get("id") or "").split(".")[0] or None  # tolerate a missing id
+        return self._language
+
+    @property
+    def language_code(self) -> Optional[str]:
+        """Code of the default audio track: the part of its ``id`` before the first dot."""
+        if self._language_code is None:
+            self.language  # evaluated once, only for its side effect of filling ``_language_code``
+        return self._language_code
+
+
     @property
     def js_url(self):
         if self._js_url:
@@ -466,14 +495,14 @@ def register_on_complete_callback(self, func: Callable[[Any, Optional[str]], Non
         """
         self.stream_monostate.on_complete = func
 
-    @staticmethod
-    def from_id(video_id: str) -> "YouTube":
+    @classmethod
+    def from_id(cls, video_id: str) -> "YouTube":
         """Construct a :class:`YouTube <YouTube>` object from a video id.
 
         :param str video_id:
             The video id of the YouTube video.
 
         :rtype: :class:`YouTube <YouTube>`
-        
+
         """
-        return YouTube(f"https://www.youtube.com/watch?v={video_id}")
+        return cls(f"https://www.youtube.com/watch?v={video_id}")  # cls, not YouTube: subclasses get their own type
diff --git a/pytube/captions.py b/pytube/captions.py
@@ -82,24 +82,27 @@ def xml_caption_to_srt(self, xml_captions: str) -> str:
             XML formatted caption tracks.
         """
         segments = []
-        root = ElementTree.fromstring(xml_captions)
-        for i, child in enumerate(list(root)):
-            text = child.text or ""
-            caption = unescape(text.replace("\n", " ").replace("  ", " "),)
-            try:
-                duration = float(child.attrib["dur"])
-            except KeyError:
-                duration = 0.0
-            start = float(child.attrib["start"])
-            end = start + duration
-            sequence_number = i + 1  # convert from 0-indexed to 1.
-            line = "{seq}\n{start} --> {end}\n{text}\n".format(
-                seq=sequence_number,
-                start=self.float_to_srt_time_format(start),
-                end=self.float_to_srt_time_format(end),
-                text=caption,
-            )
-            segments.append(line)
+        root = ElementTree.fromstring(xml_captions)[0]
+        # Only <p> children are rendered; SRT sequence numbers start at 1.
+        cues = (child for child in root if child.tag == 'p')
+        for sequence_number, child in enumerate(cues, start=1):
+            # ``text`` is None for a <p> without leading text; use "" rather than crash.
+            text = child.text or ""
+            caption = unescape(text.replace("\n", " ").replace("  ", " "),)
+            # "t"/"d" are scaled by 1/1000 (presumably milliseconds -> seconds).
+            try:
+                duration = float(child.attrib["d"]) / 1000.0
+            except KeyError:
+                duration = 0.0
+            start = float(child.attrib["t"]) / 1000.0
+            end = start + duration
+            line = "{seq}\n{start} --> {end}\n{text}\n".format(
+                seq=sequence_number,
+                start=self.float_to_srt_time_format(start),
+                end=self.float_to_srt_time_format(end),
+                text=caption,
+            )
+            segments.append(line)
         return "\n".join(segments).strip()
 
     def download(

diff --git a/pytube/extract.py b/pytube/extract.py
@@ -89,6 +89,22 @@ def is_age_restricted(watch_html: str) -> bool:
     return True
 
 
+def has_multiple_audiotrack(watch_html: str) -> bool:
+    """Heuristically detect multiple audio tracks by searching for ``displayName``.
+
+    :param str watch_html: The html contents of the watch page.
+    :rtype: bool
+    :returns: ``True`` when ``displayName`` occurs in the html, ``False`` otherwise.
+    """
+    try:
+        regex_search(r"displayName", watch_html, group=0)
+    except RegexMatchError:
+        found = False
+    else:
+        found = True
+    return found
+
+
 def playability_status(watch_html: str) -> (str, str):
     """Return the playability status and status explanation of a video.
 

diff --git a/pytube/query.py b/pytube/query.py
@@ -36,6 +36,7 @@ def filter(
         progressive=None,
         adaptive=None,
         is_dash=None,
+        lang_code=None,
         custom_filter_functions=None,
     ):
         """Apply the given filtering criterion.
@@ -113,6 +114,11 @@ def filter(
 
         :param bool only_video:
             Excludes streams with audio tracks.
+
+        :param lang_code:
+            Includes only given language code.
+        :type lang_code:
+            str or None
 
         :param custom_filter_functions:
             (optional) Interface for defining complex filters without
@@ -168,6 +174,9 @@ def filter(
 
         if adaptive:
             filters.append(lambda s: s.is_adaptive)
+
+        if lang_code:
+            filters.append(lambda s: s.language_code == lang_code)
 
         if custom_filter_functions:
             filters.extend(custom_filter_functions)

diff --git a/pytube/streams.py b/pytube/streams.py
@@ -58,18 +58,24 @@ def __init__(
         # streams return NoneType for audio/video depending.
         self.video_codec, self.audio_codec = self.parse_codecs()
 
+        # This is only for multiple audio track
+        self.language: Optional[str] = stream.get("audioTrack", {}).get("displayName")
+        self.audio_id: Optional[str] = stream.get("audioTrack", {}).get("id")
+        self.language_code: Optional[str] = self.audio_id.split(".")[0] if self.language else None
+        self.is_default_audio: Optional[bool] = stream.get("audioTrack", {}).get("audioIsDefault", False)
+
         self.is_otf: bool = stream["is_otf"]
         self.bitrate: Optional[int] = stream["bitrate"]
 
         # filesize in bytes
         self._filesize: Optional[int] = int(stream.get('contentLength', 0))
-        
+
         # filesize in kilobytes
         self._filesize_kb: Optional[float] = float(ceil(float(stream.get('contentLength', 0)) / 1024 * 1000) / 1000)
-        
+
         # filesize in megabytes
         self._filesize_mb: Optional[float] = float(ceil(float(stream.get('contentLength', 0)) / 1024 / 1024 * 1000) / 1000)
-        
+
         # filesize in gigabytes(fingers crossed we don't need terabytes going forward though)
         self._filesize_gb: Optional[float] = float(ceil(float(stream.get('contentLength', 0)) / 1024 / 1024 / 1024 * 1000) / 1000)
 
@@ -121,6 +127,14 @@ def includes_video_track(self) -> bool:
         """
         return self.is_progressive or self.type == "video"
 
+    @property
+    def is_original_language(self) -> bool:
+        """Return ``is_default_audio``, i.e. the audio track's ``audioIsDefault`` flag.
+
+        :rtype: bool
+        """
+        return self.is_default_audio
+
     def parse_codecs(self) -> Tuple[Optional[str], Optional[str]]:
         """Get the video/audio codecs from list of codecs.
 
@@ -160,7 +174,7 @@ def filesize(self) -> int:
                     raise
                 self._filesize = request.seq_filesize(self.url)
         return self._filesize
-    
+
     @property
     def filesize_kb(self) -> float:
         """File size of the media stream in kilobytes.
@@ -177,7 +191,7 @@ def filesize_kb(self) -> float:
                     raise
                 self._filesize_kb = float(ceil(request.seq_filesize(self.url)/1024 * 1000) / 1000)
         return self._filesize_kb
-    
+
     @property
     def filesize_mb(self) -> float:
         """File size of the media stream in megabytes.
@@ -211,7 +225,7 @@ def filesize_gb(self) -> float:
                     raise
                 self._filesize_gb = float(ceil(request.seq_filesize(self.url)/1024/1024/1024 * 1000) / 1000)
         return self._filesize_gb
-    
+
     @property
     def title(self) -> str:
         """Get title of video
@@ -432,5 +446,7 @@ def __repr__(self) -> str:
                 parts.extend(['vcodec="{s.video_codec}"'])
         else:
             parts.extend(['abr="{s.abr}"', 'acodec="{s.audio_codec}"'])
+            if self.language_code:
+                parts.extend(['lang="{s.language}"', 'code="{s.language_code}"'])
         parts.extend(['progressive="{s.is_progressive}"', 'type="{s.type}"'])
         return f"<Stream: {' '.join(parts).format(s=self)}>"
diff --git a/pytube/version.py b/pytube/version.py
@@ -1,4 +1,4 @@
-__version__ = "15.0.0"
+__version__ = "15.0.6"
 
 if __name__ == "__main__":
     print(__version__)