From 39e8871cef56602c569fc4c2366fb3685708d47e Mon Sep 17 00:00:00 2001 From: Hyeonseung Lee Date: Wed, 3 Apr 2024 16:00:33 +0900 Subject: [PATCH 1/2] Updated YouTube XML format The YouTube caption XML format seems to have changed recently, so the code is updated to match the new format. --- pytube/captions.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pytube/captions.py b/pytube/captions.py index fe84bec3f..6bd421883 100644 --- a/pytube/captions.py +++ b/pytube/captions.py @@ -83,14 +83,15 @@ def xml_caption_to_srt(self, xml_captions: str) -> str: """ segments = [] root = ElementTree.fromstring(xml_captions) - for i, child in enumerate(list(root)): + body = root.find("body") + for i, child in enumerate(list(body)): text = child.text or "" caption = unescape(text.replace("\n", " ").replace(" ", " "),) try: - duration = float(child.attrib["dur"]) + duration = float(child.attrib["d"]) except KeyError: duration = 0.0 - start = float(child.attrib["start"]) + start = float(child.attrib["t"]) end = start + duration sequence_number = i + 1 # convert from 0-indexed to 1. 
line = "{seq}\n{start} --> {end}\n{text}\n".format( From c20d3745b5f57af948245a5614040ef3975e6aac Mon Sep 17 00:00:00 2001 From: Hyeonseung Lee Date: Wed, 3 Apr 2024 19:33:13 +0900 Subject: [PATCH 2/2] Update captions.py Duration and start times are given in milliseconds (rather than seconds), so divide them by 1000. --- pytube/captions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytube/captions.py b/pytube/captions.py index 6bd421883..544f96ed9 100644 --- a/pytube/captions.py +++ b/pytube/captions.py @@ -88,10 +88,10 @@ def xml_caption_to_srt(self, xml_captions: str) -> str: text = child.text or "" caption = unescape(text.replace("\n", " ").replace(" ", " "),) try: - duration = float(child.attrib["d"]) + duration = float(child.attrib["d"])/1000 except KeyError: duration = 0.0 - start = float(child.attrib["t"]) + start = float(child.attrib["t"])/1000 end = start + duration sequence_number = i + 1 # convert from 0-indexed to 1. line = "{seq}\n{start} --> {end}\n{text}\n".format(