Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhancement: Enable resume on partially downloaded video stream #1873

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion pytube/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def vid_info(self):
def bypass_age_gate(self):
"""Attempt to update the vid_info by bypassing the age gate."""
innertube = InnerTube(
- client='ANDROID_EMBED',
+ client='ANDROID',
use_oauth=self.use_oauth,
allow_cache=self.allow_oauth_cache
)
Expand Down
18 changes: 9 additions & 9 deletions pytube/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import re
import socket
+ import sys
from functools import lru_cache
from urllib import parse
from urllib.error import URLError
Expand Down Expand Up @@ -133,17 +134,16 @@ def seq_stream(
def stream(
url,
timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
- max_retries=0
+ max_retries=0,
+ start_pos=0
):
"""Read the response in chunks.
:param str url: The URL to perform the GET request for.
:rtype: Iterable[bytes]
"""
- file_size: int = default_range_size # fake filesize to start
- downloaded = 0
- while downloaded < file_size:
- stop_pos = min(downloaded + default_range_size, file_size) - 1
- range_header = f"bytes={downloaded}-{stop_pos}"
+ file_size: int = sys.maxsize # fake filesize to start
+ while start_pos < file_size:
+ stop_pos = min(start_pos + default_range_size, file_size) - 1
tries = 0

# Attempt to make the request multiple times as necessary.
Expand All @@ -155,7 +155,7 @@ def stream(
# Try to execute the request, ignoring socket timeouts
try:
response = _execute_request(
- url + f"&range={downloaded}-{stop_pos}",
+ url + f"&range={start_pos}-{stop_pos}",
method="GET",
timeout=timeout
)
Expand All @@ -174,7 +174,7 @@ def stream(
break
tries += 1

- if file_size == default_range_size:
+ if file_size == sys.maxsize:
try:
resp = _execute_request(
url + f"&range={0}-{99999999999}",
Expand All @@ -189,7 +189,7 @@ def stream(
chunk = response.read()
if not chunk:
break
- downloaded += len(chunk)
+ start_pos += len(chunk)
yield chunk
return # pylint: disable=R1711

Expand Down
20 changes: 15 additions & 5 deletions pytube/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,19 +305,29 @@ def download(
logger.debug(f'file {file_path} already exists, skipping')
self.on_complete(file_path)
return file_path

bytes_remaining = self.filesize

logger.debug(f'downloading ({self.filesize} total bytes) file to {file_path}')

with open(file_path, "wb") as fh:
downloaded = 0
bytes_remaining = self.filesize
if os.path.isfile(file_path):
partial_download_size = os.path.getsize(file_path)
if partial_download_size % request.default_range_size == 0:
downloaded = partial_download_size
bytes_remaining -= downloaded
logger.debug(f'appending to valid partial download file ({partial_download_size} bytes)')

file_mode = "ab" if downloaded > 0 else "wb"
with open(file_path, file_mode) as fh:
try:
for chunk in request.stream(
self.url,
timeout=timeout,
max_retries=max_retries
max_retries=max_retries,
start_pos=downloaded
):
# reduce the (bytes) remainder by the length of the chunk.
bytes_remaining -= len(chunk)
logger.debug(f"download chunks_left={bytes_remaining//len(chunk)}")
# send to the on_progress callback.
self.on_progress(chunk, fh, bytes_remaining)
except HTTPError as e:
Expand Down