def _get_redirect_url(url: str, max_hops: int = 3) -> str:
    """Return the URL that ``url`` ultimately resolves to.

    Each hop issues a ``HEAD`` request for the current URL and compares the URL
    reported by the response with the one that was requested. When both match
    (or the response reports no URL) the current URL is returned; otherwise the
    reported URL becomes the target of the next hop.

    Args:
        url: The URL to resolve.
        max_hops: Number of redirects tolerated. At most ``max_hops + 1``
            requests are made.

    Returns:
        The first URL whose response does not point to a different URL.

    Raises:
        RecursionError: If the URL still changes after ``max_hops + 1``
            requests.
    """
    initial_url = url
    headers = {"User-Agent": USER_AGENT}

    for _ in range(max_hops + 1):
        # The HTTP verb has to be passed through ``method``. A "Method" entry in
        # ``headers`` is transmitted as an ordinary header field and leaves the
        # request a GET.
        request = urllib.request.Request(url, headers=headers, method="HEAD")
        with urllib.request.urlopen(request) as response:
            if response.url == url or response.url is None:
                return url

            url = response.url

    # Reached only when every hop pointed somewhere new.
    raise RecursionError(
        f"Request to {initial_url} exceeded {max_hops} redirects. The last redirect points to {url}."
    )