From ce77ae89a5c58089c25790133c69ab8a130cd0d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?=
Date: Fri, 14 Aug 2020 13:51:17 +0200
Subject: [PATCH] http: replace HEAD with streamed GET

---
Notes (placed after the three-dash separator, so not part of the commit
message):
  * Adds a `_head` helper that first issues a HEAD request and returns that
    response when `response.ok` is true.
  * Otherwise it retries the same URL as a GET with `stream=True` inside a
    `with` block and returns that response when it is ok.
  * If the GET is not ok either, the original HEAD response is returned.
  * `exists` and `get_file_hash` now call `_head` instead of issuing the
    HEAD request directly.

 dvc/tree/http.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/dvc/tree/http.py b/dvc/tree/http.py
index dfe6f8bb95..cdc0374be0 100644
--- a/dvc/tree/http.py
+++ b/dvc/tree/http.py
@@ -122,12 +122,27 @@ def request(self, method, url, **kwargs):
         except requests.exceptions.RequestException:
             raise DvcException(f"could not perform a {method} request")
 
+    def _head(self, url):
+        response = self.request("HEAD", url)
+        if response.ok:
+            return response
+
+        # Sometimes servers are configured to forbid HEAD requests
+        # Context: https://github.com/iterative/dvc/issues/4131
+        with self.request("GET", url, stream=True) as r:
+            if r.ok:
+                return r
+
+        return response
+
     def exists(self, path_info, use_dvcignore=True):
-        return bool(self.request("HEAD", path_info.url))
+        return bool(self._head(path_info.url))
 
     def get_file_hash(self, path_info):
         url = path_info.url
-        headers = self.request("HEAD", url).headers
+
+        headers = self._head(url).headers
+
         etag = headers.get("ETag") or headers.get("Content-MD5")
 
         if not etag: