From 72abfb0572cc2d45ff364003ca9e817a807f02e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Thu, 14 Nov 2019 15:33:40 +0100 Subject: [PATCH 1/6] remote: http: raise exception when download response with error status code --- dvc/exceptions.py | 8 ++++++++ dvc/remote/http.py | 9 +++++++-- tests/func/test_repro.py | 3 ++- tests/unit/remote/test_http.py | 24 ++++++++++++++++++++++++ tests/utils/httpd.py | 4 ++-- 5 files changed, 43 insertions(+), 5 deletions(-) diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 1a167117ad..3778be5922 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -348,3 +348,11 @@ def __init__(self, path, external_repo_path, external_repo_url): relpath(path, external_repo_path), external_repo_url ) ) + + +class HTTPErrorStatusCodeException(DvcException): + def __init__(self, code, reason): + super(HTTPErrorStatusCodeException, self).__init__( + "Server responded with error status code: '{}' and message: " + "'{}'".format(code, reason) + ) diff --git a/dvc/remote/http.py b/dvc/remote/http.py index c224166254..d4ff1cd043 100644 --- a/dvc/remote/http.py +++ b/dvc/remote/http.py @@ -8,6 +8,7 @@ from dvc.config import Config from dvc.config import ConfigError from dvc.exceptions import DvcException +from dvc.exceptions import HTTPErrorStatusCodeException from dvc.progress import Tqdm from dvc.remote.base import RemoteBASE from dvc.scheme import Schemes @@ -37,7 +38,11 @@ def __init__(self, repo, config): ) def _download(self, from_info, to_file, name=None, no_progress_bar=False): - request = self._request("GET", from_info.url, stream=True) + response = self._request("GET", from_info.url, stream=True) + if response.status_code != 200: + raise HTTPErrorStatusCodeException( + response.status_code, response.reason + ) with Tqdm( total=None if no_progress_bar else self._content_length(from_info), leave=False, @@ -46,7 +51,7 @@ def _download(self, from_info, to_file, name=None, no_progress_bar=False): disable=no_progress_bar, ) as pbar: with open(to_file, "wb") as fd: - for chunk in request.iter_content(chunk_size=self.CHUNK_SIZE): + for chunk in response.iter_content(chunk_size=self.CHUNK_SIZE): fd.write(chunk) fd.flush() pbar.update(len(chunk)) diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index ae0727c144..f6e9a23a9b 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -43,6 +43,7 @@ from tests.func.test_data_cloud import get_ssh_url from tests.func.test_data_cloud import TEST_AWS_REPO_BUCKET from tests.func.test_data_cloud import TEST_GCP_REPO_BUCKET +from tests.utils.httpd import ContentMD5Handler from tests.utils.httpd import StaticFileServer @@ -1176,7 +1177,7 @@ def test(self): self.dvc.remove("imported_file.dvc") - with StaticFileServer(handler="Content-MD5") as httpd: + with StaticFileServer(handler_class=ContentMD5Handler) as httpd: import_url = urljoin(self.get_remote(httpd.server_port), self.FOO) import_output = "imported_file" import_stage = self.dvc.imp_url(import_url, import_output) diff --git a/tests/unit/remote/test_http.py b/tests/unit/remote/test_http.py index 2b1ecdb07a..d1c2a5efae 100644 --- a/tests/unit/remote/test_http.py +++ b/tests/unit/remote/test_http.py @@ -1,7 +1,11 @@ import pytest +from BaseHTTPServer import BaseHTTPRequestHandler from dvc.config import ConfigError +from dvc.exceptions import HTTPErrorStatusCodeException +from dvc.path_info import URLInfo from dvc.remote.http import RemoteHTTP +from tests.utils.httpd import StaticFileServer def test_no_traverse_compatibility(dvc_repo): @@ -13,3 +17,23 @@ def test_no_traverse_compatibility(dvc_repo): with pytest.raises(ConfigError): RemoteHTTP(dvc_repo, config) + + +@pytest.mark.parametrize("response_code", [404, 403, 500]) +def test_download_fails_on_error_code(response_code, dvc_repo): + class ErrorStatusRequestHandler(BaseHTTPRequestHandler): + def do_GET(self): + self.send_response(response_code, message="Error message") + self.end_headers() + + with StaticFileServer(ErrorStatusRequestHandler) as httpd: + url = "http://localhost:{}/".format(httpd.server_port) + config = {"url": url} + + remote = RemoteHTTP(dvc_repo, config) + import os + + with pytest.raises(HTTPErrorStatusCodeException): + remote._download( + URLInfo(os.path.join(url, "file.txt")), "file.txt" + ) diff --git a/tests/utils/httpd.py b/tests/utils/httpd.py index ff902e71a1..5883db7b7a 100644 --- a/tests/utils/httpd.py +++ b/tests/utils/httpd.py @@ -45,9 +45,9 @@ class ContentMD5Handler(TestRequestHandler): class StaticFileServer: _lock = threading.Lock() - def __init__(self, handler="etag"): + def __init__(self, handler_class=ETagHandler): self._lock.acquire() - handler_class = ETagHandler if handler == "etag" else ContentMD5Handler + self.response_handler = handler_class self._httpd = HTTPServer(("localhost", 0), handler_class) self._thread = None From 7218ff202cb2d9043b1f0d6dca496ea793fbae83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Fri, 15 Nov 2019 11:08:22 +0100 Subject: [PATCH 2/6] remote: http: rename http error, refactor test --- dvc/exceptions.py | 7 +++---- dvc/remote/http.py | 7 ++----- tests/func/test_repro.py | 3 +-- tests/unit/remote/test_http.py | 20 ++++++++++---------- tests/utils/httpd.py | 1 - 5 files changed, 16 insertions(+), 22 deletions(-) diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 3778be5922..3ad8c72b8f 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -350,9 +350,8 @@ def __init__(self, path, external_repo_path, external_repo_url): ) -class HTTPErrorStatusCodeException(DvcException): +class HTTPError(DvcException): def __init__(self, code, reason): - super(HTTPErrorStatusCodeException, self).__init__( - "Server responded with error status code: '{}' and message: " - "'{}'".format(code, reason) + super(HTTPError, self).__init__( + "HTTP error: '{} {}'".format(code, reason) ) diff --git a/dvc/remote/http.py b/dvc/remote/http.py index d4ff1cd043..4d5d15faa8 100644 --- a/dvc/remote/http.py +++ b/dvc/remote/http.py @@ -7,8 +7,7 @@ from dvc.config import Config from dvc.config import ConfigError -from dvc.exceptions import DvcException -from dvc.exceptions import HTTPErrorStatusCodeException +from dvc.exceptions import DvcException, HTTPError from dvc.progress import Tqdm from dvc.remote.base import RemoteBASE from dvc.scheme import Schemes @@ -40,9 +39,7 @@ def __init__(self, repo, config): def _download(self, from_info, to_file, name=None, no_progress_bar=False): response = self._request("GET", from_info.url, stream=True) if response.status_code != 200: - raise HTTPErrorStatusCodeException( - response.status_code, response.reason - ) + raise HTTPError(response.status_code, response.reason) with Tqdm( total=None if no_progress_bar else self._content_length(from_info), leave=False, diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index f6e9a23a9b..4e46fdde90 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -43,8 +43,7 @@ from tests.func.test_data_cloud import get_ssh_url from tests.func.test_data_cloud import TEST_AWS_REPO_BUCKET from tests.func.test_data_cloud import TEST_GCP_REPO_BUCKET -from tests.utils.httpd import ContentMD5Handler -from tests.utils.httpd import StaticFileServer +from tests.utils.httpd import StaticFileServer, ContentMD5Handler class TestRepro(TestDvc): diff --git a/tests/unit/remote/test_http.py b/tests/unit/remote/test_http.py index d1c2a5efae..924a05ae24 100644 --- a/tests/unit/remote/test_http.py +++ b/tests/unit/remote/test_http.py @@ -1,8 +1,12 @@ +try: + from http.server import BaseHTTPRequestHandler +except ImportError: + from BaseHTTPServer import BaseHTTPRequestHandler + import pytest -from BaseHTTPServer import BaseHTTPRequestHandler from dvc.config import ConfigError -from dvc.exceptions import HTTPErrorStatusCodeException +from dvc.exceptions import HTTPError from dvc.path_info import URLInfo from dvc.remote.http import RemoteHTTP from tests.utils.httpd import StaticFileServer @@ -19,11 +23,10 @@ def test_no_traverse_compatibility(dvc_repo): RemoteHTTP(dvc_repo, config) -@pytest.mark.parametrize("response_code", [404, 403, 500]) -def test_download_fails_on_error_code(response_code, dvc_repo): +def test_download_fails_on_error_code(dvc_repo): class ErrorStatusRequestHandler(BaseHTTPRequestHandler): def do_GET(self): - self.send_response(response_code, message="Error message") + self.send_response(404, message="Not found") self.end_headers() with StaticFileServer(ErrorStatusRequestHandler) as httpd: @@ -31,9 +34,6 @@ def do_GET(self): config = {"url": url} remote = RemoteHTTP(dvc_repo, config) - import os - with pytest.raises(HTTPErrorStatusCodeException): - remote._download( - URLInfo(os.path.join(url, "file.txt")), "file.txt" - ) + with pytest.raises(HTTPError): + remote._download(URLInfo(url) / "file.txt", "file.txt") diff --git a/tests/utils/httpd.py b/tests/utils/httpd.py index 5883db7b7a..92027ae751 100644 --- a/tests/utils/httpd.py +++ b/tests/utils/httpd.py @@ -47,7 +47,6 @@ class StaticFileServer: def __init__(self, handler_class=ETagHandler): self._lock.acquire() - self.response_handler = handler_class self._httpd = HTTPServer(("localhost", 0), handler_class) self._thread = None From e084b427e3c434ca1ecc89c751ccb387d492d4d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Thu, 21 Nov 2019 17:27:59 +0100 Subject: [PATCH 3/6] Update dvc/remote/http.py Co-Authored-By: Alexander Schepanovski --- dvc/remote/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/remote/http.py b/dvc/remote/http.py index 4d5d15faa8..e9ec4f8885 100644 --- a/dvc/remote/http.py +++ b/dvc/remote/http.py @@ -41,7 +41,7 @@ def _download(self, from_info, to_file, name=None, no_progress_bar=False): if response.status_code != 200: raise HTTPError(response.status_code, response.reason) with Tqdm( - total=None if no_progress_bar else self._content_length(from_info), + total=None if no_progress_bar else self._content_length(response), leave=False, bytes=True, desc=from_info.url if name is None else name, From c102a781c248e6ee3b4f54bff1dbda36daa2ac32 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 21 Nov 2019 17:45:29 +0100 Subject: [PATCH 4/6] HTTPError: reformat error message --- dvc/exceptions.py | 4 +--- dvc/remote/http.py | 1 - tests/unit/remote/test_http.py | 12 +----------- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 3ad8c72b8f..62c0a34669 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -352,6 +352,4 @@ def __init__(self, path, external_repo_path, external_repo_url): class HTTPError(DvcException): def __init__(self, code, reason): - super(HTTPError, self).__init__( - "HTTP error: '{} {}'".format(code, reason) - ) + super(HTTPError, self).__init__("'{} {}'".format(code, reason)) diff --git a/dvc/remote/http.py b/dvc/remote/http.py index e9ec4f8885..bf7e93adb2 100644 --- a/dvc/remote/http.py +++ b/dvc/remote/http.py @@ -50,7 +50,6 @@ def _download(self, from_info, to_file, name=None, no_progress_bar=False): with open(to_file, "wb") as fd: for chunk in response.iter_content(chunk_size=self.CHUNK_SIZE): fd.write(chunk) - fd.flush() pbar.update(len(chunk)) def exists(self, path_info): diff --git a/tests/unit/remote/test_http.py b/tests/unit/remote/test_http.py index 924a05ae24..c0d3bd42be 100644 --- a/tests/unit/remote/test_http.py +++ b/tests/unit/remote/test_http.py @@ -1,8 +1,3 @@ -try: - from http.server import BaseHTTPRequestHandler -except ImportError: - from BaseHTTPServer import BaseHTTPRequestHandler - import pytest from dvc.config import ConfigError @@ -24,12 +19,7 @@ def test_no_traverse_compatibility(dvc_repo): def test_download_fails_on_error_code(dvc_repo): - class ErrorStatusRequestHandler(BaseHTTPRequestHandler): - def do_GET(self): - self.send_response(404, message="Not found") - self.end_headers() - - with StaticFileServer(ErrorStatusRequestHandler) as httpd: + with StaticFileServer() as httpd: url = "http://localhost:{}/".format(httpd.server_port) config = {"url": url} From f7846a5bed646596e169250d5260e8ba1cab2cbb Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 21 Nov 2019 17:50:03 +0100 Subject: [PATCH 5/6] test: remote: http: clear test file name --- dvc/remote/http.py | 2 +- tests/unit/remote/test_http.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dvc/remote/http.py b/dvc/remote/http.py index bf7e93adb2..b93adc3951 100644 --- a/dvc/remote/http.py +++ b/dvc/remote/http.py @@ -41,7 +41,7 @@ def _download(self, from_info, to_file, name=None, no_progress_bar=False): if response.status_code != 200: raise HTTPError(response.status_code, response.reason) with Tqdm( - total=None if no_progress_bar else self._content_length(response), + total=None if no_progress_bar else self._content_length(from_info), leave=False, bytes=True, desc=from_info.url if name is None else name, diff --git a/tests/unit/remote/test_http.py b/tests/unit/remote/test_http.py index c0d3bd42be..dbd54bcd15 100644 --- a/tests/unit/remote/test_http.py +++ b/tests/unit/remote/test_http.py @@ -26,4 +26,4 @@ def test_download_fails_on_error_code(dvc_repo): remote = RemoteHTTP(dvc_repo, config) with pytest.raises(HTTPError): - remote._download(URLInfo(url) / "file.txt", "file.txt") + remote._download(URLInfo(url) / "missing.txt", "missing.txt") From 03c27192a6bb6172ad819ee39a3a787b57100714 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 27 Nov 2019 11:57:06 +0100 Subject: [PATCH 6/6] remote: http: calculate length basing on response and not head call --- dvc/remote/http.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/dvc/remote/http.py b/dvc/remote/http.py index b93adc3951..98a1e50f55 100644 --- a/dvc/remote/http.py +++ b/dvc/remote/http.py @@ -41,7 +41,7 @@ def _download(self, from_info, to_file, name=None, no_progress_bar=False): if response.status_code != 200: raise HTTPError(response.status_code, response.reason) with Tqdm( - total=None if no_progress_bar else self._content_length(from_info), + total=None if no_progress_bar else self._content_length(response), leave=False, bytes=True, desc=from_info.url if name is None else name, @@ -55,13 +55,8 @@ def _download(self, from_info, to_file, name=None, no_progress_bar=False): def exists(self, path_info): return bool(self._request("HEAD", path_info.url)) - def _content_length(self, url_or_request): - headers = getattr( - url_or_request, - "headers", - self._request("HEAD", url_or_request).headers, - ) - res = headers.get("Content-Length") + def _content_length(self, response): + res = response.headers.get("Content-Length") return int(res) if res else None def get_file_checksum(self, path_info):