From 274fa1ca901c0e933b6240e691ad67315159daed Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sat, 2 May 2020 19:19:55 +0100 Subject: [PATCH 1/6] wip --- dvc/remote/gdrive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/remote/gdrive.py b/dvc/remote/gdrive.py index 4e004b53d4..b7688a166a 100644 --- a/dvc/remote/gdrive.py +++ b/dvc/remote/gdrive.py @@ -403,7 +403,7 @@ def _gdrive_download_file( with Tqdm( bar_format=bar_format, desc=progress_desc, disable=no_progress_bar ): - gdrive_file.GetContentFile(to_file) + gdrive_file.GetContentFile(to_file) # TODO: actually use pbar @_gdrive_retry def _gdrive_delete_file(self, item_id): From b6d84f81ec39ee05898e15876db46e2887afbe2f Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sat, 2 May 2020 19:38:38 +0100 Subject: [PATCH 2/6] gdrive: add progress Part of #2865 See https://github.com/iterative/dvc/issues/2865#issuecomment-622984785 --- dvc/remote/gdrive.py | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/dvc/remote/gdrive.py b/dvc/remote/gdrive.py index b7688a166a..1095300164 100644 --- a/dvc/remote/gdrive.py +++ b/dvc/remote/gdrive.py @@ -396,14 +396,34 @@ def _gdrive_download_file( param = {"id": item_id} # it does not create a file on the remote gdrive_file = self._drive.CreateFile(param) - bar_format = ( - "Downloading {desc:{ncols_desc}.{ncols_desc}}... " - + Tqdm.format_sizeof(int(gdrive_file["fileSize"]), "B", 1024) - ) - with Tqdm( - bar_format=bar_format, desc=progress_desc, disable=no_progress_bar - ): - gdrive_file.GetContentFile(to_file) # TODO: actually use pbar + + import httplib2 + + OrigClass = httplib2.HTTPConnectionWithTimeout.response_class + + class Custom(OrigClass): + def _readall_chunked(self): + assert self.chunked != "UNKNOWN" + value = [] + with Tqdm( + total=int(gdrive_file["fileSize"]), + desc=progress_desc, + disable=no_progress_bar, + bytes=True, + ) as pbar: + while True: + chunk_left = self._get_chunk_left() + if chunk_left is None: + break + chunk = self._safe_read(chunk_left) + value.append(chunk) + pbar.update(len(chunk)) + self.chunk_left = 0 + return b"".join(value) + + httplib2.HTTPConnectionWithTimeout.response_class = Custom + gdrive_file.GetContentFile(to_file) + httplib2.HTTPConnectionWithTimeout.response_class = OrigClass @_gdrive_retry def _gdrive_delete_file(self, item_id): From 400c31af13c3978e01e036d7857937233bb8cb37 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 3 May 2020 19:22:41 +0100 Subject: [PATCH 3/6] gdrive: move towards next pydrive2 release - depends on https://github.com/iterative/PyDrive2/pull/30 --- dvc/remote/gdrive.py | 34 +++++++--------------------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/dvc/remote/gdrive.py b/dvc/remote/gdrive.py index 1095300164..ede722e452 100644 --- a/dvc/remote/gdrive.py +++ b/dvc/remote/gdrive.py @@ -397,33 +397,13 @@ def _gdrive_download_file( # it does not create a file on the remote gdrive_file = self._drive.CreateFile(param) - import httplib2 - - OrigClass = httplib2.HTTPConnectionWithTimeout.response_class - - class Custom(OrigClass): - def _readall_chunked(self): - assert self.chunked != "UNKNOWN" - value = [] - with Tqdm( - total=int(gdrive_file["fileSize"]), - desc=progress_desc, - disable=no_progress_bar, - bytes=True, - ) as pbar: - while True: - chunk_left = self._get_chunk_left() - if chunk_left is None: - break - chunk = self._safe_read(chunk_left) - value.append(chunk) - pbar.update(len(chunk)) - self.chunk_left = 0 - return b"".join(value) - - httplib2.HTTPConnectionWithTimeout.response_class = Custom - gdrive_file.GetContentFile(to_file) - httplib2.HTTPConnectionWithTimeout.response_class = OrigClass + with Tqdm( + total=int(gdrive_file["fileSize"]), + desc=progress_desc, + disable=no_progress_bar, + bytes=True, + ) as pbar: + gdrive_file.GetContentFile(to_file, callback=pbar.update_to) @_gdrive_retry def _gdrive_delete_file(self, item_id): From 58490e055f58371f51668bfbde8927568cc84f99 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 5 May 2020 21:50:33 +0100 Subject: [PATCH 4/6] update to latest pydrive>=1.4.11 --- dvc/remote/gdrive.py | 15 ++------------- setup.py | 2 +- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/dvc/remote/gdrive.py b/dvc/remote/gdrive.py index ede722e452..1a51651961 100644 --- a/dvc/remote/gdrive.py +++ b/dvc/remote/gdrive.py @@ -44,17 +44,6 @@ def __init__(self, cred_location): super().__init__(message) -def _extract(exc, field): - from pydrive2.files import ApiRequestError - - assert isinstance(exc, ApiRequestError) - - # https://cloud.google.com/storage/docs/json_api/v1/status-codes#errorformat - return ( - exc.error["errors"][0].get(field, "") if "errors" in exc.error else "" - ) - - def _gdrive_retry(func): def should_retry(exc): from pydrive2.files import ApiRequestError @@ -68,7 +57,7 @@ def should_retry(exc): result = True if error_code == 403: - result = _extract(exc, "reason") in [ + result = exc.GetField("reason") in [ "userRateLimitExceeded", "rateLimitExceeded", ] @@ -420,7 +409,7 @@ def _gdrive_delete_file(self, item_id): if ( http_error_code == 403 and self._list_params["corpora"] == "drive" - and _extract(exc, "location") == "file.permissions" + and exc.GetField("location") == "file.permissions" ): raise DvcException( "Insufficient permissions to {}. You should have {} " diff --git a/setup.py b/setup.py index e27b30843e..3ede96a2aa 100644 --- a/setup.py +++ b/setup.py @@ -87,7 +87,7 @@ def run(self): # Extra dependencies for remote integrations gs = ["google-cloud-storage==1.19.0"] -gdrive = ["pydrive2>=1.4.10"] +gdrive = ["pydrive2>=1.4.11"] s3 = ["boto3>=1.9.201"] azure = ["azure-storage-blob==2.1.0"] oss = ["oss2==2.6.1"] From 90b222805f456a58ecd181d327464557734fb448 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 5 May 2020 23:26:54 +0100 Subject: [PATCH 5/6] avoid unneeded API call --- dvc/remote/gdrive.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dvc/remote/gdrive.py b/dvc/remote/gdrive.py index 1a51651961..c4ad491995 100644 --- a/dvc/remote/gdrive.py +++ b/dvc/remote/gdrive.py @@ -387,10 +387,7 @@ def _gdrive_download_file( gdrive_file = self._drive.CreateFile(param) with Tqdm( - total=int(gdrive_file["fileSize"]), - desc=progress_desc, - disable=no_progress_bar, - bytes=True, + desc=progress_desc, disable=no_progress_bar, bytes=True, ) as pbar: gdrive_file.GetContentFile(to_file, callback=pbar.update_to) From 7a6fe7720bfd47ab2c81c2ff0fdc593bca89e026 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 5 May 2020 23:36:48 +0100 Subject: [PATCH 6/6] progress: gdrive: ensure proper bar_format --- dvc/remote/gdrive.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dvc/remote/gdrive.py b/dvc/remote/gdrive.py index c4ad491995..6ce88eb057 100644 --- a/dvc/remote/gdrive.py +++ b/dvc/remote/gdrive.py @@ -387,7 +387,11 @@ def _gdrive_download_file( gdrive_file = self._drive.CreateFile(param) with Tqdm( - desc=progress_desc, disable=no_progress_bar, bytes=True, + desc=progress_desc, + disable=no_progress_bar, + bytes=True, + # explicit `bar_format` as `total` will be set by `update_to` + bar_format=Tqdm.BAR_FMT_DEFAULT, ) as pbar: gdrive_file.GetContentFile(to_file, callback=pbar.update_to)