From 52511714c0e7b4767a1ecae0ab7b02972f063501 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Fri, 6 Dec 2019 20:49:45 +0200 Subject: [PATCH 1/2] remote: base: download: don't walk dir twice --- dvc/remote/base.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dvc/remote/base.py b/dvc/remote/base.py index 40ebd086cd..f399c98557 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -588,13 +588,13 @@ def download( def _download_dir( self, from_info, to_info, name, no_progress_bar, file_mode, dir_mode ): - file_to_infos = ( - to_info / file_to_info.relative_to(from_info) - for file_to_info in self.walk_files(from_info) + from_infos = list(self.walk_files(from_info)) + to_infos = ( + to_info / info.relative_to(from_info) + for info in from_infos ) with ThreadPoolExecutor(max_workers=self.JOBS) as executor: - file_from_info = list(self.walk_files(from_info)) download_files = partial( self._download_file, name=name, @@ -603,11 +603,11 @@ def _download_dir( dir_mode=dir_mode, ) futures = executor.map( - download_files, file_from_info, file_to_infos + download_files, from_infos, to_infos ) with Tqdm( futures, - total=len(file_from_info), + total=len(from_infos), desc="Downloading directory", unit="Files", disable=no_progress_bar, From cf171f7ec30b87a495a6a9c7c7c0dedef08eb4f6 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Fri, 6 Dec 2019 21:09:53 +0200 Subject: [PATCH 2/2] fixup --- dvc/remote/base.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/dvc/remote/base.py b/dvc/remote/base.py index f399c98557..7b58542a2a 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -590,8 +590,7 @@ def _download_dir( ): from_infos = list(self.walk_files(from_info)) to_infos = ( - to_info / info.relative_to(from_info) - for info in from_infos + to_info / info.relative_to(from_info) for info in from_infos ) with ThreadPoolExecutor(max_workers=self.JOBS) as executor: @@ -602,9 +601,7 @@ def _download_dir( file_mode=file_mode, dir_mode=dir_mode, ) - futures = executor.map( - download_files, from_infos, to_infos - ) + futures = executor.map(download_files, from_infos, to_infos) with Tqdm( futures, total=len(from_infos),