From 845cd352f78b7b89b5a63f19529cb8ad445f81ea Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Fri, 29 Nov 2019 11:45:45 -0600 Subject: [PATCH 1/2] s3: add support for top level empty directories --- dvc/remote/s3.py | 12 ++++++++++++ tests/unit/remote/test_s3.py | 12 +++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/dvc/remote/s3.py b/dvc/remote/s3.py index 5594806f03..75084ad513 100644 --- a/dvc/remote/s3.py +++ b/dvc/remote/s3.py @@ -212,6 +212,15 @@ def exists(self, path_info): fname = next(self._list_paths(path_info, max_items=1), "") return path_info.path == fname or fname.startswith(dir_path.path) + def makedirs(self, path_info): + # We need to support creating empty directories, on S3, that means + # creating an object with an empty body and a trailing slash `/`. + # + # We are not creating directory objects for every parent prefix, + # it doesn't make sense. + dir_path = path_info / "" + self.s3.put_object(Bucket=path_info.bucket, Key=dir_path.path, Body="") + def isdir(self, path_info): # S3 doesn't have a concept for directories. # @@ -271,4 +280,7 @@ def _generate_download_url(self, path_info, expires=3600): def walk_files(self, path_info, max_items=None): for fname in self._list_paths(path_info, max_items): + if fname.endswith("/"): + continue + yield path_info.replace(path=fname) diff --git a/tests/unit/remote/test_s3.py b/tests/unit/remote/test_s3.py index a82d005c4d..96b7299174 100644 --- a/tests/unit/remote/test_s3.py +++ b/tests/unit/remote/test_s3.py @@ -79,9 +79,11 @@ def test_walk_files(remote): remote.path_info / "data/subdir/1", remote.path_info / "data/subdir/2", remote.path_info / "data/subdir/3", + remote.path_info / "empty_file", + remote.path_info / "foo", ] - assert list(remote.walk_files(remote.path_info / "data")) == files + assert list(remote.walk_files(remote.path_info)) == files def test_copy_preserve_etag_across_buckets(remote): @@ -99,3 +101,11 @@ def test_copy_preserve_etag_across_buckets(remote): to_etag = RemoteS3.get_etag(s3, "another", "foo") assert from_etag == to_etag + + +def makedirs(remote): + empty_dir = remote.path_info / "empty_dir" + remote.remove(empty_dir) + assert not remote.exists(empty_dir) + remote.makedirs(empty_dir) + assert remote.exists(empty_dir) From 1f9a0f0e0fe25f5e17cffb04a404b2bd0deeca7e Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Fri, 29 Nov 2019 12:38:12 -0600 Subject: [PATCH 2/2] address @efiop's suggestions --- dvc/remote/s3.py | 4 ++-- tests/unit/remote/test_s3.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dvc/remote/s3.py b/dvc/remote/s3.py index 75084ad513..8013f9bb98 100644 --- a/dvc/remote/s3.py +++ b/dvc/remote/s3.py @@ -213,11 +213,11 @@ def exists(self, path_info): return path_info.path == fname or fname.startswith(dir_path.path) def makedirs(self, path_info): - # We need to support creating empty directories, on S3, that means + # We need to support creating empty directories, which means # creating an object with an empty body and a trailing slash `/`. # # We are not creating directory objects for every parent prefix, - # it doesn't make sense. + # as it is not required. dir_path = path_info / "" self.s3.put_object(Bucket=path_info.bucket, Key=dir_path.path, Body="") diff --git a/tests/unit/remote/test_s3.py b/tests/unit/remote/test_s3.py index 96b7299174..49fc3dbb89 100644 --- a/tests/unit/remote/test_s3.py +++ b/tests/unit/remote/test_s3.py @@ -103,8 +103,8 @@ def test_copy_preserve_etag_across_buckets(remote): assert from_etag == to_etag -def makedirs(remote): - empty_dir = remote.path_info / "empty_dir" +def test_makedirs(remote): + empty_dir = remote.path_info / "empty_dir" / "" remote.remove(empty_dir) assert not remote.exists(empty_dir) remote.makedirs(empty_dir)