Skip to content

Commit

Permalink
Add pattern matching to list_files().
Browse files Browse the repository at this point in the history
Not as useful as it could be, since the B2/S3 APIs don't really support
it, but it should come in handy regardless.
  • Loading branch information
sz3 committed Feb 1, 2020
1 parent 0436deb commit 2fe95c9
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 13 deletions.
8 changes: 6 additions & 2 deletions pog/fs/b2fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def remove_file(self, remote_path):
res = _run_command('delete-file-version', remote_path, file_id)
print(res)

def list_files(self, remote_path='', recursive=False):
def list_files(self, remote_path='', pattern=None, recursive=False):
# maybe handle wildcards too... e.g. "*.mfn"
recursive_arg = ['--recursive'] if recursive else []
path_arg = [remote_path] if remote_path else []
Expand All @@ -63,4 +63,8 @@ def list_files(self, remote_path='', recursive=False):
res = _run_command(*args)
if not res:
return []
return res.split()

res = res.split()
if pattern:
res = [f for f in res if self._match(f, pattern)]
return res
9 changes: 6 additions & 3 deletions pog/fs/localfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,11 @@ def remove_file(self, remote_path):
p = Path(self.root, remote_path)
remove(p.resolve())

def list_files(self, remote_path='', recursive=False):
globstr = '**/*' if recursive else '*'
def list_files(self, remote_path='', pattern=None, recursive=False):
globstr = pattern or '*'
if recursive:
globstr = f'**/{globstr}'

p = Path(self.root, remote_path)
files = [str(f) for f in p.glob(globstr)]
files = [f'{i}/' if i.is_dir() else str(i) for i in p.glob(globstr)]
return sorted(files)
10 changes: 9 additions & 1 deletion pog/fs/pogfs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import posixpath
from fnmatch import fnmatch, fnmatchcase
from os.path import basename

'''
Implemented per cloud storage service
'''
Expand All @@ -16,9 +19,14 @@ def download_file(self, local_path, remote_path):
def remove_file(self, remote_path):
raise NotImplementedError()

def list_files(self, remote_path='', recursive=False):
def list_files(self, remote_path='', pattern=None, recursive=False):
raise NotImplementedError()

def _match(self, path, pattern):
    """Return True if `path` should be kept when listing with `pattern`.

    Helper for `list_files()` implementations that have to filter a
    remote listing client-side.

    * A falsy `pattern` (None or '') means "no filter": everything matches.
    * Entries ending in '/' are directories and are always kept, so that
      callers can still descend into them.
    * Otherwise the final path component is compared against the
      shell-style wildcard `pattern` (e.g. '*.mfn').

    Remote object keys always use '/' as the separator and are
    case-sensitive, so matching deliberately uses `posixpath.basename`
    and `fnmatchcase` rather than the host-OS-dependent
    `os.path.basename` / `fnmatch` (which case-fold and treat '\\' as a
    separator on Windows).
    """
    if not pattern:  # no filter requested
        return True
    if path.endswith('/'):  # we want to return directories
        return True
    return fnmatchcase(posixpath.basename(path), pattern)


def get_cloud_fs(fs):
FS = {
Expand Down
11 changes: 8 additions & 3 deletions pog/fs/s3fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def remove_file(self, remote_path):
s3 = boto3.client('s3')
s3.delete_object(Bucket=self.bucket_name, Key=remote_path)

def list_files(self, remote_path='', recursive=False):
def list_files(self, remote_path='', pattern=None, recursive=False):
s3 = boto3.client('s3')
pager = s3.get_paginator("list_objects_v2")

Expand All @@ -48,5 +48,10 @@ def list_files(self, remote_path='', recursive=False):
kwargs['Delimiter'] = '/'

for p in pager.paginate(**kwargs):
for e in p.get('Contents', []):
yield e['Key']
for d in p.get('CommonPrefixes', []):
yield d['Prefix']
for f in p.get('Contents', []):
filename = f['Key']
if pattern and not self._match(filename, pattern):
continue
yield filename
5 changes: 5 additions & 0 deletions tests/test_b2fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,8 @@ def test_list_files_empty(self, mock_run):
mock_run.return_value = b''
self.assertEqual(self.fs.list_files('path/to/nowhere', recursive=False), [])
mock_run.assert_called_once_with(['b2', 'ls', 'bucket', 'path/to/nowhere'])

def test_list_files_pattern(self, mock_run):
    """list_files(pattern=...) keeps directory entries and matching files.

    The pattern is not forwarded to the `b2 ls` command line; the
    expected call below is the same one made without a pattern.
    """
    # EX_LS is the canned `b2 ls` output defined elsewhere in this module.
    mock_run.return_value = EX_LS

    listing = self.fs.list_files(pattern='*.txt')

    self.assertEqual(listing, ['data/', 'file.txt'])
    mock_run.assert_called_once_with(['b2', 'ls', 'bucket'])
4 changes: 2 additions & 2 deletions tests/test_cloud_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_cleanup_dryrun(self):
f'{self.working_dir.name}/1.mfn',
f'{self.working_dir.name}/2.mfn',
f'{self.working_dir.name}/3.mfn',
f'{self.working_dir.name}/data',
f'{self.working_dir.name}/data/',
f'{self.working_dir.name}/data/US-1DnY1AVF1huiGj10G9SEGwCHa4GVxJcBnaCuAcXk=',
f'{self.working_dir.name}/data/uselessblob',
])
Expand All @@ -46,6 +46,6 @@ def test_cleanup_for_real(self):

self.assertEqual(self.fs.list_files(recursive=True), [
f'{self.working_dir.name}/3.mfn',
f'{self.working_dir.name}/data',
f'{self.working_dir.name}/data/',
f'{self.working_dir.name}/data/US-1DnY1AVF1huiGj10G9SEGwCHa4GVxJcBnaCuAcXk=',
])
23 changes: 21 additions & 2 deletions tests/test_s3fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,13 @@ def test_list_files_defaults(self, mock_boto):
mock_boto.client.return_value = mock_boto
mock_boto.get_paginator.return_value = mock_boto
mock_boto.paginate.side_effect = [
[{'Contents': [{'Key': 'abc'}, {'Key': 'def'}]}],
[{
'CommonPrefixes': [{'Prefix': 'dir/'}],
'Contents': [{'Key': 'abc'}, {'Key': 'def'}],
}],
]

self.assertEqual(list(self.fs.list_files()), ['abc', 'def'])
self.assertEqual(list(self.fs.list_files()), ['dir/', 'abc', 'def'])

mock_boto.client.assert_called_once_with('s3')
mock_boto.get_paginator.assert_called_once_with('list_objects_v2')
Expand All @@ -90,3 +93,19 @@ def test_list_files_subdir(self, mock_boto):
mock_boto.client.assert_called_once_with('s3')
mock_boto.get_paginator.assert_called_once_with('list_objects_v2')
mock_boto.paginate.assert_called_once_with(Bucket='bucket', Prefix='path/to/files')

def test_list_files_pattern(self, mock_boto):
    """list_files(pattern=...) yields prefixes plus only the matching keys."""
    # client() and get_paginator() both hand back the same mock, so
    # paginate() can be configured and inspected on `mock_boto` directly.
    mock_boto.client.return_value = mock_boto
    mock_boto.get_paginator.return_value = mock_boto

    # A single paginate() call returning a single page.
    page = {
        'CommonPrefixes': [{'Prefix': 'dir/'}],
        'Contents': [{'Key': 'file.txt'}, {'Key': 'other.jpg'}],
    }
    mock_boto.paginate.side_effect = [[page]]

    listed = list(self.fs.list_files(pattern='*.txt'))

    # 'dir/' is kept even though it does not match; 'other.jpg' is dropped.
    self.assertEqual(listed, ['dir/', 'file.txt'])

    mock_boto.client.assert_called_once_with('s3')
    mock_boto.get_paginator.assert_called_once_with('list_objects_v2')
    mock_boto.paginate.assert_called_once_with(Bucket='bucket', Prefix='', Delimiter='/')

0 comments on commit 2fe95c9

Please sign in to comment.