def list(self, hdfs_path):
    """Return status of files contained in a remote folder.

    :param hdfs_path: Remote path to a directory. If `hdfs_path` doesn't exist
      or points to a normal file, an :class:`HdfsError` will be raised.

    This method returns a list of tuples `(path, status)` where `path` is the
    absolute path to a file or directory, and `status` is its corresponding
    JSON FileStatus_ object. An empty directory yields an empty list.

    """
    # Remote paths are always `/`-separated, whatever the client OS is, so the
    # children are joined with `posixpath` rather than the platform `os.path`
    # (which would insert a backslash on Windows).
    import posixpath

    hdfs_path = self.resolve(hdfs_path)
    statuses = self._list_status(hdfs_path).json()['FileStatuses']['FileStatus']
    if len(statuses) == 1 and not statuses[0]['pathSuffix']:
        # A single entry with an empty suffix describes `hdfs_path` itself:
        # this is a normal file, not a directory.
        raise HdfsError('%r is not a directory.', hdfs_path)
    return [
        (posixpath.join(hdfs_path, status['pathSuffix']), status)
        for status in statuses
    ]
class TestList(_TestSession):
    """Tests for the client's `list` method."""

    @raises(HdfsError)
    def test_file(self):
        """Listing a path that holds a regular file raises `HdfsError`."""
        self.client.write('foo', 'hello, world!')
        self.client.list('foo')

    @raises(HdfsError)
    def test_missing(self):
        """Listing a path this test never creates raises `HdfsError`."""
        self.client.list('foo')

    def test_empty_dir(self):
        """A freshly created directory lists as an empty list."""
        self.client._mkdirs('foo')
        listing = self.client.list('foo')
        eq_(listing, [])

    def test_dir(self):
        """A directory with one file lists as a single `(path, status)` pair."""
        self.client.write('foo/bar', 'hello, world!')
        listing = self.client.list('foo')
        eq_(len(listing), 1)
        # `status` is patched with the suffix that `list` is expected to
        # report for the child entry before comparing.
        expected_status = self.client.status('foo/bar')
        expected_status['pathSuffix'] = 'bar'
        expected_path = osp.join(self.client.root, 'foo', 'bar')
        eq_(listing[0], (expected_path, expected_status))