From f12f8495542a2f91ed7bb39ea74b4c896432533b Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 23 Jul 2019 16:17:19 -0400 Subject: [PATCH 1/7] add file size checker, bump version, update changelog --- docs/changelog.md | 4 ++++ ubiquerg/_version.py | 2 +- ubiquerg/files.py | 46 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 35e8b0b..81da9c9 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,9 @@ # Changelog +## [0.4.6] - 2019-07-23 +### Added +- file/directory size checker + ## [0.4.5] - 2019-07-01 ### Changed - If argument to callability checker is a file, require executability; if it's a folder, it's not callable. diff --git a/ubiquerg/_version.py b/ubiquerg/_version.py index 98a433b..3dd3d2d 100644 --- a/ubiquerg/_version.py +++ b/ubiquerg/_version.py @@ -1 +1 @@ -__version__ = "0.4.5" +__version__ = "0.4.6" diff --git a/ubiquerg/files.py b/ubiquerg/files.py index b5fa93b..935e312 100644 --- a/ubiquerg/files.py +++ b/ubiquerg/files.py @@ -1,8 +1,10 @@ """ Functions facilitating file operations """ from hashlib import md5 +import os -__all__ = ["checksum"] +__all__ = ["checksum", "size"] +FILE_SIZE_UNITS = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'] def checksum(path, blocksize=int(2e+9)): @@ -21,3 +23,45 @@ def checksum(path, blocksize=int(2e+9)): break m.update(buf) return m.hexdigest() + + +def size(path): + """ + Gets the size of the file or directory in the provided path + + :param str path: path to the file to check size of + :return int: file size + """ + def _size_str(size): + """ + Converts the numeric bytes to the size string + + :param int|float size: file size to convert + :return str: file size string + """ + if isinstance(size, (int, float)): + for unit in FILE_SIZE_UNITS: + if size < 1024: + return "{}{}".format(round(size, 1), unit) + size /= 1024 + return size + + if os.path.isfile(path): + s = 
_size_str(os.path.getsize(path)) + elif os.path.isdir(path): + s = 0 + symlinks = [] + for dirpath, dirnames, filenames in os.walk(path): + for f in filenames: + fp = os.path.join(dirpath, f) + if not os.path.islink(fp): + s += os.path.getsize(fp) + else: + s += os.lstat(fp).st_size + symlinks.append(fp) + if len(symlinks) > 0: + print("{} symlinks were found: '{}'".format(len(symlinks), "\n".join(symlinks))) + else: + print("size could not be determined for: '{}'".format(path)) + s = None + return _size_str(s) From 73e90b085720302e81f62112755471ba2fbd7610 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 24 Jul 2019 08:51:44 -0400 Subject: [PATCH 2/7] add unit tests, rename file to match its contents --- tests/{test_checksum.py => test_files.py} | 25 ++++++++++++++++++++++- tests/test_packaging.py | 2 +- 2 files changed, 25 insertions(+), 2 deletions(-) rename tests/{test_checksum.py => test_files.py} (59%) diff --git a/tests/test_checksum.py b/tests/test_files.py similarity index 59% rename from tests/test_checksum.py rename to tests/test_files.py index 7798eee..aaf922d 100644 --- a/tests/test_checksum.py +++ b/tests/test_files.py @@ -3,7 +3,7 @@ import hashlib import itertools import pytest -from ubiquerg import checksum +from ubiquerg import checksum, size __author__ = "Vince Reuter" __email__ = "vreuter@virginia.edu" @@ -32,6 +32,29 @@ def test_checksum(size1, size2, lines, tmpdir): assert res2 == exp +def test_size_returns_str(lines, tmpdir): + """ Size returns a string and works with both files and directories """ + fp = tmpdir.join("temp-data.txt").strpath + data = "\n".join(lines) + with open(fp, 'w') as f: + f.write(data) + assert isinstance(size(fp), str) + assert isinstance(size(tmpdir), str) + + +def test_size_returns_int(lines, tmpdir): + fp = tmpdir.join("temp-data.txt").strpath + fp_larger = tmpdir.join("temp-data.txt").strpath + data = "\n".join(lines) + with open(fp, 'w') as f: + f.write(data) + with open(fp_larger, 'w') as f1: + 
f1.write(data * 100) + assert isinstance(size(tmpdir, False), int) + assert isinstance(size(fp, False), int) + assert size(fp, size_str=False) <= size(fp, size_str=False) + + def test_nonexistent_path(tmpdir): """ Nonexistent path to checksum is erroneous. """ with pytest.raises(IOError): diff --git a/tests/test_packaging.py b/tests/test_packaging.py index 52d97a2..ecb6930 100644 --- a/tests/test_packaging.py +++ b/tests/test_packaging.py @@ -9,7 +9,7 @@ @pytest.mark.parametrize( ["obj_name", "typecheck"], - [("build_cli_extra", isfunction), ("checksum", isfunction), + [("build_cli_extra", isfunction), ("checksum", isfunction), ("size", isfunction), ("expandpath", isfunction), ("is_collection_like", isfunction), ("is_command_callable", isfunction), ("is_url", isfunction), ("powerset", isfunction), ("query_yes_no", isfunction), From 91a3383a6d9d2e23f3486ca46c46aafb9df4f68d Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 24 Jul 2019 08:54:36 -0400 Subject: [PATCH 3/7] update function interface --- ubiquerg/files.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ubiquerg/files.py b/ubiquerg/files.py index 935e312..89c0adf 100644 --- a/ubiquerg/files.py +++ b/ubiquerg/files.py @@ -25,12 +25,13 @@ def checksum(path, blocksize=int(2e+9)): return m.hexdigest() -def size(path): +def size(path, size_str=True): """ Gets the size of the file or directory in the provided path :param str path: path to the file to check size of - :return int: file size + :param bool size_str: whether the size should be converted to a human-readable string, e.g. 
convert B to MB + :return int|str: file size or file size string """ def _size_str(size): """ @@ -47,7 +48,7 @@ def _size_str(size): return size if os.path.isfile(path): - s = _size_str(os.path.getsize(path)) + s = os.path.getsize(path) elif os.path.isdir(path): s = 0 symlinks = [] @@ -64,4 +65,4 @@ def _size_str(size): else: print("size could not be determined for: '{}'".format(path)) s = None - return _size_str(s) + return _size_str(s) if size_str else s From 971e505113f976245c07de9938dda4b7bcd29934 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 24 Jul 2019 09:01:34 -0400 Subject: [PATCH 4/7] convert LocalPath objects to str in tests maybe fixes python 3.5 and 2.7 failing the tests --- tests/test_files.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_files.py b/tests/test_files.py index aaf922d..14e49ef 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -34,7 +34,7 @@ def test_checksum(size1, size2, lines, tmpdir): def test_size_returns_str(lines, tmpdir): """ Size returns a string and works with both files and directories """ - fp = tmpdir.join("temp-data.txt").strpath + fp = str(tmpdir.join("temp-data.txt").strpath) data = "\n".join(lines) with open(fp, 'w') as f: f.write(data) @@ -43,8 +43,8 @@ def test_size_returns_str(lines, tmpdir): def test_size_returns_int(lines, tmpdir): - fp = tmpdir.join("temp-data.txt").strpath - fp_larger = tmpdir.join("temp-data.txt").strpath + fp = str(tmpdir.join("temp-data.txt").strpath) + fp_larger = str(tmpdir.join("temp-data.txt").strpath) data = "\n".join(lines) with open(fp, 'w') as f: f.write(data) From 923d3d14eb2c95628b838e47c9cf76c12c09fa13 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 24 Jul 2019 09:06:40 -0400 Subject: [PATCH 5/7] fix tests --- tests/test_files.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_files.py b/tests/test_files.py index 14e49ef..4b5c11d 100644 --- a/tests/test_files.py +++ 
b/tests/test_files.py @@ -34,23 +34,23 @@ def test_checksum(size1, size2, lines, tmpdir): def test_size_returns_str(lines, tmpdir): """ Size returns a string and works with both files and directories """ - fp = str(tmpdir.join("temp-data.txt").strpath) + fp = tmpdir.join("temp-data.txt").strpath data = "\n".join(lines) with open(fp, 'w') as f: f.write(data) assert isinstance(size(fp), str) - assert isinstance(size(tmpdir), str) + assert isinstance(size(tmpdir.strpath), str) def test_size_returns_int(lines, tmpdir): - fp = str(tmpdir.join("temp-data.txt").strpath) - fp_larger = str(tmpdir.join("temp-data.txt").strpath) + fp = tmpdir.join("temp-data.txt").strpath + fp_larger = tmpdir.join("temp-data.txt").strpath data = "\n".join(lines) with open(fp, 'w') as f: f.write(data) with open(fp_larger, 'w') as f1: f1.write(data * 100) - assert isinstance(size(tmpdir, False), int) + assert isinstance(size(tmpdir.strpath, False), int) assert isinstance(size(fp, False), int) assert size(fp, size_str=False) <= size(fp, size_str=False) From ff3c4223bc4f8f845a7c1f8a54c0bb6e85cb9e43 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 24 Jul 2019 09:34:10 -0400 Subject: [PATCH 6/7] abstract away the number to file size str conversion --- tests/test_files.py | 9 ++++++++- ubiquerg/files.py | 33 +++++++++++++++++---------------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/tests/test_files.py b/tests/test_files.py index 4b5c11d..f04547c 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -3,7 +3,7 @@ import hashlib import itertools import pytest -from ubiquerg import checksum, size +from ubiquerg import checksum, size, filesize_to_str __author__ = "Vince Reuter" __email__ = "vreuter@virginia.edu" @@ -59,3 +59,10 @@ def test_nonexistent_path(tmpdir): """ Nonexistent path to checksum is erroneous. 
""" with pytest.raises(IOError): checksum(tmpdir.join("does-not-exist.txt").strpath) + + +@pytest.mark.parametrize("size_num", list(range(0, 10)) + [i/3 for i in range(0, 10)]) +def test_filesize_to_str_int(size_num): + """ Works with int and returns str """ + print(size_num) + assert isinstance(filesize_to_str(size_num), str) diff --git a/ubiquerg/files.py b/ubiquerg/files.py index 89c0adf..6420e32 100644 --- a/ubiquerg/files.py +++ b/ubiquerg/files.py @@ -3,7 +3,7 @@ from hashlib import md5 import os -__all__ = ["checksum", "size"] +__all__ = ["checksum", "size", "filesize_to_str"] FILE_SIZE_UNITS = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'] @@ -33,20 +33,6 @@ def size(path, size_str=True): :param bool size_str: whether the size should be converted to a human-readable string, e.g. convert B to MB :return int|str: file size or file size string """ - def _size_str(size): - """ - Converts the numeric bytes to the size string - - :param int|float size: file size to convert - :return str: file size string - """ - if isinstance(size, (int, float)): - for unit in FILE_SIZE_UNITS: - if size < 1024: - return "{}{}".format(round(size, 1), unit) - size /= 1024 - return size - if os.path.isfile(path): s = os.path.getsize(path) elif os.path.isdir(path): @@ -65,4 +51,19 @@ def _size_str(size): else: print("size could not be determined for: '{}'".format(path)) s = None - return _size_str(s) if size_str else s + return filesize_to_str(s) if size_str else s + + +def filesize_to_str(size): + """ + Converts the numeric bytes to the size string + + :param int|float size: file size to convert + :return str: file size string + """ + if isinstance(size, (int, float)): + for unit in FILE_SIZE_UNITS: + if size < 1024: + return "{}{}".format(round(size, 1), unit) + size /= 1024 + return size From 2fe307a48fb14784b4eeb49a28df2228d3b3ba34 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 24 Jul 2019 09:54:54 -0400 Subject: [PATCH 7/7] warn if object to convert is not a 
number, test it --- tests/test_files.py | 8 +++++++- ubiquerg/files.py | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/test_files.py b/tests/test_files.py index f04547c..5db6f21 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -64,5 +64,11 @@ def test_nonexistent_path(tmpdir): @pytest.mark.parametrize("size_num", list(range(0, 10)) + [i/3 for i in range(0, 10)]) def test_filesize_to_str_int(size_num): """ Works with int and returns str """ - print(size_num) assert isinstance(filesize_to_str(size_num), str) + + +@pytest.mark.parametrize("obj", ["test", [], tuple()]) +def test_filesize_to_str_other(obj): + """ Returns the original object if it's not an int or float and warns """ + with pytest.warns(UserWarning): + assert filesize_to_str(obj) == obj \ No newline at end of file diff --git a/ubiquerg/files.py b/ubiquerg/files.py index 6420e32..0baefec 100644 --- a/ubiquerg/files.py +++ b/ubiquerg/files.py @@ -2,6 +2,7 @@ from hashlib import md5 import os +from warnings import warn __all__ = ["checksum", "size", "filesize_to_str"] FILE_SIZE_UNITS = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'] @@ -66,4 +67,5 @@ def filesize_to_str(size): if size < 1024: return "{}{}".format(round(size, 1), unit) size /= 1024 + warn("size argument was neither an int nor a float, returning the original object") return size