diff --git a/docs/changelog.md b/docs/changelog.md
index 35e8b0b..81da9c9 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## [0.4.6] - 2019-07-23
+### Added
+- file/directory size checker
+
 ## [0.4.5] - 2019-07-01
 ### Changed
 - If argument to callability checker is a file, require executability; if it's a folder, it's not callable.
diff --git a/tests/test_checksum.py b/tests/test_checksum.py
deleted file mode 100644
index 7798eee..0000000
--- a/tests/test_checksum.py
+++ /dev/null
@@ -1,38 +0,0 @@
-""" Tests for checksum """
-
-import hashlib
-import itertools
-import pytest
-from ubiquerg import checksum
-
-__author__ = "Vince Reuter"
-__email__ = "vreuter@virginia.edu"
-
-
-def pytest_generate_tests(metafunc):
-    """ Dynamic test case generation/parameterization for this module. """
-    if "size1" in metafunc.fixturenames and "size2" in metafunc.fixturenames:
-        metafunc.parametrize(
-            ["size1", "size2"], itertools.product([1, 4], [2, 8]))
-    if "lines" in metafunc.fixturenames:
-        metafunc.parametrize("lines", [[], ["line1"], ["line1", "line2"]])
-
-
-def test_checksum(size1, size2, lines, tmpdir):
-    """ Checksum result matches expectation and is blocksize-agnostic """
-    fp = tmpdir.join("temp-data.txt").strpath
-    data = "\n".join(lines)
-    with open(fp, 'w') as f:
-        f.write(data)
-    exp = hashlib.new("md5", data.encode("utf-8")).hexdigest()
-    res1 = checksum(fp, size1)
-    res2 = checksum(fp, size2)
-    assert exp == res1
-    assert res1 == res2
-    assert res2 == exp
-
-
-def test_nonexistent_path(tmpdir):
-    """ Nonexistent path to checksum is erroneous. """
-    with pytest.raises(IOError):
-        checksum(tmpdir.join("does-not-exist.txt").strpath)
diff --git a/tests/test_files.py b/tests/test_files.py
new file mode 100644
index 0000000..5db6f21
--- /dev/null
+++ b/tests/test_files.py
@@ -0,0 +1,80 @@
+""" Tests for file utilities: checksum, size and filesize_to_str """
+
+import hashlib
+import itertools
+import pytest
+from ubiquerg import checksum, size, filesize_to_str
+
+__author__ = "Vince Reuter"
+__email__ = "vreuter@virginia.edu"
+
+
+def pytest_generate_tests(metafunc):
+    """ Dynamic test case generation/parameterization for this module. """
+    if "size1" in metafunc.fixturenames and "size2" in metafunc.fixturenames:
+        metafunc.parametrize(
+            ["size1", "size2"], itertools.product([1, 4], [2, 8]))
+    if "lines" in metafunc.fixturenames:
+        metafunc.parametrize("lines", [[], ["line1"], ["line1", "line2"]])
+
+
+def test_checksum(size1, size2, lines, tmpdir):
+    """ Checksum result matches expectation and is blocksize-agnostic """
+    fp = tmpdir.join("temp-data.txt").strpath
+    data = "\n".join(lines)
+    with open(fp, 'w') as f:
+        f.write(data)
+    exp = hashlib.new("md5", data.encode("utf-8")).hexdigest()
+    res1 = checksum(fp, size1)
+    res2 = checksum(fp, size2)
+    assert exp == res1
+    assert res1 == res2
+    assert res2 == exp
+
+
+def test_size_returns_str(lines, tmpdir):
+    """ Size returns a string and works with both files and directories """
+    fp = tmpdir.join("temp-data.txt").strpath
+    data = "\n".join(lines)
+    with open(fp, 'w') as f:
+        f.write(data)
+    assert isinstance(size(fp), str)
+    assert isinstance(size(tmpdir.strpath), str)
+
+
+def test_size_returns_int(lines, tmpdir):
+    """ Size returns an int if not converted; more data is never smaller """
+    fp = tmpdir.join("temp-data.txt").strpath
+    fp_larger = tmpdir.join("temp-data-larger.txt").strpath
+    data = "\n".join(lines)
+    with open(fp, 'w') as f:
+        f.write(data)
+    with open(fp_larger, 'w') as f1:
+        f1.write(data * 100)
+    assert isinstance(size(tmpdir.strpath, False), int)
+    assert isinstance(size(fp, False), int)
+    assert size(fp, size_str=False) <= size(fp_larger, size_str=False)
+
+
+def test_nonexistent_path(tmpdir):
+    """ Nonexistent path to checksum is erroneous. """
+    with pytest.raises(IOError):
+        checksum(tmpdir.join("does-not-exist.txt").strpath)
+
+
+@pytest.mark.parametrize("size_num", list(range(0, 10)) + [i/3 for i in range(0, 10)])
+def test_filesize_to_str_int(size_num):
+    """ Works with int and float and returns str """
+    assert isinstance(filesize_to_str(size_num), str)
+
+
+@pytest.mark.parametrize("obj", ["test", [], tuple()])
+def test_filesize_to_str_other(obj):
+    """ Returns the original object if it's not an int or float and warns """
+    with pytest.warns(UserWarning):
+        assert filesize_to_str(obj) == obj
+
+
+def test_filesize_to_str_huge():
+    """ Sizes beyond the largest unit are still rendered as a string """
+    assert filesize_to_str(float(1024 ** 10)) == "1048576.0YB"
diff --git a/tests/test_packaging.py b/tests/test_packaging.py
index 52d97a2..ecb6930 100644
--- a/tests/test_packaging.py
+++ b/tests/test_packaging.py
@@ -9,7 +9,8 @@
 
 @pytest.mark.parametrize(
     ["obj_name", "typecheck"],
     [("build_cli_extra", isfunction), ("checksum", isfunction),
+     ("size", isfunction), ("filesize_to_str", isfunction),
      ("expandpath", isfunction), ("is_collection_like", isfunction),
      ("is_command_callable", isfunction), ("is_url", isfunction),
      ("powerset", isfunction), ("query_yes_no", isfunction),
diff --git a/ubiquerg/_version.py b/ubiquerg/_version.py
index 98a433b..3dd3d2d 100644
--- a/ubiquerg/_version.py
+++ b/ubiquerg/_version.py
@@ -1 +1 @@
-__version__ = "0.4.5"
+__version__ = "0.4.6"
diff --git a/ubiquerg/files.py b/ubiquerg/files.py
index b5fa93b..0baefec 100644
--- a/ubiquerg/files.py
+++ b/ubiquerg/files.py
@@ -1,8 +1,11 @@
 """ Functions facilitating file operations """
 
 from hashlib import md5
+import os
+from warnings import warn
 
-__all__ = ["checksum"]
+__all__ = ["checksum", "size", "filesize_to_str"]
+FILE_SIZE_UNITS = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']
 
 
 def checksum(path, blocksize=int(2e+9)):
@@ -21,3 +24,61 @@ def checksum(path, blocksize=int(2e+9)):
                 break
             m.update(buf)
     return m.hexdigest()
+
+
+def size(path, size_str=True):
+    """
+    Gets the size of the file or directory in the provided path
+
+    For a directory, the sizes of all files found beneath it are summed; a
+    symbolic link to a file contributes the size of the link, not its target.
+
+    :param str path: path to the file or directory to check size of
+    :param bool size_str: whether the size should be converted to a
+        human-readable string, e.g. convert B to MB
+    :return int|str|NoneType: size in bytes, or size string; None if the
+        path is neither a file nor a directory
+    """
+    if os.path.isfile(path):
+        s = os.path.getsize(path)
+    elif os.path.isdir(path):
+        s = 0
+        symlinks = []
+        for dirpath, _, filenames in os.walk(path):
+            for f in filenames:
+                fp = os.path.join(dirpath, f)
+                if os.path.islink(fp):
+                    # lstat measures the link itself, so a dangling link
+                    # cannot make the size lookup fail.
+                    s += os.lstat(fp).st_size
+                    symlinks.append(fp)
+                else:
+                    s += os.path.getsize(fp)
+        if symlinks:
+            warn("{} symlinks were found: '{}'".format(
+                len(symlinks), "\n".join(symlinks)))
+    else:
+        warn("size could not be determined for: '{}'".format(path))
+        # Nothing to convert; skip filesize_to_str to avoid a second warning.
+        return None
+    return filesize_to_str(s) if size_str else s
+
+
+def filesize_to_str(size):
+    """
+    Converts the numeric bytes to the size string
+
+    :param int|float size: file size to convert
+    :return str: file size string; a non-numeric argument is returned
+        unchanged, with a warning
+    """
+    if isinstance(size, (int, float)):
+        for unit in FILE_SIZE_UNITS[:-1]:
+            if size < 1024:
+                return "{}{}".format(round(size, 1), unit)
+            # Float divisor keeps the fractional part under Python 2 as well.
+            size /= 1024.0
+        # Too large for any smaller unit: express it in the largest one.
+        return "{}{}".format(round(size, 1), FILE_SIZE_UNITS[-1])
+    warn("size argument was neither an int nor a float, returning the original object")
+    return size
diff --git a/ubiquerg/system.py b/ubiquerg/system.py
index 15be79b..38ac93c 100644
--- a/ubiquerg/system.py
+++ b/ubiquerg/system.py
@@ -26,3 +26,39 @@ def is_command_callable(cmd):
     # Use `command` to see if command is callable, and rule on exit code.
     check = "command -v {0} >/dev/null 2>&1 || {{ exit 1; }}".format(cmd)
     return not bool(os.system(check))
+
+
+def is_writable(folder, check_exist=False, create=False):
+    """
+    Make sure a folder is writable.
+
+    Given a folder, check that it exists and is writable. Errors if requested on
+    a non-existent folder. Otherwise, make sure the first existing parent folder
+    is writable such that this folder could be created.
+
+    :param str folder: Folder to check for writeability.
+    :param bool check_exist: Throw an error if it doesn't exist?
+    :param bool create: Create the folder if it doesn't exist?
+    :return bool: Whether the folder (or, if it doesn't exist, its first
+        existing ancestor) is writable and traversable.
+    :raise OSError: If the folder doesn't exist and check_exist is set while
+        create is not, or if creating the folder fails.
+    """
+    folder = folder or "."
+
+    if os.path.exists(folder):
+        return os.access(folder, os.W_OK) and os.access(folder, os.X_OK)
+    elif create:
+        os.mkdir(folder)
+        # Report on the new folder instead of implicitly returning None.
+        return os.access(folder, os.W_OK) and os.access(folder, os.X_OK)
+    elif check_exist:
+        raise OSError("Folder not found: {}".format(folder))
+    else:
+        # The folder didn't exist. Recurse up the folder hierarchy to make sure
+        # all paths are writable
+        parent = os.path.dirname(folder)
+        if parent == folder:
+            # dirname made no progress (nonexistent root); stop recursing.
+            return False
+        return is_writable(parent, check_exist)