Skip to content

Commit

Permalink
Merge 2fe307a into bcf6314
Browse files Browse the repository at this point in the history
  • Loading branch information
stolarczyk committed Jul 24, 2019
2 parents bcf6314 + 2fe307a commit 89beb1a
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 41 deletions.
4 changes: 4 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## [0.4.6] - 2019-07-23
### Added
- file/directory size checker

## [0.4.5] - 2019-07-01
### Changed
- If argument to callability checker is a file, require executability; if it's a folder, it's not callable.
Expand Down
38 changes: 0 additions & 38 deletions tests/test_checksum.py

This file was deleted.

74 changes: 74 additions & 0 deletions tests/test_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
""" Tests for checksum """

import hashlib
import itertools
import pytest
from ubiquerg import checksum, size, filesize_to_str

__author__ = "Vince Reuter"
__email__ = "vreuter@virginia.edu"


def pytest_generate_tests(metafunc):
    """ Parameterize blocksize pairs and file contents for tests that request them. """
    requested = metafunc.fixturenames
    # Blocksize pairs only make sense when a test asks for both members.
    if "size1" in requested and "size2" in requested:
        blocksize_pairs = itertools.product([1, 4], [2, 8])
        metafunc.parametrize(["size1", "size2"], blocksize_pairs)
    # File contents: empty file, single line, multiple lines.
    if "lines" in requested:
        contents = [[], ["line1"], ["line1", "line2"]]
        metafunc.parametrize("lines", contents)


def test_checksum(size1, size2, lines, tmpdir):
    """ Checksum agrees with a directly computed MD5 digest regardless of blocksize """
    content = "\n".join(lines)
    target = tmpdir.join("temp-data.txt").strpath
    with open(target, 'w') as handle:
        handle.write(content)
    # Reference digest computed over the whole content at once.
    expected = hashlib.new("md5", content.encode("utf-8")).hexdigest()
    first = checksum(target, size1)
    second = checksum(target, size2)
    assert expected == first
    assert first == second
    assert second == expected


def test_size_returns_str(lines, tmpdir):
    """ By default, size yields a string for a file and for a directory alike """
    content = "\n".join(lines)
    file_path = tmpdir.join("temp-data.txt").strpath
    with open(file_path, 'w') as handle:
        handle.write(content)
    # Check the file first, then the directory that contains it.
    for target in (file_path, tmpdir.strpath):
        assert isinstance(size(target), str)


def test_size_returns_int(lines, tmpdir):
    """ Size returns an int when string conversion is off; more data is never smaller """
    fp = tmpdir.join("temp-data.txt").strpath
    # Must be a distinct path; reusing the same name would overwrite the
    # smaller file and make the size comparison below meaningless.
    fp_larger = tmpdir.join("temp-data-larger.txt").strpath
    data = "\n".join(lines)
    with open(fp, 'w') as f:
        f.write(data)
    with open(fp_larger, 'w') as f1:
        f1.write(data * 100)
    assert isinstance(size(tmpdir.strpath, False), int)
    assert isinstance(size(fp, False), int)
    assert isinstance(size(fp_larger, False), int)
    # "<=" rather than "<": the empty-lines case writes zero bytes to both.
    assert size(fp, size_str=False) <= size(fp_larger, size_str=False)


def test_nonexistent_path(tmpdir):
    """ Requesting the checksum of a path that does not exist raises IOError. """
    missing = tmpdir.join("does-not-exist.txt").strpath
    with pytest.raises(IOError):
        checksum(missing)


@pytest.mark.parametrize(
    "size_num", list(range(10)) + [i/3 for i in range(10)])
def test_filesize_to_str_int(size_num):
    """ Numeric input (whole numbers and thirds) is converted to a str """
    converted = filesize_to_str(size_num)
    assert isinstance(converted, str)


@pytest.mark.parametrize("obj", ["test", [], tuple()])
def test_filesize_to_str_other(obj):
    """ A non-numeric argument triggers a warning and is handed back unchanged """
    with pytest.warns(UserWarning):
        result = filesize_to_str(obj)
    assert result == obj
2 changes: 1 addition & 1 deletion tests/test_packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

@pytest.mark.parametrize(
["obj_name", "typecheck"],
[("build_cli_extra", isfunction), ("checksum", isfunction),
[("build_cli_extra", isfunction), ("checksum", isfunction), ("size", isfunction),
("expandpath", isfunction), ("is_collection_like", isfunction),
("is_command_callable", isfunction), ("is_url", isfunction),
("powerset", isfunction), ("query_yes_no", isfunction),
Expand Down
2 changes: 1 addition & 1 deletion ubiquerg/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.4.5"
__version__ = "0.4.6"
50 changes: 49 additions & 1 deletion ubiquerg/files.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
""" Functions facilitating file operations """

from hashlib import md5
import os
from warnings import warn

__all__ = ["checksum"]
__all__ = ["checksum", "size", "filesize_to_str"]
FILE_SIZE_UNITS = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']


def checksum(path, blocksize=int(2e+9)):
Expand All @@ -21,3 +24,48 @@ def checksum(path, blocksize=int(2e+9)):
break
m.update(buf)
return m.hexdigest()


def size(path, size_str=True):
    """
    Gets the size of the file or directory in the provided path

    For a directory, the sizes of all files found by walking it recursively
    are summed. A symlinked file contributes the size of the link itself
    (not of its target), and the symlinks found are reported on stdout.

    :param str path: path to the file or directory to check size of
    :param bool size_str: whether the size should be converted to a
        human-readable string, e.g. convert B to MB
    :return int|str|None: file size or file size string; None if the path is
        neither a file nor a directory
    """
    if os.path.isfile(path):
        s = os.path.getsize(path)
    elif os.path.isdir(path):
        s = 0
        symlinks = []
        for dirpath, _, filenames in os.walk(path):
            for f in filenames:
                fp = os.path.join(dirpath, f)
                if os.path.islink(fp):
                    # lstat does not follow the link, so broken links are
                    # measured too instead of raising.
                    s += os.lstat(fp).st_size
                    symlinks.append(fp)
                else:
                    s += os.path.getsize(fp)
        if symlinks:
            print("{} symlinks were found: '{}'".format(len(symlinks), "\n".join(symlinks)))
    else:
        print("size could not be determined for: '{}'".format(path))
        # Return directly: passing None to the formatter would only add a
        # second, misleading warning about the argument type.
        return None
    return filesize_to_str(s) if size_str else s


def filesize_to_str(size):
    """
    Converts the numeric bytes to the size string

    The value is repeatedly divided by 1024 until it drops below 1024 or the
    largest known unit is reached, then rounded to one decimal place.

    :param int|float size: file size to convert
    :return str: file size string; if the argument is neither an int nor a
        float, a warning is issued and the original object is returned
    """
    if not isinstance(size, (int, float)):
        warn("size argument was neither an int nor a float, returning the original object")
        return size
    for unit in FILE_SIZE_UNITS[:-1]:
        if size < 1024:
            return "{}{}".format(round(size, 1), unit)
        # Float divisor keeps the fractional part under Python 2 as well.
        size /= 1024.0
    # Values too large for the smaller units are expressed in the largest
    # one rather than falling through as if the argument had a wrong type.
    return "{}{}".format(round(size, 1), FILE_SIZE_UNITS[-1])

0 comments on commit 89beb1a

Please sign in to comment.