Skip to content

Commit

Permalink
Merge branch 'issue20-zipfile'
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Dec 18, 2022
2 parents 230737e + 9dfee32 commit a1bb96f
Show file tree
Hide file tree
Showing 4 changed files with 206 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- Replace `optparse` usage with `argparse`
([#10](https://github.com/soxofaan/duviz/issues/10))
- Drop Python 3.5 support ([#27](https://github.com/soxofaan/duviz/issues/27))
- New feature: size breakdown of ZIP and tar files ([#20](https://github.com/soxofaan/duviz/issues/20))


## [3.1.2] - 2022-12-09
Expand Down
10 changes: 9 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ Features
The hard work is done by ``du`` (or ``ls``), which run at C-speed.
- Progress reporting while you wait. Be hypnotized!
- Detects your terminal width for maximum visualization pleasure.
- Apart from file size (the default), you can also just count files (inodes)
- Not only supports "disk usage" based on file size,
but also allows counting files (inode count mode)
or giving a size breakdown of ZIP or tar files.
- Option to use terminal colors for the boxes instead of ASCII art


Expand Down Expand Up @@ -93,4 +95,10 @@ If you specify one or more directories, it will render the usage of those direct

Instead of size in bytes, you can also get inode usage: just use the option ``--inodes`` (or ``-i`` in short).

If you directly pass ``duviz`` a ZIP or tar file,
it will visualize the size breakdown of the file tree in the ZIP/tar file.
In case of ZIP files, the compressed size will be shown by default
(use option ``--unzip-size`` to show the decompressed size instead).
For tar files, only the decompressed size is available.

Run it with option ``--help`` for more options.
97 changes: 88 additions & 9 deletions duviz.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,18 @@
import shutil
import subprocess
import sys
import tarfile
import time
import unicodedata
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union
import zipfile
from pathlib import Path


# TODO: catch absence/failure of du/ls subprocesses
# TODO: how to handle unreadable subdirs in du/ls?
# TODO: option to sort alphabetically (instead of on size)
# TODO: use pathlib.Path instead of naive strings where appropriate


def path_split(path: str, base: str = "") -> List[str]:
Expand Down Expand Up @@ -109,6 +114,8 @@ class DuTree(SizeTree):
Size tree from `du` (disk usage) listings
"""

# TODO no need for subclassing from SizeTree

_du_regex = re.compile(r'([0-9]*)\s*(.*)')

@classmethod
Expand Down Expand Up @@ -158,6 +165,7 @@ def pairs(lines: Iterable[str]) -> Iterator[Tuple[List[str], int]]:


class InodeTree(SizeTree):
# TODO no need for subclassing from SizeTree

@classmethod
def from_ls(
Expand Down Expand Up @@ -221,6 +229,36 @@ def pairs(listing: str) -> Iterator[Tuple[List[str], int]]:
return tree


class ZipFileProcessor:
    """Turn the member listing of a ZIP archive into a `SizeTree`."""

    @staticmethod
    def from_zipfile(path: Union[str, Path], compressed: bool = True) -> SizeTree:
        """
        Build a `SizeTree` from the entries of the ZIP file at `path`.

        :param path: location of the ZIP archive
        :param compressed: when true (the default) each entry is weighted by
            its `compress_size`, otherwise by its `file_size`
        :return: tree produced by `SizeTree.from_path_size_pairs`,
            with `str(path)` as root
        """
        # TODO: handle zipfile.BadZipFile in nicer way?
        # Pick the `ZipInfo` attribute to read once, instead of branching per mode.
        size_attribute = "compress_size" if compressed else "file_size"
        with zipfile.ZipFile(path, mode="r") as archive:
            entries = archive.infolist()
            path_size_pairs = (
                (path_split(entry.filename), getattr(entry, size_attribute))
                for entry in entries
            )
            return SizeTree.from_path_size_pairs(
                pairs=path_size_pairs, root=str(path), _recalculate_sizes=True
            )


class TarFileProcessor:
    """Turn the member listing of a tar archive into a `SizeTree`."""

    @staticmethod
    def from_tar_file(path: Union[str, Path]) -> SizeTree:
        """
        Build a `SizeTree` from the members of the tar file at `path`.

        Each member is weighted by its `size` attribute.

        :param path: location of the tar archive (opened with mode "r")
        :return: tree produced by `SizeTree.from_path_size_pairs`,
            with `str(path)` as root
        """
        with tarfile.open(path, mode="r") as archive:
            members = archive.getmembers()
            path_size_pairs = (
                (path_split(member.name), member.size) for member in members
            )
            return SizeTree.from_path_size_pairs(
                pairs=path_size_pairs, root=str(path), _recalculate_sizes=True
            )


class SizeFormatter:
"""Render a (byte) count in compact human-readable way: 12, 34k, 56M, ..."""

Expand Down Expand Up @@ -253,8 +291,8 @@ def __init__(self, max_depth: int = 5, size_formatter: SizeFormatter = SIZE_FORM
def render(self, tree: SizeTree, width: int) -> List[str]:
raise NotImplementedError

@staticmethod
def bar(
self,
label: str,
width: int,
fill: str = "-",
Expand Down Expand Up @@ -517,7 +555,13 @@ def main():
cli = argparse.ArgumentParser(
prog="duviz", description="Render ASCII-art representation of disk space usage."
)
cli.add_argument("dir", nargs="*", help="directories to scan", default=["."])
cli.add_argument(
"paths",
metavar="PATH",
nargs="*",
help="Directories or ZIP/tar archives to scan",
default=["."],
)
cli.add_argument(
"-w",
"--width",
Expand Down Expand Up @@ -583,12 +627,34 @@ def main():
default=False,
help="Use colors to render bars (instead of ASCII art)",
)
cli.add_argument(
# TODO short option, "-z"?
"--zip",
action="store_true",
dest="zip",
help="Force ZIP-file handling of given paths (e.g. lacking a traditional `.zip` extension).",
)
cli.add_argument(
"--unzip-size",
action="store_true",
help="Visualize decompressed file size instead of compressed file size for ZIP files.",
)
cli.add_argument(
# TODO short option?
"--tar",
action="store_true",
dest="tar",
help="""
Force tar-file handling of given paths
(e.g. lacking a traditional extension like `.tar`, `.tar.gz`, ...).
""",
)

args = cli.parse_args()

# Make sure we have a valid list of paths
paths = []
for path in args.dir:
paths: List[str] = []
for path in args.paths:
if os.path.exists(path):
paths.append(path)
else:
Expand All @@ -599,13 +665,26 @@ def main():
else:
progress_report = None

for directory in paths:
if args.inode_count:
tree = InodeTree.from_ls(root=directory, progress_report=progress_report)
for path in paths:
if args.zip or (
os.path.isfile(path) and os.path.splitext(path)[1].lower() == ".zip"
):
tree = ZipFileProcessor.from_zipfile(path, compressed=not args.unzip_size)
size_formatter = SIZE_FORMATTER_BYTES
elif args.tar or (
os.path.isfile(path)
and any(
path.endswith(ext) for ext in {".tar", ".tar.gz", ".tgz", "tar.bz2"}
)
):
tree = TarFileProcessor().from_tar_file(path)
size_formatter = SIZE_FORMATTER_BYTES
elif args.inode_count:
tree = InodeTree.from_ls(root=path, progress_report=progress_report)
size_formatter = SIZE_FORMATTER_COUNT
else:
tree = DuTree.from_du(
root=directory,
root=path,
one_filesystem=args.one_file_system,
dereference=args.dereference,
progress_report=progress_report,
Expand Down
111 changes: 108 additions & 3 deletions test_duviz.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,34 @@
# coding: utf-8

# TODO: test actual CLI

import itertools
import tarfile
import textwrap
import zipfile
from pathlib import Path
from typing import List

import pytest

from duviz import TreeRenderer, SIZE_FORMATTER_COUNT, SIZE_FORMATTER_BYTES, SIZE_FORMATTER_BYTES_BINARY, path_split, \
SizeTree, AsciiDoubleLineBarRenderer, DuTree, InodeTree, get_progress_reporter, AsciiSingleLineBarRenderer, \
ColorDoubleLineBarRenderer, ColorSingleLineBarRenderer, Colorizer
from duviz import (
TreeRenderer,
SIZE_FORMATTER_COUNT,
SIZE_FORMATTER_BYTES,
SIZE_FORMATTER_BYTES_BINARY,
path_split,
SizeTree,
AsciiDoubleLineBarRenderer,
DuTree,
InodeTree,
get_progress_reporter,
AsciiSingleLineBarRenderer,
ColorDoubleLineBarRenderer,
ColorSingleLineBarRenderer,
Colorizer,
ZipFileProcessor,
TarFileProcessor,
)


def test_bar_one():
Expand Down Expand Up @@ -644,3 +665,87 @@ def test_get_progress_reporter():
deltas = [i1-i0 for (i0, i1) in zip(indexes[:-1], indexes[1:])]
assert all(d < 5 for d in deltas[:5])
assert all(d > 9 for d in deltas[-5:])


class TestZipFileProcessor:
    """Rendering tests for size trees built from a ZIP archive."""

    @pytest.fixture
    def zip_file(self, tmp_path):
        """Create a deflated ZIP with two files under `alpha/` and one at top level."""
        path = tmp_path / "data.zip"
        with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
            with zf.open("alpha/abc100.txt", "w") as f:
                f.write(b"abcdefghijklmnopqrstuvwxyz" * 100)
            with zf.open("alpha/abbcccdddde.txt", "w") as f:
                f.write(b"abbcccdddde" * 2 * 100)
            with zf.open("0.txt", "w") as f:
                f.write(b"0" * 26 * 100)
        return path

    def test_default(self, zip_file):
        """By default the compressed member sizes are visualized."""
        tree = ZipFileProcessor.from_zipfile(zip_file)
        renderer = AsciiDoubleLineBarRenderer(size_formatter=SIZE_FORMATTER_BYTES)
        result = renderer.render(tree, width=40)
        # Rendering is done at width=40, so every expected row spans 40 columns:
        # label rows are space-padded to the width of the size bar below them,
        # and columns without children are filled with trailing spaces.
        expected = [
            "________________________________________",
            TreeRenderer.bar(label=str(zip_file), width=40),
            "[_________________99B__________________]",
            "[            alpha            ][ 0.txt ]",
            "[_____________79B_____________][__20B__]",
            "[    abc100.txt   ][abbcccdddd]         ",
            "[_______49B_______][___30B____]         ",
        ]
        assert result == expected

    def test_expanded(self, zip_file):
        """With `compressed=False` the uncompressed member sizes are visualized."""
        tree = ZipFileProcessor.from_zipfile(zip_file, compressed=False)
        renderer = AsciiDoubleLineBarRenderer(size_formatter=SIZE_FORMATTER_BYTES)
        result = renderer.render(tree, width=40)
        # Same layout rule as in `test_default`: every row spans 40 columns.
        expected = [
            "________________________________________",
            TreeRenderer.bar(label=str(zip_file), width=40),
            "[________________7.40KB________________]",
            "[         alpha         ][    0.txt    ]",
            "[_________4.80KB________][____2.60KB___]",
            "[ abc100.txt][abbcccdddd]               ",
            "[___2.60KB__][__2.20KB__]               ",
        ]
        assert result == expected


def _create_file(path: Path, content: str) -> Path:
    """Write `content` as UTF-8 text to `path`, creating missing parent directories.

    Returns `path` so the call can be used inline as an argument.
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")
    return path


class TestTarFileProcessor:
    """Rendering tests for size trees built from (optionally compressed) tar archives."""

    @pytest.fixture(params=[("w:", ".tar"), ("w:gz", ".tar.gz"), ("w:bz2", ".tar.bz2")])
    def tar_file(self, request, tmp_path):
        """Create a tar archive (plain, gzip or bzip2) holding three text files."""
        mode, ext = request.param
        path = tmp_path / f"data{ext}"
        with tarfile.open(path, mode) as tf:

            def create(rel_path: str, content: str):
                # Materialize the file on disk first, then add it under its relative name.
                tf.add(
                    name=_create_file(tmp_path / rel_path, content), arcname=rel_path
                )

            create("alpha/abc100.txt", "abcdefghijklmnopqrstuvwxyz" * 100)
            create("alpha/abbcccdddde.txt", "abbcccdddde" * 2 * 100)
            create("0.txt", "0" * 26 * 100)
        return path

    def test_default(self, tar_file):
        """The member sizes stored in the tar archive are visualized."""
        tree = TarFileProcessor.from_tar_file(tar_file)
        renderer = AsciiDoubleLineBarRenderer(size_formatter=SIZE_FORMATTER_BYTES)
        result = renderer.render(tree, width=40)
        # Rendering is done at width=40, so every expected row spans 40 columns:
        # label rows are space-padded to the width of the size bar below them,
        # and columns without children are filled with trailing spaces.
        expected = [
            "________________________________________",
            TreeRenderer.bar(label=str(tar_file), width=40),
            "[________________7.40KB________________]",
            "[         alpha         ][    0.txt    ]",
            "[_________4.80KB________][____2.60KB___]",
            "[ abc100.txt][abbcccdddd]               ",
            "[___2.60KB__][__2.20KB__]               ",
        ]
        assert result == expected

0 comments on commit a1bb96f

Please sign in to comment.