Skip to content

Commit

Permalink
Issue #20: Basic support for size breakdown of Zip files
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Dec 17, 2022
1 parent 230737e commit 680c5ab
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 8 deletions.
53 changes: 48 additions & 5 deletions duviz.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,14 @@
import time
import unicodedata
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple
import zipfile
from pathlib import Path


# TODO: catch absence/failure of du/ls subprocesses
# TODO: how to handle unreadable subdirs in du/ls?
# TODO: option to sort alphabetically (instead of on size)
# TODO: use pathlib.Path instead of naive strings where appropriate


def path_split(path: str, base: str = "") -> List[str]:
Expand Down Expand Up @@ -109,6 +113,8 @@ class DuTree(SizeTree):
Size tree from `du` (disk usage) listings
"""

# TODO no need for subclassing from SizeTree

_du_regex = re.compile(r'([0-9]*)\s*(.*)')

@classmethod
Expand Down Expand Up @@ -158,6 +164,7 @@ def pairs(lines: Iterable[str]) -> Iterator[Tuple[List[str], int]]:


class InodeTree(SizeTree):
# TODO no need for subclassing from SizeTree

@classmethod
def from_ls(
Expand Down Expand Up @@ -221,6 +228,25 @@ def pairs(listing: str) -> Iterator[Tuple[List[str], int]]:
return tree


class ZipFileProcessor:
    """Construct a `SizeTree` describing the entries of a ZIP archive."""

    # TODO: tar.gz/... file support too

    @staticmethod
    def from_zipfile(path: Path, compressed: bool = True) -> SizeTree:
        """
        Read the ZIP archive at `path` and build a size tree from its entries.

        :param path: location of the ZIP file; its string form is passed
            as `root` of the resulting tree
        :param compressed: when true (the default) every entry counts for its
            compressed size, otherwise for its uncompressed size
        :return: the tree produced by `SizeTree.from_path_size_pairs`
        """
        # Name of the `ZipInfo` attribute that holds the size to report.
        size_attr = "compress_size" if compressed else "file_size"
        with zipfile.ZipFile(path, mode="r") as archive:
            entries = (
                (path_split(info.filename), getattr(info, size_attr))
                for info in archive.infolist()
            )
            # Build the tree while the archive is still open.
            return SizeTree.from_path_size_pairs(
                pairs=entries, root=str(path), _recalculate_sizes=True
            )


class SizeFormatter:
"""Render a (byte) count in compact human-readable way: 12, 34k, 56M, ..."""

Expand Down Expand Up @@ -253,8 +279,8 @@ def __init__(self, max_depth: int = 5, size_formatter: SizeFormatter = SIZE_FORM
def render(self, tree: SizeTree, width: int) -> List[str]:
raise NotImplementedError

@staticmethod
def bar(
self,
label: str,
width: int,
fill: str = "-",
Expand Down Expand Up @@ -583,6 +609,18 @@ def main():
default=False,
help="Use colors to render bars (instead of ASCII art)",
)
cli.add_argument(
"-z",
"--zip",
action="store_true",
dest="zip",
help="Force ZIP-file handling of given paths (e.g. lacking a traditional `.zip` extension).",
)
cli.add_argument(
"--expanded",
action="store_true",
help="Use expanded file size instead of compressed file size when processing an archive file (e.g. ZIP)",
)

args = cli.parse_args()

Expand All @@ -599,13 +637,18 @@ def main():
else:
progress_report = None

for directory in paths:
if args.inode_count:
tree = InodeTree.from_ls(root=directory, progress_report=progress_report)
for path in paths:
if args.zip or (
os.path.isfile(path) and os.path.splitext(path)[1].lower() == ".zip"
):
tree = ZipFileProcessor.from_zipfile(path, compressed=not args.expanded)
size_formatter = SIZE_FORMATTER_BYTES
elif args.inode_count:
tree = InodeTree.from_ls(root=path, progress_report=progress_report)
size_formatter = SIZE_FORMATTER_COUNT
else:
tree = DuTree.from_du(
root=directory,
root=path,
one_filesystem=args.one_file_system,
dereference=args.dereference,
progress_report=progress_report,
Expand Down
65 changes: 62 additions & 3 deletions test_duviz.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,28 @@
# coding: utf-8
import itertools
import textwrap
import zipfile
from typing import List

import pytest

from duviz import TreeRenderer, SIZE_FORMATTER_COUNT, SIZE_FORMATTER_BYTES, SIZE_FORMATTER_BYTES_BINARY, path_split, \
SizeTree, AsciiDoubleLineBarRenderer, DuTree, InodeTree, get_progress_reporter, AsciiSingleLineBarRenderer, \
ColorDoubleLineBarRenderer, ColorSingleLineBarRenderer, Colorizer
from duviz import (
TreeRenderer,
SIZE_FORMATTER_COUNT,
SIZE_FORMATTER_BYTES,
SIZE_FORMATTER_BYTES_BINARY,
path_split,
SizeTree,
AsciiDoubleLineBarRenderer,
DuTree,
InodeTree,
get_progress_reporter,
AsciiSingleLineBarRenderer,
ColorDoubleLineBarRenderer,
ColorSingleLineBarRenderer,
Colorizer,
ZipFileProcessor,
)


def test_bar_one():
Expand Down Expand Up @@ -644,3 +659,47 @@ def test_get_progress_reporter():
deltas = [i1-i0 for (i0, i1) in zip(indexes[:-1], indexes[1:])]
assert all(d < 5 for d in deltas[:5])
assert all(d > 9 for d in deltas[-5:])


class TestZipFileProcessor:
    """Rendering tests for `ZipFileProcessor.from_zipfile` on a small generated ZIP file."""

    # NOTE(review): the bars are rendered with `width=40`, so every expected
    # line should be 40 characters wide; the label rows below look narrower,
    # so verify their inner padding against the real renderer output.

    @pytest.fixture
    def zip_file(self, tmp_path):
        """Create a deflate-compressed `data.zip` with three entries and return its path."""
        path = tmp_path / "data.zip"
        with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
            # 26 * 100 = 2600 bytes: the alphabet repeated
            with zf.open("alpha/abc100.txt", "w") as f:
                f.write(b"abcdefghijklmnopqrstuvwxyz" * 100)
            # 11 * 2 * 100 = 2200 bytes: a shorter repeating pattern
            with zf.open("alpha/abbcccdddde.txt", "w") as f:
                f.write(b"abbcccdddde" * 2 * 100)
            # 26 * 100 = 2600 bytes of a single repeated character, at the archive root
            with zf.open("0.txt", "w") as f:
                f.write(b"0" * 26 * 100)
        return path

    def test_default(self, zip_file):
        """Without extra arguments the tree is built from the compressed entry sizes."""
        tree = ZipFileProcessor.from_zipfile(zip_file)
        renderer = AsciiDoubleLineBarRenderer(size_formatter=SIZE_FORMATTER_BYTES)
        result = renderer.render(tree, width=40)
        expected = [
            "________________________________________",
            # The root label is the (temporary) zip path, so render it instead of hard-coding it.
            TreeRenderer.bar(label=str(zip_file), width=40),
            "[_________________99B__________________]",
            "[ alpha ][ 0.txt ]",
            "[_____________79B_____________][__20B__]",
            "[ abc100.txt ][abbcccdddd] ",
            "[_______49B_______][___30B____] ",
        ]
        assert result == expected

    def test_expanded(self, zip_file):
        """With `compressed=False` the tree is built from the uncompressed entry sizes."""
        tree = ZipFileProcessor.from_zipfile(zip_file, compressed=False)
        renderer = AsciiDoubleLineBarRenderer(size_formatter=SIZE_FORMATTER_BYTES)
        result = renderer.render(tree, width=40)
        expected = [
            "________________________________________",
            # The root label is the (temporary) zip path, so render it instead of hard-coding it.
            TreeRenderer.bar(label=str(zip_file), width=40),
            # 2600 + 2200 + 2600 bytes written by the fixture
            "[________________7.40KB________________]",
            "[ alpha ][ 0.txt ]",
            "[_________4.80KB________][____2.60KB___]",
            "[ abc100.txt][abbcccdddd] ",
            "[___2.60KB__][__2.20KB__] ",
        ]
        assert result == expected

0 comments on commit 680c5ab

Please sign in to comment.