From dd53b1ee2c90ab2f69d7a0f217bf7d5fb0399145 Mon Sep 17 00:00:00 2001 From: Garrett Pence Date: Mon, 8 Sep 2025 20:43:52 -0400 Subject: [PATCH 1/9] Refactored report format to use Pydantic. --- docs/guide.md | 27 ++-- fuzzing/search_chunks_fuzzer.py | 2 +- pyproject.toml | 1 + python/unblob/extractors/command.py | 6 +- python/unblob/finder.py | 2 +- python/unblob/models.py | 67 +++------ python/unblob/processing.py | 4 +- python/unblob/report.py | 218 +++++++++++++++++----------- tests/conftest.py | 2 +- tests/extractors/test_command.py | 2 +- tests/test_logging.py | 4 +- tests/test_models.py | 21 ++- tests/test_processing.py | 4 +- tests/test_report.py | 12 +- uv.lock | 147 +++++++++++++++++++ vulture_whitelist.py | 3 - 16 files changed, 344 insertions(+), 178 deletions(-) diff --git a/docs/guide.md b/docs/guide.md index ffb6f603e1..88e53d9abb 100644 --- a/docs/guide.md +++ b/docs/guide.md @@ -71,8 +71,7 @@ $ cat alpine-report.json "task": { "path": "/home/walkman/Projects/unblob/demo/alpine-minirootfs-3.16.1-x86_64.tar.gz", "depth": 0, - "chunk_id": "", - "__typename__": "Task" + "chunk_id": "" }, "reports": [ { @@ -82,12 +81,12 @@ $ cat alpine-report.json "is_file": true, "is_link": false, "link_target": null, - "__typename__": "StatReport" + "report_type": "StatReport" }, { "magic": "gzip compressed data, max compression, from Unix, original size modulo 2^32 5816320\\012- data", "mime_type": "application/gzip", - "__typename__": "FileMagicReport" + "report_type": "FileMagicReport" }, { "id": "13590:1", @@ -97,18 +96,16 @@ $ cat alpine-report.json "size": 2711958, "is_encrypted": false, "extraction_reports": [], - "__typename__": "ChunkReport" + "report_type": "ChunkReport" } ], "subtasks": [ { "path": "/home/walkman/Projects/unblob/demo/alpine-minirootfs-3.16.1-x86_64.tar.gz_extract", "depth": 1, - "chunk_id": "13590:1", - "__typename__": "Task" + "chunk_id": "13590:1" } - ], - "__typename__": "TaskResult" + ] }, ... ] @@ -144,7 +141,7 @@ $ unblob -vvv unknown-file | grep -C 15 "Entropy distribution" 2024-10-30 10:52.03 [debug ] Shannon entropy calculated block_size=0x20000 highest=99.99 lowest=99.98 mean=99.98 path=unknown-file_extract/0-10485760.unknown pid=1963719 size=0xa00000 2024-10-30 10:52.03 [debug ] Chi square probability calculated block_size=0x20000 highest=97.88 lowest=3.17 mean=52.76 path=unknown-file_extract/0-10485760.unknown pid=1963719 size=0xa00000 2024-10-30 10:52.03 [debug ] Entropy chart chart= - Randomness distribution + Randomness distribution ┌───────────────────────────────────────────────────────────────────────────┐ 100┤ •• Shannon entropy (%) •••••••••♰••••••••••••••••••••••••••••••••••│ 90┤ ♰♰ Chi square probability (%) ♰ ♰ ♰♰♰♰ ♰ ♰ ♰ │ @@ -158,8 +155,8 @@ $ unblob -vvv unknown-file | grep -C 15 "Entropy distribution" 10┤ ♰ ♰ ♰ ♰ ♰ ♰♰ ♰ ♰ ♰♰ │ 0┤ ♰ ♰ │ └─┬──┬─┬──┬────┬───┬──┬──┬──┬───┬───┬──┬────┬───┬────┬──┬──┬────┬──┬───┬──┬─┘ - 0 2 5 7 11 16 20 23 27 30 34 38 42 47 51 56 60 63 68 71 76 79 - 131072 bytes + 0 2 5 7 11 16 20 23 27 30 34 38 42 47 51 56 60 63 68 71 76 79 + 131072 bytes path=unknown-file_extract/0-10485760.unknown pid=1963719 2024-10-30 10:52.03 [info ] Extracting unknown chunk chunk=0xc96196-0x1696196 path=unknown-file_extract/13197718-23683478.unknown pid=1963719 2024-10-30 10:52.03 [debug ] Carving chunk path=unknown-file_extract/13197718-23683478.unknown pid=1963719 @@ -167,7 +164,7 @@ $ unblob -vvv unknown-file | grep -C 15 "Entropy distribution" 2024-10-30 10:52.03 [debug ] Shannon entropy calculated block_size=0x20000 highest=99.99 lowest=99.98 mean=99.98 path=unknown-file_extract/13197718-23683478.unknown pid=1963719 size=0xa00000 2024-10-30 10:52.03 [debug ] Chi square probability calculated block_size=0x20000 highest=99.03 lowest=0.23 mean=42.62 path=unknown-file_extract/13197718-23683478.unknown pid=1963719 size=0xa00000 2024-10-30 10:52.03 [debug ] Entropy chart chart= - Randomness distribution + Randomness distribution ┌───────────────────────────────────────────────────────────────────────────┐ 100┤ •• Shannon entropy (%) •••••••••••••••••••••♰••••••••••••••••••••••│ 90┤ ♰♰ Chi square probability (%) ♰ ♰♰ ♰ │ @@ -181,8 +178,8 @@ $ unblob -vvv unknown-file | grep -C 15 "Entropy distribution" 10┤ ♰ ♰ ♰ ♰ ♰ ♰ ♰ ♰♰ ♰ ♰♰ ♰♰ ♰♰ ♰ ♰ ♰ │ 0┤ ♰ ♰ ♰♰ ♰ ♰♰ │ └─┬──┬─┬──┬────┬───┬──┬──┬──┬───┬───┬──┬────┬───┬────┬──┬──┬────┬──┬───┬──┬─┘ - 0 2 5 7 11 16 20 23 27 30 34 38 42 47 51 56 60 63 68 71 76 79 - 131072 bytes + 0 2 5 7 11 16 20 23 27 30 34 38 42 47 51 56 60 63 68 71 76 79 + 131072 bytes ``` ### Skip extraction with file magic diff --git a/fuzzing/search_chunks_fuzzer.py b/fuzzing/search_chunks_fuzzer.py index 5e2efe7997..bf9e1c9de2 100755 --- a/fuzzing/search_chunks_fuzzer.py +++ b/fuzzing/search_chunks_fuzzer.py @@ -59,7 +59,7 @@ def test_search_chunks(data): depth=0, blob_id="", ) - result = TaskResult(task) + result = TaskResult(task=task) search_chunks(file, len(data), config.handlers, result) diff --git a/pyproject.toml b/pyproject.toml index 56c7d36afb..897d67cfd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ "lz4>=4.3.2,!=4.4.3", # 4.4.3 doesn't have aarch64 wheels https://github.com/python-lz4/python-lz4/pull/298 "plotext>=4.2.0,<6.0", "pluggy>=1.3.0", + "pydantic>=2.0", "pyfatfs>=1.0.5", "pymdown-extensions>=10.15", "pyperscan>=0.3.0", diff --git a/python/unblob/extractors/command.py b/python/unblob/extractors/command.py index 7db354d676..cffdf59a0a 100644 --- a/python/unblob/extractors/command.py +++ b/python/unblob/extractors/command.py @@ -64,7 +64,7 @@ def no_op(): exit_code=res.returncode, ) - logger.error("Extract command failed", **error_report.asdict()) + logger.error("Extract command failed", **error_report.model_dump()) raise ExtractError(error_report) except FileNotFoundError: error_report = ExtractorDependencyNotFoundReport( @@ -72,14 +72,14 @@ def no_op(): ) logger.error( "Can't run extract command. Is the extractor installed?", - **error_report.asdict(), + **error_report.model_dump(), ) raise ExtractError(error_report) from None except subprocess.TimeoutExpired as e: error_report = ExtractorTimedOut(cmd=e.cmd, timeout=e.timeout) logger.error( "Extract command timed out.", - **error_report.asdict(), + **error_report.model_dump(), ) raise ExtractError(error_report) from None finally: diff --git a/python/unblob/finder.py b/python/unblob/finder.py index e5f68c7f9d..741dcffd8b 100644 --- a/python/unblob/finder.py +++ b/python/unblob/finder.py @@ -63,7 +63,7 @@ def _calculate_chunk( ) task_result.add_report(error_report) logger.error( - "Unhandled Exception during chunk calculation", **error_report.asdict() + "Unhandled Exception during chunk calculation", **error_report.model_dump() ) diff --git a/python/unblob/models.py b/python/unblob/models.py index 62f973a302..d201f4468a 100644 --- a/python/unblob/models.py +++ b/python/unblob/models.py @@ -8,6 +8,7 @@ from typing import Generic, Optional, TypeVar, Union import attrs +from pydantic import BaseModel from structlog import get_logger from .file_utils import Endian, File, InvalidInputFormat, StructParser @@ -16,7 +17,7 @@ from .report import ( CarveDirectoryReport, ChunkReport, - ErrorReport, + ErrorReportBase, MultiFileReport, RandomnessReport, Report, @@ -61,12 +62,11 @@ def __post_init__(self): self.fully_supported = len(self.limitations) == 0 -@attrs.define(frozen=True) -class Task: +class Task(BaseModel): path: Path depth: int blob_id: str - is_multi_file: bool = attrs.field(default=False) + is_multi_file: bool = False @attrs.define @@ -228,11 +228,10 @@ def as_report(self, extraction_reports: list[Report]) -> MultiFileReport: ReportType = TypeVar("ReportType", bound=Report) -@attrs.define -class TaskResult: +class TaskResult(BaseModel): task: Task - reports: list[Report] = attrs.field(factory=list) - subtasks: list[Task] = attrs.field(factory=list) + reports: list[Report] = [] + subtasks: list[Task] = [] def add_report(self, report: Report): self.reports.append(report) @@ -244,23 +243,24 @@ def filter_reports(self, report_class: type[ReportType]) -> list[ReportType]: return [report for report in self.reports if isinstance(report, report_class)] -@attrs.define -class ProcessResult: - results: list[TaskResult] = attrs.field(factory=list) +class ProcessResult(BaseModel): + results: list[TaskResult] = [] @property - def errors(self) -> list[ErrorReport]: + def errors(self) -> list[ErrorReportBase]: reports = itertools.chain.from_iterable(r.reports for r in self.results) interesting_reports = ( - r for r in reports if isinstance(r, (ErrorReport, ChunkReport)) + r for r in reports if isinstance(r, (ErrorReportBase, ChunkReport)) ) errors = [] for report in interesting_reports: - if isinstance(report, ErrorReport): + if isinstance(report, ErrorReportBase): errors.append(report) else: errors.extend( - r for r in report.extraction_reports if isinstance(r, ErrorReport) + r + for r in report.extraction_reports + if isinstance(r, ErrorReportBase) ) return errors @@ -268,7 +268,9 @@ def register(self, result: TaskResult): self.results.append(result) def to_json(self, indent=" "): - return to_json(self.results, indent=indent) + return json.dumps( + [result.model_dump(mode="json") for result in self.results], indent=indent + ) def get_output_dir(self) -> Optional[Path]: try: @@ -285,39 +287,6 @@ def get_output_dir(self) -> Optional[Path]: return None -class _JSONEncoder(json.JSONEncoder): - def default(self, o): - obj = o - if attrs.has(type(obj)): - extend_attr_output = True - attr_output = attrs.asdict(obj, recurse=not extend_attr_output) - attr_output["__typename__"] = obj.__class__.__name__ - return attr_output - - if isinstance(obj, Enum): - return obj.name - - if isinstance(obj, Path): - return str(obj) - - if isinstance(obj, bytes): - try: - return obj.decode() - except UnicodeDecodeError: - return str(obj) - - logger.error("JSONEncoder met a non-JSON encodable value", obj=obj) - # the usual fail path of custom JSONEncoders is to call the parent and let it fail - # return json.JSONEncoder.default(self, obj) - # instead of failing, just return something usable - return f"Non-JSON encodable value: {obj}" - - -def to_json(obj, indent=" ") -> str: - """Encode any UnBlob object as a serialized JSON.""" - return json.dumps(obj, cls=_JSONEncoder, indent=indent) - - class ExtractError(Exception): """There was an error during extraction.""" diff --git a/python/unblob/processing.py b/python/unblob/processing.py index 64ebf210fd..2509de7c83 100644 --- a/python/unblob/processing.py +++ b/python/unblob/processing.py @@ -242,7 +242,7 @@ def __init__(self, config: ExtractionConfig): self._get_mime_type = magic.Magic(mime=True).from_file def process_task(self, task: Task) -> TaskResult: - result = TaskResult(task) + result = TaskResult(task=task) try: self._process_task(result, task) except Exception as exc: @@ -393,7 +393,7 @@ def _calculate_multifile( task_result.add_report(error_report) logger.warning( "Unhandled Exception during multi file calculation", - **error_report.asdict(), + **error_report.model_dump(), ) def _check_conflicting_files( diff --git a/python/unblob/report.py b/python/unblob/report.py index b902903ce1..ec303b2995 100644 --- a/python/unblob/report.py +++ b/python/unblob/report.py @@ -1,25 +1,17 @@ +from __future__ import annotations + import hashlib import stat import traceback from enum import Enum from pathlib import Path -from typing import Optional, Union, final - -import attrs - +from typing import Annotated, Any, Literal, Union -@attrs.define(kw_only=True, frozen=True) -class Report: - """A common base class for different reports.""" +from pydantic import BaseModel, ConfigDict, Field, field_serializer - def __attrs_post_init__(self): - for field in attrs.fields(type(self)): - value = getattr(self, field.name) - if isinstance(value, int): - object.__setattr__(self, field.name, int(value)) - def asdict(self) -> dict: - return attrs.asdict(self) +class ReportBase(BaseModel): + """A common base class for different reports. This will enable easy pydantic configuration of all models from a single point in the future if desired.""" class Severity(Enum): @@ -29,56 +21,64 @@ class Severity(Enum): WARNING = "WARNING" -@attrs.define(kw_only=True, frozen=True) -class ErrorReport(Report): +class ErrorReportBase(ReportBase): severity: Severity -def _convert_exception_to_str(obj: Union[str, Exception]) -> str: - if isinstance(obj, str): - return obj - if isinstance(obj, Exception): - e: Exception = obj - return "".join(traceback.format_exception(type(e), e, e.__traceback__)) - raise ValueError("Invalid exception object", obj) +class ErrorReport(ErrorReportBase): + report_type: Literal["ErrorReport"] = "ErrorReport" -@attrs.define(kw_only=True, frozen=True) -class UnknownError(ErrorReport): +class UnknownErrorBase(ErrorReportBase): """Describes an exception raised during file processing.""" - severity: Severity = attrs.field(default=Severity.ERROR) - exception: Union[str, Exception] = attrs.field( # pyright: ignore[reportGeneralTypeIssues] - converter=_convert_exception_to_str - ) - """Exceptions are also formatted at construct time. + severity: Severity = Severity.ERROR + exception: str | Exception - `attrs` is not integrated enough with type checker/LSP provider `pyright` to include converters. + model_config = ConfigDict( + arbitrary_types_allowed=True + ) # Necessary to support Exception type - See: https://www.attrs.org/en/stable/types.html#pyright - """ + def model_post_init(self, __context: Any) -> None: + if isinstance(self.exception, Exception): + self.exception = "".join( + traceback.format_exception( + type(self.exception), self.exception, self.exception.__traceback__ + ) + ) + + """Exceptions are also formatted at construct time.""" + + +class UnknownError(UnknownErrorBase): + """Describes an exception raised during file processing.""" + + report_type: Literal["UnknownError"] = "UnknownError" -@attrs.define(kw_only=True, frozen=True) -class CalculateChunkExceptionReport(UnknownError): +class CalculateChunkExceptionReport(UnknownErrorBase): """Describes an exception raised during calculate_chunk execution.""" start_offset: int # Stored in `str` rather than `Handler`, because the pickle picks ups structs from `C_DEFINITIONS` handler: str + report_type: Literal["CalculateChunkExceptionReport"] = ( + "CalculateChunkExceptionReport" + ) -@attrs.define(kw_only=True, frozen=True) -class CalculateMultiFileExceptionReport(UnknownError): +class CalculateMultiFileExceptionReport(UnknownErrorBase): """Describes an exception raised during calculate_chunk execution.""" path: Path # Stored in `str` rather than `Handler`, because the pickle picks ups structs from `C_DEFINITIONS` handler: str + report_type: Literal["CalculateMultiFileExceptionReport"] = ( + "CalculateMultiFileExceptionReport" + ) -@attrs.define(kw_only=True, frozen=True) -class ExtractCommandFailedReport(ErrorReport): +class ExtractCommandFailedReport(ErrorReportBase): """Describes an error when failed to run the extraction command.""" severity: Severity = Severity.WARNING @@ -86,57 +86,69 @@ class ExtractCommandFailedReport(ErrorReport): stdout: bytes stderr: bytes exit_code: int + report_type: Literal["ExtractCommandFailedReport"] = "ExtractCommandFailedReport" + + # In case there is any strange encoding in stdout/stderr, convert them to str when serializing + @field_serializer("stdout") + def stdout_to_str(self, v: bytes, _info): + return str(v) + @field_serializer("stderr") + def stderr_to_str(self, v: bytes, _info): + return str(v) -@attrs.define(kw_only=True, frozen=True) -class OutputDirectoryExistsReport(ErrorReport): + +class OutputDirectoryExistsReport(ErrorReportBase): severity: Severity = Severity.ERROR path: Path + report_type: Literal["OutputDirectoryExistsReport"] = "OutputDirectoryExistsReport" -@attrs.define(kw_only=True, frozen=True) -class ExtractorDependencyNotFoundReport(ErrorReport): +class ExtractorDependencyNotFoundReport(ErrorReportBase): """Describes an error when the dependency of an extractor doesn't exist.""" severity: Severity = Severity.ERROR dependencies: list[str] + report_type: Literal["ExtractorDependencyNotFoundReport"] = ( + "ExtractorDependencyNotFoundReport" + ) -@attrs.define(kw_only=True, frozen=True) -class ExtractorTimedOut(ErrorReport): +class ExtractorTimedOut(ErrorReportBase): """Describes an error when the extractor execution timed out.""" severity: Severity = Severity.ERROR cmd: str timeout: float + report_type: Literal["ExtractorTimedOut"] = "ExtractorTimedOut" -@attrs.define(kw_only=True, frozen=True) -class MaliciousSymlinkRemoved(ErrorReport): +class MaliciousSymlinkRemoved(ErrorReportBase): """Describes an error when malicious symlinks have been removed from disk.""" severity: Severity = Severity.WARNING link: str target: str + report_type: Literal["MaliciousSymlinkRemoved"] = "MaliciousSymlinkRemoved" -@attrs.define(kw_only=True, frozen=True) -class MultiFileCollisionReport(ErrorReport): +class MultiFileCollisionReport(ErrorReportBase): """Describes an error when MultiFiles collide on the same file.""" severity: Severity = Severity.ERROR paths: set[Path] handler: str + report_type: Literal["MultiFileCollisionReport"] = "MultiFileCollisionReport" -@attrs.define(kw_only=True, frozen=True) -class StatReport(Report): +class StatReport(ReportBase): path: Path size: int is_dir: bool is_file: bool is_link: bool - link_target: Optional[Path] + link_target: Path | None + report_type: Literal["StatReport"] = "StatReport" @classmethod def from_path(cls, path: Path): @@ -157,11 +169,11 @@ def from_path(cls, path: Path): ) -@attrs.define(kw_only=True, frozen=True) -class HashReport(Report): +class HashReport(ReportBase): md5: str sha1: str sha256: str + report_type: Literal["HashReport"] = "HashReport" @classmethod def from_path(cls, path: Path): @@ -183,14 +195,13 @@ def from_path(cls, path: Path): ) -@attrs.define(kw_only=True, frozen=True) -class FileMagicReport(Report): +class FileMagicReport(ReportBase): magic: str mime_type: str + report_type: Literal["FileMagicReport"] = "FileMagicReport" -@attrs.define(kw_only=True, frozen=True) -class RandomnessMeasurements: +class RandomnessMeasurements(BaseModel): percentages: list[float] block_size: int mean: float @@ -204,15 +215,13 @@ def lowest(self): return min(self.percentages) -@attrs.define(kw_only=True, frozen=True) -class RandomnessReport(Report): +class RandomnessReport(ReportBase): shannon: RandomnessMeasurements chi_square: RandomnessMeasurements + report_type: Literal["RandomnessReport"] = "RandomnessReport" -@final -@attrs.define(kw_only=True, frozen=True) -class ChunkReport(Report): +class ChunkReport(ReportBase): id: str handler_name: str start_offset: int @@ -220,35 +229,33 @@ class ChunkReport(Report): size: int is_encrypted: bool extraction_reports: list[Report] + report_type: Literal["ChunkReport"] = "ChunkReport" -@final -@attrs.define(kw_only=True, frozen=True) -class UnknownChunkReport(Report): +class UnknownChunkReport(ReportBase): id: str start_offset: int end_offset: int size: int - randomness: Optional[RandomnessReport] + randomness: RandomnessReport | None + report_type: Literal["UnknownChunkReport"] = "UnknownChunkReport" -@attrs.define(kw_only=True, frozen=True) -class CarveDirectoryReport(Report): +class CarveDirectoryReport(ReportBase): carve_dir: Path + report_type: Literal["CarveDirectoryReport"] = "CarveDirectoryReport" -@final -@attrs.define(kw_only=True, frozen=True) -class MultiFileReport(Report): +class MultiFileReport(ReportBase): id: str handler_name: str name: str paths: list[Path] extraction_reports: list[Report] + report_type: Literal["MultiFileReport"] = "MultiFileReport" -@attrs.define(kw_only=True, frozen=True) -class ExtractionProblem(Report): +class ExtractionProblemBase(ReportBase): """A non-fatal problem discovered during extraction. A report like this still means, that the extraction was successful, @@ -265,7 +272,7 @@ class ExtractionProblem(Report): problem: str resolution: str - path: Optional[str] = None + path: str | None = None @property def log_msg(self): @@ -275,9 +282,27 @@ def log_with(self, logger): logger.warning(self.log_msg, path=self.path) -@attrs.define(kw_only=True, frozen=True) -class PathTraversalProblem(ExtractionProblem): +class ExtractionProblem(ExtractionProblemBase): + """A non-fatal problem discovered during extraction. + + A report like this still means, that the extraction was successful, + but there were problems that got resolved. + The output is expected to be complete, with the exception of + the reported path. + + Examples + -------- + - duplicate entries for certain archive formats (tar, zip) + - unsafe symlinks pointing outside of extraction directory + + """ + + report_type: Literal["ExtractionProblem"] = "ExtractionProblem" + + +class PathTraversalProblem(ExtractionProblemBase): extraction_path: str + report_type: Literal["PathTraversalProblem"] = "PathTraversalProblem" def log_with(self, logger): logger.warning( @@ -287,18 +312,49 @@ def log_with(self, logger): ) -@attrs.define(kw_only=True, frozen=True) -class LinkExtractionProblem(ExtractionProblem): +class LinkExtractionProblem(ExtractionProblemBase): link_path: str + report_type: Literal["LinkExtractionProblem"] = "LinkExtractionProblem" def log_with(self, logger): logger.warning(self.log_msg, path=self.path, link_path=self.link_path) -@attrs.define(kw_only=True, frozen=True) -class SpecialFileExtractionProblem(ExtractionProblem): +class SpecialFileExtractionProblem(ExtractionProblemBase): mode: int device: int + report_type: Literal["SpecialFileExtractionProblem"] = ( + "SpecialFileExtractionProblem" + ) def log_with(self, logger): logger.warning(self.log_msg, path=self.path, mode=self.mode, device=self.device) + + +Report = Annotated[ + Union[ + ErrorReport, + UnknownError, + CalculateChunkExceptionReport, + CalculateMultiFileExceptionReport, + ExtractCommandFailedReport, + OutputDirectoryExistsReport, + ExtractorDependencyNotFoundReport, + ExtractorTimedOut, + MaliciousSymlinkRemoved, + MultiFileCollisionReport, + StatReport, + HashReport, + FileMagicReport, + RandomnessReport, + ChunkReport, + UnknownChunkReport, + CarveDirectoryReport, + MultiFileReport, + ExtractionProblem, + PathTraversalProblem, + LinkExtractionProblem, + SpecialFileExtractionProblem, + ], + Field(discriminator="report_type"), +] diff --git a/tests/conftest.py b/tests/conftest.py index 71eb2210bc..ffc503734b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,4 +12,4 @@ @pytest.fixture def task_result(): task = Task(path=Path("/nonexistent"), depth=0, blob_id="") - return TaskResult(task) + return TaskResult(task=task) diff --git a/tests/extractors/test_command.py b/tests/extractors/test_command.py index 158d960367..d99c8f5896 100644 --- a/tests/extractors/test_command.py +++ b/tests/extractors/test_command.py @@ -63,7 +63,7 @@ def test_command_execution_failure(tmpdir: Path): command.extract(Path("input"), outdir) assert list(excinfo.value.reports) == [ - ExtractCommandFailedReport( + ExtractCommandFailedReport.model_construct( command=mock.ANY, stdout=b"stdout", stderr=b"stderr", diff --git a/tests/test_logging.py b/tests/test_logging.py index 9d4a09bece..4d9e5e1d4d 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -42,4 +42,6 @@ def test_UnknownError_can_be_logged(): # noqa: N802 logger = structlog.get_logger() # this line used to trigger an exception: - logger.error("unknown", **UnknownError(exception=Exception("whatever")).asdict()) + logger.error( + "unknown", **UnknownError(exception=Exception("whatever")).model_dump() + ) diff --git a/tests/test_models.py b/tests/test_models.py index c1d44220a0..b59264d4bc 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -4,7 +4,7 @@ import pytest from unblob.file_utils import InvalidInputFormat -from unblob.models import Chunk, ProcessResult, Task, TaskResult, UnknownChunk, to_json +from unblob.models import Chunk, ProcessResult, Task, TaskResult, UnknownChunk from unblob.report import ( ChunkReport, ExtractCommandFailedReport, @@ -116,7 +116,7 @@ def test_validation(self, start_offset, end_offset): class Test_to_json: # noqa: N801 def test_process_result_conversion(self): task = Task(path=Path("/nonexistent"), depth=0, blob_id="") - task_result = TaskResult(task) + task_result = TaskResult(task=task) chunk_id = "test_basic_conversion:id" task_result.add_report( @@ -170,10 +170,9 @@ def test_process_result_conversion(self): decoded_report = json.loads(json_text) assert decoded_report == [ { - "__typename__": "TaskResult", "reports": [ { - "__typename__": "StatReport", + "report_type": "StatReport", "is_dir": False, "is_file": True, "is_link": False, @@ -182,18 +181,18 @@ def test_process_result_conversion(self): "size": 384, }, { - "__typename__": "FileMagicReport", + "report_type": "FileMagicReport", "magic": "Zip archive data, at least v2.0 to extract", "mime_type": "application/zip", }, { - "__typename__": "HashReport", + "report_type": "HashReport", "md5": "9019fcece2433ad7f12c077e84537a74", "sha1": "36998218d8f43b69ef3adcadf2e8979e81eed166", "sha256": "7d7ca7e1410b702b0f85d18257aebb964ac34f7fad0a0328d72e765bfcb21118", }, { - "__typename__": "ChunkReport", + "report_type": "ChunkReport", "end_offset": 384, "extraction_reports": [], "handler_name": "zip", @@ -205,7 +204,6 @@ def test_process_result_conversion(self): ], "subtasks": [ { - "__typename__": "Task", "blob_id": "test_basic_conversion:id", "depth": 314, "is_multi_file": False, @@ -213,7 +211,6 @@ def test_process_result_conversion(self): } ], "task": { - "__typename__": "Task", "blob_id": "", "depth": 0, "is_multi_file": False, @@ -230,16 +227,16 @@ def test_exotic_command_output(self): exit_code=1, ) - json_text = to_json(report) + json_text = report.model_dump_json() decoded_report = json.loads(json_text) assert decoded_report == { - "__typename__": "ExtractCommandFailedReport", + "report_type": "ExtractCommandFailedReport", "command": "dump all bytes", "exit_code": 1, "severity": "WARNING", - "stderr": "stdout is pretty strange ;)", + "stderr": "b'stdout is pretty strange ;)'", "stdout": ( "b'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07" "\\x08\\t\\n\\x0b\\x0c\\r\\x0e\\x0f" diff --git a/tests/test_processing.py b/tests/test_processing.py index 1a141a3ca1..67bd7bb411 100644 --- a/tests/test_processing.py +++ b/tests/test_processing.py @@ -226,10 +226,10 @@ def test_format_randomness_plot_error(): def test_format_randomness_plot_no_exception(percentages: list[float], block_size: int): assert str(block_size) in format_randomness_plot( RandomnessReport( - shannon=RandomnessMeasurements( + shannon=RandomnessMeasurements.model_construct( percentages=percentages, block_size=block_size, mean=mean(percentages) ), - chi_square=RandomnessMeasurements( + chi_square=RandomnessMeasurements.model_construct( percentages=percentages, block_size=block_size, mean=mean(percentages) ), ) diff --git a/tests/test_report.py b/tests/test_report.py index ecbe79ca5c..85c44d6116 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -121,14 +121,14 @@ def hello_kitty_task_results( sha256="144d8b2c949cb4943128aa0081153bcba4f38eb0ba26119cc06ca1563c4999e1", ), CarveDirectoryReport(carve_dir=extract_root / "hello_kitty_extract"), - UnknownChunkReport( + UnknownChunkReport.model_construct( id=ANY, start_offset=0, end_offset=6, size=6, randomness=None, ), - UnknownChunkReport( + UnknownChunkReport.model_construct( id=ANY, start_offset=131, end_offset=138, @@ -183,7 +183,7 @@ def hello_kitty_task_results( blob_id=kitty_id, ), reports=[ - StatReport( + StatReport.model_construct( path=extract_root / "hello_kitty_extract/138-263.zip_extract", size=ANY, is_dir=True, @@ -236,7 +236,7 @@ def hello_kitty_task_results( blob_id=hello_id, ), reports=[ - StatReport( + StatReport.model_construct( path=extract_root / "hello_kitty_extract/6-131.zip_extract", size=ANY, is_dir=True, @@ -343,7 +343,7 @@ def container_task_results( is_link=False, link_target=None, ), - FileMagicReport( + FileMagicReport.model_construct( magic=ANY, mime_type="application/zip", ), @@ -377,7 +377,7 @@ def container_task_results( blob_id=chunk_id, ), reports=[ - StatReport( + StatReport.model_construct( path=extract_root / "container_extract", size=ANY, is_dir=True, diff --git a/uv.lock b/uv.lock index 2d903c515f..e86544752f 100644 --- a/uv.lock +++ b/uv.lock @@ -6,6 +6,15 @@ resolution-markers = [ "python_full_version < '3.10'", ] +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + [[package]] name = "appdirs" version = "1.4.4" @@ -1187,6 +1196,130 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552, upload-time = "2024-03-30T13:22:20.476Z" }, ] +[[package]] +name = "pydantic" +version = "2.11.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/00/dd/4325abf92c39ba8623b5af936ddb36ffcfe0beae70405d456ab1fb2f5b8c/pydantic-2.11.7.tar.gz", hash = "sha256:d989c3c6cb79469287b1569f7447a17848c998458d49ebe294e975b9baf0f0db", size = 788350, upload-time = "2025-06-14T08:33:17.137Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/c0/ec2b1c8712ca690e5d61979dee872603e92b8a32f94cc1b72d53beab008a/pydantic-2.11.7-py3-none-any.whl", hash = "sha256:dde5df002701f6de26248661f6835bbe296a47bf73990135c7d07ce741b9623b", size = 444782, upload-time = "2025-06-14T08:33:14.905Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.33.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ad/88/5f2260bdfae97aabf98f1778d43f69574390ad787afb646292a638c923d4/pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc", size = 435195, upload-time = "2025-04-23T18:33:52.104Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/92/b31726561b5dae176c2d2c2dc43a9c5bfba5d32f96f8b4c0a600dd492447/pydantic_core-2.33.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2b3d326aaef0c0399d9afffeb6367d5e26ddc24d351dbc9c636840ac355dc5d8", size = 2028817, upload-time = "2025-04-23T18:30:43.919Z" }, + { url = "https://files.pythonhosted.org/packages/a3/44/3f0b95fafdaca04a483c4e685fe437c6891001bf3ce8b2fded82b9ea3aa1/pydantic_core-2.33.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e5b2671f05ba48b94cb90ce55d8bdcaaedb8ba00cc5359f6810fc918713983d", size = 1861357, upload-time = "2025-04-23T18:30:46.372Z" }, + { url = "https://files.pythonhosted.org/packages/30/97/e8f13b55766234caae05372826e8e4b3b96e7b248be3157f53237682e43c/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0069c9acc3f3981b9ff4cdfaf088e98d83440a4c7ea1bc07460af3d4dc22e72d", size = 1898011, upload-time = "2025-04-23T18:30:47.591Z" }, + { url = "https://files.pythonhosted.org/packages/9b/a3/99c48cf7bafc991cc3ee66fd544c0aae8dc907b752f1dad2d79b1b5a471f/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d53b22f2032c42eaaf025f7c40c2e3b94568ae077a606f006d206a463bc69572", size = 1982730, upload-time = "2025-04-23T18:30:49.328Z" }, + { url = "https://files.pythonhosted.org/packages/de/8e/a5b882ec4307010a840fb8b58bd9bf65d1840c92eae7534c7441709bf54b/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0405262705a123b7ce9f0b92f123334d67b70fd1f20a9372b907ce1080c7ba02", size = 2136178, upload-time = "2025-04-23T18:30:50.907Z" }, + { url = "https://files.pythonhosted.org/packages/e4/bb/71e35fc3ed05af6834e890edb75968e2802fe98778971ab5cba20a162315/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b25d91e288e2c4e0662b8038a28c6a07eaac3e196cfc4ff69de4ea3db992a1b", size = 2736462, upload-time = "2025-04-23T18:30:52.083Z" }, + { url = "https://files.pythonhosted.org/packages/31/0d/c8f7593e6bc7066289bbc366f2235701dcbebcd1ff0ef8e64f6f239fb47d/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bdfe4b3789761f3bcb4b1ddf33355a71079858958e3a552f16d5af19768fef2", size = 2005652, upload-time = "2025-04-23T18:30:53.389Z" }, + { url = "https://files.pythonhosted.org/packages/d2/7a/996d8bd75f3eda405e3dd219ff5ff0a283cd8e34add39d8ef9157e722867/pydantic_core-2.33.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:efec8db3266b76ef9607c2c4c419bdb06bf335ae433b80816089ea7585816f6a", size = 2113306, upload-time = "2025-04-23T18:30:54.661Z" }, + { url = "https://files.pythonhosted.org/packages/ff/84/daf2a6fb2db40ffda6578a7e8c5a6e9c8affb251a05c233ae37098118788/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:031c57d67ca86902726e0fae2214ce6770bbe2f710dc33063187a68744a5ecac", size = 2073720, upload-time = "2025-04-23T18:30:56.11Z" }, + { url = "https://files.pythonhosted.org/packages/77/fb/2258da019f4825128445ae79456a5499c032b55849dbd5bed78c95ccf163/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:f8de619080e944347f5f20de29a975c2d815d9ddd8be9b9b7268e2e3ef68605a", size = 2244915, upload-time = "2025-04-23T18:30:57.501Z" }, + { url = "https://files.pythonhosted.org/packages/d8/7a/925ff73756031289468326e355b6fa8316960d0d65f8b5d6b3a3e7866de7/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:73662edf539e72a9440129f231ed3757faab89630d291b784ca99237fb94db2b", size = 2241884, upload-time = "2025-04-23T18:30:58.867Z" }, + { url = "https://files.pythonhosted.org/packages/0b/b0/249ee6d2646f1cdadcb813805fe76265745c4010cf20a8eba7b0e639d9b2/pydantic_core-2.33.2-cp310-cp310-win32.whl", hash = "sha256:0a39979dcbb70998b0e505fb1556a1d550a0781463ce84ebf915ba293ccb7e22", size = 1910496, upload-time = "2025-04-23T18:31:00.078Z" }, + { url = "https://files.pythonhosted.org/packages/66/ff/172ba8f12a42d4b552917aa65d1f2328990d3ccfc01d5b7c943ec084299f/pydantic_core-2.33.2-cp310-cp310-win_amd64.whl", hash = "sha256:b0379a2b24882fef529ec3b4987cb5d003b9cda32256024e6fe1586ac45fc640", size = 1955019, upload-time = "2025-04-23T18:31:01.335Z" }, + { url = "https://files.pythonhosted.org/packages/3f/8d/71db63483d518cbbf290261a1fc2839d17ff89fce7089e08cad07ccfce67/pydantic_core-2.33.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7", size = 2028584, upload-time = "2025-04-23T18:31:03.106Z" }, + { url = "https://files.pythonhosted.org/packages/24/2f/3cfa7244ae292dd850989f328722d2aef313f74ffc471184dc509e1e4e5a/pydantic_core-2.33.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246", size = 1855071, upload-time = "2025-04-23T18:31:04.621Z" }, + { url = "https://files.pythonhosted.org/packages/b3/d3/4ae42d33f5e3f50dd467761304be2fa0a9417fbf09735bc2cce003480f2a/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f", size = 1897823, upload-time = "2025-04-23T18:31:06.377Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f3/aa5976e8352b7695ff808599794b1fba2a9ae2ee954a3426855935799488/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc", size = 1983792, upload-time = "2025-04-23T18:31:07.93Z" }, + { url = "https://files.pythonhosted.org/packages/d5/7a/cda9b5a23c552037717f2b2a5257e9b2bfe45e687386df9591eff7b46d28/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de", size = 2136338, upload-time = "2025-04-23T18:31:09.283Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9f/b8f9ec8dd1417eb9da784e91e1667d58a2a4a7b7b34cf4af765ef663a7e5/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a", size = 2730998, upload-time = "2025-04-23T18:31:11.7Z" }, + { url = "https://files.pythonhosted.org/packages/47/bc/cd720e078576bdb8255d5032c5d63ee5c0bf4b7173dd955185a1d658c456/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef", size = 2003200, upload-time = "2025-04-23T18:31:13.536Z" }, + { url = "https://files.pythonhosted.org/packages/ca/22/3602b895ee2cd29d11a2b349372446ae9727c32e78a94b3d588a40fdf187/pydantic_core-2.33.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e", size = 2113890, upload-time = "2025-04-23T18:31:15.011Z" }, + { url = "https://files.pythonhosted.org/packages/ff/e6/e3c5908c03cf00d629eb38393a98fccc38ee0ce8ecce32f69fc7d7b558a7/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d", size = 2073359, upload-time = "2025-04-23T18:31:16.393Z" }, + { url = "https://files.pythonhosted.org/packages/12/e7/6a36a07c59ebefc8777d1ffdaf5ae71b06b21952582e4b07eba88a421c79/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30", size = 2245883, upload-time = "2025-04-23T18:31:17.892Z" }, + { url = "https://files.pythonhosted.org/packages/16/3f/59b3187aaa6cc0c1e6616e8045b284de2b6a87b027cce2ffcea073adf1d2/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf", size = 2241074, upload-time = "2025-04-23T18:31:19.205Z" }, + { url = "https://files.pythonhosted.org/packages/e0/ed/55532bb88f674d5d8f67ab121a2a13c385df382de2a1677f30ad385f7438/pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51", size = 1910538, upload-time = "2025-04-23T18:31:20.541Z" }, + { url = "https://files.pythonhosted.org/packages/fe/1b/25b7cccd4519c0b23c2dd636ad39d381abf113085ce4f7bec2b0dc755eb1/pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab", size = 1952909, upload-time = "2025-04-23T18:31:22.371Z" }, + { url = "https://files.pythonhosted.org/packages/49/a9/d809358e49126438055884c4366a1f6227f0f84f635a9014e2deb9b9de54/pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65", size = 1897786, upload-time = "2025-04-23T18:31:24.161Z" }, + { url = "https://files.pythonhosted.org/packages/18/8a/2b41c97f554ec8c71f2a8a5f85cb56a8b0956addfe8b0efb5b3d77e8bdc3/pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc", size = 2009000, upload-time = "2025-04-23T18:31:25.863Z" }, + { url = "https://files.pythonhosted.org/packages/a1/02/6224312aacb3c8ecbaa959897af57181fb6cf3a3d7917fd44d0f2917e6f2/pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7", size = 1847996, upload-time = "2025-04-23T18:31:27.341Z" }, + { url = "https://files.pythonhosted.org/packages/d6/46/6dcdf084a523dbe0a0be59d054734b86a981726f221f4562aed313dbcb49/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025", size = 1880957, upload-time = "2025-04-23T18:31:28.956Z" }, + { url = "https://files.pythonhosted.org/packages/ec/6b/1ec2c03837ac00886ba8160ce041ce4e325b41d06a034adbef11339ae422/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011", size = 1964199, upload-time = "2025-04-23T18:31:31.025Z" }, + { url = "https://files.pythonhosted.org/packages/2d/1d/6bf34d6adb9debd9136bd197ca72642203ce9aaaa85cfcbfcf20f9696e83/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f", size = 2120296, upload-time = "2025-04-23T18:31:32.514Z" }, + { url = "https://files.pythonhosted.org/packages/e0/94/2bd0aaf5a591e974b32a9f7123f16637776c304471a0ab33cf263cf5591a/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88", size = 2676109, upload-time = "2025-04-23T18:31:33.958Z" }, + { url = "https://files.pythonhosted.org/packages/f9/41/4b043778cf9c4285d59742281a769eac371b9e47e35f98ad321349cc5d61/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1", size = 2002028, upload-time = "2025-04-23T18:31:39.095Z" }, + { url = "https://files.pythonhosted.org/packages/cb/d5/7bb781bf2748ce3d03af04d5c969fa1308880e1dca35a9bd94e1a96a922e/pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b", size = 2100044, upload-time = "2025-04-23T18:31:41.034Z" }, + { url = "https://files.pythonhosted.org/packages/fe/36/def5e53e1eb0ad896785702a5bbfd25eed546cdcf4087ad285021a90ed53/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1", size = 2058881, upload-time = "2025-04-23T18:31:42.757Z" }, + { url = "https://files.pythonhosted.org/packages/01/6c/57f8d70b2ee57fc3dc8b9610315949837fa8c11d86927b9bb044f8705419/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6", size = 2227034, upload-time = "2025-04-23T18:31:44.304Z" }, + { url = "https://files.pythonhosted.org/packages/27/b9/9c17f0396a82b3d5cbea4c24d742083422639e7bb1d5bf600e12cb176a13/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea", size = 2234187, upload-time = "2025-04-23T18:31:45.891Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6a/adf5734ffd52bf86d865093ad70b2ce543415e0e356f6cacabbc0d9ad910/pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290", size = 1892628, upload-time = "2025-04-23T18:31:47.819Z" }, + { url = "https://files.pythonhosted.org/packages/43/e4/5479fecb3606c1368d496a825d8411e126133c41224c1e7238be58b87d7e/pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2", size = 1955866, upload-time = "2025-04-23T18:31:49.635Z" }, + { url = "https://files.pythonhosted.org/packages/0d/24/8b11e8b3e2be9dd82df4b11408a67c61bb4dc4f8e11b5b0fc888b38118b5/pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab", size = 1888894, upload-time = "2025-04-23T18:31:51.609Z" }, + { url = "https://files.pythonhosted.org/packages/46/8c/99040727b41f56616573a28771b1bfa08a3d3fe74d3d513f01251f79f172/pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f", size = 2015688, upload-time = "2025-04-23T18:31:53.175Z" }, + { url = "https://files.pythonhosted.org/packages/3a/cc/5999d1eb705a6cefc31f0b4a90e9f7fc400539b1a1030529700cc1b51838/pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6", size = 1844808, upload-time = "2025-04-23T18:31:54.79Z" }, + { url = "https://files.pythonhosted.org/packages/6f/5e/a0a7b8885c98889a18b6e376f344da1ef323d270b44edf8174d6bce4d622/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef", size = 1885580, upload-time = "2025-04-23T18:31:57.393Z" }, + { url = "https://files.pythonhosted.org/packages/3b/2a/953581f343c7d11a304581156618c3f592435523dd9d79865903272c256a/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a", size = 1973859, upload-time = "2025-04-23T18:31:59.065Z" }, + { url = "https://files.pythonhosted.org/packages/e6/55/f1a813904771c03a3f97f676c62cca0c0a4138654107c1b61f19c644868b/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916", size = 2120810, upload-time = "2025-04-23T18:32:00.78Z" }, + { url = "https://files.pythonhosted.org/packages/aa/c3/053389835a996e18853ba107a63caae0b9deb4a276c6b472931ea9ae6e48/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a", size = 2676498, upload-time = "2025-04-23T18:32:02.418Z" }, + { url = "https://files.pythonhosted.org/packages/eb/3c/f4abd740877a35abade05e437245b192f9d0ffb48bbbbd708df33d3cda37/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d", size = 2000611, upload-time = "2025-04-23T18:32:04.152Z" }, + { url = "https://files.pythonhosted.org/packages/59/a7/63ef2fed1837d1121a894d0ce88439fe3e3b3e48c7543b2a4479eb99c2bd/pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56", size = 2107924, upload-time = "2025-04-23T18:32:06.129Z" }, + { url = "https://files.pythonhosted.org/packages/04/8f/2551964ef045669801675f1cfc3b0d74147f4901c3ffa42be2ddb1f0efc4/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5", size = 2063196, upload-time = "2025-04-23T18:32:08.178Z" }, + { url = "https://files.pythonhosted.org/packages/26/bd/d9602777e77fc6dbb0c7db9ad356e9a985825547dce5ad1d30ee04903918/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e", size = 2236389, upload-time = "2025-04-23T18:32:10.242Z" }, + { url = "https://files.pythonhosted.org/packages/42/db/0e950daa7e2230423ab342ae918a794964b053bec24ba8af013fc7c94846/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162", size = 2239223, upload-time = "2025-04-23T18:32:12.382Z" }, + { url = "https://files.pythonhosted.org/packages/58/4d/4f937099c545a8a17eb52cb67fe0447fd9a373b348ccfa9a87f141eeb00f/pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849", size = 1900473, upload-time = "2025-04-23T18:32:14.034Z" }, + { url = "https://files.pythonhosted.org/packages/a0/75/4a0a9bac998d78d889def5e4ef2b065acba8cae8c93696906c3a91f310ca/pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9", size = 1955269, upload-time = "2025-04-23T18:32:15.783Z" }, + { url = "https://files.pythonhosted.org/packages/f9/86/1beda0576969592f1497b4ce8e7bc8cbdf614c352426271b1b10d5f0aa64/pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9", size = 1893921, upload-time = "2025-04-23T18:32:18.473Z" }, + { url = "https://files.pythonhosted.org/packages/a4/7d/e09391c2eebeab681df2b74bfe6c43422fffede8dc74187b2b0bf6fd7571/pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac", size = 1806162, upload-time = "2025-04-23T18:32:20.188Z" }, + { url = "https://files.pythonhosted.org/packages/f1/3d/847b6b1fed9f8ed3bb95a9ad04fbd0b212e832d4f0f50ff4d9ee5a9f15cf/pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5", size = 1981560, upload-time = "2025-04-23T18:32:22.354Z" }, + { url = "https://files.pythonhosted.org/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 1935777, upload-time = "2025-04-23T18:32:25.088Z" }, + { url = "https://files.pythonhosted.org/packages/53/ea/bbe9095cdd771987d13c82d104a9c8559ae9aec1e29f139e286fd2e9256e/pydantic_core-2.33.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a2b911a5b90e0374d03813674bf0a5fbbb7741570dcd4b4e85a2e48d17def29d", size = 2028677, upload-time = "2025-04-23T18:32:27.227Z" }, + { url = "https://files.pythonhosted.org/packages/49/1d/4ac5ed228078737d457a609013e8f7edc64adc37b91d619ea965758369e5/pydantic_core-2.33.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6fa6dfc3e4d1f734a34710f391ae822e0a8eb8559a85c6979e14e65ee6ba2954", size = 1864735, upload-time = "2025-04-23T18:32:29.019Z" }, + { url = "https://files.pythonhosted.org/packages/23/9a/2e70d6388d7cda488ae38f57bc2f7b03ee442fbcf0d75d848304ac7e405b/pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c54c939ee22dc8e2d545da79fc5381f1c020d6d3141d3bd747eab59164dc89fb", size = 1898467, upload-time = "2025-04-23T18:32:31.119Z" }, + { url = "https://files.pythonhosted.org/packages/ff/2e/1568934feb43370c1ffb78a77f0baaa5a8b6897513e7a91051af707ffdc4/pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53a57d2ed685940a504248187d5685e49eb5eef0f696853647bf37c418c538f7", size = 1983041, upload-time = "2025-04-23T18:32:33.655Z" }, + { url = "https://files.pythonhosted.org/packages/01/1a/1a1118f38ab64eac2f6269eb8c120ab915be30e387bb561e3af904b12499/pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:09fb9dd6571aacd023fe6aaca316bd01cf60ab27240d7eb39ebd66a3a15293b4", size = 2136503, upload-time = "2025-04-23T18:32:35.519Z" }, + { url = "https://files.pythonhosted.org/packages/5c/da/44754d1d7ae0f22d6d3ce6c6b1486fc07ac2c524ed8f6eca636e2e1ee49b/pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0e6116757f7959a712db11f3e9c0a99ade00a5bbedae83cb801985aa154f071b", size = 2736079, upload-time = "2025-04-23T18:32:37.659Z" }, + { url = "https://files.pythonhosted.org/packages/4d/98/f43cd89172220ec5aa86654967b22d862146bc4d736b1350b4c41e7c9c03/pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d55ab81c57b8ff8548c3e4947f119551253f4e3787a7bbc0b6b3ca47498a9d3", size = 2006508, upload-time = "2025-04-23T18:32:39.637Z" }, + { url = "https://files.pythonhosted.org/packages/2b/cc/f77e8e242171d2158309f830f7d5d07e0531b756106f36bc18712dc439df/pydantic_core-2.33.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c20c462aa4434b33a2661701b861604913f912254e441ab8d78d30485736115a", size = 2113693, upload-time = "2025-04-23T18:32:41.818Z" }, + { url = "https://files.pythonhosted.org/packages/54/7a/7be6a7bd43e0a47c147ba7fbf124fe8aaf1200bc587da925509641113b2d/pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:44857c3227d3fb5e753d5fe4a3420d6376fa594b07b621e220cd93703fe21782", size = 2074224, upload-time = "2025-04-23T18:32:44.033Z" }, + { url = "https://files.pythonhosted.org/packages/2a/07/31cf8fadffbb03be1cb520850e00a8490c0927ec456e8293cafda0726184/pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:eb9b459ca4df0e5c87deb59d37377461a538852765293f9e6ee834f0435a93b9", size = 2245403, upload-time = "2025-04-23T18:32:45.836Z" }, + { url = "https://files.pythonhosted.org/packages/b6/8d/bbaf4c6721b668d44f01861f297eb01c9b35f612f6b8e14173cb204e6240/pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9fcd347d2cc5c23b06de6d3b7b8275be558a0c90549495c699e379a80bf8379e", size = 2242331, upload-time = "2025-04-23T18:32:47.618Z" }, + { url = "https://files.pythonhosted.org/packages/bb/93/3cc157026bca8f5006250e74515119fcaa6d6858aceee8f67ab6dc548c16/pydantic_core-2.33.2-cp39-cp39-win32.whl", hash = "sha256:83aa99b1285bc8f038941ddf598501a86f1536789740991d7d8756e34f1e74d9", size = 1910571, upload-time = "2025-04-23T18:32:49.401Z" }, + { url = "https://files.pythonhosted.org/packages/5b/90/7edc3b2a0d9f0dda8806c04e511a67b0b7a41d2187e2003673a996fb4310/pydantic_core-2.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:f481959862f57f29601ccced557cc2e817bce7533ab8e01a797a48b49c9692b3", size = 1956504, upload-time = "2025-04-23T18:32:51.287Z" }, + { url = "https://files.pythonhosted.org/packages/30/68/373d55e58b7e83ce371691f6eaa7175e3a24b956c44628eb25d7da007917/pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5c4aa4e82353f65e548c476b37e64189783aa5384903bfea4f41580f255fddfa", size = 2023982, upload-time = "2025-04-23T18:32:53.14Z" }, + { url = "https://files.pythonhosted.org/packages/a4/16/145f54ac08c96a63d8ed6442f9dec17b2773d19920b627b18d4f10a061ea/pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d946c8bf0d5c24bf4fe333af284c59a19358aa3ec18cb3dc4370080da1e8ad29", size = 1858412, upload-time = "2025-04-23T18:32:55.52Z" }, + { url = "https://files.pythonhosted.org/packages/41/b1/c6dc6c3e2de4516c0bb2c46f6a373b91b5660312342a0cf5826e38ad82fa/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87b31b6846e361ef83fedb187bb5b4372d0da3f7e28d85415efa92d6125d6e6d", size = 1892749, upload-time = "2025-04-23T18:32:57.546Z" }, + { url = "https://files.pythonhosted.org/packages/12/73/8cd57e20afba760b21b742106f9dbdfa6697f1570b189c7457a1af4cd8a0/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa9d91b338f2df0508606f7009fde642391425189bba6d8c653afd80fd6bb64e", size = 2067527, upload-time = "2025-04-23T18:32:59.771Z" }, + { url = "https://files.pythonhosted.org/packages/e3/d5/0bb5d988cc019b3cba4a78f2d4b3854427fc47ee8ec8e9eaabf787da239c/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2058a32994f1fde4ca0480ab9d1e75a0e8c87c22b53a3ae66554f9af78f2fe8c", size = 2108225, upload-time = "2025-04-23T18:33:04.51Z" }, + { url = "https://files.pythonhosted.org/packages/f1/c5/00c02d1571913d496aabf146106ad8239dc132485ee22efe08085084ff7c/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:0e03262ab796d986f978f79c943fc5f620381be7287148b8010b4097f79a39ec", size = 2069490, upload-time = "2025-04-23T18:33:06.391Z" }, + { url = "https://files.pythonhosted.org/packages/22/a8/dccc38768274d3ed3a59b5d06f59ccb845778687652daa71df0cab4040d7/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:1a8695a8d00c73e50bff9dfda4d540b7dee29ff9b8053e38380426a85ef10052", size = 2237525, upload-time = "2025-04-23T18:33:08.44Z" }, + { url = "https://files.pythonhosted.org/packages/d4/e7/4f98c0b125dda7cf7ccd14ba936218397b44f50a56dd8c16a3091df116c3/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fa754d1850735a0b0e03bcffd9d4b4343eb417e47196e4485d9cca326073a42c", size = 2238446, upload-time = "2025-04-23T18:33:10.313Z" }, + { url = "https://files.pythonhosted.org/packages/ce/91/2ec36480fdb0b783cd9ef6795753c1dea13882f2e68e73bce76ae8c21e6a/pydantic_core-2.33.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a11c8d26a50bfab49002947d3d237abe4d9e4b5bdc8846a63537b6488e197808", size = 2066678, upload-time = "2025-04-23T18:33:12.224Z" }, + { url = "https://files.pythonhosted.org/packages/7b/27/d4ae6487d73948d6f20dddcd94be4ea43e74349b56eba82e9bdee2d7494c/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8", size = 2025200, upload-time = "2025-04-23T18:33:14.199Z" }, + { url = "https://files.pythonhosted.org/packages/f1/b8/b3cb95375f05d33801024079b9392a5ab45267a63400bf1866e7ce0f0de4/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593", size = 1859123, upload-time = "2025-04-23T18:33:16.555Z" }, + { url = "https://files.pythonhosted.org/packages/05/bc/0d0b5adeda59a261cd30a1235a445bf55c7e46ae44aea28f7bd6ed46e091/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612", size = 1892852, upload-time = "2025-04-23T18:33:18.513Z" }, + { url = "https://files.pythonhosted.org/packages/3e/11/d37bdebbda2e449cb3f519f6ce950927b56d62f0b84fd9cb9e372a26a3d5/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7", size = 2067484, upload-time = "2025-04-23T18:33:20.475Z" }, + { url = "https://files.pythonhosted.org/packages/8c/55/1f95f0a05ce72ecb02a8a8a1c3be0579bbc29b1d5ab68f1378b7bebc5057/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e", size = 2108896, upload-time = "2025-04-23T18:33:22.501Z" }, + { url = "https://files.pythonhosted.org/packages/53/89/2b2de6c81fa131f423246a9109d7b2a375e83968ad0800d6e57d0574629b/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8", size = 2069475, upload-time = "2025-04-23T18:33:24.528Z" }, + { url = "https://files.pythonhosted.org/packages/b8/e9/1f7efbe20d0b2b10f6718944b5d8ece9152390904f29a78e68d4e7961159/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf", size = 2239013, upload-time = "2025-04-23T18:33:26.621Z" }, + { url = "https://files.pythonhosted.org/packages/3c/b2/5309c905a93811524a49b4e031e9851a6b00ff0fb668794472ea7746b448/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb", size = 2238715, upload-time = "2025-04-23T18:33:28.656Z" }, + { url = "https://files.pythonhosted.org/packages/32/56/8a7ca5d2cd2cda1d245d34b1c9a942920a718082ae8e54e5f3e5a58b7add/pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1", size = 2066757, upload-time = "2025-04-23T18:33:30.645Z" }, + { url = "https://files.pythonhosted.org/packages/08/98/dbf3fdfabaf81cda5622154fda78ea9965ac467e3239078e0dcd6df159e7/pydantic_core-2.33.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:87acbfcf8e90ca885206e98359d7dca4bcbb35abdc0ff66672a293e1d7a19101", size = 2024034, upload-time = "2025-04-23T18:33:32.843Z" }, + { url = "https://files.pythonhosted.org/packages/8d/99/7810aa9256e7f2ccd492590f86b79d370df1e9292f1f80b000b6a75bd2fb/pydantic_core-2.33.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:7f92c15cd1e97d4b12acd1cc9004fa092578acfa57b67ad5e43a197175d01a64", size = 1858578, upload-time = "2025-04-23T18:33:34.912Z" }, + { url = "https://files.pythonhosted.org/packages/d8/60/bc06fa9027c7006cc6dd21e48dbf39076dc39d9abbaf718a1604973a9670/pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3f26877a748dc4251cfcfda9dfb5f13fcb034f5308388066bcfe9031b63ae7d", size = 1892858, upload-time = "2025-04-23T18:33:36.933Z" }, + { url = "https://files.pythonhosted.org/packages/f2/40/9d03997d9518816c68b4dfccb88969756b9146031b61cd37f781c74c9b6a/pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac89aea9af8cd672fa7b510e7b8c33b0bba9a43186680550ccf23020f32d535", size = 2068498, upload-time = "2025-04-23T18:33:38.997Z" }, + { url = "https://files.pythonhosted.org/packages/d8/62/d490198d05d2d86672dc269f52579cad7261ced64c2df213d5c16e0aecb1/pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:970919794d126ba8645f3837ab6046fb4e72bbc057b3709144066204c19a455d", size = 2108428, upload-time = "2025-04-23T18:33:41.18Z" }, + { url = "https://files.pythonhosted.org/packages/9a/ec/4cd215534fd10b8549015f12ea650a1a973da20ce46430b68fc3185573e8/pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3eb3fe62804e8f859c49ed20a8451342de53ed764150cb14ca71357c765dc2a6", size = 2069854, upload-time = "2025-04-23T18:33:43.446Z" }, + { url = "https://files.pythonhosted.org/packages/1a/1a/abbd63d47e1d9b0d632fee6bb15785d0889c8a6e0a6c3b5a8e28ac1ec5d2/pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:3abcd9392a36025e3bd55f9bd38d908bd17962cc49bc6da8e7e96285336e2bca", size = 2237859, upload-time = "2025-04-23T18:33:45.56Z" }, + { url = "https://files.pythonhosted.org/packages/80/1c/fa883643429908b1c90598fd2642af8839efd1d835b65af1f75fba4d94fe/pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:3a1c81334778f9e3af2f8aeb7a960736e5cab1dfebfb26aabca09afd2906c039", size = 2239059, upload-time = "2025-04-23T18:33:47.735Z" }, + { url = "https://files.pythonhosted.org/packages/d4/29/3cade8a924a61f60ccfa10842f75eb12787e1440e2b8660ceffeb26685e7/pydantic_core-2.33.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2807668ba86cb38c6817ad9bc66215ab8584d1d304030ce4f0887336f28a5e27", size = 2066661, upload-time = "2025-04-23T18:33:49.995Z" }, +] + [[package]] name = "pyfatfs" version = "1.1.0" @@ -1649,6 +1782,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b5/00/d631e67a838026495268c2f6884f3711a15a9a2a96cd244fdaea53b823fb/typing_extensions-4.14.1-py3-none-any.whl", hash = "sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76", size = 43906, upload-time = "2025-07-04T13:28:32.743Z" }, ] +[[package]] +name = "typing-inspection" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f8/b1/0c11f5058406b3af7609f121aaa6b609744687f1d158b3c3a5bf4cc94238/typing_inspection-0.4.1.tar.gz", hash = "sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28", size = 75726, upload-time = "2025-05-21T18:55:23.885Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552, upload-time = "2025-05-21T18:55:22.152Z" }, +] + [[package]] name = "ubi-reader" version = "0.8.10" @@ -1678,6 +1823,7 @@ dependencies = [ { name = "lz4" }, { name = "plotext" }, { name = "pluggy" }, + { name = "pydantic" }, { name = "pyfatfs" }, { name = "pymdown-extensions" }, { name = "pyperscan" }, @@ -1723,6 +1869,7 @@ requires-dist = [ { name = "lz4", specifier = ">=4.3.2,!=4.4.3" }, { name = "plotext", specifier = ">=4.2.0,<6.0" }, { name = "pluggy", specifier = ">=1.3.0" }, + { name = "pydantic", specifier = ">=2.0" }, { name = "pyfatfs", specifier = ">=1.0.5" }, { name = "pymdown-extensions", specifier = ">=10.15" }, { name = "pyperscan", specifier = ">=0.3.0" }, diff --git a/vulture_whitelist.py b/vulture_whitelist.py index 816198be11..b72d86774d 100644 --- a/vulture_whitelist.py +++ b/vulture_whitelist.py @@ -14,7 +14,6 @@ HandlerType, SingleFile, TaskResult, - _JSONEncoder, ) from unblob.parser import _HexStringToRegex from unblob.report import ChunkReport, FileMagicReport, StatReport @@ -25,8 +24,6 @@ _HexStringToRegex.range_jump _HexStringToRegex.alternative -_JSONEncoder.default - TaskResult.filter_reports ChunkReport.handler_name FileMagicReport.magic From 204d1a74705f68997744f09d825b89baf1512d27 Mon Sep 17 00:00:00 2001 From: Garrett Pence Date: Tue, 9 Sep 2025 08:15:11 -0400 Subject: [PATCH 2/9] Added TypeAdapter for deserialization from JSON report. Added test for deserialization. --- python/unblob/models.py | 18 ++++++++++- tests/test_models.py | 70 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 86 insertions(+), 2 deletions(-) diff --git a/python/unblob/models.py b/python/unblob/models.py index d201f4468a..c498be3657 100644 --- a/python/unblob/models.py +++ b/python/unblob/models.py @@ -8,7 +8,7 @@ from typing import Generic, Optional, TypeVar, Union import attrs -from pydantic import BaseModel +from pydantic import BaseModel, TypeAdapter from structlog import get_logger from .file_utils import Endian, File, InvalidInputFormat, StructParser @@ -287,6 +287,22 @@ def get_output_dir(self) -> Optional[Path]: return None +ReportModel = list[TaskResult] +ReportModelAdapter = TypeAdapter(ReportModel) +"""Use this for deserialization (import JSON report back into Python +objects) of the JSON report. + +For example: + +with open('report.json', 'r') as f: + data: str = f.read() + report_data: ReportModel = ReportModelAdapter.validate_json(data) + +For another example see: +tests/test_models.py::Test_to_json::test_process_result_deserialization +""" + + class ExtractError(Exception): """There was an error during extraction.""" diff --git a/tests/test_models.py b/tests/test_models.py index b59264d4bc..fb92d4dc3f 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -4,7 +4,14 @@ import pytest from unblob.file_utils import InvalidInputFormat -from unblob.models import Chunk, ProcessResult, Task, TaskResult, UnknownChunk +from unblob.models import ( + Chunk, + ProcessResult, + ReportModelAdapter, + Task, + TaskResult, + UnknownChunk, +) from unblob.report import ( ChunkReport, ExtractCommandFailedReport, @@ -219,6 +226,67 @@ def test_process_result_conversion(self): }, ] + def test_process_result_deserialization(self): + task = Task(path=Path("/nonexistent"), depth=0, blob_id="") + task_result = TaskResult(task=task) + chunk_id = "test_basic_conversion:id" + + task_result.add_report( + StatReport( + path=task.path, + size=384, + is_dir=False, + is_file=True, + is_link=False, + link_target=None, + ) + ) + task_result.add_report( + FileMagicReport( + magic="Zip archive data, at least v2.0 to extract", + mime_type="application/zip", + ) + ) + task_result.add_report( + HashReport( + md5="9019fcece2433ad7f12c077e84537a74", + sha1="36998218d8f43b69ef3adcadf2e8979e81eed166", + sha256="7d7ca7e1410b702b0f85d18257aebb964ac34f7fad0a0328d72e765bfcb21118", + ) + ) + task_result.add_report( + ChunkReport( + id=chunk_id, + handler_name="zip", + start_offset=0, + end_offset=384, + size=384, + is_encrypted=False, + extraction_reports=[], + ) + ) + task_result.add_subtask( + Task( + path=Path("/extractions/nonexistent_extract"), + depth=314, + blob_id=chunk_id, + ) + ) + + process_result = ProcessResult(results=[task_result]) + + json_text = process_result.to_json() + + # output must be a valid json string + assert isinstance(json_text, str) + + # deserialize using ReportModel TypeAdapter + report_data = ReportModelAdapter.validate_json(json_text) + + # convert to ProcessResult object and compare + deserialized_process_result = ProcessResult(results=report_data) + assert process_result == deserialized_process_result + def test_exotic_command_output(self): report = ExtractCommandFailedReport( command="dump all bytes", From b1858f1dfe1dd37db9aca5433cf638fe6f2e3115 Mon Sep 17 00:00:00 2001 From: Garrett Pence Date: Tue, 9 Sep 2025 21:00:48 -0400 Subject: [PATCH 3/9] Minor update to documentation on TypeAdapter. Fixed Pydantic definitions to support Python 3.9. --- python/unblob/models.py | 4 ++-- python/unblob/report.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/unblob/models.py b/python/unblob/models.py index c498be3657..01bd7b41a4 100644 --- a/python/unblob/models.py +++ b/python/unblob/models.py @@ -295,8 +295,8 @@ def get_output_dir(self) -> Optional[Path]: For example: with open('report.json', 'r') as f: - data: str = f.read() - report_data: ReportModel = ReportModelAdapter.validate_json(data) + data = f.read() + report_data = ReportModelAdapter.validate_json(data) For another example see: tests/test_models.py::Test_to_json::test_process_result_deserialization diff --git a/python/unblob/report.py b/python/unblob/report.py index ec303b2995..9c55106eaf 100644 --- a/python/unblob/report.py +++ b/python/unblob/report.py @@ -5,7 +5,7 @@ import traceback from enum import Enum from pathlib import Path -from typing import Annotated, Any, Literal, Union +from typing import Annotated, Any, Literal, Optional, Union from pydantic import BaseModel, ConfigDict, Field, field_serializer @@ -33,7 +33,7 @@ class UnknownErrorBase(ErrorReportBase): """Describes an exception raised during file processing.""" severity: Severity = Severity.ERROR - exception: str | Exception + exception: Union[str, Exception] model_config = ConfigDict( arbitrary_types_allowed=True @@ -147,7 +147,7 @@ class StatReport(ReportBase): is_dir: bool is_file: bool is_link: bool - link_target: Path | None + link_target: Optional[Path] report_type: Literal["StatReport"] = "StatReport" @classmethod @@ -237,7 +237,7 @@ class UnknownChunkReport(ReportBase): start_offset: int end_offset: int size: int - randomness: RandomnessReport | None + randomness: Optional[RandomnessReport] report_type: Literal["UnknownChunkReport"] = "UnknownChunkReport" @@ -272,7 +272,7 @@ class ExtractionProblemBase(ReportBase): problem: str resolution: str - path: str | None = None + path: Optional[str] = None @property def log_msg(self): From 26d58ea28d5e46c45f8215babe566ae69e695415 Mon Sep 17 00:00:00 2001 From: Garrett Pence Date: Wed, 10 Sep 2025 07:58:34 -0400 Subject: [PATCH 4/9] Refactored encode and decode for ExtractCommandFailedReport. --- python/unblob/report.py | 19 +++++++++++-------- tests/test_models.py | 38 ++++---------------------------------- 2 files changed, 15 insertions(+), 42 deletions(-) diff --git a/python/unblob/report.py b/python/unblob/report.py index 9c55106eaf..fb208ba0fe 100644 --- a/python/unblob/report.py +++ b/python/unblob/report.py @@ -1,5 +1,6 @@ from __future__ import annotations +import base64 import hashlib import stat import traceback @@ -7,7 +8,7 @@ from pathlib import Path from typing import Annotated, Any, Literal, Optional, Union -from pydantic import BaseModel, ConfigDict, Field, field_serializer +from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator class ReportBase(BaseModel): @@ -88,14 +89,16 @@ class ExtractCommandFailedReport(ErrorReportBase): exit_code: int report_type: Literal["ExtractCommandFailedReport"] = "ExtractCommandFailedReport" - # In case there is any strange encoding in stdout/stderr, convert them to str when serializing - @field_serializer("stdout") - def stdout_to_str(self, v: bytes, _info): - return str(v) + # Use base64 to encode and decode bytes data in case there are non-standard characters + @field_serializer("stdout", "stderr") + def encode_bytes(self, v: bytes, _): + return base64.b64encode(v).decode("ascii") - @field_serializer("stderr") - def stderr_to_str(self, v: bytes, _info): - return str(v) + @field_validator("stdout", "stderr", mode="before") + def decode_bytes(cls: ExtractCommandFailedReport, v: Any): + if isinstance(v, str): + return base64.b64decode(v) + return v class OutputDirectoryExistsReport(ErrorReportBase): diff --git a/tests/test_models.py b/tests/test_models.py index fb92d4dc3f..454b922672 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -297,38 +297,8 @@ def test_exotic_command_output(self): json_text = report.model_dump_json() - decoded_report = json.loads(json_text) + decoded_report = ExtractCommandFailedReport.model_validate_json(json_text) - assert decoded_report == { - "report_type": "ExtractCommandFailedReport", - "command": "dump all bytes", - "exit_code": 1, - "severity": "WARNING", - "stderr": "b'stdout is pretty strange ;)'", - "stdout": ( - "b'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07" - "\\x08\\t\\n\\x0b\\x0c\\r\\x0e\\x0f" - "\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17" - '\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f !"#' - "$%&\\'()*+,-./0123456789:;<=>?@AB" - "CDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`a" - "bcdefghijklmnopqrstuvwxyz{|}~\\x7f" - "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87" - "\\x88\\x89\\x8a\\x8b\\x8c\\x8d\\x8e\\x8f" - "\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97" - "\\x98\\x99\\x9a\\x9b\\x9c\\x9d\\x9e\\x9f" - "\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7" - "\\xa8\\xa9\\xaa\\xab\\xac\\xad\\xae\\xaf" - "\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7" - "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf" - "\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5\\xc6\\xc7" - "\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf" - "\\xd0\\xd1\\xd2\\xd3\\xd4\\xd5\\xd6\\xd7" - "\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf" - "\\xe0\\xe1\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7" - "\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef" - "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7" - "\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd\\xfe\\xff" - "'" - ), - } + assert decoded_report == report + + decoded_report = json.loads(json_text) From 93baab823246abc591dc7a47863828490d35c7d7 Mon Sep 17 00:00:00 2001 From: Garrett Pence Date: Wed, 10 Sep 2025 08:13:29 -0400 Subject: [PATCH 5/9] Updated vulture whitelist with exceptions for Pydantic models in report. --- python/unblob/report.py | 2 +- vulture_whitelist.py | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/python/unblob/report.py b/python/unblob/report.py index fb208ba0fe..2c6f34086f 100644 --- a/python/unblob/report.py +++ b/python/unblob/report.py @@ -40,7 +40,7 @@ class UnknownErrorBase(ErrorReportBase): arbitrary_types_allowed=True ) # Necessary to support Exception type - def model_post_init(self, __context: Any) -> None: + def model_post_init(self, _: Any) -> None: if isinstance(self.exception, Exception): self.exception = "".join( traceback.format_exception( diff --git a/vulture_whitelist.py b/vulture_whitelist.py index b72d86774d..7443fc9b8c 100644 --- a/vulture_whitelist.py +++ b/vulture_whitelist.py @@ -12,11 +12,18 @@ Handler, HandlerDoc, HandlerType, + ReportModelAdapter, SingleFile, TaskResult, ) from unblob.parser import _HexStringToRegex -from unblob.report import ChunkReport, FileMagicReport, StatReport +from unblob.report import ( + ChunkReport, + ExtractCommandFailedReport, + FileMagicReport, + StatReport, + UnknownErrorBase, +) _HexStringToRegex.literal _HexStringToRegex.wildcard @@ -66,3 +73,13 @@ generate_markdown Handler.DOC + +ReportModelAdapter + +report_type + +UnknownErrorBase.model_config +UnknownErrorBase.model_post_init + +ExtractCommandFailedReport.encode_bytes +ExtractCommandFailedReport.decode_bytes From 7300fc4e0c5ea386ffd63c7149b99282fd5603b8 Mon Sep 17 00:00:00 2001 From: Garrett Pence Date: Wed, 10 Sep 2025 08:33:24 -0400 Subject: [PATCH 6/9] Fixed minor formatting issues related to pre-commit checks and refactored vulture whitelisting for report. --- python/unblob/report.py | 5 ++++- vulture_whitelist.py | 9 +++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/python/unblob/report.py b/python/unblob/report.py index 2c6f34086f..5b572bc93b 100644 --- a/python/unblob/report.py +++ b/python/unblob/report.py @@ -1,3 +1,5 @@ +# ruff: noqa: UP007,UP045 + from __future__ import annotations import base64 @@ -95,7 +97,8 @@ def encode_bytes(self, v: bytes, _): return base64.b64encode(v).decode("ascii") @field_validator("stdout", "stderr", mode="before") - def decode_bytes(cls: ExtractCommandFailedReport, v: Any): + @classmethod + def decode_bytes(cls, v: Any): if isinstance(v, str): return base64.b64decode(v) return v diff --git a/vulture_whitelist.py b/vulture_whitelist.py index 7443fc9b8c..ed978e7961 100644 --- a/vulture_whitelist.py +++ b/vulture_whitelist.py @@ -1,6 +1,8 @@ # pyright: reportUnusedExpression=false # ruff: noqa: B018 +import importlib +import inspect import sys import unblob.plugins @@ -76,10 +78,13 @@ ReportModelAdapter -report_type - UnknownErrorBase.model_config UnknownErrorBase.model_post_init ExtractCommandFailedReport.encode_bytes ExtractCommandFailedReport.decode_bytes + +module = importlib.import_module("unblob.report") +for _, obj in inspect.getmembers(module, inspect.isclass): + if hasattr(obj, "report_type"): + obj.report_type From 81931240fe8fa8a0fc87bd1ad0e0dad76109a81b Mon Sep 17 00:00:00 2001 From: Garrett Pence Date: Wed, 10 Sep 2025 08:55:16 -0400 Subject: [PATCH 7/9] Resolved typing issues in file_utils.py --- python/unblob/file_utils.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python/unblob/file_utils.py b/python/unblob/file_utils.py index d0e64d54db..28351caf3e 100644 --- a/python/unblob/file_utils.py +++ b/python/unblob/file_utils.py @@ -491,7 +491,15 @@ def __init__(self, root: Path): self.root = root.resolve() self.problems = [] - def record_problem(self, problem: ExtractionProblem): + def record_problem( + self, + problem: Union[ + ExtractionProblem, + LinkExtractionProblem, + PathTraversalProblem, + SpecialFileExtractionProblem, + ], + ): self.problems.append(problem) problem.log_with(logger) From e637ab62b873c08e42f68daabe4d09a1395602dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Vask=C3=B3?= <1771332+vlaci@users.noreply.github.com> Date: Thu, 11 Sep 2025 16:34:53 +0200 Subject: [PATCH 8/9] Review suggestions --- docs/guide.md | 24 ++++--- python/unblob/models.py | 12 ++-- python/unblob/report.py | 156 ++++++++++++++++------------------------ tests/test_models.py | 8 +-- vulture_whitelist.py | 13 +--- 5 files changed, 87 insertions(+), 126 deletions(-) diff --git a/docs/guide.md b/docs/guide.md index 88e53d9abb..61e98048b0 100644 --- a/docs/guide.md +++ b/docs/guide.md @@ -81,12 +81,12 @@ $ cat alpine-report.json "is_file": true, "is_link": false, "link_target": null, - "report_type": "StatReport" + "__typename__": "StatReport" }, { "magic": "gzip compressed data, max compression, from Unix, original size modulo 2^32 5816320\\012- data", "mime_type": "application/gzip", - "report_type": "FileMagicReport" + "__typename__": "FileMagicReport" }, { "id": "13590:1", @@ -96,16 +96,18 @@ $ cat alpine-report.json "size": 2711958, "is_encrypted": false, "extraction_reports": [], - "report_type": "ChunkReport" + "__typename__": "ChunkReport" } ], "subtasks": [ { "path": "/home/walkman/Projects/unblob/demo/alpine-minirootfs-3.16.1-x86_64.tar.gz_extract", "depth": 1, - "chunk_id": "13590:1" + "chunk_id": "13590:1", + "__typename__": "Task" } - ] + ], + "__typename__": "TaskResult" }, ... ] @@ -141,7 +143,7 @@ $ unblob -vvv unknown-file | grep -C 15 "Entropy distribution" 2024-10-30 10:52.03 [debug ] Shannon entropy calculated block_size=0x20000 highest=99.99 lowest=99.98 mean=99.98 path=unknown-file_extract/0-10485760.unknown pid=1963719 size=0xa00000 2024-10-30 10:52.03 [debug ] Chi square probability calculated block_size=0x20000 highest=97.88 lowest=3.17 mean=52.76 path=unknown-file_extract/0-10485760.unknown pid=1963719 size=0xa00000 2024-10-30 10:52.03 [debug ] Entropy chart chart= - Randomness distribution + Randomness distribution ┌───────────────────────────────────────────────────────────────────────────┐ 100┤ •• Shannon entropy (%) •••••••••♰••••••••••••••••••••••••••••••••••│ 90┤ ♰♰ Chi square probability (%) ♰ ♰ ♰♰♰♰ ♰ ♰ ♰ │ @@ -155,8 +157,8 @@ $ unblob -vvv unknown-file | grep -C 15 "Entropy distribution" 10┤ ♰ ♰ ♰ ♰ ♰ ♰♰ ♰ ♰ ♰♰ │ 0┤ ♰ ♰ │ └─┬──┬─┬──┬────┬───┬──┬──┬──┬───┬───┬──┬────┬───┬────┬──┬──┬────┬──┬───┬──┬─┘ - 0 2 5 7 11 16 20 23 27 30 34 38 42 47 51 56 60 63 68 71 76 79 - 131072 bytes + 0 2 5 7 11 16 20 23 27 30 34 38 42 47 51 56 60 63 68 71 76 79 + 131072 bytes path=unknown-file_extract/0-10485760.unknown pid=1963719 2024-10-30 10:52.03 [info ] Extracting unknown chunk chunk=0xc96196-0x1696196 path=unknown-file_extract/13197718-23683478.unknown pid=1963719 2024-10-30 10:52.03 [debug ] Carving chunk path=unknown-file_extract/13197718-23683478.unknown pid=1963719 @@ -164,7 +166,7 @@ $ unblob -vvv unknown-file | grep -C 15 "Entropy distribution" 2024-10-30 10:52.03 [debug ] Shannon entropy calculated block_size=0x20000 highest=99.99 lowest=99.98 mean=99.98 path=unknown-file_extract/13197718-23683478.unknown pid=1963719 size=0xa00000 2024-10-30 10:52.03 [debug ] Chi square probability calculated block_size=0x20000 highest=99.03 lowest=0.23 mean=42.62 path=unknown-file_extract/13197718-23683478.unknown pid=1963719 size=0xa00000 2024-10-30 10:52.03 [debug ] Entropy chart chart= - Randomness distribution + Randomness distribution ┌───────────────────────────────────────────────────────────────────────────┐ 100┤ •• Shannon entropy (%) •••••••••••••••••••••♰••••••••••••••••••••••│ 90┤ ♰♰ Chi square probability (%) ♰ ♰♰ ♰ │ @@ -178,8 +180,8 @@ $ unblob -vvv unknown-file | grep -C 15 "Entropy distribution" 10┤ ♰ ♰ ♰ ♰ ♰ ♰ ♰ ♰♰ ♰ ♰♰ ♰♰ ♰♰ ♰ ♰ ♰ │ 0┤ ♰ ♰ ♰♰ ♰ ♰♰ │ └─┬──┬─┬──┬────┬───┬──┬──┬──┬───┬───┬──┬────┬───┬────┬──┬──┬────┬──┬───┬──┬─┘ - 0 2 5 7 11 16 20 23 27 30 34 38 42 47 51 56 60 63 68 71 76 79 - 131072 bytes + 0 2 5 7 11 16 20 23 27 30 34 38 42 47 51 56 60 63 68 71 76 79 + 131072 bytes ``` ### Skip extraction with file magic diff --git a/python/unblob/models.py b/python/unblob/models.py index 01bd7b41a4..860ab9ce65 100644 --- a/python/unblob/models.py +++ b/python/unblob/models.py @@ -17,7 +17,7 @@ from .report import ( CarveDirectoryReport, ChunkReport, - ErrorReportBase, + ErrorReport, MultiFileReport, RandomnessReport, Report, @@ -247,20 +247,18 @@ class ProcessResult(BaseModel): results: list[TaskResult] = [] @property - def errors(self) -> list[ErrorReportBase]: + def errors(self) -> list[ErrorReport]: reports = itertools.chain.from_iterable(r.reports for r in self.results) interesting_reports = ( - r for r in reports if isinstance(r, (ErrorReportBase, ChunkReport)) + r for r in reports if isinstance(r, (ErrorReport, ChunkReport)) ) errors = [] for report in interesting_reports: - if isinstance(report, ErrorReportBase): + if isinstance(report, ErrorReport): errors.append(report) else: errors.extend( - r - for r in report.extraction_reports - if isinstance(r, ErrorReportBase) + r for r in report.extraction_reports if isinstance(r, ErrorReport) ) return errors diff --git a/python/unblob/report.py b/python/unblob/report.py index 5b572bc93b..3c28db8cda 100644 --- a/python/unblob/report.py +++ b/python/unblob/report.py @@ -8,14 +8,27 @@ import traceback from enum import Enum from pathlib import Path -from typing import Annotated, Any, Literal, Optional, Union +from typing import Annotated, Any, Optional, Union -from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator +from pydantic import ( + BaseModel, + ConfigDict, + Discriminator, + Tag, + computed_field, + field_serializer, + field_validator, +) class ReportBase(BaseModel): """A common base class for different reports. This will enable easy pydantic configuration of all models from a single point in the future if desired.""" + @computed_field + @property + def __typename__(self) -> str: + return self.__class__.__name__ + class Severity(Enum): """Represents possible problems encountered during execution.""" @@ -24,15 +37,11 @@ class Severity(Enum): WARNING = "WARNING" -class ErrorReportBase(ReportBase): +class ErrorReport(ReportBase): severity: Severity -class ErrorReport(ErrorReportBase): - report_type: Literal["ErrorReport"] = "ErrorReport" - - -class UnknownErrorBase(ErrorReportBase): +class UnknownError(ErrorReport): """Describes an exception raised during file processing.""" severity: Severity = Severity.ERROR @@ -53,35 +62,23 @@ def model_post_init(self, _: Any) -> None: """Exceptions are also formatted at construct time.""" -class UnknownError(UnknownErrorBase): - """Describes an exception raised during file processing.""" - - report_type: Literal["UnknownError"] = "UnknownError" - - -class CalculateChunkExceptionReport(UnknownErrorBase): +class CalculateChunkExceptionReport(UnknownError): """Describes an exception raised during calculate_chunk execution.""" start_offset: int # Stored in `str` rather than `Handler`, because the pickle picks ups structs from `C_DEFINITIONS` handler: str - report_type: Literal["CalculateChunkExceptionReport"] = ( - "CalculateChunkExceptionReport" - ) -class CalculateMultiFileExceptionReport(UnknownErrorBase): +class CalculateMultiFileExceptionReport(UnknownError): """Describes an exception raised during calculate_chunk execution.""" path: Path # Stored in `str` rather than `Handler`, because the pickle picks ups structs from `C_DEFINITIONS` handler: str - report_type: Literal["CalculateMultiFileExceptionReport"] = ( - "CalculateMultiFileExceptionReport" - ) -class ExtractCommandFailedReport(ErrorReportBase): +class ExtractCommandFailedReport(ErrorReport): """Describes an error when failed to run the extraction command.""" severity: Severity = Severity.WARNING @@ -89,7 +86,6 @@ class ExtractCommandFailedReport(ErrorReportBase): stdout: bytes stderr: bytes exit_code: int - report_type: Literal["ExtractCommandFailedReport"] = "ExtractCommandFailedReport" # Use base64 to encode and decode bytes data in case there are non-standard characters @field_serializer("stdout", "stderr") @@ -104,47 +100,40 @@ def decode_bytes(cls, v: Any): return v -class OutputDirectoryExistsReport(ErrorReportBase): +class OutputDirectoryExistsReport(ErrorReport): severity: Severity = Severity.ERROR path: Path - report_type: Literal["OutputDirectoryExistsReport"] = "OutputDirectoryExistsReport" -class ExtractorDependencyNotFoundReport(ErrorReportBase): +class ExtractorDependencyNotFoundReport(ErrorReport): """Describes an error when the dependency of an extractor doesn't exist.""" severity: Severity = Severity.ERROR dependencies: list[str] - report_type: Literal["ExtractorDependencyNotFoundReport"] = ( - "ExtractorDependencyNotFoundReport" - ) -class ExtractorTimedOut(ErrorReportBase): +class ExtractorTimedOut(ErrorReport): """Describes an error when the extractor execution timed out.""" severity: Severity = Severity.ERROR cmd: str timeout: float - report_type: Literal["ExtractorTimedOut"] = "ExtractorTimedOut" -class MaliciousSymlinkRemoved(ErrorReportBase): +class MaliciousSymlinkRemoved(ErrorReport): """Describes an error when malicious symlinks have been removed from disk.""" severity: Severity = Severity.WARNING link: str target: str - report_type: Literal["MaliciousSymlinkRemoved"] = "MaliciousSymlinkRemoved" -class MultiFileCollisionReport(ErrorReportBase): +class MultiFileCollisionReport(ErrorReport): """Describes an error when MultiFiles collide on the same file.""" severity: Severity = Severity.ERROR paths: set[Path] handler: str - report_type: Literal["MultiFileCollisionReport"] = "MultiFileCollisionReport" class StatReport(ReportBase): @@ -154,7 +143,6 @@ class StatReport(ReportBase): is_file: bool is_link: bool link_target: Optional[Path] - report_type: Literal["StatReport"] = "StatReport" @classmethod def from_path(cls, path: Path): @@ -179,7 +167,6 @@ class HashReport(ReportBase): md5: str sha1: str sha256: str - report_type: Literal["HashReport"] = "HashReport" @classmethod def from_path(cls, path: Path): @@ -204,7 +191,6 @@ def from_path(cls, path: Path): class FileMagicReport(ReportBase): magic: str mime_type: str - report_type: Literal["FileMagicReport"] = "FileMagicReport" class RandomnessMeasurements(BaseModel): @@ -224,7 +210,6 @@ def lowest(self): class RandomnessReport(ReportBase): shannon: RandomnessMeasurements chi_square: RandomnessMeasurements - report_type: Literal["RandomnessReport"] = "RandomnessReport" class ChunkReport(ReportBase): @@ -235,7 +220,6 @@ class ChunkReport(ReportBase): size: int is_encrypted: bool extraction_reports: list[Report] - report_type: Literal["ChunkReport"] = "ChunkReport" class UnknownChunkReport(ReportBase): @@ -244,12 +228,10 @@ class UnknownChunkReport(ReportBase): end_offset: int size: int randomness: Optional[RandomnessReport] - report_type: Literal["UnknownChunkReport"] = "UnknownChunkReport" class CarveDirectoryReport(ReportBase): carve_dir: Path - report_type: Literal["CarveDirectoryReport"] = "CarveDirectoryReport" class MultiFileReport(ReportBase): @@ -258,10 +240,9 @@ class MultiFileReport(ReportBase): name: str paths: list[Path] extraction_reports: list[Report] - report_type: Literal["MultiFileReport"] = "MultiFileReport" -class ExtractionProblemBase(ReportBase): +class ExtractionProblem(ReportBase): """A non-fatal problem discovered during extraction. A report like this still means, that the extraction was successful, @@ -288,27 +269,8 @@ def log_with(self, logger): logger.warning(self.log_msg, path=self.path) -class ExtractionProblem(ExtractionProblemBase): - """A non-fatal problem discovered during extraction. - - A report like this still means, that the extraction was successful, - but there were problems that got resolved. - The output is expected to be complete, with the exception of - the reported path. - - Examples - -------- - - duplicate entries for certain archive formats (tar, zip) - - unsafe symlinks pointing outside of extraction directory - - """ - - report_type: Literal["ExtractionProblem"] = "ExtractionProblem" - - -class PathTraversalProblem(ExtractionProblemBase): +class PathTraversalProblem(ExtractionProblem): extraction_path: str - report_type: Literal["PathTraversalProblem"] = "PathTraversalProblem" def log_with(self, logger): logger.warning( @@ -318,49 +280,55 @@ def log_with(self, logger): ) -class LinkExtractionProblem(ExtractionProblemBase): +class LinkExtractionProblem(ExtractionProblem): link_path: str - report_type: Literal["LinkExtractionProblem"] = "LinkExtractionProblem" def log_with(self, logger): logger.warning(self.log_msg, path=self.path, link_path=self.link_path) -class SpecialFileExtractionProblem(ExtractionProblemBase): +class SpecialFileExtractionProblem(ExtractionProblem): mode: int device: int - report_type: Literal["SpecialFileExtractionProblem"] = ( - "SpecialFileExtractionProblem" - ) def log_with(self, logger): logger.warning(self.log_msg, path=self.path, mode=self.mode, device=self.device) +def _get_report_type(report: dict | ReportBase): + if isinstance(report, dict): + return report.get("__typename__") + return report.__typename__ + + Report = Annotated[ Union[ - ErrorReport, - UnknownError, - CalculateChunkExceptionReport, - CalculateMultiFileExceptionReport, - ExtractCommandFailedReport, - OutputDirectoryExistsReport, - ExtractorDependencyNotFoundReport, - ExtractorTimedOut, - MaliciousSymlinkRemoved, - MultiFileCollisionReport, - StatReport, - HashReport, - FileMagicReport, - RandomnessReport, - ChunkReport, - UnknownChunkReport, - CarveDirectoryReport, - MultiFileReport, - ExtractionProblem, - PathTraversalProblem, - LinkExtractionProblem, - SpecialFileExtractionProblem, + Annotated[ErrorReport, Tag("ErrorReport")], + Annotated[UnknownError, Tag("UnknownError")], + Annotated[CalculateChunkExceptionReport, Tag("CalculateChunkExceptionReport")], + Annotated[ + CalculateMultiFileExceptionReport, Tag("CalculateMultiFileExceptionReport") + ], + Annotated[ExtractCommandFailedReport, Tag("ExtractCommandFailedReport")], + Annotated[OutputDirectoryExistsReport, Tag("OutputDirectoryExistsReport")], + Annotated[ + ExtractorDependencyNotFoundReport, Tag("ExtractorDependencyNotFoundReport") + ], + Annotated[ExtractorTimedOut, Tag("ExtractorTimedOut")], + Annotated[MaliciousSymlinkRemoved, Tag("MaliciousSymlinkRemoved")], + Annotated[MultiFileCollisionReport, Tag("MultiFileCollisionReport")], + Annotated[StatReport, Tag("StatReport")], + Annotated[HashReport, Tag("HashReport")], + Annotated[FileMagicReport, Tag("FileMagicReport")], + Annotated[RandomnessReport, Tag("RandomnessReport")], + Annotated[ChunkReport, Tag("ChunkReport")], + Annotated[UnknownChunkReport, Tag("UnknownChunkReport")], + Annotated[CarveDirectoryReport, Tag("CarveDirectoryReport")], + Annotated[MultiFileReport, Tag("MultiFileReport")], + Annotated[ExtractionProblem, Tag("ExtractionProblem")], + Annotated[PathTraversalProblem, Tag("PathTraversalProblem")], + Annotated[LinkExtractionProblem, Tag("LinkExtractionProblem")], + Annotated[SpecialFileExtractionProblem, Tag("SpecialFileExtractionProblem")], ], - Field(discriminator="report_type"), + Discriminator(_get_report_type), ] diff --git a/tests/test_models.py b/tests/test_models.py index 454b922672..735090c32b 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -179,7 +179,7 @@ def test_process_result_conversion(self): { "reports": [ { - "report_type": "StatReport", + "__typename__": "StatReport", "is_dir": False, "is_file": True, "is_link": False, @@ -188,18 +188,18 @@ def test_process_result_conversion(self): "size": 384, }, { - "report_type": "FileMagicReport", + "__typename__": "FileMagicReport", "magic": "Zip archive data, at least v2.0 to extract", "mime_type": "application/zip", }, { - "report_type": "HashReport", + "__typename__": "HashReport", "md5": "9019fcece2433ad7f12c077e84537a74", "sha1": "36998218d8f43b69ef3adcadf2e8979e81eed166", "sha256": "7d7ca7e1410b702b0f85d18257aebb964ac34f7fad0a0328d72e765bfcb21118", }, { - "report_type": "ChunkReport", + "__typename__": "ChunkReport", "end_offset": 384, "extraction_reports": [], "handler_name": "zip", diff --git a/vulture_whitelist.py b/vulture_whitelist.py index ed978e7961..7e161580f8 100644 --- a/vulture_whitelist.py +++ b/vulture_whitelist.py @@ -1,8 +1,6 @@ # pyright: reportUnusedExpression=false # ruff: noqa: B018 -import importlib -import inspect import sys import unblob.plugins @@ -24,7 +22,7 @@ ExtractCommandFailedReport, FileMagicReport, StatReport, - UnknownErrorBase, + UnknownError, ) _HexStringToRegex.literal @@ -78,13 +76,8 @@ ReportModelAdapter -UnknownErrorBase.model_config -UnknownErrorBase.model_post_init +UnknownError.model_config +UnknownError.model_post_init ExtractCommandFailedReport.encode_bytes ExtractCommandFailedReport.decode_bytes - -module = importlib.import_module("unblob.report") -for _, obj in inspect.getmembers(module, inspect.isclass): - if hasattr(obj, "report_type"): - obj.report_type From 965fa39144e9c77d965f4074108d3050c117114c Mon Sep 17 00:00:00 2001 From: Garrett Pence Date: Fri, 12 Sep 2025 10:21:14 -0400 Subject: [PATCH 9/9] Updates related to report model refactor. --- docs/guide.md | 18 ++++++++---------- package.nix | 1 + python/unblob/file_utils.py | 10 +--------- 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/docs/guide.md b/docs/guide.md index 61e98048b0..fcafe764c0 100644 --- a/docs/guide.md +++ b/docs/guide.md @@ -103,11 +103,9 @@ $ cat alpine-report.json { "path": "/home/walkman/Projects/unblob/demo/alpine-minirootfs-3.16.1-x86_64.tar.gz_extract", "depth": 1, - "chunk_id": "13590:1", - "__typename__": "Task" + "chunk_id": "13590:1" } - ], - "__typename__": "TaskResult" + ] }, ... ] @@ -143,7 +141,7 @@ $ unblob -vvv unknown-file | grep -C 15 "Entropy distribution" 2024-10-30 10:52.03 [debug ] Shannon entropy calculated block_size=0x20000 highest=99.99 lowest=99.98 mean=99.98 path=unknown-file_extract/0-10485760.unknown pid=1963719 size=0xa00000 2024-10-30 10:52.03 [debug ] Chi square probability calculated block_size=0x20000 highest=97.88 lowest=3.17 mean=52.76 path=unknown-file_extract/0-10485760.unknown pid=1963719 size=0xa00000 2024-10-30 10:52.03 [debug ] Entropy chart chart= - Randomness distribution + Randomness distribution ┌───────────────────────────────────────────────────────────────────────────┐ 100┤ •• Shannon entropy (%) •••••••••♰••••••••••••••••••••••••••••••••••│ 90┤ ♰♰ Chi square probability (%) ♰ ♰ ♰♰♰♰ ♰ ♰ ♰ │ @@ -157,8 +155,8 @@ $ unblob -vvv unknown-file | grep -C 15 "Entropy distribution" 10┤ ♰ ♰ ♰ ♰ ♰ ♰♰ ♰ ♰ ♰♰ │ 0┤ ♰ ♰ │ └─┬──┬─┬──┬────┬───┬──┬──┬──┬───┬───┬──┬────┬───┬────┬──┬──┬────┬──┬───┬──┬─┘ - 0 2 5 7 11 16 20 23 27 30 34 38 42 47 51 56 60 63 68 71 76 79 - 131072 bytes + 0 2 5 7 11 16 20 23 27 30 34 38 42 47 51 56 60 63 68 71 76 79 + 131072 bytes path=unknown-file_extract/0-10485760.unknown pid=1963719 2024-10-30 10:52.03 [info ] Extracting unknown chunk chunk=0xc96196-0x1696196 path=unknown-file_extract/13197718-23683478.unknown pid=1963719 2024-10-30 10:52.03 [debug ] Carving chunk path=unknown-file_extract/13197718-23683478.unknown pid=1963719 @@ -166,7 +164,7 @@ $ unblob -vvv unknown-file | grep -C 15 "Entropy distribution" 2024-10-30 10:52.03 [debug ] Shannon entropy calculated block_size=0x20000 highest=99.99 lowest=99.98 mean=99.98 path=unknown-file_extract/13197718-23683478.unknown pid=1963719 size=0xa00000 2024-10-30 10:52.03 [debug ] Chi square probability calculated block_size=0x20000 highest=99.03 lowest=0.23 mean=42.62 path=unknown-file_extract/13197718-23683478.unknown pid=1963719 size=0xa00000 2024-10-30 10:52.03 [debug ] Entropy chart chart= - Randomness distribution + Randomness distribution ┌───────────────────────────────────────────────────────────────────────────┐ 100┤ •• Shannon entropy (%) •••••••••••••••••••••♰••••••••••••••••••••••│ 90┤ ♰♰ Chi square probability (%) ♰ ♰♰ ♰ │ @@ -180,8 +178,8 @@ $ unblob -vvv unknown-file | grep -C 15 "Entropy distribution" 10┤ ♰ ♰ ♰ ♰ ♰ ♰ ♰ ♰♰ ♰ ♰♰ ♰♰ ♰♰ ♰ ♰ ♰ │ 0┤ ♰ ♰ ♰♰ ♰ ♰♰ │ └─┬──┬─┬──┬────┬───┬──┬──┬──┬───┬───┬──┬────┬───┬────┬──┬──┬────┬──┬───┬──┬─┘ - 0 2 5 7 11 16 20 23 27 30 34 38 42 47 51 56 60 63 68 71 76 79 - 131072 bytes + 0 2 5 7 11 16 20 23 27 30 34 38 42 47 51 56 60 63 68 71 76 79 + 131072 bytes ``` ### Skip extraction with file magic diff --git a/package.nix b/package.nix index 645aea93bd..e447594494 100644 --- a/package.nix +++ b/package.nix @@ -86,6 +86,7 @@ python3.pkgs.buildPythonApplication { python3.pkgs.lz4 # shadowed by pkgs.lz4 plotext pluggy + pydantic pyfatfs pymdown-extensions pyperscan diff --git a/python/unblob/file_utils.py b/python/unblob/file_utils.py index 28351caf3e..d0e64d54db 100644 --- a/python/unblob/file_utils.py +++ b/python/unblob/file_utils.py @@ -491,15 +491,7 @@ def __init__(self, root: Path): self.root = root.resolve() self.problems = [] - def record_problem( - self, - problem: Union[ - ExtractionProblem, - LinkExtractionProblem, - PathTraversalProblem, - SpecialFileExtractionProblem, - ], - ): + def record_problem(self, problem: ExtractionProblem): self.problems.append(problem) problem.log_with(logger)