From 2ddcf7a564590c974b848f81cbfe46daa3fc948a Mon Sep 17 00:00:00 2001 From: lepy Date: Thu, 25 Jun 2026 15:02:13 +0200 Subject: [PATCH] test(sclass): sdata/sclass/dataframe.py auf 100% Coverage Tests fuer DataFrame (init-Varianten, df-Setter, to_dict/from_dict, to_dataframe, to_parquet/from_parquet(_bytes) inkl. except-Zweige ohne _sdata-attrs). setup.py: parquet-Extra (pyarrow); local-ci installiert .[did,parquet]. --- ci/local-ci.sh | 2 +- setup.py | 1 + tests/test_sclass_dataframe_coverage.py | 82 +++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 tests/test_sclass_dataframe_coverage.py diff --git a/ci/local-ci.sh b/ci/local-ci.sh index 9400ed4..4ce3fb0 100755 --- a/ci/local-ci.sh +++ b/ci/local-ci.sh @@ -28,7 +28,7 @@ fi echo "[ci] installiere/aktualisiere Abhängigkeiten (sdata[did] + Test-Tools)" "$PYBIN" -m pip install --quiet --upgrade pip -"$PYBIN" -m pip install --quiet -e ".[did]" pytest coverage +"$PYBIN" -m pip install --quiet -e ".[did,parquet]" pytest coverage # Testziel: durchgereichte Argumente oder – wenn keine – die komplette Suite. 
TARGETS=("$@") diff --git a/setup.py b/setup.py index e0be6f5..5ae59fa 100644 --- a/setup.py +++ b/setup.py @@ -21,6 +21,7 @@ 'did': ['ecdsa>=0.18', 'base58>=2.1'], 'hdf': ['tables'], 'sql': ['sqlalchemy'], + 'parquet': ['pyarrow'], # sdata.sclass.DataFrame (Parquet serialization) } setup( diff --git a/tests/test_sclass_dataframe_coverage.py b/tests/test_sclass_dataframe_coverage.py new file mode 100644 index 0000000..09f6060 --- /dev/null +++ b/tests/test_sclass_dataframe_coverage.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +"""Full coverage of sdata/sclass/dataframe.py.""" +import pandas as pd +import pytest + +pytest.importorskip("pyarrow") # Parquet backend + +from sdata.metadata import Metadata +from sdata.sclass.dataframe import DataFrame + + +def _df(): + return pd.DataFrame({"weight": [10, 20, 30], "height": [1.5, 1.6, 1.7]}) + + +def test_init_variants(): + cm = {"weight": {"label": "Gewicht", "unit": "kg"}} + a = DataFrame(df=_df(), column_metadata=cm, name="A") + assert isinstance(a.cmd, Metadata) + assert not a.cmdf.empty + # column_metadata as Metadata + b = DataFrame(df=_df(), column_metadata=a.cmd, name="B") + assert isinstance(b.column_metadata, Metadata) + # invalid column_metadata -> expected to warn (warning not asserted here) + c = DataFrame(df=_df(), column_metadata=123, name="C") + assert isinstance(c.column_metadata, Metadata) + # df=None + d = DataFrame(df=None, name="D") + assert d.df.empty + + +def test_df_setter_index_name_and_columns(): + sdf = DataFrame(name="x") + sdf.df = _df() + assert sdf.df.index.name == "index" + assert sdf.column_metadata.get("weight") is not None + + +def test_to_from_dict_roundtrip(): + sdf = DataFrame(df=_df(), name="rt", description="desc") + d = sdf.to_dict() + assert "parquet_bytes" in d["data"] and "column_metadata" in d["data"] + r = DataFrame.from_dict(d) + assert list(r.df.columns) == ["weight", "height"] + + +def test_to_dataframe_attrs(): + sdf = DataFrame(df=_df(), name="att", description="d") + out = sdf.to_dataframe() 
+ assert "!sdata" in out.attrs + + +def test_to_parquet_bytes_and_from_bytes(): + sdf = DataFrame(df=_df(), name="pq") + raw = sdf.to_parquet() # no path -> bytes + assert isinstance(raw, (bytes, bytearray)) + back = DataFrame.from_parquet_bytes(raw) # attrs not preserved -> except branches + assert list(back.df.columns) == ["weight", "height"] + + +def test_to_parquet_file_and_from_parquet(tmp_path): + sdf = DataFrame(df=_df(), name="onfile") + fp = sdf.to_parquet(path=str(tmp_path)) # writes .spq + assert fp.endswith(".spq") + loaded = DataFrame.from_parquet(fp) + assert list(loaded.df.columns) == ["weight", "height"] + # nonexistent file -> Exception + with pytest.raises(Exception): + DataFrame.from_parquet(str(tmp_path / "nope.spq")) + + +def test_from_parquet_bytes_without_sdata_attrs(): + raw = _df().to_parquet() # plain, no _sdata attrs + back = DataFrame.from_parquet_bytes(raw) # attrs None -> except branches + assert list(back.df.columns) == ["weight", "height"] + + +def test_from_parquet_without_sdata_attrs(tmp_path): + fp = str(tmp_path / "plain.spq") + _df().to_parquet(fp) + loaded = DataFrame.from_parquet(fp) # attrs None -> except branches + assert list(loaded.df.columns) == ["weight", "height"]