In [2]:
from pathlib import Path
import re
import numpy as np
import pandas as pd
import h5py
import sqlite3

In [4]:
output_dir = Path("data_csv"); output_dir.mkdir(exist_ok=True)

def sanitize(name: str) -> str:
    base = name.strip("/").replace("/", "_") or "root"
    return re.sub(r"[^0-9A-Za-z_]", "_", base)

def describe_file(h5_path):
    print(f"Structure of {h5_path}")
    with h5py.File(h5_path, "r") as f:
        def show(name, obj):
            if isinstance(obj, h5py.Dataset):
                print(f"[DATASET] {name} shape={obj.shape} dtype={obj.dtype}")
            elif isinstance(obj, h5py.Group):
                print(f"[GROUP]   {name}")
        f.visititems(show)

def dataset_to_dataframe(ds: h5py.Dataset) -> pd.DataFrame:
    data = ds[()]
    if hasattr(data, "dtype") and data.dtype.names:
        df = pd.DataFrame({k: data[k] for k in data.dtype.names})
    else:
        if data.ndim == 1:
            df = pd.DataFrame({sanitize(ds.name): data})
        elif data.ndim == 2:
            df = pd.DataFrame(data)
            df.columns = [f"c{i}" for i in range(df.shape[1])]
        else:
            raise ValueError(f"Skip non-tabular dataset {ds.name} with ndim={data.ndim}")
    for c in df.columns:
        col = df[c]
        if col.dtype.kind in {"S", "O"}:
            try:
                sample = next((x for x in col.dropna().values if isinstance(x, (bytes, bytearray))), None)
                if isinstance(sample, (bytes, bytearray)):
                    df[c] = col.apply(lambda x: x.decode("utf-8", "ignore") if isinstance(x, (bytes, bytearray)) else x)
            except Exception:
                pass
    return df

def export_all(h5_path, out_dir: Path):
    csv_paths = []
    with h5py.File(h5_path, "r") as f:
        def handle(name, obj):
            if not isinstance(obj, h5py.Dataset):
                return
            tbl = sanitize(name)
            try:
                df = dataset_to_dataframe(obj)
            except Exception as e:
                print(f"- Skipped {name}: {e}")
                return
            csv_path = out_dir / f"{Path(h5_path).stem}_{tbl}.csv"
            df.to_csv(csv_path, index=False)
            csv_paths.append((tbl, str(csv_path)))
            print(f"+ Wrote {csv_path} rows={len(df)} cols={len(df.columns)}")
        f.visititems(handle)
    return csv_paths

In [29]:
data_h5_path = Path("data_h5")
for file in data_h5_path.glob("*.h5"):
    print(f"\nProcessing {file}")
    describe_file(file)
    export_all(file, output_dir)


Processing data_h5/nrshoes_left_stone1.h5
=== Structure of data_h5/nrshoes_left_stone1.h5 ===
[DATASET] sensor_left shape=(218, 209) dtype=float64
+ Wrote data_csv/nrshoes_left_stone1_sensor_left.csv rows=218 cols=209

Processing data_h5/fullsoul_left_wiese1.h5
=== Structure of data_h5/fullsoul_left_wiese1.h5 ===
[DATASET] sensor_left shape=(583, 209) dtype=float64
+ Wrote data_csv/fullsoul_left_wiese1_sensor_left.csv rows=583 cols=209

Processing data_h5/fullsoul_left_wiese_onlyfront.h5
=== Structure of data_h5/fullsoul_left_wiese_onlyfront.h5 ===
[DATASET] sensor_left shape=(465, 209) dtype=float64
+ Wrote data_csv/fullsoul_left_wiese_onlyfront_sensor_left.csv rows=465 cols=209

Processing data_h5/fullsoul_left_wiese0.h5
=== Structure of data_h5/fullsoul_left_wiese0.h5 ===
[DATASET] sensor_left shape=(370, 209) dtype=float64
+ Wrote data_csv/fullsoul_left_wiese0_sensor_left.csv rows=370 cols=209

Processing data_h5/nrshoes_left_onlyfront.h5
=== Structure of data_h5/nrshoes_left_only

In [5]:
db_path = "footsole.sqlite" 

def sanitize(name: str) -> str:
    base = re.sub(r"\.csv$", "", name, flags=re.I)
    base = re.sub(r"[^0-9A-Za-z_]", "_", base)
    return base or "table"

conn = sqlite3.connect(db_path)
for csv_path in output_dir.glob("*.csv"):
    tbl = sanitize(csv_path.name)
    df = pd.read_csv(csv_path)
    df.columns = [re.sub(r"[^0-9A-Za-z_]", "_", str(c)) or "col" for c in df.columns]
    df.to_sql(tbl, conn, if_exists="replace", index=False)
    print(f"Loaded {csv_path} -> {tbl}")
conn.close()

print(f"SQLite ready -> {db_path}")

Loaded data_csv/nrshoes_left_stone1_sensor_left.csv -> nrshoes_left_stone1_sensor_left
Loaded data_csv/fullsoul_left_wiese_onlyfront_sensor_left.csv -> fullsoul_left_wiese_onlyfront_sensor_left
Loaded data_csv/fullsoul_left_stone1_sensor_left.csv -> fullsoul_left_stone1_sensor_left
Loaded data_csv/nrshoes_left_onlyfront_sensor_left.csv -> nrshoes_left_onlyfront_sensor_left
Loaded data_csv/sensor_left.csv -> sensor_left
Loaded data_csv/nrshoes_left_wiese2_sensor_left.csv -> nrshoes_left_wiese2_sensor_left
Loaded data_csv/fullsoul_left_stone2_sensor_left.csv -> fullsoul_left_stone2_sensor_left
Loaded data_csv/nrshoes_left_wood1_sensor_left.csv -> nrshoes_left_wood1_sensor_left
Loaded data_csv/fullsoul_left_wiese0_sensor_left.csv -> fullsoul_left_wiese0_sensor_left
Loaded data_csv/nrshoes_left_stone2_sensor_left.csv -> nrshoes_left_stone2_sensor_left
Loaded data_csv/nrshoes_left_wiese1_sensor_left.csv -> nrshoes_left_wiese1_sensor_left
Loaded data_csv/fullsoul_left_wood1_sensor_left.csv -