In [None]:
from pathlib import Path
import pandas as pd

# Directory whose contents are listed and searched for data files.
BASE_DIR = Path("/content")

# Maximum number of CSV names echoed in the summary below. Kept as a single
# named constant so the slice, the overflow check and the message stay in sync.
MAX_CSV_LISTED = 10

# The header is derived from BASE_DIR so it cannot drift from the path that is
# actually scanned.
print(f"üìÇ Files in {BASE_DIR}")
print("="*70)

# Sorted so the listing order is stable between runs. No file/directory filter
# is applied, so sub-directories show up in the listing as well.
all_files = sorted(BASE_DIR.iterdir())

for f in all_files:
    # st_size is reported in bytes; convert to KB for display.
    size_kb = f.stat().st_size / 1024
    print(f"- {f.name:45s} | {size_kb:8.2f} KB")

print("="*70)

# Separate parquet & csv (suffix match is case-sensitive)
parquet_files = [f for f in all_files if f.suffix == ".parquet"]
csv_files = [f for f in all_files if f.suffix == ".csv"]

print(f"üß± Parquet files found: {len(parquet_files)}")
for f in parquet_files:
    print("  ‚úî", f.name)

print("-"*70)
print(f"üìÑ CSV files found: {len(csv_files)}")
# Only a preview of the CSV names is printed; the full list stays in csv_files.
for f in csv_files[:MAX_CSV_LISTED]:
    print("  -", f.name)
if len(csv_files) > MAX_CSV_LISTED:
    print(f"  ... (showing first {MAX_CSV_LISTED} only)")

print("="*70)

# Inspect parquet schemas safely
def inspect_parquet(path):
    """Load a parquet file, print its shape and per-column dtypes, and return it.

    Parameters
    ----------
    path : pathlib.Path
        Location of the parquet file. It is passed to ``pd.read_parquet`` and
        its ``.name`` is used in the printed header.

    Returns
    -------
    pandas.DataFrame
        The frame returned by ``pd.read_parquet(path)``.

    Notes
    -----
    The whole file is loaded with ``pd.read_parquet``; any exception it raises
    propagates to the caller.
    """
    df = pd.read_parquet(path)
    print(f"\nüìò {path.name}")
    print("-"*50)
    print("Shape:", df.shape)
    print("Columns:")
    for c, t in df.dtypes.items():
        # Column labels are not guaranteed to be str (e.g. ints, or tuples from
        # MultiIndex columns). The "s" format spec raises on those, so coerce
        # to str first; output for string labels is unchanged.
        print(f"  {str(c):15s} ‚Üí {t}")
    return df

# Successfully loaded frames, keyed by file name.
dataframes = {}

# The loop variable is intentionally not named `pq`: this notebook imports
# pyarrow.parquet under that alias, and reusing the name here would rebind it
# to a Path whenever this cell is re-run after that import.
for parquet_path in parquet_files:
    try:
        dataframes[parquet_path.name] = inspect_parquet(parquet_path)
    except Exception as e:
        # Best-effort: report the failure and continue with the remaining files.
        print(f"‚ùå Failed to read {parquet_path.name}: {e}")

print("\n‚úÖ Inspection complete.")


üìÇ Files in /content
- .config                                       |     4.00 KB
- 00005.csv                                     |    31.54 KB
- 00006.csv                                     |   128.46 KB
- 00007.csv                                     |    31.18 KB
- 1.RFUD_RW9_10_11_12.parquet                   | 48338.64 KB
- B0018_numeric_interpolated.csv                |     9.35 KB
- B0018_numeric_raw.csv                         |     8.99 KB
- OXFORD_checkpoint_features.parquet            |    66.62 KB
- RFUD_RW1_12_ALL_FINAL.parquet                 | 351985.01 KB
- RFUD_RW1_2_7_8.parquet                        | 56767.78 KB
- RFUD_RW3_4_5_6.parquet                        | 51042.70 KB
- sample_data                                   |     4.00 KB
üß± Parquet files found: 5
  ‚úî 1.RFUD_RW9_10_11_12.parquet
  ‚úî OXFORD_checkpoint_features.parquet
  ‚úî RFUD_RW1_12_ALL_FINAL.parquet
  ‚úî RFUD_RW1_2_7_8.parquet
  ‚úî RFUD_RW3_4_5_6.parquet
-----------------------------------

In [None]:
import pyarrow.parquet as pq
import pandas as pd
from pathlib import Path

# Parquet file that the row-group loop further down reads from.
RFUD_PATH = "/content/RFUD_RW1_12_ALL_FINAL.parquet"

# Keep the ParquetFile handle in `pf` and report how many row groups it exposes.
pf = pq.ParquetFile(RFUD_PATH)
print(f"Row groups: {pf.num_row_groups}")


Row groups: 32


In [None]:
# Only these columns are used by the aggregation below. Requesting just them
# from each row group avoids decoding every other column in the file.
FEATURE_COLUMNS = ["cycle_id", "voltage_V", "temperature_C", "time_s"]

# One per-cycle summary frame is appended per row group.
features = []

for i in range(pf.num_row_groups):
    print(f"Processing row group {i+1}/{pf.num_row_groups}")

    df = pf.read_row_group(i, columns=FEATURE_COLUMNS).to_pandas()

    # NOTE(review): aggregation happens independently per row group, so a
    # cycle_id whose rows straddle two row groups yields two partial rows in
    # `features` - confirm cycles never span row groups, or re-aggregate later.
    grp = df.groupby("cycle_id").agg(
        V_mean=("voltage_V", "mean"),
        V_std=("voltage_V", "std"),
        V_min=("voltage_V", "min"),
        V_max=("voltage_V", "max"),
        # diff() is the change between consecutive samples and is not divided
        # by elapsed time. NOTE(review): despite the names these are per-sample
        # voltage steps - confirm the sampling interval is uniform.
        dV_dt_mean=("voltage_V", lambda x: x.diff().mean()),
        dV_dt_max=("voltage_V", lambda x: x.diff().max()),
        T_mean=("temperature_C", "mean"),
        T_max=("temperature_C", "max"),
        T_delta=("temperature_C", lambda x: x.max() - x.min()),
        # Last minus first time_s within the group; presumably rows are stored
        # in time order - TODO confirm.
        duration_s=("time_s", lambda x: x.iloc[-1] - x.iloc[0]),
    ).reset_index()

    features.append(grp)

    # Drop the loop-local names before the next row group is read; the summary
    # frame stays referenced from `features`.
    del df, grp


Processing row group 1/32
Processing row group 2/32
Processing row group 3/32
Processing row group 4/32
Processing row group 5/32
Processing row group 6/32
Processing row group 7/32
Processing row group 8/32
Processing row group 9/32
Processing row group 10/32
Processing row group 11/32
Processing row group 12/32
Processing row group 13/32
Processing row group 14/32
Processing row group 15/32
Processing row group 16/32
Processing row group 17/32
Processing row group 18/32
Processing row group 19/32
Processing row group 20/32
Processing row group 21/32
Processing row group 22/32
Processing row group 23/32
Processing row group 24/32
Processing row group 25/32
Processing row group 26/32
Processing row group 27/32
Processing row group 28/32
Processing row group 29/32
Processing row group 30/32
Processing row group 31/32
Processing row group 32/32
