In [None]:
# Install compatible versions
print("Installing dependencies...")
!pip install -q numpy==1.26.4
!pip install -q scikit-learn==1.4.2
!pip install -q sktime==0.30.0

Installing dependencies...
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.0/18.0 MB[0m [31m117.0 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
pytensor 2.35.1 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.
opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
jax 0.7.2 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.
jaxlib 0.7.2 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, 

In [None]:
# RealWorld-HAR (RealWorld2016, University of Mannheim)
# Download the dataset archive into /content/data/rwhar and unpack it there.
# `%cd` changes the kernel's working directory, so every later relative path in
# this cell resolves inside /content/data/rwhar.
!mkdir -p /content/data/rwhar
%cd /content/data/rwhar

# Attempt HTTPS first (disabling certificate verification due to an SNI mismatch on the host); on failure, fall back to HTTP
# `-c` resumes a partially downloaded file; `-O` fixes the local file name.
# NOTE(review): neither transport authenticates the server (certificate checks
# are disabled for HTTPS and the fallback is plain HTTP), so the archive's
# integrity is not verified here — compare it against a published checksum if
# one is available.
!wget -c --no-check-certificate "https://wifo5-14.informatik.uni-mannheim.de/sensor/dataset/realworld2016/realworld2016_dataset.zip" -O realworld2016_dataset.zip || wget -c "http://wifo5-14.informatik.uni-mannheim.de/sensor/dataset/realworld2016/realworld2016_dataset.zip" -O realworld2016_dataset.zip

# Decompress and perform a brief inspection
# `-o` overwrites existing files without prompting, `-q` keeps unzip quiet.
!unzip -q -o realworld2016_dataset.zip
!echo "=== top-level ==="
!ls -lah
!echo "=== dirs (depth<=2) ==="
# Only the first 20 directories (sorted, at most two levels deep) are listed.
!find . -maxdepth 2 -type d | sort | head -n 20

/content/data/rwhar
--2025-11-11 20:53:28--  https://wifo5-14.informatik.uni-mannheim.de/sensor/dataset/realworld2016/realworld2016_dataset.zip
Resolving wifo5-14.informatik.uni-mannheim.de (wifo5-14.informatik.uni-mannheim.de)... 134.155.98.56
Connecting to wifo5-14.informatik.uni-mannheim.de (wifo5-14.informatik.uni-mannheim.de)|134.155.98.56|:443... connected.
	requested host name ‘wifo5-14.informatik.uni-mannheim.de’.
HTTP request sent, awaiting response... 403 Forbidden
2025-11-11 20:53:29 ERROR 403: Forbidden.

--2025-11-11 20:53:29--  http://wifo5-14.informatik.uni-mannheim.de/sensor/dataset/realworld2016/realworld2016_dataset.zip
Resolving wifo5-14.informatik.uni-mannheim.de (wifo5-14.informatik.uni-mannheim.de)... 134.155.98.56
Connecting to wifo5-14.informatik.uni-mannheim.de (wifo5-14.informatik.uni-mannheim.de)|134.155.98.56|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3721016476 (3.5G) [application/zip]
Saving to: ‘realworld2016_dataset.zip’


2

In [None]:
# ================ Step 0: Project Initialization ================
import os
from datetime import datetime

# Create directory structure
dirs = ['data/raw', 'interim', 'proc', 'features', 'models', 'logs', 'figures', 'configs']
for d in dirs:
    os.makedirs(f'/content/{d}', exist_ok=True)
print("✓ Directory structure created")

# Git Initialization
%cd /content
!git init
!git config user.name "HAR-Project"
!git config user.email "har@project.local"
print("✓ Git repository initialized")

# Persist environment information
!pip freeze > logs/env.txt
print("✓ Environment dependencies saved to logs/env.txt")

# Persist random seed list and hardware information
import json
import subprocess

meta = {
    "timestamp": datetime.now().isoformat(),
    "random_seeds": [42, 123, 456, 789, 2024],  # predefined seeds
    "hardware": {
        "gpu": subprocess.getoutput("nvidia-smi --query-gpu=name --format=csv,noheader"),
        "cpu": subprocess.getoutput("cat /proc/cpuinfo | grep 'model name' | head -1").split(':')[1].strip(),
    }
}

with open('logs/init_meta.json', 'w') as f:
    json.dump(meta, f, indent=2)
print("✓ Metadata saved to logs/init_meta.json")

# Initial commit
!git add .
!git commit -m "init: project structure and environment"
git_hash = subprocess.getoutput("git rev-parse HEAD")
print(f"✓ Git commit hash: {git_hash[:8]}")


# ================ Step 1: Data Acquisition (Compliance) ================
# Move raw data to data/raw/ and retain structure
# Each top-level entry is moved individually and nothing is overwritten. The
# download directory is removed only once it is empty, so a failed or partial
# move can no longer delete the dataset (the previous `mv ... || true` followed
# by `rm -rf` did exactly that).
import shutil
from pathlib import Path

rwhar_dir = Path('/content/data/rwhar')
raw_root = Path('/content/data/raw')
raw_root.mkdir(parents=True, exist_ok=True)

not_moved = []
if rwhar_dir.is_dir():
    for entry in sorted(rwhar_dir.iterdir()):
        target = raw_root / entry.name
        if target.exists():
            # Never clobber data that is already in place (e.g. on a re-run).
            not_moved.append(entry.name)
            continue
        shutil.move(str(entry), str(target))

if not_moved:
    print(f"⚠️ {len(not_moved)} entries already exist in data/raw/ and were left in {rwhar_dir}: {not_moved[:5]}")
else:
    if rwhar_dir.is_dir():
        rwhar_dir.rmdir()  # only succeeds on an empty directory
    print("✓ Raw data moved to data/raw/")

# Compute checksums
import hashlib

def calc_checksum(filepath):
    """Return the SHA-256 hex digest of the file at ``filepath``.

    The file is opened in binary mode and consumed in 8192-byte pieces, so it
    never has to be held in memory as a whole.
    """
    digest = hashlib.sha256()
    with open(filepath, 'rb') as stream:
        while True:
            piece = stream.read(8192)
            if not piece:
                break
            digest.update(piece)
    return digest.hexdigest()

checksums = {}
for root, _, files in os.walk('/content/data/raw'):
    for f in files:
        path = os.path.join(root, f)
        rel_path = os.path.relpath(path, '/content/data/raw')
        checksums[rel_path] = calc_checksum(path)

with open('/content/logs/checksums.txt', 'w') as f:
    f.write(f"# RealWorld2016 dataset checksums (SHA256)\n")
    f.write(f"# Generated at: {datetime.now().isoformat()}\n\n")
    for path, sha in sorted(checksums.items()):
        f.write(f"{sha}  {path}\n")

print(f"✓ Computed checksums for {len(checksums)} files → logs/checksums.txt")

# Record data source
with open('/content/logs/data_source.txt', 'w') as f:
    f.write("RealWorld2016 Human Activity Recognition Dataset\n")
    f.write("=" * 50 + "\n")
    f.write("Source: University of Mannheim\n")
    f.write("URL: https://wifo5-14.informatik.uni-mannheim.de/sensor/dataset/realworld2016/\n")
    f.write("Citation: Sztyler, T., & Stuckenschmidt, H. (2016). On-body localization of wearable devices.\n")
    f.write(f"Downloaded: {datetime.now().isoformat()}\n")

print("✓ Data source recorded to logs/data_source.txt")

# Commit data acquisition records
!git add logs/
!git commit -m "data: add RealWorld2016 checksums and source"
print(f"\n{'='*60}\nProject initialization and data acquisition completed\n{'='*60}")

✓ Directory structure created
/content
[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /content/.git/
✓ Git repository initialized
✓ Environment dependencies saved to logs/env.txt
✓ Metadata saved to logs/init_meta.json
[master (root-commit) e133110] init: project structure and environment
 1837 files changed, 51723 insertions(+)
 create mode 100644 .config/.last_opt_in_prompt.yaml
 create mode 100644 .config/.last_survey_prompt.yaml
 create mode 100644 .config/.last_update_check.json
 create mode 100644 .config/active_co

In [None]:
# ================ Step 2: Sensor/Location Selection (Revised) ================
import pandas as pd
from pathlib import Path
import json
import zipfile

print("Step 2: Sensor/Location Selection")
print("=" * 60)

raw_dir = Path('/content/data/raw')

# Decompress all zip files first
print("Extracting sensor data...")
zip_files = list(raw_dir.rglob('*.zip'))
print(f"Found {len(zip_files)} zip files")

for zip_path in zip_files:
    if 'csv.zip' in zip_path.name:
        extract_dir = zip_path.parent / zip_path.stem
        if not extract_dir.exists():
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(extract_dir)

print("✓ Extraction complete")

# Search for CSV files under acc and gyr directories
print("\nSearching for sensor directories...")
acc_dirs = list(raw_dir.rglob('acc_*_csv'))
gyr_dirs = list(raw_dir.rglob('gyr_*_csv'))

print(f"✓ Found {len(acc_dirs)} ACC directories")
print(f"✓ Found {len(gyr_dirs)} GYR directories")

if acc_dirs:
    print(f"\nExample ACC directory: {acc_dirs[0].relative_to(raw_dir)}")
    sample_files = list(acc_dirs[0].glob('*.csv'))
    print(f"Number of files under {acc_dirs[0].name}: {len(sample_files)}")
    if sample_files:
        print(f"Example file: {sample_files[0].name}")

# Find all files containing "waist"
waist_files = {'acc': [], 'gyr': []}

for acc_dir in acc_dirs:
    for f in acc_dir.glob('*waist*.csv'):
        waist_files['acc'].append(f)

for gyr_dir in gyr_dirs:
    for f in gyr_dir.glob('*waist*.csv'):
        waist_files['gyr'].append(f)

print(f"\n✓ Found Waist-ACC files: {len(waist_files['acc'])}")
print(f"✓ Found Waist-GYR files: {len(waist_files['gyr'])}")

# Display example files
if waist_files['acc']:
    print(f"\nExample ACC file: {waist_files['acc'][0].relative_to(raw_dir)}")
    sample_acc = pd.read_csv(waist_files['acc'][0])
    print(f"Columns: {list(sample_acc.columns)}")
    print(f"Shape: {sample_acc.shape}")
    print(sample_acc.head(3))

if waist_files['gyr']:
    print(f"\nExample GYR file: {waist_files['gyr'][0].relative_to(raw_dir)}")
    sample_gyr = pd.read_csv(waist_files['gyr'][0])
    print(f"Columns: {list(sample_gyr.columns)}")
    print(f"Shape: {sample_gyr.shape}")
    print(sample_gyr.head(3))

# Collect metadata
waist_metadata = []
for sensor_type in ['acc', 'gyr']:
    for filepath in waist_files[sensor_type]:
        parts = filepath.parts
        subject = [p for p in parts if p.startswith('proband')][0]
        activity = filepath.parent.name.split('_')[1]

        df = pd.read_csv(filepath)
        waist_metadata.append({
            'subject': subject,
            'activity': activity,
            'sensor': sensor_type,
            'original_path': str(filepath.relative_to(raw_dir)),
            'shape': list(df.shape),
            'columns': list(df.columns)
        })

# Persist selection report
with open('/content/logs/sensor_selection.json', 'w') as f:
    json.dump({
        'selection': {
            'position': 'waist',
            'sensors': ['acc', 'gyr'],
            'channels': 6,
            'rationale': 'Single position to avoid domain shift; ACC+GYRO is the standard configuration for HAR'
        },
        'files_found': {
            'acc': len(waist_files['acc']),
            'gyr': len(waist_files['gyr'])
        },
        'metadata': waist_metadata[:10]
    }, f, indent=2)

print(f"\n✓ Selection report saved: logs/sensor_selection.json")

!git add logs/sensor_selection.json
!git commit -m "data: select waist position with acc+gyr sensors"


# ================ Step 3: Column Alignment and Naming ================
print("\n\nStep 3: Column Alignment and Naming")
print("=" * 60)

# Analyze column names
acc_cols = set()
gyr_cols = set()

for filepath in waist_files['acc'][:3]:
    df = pd.read_csv(filepath)
    acc_cols.update(df.columns)

for filepath in waist_files['gyr'][:3]:
    df = pd.read_csv(filepath)
    gyr_cols.update(df.columns)

print(f"ACC column names: {sorted(acc_cols)}")
print(f"GYR column names: {sorted(gyr_cols)}")

# Define standard mapping
standard_mapping = {
    'acc': {
        'attr_x': 'acc_x',
        'attr_y': 'acc_y',
        'attr_z': 'acc_z',
        'attr_time': 'timestamp'
    },
    'gyr': {
        'attr_x': 'gyro_x',
        'attr_y': 'gyro_y',
        'attr_z': 'gyro_z',
        'attr_time': 'timestamp'
    }
}

cols_config = {
    'standard_columns': ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z'],
    'units': {
        'acc_x': 'm/s²', 'acc_y': 'm/s²', 'acc_z': 'm/s²',
        'gyro_x': 'rad/s', 'gyro_y': 'rad/s', 'gyro_z': 'rad/s'
    },
    'mapping': standard_mapping,
    'timestamp_col': 'timestamp'
}

with open('/content/configs/cols.json', 'w') as f:
    json.dump(cols_config, f, indent=2)

print("\n✓ Column mapping configuration saved: configs/cols.json")

# Generate schema report
report = [
    "# RealWorld2016 Data Schema Report\n\n",
    f"Generated at: {datetime.now().isoformat()}\n\n",
    "## Standard column definitions\n\n",
    "| Column | Unit | Description |\n|------|------|------|\n"
]

for col in cols_config['standard_columns']:
    unit = cols_config['units'][col]
    sensor = 'Accelerometer' if 'acc' in col else 'Gyroscope'
    axis = col.split('_')[1].upper()
    report.append(f"| {col} | {unit} | {sensor} {axis}-axis |\n")

report.append("\n## Original column mapping\n\n### Accelerometer\n")
for orig, std in standard_mapping['acc'].items():
    report.append(f"- `{orig}` → `{std}`\n")

report.append("\n### Gyroscope\n")
for orig, std in standard_mapping['gyr'].items():
    report.append(f"- `{orig}` → `{std}`\n")

# Missing-value statistics
report.append("\n## Data quality checks\n\n")
for sensor in ['acc', 'gyr']:
    report.append(f"### {sensor.upper()} Missing values (sample of 5 files)\n\n")
    has_missing = False
    for fp in waist_files[sensor][:5]:
        df = pd.read_csv(fp)
        missing = df.isnull().sum()
        if missing.sum() > 0:
            report.append(f"- {fp.name}: {missing[missing > 0].to_dict()}\n")
            has_missing = True
    if not has_missing:
        report.append("- No missing values ✓\n")
    report.append("\n")

with open('/content/logs/schema_report.md', 'w') as f:
    f.writelines(report)

print("✓ Schema report saved: logs/schema_report.md")
print("\n" + "".join(report))

!git add configs/cols.json logs/schema_report.md
!git commit -m "data: standardize column names and units"

print(f"\n{'='*60}")
print("Steps 2–3 completed")
print(f"{'='*60}")

Step 2: Sensor/Location Selection
Extracting sensor data...
Found 1441 zip files
✓ Extraction complete

Searching for sensor directories...
✓ Found 120 ACC directories
✓ Found 120 GYR directories

Example ACC directory: proband4/data/acc_climbingdown_csv
Number of files under acc_climbingdown_csv: 0

✓ Found Waist-ACC files: 114
✓ Found Waist-GYR files: 114

Example ACC file: proband4/data/acc_walking_csv/acc_walking_2_waist.csv
Columns: ['id', 'attr_time', 'attr_x', 'attr_y', 'attr_z']
Shape: (31031, 5)
   id      attr_time    attr_x    attr_y    attr_z
0   1  1436291534801  9.831789 -0.413000  1.091157
1   2  1436291534820  9.919178 -0.492607  1.191714
2   3  1436291534839  9.939528 -0.612317  1.142034

Example GYR file: proband4/data/gyr_jumping_csv/Gyroscope_jumping_waist.csv
Columns: ['id', 'attr_time', 'attr_x', 'attr_y', 'attr_z']
Shape: (4292, 5)
   id      attr_time    attr_x    attr_y    attr_z
0   1  1436295094022 -0.002988 -0.022363 -0.004502
1   2  1436295094041  0.003121 

In [None]:
# ================ Step 4: Timeline Normalization (Final) ================
import numpy as np
import pandas as pd
from scipy import interpolate
import matplotlib.pyplot as plt
from pathlib import Path
import json
import zipfile

print("\n\nStep 4: Timeline Normalization")
print("=" * 60)

raw_dir = Path('/content/data/raw')

# Decompression: for every proband, unpack the acc_*/gyr_* CSV archives that
# contain at least one "waist" member and have not been unpacked yet
print("Extracting waist data...")
for proband_dir in raw_dir.glob('proband*'):
    data_dir = proband_dir / 'data'
    if not data_dir.exists():
        continue
    for zip_file in data_dir.glob('*_csv.zip'):
        if not zip_file.stem.startswith(('acc_', 'gyr_')):
            continue
        extract_dir = zip_file.parent / zip_file.stem
        if extract_dir.exists():
            continue
        with zipfile.ZipFile(zip_file, 'r') as zf:
            has_waist = any('waist' in member.lower() for member in zf.namelist())
            if has_waist:
                zf.extractall(extract_dir)

# Scan: sort every waist CSV into 'acc' or 'gyr' by its parent directory prefix
waist_files = {'acc': [], 'gyr': []}
for csv_file in raw_dir.rglob('*.csv'):
    if 'waist' not in csv_file.name.lower():
        continue
    folder_name = csv_file.parent.name
    for sensor_key in ('acc', 'gyr'):
        if folder_name.startswith(f'{sensor_key}_'):
            waist_files[sensor_key].append(csv_file)
            break

print(f"✓ ACC: {len(waist_files['acc'])}, GYR: {len(waist_files['gyr'])}")

# Improved pairing: directory mapping + same-name preference
def find_gyr_for_acc(acc_path):
    """Return the gyroscope CSV that belongs to an accelerometer CSV.

    The gyroscope directory is the sibling of the accelerometer directory with
    ``acc_`` replaced by ``gyr_``. Inside it the counterpart of the
    accelerometer file is looked up by name, trying both the ``gyr_`` prefix and
    the ``Gyroscope_`` prefix the dataset actually uses (e.g.
    ``acc_walking_2_waist.csv`` -> ``Gyroscope_walking_2_waist.csv``). Without
    the second prefix the lookup never matched, and every part of a multi-part
    recording was paired with the first waist file of the directory.

    Parameters
    ----------
    acc_path : pathlib.Path
        Path of the accelerometer CSV.

    Returns
    -------
    pathlib.Path or None
        The matching gyroscope file; if no same-name file exists, the first
        (sorted) ``*waist*.csv`` of the gyroscope directory; ``None`` when the
        directory is missing or holds no waist file.
    """
    gyr_dir = acc_path.parent.parent / acc_path.parent.name.replace('acc_', 'gyr_')
    if not gyr_dir.exists():
        return None

    acc_name = acc_path.name
    same_name_candidates = [acc_name.replace('acc_', 'gyr_')]
    if acc_name.startswith('acc_'):
        same_name_candidates.append('Gyroscope_' + acc_name[len('acc_'):])

    for cand_name in same_name_candidates:
        cand = gyr_dir / cand_name
        if cand.exists():
            return cand

    # Last resort: any waist file of the same activity directory
    cands = sorted(gyr_dir.glob('*waist*.csv'))
    return cands[0] if cands else None

# Build (acc file, gyr file, proband, activity) tuples; accelerometer files
# without a gyroscope counterpart are dropped
file_pairs = []
for acc_csv in waist_files['acc']:
    gyr_csv = find_gyr_for_acc(acc_csv)
    if gyr_csv:
        subject_name = next(part for part in acc_csv.parts if part.startswith('proband'))
        activity_name = acc_csv.parent.name.split('_')[1]
        file_pairs.append((acc_csv, gyr_csv, subject_name, activity_name))

print(f"✓ File pairs: {len(file_pairs)}")

# Column mapping written by the column-alignment step
with open('/content/configs/cols.json', 'r') as config_file:
    cols_config = json.load(config_file)

TARGET_FS = 50        # resampling rate in Hz
MAX_GAP_MS = 200      # a larger gap between samples starts a new segment
MIN_DURATION_S = 1.0  # shorter segments are discarded

interim_dir = Path('/content/interim')
interim_dir.mkdir(exist_ok=True)

def detect_time_unit(df, col='timestamp'):
    """Guess the unit of a timestamp column and convert it to nanoseconds.

    The median positive difference between the 200 smallest timestamps is
    compared against one open interval per unit.

    Returns
    -------
    (values, unit)
        ``values`` is ``df[col]`` scaled to nanoseconds (returned unscaled when
        the unit already is ``'ns'``) and ``unit`` is one of ``'s'``, ``'ms'``,
        ``'us'``, ``'ns'``. ``(None, None)`` is returned when there is no
        positive difference or the median falls outside every interval.
    """
    earliest = df[col].sort_values().iloc[:200].values
    steps = np.diff(earliest)
    positive_steps = steps[steps > 0]
    if len(positive_steps) == 0:
        return None, None
    dt = np.median(positive_steps)

    # (exclusive lower bound, exclusive upper bound, factor to ns, unit label)
    unit_table = (
        (0.01, 5, 1e9, 's'),
        (10, 100, 1e6, 'ms'),
        (10000, 100000, 1e3, 'us'),
        (1e7, 1e8, None, 'ns'),
    )
    for lower, upper, to_ns, unit in unit_table:
        if lower < dt < upper:
            converted = df[col] if to_ns is None else df[col] * to_ns
            return converted, unit
    return None, None

# Main loop of the timeline normalization. For every (acc, gyr) file pair:
#   1. rename columns and convert both timestamp columns to nanoseconds,
#   2. merge the two streams (absolute timestamps first, then three fallbacks),
#   3. split the merged stream at gaps larger than MAX_GAP_MS,
#   4. linearly resample every segment onto a TARGET_FS grid,
#   5. write the result to interim/<proband>_<activity>_waist.csv.
# `all_stats` receives one summary dict per written file; `skipped` receives a
# "<proband>_<activity>" label for every pair that could not be processed.
all_stats = []
skipped = []

for idx, (acc_path, gyr_path, proband, activity) in enumerate(file_pairs):
    print(f"\n[{idx+1}/{len(file_pairs)}] {proband}/{activity}")

    # Apply the raw → standard column mapping from cols_config
    acc_df = pd.read_csv(acc_path).rename(columns=cols_config['mapping']['acc'])
    gyr_df = pd.read_csv(gyr_path).rename(columns=cols_config['mapping']['gyr'])

    # detect_time_unit returns (None, None) when the unit cannot be inferred
    acc_ts_ns, acc_unit = detect_time_unit(acc_df)
    gyr_ts_ns, gyr_unit = detect_time_unit(gyr_df)

    if acc_ts_ns is None or gyr_ts_ns is None:
        print(f"  ⚠️ Skipped: unable to determine timestamp unit")
        skipped.append(f"{proband}_{activity}")
        continue

    # Keep only the nanosecond timestamp and the three axes, sorted by time and
    # with duplicate timestamps removed (merge_asof needs sorted keys)
    acc_df['timestamp_ns'] = acc_ts_ns
    gyr_df['timestamp_ns'] = gyr_ts_ns
    acc_df = acc_df[['timestamp_ns', 'acc_x', 'acc_y', 'acc_z']].sort_values('timestamp_ns').drop_duplicates('timestamp_ns')
    gyr_df = gyr_df[['timestamp_ns', 'gyro_x', 'gyro_y', 'gyro_z']].sort_values('timestamp_ns').drop_duplicates('timestamp_ns')

    # State shared by the merge strategies below; `df` stays None until one succeeds
    df = None
    merge_mode = 'absolute'
    merge_tol = None
    offset_ns = 0

    # Adaptive tolerance
    # The accelerometer is the left frame, so merged rows carry its timestamps;
    # dropna() removes rows without a gyroscope sample within the tolerance.
    # The first tolerance that yields at least TARGET_FS rows is accepted.
    for tol_ms in [10, 30, 50, 100]:
        tol_ns = int(tol_ms * 1e6)
        df_try = pd.merge_asof(acc_df, gyr_df, on='timestamp_ns', direction='nearest', tolerance=tol_ns).dropna()
        if len(df_try) >= TARGET_FS:
            df = df_try
            merge_tol = tol_ms
            break

    # Fallback 1: relative time (relaxed thresholds)
    # Both streams are re-based to start at 0 and merged on that relative time.
    if df is None:
        for tol_ms in [10, 30, 50]:
            acc_tmp = acc_df.copy()
            gyr_tmp = gyr_df.copy()
            acc_tmp['t_rel'] = acc_tmp['timestamp_ns'] - acc_tmp['timestamp_ns'].iloc[0]
            gyr_tmp['t_rel'] = gyr_tmp['timestamp_ns'] - gyr_tmp['timestamp_ns'].iloc[0]

            df_try = pd.merge_asof(acc_tmp.sort_values('t_rel'), gyr_tmp.sort_values('t_rel'),
                                   on='t_rel', direction='nearest', tolerance=int(tol_ms*1e6)).dropna()

            if len(df_try) > 1:
                # Accept only if the 99th percentile of the sample spacing is at
                # most 40 ms and at least half of the shorter stream was matched
                p99 = (df_try['t_rel'].diff() / 1e6).quantile(0.99)
                match_rate = len(df_try) / max(1, min(len(acc_df), len(gyr_df)))

                if len(df_try) >= TARGET_FS and p99 <= 40 and match_rate >= 0.5:
                    # From here on 'timestamp_ns' holds the relative time
                    df = df_try.rename(columns={'t_rel': 'timestamp_ns'})
                    merge_mode = 'relative'
                    merge_tol = tol_ms
                    break

    # Fallback 2: offset search (broaden range and thresholds)
    # The gyroscope stream is shifted by -3000..+3000 ms in 50 ms steps; the
    # shift with the most matches (30 ms tolerance) is kept as candidate.
    if df is None:
        best_df, best_matches, best_offset = None, -1, 0
        for offset_ms in range(-3000, 3001, 50):
            gyr_shift = gyr_df.copy()
            gyr_shift['timestamp_ns'] = gyr_shift['timestamp_ns'] + int(offset_ms * 1e6)
            df_try = pd.merge_asof(acc_df, gyr_shift, on='timestamp_ns',
                                   direction='nearest', tolerance=int(30*1e6)).dropna()
            if len(df_try) > best_matches:
                best_df, best_matches, best_offset = df_try, len(df_try), offset_ms

        if best_matches >= TARGET_FS and best_df is not None and len(best_df) > 1:
            # Same acceptance criteria as in the relative-time fallback
            p99 = (best_df['timestamp_ns'].diff() / 1e6).quantile(0.99)
            match_rate = best_matches / max(1, min(len(acc_df), len(gyr_df)))

            if p99 <= 40 and match_rate >= 0.5:
                df = best_df
                merge_mode = 'offset_search'
                merge_tol = 30
                offset_ns = int(best_offset * 1e6)

    # Fallback 3: intersection window resampling
    # Both streams are interpolated independently onto one TARGET_FS grid that
    # spans their overlapping time range (at least 1 s). This path writes its
    # output directly and continues with the next pair, so the gap-based
    # segmentation below is not applied to it (everything becomes segment 0).
    if df is None:
        t0 = max(acc_df['timestamp_ns'].iloc[0], gyr_df['timestamp_ns'].iloc[0])
        t1 = min(acc_df['timestamp_ns'].iloc[-1], gyr_df['timestamp_ns'].iloc[-1])

        if t1 - t0 >= 1e9:
            STEP_NS = int(1e9 / TARGET_FS)
            t_grid = np.arange(t0, t1, STEP_NS, dtype=np.int64)

            acc_interp = interpolate.interp1d(acc_df['timestamp_ns'].values,
                                              acc_df[['acc_x', 'acc_y', 'acc_z']].values,
                                              axis=0, kind='linear', bounds_error=True)
            gyr_interp = interpolate.interp1d(gyr_df['timestamp_ns'].values,
                                              gyr_df[['gyro_x', 'gyro_y', 'gyro_z']].values,
                                              axis=0, kind='linear', bounds_error=True)

            acc_vals = acc_interp(t_grid)
            gyr_vals = gyr_interp(t_grid)

            df = pd.DataFrame({
                'timestamp': t_grid,
                'segment_id': 0,
                'proband': proband,
                'activity': activity,
                'acc_x': acc_vals[:, 0], 'acc_y': acc_vals[:, 1], 'acc_z': acc_vals[:, 2],
                'gyro_x': gyr_vals[:, 0], 'gyro_y': gyr_vals[:, 1], 'gyro_z': gyr_vals[:, 2]
            })

            out_name = f"{proband}_{activity}_waist.csv"
            df.to_csv(interim_dir / out_name, index=False)

            # Note: unlike the regular path, this record has no 'merge_tolerance_ms' key
            all_stats.append({
                'file': out_name,
                'proband': proband,
                'activity': activity,
                'acc_unit': acc_unit,
                'gyr_unit': gyr_unit,
                'merge_mode': 'intersection',
                'segments': 1,
                'samples': len(df)
            })

            print(f"  {acc_unit}/{gyr_unit}, intersection, 1 segment, {len(df)} samples")
            continue

    # Every strategy failed (including an overlap shorter than 1 s)
    if df is None or len(df) < TARGET_FS:
        print(f"  ⚠️ Skipped: merge failed")
        skipped.append(f"{proband}_{activity}")
        continue

    df = df.reset_index(drop=True)
    df['dt_ms'] = df['timestamp_ns'].diff() / 1e6

    # Segmentation
    # dt_ms[i] is the gap between row i-1 and row i, so a large gap at i makes
    # row i the first row of a new segment. dt_ms[0] is NaN and never exceeds
    # the threshold.
    gaps = df['dt_ms'].values
    large_gap_idx = np.where(gaps > MAX_GAP_MS)[0]
    split_points = [0] + large_gap_idx.tolist() + [len(df)]

    # Keep segments with more than one row that last at least MIN_DURATION_S
    segments = []
    for i in range(len(split_points) - 1):
        seg = df.iloc[split_points[i]:split_points[i + 1]].copy()
        if len(seg) > 1:
            duration_s = (seg['timestamp_ns'].iloc[-1] - seg['timestamp_ns'].iloc[0]) / 1e9
            if duration_s >= MIN_DURATION_S:
                segments.append(seg)

    if len(segments) == 0:
        print(f"  ⚠️ Skipped: no valid segments")
        skipped.append(f"{proband}_{activity}")
        continue

    # Resampling
    # Each segment gets its own grid from its first to its last timestamp
    # (the "+ 1" makes the end inclusive) with a step of 1/TARGET_FS seconds.
    STEP_NS = int(1e9 / TARGET_FS)
    all_resampled = []
    for seg_id, seg in enumerate(segments):
        t_start = seg['timestamp_ns'].iloc[0]
        t_end = seg['timestamp_ns'].iloc[-1]
        t_grid = np.arange(t_start, t_end + 1, STEP_NS, dtype=np.int64)

        df_seg = pd.DataFrame({
            'timestamp': t_grid,
            'segment_id': seg_id,
            'proband': proband,
            'activity': activity
        })
        # Linear interpolation of every channel onto the grid
        for col in ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']:
            f = interpolate.interp1d(seg['timestamp_ns'], seg[col], kind='linear', bounds_error=True)
            df_seg[col] = f(t_grid)

        all_resampled.append(df_seg)

    df_final = pd.concat(all_resampled, ignore_index=True)

    # NOTE(review): the file name depends only on proband and activity, so two
    # waist recordings of the same proband/activity would overwrite each other
    # — verify whether such multi-part recordings occur in the dataset.
    out_name = f"{proband}_{activity}_waist.csv"
    df_final.to_csv(interim_dir / out_name, index=False)

    stat = {
        'file': out_name,
        'proband': proband,
        'activity': activity,
        'acc_unit': acc_unit,
        'gyr_unit': gyr_unit,
        'merge_mode': merge_mode,
        'merge_tolerance_ms': merge_tol,
        'segments': len(segments),
        'samples': len(df_final)
    }
    if merge_mode == 'offset_search':
        stat['offset_ns'] = offset_ns

    all_stats.append(stat)

    mode_str = f"{merge_mode}" + (f"(Δ={offset_ns/1e6:.0f}ms)" if merge_mode=='offset_search' else '')
    print(f"  {acc_unit}/{gyr_unit}, {mode_str}, {len(segments)} segments, {len(df_final)} samples")

print(f"\n✓ Completed {len(all_stats)} files")
if skipped:
    print(f"⚠️ Skipped {len(skipped)}: {skipped}")

# Plotting
if all_stats:
    first_file = all_stats[0]
    first_pair = [(p[0], p[1], p[2], p[3]) for p in file_pairs if p[2] == first_file['proband'] and p[3] == first_file['activity']][0]

    acc_df = pd.read_csv(first_pair[0]).rename(columns=cols_config['mapping']['acc'])
    gyr_df = pd.read_csv(first_pair[1]).rename(columns=cols_config['mapping']['gyr'])
    acc_ts_ns, _ = detect_time_unit(acc_df)
    gyr_ts_ns, _ = detect_time_unit(gyr_df)
    acc_df['timestamp_ns'] = acc_ts_ns
    gyr_df['timestamp_ns'] = gyr_ts_ns
    acc_df = acc_df[['timestamp_ns', 'acc_x', 'acc_y', 'acc_z']].sort_values('timestamp_ns').drop_duplicates('timestamp_ns')
    gyr_df = gyr_df[['timestamp_ns', 'gyro_x', 'gyro_y', 'gyro_z']].sort_values('timestamp_ns').drop_duplicates('timestamp_ns')

    df = pd.merge_asof(acc_df, gyr_df, on='timestamp_ns', direction='nearest', tolerance=int(100*1e6)).dropna()
    intervals = df['timestamp_ns'].diff() / 1e6

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.hist(intervals[intervals < 100], bins=100, edgecolor='black', linewidth=0.5)
    ax.axvline(20, color='red', linestyle='--', label='Ideal (50Hz=20ms)')
    ax.axvline(MAX_GAP_MS, color='orange', linestyle='--', label=f'Threshold ({MAX_GAP_MS}ms)')
    ax.set_xlabel('Sampling Interval (ms)')
    ax.set_ylabel('Count')
    ax.set_title(f'Sampling Interval Distribution - {first_pair[2]}/{first_pair[3]}')
    ax.legend()
    ax.grid(alpha=0.3)
    plt.tight_layout()
    plt.savefig('/content/figures/step4_interval_hist.png', dpi=150)
    plt.close()

with open('/content/logs/step4_summary.json', 'w') as f:
    json.dump({'files': all_stats, 'skipped': skipped}, f, indent=2)

!git add figures/ logs/step4_*.json interim/
!git commit -m "preproc: final time normalization with all fallbacks"

print(f"\n{'='*60}\nStep 4 completed\n{'='*60}")



Step 4: Timeline Normalization
Extracting waist data...
✓ ACC: 114, GYR: 114
✓ File pairs: 114

[1/114] proband4/walking
  ms/ms, absolute, 13 segments, 30482 samples

[2/114] proband4/standing
  ms/ms, absolute, 12 segments, 29741 samples

[3/114] proband4/jumping
  ms/ms, absolute, 3 segments, 4148 samples

[4/114] proband4/lying
  ms/ms, absolute, 16 segments, 33106 samples

[5/114] proband4/running
  ms/ms, absolute, 40 segments, 50541 samples

[6/114] proband4/sitting
  ms/ms, absolute, 14 segments, 31248 samples

[7/114] proband14/walking
  ms/ms, absolute, 50 segments, 32007 samples

[8/114] proband14/standing
  ms/ms, absolute, 34 segments, 30114 samples

[9/114] proband14/jumping
  ms/ms, absolute, 5 segments, 4642 samples

[10/114] proband14/lying
  ms/ms, absolute, 27 segments, 30563 samples

[11/114] proband14/running
  ms/ms, absolute, 33 segments, 29786 samples

[12/114] proband14/sitting
  ms/ms, absolute, 31 segments, 30562 samples

[13/114] proband7/walking
  ms/ms, 

In [None]:
# ================ Step 5: Gravity Removal / Detrending (Batch Processing) ================
import numpy as np
import pandas as pd
from scipy.signal import butter, filtfilt
import matplotlib.pyplot as plt
from pathlib import Path
import json

print("\n\nStep 5: Gravity Removal / Detrending", "=" * 60, sep="\n")

# Input: resampled files in interim/; output: filtered files in proc/
content_root = Path('/content')
interim_dir = content_root / 'interim'
proc_dir = content_root / 'proc'
proc_dir.mkdir(exist_ok=True)

TARGET_FS = 50   # sampling rate of the interim files (Hz)
CUTOFF_HZ = 0.3  # high-pass cutoff applied to the accelerometer channels (Hz)

def highpass_filter(data, cutoff, fs, order=3):
    """Zero-phase Butterworth high-pass filter.

    ``cutoff`` and ``fs`` are given in the same frequency unit; the cutoff is
    normalised by the Nyquist frequency (``fs / 2``). The filter (third order by
    default) is applied forwards and backwards with ``filtfilt``.
    """
    nyquist = fs / 2.0
    coefficients = butter(order, cutoff / nyquist, btype='high', analog=False)
    return filtfilt(*coefficients, data)

# Process all files
def _static_window_start(acc_mag, window_size):
    """Return the first row of the `window_size`-row window with the lowest std.

    `acc_mag` must have a 0..n-1 index. `rolling(window).std()` stores the value
    of a window at the window's LAST row, so the position returned by `idxmin()`
    has to be moved back by `window_size - 1` to obtain the window start. If the
    series is shorter than one window (all rolling values are NaN), 0 is
    returned so the caller simply uses the rows that exist.
    """
    rolling_std = acc_mag.rolling(window_size).std()
    if not rolling_std.notna().any():
        return 0
    return int(rolling_std.idxmin()) - window_size + 1


interim_files = sorted(interim_dir.glob('*.csv'))
print(f"Found {len(interim_files)} files")

all_static_means = []
window_size = TARGET_FS * 2  # 2-second window used for the static check

for idx, filepath in enumerate(interim_files):
    print(f"\n[{idx+1}/{len(interim_files)}] {filepath.name}")

    df = pd.read_csv(filepath)
    print(f"  Original: {df.shape}, {df['segment_id'].nunique()} segments")

    processed_segments = []

    # Filter per segment so the filter never runs across a time gap
    for seg_id, seg_df in df.groupby('segment_id'):
        seg_df = seg_df.copy()

        # Accelerometer high-pass filtering
        for axis in ['x', 'y', 'z']:
            col = f'acc_{axis}'
            seg_df[col] = highpass_filter(seg_df[col].values, CUTOFF_HZ, TARGET_FS, order=3)

        # Gyroscope mean removal
        for axis in ['x', 'y', 'z']:
            col = f'gyro_{axis}'
            seg_df[col] = seg_df[col] - seg_df[col].mean()

        processed_segments.append(seg_df)

    df_filtered = pd.concat(processed_segments, ignore_index=True)

    # Validate static segment (from the longest segment): after filtering, the
    # mean of the quietest 2-second window should be close to zero
    longest_seg = df_filtered.groupby('segment_id').size().idxmax()
    seg_for_verify = df_filtered[df_filtered['segment_id'] == longest_seg].reset_index(drop=True)

    acc_mag = np.sqrt(seg_for_verify['acc_x']**2 + seg_for_verify['acc_y']**2 + seg_for_verify['acc_z']**2)
    static_idx = _static_window_start(acc_mag, window_size)
    static_seg = seg_for_verify.iloc[static_idx:static_idx+window_size]

    static_means = {f'acc_{ax}': static_seg[f'acc_{ax}'].mean() for ax in ['x', 'y', 'z']}
    all_static_means.append({'file': filepath.name, **static_means})

    # Save
    df_filtered.to_csv(proc_dir / filepath.name, index=False)
    print(f"  ✓ {len(df_filtered)} samples → proc/{filepath.name}")

print(f"\n✓ Completed {len(interim_files)} files")

# Plot verification figure for the first file
if interim_files:
    first_file = interim_files[0]
    df = pd.read_csv(proc_dir / first_file.name)
    longest_seg = df.groupby('segment_id').size().idxmax()
    seg = df[df['segment_id'] == longest_seg].reset_index(drop=True)

    acc_mag = np.sqrt(seg['acc_x']**2 + seg['acc_y']**2 + seg['acc_z']**2)
    static_idx = _static_window_start(acc_mag, window_size)
    static_seg = seg.iloc[static_idx:static_idx+window_size]

    fig, axes = plt.subplots(3, 1, figsize=(12, 8), sharex=True)
    time_sec = np.arange(len(seg)) / TARGET_FS

    for i, axis in enumerate(['x', 'y', 'z']):
        ax = axes[i]
        col = f'acc_{axis}'
        ax.plot(time_sec, seg[col], linewidth=0.5, alpha=0.7)
        ax.axhline(0, color='red', linestyle='--', linewidth=1, alpha=0.5)

        # Shade the 2-second static window and report its mean in the legend
        static_t = static_idx / TARGET_FS
        static_mean = static_seg[col].mean()
        ax.axvspan(static_t, static_t + 2, color='green', alpha=0.2,
                   label=f'Static (mean={static_mean:.4f})')

        ax.set_ylabel(f'ACC {axis.upper()} (m/s²)')
        ax.grid(alpha=0.3)
        ax.legend(loc='upper right')

    axes[-1].set_xlabel('Time (s)')
    axes[0].set_title(f'Detrended Signal - {first_file.name} (segment {longest_seg})')
    plt.tight_layout()
    plt.savefig('/content/figures/step5_detrend_verify.png', dpi=150)
    plt.close()
    print(f"\n✓ Verification figure: figures/step5_detrend_verify.png")

# Save parameters: filter settings plus the static-window means of the first five files
acc_filter_settings = {'cutoff_hz': CUTOFF_HZ, 'order': 3, 'filter_type': 'Butterworth'}
filter_params = dict(
    acc_highpass=acc_filter_settings,
    gyro_detrend='mean_removal',
    sampling_rate=TARGET_FS,
    filtering_method='per_segment',
    files_processed=len(interim_files),
    static_means_samples=all_static_means[:5],
)

with open('/content/logs/step5_filter_params.json', 'w') as params_file:
    json.dump(filter_params, params_file, indent=2)

# Commit the figure, the parameter log and the filtered files
for git_command in (
    'git add figures/step5_detrend_verify.png logs/step5_filter_params.json proc/',
    'git commit -m "preproc: batch filtering for all files"',
):
    get_ipython().system(git_command)

banner = '=' * 60
print(f"\n{banner}\nStep 5 completed\n{banner}")



Step 5: Gravity Removal / Detrending
Found 112 files

[1/112] proband10_climbingdown_waist.csv
  Original: (21216, 10), 20 segments
  ✓ 21216 samples → proc/proband10_climbingdown_waist.csv

[2/112] proband10_climbingup_waist.csv
  Original: (22201, 10), 21 segments
  ✓ 22201 samples → proc/proband10_climbingup_waist.csv

[3/112] proband10_jumping_waist.csv
  Original: (5193, 10), 1 segments
  ✓ 5193 samples → proc/proband10_jumping_waist.csv

[4/112] proband10_lying_waist.csv
  Original: (31164, 10), 22 segments
  ✓ 31164 samples → proc/proband10_lying_waist.csv

[5/112] proband10_running_waist.csv
  Original: (31071, 10), 31 segments
  ✓ 31071 samples → proc/proband10_running_waist.csv

[6/112] proband10_sitting_waist.csv
  Original: (30836, 10), 32 segments
  ✓ 30836 samples → proc/proband10_sitting_waist.csv

[7/112] proband10_standing_waist.csv
  Original: (31946, 10), 27 segments
  ✓ 31946 samples → proc/proband10_standing_waist.csv

[8/112] proband10_walking_waist.csv
  Origin

In [None]:
# ================ Step 6: Class Mapping ================
import pandas as pd
from pathlib import Path
import json

print("\n\nStep 6: Class Mapping")
print("=" * 60)

proc_dir = Path('/content/proc')
TARGET_FS = 50

# Fixed order of 8 standard classes (consistent across folds)
STANDARD_CLASSES = ['walking', 'running', 'sitting', 'standing',
                    'lying', 'stairs_up', 'stairs_down', 'jumping']

# Mapping from original activity names
activity_mapping = {
    'climbingdown': 'stairs_down',
    'climbingup': 'stairs_up',
    'jumping': 'jumping',
    'lying': 'lying',
    'running': 'running',
    'sitting': 'sitting',
    'standing': 'standing',
    'walking': 'walking'
}

# Sliding-window parameters (aligned with subsequent feature extraction)
WINDOW_SEC = 3
OVERLAP = 0.5
WINDOW_SAMPLES = int(TARGET_FS * WINDOW_SEC)
STRIDE_SAMPLES = int(WINDOW_SAMPLES * (1 - OVERLAP))
MIN_WINDOWS_THRESHOLD = 50

print(f"Sliding window: {WINDOW_SEC}s ({WINDOW_SAMPLES} samples), overlap {OVERLAP*100:.0f}%, stride {STRIDE_SAMPLES}")

# Scan files and count windows per segment
proc_files = sorted(proc_dir.glob('*.csv'))
print(f"\nFound {len(proc_files)} files")

# Per-original-activity totals and the proband × standard-class window matrix,
# both filled while scanning the processed files once.
activity_stats = {}
proband_class_matrix = {}

for filepath in proc_files:
    df = pd.read_csv(filepath)
    name_parts = filepath.stem.split('_')

    # Column values take precedence; the file name is only a fallback
    activity = df['activity'].iloc[0] if 'activity' in df.columns else name_parts[1]
    proband = df['proband'].iloc[0] if 'proband' in df.columns else name_parts[0]

    # Windows never span two segments, so count them segment by segment;
    # segments shorter than one window contribute nothing.
    n_windows = sum(
        1 + (len(seg) - WINDOW_SAMPLES) // STRIDE_SAMPLES
        for _, seg in df.groupby('segment_id')
        if len(seg) >= WINDOW_SAMPLES
    )

    # Totals keyed by the original (unmapped) activity name
    per_activity = activity_stats.setdefault(activity, {'samples': 0, 'windows': 0, 'files': 0})
    per_activity['samples'] += len(df)
    per_activity['windows'] += n_windows
    per_activity['files'] += 1

    # Coverage matrix: only activities that have a standard-class mapping are counted
    std_act = activity_mapping.get(activity)
    if std_act is not None:
        coverage_row = proband_class_matrix.setdefault(proband, {c: 0 for c in STANDARD_CLASSES})
        coverage_row[std_act] += n_windows

print("\nOriginal activity statistics:")
for act in sorted(activity_stats.keys()):
    stats = activity_stats[act]
    print(f"  {act:15s}: {stats['files']:2d} files, {stats['samples']:6d} samples, {stats['windows']:4d} windows")

# Fold the per-activity totals into the 8 standard classes
mapped_stats = {c: dict(windows=0, samples=0, files=0, original_names=[])
                for c in STANDARD_CLASSES}
tail_classes_original = []

for orig_act, stats in activity_stats.items():
    std_act = activity_mapping.get(orig_act)
    if std_act is None:
        # No standard class for this activity: it stays out of the mapped totals
        continue

    target = mapped_stats[std_act]
    for field in ('windows', 'samples', 'files'):
        target[field] += stats[field]
    if orig_act not in target['original_names']:
        target['original_names'].append(orig_act)

    # Tail check at the level of the original activity name
    if stats['windows'] < MIN_WINDOWS_THRESHOLD:
        tail_classes_original.append(dict(original=orig_act, mapped=std_act, windows=stats['windows']))

# Tail check at the standard-class level: a class is included iff it reaches the threshold
included_flags = {c: mapped_stats[c]['windows'] >= MIN_WINDOWS_THRESHOLD for c in STANDARD_CLASSES}
tail_standard_classes = [c for c in STANDARD_CLASSES if not included_flags[c]]

print("\nStatistics for the 8 standard classes:")
for std_act in STANDARD_CLASSES:
    stats = mapped_stats[std_act]
    status = " [TAIL]" if std_act in tail_standard_classes else ""
    status = " [MISSING]" if stats['windows'] == 0 else status
    print(f"  {std_act:15s}: {stats['files']:2d} files, {stats['samples']:6d} samples, {stats['windows']:4d} windows{status}")

# Fixed encoding
label_to_id = {c: i for i, c in enumerate(STANDARD_CLASSES)}
id_to_label = {i: c for c, i in label_to_id.items()}

print("\nLabel encoding:")
for i, c in id_to_label.items():
    print(f"  {i}: {c}")

# Proband coverage matrix
print("\nProband × Class coverage (number of windows):")
print(f"{'Proband':<12}", end='')
for c in STANDARD_CLASSES:
    print(f"{c[:4]:>6}", end='')
print()
for p in sorted(proband_class_matrix.keys()):
    print(f"{p:<12}", end='')
    for c in STANDARD_CLASSES:
        cnt = proband_class_matrix[p][c]
        print(f"{cnt:>6}", end='')
    print()

# Save configuration
classes_config = {
    'standard_classes': STANDARD_CLASSES,
    'num_classes': len(STANDARD_CLASSES),
    'label_to_id': label_to_id,
    'id_to_label': id_to_label,
    'activity_mapping': activity_mapping,
    'window_config': {
        'window_size_sec': WINDOW_SEC,
        'window_samples': WINDOW_SAMPLES,
        'overlap': OVERLAP,
        'stride_samples': STRIDE_SAMPLES,
        'sampling_rate_hz': TARGET_FS
    },
    'statistics': {
        'per_class': {c: {**mapped_stats[c], 'id': label_to_id[c]} for c in STANDARD_CLASSES},
        'tail_classes_original': tail_classes_original,
        'tail_standard_classes': tail_standard_classes,
        'included_flags': included_flags,
        'min_windows_threshold': MIN_WINDOWS_THRESHOLD,
        'proband_coverage': proband_class_matrix
    }
}

with open('/content/configs/classes.json', 'w') as f:
    json.dump(classes_config, f, indent=2)

print(f"\n✓ Class configuration saved: configs/classes.json")

if tail_standard_classes:
    print(f"\n⚠️ Tail classes at the standard level (windows < {MIN_WINDOWS_THRESHOLD}): {tail_standard_classes}")

included_classes = [c for c in STANDARD_CLASSES if included_flags[c]]
print(f"✓ Classes included for training ({len(included_classes)}/{len(STANDARD_CLASSES)}): {included_classes}")

get_ipython().system('git add configs/classes.json')
get_ipython().system('git commit -m "data: add standard-level tail classes and inclusion flags"')

print(f"\n{'='*60}\nStep 6 completed\n{'='*60}")



Step 6: Class Mapping
Sliding window: 3s (150 samples), overlap 50%, stride 75

Found 112 files

Original activity statistics:
  climbingdown   : 12 files, 284118 samples, 3425 windows
  climbingup     : 12 files, 357605 samples, 4331 windows
  jumping        : 15 files,  70663 samples,  842 windows
  lying          : 14 files, 436907 samples, 5343 windows
  running        : 15 files, 518843 samples, 6230 windows
  sitting        : 14 files, 433818 samples, 5259 windows
  standing       : 15 files, 459881 samples, 5574 windows
  walking        : 15 files, 468686 samples, 5618 windows

Statistics for the 8 standard classes:
  walking        : 15 files, 468686 samples, 5618 windows
  running        : 15 files, 518843 samples, 6230 windows
  sitting        : 14 files, 433818 samples, 5259 windows
  standing       : 15 files, 459881 samples, 5574 windows
  lying          : 14 files, 436907 samples, 5343 windows
  stairs_up      : 12 files, 357605 samples, 4331 windows
  stairs_down    : 

In [None]:
# ================ Step 7: LOSO Subject Splits ================
import pandas as pd
from pathlib import Path
import json

print("\n\nStep 7: LOSO Subject Splits")
print("=" * 60)

proc_dir = Path('/content/proc')

# Scan all files and extract subjects
proc_files = sorted(proc_dir.glob('*.csv'))
print(f"Found {len(proc_files)} files")

# Collect the subject of every processed file.
subjects = set()
file_subject_map = {}

for filepath in proc_files:
    # Only the first data row is needed to identify the subject, so avoid parsing
    # the whole file (each one holds tens of thousands of samples).
    first_row = pd.read_csv(filepath, nrows=1)
    # Prefer the 'proband' column; fall back to the "<proband>_<activity>_..." file name
    subject = (first_row['proband'].iloc[0] if 'proband' in first_row.columns
               else filepath.stem.split('_')[0])
    subjects.add(subject)
    file_subject_map[filepath.name] = subject

# NOTE: plain string sort, so 'proband10' precedes 'proband2'; fold ids follow this order.
subjects = sorted(subjects)
print(f"\n✓ Total subjects: {len(subjects)}")
print(f"Subject list: {subjects}")

# Build one fold per subject: that subject is held out, all others train
loso_splits = []

for fold_id, test_subject in enumerate(subjects):
    train_subjects = list(filter(lambda s: s != test_subject, subjects))
    fold_record = dict(
        fold=fold_id,
        test_subject=test_subject,
        train_subjects=train_subjects,
        n_train=len(train_subjects),
        n_test=1,
    )
    loso_splits.append(fold_record)

    print(f"\nFold {fold_id}: Test={test_subject}, Train={train_subjects}")

# Rows for the CSV export: same fields, with the train list flattened to a comma-separated string
splits_csv = [
    {**split, 'train_subjects': ','.join(split['train_subjects'])}
    for split in loso_splits
]

df_splits = pd.DataFrame(splits_csv)
df_splits.to_csv('/content/logs/splits.csv', index=False)
print(f"\n✓ Splits saved: logs/splits.csv")
print("\n" + df_splits.to_string(index=False))

# Save as JSON (for convenient downstream loading)
splits_config = {
    'split_method': 'LOSO',
    'n_folds': len(subjects),
    'subjects': subjects,
    'file_subject_map': file_subject_map,
    'folds': loso_splits
}

with open('/content/configs/splits.json', 'w') as f:
    json.dump(splits_config, f, indent=2)

print(f"\n✓ Split configuration saved: configs/splits.json")

# Validation: each subject is used exactly once as test set
test_subjects_count = pd.Series([s['test_subject'] for s in loso_splits]).value_counts()
assert (test_subjects_count == 1).all(), "Each subject should appear exactly once as the test set"
print(f"\n✓ Validation passed: each subject appears exactly once as the test set")

get_ipython().system('git add logs/splits.csv configs/splits.json')
get_ipython().system('git commit -m "split: create LOSO folds (leave-one-subject-out)"')

print(f"\n{'='*60}\nStep 7 completed\n{'='*60}")



Step 7: LOSO Subject Splits
Found 112 files

✓ Total subjects: 15
Subject list: ['proband1', 'proband10', 'proband11', 'proband12', 'proband13', 'proband14', 'proband15', 'proband2', 'proband3', 'proband4', 'proband5', 'proband6', 'proband7', 'proband8', 'proband9']

Fold 0: Test=proband1, Train=['proband10', 'proband11', 'proband12', 'proband13', 'proband14', 'proband15', 'proband2', 'proband3', 'proband4', 'proband5', 'proband6', 'proband7', 'proband8', 'proband9']

Fold 1: Test=proband10, Train=['proband1', 'proband11', 'proband12', 'proband13', 'proband14', 'proband15', 'proband2', 'proband3', 'proband4', 'proband5', 'proband6', 'proband7', 'proband8', 'proband9']

Fold 2: Test=proband11, Train=['proband1', 'proband10', 'proband12', 'proband13', 'proband14', 'proband15', 'proband2', 'proband3', 'proband4', 'proband5', 'proband6', 'proband7', 'proband8', 'proband9']

Fold 3: Test=proband12, Train=['proband1', 'proband10', 'proband11', 'proband13', 'proband14', 'proband15', 'proban

In [None]:
# ================ Step 8: Sliding Windowing and Label Assignment ================
import numpy as np
import pandas as pd
from pathlib import Path
import json
from collections import defaultdict

print("\n\nStep 8: Sliding Windowing and Label Assignment")
print("=" * 60)

# Load configuration
with open('/content/configs/classes.json', 'r') as f:
    classes_cfg = json.load(f)

with open('/content/configs/splits.json', 'r') as f:
    splits_cfg = json.load(f)

proc_dir = Path('/content/proc')
features_dir = Path('/content/features')
features_dir.mkdir(exist_ok=True)

# Window parameters
WINDOW_SEC = 3
OVERLAP = 0.5
TARGET_FS = 50
WINDOW_SAMPLES = int(TARGET_FS * WINDOW_SEC)
STRIDE_SAMPLES = int(WINDOW_SAMPLES * (1 - OVERLAP))
DOMINANT_THRESHOLD = 0.8

label_to_id = classes_cfg['label_to_id']

print(f"Window parameters: {WINDOW_SEC}s ({WINDOW_SAMPLES} samples), overlap {OVERLAP*100:.0f}%, stride {STRIDE_SAMPLES}")
print(f"Dominant-label threshold: {DOMINANT_THRESHOLD*100:.0f}%\n")

# Process each file to generate all windows
proc_files = sorted(proc_dir.glob('*.csv'))
print(f"Processing {len(proc_files)} files...\n")

# Slide a fixed-size window over every segment of every file.
# A window is kept only if one activity covers at least DOMINANT_THRESHOLD of
# its samples, and it is labelled with that dominant activity.
all_windows = []
discarded_windows = 0

for file_idx, filepath in enumerate(proc_files):
    df = pd.read_csv(filepath)

    subject = df['proband'].iloc[0]
    activity = df['activity'].iloc[0]
    # File-level label: used for the progress line and as the fast path below.
    # Looking it up here also fails early on an activity with no label id.
    std_label = classes_cfg['activity_mapping'].get(activity, activity)
    label_id = label_to_id[std_label]

    file_windows = 0
    for seg_id, seg_df in df.groupby('segment_id'):
        seg_df = seg_df.reset_index(drop=True)
        seg_len = len(seg_df)

        # Segments shorter than one window cannot produce any window
        if seg_len < WINDOW_SAMPLES:
            continue

        for start_idx in range(0, seg_len - WINDOW_SAMPLES + 1, STRIDE_SAMPLES):
            end_idx = start_idx + WINDOW_SAMPLES
            window = seg_df.iloc[start_idx:end_idx]

            # Determine the dominant activity inside this window
            window_labels = window['activity'].values
            unique_labels, counts = np.unique(window_labels, return_counts=True)
            dominant_idx = counts.argmax()
            dominant_label = unique_labels[dominant_idx]
            dominant_ratio = counts[dominant_idx] / len(window_labels)

            if dominant_ratio < DOMINANT_THRESHOLD:
                discarded_windows += 1
                continue

            # Label the window with its dominant activity. Previously the
            # file-level label was used unconditionally and `dominant_label`
            # was dead code, which would mislabel windows of a mixed file.
            if dominant_label == activity:
                window_label, window_label_id = std_label, label_id
            else:
                window_label = classes_cfg['activity_mapping'].get(dominant_label, dominant_label)
                window_label_id = label_to_id[window_label]

            # Save window
            window_data = {
                'subject': subject,
                'activity': window_label,
                'label': window_label_id,
                'file': filepath.name,
                'segment_id': seg_id,
                'start_idx': start_idx,
                'dominant_ratio': dominant_ratio
            }

            # Raw samples are stored as plain lists so the window is JSON-serializable
            for col in ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']:
                window_data[col] = window[col].values.tolist()

            all_windows.append(window_data)
            file_windows += 1

    print(f"[{file_idx+1}/{len(proc_files)}] {filepath.name}: {file_windows} windows ({std_label}, {subject})")

print(f"\n✓ Total windows: {len(all_windows)}")
print(f"✓ Discarded windows: {discarded_windows} (dominant label < {DOMINANT_THRESHOLD*100:.0f}%)")

# Save window metadata (excluding sensor data)
windows_meta = pd.DataFrame([{k: v for k, v in w.items()
                              if k not in ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']}
                             for w in all_windows])

# Add window IDs
windows_meta['window_id'] = (windows_meta['file'] + ':' +
                              windows_meta['segment_id'].astype(str) + ':' +
                              windows_meta['start_idx'].astype(str))

windows_meta.to_csv(features_dir / 'windows_meta.csv', index=False)
print(f"\n✓ Global window metadata: features/windows_meta.csv")

# Save complete window data
with open(features_dir / 'windows_raw.json', 'w') as f:
    json.dump(all_windows, f)
print(f"✓ Raw window data: features/windows_raw.json")

# Generate train/test split per fold
print("\n" + "="*60)
print("Generate train/test splits per fold:")
print("="*60)

per_fold_totals = []

for fold in splits_cfg['folds']:
    k, test_subj = fold['fold'], fold['test_subject']

    # Tag every window with the fold id and its role (train/test) in this fold
    is_test = windows_meta['subject'] == test_subj
    fold_meta = windows_meta.assign(fold=k, split=np.where(is_test, 'test', 'train'))

    # Per-fold metadata file
    fold_meta.to_csv(features_dir / f'windows_meta_fold{k}.csv', index=False)

    # Window counts broken down by split / activity / subject
    stats = (
        fold_meta
        .groupby(['split', 'activity', 'subject'])
        .size()
        .reset_index(name='windows')
    )
    stats.to_csv(f'/content/logs/window_stats_fold{k}.csv', index=False)

    # Every window is either test or train, so the train count is the complement
    n_test = int(is_test.sum())
    n_train = len(fold_meta) - n_test

    per_fold_totals.append(dict(
        fold=k,
        test_subject=test_subj,
        n_train_windows=n_train,
        n_test_windows=n_test,
        n_total=n_train + n_test,
    ))

    print(f"Fold {k}: Train={n_train}, Test={n_test}, test subject={test_subj}")

# One summary row per fold
df_fold_totals = pd.DataFrame(per_fold_totals)
df_fold_totals.to_csv('/content/logs/window_fold_totals.csv', index=False)
print(f"\n✓ Fold-level summary: logs/window_fold_totals.csv")

# Global summary
summary = {
    'total_windows': len(all_windows),
    'discarded_windows': discarded_windows,
    'window_params': {
        'window_size_sec': WINDOW_SEC,
        'window_samples': WINDOW_SAMPLES,
        'overlap': OVERLAP,
        'stride_samples': STRIDE_SAMPLES,
        'dominant_threshold': DOMINANT_THRESHOLD
    },
    'per_class_totals': windows_meta.groupby('activity')['window_id'].count().to_dict(),
    'per_subject_totals': windows_meta.groupby('subject')['window_id'].count().to_dict()
}

with open('/content/logs/window_summary.json', 'w') as f:
    json.dump(summary, f, indent=2)

print("\nGlobal statistics:")
print(f"  Per class: {summary['per_class_totals']}")
print(f"  Per subject: {summary['per_subject_totals']}")

get_ipython().system('git add features/ logs/window_*.csv logs/window_*.json')
get_ipython().system('git commit -m "feature: windowing with per-fold train/test splits"')

print(f"\n{'='*60}\nStep 8 completed\n{'='*60}")



Step 8: Sliding Windowing and Label Assignment
Window parameters: 3s (150 samples), overlap 50%, stride 75
Dominant-label threshold: 80%

Processing 112 files...

[1/112] proband10_climbingdown_waist.csv: 254 windows (stairs_down, proband10)
[2/112] proband10_climbingup_waist.csv: 264 windows (stairs_up, proband10)
[3/112] proband10_jumping_waist.csv: 68 windows (jumping, proband10)
[4/112] proband10_lying_waist.csv: 384 windows (lying, proband10)
[5/112] proband10_running_waist.csv: 367 windows (running, proband10)
[6/112] proband10_sitting_waist.csv: 366 windows (sitting, proband10)
[7/112] proband10_standing_waist.csv: 388 windows (standing, proband10)
[8/112] proband10_walking_waist.csv: 372 windows (walking, proband10)
[9/112] proband11_climbingdown_waist.csv: 293 windows (stairs_down, proband11)
[10/112] proband11_climbingup_waist.csv: 367 windows (stairs_up, proband11)
[11/112] proband11_jumping_waist.csv: 53 windows (jumping, proband11)
[12/112] proband11_lying_waist.csv: 396

In [None]:
# ================ Step 9: Per-Fold Standardization (Performance-Optimized) ================
import numpy as np
import pandas as pd
from pathlib import Path
import json

print("\n\nStep 9: Per-Fold Standardization (z-score)")
print("=" * 60)

# Load configuration
with open('/content/configs/splits.json', 'r') as f:
    splits_cfg = json.load(f)

# Load window data
with open('/content/features/windows_raw.json', 'r') as f:
    all_windows = json.load(f)

features_dir = Path('/content/features')
proc_dir = Path('/content/proc')

CHANNELS = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
EPS = 1e-8

print(f"Channels: {CHANNELS}")
print(f"Total windows: {len(all_windows)}\n")

scaler_summary = []

# ---- Fold-independent preparation (done once, not once per fold) ----
# Hold each channel as one dense (n_windows, window_len) float32 matrix.
# The previous implementation extended Python lists sample by sample, which
# created tens of millions of scalar objects per fold.
raw_by_channel = {
    ch: np.asarray([w[ch] for w in all_windows], dtype=np.float32)
    for ch in CHANNELS
}
for ch in CHANNELS:
    assert raw_by_channel[ch].ndim == 2, (
        f"Channel {ch}: expected a non-empty set of equal-length windows, "
        f"got array shape {raw_by_channel[ch].shape}"
    )
all_subjects = np.array([w['subject'] for w in all_windows])
all_activities = np.array([w['activity'] for w in all_windows])
all_labels = np.array([w['label'] for w in all_windows], dtype=np.int32)

for fold in splits_cfg['folds']:
    k = fold['fold']
    test_subj = fold['test_subject']

    print(f"\nFold {k}: test subject={test_subj}")

    fold_meta = pd.read_csv(features_dir / f'windows_meta_fold{k}.csv')
    assert len(all_windows) == len(fold_meta), f"Window count mismatch: {len(all_windows)} vs {len(fold_meta)}"

    # Row i of fold_meta is matched positionally with all_windows[i].
    # Rows whose split is neither 'train' nor 'test' are dropped from the output.
    train_mask = (fold_meta['split'] == 'train').to_numpy()
    test_mask = (fold_meta['split'] == 'test').to_numpy()
    keep_mask = train_mask | test_mask
    n_train_windows = int(train_mask.sum())
    n_test_windows = int(test_mask.sum())

    print(f"  Train windows: {n_train_windows}, Test windows: {n_test_windows}")

    # Scaler parameters come from the TRAINING windows only (no test leakage).
    # Accumulate in float64 so the statistics over millions of float32 samples
    # do not suffer from rounding; EPS guards against a zero std.
    scaler_params = {}
    for ch in CHANNELS:
        train_values = raw_by_channel[ch][train_mask]
        mean = float(train_values.mean(dtype=np.float64))
        std = float(max(train_values.std(dtype=np.float64), EPS))
        scaler_params[ch] = {'mean': mean, 'std': std}

    print(f"  Scaler parameters:")
    for ch in CHANNELS:
        print(f"    {ch}: mean={scaler_params[ch]['mean']:.4f}, std={scaler_params[ch]['std']:.4f}")

    # Standardize all kept windows of a channel in one vectorized operation
    normalized = {}
    for ch in CHANNELS:
        centered = raw_by_channel[ch][keep_mask] - scaler_params[ch]['mean']
        normalized[ch] = (centered / scaler_params[ch]['std']).astype(np.float32, copy=False)

    # Train/test membership re-indexed onto the kept rows
    kept_is_train = train_mask[keep_mask]
    kept_is_test = test_mask[keep_mask]

    # Post-standardization validation: training set (expected mean≈0, std≈1)
    print(f"  Training-set validation after standardization:")
    for ch in CHANNELS:
        train_part = normalized[ch][kept_is_train]
        mean_val = train_part.mean(dtype=np.float64)
        std_val = train_part.std(dtype=np.float64)
        print(f"    {ch}: mean={mean_val:.6f}, std={std_val:.6f}")

    # Post-standardization validation: test set (skipped when the fold has no test windows)
    print(f"  Test-set validation after standardization:")
    if n_test_windows > 0:
        for ch in CHANNELS:
            mean_val = normalized[ch][kept_is_test].mean(dtype=np.float64)
            print(f"    {ch}: mean={mean_val:.6f}")

    # Persist scaler parameters
    scaler_file = proc_dir / f'scaler_fold{k}.npz'
    np.savez(scaler_file, **{f'{ch}_mean': scaler_params[ch]['mean'] for ch in CHANNELS},
                          **{f'{ch}_std': scaler_params[ch]['std'] for ch in CHANNELS})

    # Persist standardized windows as NPZ (float32).
    # window_ids goes through a Python list so NumPy stores a fixed-width string
    # array rather than an object array (object arrays need pickle to load).
    norm_file = features_dir / f'windows_normalized_fold{k}.npz'
    np.savez_compressed(norm_file,
                        window_ids=np.array(fold_meta.loc[keep_mask, 'window_id'].tolist()),
                        subjects=all_subjects[keep_mask],
                        activities=all_activities[keep_mask],
                        labels=all_labels[keep_mask],
                        splits=np.where(kept_is_train, 'train', 'test'),
                        **normalized)

    print(f"  ✓ Saved: {scaler_file.name}, {norm_file.name}")

    scaler_summary.append({
        'fold': k,
        'test_subject': test_subj,
        'n_train': n_train_windows,
        'n_test': n_test_windows,
        'scaler_params': scaler_params
    })

with open('/content/logs/scaler_summary.json', 'w') as f:
    json.dump(scaler_summary, f, indent=2)

print(f"\n{'='*60}")
print(f"✓ Completed standardization across {len(splits_cfg['folds'])} folds")
print(f"✓ Scaler parameters: proc/scaler_fold*.npz")
print(f"✓ Standardized data: features/windows_normalized_fold*.npz (NPZ/float32)")
print(f"✓ Summary: logs/scaler_summary.json")

get_ipython().system('git add proc/scaler_fold*.npz features/windows_normalized_fold*.npz logs/scaler_summary.json')
get_ipython().system('git commit -m "preproc: optimized z-score with NPZ storage and validation"')

print(f"\n{'='*60}\nStep 9 completed\n{'='*60}")



Step 9: Per-Fold Standardization (z-score)
Channels: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
Total windows: 36622


Fold 0: test subject=proband1
  Train windows: 34727, Test windows: 1895
  Scaler parameters:
    acc_x: mean=-0.0001, std=3.8156
    acc_y: mean=0.0000, std=1.8273
    acc_z: mean=0.0001, std=2.0051
    gyro_x: mean=-0.0001, std=0.5433
    gyro_y: mean=-0.0000, std=0.6868
    gyro_z: mean=-0.0001, std=0.3573
  Training-set validation after standardization:
    acc_x: mean=0.000000, std=1.000000
    acc_y: mean=0.000000, std=1.000000
    acc_z: mean=-0.000000, std=1.000000
    gyro_x: mean=0.000000, std=1.000000
    gyro_y: mean=0.000000, std=1.000000
    gyro_z: mean=0.000000, std=1.000000
  Test-set validation after standardization:
    acc_x: mean=-0.000124
    acc_y: mean=0.000252
    acc_z: mean=0.000556
    gyro_x: mean=0.001704
    gyro_y: mean=-0.000234
    gyro_z: mean=0.000859
  ✓ Saved: scaler_fold0.npz, windows_normalized_fold0.npz

Fold 1:

In [None]:
# Run once to specify which folds to execute
import json, os
os.makedirs("logs", exist_ok=True)
# Use a context manager so the file is flushed and closed deterministically
# (the bare open(...) passed to json.dump was never closed).
with open("logs/active_folds.json", "w") as f:
    json.dump({"folds":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]}, f, indent=2)

In [None]:
# ================ Step 10: ROCKET Feature Generation (Academic-Compliant Edition) ================
from pathlib import Path
import json
import shutil

# ============ Configuration Loading ============
def get_active_folds(path="logs/active_folds.json", default_all=None):
    """Return the fold ids listed under "folds" in the JSON file at `path`.

    If the file does not exist, `default_all` is returned unchanged.
    """
    config_file = Path(path)
    if not config_file.exists():
        return default_all
    config = json.loads(config_file.read_text())
    return config["folds"]

def get_active_rockets(path="logs/active_rockets.json", default_all=None):
    """Return the rocket names listed under "rockets" in the JSON file at `path`.

    If the file does not exist, `default_all` is returned when it is non-empty,
    otherwise the built-in list ['multirocket', 'minirocket'].
    """
    config_file = Path(path)
    if not config_file.exists():
        return default_all or ['multirocket', 'minirocket']
    return json.loads(config_file.read_text())["rockets"]

def scan_available_folds(data_dir="/content/features"):
    """Scan available folds from standardized window files.

    Looks for files named ``windows_normalized_fold<id>.npz`` in `data_dir` and
    returns the sorted list of integer fold ids. Files whose suffix is not an
    integer are ignored.
    """
    available = []
    for f in Path(data_dir).glob("windows_normalized_fold*.npz"):
        try:
            fold_id = int(f.stem.replace("windows_normalized_fold", ""))
        except ValueError:
            # Suffix is not a fold number; skip just this file. Catching only
            # ValueError keeps KeyboardInterrupt and real errors visible.
            continue
        available.append(fold_id)
    return sorted(available)

# Fetch configs: what exists on disk vs. what the config files ask for
available_folds = scan_available_folds()
print(f"Available folds (from data files): {available_folds}")

# With no active_folds.json, every fold found on disk is run
folds_to_run = get_active_folds(default_all=available_folds)
print(f"Running folds (from config): {folds_to_run}")

rockets_to_run = get_active_rockets(default_all=['minirocket'])
print(f"Running rockets (from config): {rockets_to_run}")

if not folds_to_run:
    print("❌ No folds to run! Please check logs/active_folds.json")
    import sys
    sys.exit(1)

# Surface a config/data mismatch now instead of as a file-load failure deep
# inside the main loop. This is only a warning; the run proceeds as before.
missing_folds = sorted(set(folds_to_run) - set(available_folds))
if missing_folds:
    print(f"⚠️  Requested folds without a windows_normalized_fold*.npz file: {missing_folds}")

print("\n" + "="*60)
print(f"📋 Will process {len(folds_to_run)} fold(s): {folds_to_run}")
print(f"📋 Will generate {len(rockets_to_run)} rocket(s): {rockets_to_run}")
print("="*60)

# Avoid excessive parallelism / thread oversubscription
import os
os.environ.setdefault("OMP_NUM_THREADS", "1")
os.environ.setdefault("MKL_NUM_THREADS", "1")
os.environ.setdefault("OPENBLAS_NUM_THREADS", "1")
os.environ.setdefault("NUMEXPR_NUM_THREADS", "1")

import numpy as np
import pandas as pd
from pathlib import Path
import json
import time
import pickle
import sys
from sktime.transformations.panel.rocket import MiniRocketMultivariate, MultiRocketMultivariate
from threadpoolctl import threadpool_limits, threadpool_info
from numpy.lib.format import open_memmap

print("\n\nStep 10: ROCKET Feature Generation (Academic-Compliant Edition)")
print("=" * 60)

# Create directories
logs_dir = Path('/content/logs')
logs_dir.mkdir(parents=True, exist_ok=True)
features_dir = Path('/content/features')
features_dir.mkdir(parents=True, exist_ok=True)
models_dir = Path('/content/models')
models_dir.mkdir(parents=True, exist_ok=True)

# Environment fingerprint
env_info = {
    'numpy': np.__version__,
    'pandas': pd.__version__,
    'sklearn': __import__('sklearn').__version__,
    'sktime': __import__('sktime').__version__,
    'python': sys.version,
    'OMP_NUM_THREADS': os.environ.get('OMP_NUM_THREADS'),
    'MKL_NUM_THREADS': os.environ.get('MKL_NUM_THREADS'),
    'OPENBLAS_NUM_THREADS': os.environ.get('OPENBLAS_NUM_THREADS'),
    'NUMEXPR_NUM_THREADS': os.environ.get('NUMEXPR_NUM_THREADS'),
    'threadpools': threadpool_info()
}

# Load configuration
with open('/content/configs/splits.json', 'r') as f:
    splits_cfg = json.load(f)

CHANNELS = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
N_CHANNELS = len(CHANNELS)
CALIB_MAX_SAMPLES = 4096  # Number of training subsamples to accelerate fit

def align_to_84(n):
    """Round `n` down to the nearest multiple of 84."""
    return 84 * (n // 84)

def stratified_by_subject_indices(subjects, max_n, seed=0):
    """Stratified sampling by subject to balance proportions across subjects.

    Args:
        subjects: 1-D sequence/array with one subject id per sample.
        max_n: maximum number of indices to return.
        seed: seed for the NumPy random generator (same seed -> same indices).

    Returns:
        Integer array of at most `max_n` distinct positions into `subjects`.
        Each subject first contributes up to ``max_n // n_subjects`` samples
        (at least one); if that leaves room, the remainder is drawn at random
        from the samples not yet chosen. Empty input or ``max_n <= 0`` yields
        an empty array.
    """
    # Accept plain lists too: `list == scalar` is a single bool, not a mask.
    subjects = np.asarray(subjects)
    if subjects.size == 0 or max_n <= 0:
        # Nothing to sample; also avoids dividing by zero subjects below.
        return np.array([], dtype=int)

    rng = np.random.default_rng(seed)
    uniq = np.unique(subjects)
    per = max(1, max_n // len(uniq))
    idx = []
    for s in uniq:
        s_idx = np.flatnonzero(subjects == s)
        take = min(per, len(s_idx))
        idx.extend(rng.choice(s_idx, size=take, replace=False))
    if len(idx) < max_n:
        # Top up from the samples that were not selected in the per-subject pass
        remain = np.setdiff1d(np.arange(len(subjects)), np.array(idx, dtype=int), assume_unique=True)
        need = min(max_n - len(idx), len(remain))
        if need > 0:
            idx.extend(rng.choice(remain, size=need, replace=False))
    # With more subjects than max_n the per-subject pass overshoots; truncate.
    return np.array(idx[:max_n], dtype=int)

# Compatibility helpers for differing sktime versions
def _get_used_kernels(tr):
    return int(getattr(tr, "num_kernels_", getattr(tr, "n_kernels_", None))
               or tr.get_params().get("num_kernels")
               or tr.get_params().get("n_kernels"))

def _get_n_fpk(tr, default=4):
    return int(getattr(tr, "n_features_per_kernel", getattr(tr, "n_features_per_kernel_", default)))

# ROCKET parameter configurations
# Keyed by rocket name; the main loop looks entries up via ROCKET_CONFIGS[rocket_type]
# for each name in `rockets_to_run`. Each entry holds:
#   'class'      - the sktime transformer class
#   'params'     - keyword arguments for that class (presumably passed to its
#                  constructor - confirm in the main loop)
#   'batch_size' - requested rows per batch (presumably forwarded to
#                  transform_to_memmap - confirm in the main loop)
ROCKET_CONFIGS = {
    'minirocket': {
        'class': MiniRocketMultivariate,
        'params': {
            # align_to_84 rounds down to a multiple of 84: 10_000 -> 9_996
            'num_kernels': align_to_84(10_000),
            'max_dilations_per_kernel': 32,
            'n_jobs': -1,
            # Fixed seed, same value as the other seeded steps in this cell
            'random_state': 0
        },
        'batch_size': 16384
    },
    'multirocket': {
        'class': MultiRocketMultivariate,
        'params': {
            # align_to_84 rounds down to a multiple of 84: 6_250 -> 6_216
            'num_kernels': align_to_84(6_250),
            'n_jobs': -1,
            'random_state': 0
        },
        'batch_size': 16384
    }
}

print("Configuration:")
for name, cfg in ROCKET_CONFIGS.items():
    print(f"  {name.upper()}: {cfg['params']}, batch_size={cfg['batch_size']}")
print(f"  Parallelization: n_jobs=-1, BLAS threads=1")
print(f"  I/O optimizations: memmap streaming writes, sampled statistics")
print(f"  Fit strategy: per-fold independent (stratified sampling of {CALIB_MAX_SAMPLES} samples)")
print(f"\nEnvironment fingerprint:")
for k, v in env_info.items():
    if k != 'threadpools':
        print(f"  {k}: {v}")
print()

# Dynamic batch auto-tuning + streaming memmap writes
def transform_to_memmap(transformer, X, output_file, batch_size, rocket_type,
                        target_mem_mb=512, probe=256):
    """Probe feature dimension, adapt batch size to memory budget, stream to memmap.

    Args:
        transformer: object exposing ``transform(batch)``; the result may be an
            array or an object with a ``.values`` attribute.
        X: array of samples, sliced along its first axis.
        output_file: path of the ``.npy`` file to create (float32, shape
            ``(len(X), n_features)``).
        batch_size: requested upper bound on rows per ``transform`` call.
        rocket_type: when equal to ``'multirocket'`` each batch is cast to
            float64 before being transformed.
        target_mem_mb: memory budget used to cap the batch size, computed from
            the float32 size of one output row.
        probe: maximum number of rows in the initial probe batch.

    Returns:
        ``(n_features, batch_times)`` where ``batch_times`` holds the duration of
        the probe followed by the duration of every written batch, in seconds.

    Raises:
        ValueError: if `X` contains no samples.
    """
    n_samples = len(X)
    if n_samples == 0:
        # Previously this ended in a ZeroDivisionError while counting batches.
        raise ValueError("transform_to_memmap: X contains no samples")

    needs_float64 = rocket_type == 'multirocket'

    def _transform_chunk(chunk):
        """Cast if required, run the transformer, and unwrap a `.values` result."""
        if needs_float64:
            # MultiROCKET requires float64
            chunk = chunk.astype(np.float64, copy=False)
        out = transformer.transform(chunk)
        if hasattr(out, 'values'):
            out = out.values
        return out

    batch_times = []

    # 1) Probe with a small batch (JIT warmup) to learn the feature dimension
    probe_n = min(probe, n_samples, max(1, batch_size//8))
    t0 = time.time()
    first_probe = _transform_chunk(X[:probe_n])
    n_features = int(first_probe.shape[1])
    batch_times.append(time.time() - t0)

    # 2) Determine safe batch size given memory budget
    #    (never below 128 rows unless there are fewer samples than that)
    bytes_per_row = n_features * 4
    safe_bs = max(128, min(batch_size, int((target_mem_mb * 1024**2) // bytes_per_row)))
    safe_bs = min(safe_bs, n_samples)
    if safe_bs < batch_size:
        print(f"  ⚙️  auto-tune batch_size: {batch_size} → {safe_bs} (target≈{target_mem_mb}MB)")

    # 3) Recompute first batch and create memmap
    t1 = time.time()
    first = _transform_chunk(X[:safe_bs]).astype(np.float32, copy=False)

    mm = open_memmap(output_file, mode='w+', dtype=np.float32, shape=(n_samples, n_features))
    try:
        mm[:len(first)] = first
        batch_times.append(time.time() - t1)
        total_batches = (n_samples - 1) // safe_bs + 1
        print(f"  Batch 1/{total_batches}: {len(first)} samples, {batch_times[-1]:.2f}s")

        # 4) Continue streaming writes
        for i in range(len(first), n_samples, safe_bs):
            s = time.time()
            end = min(i + safe_bs, n_samples)
            mm[i:end] = _transform_chunk(X[i:end]).astype(np.float32, copy=False)
            bt = time.time() - s
            batch_times.append(bt)
            print(f"  Batch {i//safe_bs+1}/{total_batches}: {end - i} samples, {bt:.2f}s")

        mm.flush()
    finally:
        # Release the memmap handle even if a batch failed part-way through
        del mm
    return n_features, batch_times

# Sampled statistics
def sample_statistics(file_path, sample_rate=0.01, seed=None):
    """Estimate value range and sparsity of a ``.npy`` matrix from a row sample.

    The file is opened memory-mapped, so only the sampled rows are read.

    Args:
        file_path: Path to a ``.npy`` array whose first axis indexes samples.
        sample_rate: Fraction of rows to sample. At least 1000 rows are used,
            or every row when the file holds fewer than that.
        seed: Optional seed for a dedicated random generator, which makes the
            sample (and therefore the statistics) reproducible. With the
            default ``None`` the global NumPy random state is used.

    Returns:
        dict with ``'min'`` and ``'max'`` of the sampled values and
        ``'sparsity_pct'``, the percentage of sampled entries equal to zero.
        All three are ``nan`` when there is nothing to sample.
    """
    X = np.load(file_path, mmap_mode='r')
    n_samples = X.shape[0]
    n_sample = min(max(int(n_samples * sample_rate), 1000), n_samples)

    rng = np.random if seed is None else np.random.default_rng(seed)
    indices = rng.choice(n_samples, size=n_sample, replace=False)
    # Ascending row order turns random seeks on the memmap into a forward scan;
    # min / max / zero-count do not depend on row order.
    sample = X[np.sort(indices)]

    if sample.size == 0:
        # min()/max() raise on zero-size arrays; report "no data" instead.
        return {'min': float('nan'), 'max': float('nan'), 'sparsity_pct': float('nan')}

    return {
        'min': float(sample.min()),
        'max': float(sample.max()),
        'sparsity_pct': float(np.count_nonzero(sample == 0) / sample.size * 100)
    }

# Main loop: for every configured ROCKET variant, fit one transformer per fold
# on a calibration subset of that fold's training data, stream train/test
# features to .npy files, and record timing / size statistics.
all_summaries = {}

for rocket_type in rockets_to_run:
    rocket_cfg = ROCKET_CONFIGS[rocket_type]
    print(f"\n{'='*60}")
    print(f"Generate {rocket_type.upper()} features")
    print(f"{'='*60}")

    rocket_summary = []

    for fold in splits_cfg['folds']:
        k = fold['fold']

        if k not in folds_to_run:
            print(f"⏭️  Skipping fold {k} (not in active_folds.json)")
            continue

        test_subj = fold['test_subject']

        print(f"\n{'='*60}")
        print(f"Fold {k}: test_subject={test_subj}")
        print(f"{'='*60}")

        # Disk space check: warn below 5 GB, abort below 2 GB
        free_gb = shutil.disk_usage(str(features_dir)).free / (1024**3)
        if free_gb < 5:
            print(f"⚠️  Warning: only {free_gb:.2f} GB of free disk space")
        assert free_gb > 2, f"❌ Insufficient disk space! Remaining {free_gb:.2f} GB < 2 GB"

        # Load standardized data
        norm_file = features_dir / f'windows_normalized_fold{k}.npz'
        print(f"Loading: {norm_file.name}")

        data = np.load(norm_file, allow_pickle=False)

        # Extract arrays
        window_ids = data['window_ids']
        subjects = data['subjects']
        labels = data['labels']
        splits = data['splits']

        # Build (n_samples, n_channels, n_timesteps) format.
        # np.stack already allocates a fresh array, so copy=False avoids a
        # second full-size copy when the channels are float32 already.
        X_all = np.stack([data[ch] for ch in CHANNELS], axis=1).astype(np.float32, copy=False)

        # Release npz handle
        if hasattr(data, "close"):
            data.close()

        n_samples, n_channels, n_timesteps = X_all.shape
        print(f"Data shape: {X_all.shape} (samples, channels, timesteps)")

        # Build train/test masks
        train_mask = splits == 'train'
        test_mask = splits == 'test'

        X_train = X_all[train_mask]
        X_test = X_all[test_mask]

        # Ensure contiguous memory
        X_train = np.ascontiguousarray(X_train)
        X_test = np.ascontiguousarray(X_test)

        y_train = labels[train_mask]
        y_test = labels[test_mask]

        train_ids = window_ids[train_mask]
        test_ids = window_ids[test_mask]
        train_subjs = subjects[train_mask]
        test_subjs = subjects[test_mask]

        print(f"Training set: {X_train.shape[0]} samples")
        print(f"Test set: {X_test.shape[0]} samples")

        # Anti-leakage assertion: no subject may appear on both sides
        train_subj_set = set(train_subjs)
        test_subj_set = set(test_subjs)
        intersection = train_subj_set & test_subj_set

        assert len(intersection) == 0, f"Leakage detection failed! Overlap between train and test subjects: {intersection}"
        print(f"✓ Leakage check passed: train subjects ∩ test subjects = ∅")

        # Per-fold independent fit (accelerated via stratified sampling)
        transformer = rocket_cfg['class'](**rocket_cfg['params'])

        # Stratified calibration subset
        calib_n = min(CALIB_MAX_SAMPLES, len(X_train))
        calib_idx = stratified_by_subject_indices(train_subjs, calib_n, seed=0)
        X_calib = X_train[calib_idx]

        print(f"\nFitting {rocket_type.upper()} on {len(calib_idx)} calibration samples...")
        fit_start = time.time()
        transformer.fit(X_calib)
        fit_time = time.time() - fit_start

        used_kernels = _get_used_kernels(transformer)
        print(f"✓ Fit completed: {fit_time:.2f}s (actual kernels: {used_kernels}, calibration samples: {len(calib_idx)})")

        # JIT warmup (MultiROCKET requires float64); result is discarded so the
        # compile cost does not land in the timed transform below
        warmup_batch = X_calib[:min(256, len(X_calib))]
        if rocket_type == 'multirocket':
            warmup_batch = warmup_batch.astype(np.float64, copy=False)
        _ = transformer.transform(warmup_batch)

        # Fetch batch size
        BATCH_SIZE = rocket_cfg['batch_size']

        # Transform training set
        print(f"\nTransforming training data (batch_size={BATCH_SIZE}, streaming to disk)...")
        train_feat_file = features_dir / f'X_{rocket_type}_train_fold{k}.npy'
        train_start = time.time()

        with threadpool_limits(limits=1, user_api='blas'):
            n_features, train_batch_times = transform_to_memmap(
                transformer, X_train, train_feat_file, BATCH_SIZE, rocket_type
            )

        train_time = time.time() - train_start
        train_feat_size_mb = train_feat_file.stat().st_size / (1024 ** 2)
        print(f"✓ Train transform: {train_time:.2f}s, shape: ({X_train.shape[0]}, {n_features})")
        print(f"  Batch time p50={np.median(train_batch_times):.2f}s, p90={np.percentile(train_batch_times, 90):.2f}s")
        print(f"  File size: {train_feat_size_mb:.2f} MB")

        # Persist training-set metadata (row order matches the feature file)
        train_meta_file = features_dir / f'meta_{rocket_type}_train_fold{k}.npz'
        np.savez(train_meta_file, y=y_train, window_ids=train_ids, subjects=train_subjs)

        # Transform test set
        print(f"\nTransforming test data (batch_size={BATCH_SIZE}, streaming to disk)...")
        test_feat_file = features_dir / f'X_{rocket_type}_test_fold{k}.npy'
        test_start = time.time()

        with threadpool_limits(limits=1, user_api='blas'):
            _, test_batch_times = transform_to_memmap(
                transformer, X_test, test_feat_file, BATCH_SIZE, rocket_type
            )

        test_time = time.time() - test_start
        test_feat_size_mb = test_feat_file.stat().st_size / (1024 ** 2)
        print(f"✓ Test transform: {test_time:.2f}s, shape: ({X_test.shape[0]}, {n_features})")
        print(f"  Batch time p50={np.median(test_batch_times):.2f}s, p90={np.percentile(test_batch_times, 90):.2f}s")
        print(f"  File size: {test_feat_size_mb:.2f} MB")

        # Persist test-set metadata
        test_meta_file = features_dir / f'meta_{rocket_type}_test_fold{k}.npz'
        np.savez(test_meta_file, y=y_test, window_ids=test_ids, subjects=test_subjs)

        # Feature-dimension assertion (MultiROCKET)
        if rocket_type == 'multirocket':
            n_fpk = _get_n_fpk(transformer, 4)
            expected_features = 2 * n_fpk * used_kernels
            assert n_features == expected_features, f"Feature dimension mismatch: {n_features} != {expected_features}"
            print(f"✓ Feature-dimension validation passed: {n_features} = 2 × {n_fpk} × {used_kernels}")

        # Persist per-fold transformer
        transformer_file = models_dir / f'transformer_{rocket_type}_fold{k}.pkl'
        with open(transformer_file, 'wb') as f:
            pickle.dump(transformer, f, protocol=4)
        transformer_size_mb = transformer_file.stat().st_size / (1024 ** 2)
        print(f"\n✓ Transformer saved: {transformer_file.name} ({transformer_size_mb:.2f} MB)")

        total_size_mb = train_feat_size_mb + test_feat_size_mb + transformer_size_mb

        print(f"✓ Train features: {train_feat_file.name} ({train_feat_size_mb:.2f} MB)")
        print(f"✓ Train metadata: {train_meta_file.name}")
        print(f"✓ Test features: {test_feat_file.name} ({test_feat_size_mb:.2f} MB)")
        print(f"✓ Test metadata: {test_meta_file.name}")
        print(f"✓ Total disk usage: {total_size_mb:.2f} MB")

        # Sampled statistics
        print(f"\nFeature statistics (1% sample):")
        train_stats = sample_statistics(train_feat_file)
        test_stats = sample_statistics(test_feat_file)

        print(f"  Number of features: {n_features}")
        print(f"  Train value range: [{train_stats['min']:.4f}, {train_stats['max']:.4f}]")
        print(f"  Test value range: [{test_stats['min']:.4f}, {test_stats['max']:.4f}]")
        print(f"  Train sparsity: {train_stats['sparsity_pct']:.2f}%")

        # Release memory. warmup_batch is included because it can be a view of
        # X_calib and would otherwise keep that buffer alive.
        del X_all, X_train, X_test, X_calib, warmup_batch

        # Record summary
        rocket_summary.append({
            'fold': k,
            'test_subject': test_subj,
            'rocket_type': rocket_type,
            'batch_size': rocket_cfg['batch_size'],
            'n_features': n_features,
            'actual_kernels': used_kernels,
            'calib_samples': len(calib_idx),
            'n_train_samples': int(len(y_train)),
            'n_test_samples': int(len(y_test)),
            'fit_time_sec': round(fit_time, 2),
            'train_transform_time_sec': round(train_time, 2),
            'test_transform_time_sec': round(test_time, 2),
            'total_time_sec': round(fit_time + train_time + test_time, 2),
            'train_batch_p50_sec': round(np.median(train_batch_times), 2),
            'train_batch_p90_sec': round(np.percentile(train_batch_times, 90), 2),
            'disk_usage_mb': round(total_size_mb, 2),
            'train_feat_size_mb': round(train_feat_size_mb, 2),
            'test_feat_size_mb': round(test_feat_size_mb, 2),
            'transformer_size_mb': round(transformer_size_mb, 2),
            'leak_check_passed': True,
            'independent_fit_per_fold': True
        })

    # Persist summary
    summary_df = pd.DataFrame(rocket_summary)
    summary_df.to_csv(logs_dir / f'rocket_{rocket_type}_summary.csv', index=False)

    # With zero processed folds the frame has no columns, so column lookups
    # would raise KeyError; emit an empty aggregate instead.
    if rocket_summary:
        aggregated_stats = {
            'avg_n_features': int(summary_df['n_features'].mean()),
            'avg_fit_time_sec': round(summary_df['fit_time_sec'].mean(), 2),
            'avg_train_transform_time_sec': round(summary_df['train_transform_time_sec'].mean(), 2),
            'avg_test_transform_time_sec': round(summary_df['test_transform_time_sec'].mean(), 2),
            'total_disk_usage_mb': round(summary_df['disk_usage_mb'].sum(), 2)
        }
    else:
        print(f"⚠️  No folds were processed for {rocket_type}; aggregated stats are empty")
        aggregated_stats = {}

    with open(logs_dir / f'rocket_{rocket_type}_summary.json', 'w') as f:
        json.dump({
            'rocket_type': rocket_type,
            'parameters': rocket_cfg['params'],
            'batch_size': rocket_cfg['batch_size'],
            'optimization': {
                'blas_threads': 1,
                'n_jobs': -1,
                'streaming_memmap_write': True,
                'sampled_statistics': True,
                'adaptive_batch_size': True,
                'jit_warmup': True,
                'independent_fit_per_fold': True,
                'stratified_calibration': True,
                'calib_max_samples': CALIB_MAX_SAMPLES,
                'float64_on_demand': True,
                'contiguous_memory': True,
                'disk_space_check': True,
                'separate_train_test_transform': True,
                'npy_format_for_mmap': True
            },
            'n_folds': len(rocket_summary),
            'per_fold_stats': rocket_summary,
            'aggregated_stats': aggregated_stats
        }, f, indent=2)

    all_summaries[rocket_type] = rocket_summary

    print(f"\n{'='*60}")
    print(f"{rocket_type.upper()} completed")
    print(f"✓ Summary CSV: {logs_dir / f'rocket_{rocket_type}_summary.csv'}")
    print(f"✓ Summary JSON: {logs_dir / f'rocket_{rocket_type}_summary.json'}")
    print(f"{'='*60}")

# Persist environment fingerprint
with open(logs_dir / 'rocket_env.json', 'w') as f:
    json.dump(env_info, f, indent=2)

# Final summary
print(f"\n{'='*60}")
print("All ROCKET feature generation completed")
print(f"{'='*60}")

for rocket_type in rockets_to_run:
    fold_records = all_summaries.get(rocket_type)
    if not fold_records:
        # Nothing was generated for this variant, so there is nothing to average.
        continue
    summary_df = pd.DataFrame(fold_records)
    print(f"\n{rocket_type.upper()} 汇总:")  # keep consistent with earlier semantics
    print(f"  Avg. number of features: {summary_df['n_features'].mean():.0f}")
    print(f"  Avg. fit time: {summary_df['fit_time_sec'].mean():.2f}s")
    print(f"  Avg. train transform time: {summary_df['train_transform_time_sec'].mean():.2f}s")
    print(f"  Avg. test transform time: {summary_df['test_transform_time_sec'].mean():.2f}s")
    print(f"  Avg. total time: {summary_df['total_time_sec'].mean():.2f}s")
    print(f"  Total disk usage: {summary_df['disk_usage_mb'].sum():.2f} MB")

print(f"\n✓ Environment fingerprint: {logs_dir / 'rocket_env.json'}")
print(f"✓ All folds passed anti-leakage validation")
print(f"✓ Academic compliance: per-fold independent fit (stratified sampling), no cross-fold information leakage")
print(f"✓ Optimizations: BLAS single-thread, adaptive batching, memmap streaming writes, sampled statistics, disk checks")
print(f"✓ MultiROCKET: float64 on demand, JIT warmup, contiguous memory")
print(f"\n⚠️  Large files are Git-ignored; commit commands:")
print(f"   git add logs/rocket_*.json configs/ models/transformer_*_fold*.pkl")
print(f"   git commit -m 'feature: academic-compliant ROCKET (independent fit per fold)'")

print(f"\n{'='*60}\nStep 10 completed\n{'='*60}")

Available folds (from data files): [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
Running folds (from config): [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
Running rockets (from config): ['minirocket']

📋 Will process 15 fold(s): [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
📋 Will generate 1 rocket(s): ['minirocket']


Step 10: ROCKET Feature Generation (Academic-Compliant Edition)
Configuration:
  MINIROCKET: {'num_kernels': 9996, 'max_dilations_per_kernel': 32, 'n_jobs': -1, 'random_state': 0}, batch_size=16384
  MULTIROCKET: {'num_kernels': 6216, 'n_jobs': -1, 'random_state': 0}, batch_size=16384
  Parallelization: n_jobs=-1, BLAS threads=1
  I/O optimizations: memmap streaming writes, sampled statistics
  Fit strategy: per-fold independent (stratified sampling of 4096 samples)

Environment fingerprint:
  numpy: 1.26.4
  pandas: 2.2.2
  sklearn: 1.4.2
  sktime: 0.30.0
  python: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
  OMP_NUM_THREADS: 1
  MKL_NUM_THREADS:

In [None]:
# ================ Step 11: MiniROCKET + Ridge Classifier (Ultimate Optimized Edition) ================
# Pin BLAS threads (must be set before imports)
# setdefault() only fills in variables that are not already set, so values
# exported earlier in the session win.
# NOTE(review): these variables presumably only affect numeric libraries that
# are loaded after this point; if NumPy was already imported by an earlier
# cell in the same kernel, the threadpool_limits(...) contexts used further
# down are what actually cap the thread pools — confirm.
import os
os.environ.setdefault("OMP_NUM_THREADS", "1")
os.environ.setdefault("MKL_NUM_THREADS", "1")
os.environ.setdefault("OPENBLAS_NUM_THREADS", "1")
os.environ.setdefault("NUMEXPR_NUM_THREADS", "1")

import numpy as np
import pandas as pd
from pathlib import Path
import json
import pickle
from sklearn.linear_model import RidgeClassifier
from sklearn.model_selection import GroupKFold
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.metrics import f1_score
from sklearn.feature_selection import VarianceThreshold
from joblib import Parallel, delayed
from threadpoolctl import threadpool_limits
import warnings
# Silences every warning category for the rest of the session, including
# convergence / deprecation warnings raised by the libraries above.
warnings.filterwarnings('ignore')

print("\n\nStep 11: MiniROCKET + Ridge Classifier (Ultimate Optimized Edition)")
print("=" * 60)

# Create output directory
# Relative to the current working directory; only 'preds' is created here,
# while 'logs/', 'models/' and 'features/' are used below without being created.
Path('preds').mkdir(exist_ok=True)

# Load configuration
def get_active_folds(path="logs/active_folds.json"):
    """Read the list stored under the ``"folds"`` key of a JSON file.

    Args:
        path: Location of the JSON file.

    Returns:
        The value of ``"folds"``, or an empty list when the file does not exist.
    """
    cfg_path = Path(path)
    if not cfg_path.exists():
        return []
    with cfg_path.open() as handle:
        return json.load(handle)["folds"]

# Fold definitions and class-id mapping (paths are relative to the working directory)
splits_cfg = json.loads(Path('configs/splits.json').read_text())
classes_cfg = json.loads(Path('configs/classes.json').read_text())

folds_to_run = get_active_folds()
print(f"Folds to run: {folds_to_run}\n")

# Ridge hyperparameters (ultimate optimized edition)
# Upper bound for the fine-search CV; lowered per fold when subjects are scarce.
CV_FOLDS = 5

print(
    "Ridge hyperparameters (ultimate optimized edition):",
    "  Two-stage search: coarse (9 points × 3-fold) → fine (9 points × 5-fold)",
    "  Sample weights: balanced (pre-cached)",
    "  solver: lsqr",
    "  Parallelism: thread-based + single-thread BLAS + pre_dispatch control",
    "  Low-variance filter: threshold=1e-6 (train-only fit)\n",
    sep="\n",
)

# JSON object keys are strings; convert them back to integer class ids.
id_to_label = dict(
    (int(class_id), class_name)
    for class_id, class_name in classes_cfg['id_to_label'].items()
)
label_order = sorted(id_to_label)
all_summaries = []

# Process each fold: variance-filter the MiniROCKET features, pick a Ridge
# alpha with a two-stage subject-grouped CV, report out-of-fold metrics,
# refit on the whole training set and store test predictions.
COARSE_CV_FOLDS = 3  # upper bound for the cheap first-stage search

for fold in splits_cfg['folds']:
    k = fold['fold']

    if k not in folds_to_run:
        print(f"⏭️  Skipping Fold {k}")
        continue

    test_subj = fold['test_subject']

    print(f"\n{'='*60}")
    print(f"Fold {k}: test subject={test_subj}")
    print(f"{'='*60}")

    # Load MiniROCKET training features
    X_train_raw = np.load(f'features/X_minirocket_train_fold{k}.npy', mmap_mode='r')
    # Context manager closes the .npz archive once the arrays are in memory,
    # so file handles do not accumulate across folds.
    # NOTE(review): allow_pickle=True is probably unnecessary if these files
    # only hold plain numeric/string arrays — confirm and tighten.
    with np.load(f'features/meta_minirocket_train_fold{k}.npz', allow_pickle=True) as meta_train:
        y_train = meta_train['y']
        subjects_train = meta_train['subjects']

    print(f"Training set: {X_train_raw.shape[0]} samples, {X_train_raw.shape[1]} features")

    # Low-variance filtering (train-only fit, no leakage)
    vt = VarianceThreshold(threshold=1e-6)
    X_train = vt.fit_transform(X_train_raw)
    n_features_removed = X_train_raw.shape[1] - X_train.shape[1]
    print(f"Low-variance filter: removed {n_features_removed} features, kept {X_train.shape[1]}")

    # Guardrail: GroupKFold cannot use more splits than there are groups, so
    # cap BOTH search stages by the number of distinct training subjects.
    n_unique_subjects = len(np.unique(subjects_train))
    actual_cv_folds = min(CV_FOLDS, n_unique_subjects)
    coarse_cv_folds = min(COARSE_CV_FOLDS, n_unique_subjects)
    print(f"Number of subjects: {n_unique_subjects}")
    if actual_cv_folds < CV_FOLDS:
        print(f"⚠️  Insufficient subjects, CV folds adjusted: {CV_FOLDS} → {actual_cv_folds}")
    if coarse_cv_folds < COARSE_CV_FOLDS:
        print(f"⚠️  Insufficient subjects, coarse CV folds adjusted: {COARSE_CV_FOLDS} → {coarse_cv_folds}")

    print(f"Class distribution: {dict(zip(*np.unique(y_train, return_counts=True)))}\n")

    # Precompute splits and weights (shared across all alphas)
    print(f"Stage A: Coarse search (9 points × {coarse_cv_folds}-fold)...")

    # Coarse stage splits
    splits_coarse = list(GroupKFold(n_splits=coarse_cv_folds).split(X_train, y_train, groups=subjects_train))
    w_coarse_list = [compute_sample_weight('balanced', y_train[tr]) for tr, _ in splits_coarse]

    # Fine stage splits
    splits_fine = list(GroupKFold(n_splits=actual_cv_folds).split(X_train, y_train, groups=subjects_train))
    w_fine_list = [compute_sample_weight('balanced', y_train[tr]) for tr, _ in splits_fine]

    # Function to compute CV score only (no OOF)
    def cv_score_only(alpha, splits, w_list):
        """Return the mean macro-F1 of a Ridge classifier over the given CV splits."""
        with threadpool_limits(limits=1):
            scores = []
            for (tr, va), w_tr in zip(splits, w_list):
                clf = RidgeClassifier(alpha=alpha, solver="lsqr", fit_intercept=True)
                clf.fit(X_train[tr], y_train[tr], sample_weight=w_tr)
                scores.append(f1_score(y_train[va], clf.predict(X_train[va]),
                                      average='macro', zero_division=0))
            return float(np.mean(scores))

    # Coarse search: 9 alpha points
    ALPHAS_COARSE = np.logspace(-6, 6, 9)
    # Leave one core free; never spawn more workers than there are alphas.
    n_jobs = min(len(ALPHAS_COARSE), max(1, (os.cpu_count() or 2) - 1))

    scores_coarse = Parallel(n_jobs=n_jobs, prefer="threads", pre_dispatch="2*n_jobs")(
        delayed(cv_score_only)(alpha, splits_coarse, w_coarse_list)
        for alpha in ALPHAS_COARSE
    )

    best_coarse_idx = int(np.argmax(scores_coarse))
    best_coarse_alpha = ALPHAS_COARSE[best_coarse_idx]
    best_coarse_score = scores_coarse[best_coarse_idx]

    print(f"  Coarse best: alpha={best_coarse_alpha:.6e}, CV macro F1={best_coarse_score:.4f}")

    # Fine search: ±1 decade around the best alpha
    print(f"\nStage B: Fine search (9 points × {actual_cv_folds}-fold)...")
    log_alpha = float(np.log10(best_coarse_alpha))
    ALPHAS_FINE = np.logspace(log_alpha - 1, log_alpha + 1, 9)

    scores_fine = Parallel(n_jobs=n_jobs, prefer="threads", pre_dispatch="2*n_jobs")(
        delayed(cv_score_only)(alpha, splits_fine, w_fine_list)
        for alpha in ALPHAS_FINE
    )

    best_fine_idx = int(np.argmax(scores_fine))
    best_alpha = float(ALPHAS_FINE[best_fine_idx])
    best_score = scores_fine[best_fine_idx]

    print(f"  Fine best: alpha={best_alpha:.6e}, CV macro F1={best_score:.4f}")

    # Save alpha curve (coarse + fine combined)
    alpha_grid = list(ALPHAS_COARSE) + list(ALPHAS_FINE)
    score_grid = scores_coarse + scores_fine
    pd.DataFrame({"alpha": alpha_grid, "cv_macro_f1": score_grid}).to_csv(
        f"logs/ridge_cv_fold{k}.csv", index=False
    )
    print(f"✓ Alpha curve saved: logs/ridge_cv_fold{k}.csv")

    # Generate OOF predictions only for the best alpha
    print(f"\nGenerating OOF predictions for the best alpha...")
    def oof_for_best_alpha(alpha, splits, w_list):
        """Return out-of-fold predictions for every training row at the given alpha."""
        with threadpool_limits(limits=1):
            # Every row lands in exactly one validation part, so each slot is filled.
            y_oof = np.empty_like(y_train)
            for (tr, va), w_tr in zip(splits, w_list):
                clf = RidgeClassifier(alpha=alpha, solver="lsqr", fit_intercept=True)
                clf.fit(X_train[tr], y_train[tr], sample_weight=w_tr)
                y_oof[va] = clf.predict(X_train[va])
            return y_oof

    y_oof_pred = oof_for_best_alpha(best_alpha, splits_fine, w_fine_list)

    # OOF validation metrics
    per_class_f1_oof = f1_score(y_train, y_oof_pred, labels=label_order,
                                 average=None, zero_division=0)
    macro_f1_oof = f1_score(y_train, y_oof_pred, average='macro', zero_division=0)

    print(f"\nTraining OOF validation (out-of-fold, not optimistic):")
    print(f"  Macro F1: {macro_f1_oof:.4f}")
    print(f"  Per-class F1:")
    for cid, f1v in zip(label_order, per_class_f1_oof):
        n_c = int((y_train == cid).sum())
        print(f"    {id_to_label[cid]:15s} (n={n_c:4d}): {f1v:.4f}")

    # Retrain on the full training set
    print(f"\nRetraining on the full training set...")
    sample_weights = compute_sample_weight('balanced', y_train)
    ridge = RidgeClassifier(alpha=best_alpha, solver="lsqr", fit_intercept=True)
    ridge.fit(X_train, y_train, sample_weight=sample_weights)
    print(f"✓ Training completed")

    # Save classifier and variance filter
    model_data = {'ridge': ridge, 'variance_filter': vt}
    model_file = f'models/ridge_fold{k}.pkl'
    with open(model_file, 'wb') as f:
        pickle.dump(model_data, f, protocol=4)

    model_size_mb = Path(model_file).stat().st_size / (1024 ** 2)
    print(f"\n✓ Model saved: {model_file} ({model_size_mb:.2f} MB)")

    # Test-set inference
    print(f"\nTest-set inference...")
    X_test_raw = np.load(f'features/X_minirocket_test_fold{k}.npy', mmap_mode='r')
    with np.load(f'features/meta_minirocket_test_fold{k}.npz', allow_pickle=True) as meta_test:
        y_test = meta_test['y']

    # Apply variance filter (transform only)
    X_test = vt.transform(X_test_raw)
    y_test_pred = ridge.predict(X_test)

    # Save predictions
    np.save(f'preds/preds_fold{k}_minirocket.npy', y_test_pred)
    print(f"✓ Test predictions saved: preds/preds_fold{k}_minirocket.npy")
    print(f"  Test set: {len(y_test)} samples")

    # Record summary
    summary = {
        'fold': k,
        'test_subject': test_subj,
        'n_train_samples': int(len(y_train)),
        'n_test_samples': int(len(y_test)),
        'n_features_original': int(X_train_raw.shape[1]),
        'n_features_filtered': int(X_train.shape[1]),
        'n_features_removed': int(n_features_removed),
        'actual_cv_folds': actual_cv_folds,
        'best_alpha': float(best_alpha),
        'best_coarse_alpha': float(best_coarse_alpha),
        'oof_macro_f1': float(macro_f1_oof),
        'per_class_f1_oof': {id_to_label[cid]: float(f1v) for cid, f1v in zip(label_order, per_class_f1_oof)},
        'model_size_mb': float(model_size_mb)
    }
    all_summaries.append(summary)

# Save summary
def _finite_or_none(value):
    """Return ``value`` as a float, or ``None`` when it is NaN/inf.

    json.dump would otherwise emit the bare tokens ``NaN`` / ``Infinity``,
    which strict JSON parsers reject (e.g. the std of a single fold is NaN).
    """
    value = float(value)
    return value if np.isfinite(value) else None


summary_df = pd.DataFrame([{
    'fold': s['fold'],
    'test_subject': s['test_subject'],
    'n_train_samples': s['n_train_samples'],
    'n_test_samples': s['n_test_samples'],
    'n_features_filtered': s['n_features_filtered'],
    'actual_cv_folds': s['actual_cv_folds'],
    'best_alpha': s['best_alpha'],
    'oof_macro_f1': s['oof_macro_f1'],
    'model_size_mb': s['model_size_mb']
} for s in all_summaries])

summary_df.to_csv('logs/ridge_summary.csv', index=False)

# With zero processed folds the frame has no columns, so every column lookup
# below would raise KeyError; report empty aggregates instead.
if all_summaries:
    aggregated_stats = {
        'avg_oof_macro_f1': _finite_or_none(summary_df['oof_macro_f1'].mean()),
        'std_oof_macro_f1': _finite_or_none(summary_df['oof_macro_f1'].std()),
        'avg_best_alpha': _finite_or_none(summary_df['best_alpha'].mean()),
        'avg_features_filtered': _finite_or_none(summary_df['n_features_filtered'].mean()),
        'total_model_size_mb': _finite_or_none(summary_df['model_size_mb'].sum())
    }
else:
    aggregated_stats = {
        'avg_oof_macro_f1': None,
        'std_oof_macro_f1': None,
        'avg_best_alpha': None,
        'avg_features_filtered': None,
        'total_model_size_mb': None
    }

with open('logs/ridge_summary.json', 'w') as f:
    json.dump({
        'ridge_config': {
            'two_stage_search': {
                'coarse': '9 points × 3-fold',
                'fine': '9 points × 5-fold',
                'total_evaluations': '≈18 evaluations (vs. original 65)'
            },
            'sample_weight': 'balanced (pre-cached)',
            'solver': 'lsqr',
            'variance_threshold': 1e-6,
            'parallel': {
                'prefer': 'threads',
                'blas_threads': 1,
                'pre_dispatch': '2*n_jobs'
            }
        },
        'n_folds': len(all_summaries),
        'per_fold_stats': all_summaries,
        'aggregated_stats': aggregated_stats
    }, f, indent=2)

print(f"\n{'='*60}")
print(f"Ridge classifier training completed")
print(f"{'='*60}")
if all_summaries:
    print(f"\nSummary:")
    print(f"  Mean OOF Macro F1: {summary_df['oof_macro_f1'].mean():.4f} ± {summary_df['oof_macro_f1'].std():.4f}")
    print(f"  Mean best alpha: {summary_df['best_alpha'].mean():.6e}")
    print(f"  Mean number of features (post-filter): {summary_df['n_features_filtered'].mean():.0f}")
    print(f"  Total model size: {summary_df['model_size_mb'].sum():.2f} MB")
else:
    print(f"\n⚠️  No folds were processed; nothing to summarize")
print(f"\nUltimate optimization notes:")
print(f"  1. Two-stage search: coarse→fine, evaluations 65→18 (~3.6×)")
print(f"  2. Thread-based parallelism: shared memory, avoids process IPC overhead (~1.5–2×)")
print(f"  3. Pre-caching: splits + weights precomputed (~1.3×)")
print(f"  4. Separated computation: compute scores first, then OOF only for best α (~1.2×)")
print(f"  5. Low-variance filtering: reduces feature dimensionality (~1.3–2×)")
print(f"  6. BLAS limits: avoid oversubscription (stability)")
print(f"  Overall speedup: 5–15× (depends on CPU cores and data scale)")
print(f"\n✓ Summary CSV: logs/ridge_summary.csv")
print(f"✓ Summary JSON: logs/ridge_summary.json")
print(f"✓ Alpha curves: logs/ridge_cv_fold*.csv")
print(f"✓ Test predictions: preds/preds_fold*_minirocket.npy")
print(f"\n{'='*60}\nStep 11 completed\n{'='*60}")



Step 11: MiniROCKET + Ridge Classifier (Ultimate Optimized Edition)
Folds to run: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

Ridge hyperparameters (ultimate optimized edition):
  Two-stage search: coarse (9 points × 3-fold) → fine (9 points × 5-fold)
  Sample weights: balanced (pre-cached)
  solver: lsqr
  Parallelism: thread-based + single-thread BLAS + pre_dispatch control
  Low-variance filter: threshold=1e-6 (train-only fit)


Fold 0: test subject=proband1
Training set: 34727 samples, 9996 features
Low-variance filter: removed 0 features, kept 9996
Number of subjects: 14
Class distribution: {0: 5222, 1: 5851, 2: 5259, 3: 5192, 4: 5343, 5: 3946, 6: 3122, 7: 792}

Stage A: Coarse search (9 points × 3-fold)...
  Coarse best: alpha=1.000000e+03, CV macro F1=0.8292

Stage B: Fine search (9 points × 5-fold)...
  Fine best: alpha=1.778279e+02, CV macro F1=0.8330
✓ Alpha curve saved: logs/ridge_cv_fold0.csv

Generating OOF predictions for the best alpha...

Training OOF validati

In [None]:
# ================ Step 13: TST Preparation ================
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from pathlib import Path
import json
from sklearn.model_selection import GroupKFold

print("\n\nStep 13: TST Preparation", "=" * 60, sep="\n")

# Load configuration: fold definitions and the subset of folds to process
splits_cfg = json.loads(Path('/content/configs/splits.json').read_text())
active_folds = json.loads(Path('/content/logs/active_folds.json').read_text())['folds']

# Input features and a scratch directory for intermediate artefacts
features_dir = Path('/content/features')
interim_dir = Path('/content/interim')
interim_dir.mkdir(exist_ok=True)

# TST parameters
N_CHANNELS, SEQ_LEN, PATCH_LEN = 6, 150, 25
BATCH_SIZE, NUM_WORKERS = 64, 4
N_VAL_SPLITS = 5

# Channel order used when stacking per-channel arrays into one tensor
CHANNELS = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']

print(
    f"Tensor shape: (C={N_CHANNELS}, L={SEQ_LEN})",
    f"Patch length: {PATCH_LEN}",
    f"Batch size: {BATCH_SIZE}, Workers: {NUM_WORKERS}",
    f"Number of validation splits: {N_VAL_SPLITS}\n",
    sep="\n",
)

class TSTDataset(Dataset):
    """In-memory dataset pairing each window with its class label.

    Args:
        data: Array with one window per entry along the first axis; stored as
            a float32 tensor.
        labels: Integer array with one label per window; stored as an int64
            tensor.
        subjects: Optional per-window subject identifiers, kept exactly as
            given (not returned by ``__getitem__``).

    Raises:
        ValueError: If ``data`` and ``labels`` differ in length.
    """

    def __init__(self, data, labels, subjects=None):
        # torch.from_numpy rejects negatively-strided views; ascontiguousarray
        # is a no-op for arrays that are already C-contiguous.
        data = np.ascontiguousarray(data)
        labels = np.ascontiguousarray(labels)
        if len(data) != len(labels):
            # Fail at construction instead of silently dropping labels or
            # hitting an IndexError in the middle of an epoch.
            raise ValueError(
                f"data and labels must have the same length, got {len(data)} and {len(labels)}"
            )
        self.data = torch.from_numpy(data).float()
        self.labels = torch.from_numpy(labels).long()
        self.subjects = subjects

    def __len__(self):
        """Number of windows."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(window, label)`` for position ``idx``."""
        return self.data[idx], self.labels[idx]

def prepare_fold(fold_id):
    """Assemble, validate and persist the TST tensors for one outer fold.

    Reads ``windows_normalized_fold{fold_id}.npz`` from ``features_dir``, stacks the
    per-channel arrays named in ``CHANNELS`` along axis 1, separates rows by the
    stored ``splits`` flag ('train' / 'test'), derives ``N_VAL_SPLITS``
    subject-grouped validation splits on the training rows, runs a one-batch
    DataLoader sanity check on validation split 0, and writes everything to
    ``interim_dir / tensors_fold{fold_id}.pt``.

    Args:
        fold_id: Identifier of the outer fold; used to build file names and stored
            in the saved dictionary and in the returned statistics.

    Returns:
        dict with the fold id, train/test sample counts, the unique train/test
        subjects and the number of validation splits.

    Raises:
        ValueError: If the stacked array's trailing shape is not
            ``(N_CHANNELS, SEQ_LEN)``, because those constants are saved as the
            shape metadata that the training step uses to build the model.
    """
    print(f"\n{'='*60}")
    print(f"Fold {fold_id}")
    print(f"{'='*60}")

    # Load standardized data; the context manager closes the archive's file handle
    norm_file = features_dir / f'windows_normalized_fold{fold_id}.npz'
    with np.load(norm_file) as data:
        splits = data['splits']
        labels = data['labels']
        subjects = data['subjects']
        # Assemble tensor (N, C, L)
        sensor_data = np.stack([data[ch] for ch in CHANNELS], axis=1)

    # Fail early if the data disagrees with the constants saved as metadata below
    if sensor_data.shape[1:] != (N_CHANNELS, SEQ_LEN):
        raise ValueError(
            f"Fold {fold_id}: expected windows of shape (C={N_CHANNELS}, L={SEQ_LEN}), "
            f"got {sensor_data.shape[1:]} from {norm_file}"
        )

    train_mask = (splits == 'train')
    test_mask = (splits == 'test')

    X_train_full = sensor_data[train_mask]
    y_train_full = labels[train_mask]
    subjects_train = subjects[train_mask]

    X_test = sensor_data[test_mask]
    y_test = labels[test_mask]
    subjects_test = subjects[test_mask]

    print(f"Train set: {X_train_full.shape}, Test set: {X_test.shape}")
    print(f"Training subjects: {np.unique(subjects_train).tolist()}")
    print(f"Test subjects: {np.unique(subjects_test).tolist()}")

    # Partition validation folds within the training set using GroupKFold
    gkf = GroupKFold(n_splits=N_VAL_SPLITS)
    val_splits = list(gkf.split(X_train_full, y_train_full, groups=subjects_train))

    print(f"\nValidation splits (GroupKFold={N_VAL_SPLITS}):")
    for split_no, (inner_train_idx, inner_val_idx) in enumerate(val_splits):
        val_subjects = np.unique(subjects_train[inner_val_idx])
        print(f"  Val Split {split_no}: Train={len(inner_train_idx)}, Val={len(inner_val_idx)}, Val subjects={val_subjects.tolist()}")

    # Validation split 0 is used as the example for the shape report and sanity check
    train_idx, val_idx = val_splits[0]

    X_train = X_train_full[train_idx]
    y_train = y_train_full[train_idx]
    X_val = X_train_full[val_idx]

    print(f"\nUsing validation split 0:")
    print(f"  Train: {X_train.shape}")
    print(f"  Validation: {X_val.shape}")
    print(f"  Test: {X_test.shape}")

    # Only the training loader is needed: the sanity check draws a single batch from it
    train_dataset = TSTDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                             num_workers=NUM_WORKERS, pin_memory=True)

    # Tensor sanity check
    print(f"\nTensor sanity check:")
    sample_batch = next(iter(train_loader))
    print(f"  Batch shape: {sample_batch[0].shape}")
    print(f"  Label shape: {sample_batch[1].shape}")
    print(f"  Dtype: {sample_batch[0].dtype}")
    print(f"  Label range: [{sample_batch[1].min()}, {sample_batch[1].max()}]")

    # Save tensor data (full training set plus the index pairs of every validation split)
    tensors = {
        'fold': fold_id,
        'X_train_full': torch.from_numpy(X_train_full).float(),
        'y_train_full': torch.from_numpy(y_train_full).long(),
        'subjects_train': subjects_train,
        'X_test': torch.from_numpy(X_test).float(),
        'y_test': torch.from_numpy(y_test).long(),
        'subjects_test': subjects_test,
        'val_splits_indices': val_splits,
        'shape': {
            'n_channels': N_CHANNELS,
            'seq_len': SEQ_LEN,
            'patch_len': PATCH_LEN
        },
        'config': {
            'batch_size': BATCH_SIZE,
            'num_workers': NUM_WORKERS,
            'n_val_splits': N_VAL_SPLITS
        }
    }

    save_path = interim_dir / f'tensors_fold{fold_id}.pt'
    torch.save(tensors, save_path)
    print(f"\n✓ Saved: {save_path}")

    return {
        'fold': fold_id,
        'train_full': len(X_train_full),
        'test': len(X_test),
        'train_subjects': np.unique(subjects_train).tolist(),
        'test_subjects': np.unique(subjects_test).tolist(),
        'n_val_splits': N_VAL_SPLITS
    }

# Prepare every active fold, keeping the per-fold statistics for the summary file
fold_stats = []
for fold_id in active_folds:
    stats = prepare_fold(fold_id)
    fold_stats.append(stats)

# Summary of the tensor layout, loader settings and split strategy used above
summary = dict(
    tensor_shape=f'(C={N_CHANNELS}, L={SEQ_LEN})',
    patch_len=PATCH_LEN,
    dataloader_config=dict(
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        shuffle_train=True,
        pin_memory=True,
    ),
    validation=dict(
        method='GroupKFold',
        n_splits=N_VAL_SPLITS,
        groupby='subject',
    ),
    split_order='LOSO outer -> GroupKFold inner',
    dtype='torch.float32',
    folds=fold_stats,
)

with open('/content/logs/step13_tst_summary.json', 'w') as f:
    json.dump(summary, f, indent=2)

# Completion report
print(f"\n{'='*60}")
print(f"✓ Completed TST preparation for {len(active_folds)} folds")
print(f"✓ Tensors: interim/tensors_fold{{k}}.pt")
print(f"✓ Summary: logs/step13_tst_summary.json")
print(f"{'='*60}\n")

# Record the generated artifacts in git (shell commands run through IPython)
for _git_cmd in (
    'git add interim/tensors_fold*.pt logs/step13_tst_summary.json',
    'git commit -m "tst: prepare tensors with GroupKFold validation"',
):
    get_ipython().system(_git_cmd)

print(f"Step 13 completed\n{'='*60}")



Step 13: TST Preparation
Tensor shape: (C=6, L=150)
Patch length: 25
Batch size: 64, Workers: 4
Number of validation splits: 5


Fold 0
Train set: (34727, 6, 150), Test set: (1895, 6, 150)
Training subjects: ['proband10', 'proband11', 'proband12', 'proband13', 'proband14', 'proband15', 'proband2', 'proband3', 'proband4', 'proband5', 'proband6', 'proband7', 'proband8', 'proband9']
Test subjects: ['proband1']

Validation splits (GroupKFold=5):
  Val Split 0: Train=27593, Val=7134, Val subjects=['proband12', 'proband14', 'proband5']
  Val Split 1: Train=29370, Val=5357, Val subjects=['proband10', 'proband8']
  Val Split 2: Train=27244, Val=7483, Val subjects=['proband3', 'proband4', 'proband6']
  Val Split 3: Train=27469, Val=7258, Val subjects=['proband11', 'proband7', 'proband9']
  Val Split 4: Train=27232, Val=7495, Val subjects=['proband13', 'proband15', 'proband2']

Using validation split 0:
  Train: (27593, 6, 150)
  Validation: (7134, 6, 150)
  Test: (1895, 6, 150)

Tensor sanity

In [None]:
# ================ Step 14: TST Training ================
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.cuda.amp import autocast, GradScaler
from pathlib import Path
import json
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, classification_report
import random
import warnings
warnings.filterwarnings('ignore')

print("\n\nStep 14: TST Training")
print("=" * 60)

# Deterministic cuBLAS matmuls on CUDA >= 10.2 need this workspace setting. Without it,
# use_deterministic_algorithms(warn_only=True) only emits a warning and training stays
# nondeterministic. setdefault keeps any value the user already exported.
import os
os.environ.setdefault('CUBLAS_WORKSPACE_CONFIG', ':4096:8')

# Set random seeds for Python, NumPy and PyTorch (CPU and all CUDA devices)
random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)
np.random.seed(42)
# Ask cuDNN for deterministic kernels and disable its autotuner
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.use_deterministic_algorithms(True, warn_only=True)

# Mixed precision is used only on CUDA; bfloat16 is preferred when the GPU supports it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
AMP_ENABLED = (device.type == 'cuda')
amp_dtype = torch.bfloat16 if (AMP_ENABLED and torch.cuda.is_bf16_supported()) else torch.float16
print(f"Device: {device}, AMP: {AMP_ENABLED}, dtype: {amp_dtype}")

# Class metadata and the list of folds to train, both written by earlier steps
with open('/content/configs/classes.json', 'r') as f:
    classes_cfg = json.load(f)
with open('/content/logs/active_folds.json', 'r') as f:
    active_folds = json.load(f)['folds']

# Tensors are read from interim/; checkpoints and figures get their own directories
interim_dir = Path('/content/interim')
models_dir = Path('/content/models')
figures_dir = Path('/content/figures')
for _out_dir in (models_dir, figures_dir):
    _out_dir.mkdir(exist_ok=True)

# Size of the classifier output, and the classes whose inclusion flag is truthy
NUM_CLASSES = classes_cfg['num_classes']
_included_flags = classes_cfg['statistics']['included_flags']
INCLUDED_CLASSES = [name for name in _included_flags if _included_flags[name]]
NUM_INCLUDED = len(INCLUDED_CLASSES)

# TST hyperparameters: architecture
D_MODEL, N_HEADS, DEPTH, DROPOUT = 64, 4, 4, 0.1
# Optimisation
LR, WEIGHT_DECAY, GRAD_CLIP = 1e-3, 1e-4, 1.0
# Data loading
BATCH_SIZE, NUM_WORKERS = 64, 4
# Training length and early-stopping patience
MAX_EPOCHS, PATIENCE = 100, 10

print(
    f"\nHyperparameters:\n"
    f"  d_model={D_MODEL}, n_heads={N_HEADS}, depth={DEPTH}, dropout={DROPOUT}\n"
    f"  lr={LR}, weight_decay={WEIGHT_DECAY}, grad_clip={GRAD_CLIP}\n"
    f"  patience={PATIENCE}, max_epochs={MAX_EPOCHS}\n"
    f"  Number of classes: {NUM_CLASSES} (included: {NUM_INCLUDED})\n"
)

class PatchEmbedding(nn.Module):
    """Cut a (batch, channels, length) series into non-overlapping patches and embed them.

    Each patch gathers ``patch_len`` consecutive time steps of every channel,
    flattened channel by channel, and is projected to ``d_model`` by one linear layer.
    """

    def __init__(self, n_channels, seq_len, patch_len, d_model):
        super().__init__()
        self.patch_len = patch_len
        # Trailing time steps that do not fill a whole patch are not counted
        self.n_patches = seq_len // patch_len
        self.proj = nn.Linear(n_channels * patch_len, d_model)

    def forward(self, x):
        batch_size, _, _ = x.shape  # unpacking enforces a 3-D input
        # (B, C, L) -> (B, C, n_patches, patch_len): window size == stride, so no overlap
        patches = x.unfold(2, self.patch_len, self.patch_len)
        # Bring the patch axis forward: (B, n_patches, C, patch_len)
        patches = patches.transpose(1, 2)
        # Flatten channel and in-patch time axes into one feature vector per patch
        tokens = patches.reshape(batch_size, self.n_patches, -1)
        return self.proj(tokens)

class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding added to a (batch, seq, d_model) input.

    The table is precomputed for ``max_len`` positions and registered as the buffer
    ``pe`` of shape (1, max_len, d_model). Even feature columns hold sines and odd
    columns hold cosines. Both even and odd ``d_model`` are supported.

    Args:
        d_model: Size of the feature dimension.
        max_len: Number of positions to precompute.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # An odd d_model has one fewer cosine column than sine columns, so the
        # frequency vector is trimmed to match (a no-op when d_model is even).
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(1)`` positions."""
        return x + self.pe[:, :x.size(1)]

class TST(nn.Module):
    """Patch-based Transformer encoder classifier for multichannel time series.

    Pipeline: patch embedding -> sinusoidal positional encoding -> Transformer
    encoder stack -> mean over the patch axis -> dropout -> linear classifier.
    """

    def __init__(self, n_channels, seq_len, patch_len, num_classes, d_model, n_heads, depth, dropout):
        super().__init__()
        # Submodules are created in this fixed order so seeded initialisation is stable
        self.patch_embedding = PatchEmbedding(n_channels, seq_len, patch_len, d_model)
        self.pos_encoding = PositionalEncoding(d_model)
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=d_model,
                nhead=n_heads,
                dim_feedforward=d_model * 4,  # feed-forward width is 4x the model width
                dropout=dropout,
                batch_first=True,
            ),
            num_layers=depth,
        )
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, x):
        tokens = self.pos_encoding(self.patch_embedding(x))
        encoded = self.transformer(tokens)
        pooled = encoded.mean(dim=1)  # average over the patch (sequence) axis
        return self.fc(self.dropout(pooled))

class TSTDataset(Dataset):
    """Thin dataset pairing an indexable sample container with its label container.

    Both containers are stored as given (no conversion or copy). Item ``i`` is the
    tuple ``(X[i], y[i])`` and the length is ``len(X)``.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

class EarlyStopping:
    """Track the best score seen so far and signal when it has stopped improving.

    Calling the instance with ``(score, epoch)`` returns True when the score is the
    first one seen or strictly better than the best so far, and False otherwise.
    After ``patience`` consecutive non-improving calls ``early_stop`` becomes True.
    With ``mode='max'`` larger scores are better; any other mode treats smaller
    scores as better.
    """

    def __init__(self, patience, mode='max'):
        self.patience, self.mode = patience, mode
        self.best_score, self.best_epoch = None, 0
        self.counter = 0
        self.early_stop = False

    def __call__(self, score, epoch):
        if self.best_score is not None:
            if self.mode == 'max':
                is_better = score > self.best_score
            else:
                is_better = score < self.best_score

            if not is_better:
                # Another epoch without improvement
                self.counter += 1
                if self.counter >= self.patience:
                    self.early_stop = True
                return False

            # Strict improvement: restart the patience window
            self.counter = 0

        # First score ever, or a new best
        self.best_score = score
        self.best_epoch = epoch
        return True

def train_epoch(model, loader, criterion, optimizer, scaler, device, amp_enabled, amp_dtype):
    """Run one optimisation pass over ``loader`` and report training metrics.

    Each batch is moved to ``device``, evaluated under autocast, back-propagated
    through ``scaler``, gradient-clipped to the module-level ``GRAD_CLIP`` norm and
    applied with ``optimizer``.

    Returns:
        (avg_loss, accuracy, macro_f1), where the loss is weighted by batch size and
        divided by ``len(loader.dataset)``.
    """
    model.train()
    running_loss = 0
    predicted, expected = [], []

    for batch_x, batch_y in loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        with autocast(enabled=amp_enabled, dtype=amp_dtype):
            logits = model(batch_x)
            loss = criterion(logits, batch_y)

        # Scaled backward, then unscale so clipping sees the true gradient norms
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP)
        scaler.step(optimizer)
        scaler.update()

        # Weight the batch-mean loss by batch size so the epoch average is per sample
        running_loss += loss.item() * len(batch_x)
        predicted.extend(logits.argmax(dim=1).cpu().numpy())
        expected.extend(batch_y.cpu().numpy())

    return (
        running_loss / len(loader.dataset),
        accuracy_score(expected, predicted),
        f1_score(expected, predicted, average='macro', zero_division=0),
    )

def eval_epoch(model, loader, criterion, device, amp_enabled, amp_dtype):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Returns:
        (avg_loss, accuracy, macro_f1, predictions, labels). The loss is weighted by
        batch size and divided by ``len(loader.dataset)``; predictions and labels
        are flat lists collected in loader order.
    """
    model.eval()
    running_loss = 0
    predicted, expected = [], []

    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            with autocast(enabled=amp_enabled, dtype=amp_dtype):
                logits = model(batch_x)
                loss = criterion(logits, batch_y)

            # Weight the batch-mean loss by batch size so the average is per sample
            running_loss += loss.item() * len(batch_x)
            predicted.extend(logits.argmax(dim=1).cpu().numpy())
            expected.extend(batch_y.cpu().numpy())

    mean_loss = running_loss / len(loader.dataset)
    accuracy = accuracy_score(expected, predicted)
    macro_f1 = f1_score(expected, predicted, average='macro', zero_division=0)
    return mean_loss, accuracy, macro_f1, predicted, expected

def _make_loader(dataset, shuffle):
    """Build a DataLoader using the notebook-wide batch size and worker settings."""
    return DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=shuffle,
                      num_workers=NUM_WORKERS, pin_memory=True,
                      persistent_workers=(NUM_WORKERS > 0))


def train_fold(fold_id):
    """Train, evaluate and persist the TST model for one outer fold.

    Phase 1 trains on validation split 0 of the fold's training data with
    ReduceLROnPlateau and early stopping on validation macro F1, recording the
    learning rate in effect at every epoch. Phase 2 trains a fresh model on the full
    training set for ``best_epoch + 1`` epochs while replaying those recorded
    learning rates, so the retrained model follows the schedule that validation
    selected. The final model is evaluated on the fold's test set.

    Side effects: writes ``models_dir / tst_fold{fold_id}.pt``, a training-curve PNG
    in ``figures_dir`` and ``/content/logs/step14_tst_fold{fold_id}.json``.

    Args:
        fold_id: Identifier of the outer fold; selects ``tensors_fold{fold_id}.pt``
            in ``interim_dir`` and names every output file.

    Returns:
        The JSON-serialisable report dictionary that is also written to the log file.
    """
    print(f"\n{'='*60}")
    print(f"Fold {fold_id}")
    print(f"{'='*60}")

    # Load tensors (the file also stores NumPy arrays and index tuples, hence weights_only=False)
    tensors = torch.load(interim_dir / f'tensors_fold{fold_id}.pt', weights_only=False)
    X_train_full = tensors['X_train_full']
    y_train_full = tensors['y_train_full']
    X_test = tensors['X_test']
    y_test = tensors['y_test']
    val_splits = tensors['val_splits_indices']

    n_channels = tensors['shape']['n_channels']
    seq_len = tensors['shape']['seq_len']
    patch_len = tensors['shape']['patch_len']

    print(f"Data: Train={len(X_train_full)}, Test={len(X_test)}")
    print(f"Shapes: C={n_channels}, L={seq_len}, Patch={patch_len}")

    # Phase 1: Select best epoch via validation early stopping
    print(f"\n--- Phase 1: Validation-based early stopping ---")
    train_idx, val_idx = val_splits[0]
    X_train = X_train_full[train_idx]
    y_train = y_train_full[train_idx]
    X_val = X_train_full[val_idx]
    y_val = y_train_full[val_idx]

    train_loader = _make_loader(TSTDataset(X_train, y_train), shuffle=True)
    val_loader = _make_loader(TSTDataset(X_val, y_val), shuffle=False)

    model = TST(n_channels, seq_len, patch_len, NUM_CLASSES, D_MODEL, N_HEADS, DEPTH, DROPOUT).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5)
    # Loss scaling is only needed for float16; bfloat16 and full precision run unscaled
    scaler = GradScaler(enabled=(AMP_ENABLED and amp_dtype == torch.float16))
    early_stopping = EarlyStopping(patience=PATIENCE, mode='max')

    history = {'train_loss': [], 'train_acc': [], 'train_f1': [],
               'val_loss': [], 'val_acc': [], 'val_f1': []}
    # Learning rate in effect during each Phase 1 epoch, replayed in Phase 2
    lr_schedule = []

    print(f"Train: {len(X_train)}, Val: {len(X_val)}")

    for epoch in range(MAX_EPOCHS):
        lr_schedule.append(optimizer.param_groups[0]['lr'])

        train_loss, train_acc, train_f1 = train_epoch(model, train_loader, criterion, optimizer, scaler, device, AMP_ENABLED, amp_dtype)
        val_loss, val_acc, val_f1, _, _ = eval_epoch(model, val_loader, criterion, device, AMP_ENABLED, amp_dtype)

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['train_f1'].append(train_f1)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        history['val_f1'].append(val_f1)

        scheduler.step(val_f1)

        improved = early_stopping(val_f1, epoch)

        if epoch % 5 == 0 or improved:
            print(f"Epoch {epoch:3d}: Train Loss={train_loss:.4f}, F1={train_f1:.4f} | "
                  f"Val Loss={val_loss:.4f}, F1={val_f1:.4f}")

        if early_stopping.early_stop:
            print(f"Early stopping at epoch {epoch}")
            break

    best_epoch = early_stopping.best_epoch
    best_val_f1 = early_stopping.best_score
    print(f"\nBest epoch: {best_epoch}, Best val_f1: {best_val_f1:.4f}")

    # Phase 2: Retrain on the full training set up to the best epoch
    print(f"\n--- Phase 2: Retrain on full training set (target epoch={best_epoch}) ---")
    train_full_loader = _make_loader(TSTDataset(X_train_full, y_train_full), shuffle=True)
    test_loader = _make_loader(TSTDataset(X_test, y_test), shuffle=False)

    model_final = TST(n_channels, seq_len, patch_len, NUM_CLASSES, D_MODEL, N_HEADS, DEPTH, DROPOUT).to(device)
    optimizer_final = Adam(model_final.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    scaler_final = GradScaler(enabled=(AMP_ENABLED and amp_dtype == torch.float16))

    final_history = {'train_loss': [], 'train_acc': [], 'train_f1': []}
    replayed_lrs = lr_schedule[:best_epoch + 1]

    for epoch in range(best_epoch + 1):
        # Replay the Phase 1 learning rate of this epoch. Without it Phase 2 would
        # train at a constant LR and diverge from the run validation selected.
        for param_group in optimizer_final.param_groups:
            param_group['lr'] = replayed_lrs[epoch]

        train_loss, train_acc, train_f1 = train_epoch(model_final, train_full_loader, criterion,
                                                       optimizer_final, scaler_final, device, AMP_ENABLED, amp_dtype)
        final_history['train_loss'].append(train_loss)
        final_history['train_acc'].append(train_acc)
        final_history['train_f1'].append(train_f1)

        if epoch % 10 == 0 or epoch == best_epoch:
            print(f"Epoch {epoch:3d}: Train Loss={train_loss:.4f}, Acc={train_acc:.4f}, F1={train_f1:.4f}")

    # Index of the last epoch Phase 2 actually ran, used by the consistency check
    retrain_last_epoch = len(final_history['train_loss']) - 1

    # Test-set evaluation
    test_loss, test_acc, test_f1, _, _ = eval_epoch(model_final, test_loader, criterion, device, AMP_ENABLED, amp_dtype)
    print(f"\nTest set: Loss={test_loss:.4f}, Acc={test_acc:.4f}, F1={test_f1:.4f}")

    # Save model weights together with the configuration needed to rebuild it
    model_path = models_dir / f'tst_fold{fold_id}.pt'
    torch.save({
        'model_state_dict': model_final.state_dict(),
        'model_config': {
            'n_channels': n_channels,
            'seq_len': seq_len,
            'patch_len': patch_len,
            'num_classes': NUM_CLASSES,
            'd_model': D_MODEL,
            'n_heads': N_HEADS,
            'depth': DEPTH,
            'dropout': DROPOUT
        },
        'fold': fold_id,
        'best_epoch': best_epoch,
        'best_val_f1': best_val_f1,
        'test_metrics': {
            'loss': test_loss,
            'accuracy': test_acc,
            'macro_f1': test_f1
        }
    }, model_path)
    print(f"✓ Model saved: {model_path}")

    # Plot training curves
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))

    # Validation-phase curves
    ax = axes[0]
    ax.plot(history['train_loss'], label='Train')
    ax.plot(history['val_loss'], label='Val')
    ax.axvline(best_epoch, color='red', linestyle='--', label=f'Best Epoch={best_epoch}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title(f'Fold {fold_id} - Loss (Validation Phase)')
    ax.legend()
    ax.grid(alpha=0.3)

    ax = axes[1]
    ax.plot(history['train_f1'], label='Train')
    ax.plot(history['val_f1'], label='Val')
    ax.axvline(best_epoch, color='red', linestyle='--', label=f'Best Epoch={best_epoch}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Macro F1')
    ax.set_title(f'Fold {fold_id} - F1 (Validation Phase)')
    ax.legend()
    ax.grid(alpha=0.3)

    # Retraining-phase curves
    ax = axes[2]
    ax.plot(final_history['train_loss'], label='Train Loss')
    ax.plot(final_history['train_f1'], label='Train F1')
    ax.axvline(best_epoch, color='red', linestyle='--', label=f'Stop Epoch={best_epoch}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Metric')
    ax.set_title(f'Fold {fold_id} - Retrain on Full Train Set')
    ax.legend()
    ax.grid(alpha=0.3)

    plt.tight_layout()
    plt.savefig(figures_dir / f'step14_tst_fold{fold_id}.png', dpi=150)
    plt.close(fig)

    # Save report
    report = {
        'fold': fold_id,
        'phase1_validation': {
            'best_epoch': best_epoch,
            'best_val_f1': best_val_f1,
            'train_size': len(X_train),
            'val_size': len(X_val),
            'history': {k: [float(v) for v in vals] for k, vals in history.items()},
            'lr_schedule': [float(lr) for lr in lr_schedule]
        },
        'phase2_retrain': {
            'target_epoch': best_epoch,
            'train_full_size': len(X_train_full),
            'final_train_f1': float(final_history['train_f1'][-1]),
            'history': {k: [float(v) for v in vals] for k, vals in final_history.items()},
            'lr_schedule': [float(lr) for lr in replayed_lrs]
        },
        'test_results': {
            'test_size': len(X_test),
            'loss': float(test_loss),
            'accuracy': float(test_acc),
            'macro_f1': float(test_f1)
        },
        'hyperparameters': {
            'd_model': D_MODEL,
            'n_heads': N_HEADS,
            'depth': DEPTH,
            'dropout': DROPOUT,
            'lr': LR,
            'weight_decay': WEIGHT_DECAY,
            'grad_clip': GRAD_CLIP,
            'batch_size': BATCH_SIZE,
            'patience': PATIENCE,
            'max_epochs': MAX_EPOCHS
        },
        'consistency_check': {
            'val_stopped_at': best_epoch,
            'retrain_stopped_at': retrain_last_epoch,
            # Derived from the epochs actually run rather than asserted
            'consistent': retrain_last_epoch == best_epoch
        }
    }

    with open(f'/content/logs/step14_tst_fold{fold_id}.json', 'w') as f:
        json.dump(report, f, indent=2)

    return report

# Run the two-phase training for every active fold and keep each fold's report
all_reports = []
for fold_id in active_folds:
    report = train_fold(fold_id)
    all_reports.append(report)

# Cross-fold averages of the test metrics
_mean_test_accuracy = np.mean([r['test_results']['accuracy'] for r in all_reports])
_mean_test_macro_f1 = np.mean([r['test_results']['macro_f1'] for r in all_reports])

# Aggregate summary: run settings, per-fold reports and the averaged test metrics
summary = dict(
    method='TST',
    training_procedure='Two-phase training: (1) Within the training set, use validation-based early stopping to choose the best epoch; (2) Retrain on the full training set up to the best epoch',
    amp_enabled=AMP_ENABLED,
    amp_dtype=str(amp_dtype),
    device=str(device),
    deterministic=True,
    random_seed=42,
    folds=all_reports,
    average_test_metrics=dict(
        accuracy=_mean_test_accuracy,
        macro_f1=_mean_test_macro_f1,
    ),
)

with open('/content/logs/step14_tst_summary.json', 'w') as f:
    json.dump(summary, f, indent=2)

# Completion report listing every artifact family produced by this step
print(f"\n{'='*60}")
print(f"✓ Completed training for {len(active_folds)} folds")
print(f"✓ Models: models/tst_fold{{k}}.pt")
print(f"✓ Curves: figures/step14_tst_fold{{k}}.png")
print(f"✓ Reports: logs/step14_tst_fold{{k}}.json")
print(f"✓ Summary: logs/step14_tst_summary.json")
print(f"\nAverage test-set performance:")
print(f"  Accuracy: {summary['average_test_metrics']['accuracy']:.4f}")
print(f"  Macro F1: {summary['average_test_metrics']['macro_f1']:.4f}")
print(f"{'='*60}\n")

# Record the generated artifacts in git (shell commands run through IPython)
for _git_cmd in (
    'git add models/ figures/step14_*.png logs/step14_*.json',
    'git commit -m "train: TST with two-phase training and early stopping"',
):
    get_ipython().system(_git_cmd)

print(f"Step 14 completed\n{'='*60}")



Step 14: TST Training
Device: cuda, AMP: True, dtype: torch.bfloat16

Hyperparameters:
  d_model=64, n_heads=4, depth=4, dropout=0.1
  lr=0.001, weight_decay=0.0001, grad_clip=1.0
  patience=10, max_epochs=100
  Number of classes: 8 (included: 8)


Fold 0
Data: Train=34727, Test=1895
Shapes: C=6, L=150, Patch=25

--- Phase 1: Validation-based early stopping ---
Train: 27593, Val: 7134
Epoch   0: Train Loss=0.9607, F1=0.6118 | Val Loss=0.8718, F1=0.6166
Epoch   1: Train Loss=0.7052, F1=0.7445 | Val Loss=0.8468, F1=0.6422
Epoch   2: Train Loss=0.5977, F1=0.8023 | Val Loss=0.6308, F1=0.8022
Epoch   3: Train Loss=0.4997, F1=0.8464 | Val Loss=0.6202, F1=0.8041
Epoch   5: Train Loss=0.3934, F1=0.8806 | Val Loss=0.5711, F1=0.8059
Epoch   6: Train Loss=0.3583, F1=0.8924 | Val Loss=0.5949, F1=0.8076
Epoch  10: Train Loss=0.2634, F1=0.9200 | Val Loss=0.5906, F1=0.8135
Epoch  15: Train Loss=0.1998, F1=0.9401 | Val Loss=0.6392, F1=0.8117
Epoch  20: Train Loss=0.1166, F1=0.9682 | Val Loss=0.8307,

In [None]:
# ================ Step 15: Inference & Prediction (Revised) ================
import os
os.environ.setdefault("OMP_NUM_THREADS", "1")
os.environ.setdefault("MKL_NUM_THREADS", "1")
os.environ.setdefault("OPENBLAS_NUM_THREADS", "1")

import numpy as np
import torch
import torch.nn as nn
from pathlib import Path
import json
import pickle
import time
from datetime import datetime
import subprocess
import random
from threadpoolctl import threadpool_limits

# Step banner
print("\n\nStep 15: Inference & Prediction (Revised)", "=" * 60, sep="\n")

# Fix random seeds for Python, NumPy and PyTorch (CPU and all CUDA devices)
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)
# Deterministic cuDNN kernels, autotuner off
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Create output directories relative to the current working directory
for _dir_name in ('preds', 'logs'):
    Path(_dir_name).mkdir(parents=True, exist_ok=True)

# Prefer the GPU when one is available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

def get_active_folds(path="logs/active_folds.json"):
    """Return the ``"folds"`` entry of the JSON file at ``path``.

    An empty list is returned when the file does not exist; a file that exists
    but lacks the key or is not valid JSON raises as usual.
    """
    fold_file = Path(path)
    if not fold_file.exists():
        return []
    payload = json.loads(fold_file.read_text())
    return payload["folds"]

# Fold definitions and class metadata written by earlier steps (paths relative to the cwd)
with open('configs/splits.json', 'r') as f:
    splits_cfg = json.load(f)
with open('configs/classes.json', 'r') as f:
    classes_cfg = json.load(f)

# Folds to run, number of timing repetitions and classifier output size
active_folds = get_active_folds()
N_REPEATS = 50
NUM_CLASSES = classes_cfg['num_classes']

print(
    f"Inference settings: batch=1 (online scenario), repetitions={N_REPEATS}\n"
    f"Latency statistics: processing the entire test set sample-by-sample counts as one run; repeat N times and report p50/p90\n"
    f"Device: {device}, single-threaded: BLAS=1, fixed seed=42\n"
)

# Short commit hash for provenance. getoutput() would merge stderr into the result
# and ignore the exit status, so outside a repository the "hash" would be the start
# of an error message; check the return code and fall back to a sentinel instead.
try:
    _git_rev = subprocess.run(["git", "rev-parse", "HEAD"], capture_output=True, text=True)
    git_hash = _git_rev.stdout.strip()[:8] if _git_rev.returncode == 0 else "unknown"
except OSError:
    # git executable not available
    git_hash = "unknown"

# TST model definition
class PatchEmbedding(nn.Module):
    """Cut a (batch, channels, length) series into non-overlapping patches and embed them.

    Each patch gathers ``patch_len`` consecutive time steps of every channel,
    flattened channel by channel, and is projected to ``d_model`` by one linear layer.
    """

    def __init__(self, n_channels, seq_len, patch_len, d_model):
        super().__init__()
        self.patch_len = patch_len
        # Trailing time steps that do not fill a whole patch are not counted
        self.n_patches = seq_len // patch_len
        self.proj = nn.Linear(n_channels * patch_len, d_model)

    def forward(self, x):
        batch_size, _, _ = x.shape  # unpacking enforces a 3-D input
        # (B, C, L) -> (B, C, n_patches, patch_len): window size == stride, so no overlap
        patches = x.unfold(2, self.patch_len, self.patch_len)
        # Bring the patch axis forward: (B, n_patches, C, patch_len)
        patches = patches.transpose(1, 2)
        # Flatten channel and in-patch time axes into one feature vector per patch
        tokens = patches.reshape(batch_size, self.n_patches, -1)
        return self.proj(tokens)

class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding added to a (batch, seq, d_model) input.

    The table is precomputed for ``max_len`` positions and registered as the buffer
    ``pe`` of shape (1, max_len, d_model). Even feature columns hold sines and odd
    columns hold cosines. Both even and odd ``d_model`` are supported.

    Args:
        d_model: Size of the feature dimension.
        max_len: Number of positions to precompute.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # An odd d_model has one fewer cosine column than sine columns, so the
        # frequency vector is trimmed to match (a no-op when d_model is even).
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(1)`` positions."""
        return x + self.pe[:, :x.size(1)]

class TST(nn.Module):
    """Patch-based Transformer encoder classifier for fixed-length windows.

    The input is split into patches and embedded, sinusoidal positions are
    added, the tokens pass through ``depth`` encoder layers, are averaged over
    the patch axis and finally mapped to ``num_classes`` logits.
    """

    def __init__(self, n_channels, seq_len, patch_len, num_classes, d_model, n_heads, depth, dropout):
        super().__init__()
        self.patch_embedding = PatchEmbedding(n_channels, seq_len, patch_len, d_model)
        self.pos_encoding = PositionalEncoding(d_model)

        # Feed-forward width is fixed at four times the embedding width
        layer_kwargs = dict(
            d_model=d_model,
            nhead=n_heads,
            dim_feedforward=d_model * 4,
            dropout=dropout,
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(**layer_kwargs),
            num_layers=depth,
        )
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, x):
        """Return class logits for a batch ``x`` of shape (batch, channels, length)."""
        tokens = self.pos_encoding(self.patch_embedding(x))
        # Mean-pool over the patch axis to get one vector per window
        pooled = self.transformer(tokens).mean(dim=1)
        return self.fc(self.dropout(pooled))

# One summary dict per processed fold; the aggregate report after the loop reads this list
all_summaries = []

# Process every fold of the split configuration that is also listed in active_folds:
# run batch=1 inference with both models, time it, and persist predictions and scores.
for fold in splits_cfg['folds']:
    k = fold['fold']

    if k not in active_folds:
        print(f"⏭️  Skipping Fold {k}")
        continue

    test_subj = fold['test_subject']

    print(f"\n{'='*60}")
    print(f"Fold {k}: test subject={test_subj}")
    print(f"{'='*60}")

    # ============ MiniROCKET + Ridge Inference ============
    print(f"\n--- MiniROCKET + Ridge ---")

    # Precomputed test features are memory-mapped read-only; labels, subjects and
    # window ids come from the companion .npz file.
    X_test_raw = np.load(f'features/X_minirocket_test_fold{k}.npy', mmap_mode='r')
    meta_test = np.load(f'features/meta_minirocket_test_fold{k}.npz', allow_pickle=True)
    y_test = meta_test['y']
    subjects_test = meta_test['subjects']
    window_ids_test = meta_test['window_ids']

    # NOTE(review): pickle.load executes arbitrary code from the file; only load
    # model files produced by this pipeline.
    with open(f'models/ridge_fold{k}.pkl', 'rb') as f:
        model_data = pickle.load(f)

    ridge = model_data['ridge']
    # Stored under 'variance_filter'; presumably the feature selector fitted at
    # training time -- TODO confirm against the training step.
    vt = model_data['variance_filter']

    X_test = vt.transform(X_test_raw)
    n_test = len(X_test)

    print(f"Test set: {n_test} samples, {X_test.shape[1]} features")

    # Assert dimensional consistency
    assert X_test.shape[1] == ridge.coef_.shape[1], f"Feature dimension mismatch: {X_test.shape[1]} vs {ridge.coef_.shape[1]}"

    # Latency statistics: treat processing the entire test set as one run
    # NOTE(review): the timed region covers only ridge.predict on already-extracted,
    # already-filtered features; feature extraction and vt.transform are outside it.
    latencies_minirocket = []
    with threadpool_limits(limits=1, user_api='blas'):
        for _ in range(N_REPEATS):
            start = time.perf_counter()
            for i in range(n_test):
                # i:i+1 keeps the 2-D shape, i.e. a batch of exactly one sample
                _ = ridge.predict(X_test[i:i+1])
            total_time = (time.perf_counter() - start) * 1000
            latencies_minirocket.append(total_time)

    # Final prediction
    # Done in one batched call, outside the timed region
    with threadpool_limits(limits=1, user_api='blas'):
        y_pred_minirocket = ridge.predict(X_test)
        scores_minirocket = ridge.decision_function(X_test)

    # Handle the binary-class case
    # A 1-D decision function is expanded to two columns (negated score, score)
    if scores_minirocket.ndim == 1:
        scores_minirocket = np.column_stack([-scores_minirocket, scores_minirocket])

    # Save predictions and scores
    np.save(f'preds/preds_fold{k}_minirocket.npy', y_pred_minirocket)
    np.save(f'preds/scores_fold{k}_minirocket.npy', scores_minirocket.astype(np.float32))
    np.savez(f'preds/meta_fold{k}_minirocket.npz',
             y_true=y_test,
             subjects=subjects_test,
             window_ids=window_ids_test,
             indices=np.arange(n_test))

    # Percentiles are taken over the N_REPEATS whole-test-set timings (in ms);
    # per-sample figures divide those totals by the number of test samples.
    p50_mr = np.percentile(latencies_minirocket, 50)
    p90_mr = np.percentile(latencies_minirocket, 90)
    per_sample_p50_mr = p50_mr / n_test
    per_sample_p90_mr = p90_mr / n_test

    print(f"✓ Prediction complete: {n_test} samples")
    print(f"  Total latency: p50={p50_mr:.1f}ms, p90={p90_mr:.1f}ms")
    print(f"  Per-sample: p50={per_sample_p50_mr:.3f}ms, p90={per_sample_p90_mr:.3f}ms")
    print(f"  Saved: preds/{{preds,scores,meta}}_fold{k}_minirocket.*")

    # ============ TST Inference ============
    print(f"\n--- TST ---")

    # NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects;
    # only load files produced by this pipeline.
    tensors = torch.load(f'interim/tensors_fold{k}.pt', weights_only=False)
    X_test_tst = tensors['X_test']
    y_test_tst = tensors['y_test']
    subjects_test_tst = tensors['subjects_test']
    n_test_tst = len(X_test_tst)

    checkpoint = torch.load(f'models/tst_fold{k}.pt', weights_only=False, map_location=device)
    model_cfg = checkpoint['model_config']

    # Rebuild the architecture from the stored constructor arguments, then load the weights
    model = TST(**model_cfg).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    print(f"Test set: {n_test_tst} samples")

    # Assert dimensional consistency
    assert X_test_tst.shape[1] == model_cfg['n_channels'], f"Channel count mismatch: {X_test_tst.shape[1]} vs {model_cfg['n_channels']}"
    assert X_test_tst.shape[2] == model_cfg['seq_len'], f"Sequence length mismatch: {X_test_tst.shape[2]} vs {model_cfg['seq_len']}"

    # Latency statistics: treat processing the entire test set as one run
    # The timed region includes the per-sample .to(device) copy; on CUDA the
    # synchronize calls make sure queued GPU work is finished before each clock read.
    latencies_tst = []
    with torch.inference_mode():
        for _ in range(N_REPEATS):
            if device.type == 'cuda':
                torch.cuda.synchronize()
            start = time.perf_counter()

            for i in range(n_test_tst):
                sample = X_test_tst[i:i+1].to(device)
                _ = model(sample)

            if device.type == 'cuda':
                torch.cuda.synchronize()
            total_time = (time.perf_counter() - start) * 1000
            latencies_tst.append(total_time)

    # Final prediction
    # A separate, untimed pass that also runs one sample at a time
    all_preds_tst = []
    all_logits_tst = []
    with torch.inference_mode():
        for i in range(n_test_tst):
            sample = X_test_tst[i:i+1].to(device)
            logits = model(sample)
            pred = logits.argmax(dim=1).cpu().numpy()[0]
            all_preds_tst.append(pred)
            all_logits_tst.append(logits.cpu().numpy()[0])

    y_pred_tst = np.array(all_preds_tst)
    logits_tst = np.array(all_logits_tst, dtype=np.float32)
    # Class probabilities are derived from the stored logits with a softmax over the class axis
    probs_tst = torch.softmax(torch.from_numpy(logits_tst), dim=1).numpy()

    # Save predictions and scores
    # Unlike the MiniROCKET metadata, no window_ids are written here
    np.save(f'preds/preds_fold{k}_tst.npy', y_pred_tst)
    np.save(f'preds/logits_fold{k}_tst.npy', logits_tst)
    np.save(f'preds/probs_fold{k}_tst.npy', probs_tst)
    np.savez(f'preds/meta_fold{k}_tst.npz',
             y_true=y_test_tst.numpy(),
             subjects=subjects_test_tst,
             indices=np.arange(n_test_tst))

    p50_tst = np.percentile(latencies_tst, 50)
    p90_tst = np.percentile(latencies_tst, 90)
    per_sample_p50_tst = p50_tst / n_test_tst
    per_sample_p90_tst = p90_tst / n_test_tst

    print(f"✓ Prediction complete: {n_test_tst} samples")
    print(f"  Total latency: p50={p50_tst:.1f}ms, p90={p90_tst:.1f}ms")
    print(f"  Per-sample: p50={per_sample_p50_tst:.3f}ms, p90={per_sample_p90_tst:.3f}ms")
    print(f"  Saved: preds/{{preds,logits,probs,meta}}_fold{k}_tst.*")

    # Summary
    # 'n_test_samples' records the MiniROCKET test-set size (n_test); the TST
    # count is available as tst.n_predictions.
    summary = {
        'fold': k,
        'test_subject': test_subj,
        'timestamp': datetime.now().isoformat(),
        'git_hash': git_hash,
        'n_test_samples': int(n_test),
        'minirocket': {
            'n_predictions': int(len(y_pred_minirocket)),
            'total_latency_p50_ms': float(p50_mr),
            'total_latency_p90_ms': float(p90_mr),
            'per_sample_p50_ms': float(per_sample_p50_mr),
            'per_sample_p90_ms': float(per_sample_p90_mr),
            'n_repeats': N_REPEATS,
            'batch_size': 1
        },
        'tst': {
            'n_predictions': int(len(y_pred_tst)),
            'total_latency_p50_ms': float(p50_tst),
            'total_latency_p90_ms': float(p90_tst),
            'per_sample_p50_ms': float(per_sample_p50_tst),
            'per_sample_p90_ms': float(per_sample_p90_tst),
            'n_repeats': N_REPEATS,
            'batch_size': 1
        }
    }

    all_summaries.append(summary)

# Save aggregate summary
def _mean_over_folds(method, key):
    """Mean of ``summary[method][key]`` over the processed folds, or None when no fold ran."""
    per_fold = [s[method][key] for s in all_summaries]
    # np.mean([]) yields NaN, which json.dump would emit as the non-standard
    # token NaN; report null instead when nothing was processed.
    return float(np.mean(per_fold)) if per_fold else None

# (suffix used in the aggregated key, key inside each per-fold summary)
_AGGREGATE_FIELDS = [
    ('total_p50_ms', 'total_latency_p50_ms'),
    ('total_p90_ms', 'total_latency_p90_ms'),
    ('per_sample_p50_ms', 'per_sample_p50_ms'),
    ('per_sample_p90_ms', 'per_sample_p90_ms'),
]
aggregated = {
    f'avg_{method}_{suffix}': _mean_over_folds(method, key)
    for method in ('minirocket', 'tst')
    for suffix, key in _AGGREGATE_FIELDS
}

with open('logs/step15_inference_summary.json', 'w') as f:
    json.dump({
        'procedure': 'Each repetition processes the entire test set sequentially with batch=1; p50/p90 are computed over N repetitions',
        'n_repeats': N_REPEATS,
        'git_hash': git_hash,
        'random_seed': 42,
        'deterministic': True,
        'single_thread': 'BLAS=1',
        'outputs': {
            'predictions': 'preds/preds_fold{k}_{minirocket,tst}.npy',
            'scores': 'preds/scores_fold{k}_minirocket.npy (decision_function)',
            'logits': 'preds/logits_fold{k}_tst.npy',
            'probs': 'preds/probs_fold{k}_tst.npy (softmax)',
            'meta': 'preds/meta_fold{k}_{minirocket,tst}.npz (y_true, subjects, indices)'
        },
        'folds': all_summaries,
        'aggregated': aggregated
    }, f, indent=2)

print(f"\n{'='*60}")
# Count the folds that were actually processed; active_folds may name folds
# that do not appear in the split configuration.
print(f"✓ Completed inference for {len(all_summaries)} folds")
print(f"✓ Predictions: preds/preds_fold{{k}}_{{minirocket,tst}}.npy")
print(f"✓ Scores: preds/{{scores,logits,probs}}_fold{{k}}_*.npy")
print(f"✓ Metadata: preds/meta_fold{{k}}_{{minirocket,tst}}.npz")
print(f"✓ Summary: logs/step15_inference_summary.json")
print(f"\nAverage inference latency (per sample):")
if all_summaries:
    print(f"  MiniROCKET: p50={aggregated['avg_minirocket_per_sample_p50_ms']:.3f}ms, p90={aggregated['avg_minirocket_per_sample_p90_ms']:.3f}ms")
    print(f"  TST: p50={aggregated['avg_tst_per_sample_p50_ms']:.3f}ms, p90={aggregated['avg_tst_per_sample_p90_ms']:.3f}ms")
else:
    print("  n/a (no folds were processed)")
print(f"{'='*60}\n")

# Version the prediction files and the step summary
get_ipython().system('git add preds/ logs/step15_*.json')
get_ipython().system('git commit -m \"inference: batch=1 with corrected latency statistics and score outputs\"')

print(f"Step 15 completed\n{'='*60}")



Step 15: Inference & Prediction (Revised)
Inference settings: batch=1 (online scenario), repetitions=50
Latency statistics: processing the entire test set sample-by-sample counts as one run; repeat N times and report p50/p90
Device: cuda, single-threaded: BLAS=1, fixed seed=42


Fold 0: test subject=proband1

--- MiniROCKET + Ridge ---
Test set: 1895 samples, 9996 features
✓ Prediction complete: 1895 samples
  Total latency: p50=287.1ms, p90=297.7ms
  Per-sample: p50=0.151ms, p90=0.157ms
  Saved: preds/{preds,scores,meta}_fold0_minirocket.*

--- TST ---
Test set: 1895 samples
✓ Prediction complete: 1895 samples
  Total latency: p50=3044.5ms, p90=3057.2ms
  Per-sample: p50=1.607ms, p90=1.613ms
  Saved: preds/{preds,logits,probs,meta}_fold0_tst.*

Fold 1: test subject=proband10

--- MiniROCKET + Ridge ---
Test set: 2463 samples, 9996 features
✓ Prediction complete: 2463 samples
  Total latency: p50=387.1ms, p90=394.6ms
  Per-sample: p50=0.157ms, p90=0.160ms
  Saved: preds/{preds,scores

In [None]:
# ================ Step 16: Metric Computation ================
import numpy as np
import pandas as pd
from pathlib import Path
import json
from sklearn.metrics import f1_score, classification_report, confusion_matrix

# Step banner
print("\n\nStep 16: Metric Computation")
print("=" * 60)

# Per-fold metric tables and the summaries of this step are written under logs/
Path('logs').mkdir(parents=True, exist_ok=True)

def get_active_folds(path="logs/active_folds.json"):
    """Load the fold ids to process from the ``"folds"`` entry of a JSON file.

    A missing file is not an error: it yields an empty list, so the fold loop
    below skips every fold.
    """
    source = Path(path)
    return json.loads(source.read_text())["folds"] if source.exists() else []

# Fold definitions and class vocabulary
splits_cfg = json.loads(Path('configs/splits.json').read_text())
classes_cfg = json.loads(Path('configs/classes.json').read_text())

active_folds = get_active_folds()

# JSON object keys are strings; convert them back to integer class ids
id_to_label = {int(class_id): class_name for class_id, class_name in classes_cfg['id_to_label'].items()}
# Fixed class order (ascending id) shared by every metric table and confusion matrix
label_order = sorted(id_to_label)
label_names = []
for class_id in label_order:
    label_names.append(id_to_label[class_id])

print(f"Class order: {label_names}\n")

# One record per (fold, method), filled by the loop below
all_fold_results = []

# Compute per-class and macro metrics for every active fold and both methods
for fold in splits_cfg['folds']:
    k = fold['fold']

    if k not in active_folds:
        print(f"⏭️  Skipping Fold {k}")
        continue

    test_subj = fold['test_subject']

    print(f"\n{'='*60}")
    print(f"Fold {k}: test subject={test_subj}")
    print(f"{'='*60}")

    for method in ['minirocket', 'tst']:
        print(f"\n--- {method.upper()} ---")

        y_pred = np.load(f'preds/preds_fold{k}_{method}.npy')
        meta = np.load(f'preds/meta_fold{k}_{method}.npz', allow_pickle=True)
        y_true = meta['y_true']

        # Pass the full label set explicitly. Without `labels`, scikit-learn
        # averages over the labels that happen to occur in y_true or y_pred, so
        # the macro score would be defined over a different class set from fold
        # to fold and would disagree with the macro precision/recall taken from
        # `report` below (which already uses labels=label_order).
        macro_f1 = f1_score(y_true, y_pred, labels=label_order, average='macro', zero_division=0)
        per_class_f1 = f1_score(y_true, y_pred, labels=label_order, average=None, zero_division=0)

        report = classification_report(y_true, y_pred, labels=label_order,
                                       target_names=label_names,
                                       output_dict=True, zero_division=0)

        cm = confusion_matrix(y_true, y_pred, labels=label_order)

        # One row per class, followed by a macro-average row (class_id = -1)
        metrics_list = []
        for i, label_id in enumerate(label_order):
            label_name = id_to_label[label_id]
            metrics_list.append({
                'fold': k,
                'method': method,
                'class_id': label_id,
                'class_name': label_name,
                'f1': per_class_f1[i],
                'precision': report[label_name]['precision'],
                'recall': report[label_name]['recall'],
                'support': int(report[label_name]['support'])
            })

        metrics_list.append({
            'fold': k,
            'method': method,
            'class_id': -1,
            'class_name': 'macro_avg',
            'f1': macro_f1,
            'precision': report['macro avg']['precision'],
            'recall': report['macro avg']['recall'],
            'support': int(report['macro avg']['support'])
        })

        metrics_df = pd.DataFrame(metrics_list)
        metrics_df.to_csv(f'logs/fold{k}_metrics_{method}.csv', index=False)

        # Rows are true classes, columns are predicted classes (scikit-learn convention)
        cm_df = pd.DataFrame(cm, index=label_names, columns=label_names)
        cm_df.to_csv(f'logs/fold{k}_cm_{method}.csv')

        print(f"Macro F1: {macro_f1:.4f}")
        print(f"Per-class F1:")
        for i, label_id in enumerate(label_order):
            support = int(report[id_to_label[label_id]]['support'])
            print(f"  {id_to_label[label_id]:15s} (n={support:4d}): {per_class_f1[i]:.4f}")

        print(f"✓ Saved: logs/fold{k}_metrics_{method}.csv")
        print(f"✓ Saved: logs/fold{k}_cm_{method}.csv")

        # Support is stored next to the scores so later steps can tell classes
        # that are absent from this fold's test set (support 0, F1 reported as 0).
        all_fold_results.append({
            'fold': k,
            'test_subject': test_subj,
            'method': method,
            'macro_f1': float(macro_f1),
            'per_class_f1': {id_to_label[label_order[i]]: float(per_class_f1[i]) for i in range(len(label_order))},
            'per_class_support': {id_to_label[label_id]: int(report[id_to_label[label_id]]['support']) for label_id in label_order}
        })

# Cross-fold mean/std of the macro F1 plus the mean per-class F1, one row per method
summary_rows = []
for method in ['minirocket', 'tst']:
    fold_records = [rec for rec in all_fold_results if rec['method'] == method]
    macro_scores = [rec['macro_f1'] for rec in fold_records]

    row = {
        'method': method,
        'macro_f1_mean': np.mean(macro_scores),
        'macro_f1_std': np.std(macro_scores),
    }
    for label in label_names:
        row[f'{label}_f1'] = np.mean([rec['per_class_f1'][label] for rec in fold_records])
    summary_rows.append(row)

summary_df = pd.DataFrame(summary_rows)
summary_df.to_csv('logs/metrics_summary.csv', index=False)

# Same figures as the CSV, converted to plain floats for JSON
aggregated_by_method = {}
for method in ['minirocket', 'tst']:
    method_row = summary_df[summary_df['method'] == method]
    aggregated_by_method[method] = {
        'macro_f1_mean': float(method_row['macro_f1_mean'].values[0]),
        'macro_f1_std': float(method_row['macro_f1_std'].values[0]),
        'per_class_f1_mean': {label: float(method_row[f'{label}_f1'].values[0]) for label in label_names}
    }

n_minirocket_folds = sum(1 for rec in all_fold_results if rec['method'] == 'minirocket')

with open('logs/step16_metrics_summary.json', 'w') as f:
    json.dump({
        'class_order': label_names,
        'n_folds': n_minirocket_folds,
        'per_fold_results': all_fold_results,
        'aggregated': aggregated_by_method
    }, f, indent=2)

print(f"\n{'='*60}")
print(f"✓ Completed metric computation for {len(active_folds)} folds")
print(f"✓ Per-fold metrics: logs/fold{{k}}_metrics_{{minirocket,tst}}.csv")
print(f"✓ Per-fold confusion matrices: logs/fold{{k}}_cm_{{minirocket,tst}}.csv")
print(f"✓ Summary: logs/metrics_summary.csv")
print(f"✓ JSON: logs/step16_metrics_summary.json")
print(f"\nAggregated results:")
for method in ['minirocket', 'tst']:
    method_data = summary_df[summary_df['method'] == method].iloc[0]
    print(f"  {method.upper()}: Macro F1 = {method_data['macro_f1_mean']:.4f} ± {method_data['macro_f1_std']:.4f}")
print(f"{'='*60}\n")

# Version the metric tables and summaries
get_ipython().system('git add logs/fold*_metrics_*.csv logs/fold*_cm_*.csv logs/metrics_summary.csv logs/step16_*.json')
get_ipython().system('git commit -m \"metrics: compute per-fold F1 and confusion matrices\"')

print(f"Step 16 completed\n{'='*60}")



Step 16: Metric Computation
Class order: ['walking', 'running', 'sitting', 'standing', 'lying', 'stairs_up', 'stairs_down', 'jumping']


Fold 0: test subject=proband1

--- MINIROCKET ---
Macro F1: 0.6667
Per-class F1:
  walking         (n= 396): 0.8958
  running         (n= 379): 0.9400
  sitting         (n=   0): 0.0000
  standing        (n= 382): 0.7373
  lying           (n=   0): 0.0000
  stairs_up       (n= 385): 0.9233
  stairs_down     (n= 303): 0.8476
  jumping         (n=  50): 0.9899
✓ Saved: logs/fold0_metrics_minirocket.csv
✓ Saved: logs/fold0_cm_minirocket.csv

--- TST ---
Macro F1: 0.6492
Per-class F1:
  walking         (n= 396): 0.8475
  running         (n= 379): 0.9458
  sitting         (n=   0): 0.0000
  standing        (n= 382): 0.6862
  lying           (n=   0): 0.0000
  stairs_up       (n= 385): 0.9032
  stairs_down     (n= 303): 0.8779
  jumping         (n=  50): 0.9333
✓ Saved: logs/fold0_metrics_tst.csv
✓ Saved: logs/fold0_cm_tst.csv

Fold 1: test subject=proban

In [None]:
# ================ Step 17: Aggregation & Confidence ================
import numpy as np
import pandas as pd
from pathlib import Path
import json
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

print("\n\nStep 17: Aggregation & Confidence")
print("=" * 60)

# Plots produced by this step go to figures/
Path('figures').mkdir(parents=True, exist_ok=True)

# Per-fold results written by the metric-computation step, plus the class configuration
metrics_data = json.loads(Path('logs/step16_metrics_summary.json').read_text())
classes_cfg = json.loads(Path('configs/classes.json').read_text())

label_names = metrics_data['class_order']
per_fold_results = metrics_data['per_fold_results']
# Methods are discovered from the records rather than hard-coded; sorted for a stable order
METHODS = sorted(set(rec['method'] for rec in per_fold_results))

def bootstrap_ci(values, subjects, n_bootstrap=10000, ci=0.95, seed=42):
    """Subject-level percentile bootstrap confidence interval for the mean.

    Subjects (not individual values) are resampled with replacement; each
    resample is summarised by the mean of the per-subject means of the drawn
    subjects, and the interval is read from the percentiles of those means.

    Args:
        values: Scores, aligned element-wise with ``subjects``.
        subjects: Subject identifier of each score.
        n_bootstrap: Number of bootstrap resamples.
        ci: Central coverage of the interval (0.95 gives the 2.5/97.5 percentiles).
        seed: Seed of the random generator; the default reproduces earlier results.

    Returns:
        ``(lower, upper)``. With fewer than two distinct subjects no resampling
        is possible and both bounds equal the plain mean of ``values``.
    """
    unique_subj = list(dict.fromkeys(subjects))
    if len(unique_subj) < 2:
        m = float(np.mean(values))
        return m, m

    # The per-subject means do not change between resamples, so compute them
    # once instead of rescanning all values inside every bootstrap iteration.
    grouped = {}
    for v, s in zip(values, subjects):
        grouped.setdefault(s, []).append(v)
    subject_mean = {s: np.mean(vs) for s, vs in grouped.items()}

    rng = np.random.default_rng(seed)
    n_subjects = len(unique_subj)

    bootstrap_means = []
    for _ in range(n_bootstrap):
        sampled_subjects = rng.choice(unique_subj, size=n_subjects, replace=True)
        # A drawn subject without any value (possible only when `values` is
        # shorter than `subjects`) is skipped.
        sampled_values = [subject_mean[s] for s in sampled_subjects if s in subject_mean]
        bootstrap_means.append(np.mean(sampled_values))

    lower = np.percentile(bootstrap_means, (1 - ci) / 2 * 100)
    upper = np.percentile(bootstrap_means, (1 + ci) / 2 * 100)
    return lower, upper

def macro_f1_present_of(record):
    """Macro F1 of one fold record restricted to classes present in its test set.

    A class counts as present when its entry in ``record['per_class_support']``
    is greater than zero (missing entries count as zero). Returns NaN when no
    class is present.
    """
    present_scores = []
    for class_name in label_names:
        if record['per_class_support'].get(class_name, 0) > 0:
            present_scores.append(record['per_class_f1'][class_name])
    if len(present_scores) == 0:
        return np.nan
    return float(np.mean(present_scores))

# One row per method: mean, std and subject-level bootstrap CI of the fold scores
summary_data = []

for method in METHODS:
    method_results = [r for r in per_fold_results if r['method'] == method]
    subjects = [r['test_subject'] for r in method_results]

    macro_f1_values = [r['macro_f1'] for r in method_results]
    macro_f1_mean = np.mean(macro_f1_values)
    macro_f1_std = np.std(macro_f1_values)
    macro_f1_ci_lower, macro_f1_ci_upper = bootstrap_ci(macro_f1_values, subjects)

    # Macro F1 over the classes that actually occur in each fold's test set.
    # Folds where it is undefined (NaN) are excluded from the bootstrap. The
    # pairs are split with comprehensions because `zip(*pairs)` cannot be
    # unpacked into two names when every fold is NaN and `pairs` is empty.
    macro_f1_present_values = [macro_f1_present_of(r) for r in method_results]
    present_pairs = [(v, s) for v, s in zip(macro_f1_present_values, subjects) if not np.isnan(v)]
    vals = [v for v, _ in present_pairs]
    subs = [s for _, s in present_pairs]
    macro_f1_present_mean = np.nanmean(macro_f1_present_values)
    macro_f1_present_std = np.nanstd(macro_f1_present_values)
    macro_f1_present_ci_lower, macro_f1_present_ci_upper = bootstrap_ci(vals, subs)

    per_class_f1_mean = {}
    per_class_f1_std = {}
    per_class_ci = {}

    # NOTE(review): these per-class statistics include folds in which the class
    # has zero test support (its F1 is stored as 0 there), which pulls the mean
    # down and inflates the std; confirm whether such folds should be excluded.
    for label in label_names:
        f1_values = [r['per_class_f1'][label] for r in method_results]
        per_class_f1_mean[label] = np.mean(f1_values)
        per_class_f1_std[label] = np.std(f1_values)
        ci_lower, ci_upper = bootstrap_ci(f1_values, subjects)
        per_class_ci[label] = (ci_lower, ci_upper)

    summary_data.append({
        'method': method,
        'n_folds': len(method_results),
        'n_subjects': len(set(subjects)),
        'macro_f1_mean': macro_f1_mean,
        'macro_f1_std': macro_f1_std,
        'macro_f1_ci_lower': macro_f1_ci_lower,
        'macro_f1_ci_upper': macro_f1_ci_upper,
        'macro_f1_present_mean': macro_f1_present_mean,
        'macro_f1_present_std': macro_f1_present_std,
        'macro_f1_present_ci_lower': macro_f1_present_ci_lower,
        'macro_f1_present_ci_upper': macro_f1_present_ci_upper,
        **{f'{label}_f1_mean': per_class_f1_mean[label] for label in label_names},
        **{f'{label}_f1_std': per_class_f1_std[label] for label in label_names}
    })

    print(f"\n{method.upper()}:")
    print(f"  Macro F1: {macro_f1_mean:.4f} ± {macro_f1_std:.4f}")
    print(f"  Bootstrap 95% CI: [{macro_f1_ci_lower:.4f}, {macro_f1_ci_upper:.4f}]")
    print(f"  Macro F1 (present): {macro_f1_present_mean:.4f} ± {macro_f1_present_std:.4f}")
    print(f"  Bootstrap 95% CI: [{macro_f1_present_ci_lower:.4f}, {macro_f1_present_ci_upper:.4f}]")
    print(f"  Per-class F1 (mean ± std):")
    # The per-class intervals are only printed; they are not part of summary_data
    for label in label_names:
        ci_l, ci_u = per_class_ci[label]
        print(f"    {label:15s}: {per_class_f1_mean[label]:.4f} ± {per_class_f1_std[label]:.4f}  CI:[{ci_l:.4f}, {ci_u:.4f}]")

summary_df = pd.DataFrame(summary_data)
summary_df.to_csv('logs/summary_metrics.csv', index=False)
print(f"\n✓ Saved: logs/summary_metrics.csv")

# Radar chart of the mean per-class F1, one polar panel per method.
# The panel count follows METHODS (which is derived from the data) instead of
# being fixed at two; squeeze=False keeps `axes` indexable for a single method.
n_panels = max(1, len(METHODS))
fig, axes = plt.subplots(1, n_panels, figsize=(7 * n_panels, 6),
                         subplot_kw=dict(polar=True), squeeze=False)
axes = axes.ravel()

for idx, method in enumerate(METHODS):
    method_results = [r for r in per_fold_results if r['method'] == method]
    per_class_f1 = [np.mean([r['per_class_f1'][lbl] for r in method_results]) for lbl in label_names]

    angles = np.linspace(0, 2*np.pi, len(label_names), endpoint=False)
    values = np.array(per_class_f1)
    # Repeat the first point at the end so the outline closes
    angles_plot = np.concatenate([angles, angles[:1]])
    values_plot = np.concatenate([values, values[:1]])

    ax = axes[idx]
    ax.plot(angles_plot, values_plot, linewidth=2, marker='o', label=method.upper())
    ax.fill(angles_plot, values_plot, alpha=0.25)
    ax.set_xticks(angles)
    ax.set_xticklabels(label_names, fontsize=9)
    ax.set_ylim(0, 1)
    ax.set_title(f'{method.upper()} - Per-Class F1', fontsize=12, fontweight='bold')
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('figures/step17_radar.svg', format='svg', dpi=150)
plt.close()
print(f"✓ Saved: figures/step17_radar.svg")

# Grouped bar chart: mean per-class F1 per method, error bars show the std across folds
fig, ax = plt.subplots(figsize=(12, 6))

x = np.arange(len(label_names))
# Share a fixed group width among however many methods there are and centre the
# group on the tick; for two methods this gives width 0.35 and offsets of ±0.175.
n_methods = max(1, len(METHODS))
width = 0.7 / n_methods

for idx, method in enumerate(METHODS):
    method_results = [r for r in per_fold_results if r['method'] == method]
    per_class_f1 = [np.mean([r['per_class_f1'][label] for r in method_results]) for label in label_names]
    per_class_std = [np.std([r['per_class_f1'][label] for r in method_results]) for label in label_names]

    offset = width * (idx - (n_methods - 1) / 2)
    ax.bar(x + offset, per_class_f1, width, yerr=per_class_std,
           label=method.upper(), alpha=0.8, capsize=5)

ax.set_xlabel('Class', fontsize=12)
ax.set_ylabel('F1 Score', fontsize=12)
ax.set_title('Per-Class F1 Score Comparison', fontsize=14, weight='bold')
ax.set_xticks(x)
ax.set_xticklabels(label_names, rotation=45, ha='right')
ax.set_ylim(0, 1.0)
ax.legend()
ax.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig('figures/step17_bar.svg', format='svg', dpi=150)
plt.close()
print(f"✓ Saved: figures/step17_bar.svg")

def _summary_cell(method, column):
    """Value of ``column`` in the ``summary_df`` row that belongs to ``method``."""
    return summary_df[summary_df['method'] == method][column].values[0]


def _mean_std_ci(method, prefix):
    """Collect the mean/std/CI columns that share ``prefix`` as plain floats."""
    return {
        'mean': float(_summary_cell(method, f'{prefix}_mean')),
        'std': float(_summary_cell(method, f'{prefix}_std')),
        'ci_lower': float(_summary_cell(method, f'{prefix}_ci_lower')),
        'ci_upper': float(_summary_cell(method, f'{prefix}_ci_upper'))
    }


# JSON mirror of summary_df with one nested entry per method
methods_payload = {}
for method in METHODS:
    methods_payload[method] = {
        'n_folds': int(_summary_cell(method, 'n_folds')),
        'n_subjects': int(_summary_cell(method, 'n_subjects')),
        'macro_f1': _mean_std_ci(method, 'macro_f1'),
        'macro_f1_present': _mean_std_ci(method, 'macro_f1_present'),
        'per_class_f1': {
            label: {
                'mean': float(_summary_cell(method, f'{label}_f1_mean')),
                'std': float(_summary_cell(method, f'{label}_f1_std'))
            }
            for label in label_names
        }
    }

with open('logs/step17_summary.json', 'w') as f:
    json.dump({
        'bootstrap_config': {
            'n_bootstrap': 10000,
            'confidence_interval': 0.95,
            'level': 'subject',
            'random_seed': 42
        },
        'methods': methods_payload
    }, f, indent=2)

print(f"\n{'='*60}")
print(f"✓ Aggregation completed")
print(f"✓ CSV: logs/summary_metrics.csv")
print(f"✓ JSON: logs/step17_summary.json")
print(f"✓ Radar plot: figures/step17_radar.svg")
print(f"✓ Bar chart: figures/step17_bar.svg")
print(f"{'='*60}\n")

# Version the aggregated tables and figures
get_ipython().system('git add logs/summary_metrics.csv logs/step17_*.json figures/step17_*.svg')
get_ipython().system('git commit -m "aggregate: compute mean±std and bootstrap CI for metrics"')

print(f"Step 17 completed\n{'='*60}")



Step 17: Aggregation & Confidence

MINIROCKET:
  Macro F1: 0.7879 ± 0.1273
  Bootstrap 95% CI: [0.7223, 0.8513]
  Macro F1 (present): 0.8432 ± 0.0817
  Bootstrap 95% CI: [0.7998, 0.8832]
  Per-class F1 (mean ± std):
    walking        : 0.8046 ± 0.2203  CI:[0.6773, 0.8961]
    running        : 0.9012 ± 0.1291  CI:[0.8259, 0.9568]
    sitting        : 0.7260 ± 0.2542  CI:[0.5871, 0.8387]
    standing       : 0.7867 ± 0.1237  CI:[0.7218, 0.8465]
    lying          : 0.8202 ± 0.2469  CI:[0.6786, 0.9210]
    stairs_up      : 0.6678 ± 0.3440  CI:[0.4849, 0.8254]
    stairs_down    : 0.6129 ± 0.3745  CI:[0.4170, 0.7920]
    jumping        : 0.9841 ± 0.0126  CI:[0.9774, 0.9903]

TST:
  Macro F1: 0.7452 ± 0.1324
  Bootstrap 95% CI: [0.6778, 0.8103]
  Macro F1 (present): 0.7965 ± 0.0920
  Bootstrap 95% CI: [0.7481, 0.8395]
  Per-class F1 (mean ± std):
    walking        : 0.7816 ± 0.2238  CI:[0.6538, 0.8679]
    running        : 0.8490 ± 0.1201  CI:[0.7834, 0.9035]
    sitting        : 0.6252

In [None]:
# ================ Step 18: Significance Testing ================
import numpy as np
import pandas as pd
from pathlib import Path
import json
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon
from scipy.stats import rankdata
import warnings
warnings.filterwarnings('ignore')

print("\n\nStep 18: Significance Testing")
print("=" * 60)

Path('figures').mkdir(parents=True, exist_ok=True)

with open('logs/step16_metrics_summary.json', 'r') as f:
    metrics_data = json.load(f)

per_fold_results = metrics_data['per_fold_results']

# Extract per-subject Macro-F1 vectors
models = ['minirocket', 'tst']

# First recorded macro F1 for every (subject, method) pair
macro_f1_by_pair = {}
for r in per_fold_results:
    macro_f1_by_pair.setdefault((r['test_subject'], r['method']), r['macro_f1'])

# The comparison is paired, so keep only subjects that have a score for every
# model. Previously a missing score silently stayed at 0.0 in the matrix and
# entered the tests as if the model had scored zero.
candidate_subjects = sorted({r['test_subject'] for r in per_fold_results if r['method'] == 'minirocket'})
subjects = [s for s in candidate_subjects if all((s, m) in macro_f1_by_pair for m in models)]
dropped_subjects = [s for s in candidate_subjects if s not in subjects]
if dropped_subjects:
    print(f"⚠️  Dropping subjects without results for every model: {dropped_subjects}")

print(f"Number of subjects: {len(subjects)}")
print(f"Models: {models}\n")

# Construct subject × model matrix
f1_matrix = np.zeros((len(subjects), len(models)))
for i, subj in enumerate(subjects):
    for j, model_name in enumerate(models):
        f1_matrix[i, j] = macro_f1_by_pair[(subj, model_name)]

print("Subject × Model Macro-F1 matrix:")
df_matrix = pd.DataFrame(f1_matrix, index=subjects, columns=models)
print(df_matrix)
print()

# Paired Wilcoxon signed-rank test (requires at least 6 samples)
# Column 0 of f1_matrix holds minirocket, column 1 holds tst (order of `models`)
if len(subjects) < 6:
    stat_wilcoxon, p_wilcoxon = None, None
    print(f"⚠️  Insufficient sample size (n={len(subjects)} < 6), skipping Wilcoxon test\n")
else:
    minirocket_scores = f1_matrix[:, 0]
    tst_scores = f1_matrix[:, 1]
    stat_wilcoxon, p_wilcoxon = wilcoxon(minirocket_scores, tst_scores)
    is_significant = 'Yes' if p_wilcoxon < 0.05 else 'No'
    print(f"Wilcoxon signed-rank test (minirocket vs tst):")
    print(f"  Statistic = {stat_wilcoxon:.4f}")
    print(f"  p-value = {p_wilcoxon:.4f}")
    print(f"  Significant: {is_significant} (α=0.05)\n")

# Cliff's δ effect size
def cliffs_delta(x, y):
    """Compute Cliff's delta between two samples.

    The statistic is ``(#{x_i > y_j} - #{x_i < y_j}) / (len(x) * len(y))`` taken over
    every cross pair, so it lies in [-1, 1]; ties contribute nothing.

    Parameters
    ----------
    x, y : array-like of numbers
        The two samples. They are flattened to 1-D before comparison.

    Returns
    -------
    float
        Positive when values of ``x`` tend to exceed values of ``y``.

    Raises
    ------
    ValueError
        If either sample is empty (the statistic is undefined).
    """
    xs = np.asarray(x, dtype=float).ravel()
    ys = np.asarray(y, dtype=float).ravel()
    n_pairs = xs.size * ys.size
    if n_pairs == 0:
        raise ValueError("cliffs_delta requires two non-empty samples")

    # Broadcast to an (n1, n2) comparison grid instead of a Python double loop.
    # Separate > and < counts (rather than np.sign of the difference) keep NaN
    # pairs counted as ties, as both comparisons are False for NaN.
    wins = np.count_nonzero(xs[:, None] > ys[None, :])
    losses = np.count_nonzero(xs[:, None] < ys[None, :])
    return (wins - losses) / n_pairs

# Effect size of MiniROCKET (column 0) relative to TST (column 1).
delta = cliffs_delta(f1_matrix[:, 0], f1_matrix[:, 1])
print("Cliff's δ (minirocket vs tst):")
print(f"  δ = {delta:.4f}")

# Upper bounds on |δ|, checked in ascending order; anything at or above the last
# bound is labelled "large".
_DELTA_BANDS = ((0.147, "negligible"), (0.33, "small"), (0.474, "medium"))
magnitude = next(
    (label for upper_bound, label in _DELTA_BANDS if abs(delta) < upper_bound),
    "large",
)
print(f"  Effect size: {magnitude}\n")

# Rank the models within each subject (negating the scores gives rank 1 to the
# highest Macro-F1), then average each model's rank over subjects.
per_subject_ranks = rankdata(-f1_matrix, axis=1)
avg_ranks = np.mean(per_subject_ranks, axis=0)
print("Average ranks (lower is better):")
for model_name, model_rank in zip(models, avg_ranks):
    print(f"  {model_name:15s}: {model_rank:.2f}")
print()

# Rank plot: one vertical tick per model at its average rank, with the rank value
# below the tick and the model name above it. Axes are hidden.
fig, ax = plt.subplots(figsize=(8, 2))

baseline_y = 0.5
for model_name, model_rank in zip(models, avg_ranks):
    ax.plot([model_rank, model_rank], [baseline_y - 0.05, baseline_y + 0.05], 'k-', linewidth=2)
    ax.text(model_rank, baseline_y - 0.25, f'{model_rank:.2f}', ha='center', va='top', fontsize=10)
    ax.text(model_rank, baseline_y + 0.25, model_name, ha='center', va='bottom', fontsize=11, weight='bold')

# Half a rank of padding on either side of the extreme average ranks.
ax.set_xlim(min(avg_ranks) - 0.5, max(avg_ranks) + 0.5)
ax.set_ylim(0, 1)
ax.axis('off')
plt.tight_layout()
plt.savefig('figures/step18_cd.svg', format='svg', dpi=150, bbox_inches='tight')
plt.close()
print("✓ Saved: figures/step18_cd.svg")

# Assemble the JSON summary. The Wilcoxon fields are None when the test was skipped.
comparison_label = 'minirocket vs tst'

wilcoxon_summary = {
    'comparison': comparison_label,
    'statistic': None if stat_wilcoxon is None else float(stat_wilcoxon),
    'p_value': None if p_wilcoxon is None else float(p_wilcoxon),
    'significant': None if p_wilcoxon is None else bool(p_wilcoxon < 0.05),
    'note': None if len(subjects) >= 6 else 'Skipped due to insufficient samples',
}

effect_size_summary = {
    'comparison': comparison_label,
    'delta': float(delta),
    'magnitude': magnitude,
}

results = {
    'subjects': subjects,
    'models': models,
    'n_subjects': len(subjects),
    'f1_matrix': f1_matrix.tolist(),
    'wilcoxon_test': wilcoxon_summary,
    'cliffs_delta': effect_size_summary,
    'average_ranks': dict(zip(models, map(float, avg_ranks))),
}

with open('logs/step18_stats.json', 'w') as f:
    json.dump(results, f, indent=2)

# Flat CSV view of the same numbers; only the Wilcoxon row carries a p-value, and a
# skipped test is written as NaN.
metric_names = ['Wilcoxon', 'Cliff\'s δ', 'Avg Rank (MiniROCKET)', 'Avg Rank (TST)']
metric_values = [
    np.nan if stat_wilcoxon is None else stat_wilcoxon,
    delta,
    avg_ranks[0],
    avg_ranks[1],
]
metric_p_values = [np.nan if p_wilcoxon is None else p_wilcoxon, np.nan, np.nan, np.nan]

df_results = pd.DataFrame({
    'metric': metric_names,
    'value': metric_values,
    'p_value': metric_p_values,
})
df_results.to_csv('logs/step18_stats.csv', index=False)

rule = "=" * 60
print("\n" + rule)
for message in (
    "✓ Significance testing completed",
    "✓ Results: logs/step18_stats.json",
    "✓ CSV: logs/step18_stats.csv",
    "✓ CD plot: figures/step18_cd.svg",
):
    print(message)
print(rule + "\n")

print("Step 18 completed\n" + rule)



Step 18: Significance Testing
Number of subjects: 15
Models: ['minirocket', 'tst']

Subject × Model Macro-F1 matrix:
           minirocket       tst
proband1     0.666741  0.649242
proband10    0.802280  0.841033
proband11    0.906379  0.920052
proband12    0.915105  0.806431
proband13    0.896687  0.836988
proband14    0.614378  0.514166
proband15    0.887102  0.835436
proband2     0.962299  0.872871
proband3     0.763632  0.568620
proband4     0.551127  0.514923
proband5     0.761963  0.801733
proband6     0.909414  0.852789
proband7     0.654848  0.633691
proband8     0.651998  0.687805
proband9     0.875110  0.841656

Wilcoxon signed-rank test (minirocket vs tst):
  Statistic = 21.0000
  p-value = 0.0256
  Significant: Yes (α=0.05)

Cliff's δ (minirocket vs tst):
  δ = 0.2267
  Effect size: small

Average ranks (lower is better):
  minirocket     : 1.27
  tst            : 1.73

✓ Saved: figures/step18_cd.svg

✓ Significance testing completed
✓ Results: logs/step18_stats.json
✓ CS

In [None]:
# ================ Step 19: Latency/Resource Evaluation ================
import os
# Request a single thread from the OpenMP / MKL / OpenBLAS pools so the latency
# figures below are single-threaded. setdefault only assigns when the variable is
# not already present, so an externally configured thread count is left untouched.
# NOTE(review): these variables are presumably read when the numeric libraries are
# first loaded; an earlier cell (Step 18) already imports numpy, so in a shared
# kernel they may have no effect here — TODO confirm.
os.environ.setdefault("OMP_NUM_THREADS", "1")
os.environ.setdefault("MKL_NUM_THREADS", "1")
os.environ.setdefault("OPENBLAS_NUM_THREADS", "1")

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from pathlib import Path
import json
import pickle
import time
import psutil
import platform
import subprocess
from threadpoolctl import threadpool_limits
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Step banner.
print("\n\nStep 19: Latency/Resource Evaluation")
print("=" * 60)

# Make sure the output directory for the saved plot exists.
Path('figures').mkdir(parents=True, exist_ok=True)

# Run on the GPU when one is visible to torch, otherwise on the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

def get_active_folds(path="logs/active_folds.json"):
    """Return the value stored under ``"folds"`` in the JSON file at ``path``.

    An empty list is returned when the file does not exist. A file that exists but
    lacks the ``"folds"`` key raises ``KeyError``.
    """
    folds_file = Path(path)
    if not folds_file.exists():
        return []
    payload = json.loads(folds_file.read_text())
    return payload["folds"]

def _detect_cpu_model():
    """Best-effort CPU description for the report.

    Prefers the first ``model name`` entry of ``/proc/cpuinfo``. ``platform.processor()``
    is only a fallback because on Linux it can return just the architecture string
    (e.g. ``x86_64``), which would otherwise mask the real model name.
    """
    try:
        with open('/proc/cpuinfo', 'r') as cpuinfo:
            for line in cpuinfo:
                if line.lower().startswith('model name') and ':' in line:
                    return line.split(':', 1)[1].strip()
    except OSError:
        # No /proc/cpuinfo (non-Linux host or restricted sandbox): use the fallback.
        pass
    return platform.processor() or platform.machine() or 'unknown'


with open('configs/splits.json', 'r') as f:
    splits_cfg = json.load(f)

with open('logs/step16_metrics_summary.json', 'r') as f:
    metrics_data = json.load(f)

active_folds = get_active_folds()
if not active_folds:
    # get_active_folds() returns [] when its JSON file is missing; say so now instead
    # of letting the fold loop silently skip everything.
    print("⚠️  No active folds listed in logs/active_folds.json; no fold will be evaluated")
N_REPEATS = 50  # timed repetitions per latency measurement

# Hardware information
hw_info = {
    'cpu': _detect_cpu_model(),
    # Ask torch for the current CUDA device instead of shelling out to nvidia-smi,
    # which may be absent from PATH even when CUDA is usable.
    'gpu': torch.cuda.get_device_name() if torch.cuda.is_available() else 'N/A',
    'ram_gb': round(psutil.virtual_memory().total / (1024**3), 1),
    'python': platform.python_version(),
    'torch': torch.__version__,
    'numpy': np.__version__,
    'sklearn': __import__('sklearn').__version__,
    'sktime': __import__('sktime').__version__
}

print(f"Hardware information:")
for k, v in hw_info.items():
    print(f"  {k}: {v}")
print()

# TST model definition
class PatchEmbedding(nn.Module):
    """Cut a 3-D input into non-overlapping patches along its last axis and embed them.

    Each patch gathers ``patch_len`` consecutive steps from every channel and is
    mapped by one linear layer from ``n_channels * patch_len`` to ``d_model`` values.
    The number of patches is fixed at construction as ``seq_len // patch_len``.
    """

    def __init__(self, n_channels, seq_len, patch_len, d_model):
        super().__init__()
        self.patch_len = patch_len
        self.n_patches = seq_len // patch_len
        self.proj = nn.Linear(n_channels * patch_len, d_model)

    def forward(self, x):
        """Map ``x`` of shape (batch, channels, length) to (batch, n_patches, d_model)."""
        batch_size, _, _ = x.shape  # unpacking enforces a 3-D input
        # Window size == step, so windows do not overlap: (batch, channels, patches, patch_len).
        windows = x.unfold(2, self.patch_len, self.patch_len)
        # Bring the patch axis forward, then flatten channels x patch_len per patch.
        tokens = windows.transpose(1, 2).reshape(batch_size, self.n_patches, -1)
        return self.proj(tokens)

class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to a (batch, seq, d_model) tensor.

    Even feature columns hold ``sin(pos * w_i)`` and odd columns ``cos(pos * w_i)``
    with ``w_i = exp(-ln(10000) * 2i / d_model)``. The table is built once for
    ``max_len`` positions and registered as the buffer ``pe``, so it follows the
    module across devices and appears in its ``state_dict``.

    Parameters
    ----------
    d_model : int
        Feature width of the tokens. Even and odd values are both supported.
    max_len : int, default 5000
        Number of positions precomputed in the table.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer odd column than even column, so the
        # frequency vector is trimmed to match; for even d_model the slice is a no-op.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        pe = pe.unsqueeze(0)  # leading axis of size 1 broadcasts over the batch
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(1)`` positions."""
        return x + self.pe[:, :x.size(1)]

class TST(nn.Module):
    """Patch-based Transformer encoder classifier.

    Pipeline: patch embedding -> sinusoidal positional encoding -> ``depth`` encoder
    layers (feed-forward width ``4 * d_model``, batch-first) -> mean over the patch
    axis -> dropout -> linear layer producing ``num_classes`` outputs.

    Submodule attribute names are part of the checkpoint format (``state_dict`` keys)
    and must stay as they are.
    """

    def __init__(self, n_channels, seq_len, patch_len, num_classes, d_model, n_heads, depth, dropout):
        super().__init__()
        self.patch_embedding = PatchEmbedding(n_channels, seq_len, patch_len, d_model)
        self.pos_encoding = PositionalEncoding(d_model)
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=d_model,
                nhead=n_heads,
                dim_feedforward=d_model * 4,
                dropout=dropout,
                batch_first=True,
            ),
            num_layers=depth,
        )
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for input ``x``."""
        tokens = self.pos_encoding(self.patch_embedding(x))
        pooled = self.transformer(tokens).mean(dim=1)  # average over the patch axis
        return self.fc(self.dropout(pooled))

def count_parameters(model):
    """Return the total number of scalar elements over all of ``model``'s parameters.

    Every tensor yielded by ``model.parameters()`` is counted, whether or not it
    requires gradients.
    """
    total = 0
    for tensor in model.parameters():
        total += tensor.numel()
    return total

def estimate_flops(model, input_shape):
    """Profile ``model`` with ``thop`` on one random input and return formatted counts.

    Parameters
    ----------
    model : torch.nn.Module
        Model to profile; it must have at least one parameter, since the probe input
        is placed on the device of its first parameter.
    input_shape : tuple of int
        Shape of a single sample WITHOUT the batch axis; a batch axis of 1 is prepended.

    Returns
    -------
    tuple
        ``(flops, params)`` as returned by ``thop.clever_format`` with ``"%.3f"``.
        NOTE(review): whether thop's first count is FLOPs or multiply-accumulates
        should be confirmed against the installed thop version.

    Raises
    ------
    ImportError
        If ``thop`` is not installed (it is imported lazily here).
    """
    from thop import clever_format, profile

    probe = torch.randn(1, *input_shape)
    probe = probe.to(next(model.parameters()).device)
    raw_flops, raw_params = profile(model, inputs=(probe,), verbose=False)
    flops, params = clever_format([raw_flops, raw_params], "%.3f")
    return flops, params

# Untimed calls issued before every measurement so that one-off start-up costs
# (lazy initialisation, caches, CUDA kernel loading) stay out of the percentiles.
N_WARMUP = 3


def _time_calls_ms(fn, n_repeats, n_warmup=N_WARMUP, sync=None):
    """Time repeated calls of the zero-argument callable ``fn``.

    Parameters
    ----------
    fn : callable
        Work to measure; its return value is discarded.
    n_repeats : int
        Number of timed calls.
    n_warmup : int
        Number of untimed calls made first.
    sync : callable or None
        Called right before starting and right before stopping each timer
        (e.g. ``torch.cuda.synchronize``), so asynchronous work is fully included.

    Returns
    -------
    list of float
        Wall-clock duration of each timed call, in milliseconds.
    """
    for _ in range(n_warmup):
        fn()
    durations_ms = []
    for _ in range(n_repeats):
        if sync is not None:
            sync()
        start = time.perf_counter()
        fn()
        if sync is not None:
            sync()
        durations_ms.append((time.perf_counter() - start) * 1000)
    return durations_ms


def _fold_metric(per_fold_results, fold_id, method):
    """Return the first Step 16 record for ``(fold_id, method)``; IndexError if absent."""
    matches = [r for r in per_fold_results if r['fold'] == fold_id and r['method'] == method]
    if not matches:
        raise IndexError(f"No Step 16 metrics for fold {fold_id}, method '{method}'")
    return matches[0]


all_results = []

for fold in splits_cfg['folds']:
    k = fold['fold']

    if k not in active_folds:
        continue

    test_subj = fold['test_subject']

    print(f"\n{'='*60}")
    print(f"Fold {k}: test subject={test_subj}")
    print(f"{'='*60}")

    # Retrieve F1 scores
    mr_result = _fold_metric(metrics_data['per_fold_results'], k, 'minirocket')
    tst_result = _fold_metric(metrics_data['per_fold_results'], k, 'tst')

    mr_f1 = mr_result['macro_f1']
    tst_f1 = tst_result['macro_f1']

    # ============ MiniROCKET Resource Evaluation ============
    print(f"\n--- MiniROCKET ---")

    # Load test data (raw windows): stack the six sensor channels on axis 1 and keep
    # only the windows whose split label is 'test'.
    norm_data = np.load(f'features/windows_normalized_fold{k}.npz')
    test_mask = norm_data['splits'] == 'test'
    CHANNELS = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
    X_raw = np.stack([norm_data[ch] for ch in CHANNELS], axis=1)[test_mask]
    n_test = len(X_raw)

    # Load transformer and Ridge.
    # SECURITY: pickle.load can execute arbitrary code; only load artifacts written
    # by this pipeline.
    with open(f'models/transformer_minirocket_fold{k}.pkl', 'rb') as f:
        transformer = pickle.load(f)

    with open(f'models/ridge_fold{k}.pkl', 'rb') as f:
        model_data = pickle.load(f)
    ridge = model_data['ridge']
    vt = model_data['variance_filter']

    mr_sample = X_raw[:1]

    def _transform_sample():
        """Feature generation for one window, returned as a plain array."""
        feats = transformer.transform(mr_sample)
        return feats.values if hasattr(feats, 'values') else feats

    # Single-sample test: feature generation time
    with threadpool_limits(limits=1, user_api='blas'):
        transform_times = _time_calls_ms(_transform_sample, N_REPEATS)

    # Single-sample test: classification time (features are prepared outside the timer)
    sample_features = vt.transform(_transform_sample())

    with threadpool_limits(limits=1, user_api='blas'):
        clf_times = _time_calls_ms(lambda: ridge.predict(sample_features), N_REPEATS)

    transform_p50 = np.percentile(transform_times, 50)
    transform_p90 = np.percentile(transform_times, 90)
    clf_p50 = np.percentile(clf_times, 50)
    clf_p90 = np.percentile(clf_times, 90)
    # Stage percentiles are summed; this approximates, but is not exactly, the
    # percentile of the end-to-end time.
    total_p50 = transform_p50 + clf_p50
    total_p90 = transform_p90 + clf_p90

    # Model size (on-disk size of the pickled artifacts)
    transformer_size = Path(f'models/transformer_minirocket_fold{k}.pkl').stat().st_size / (1024**2)
    ridge_size = Path(f'models/ridge_fold{k}.pkl').stat().st_size / (1024**2)
    total_size = transformer_size + ridge_size

    print(f"  Transform time: p50={transform_p50:.3f}ms, p90={transform_p90:.3f}ms")
    print(f"  Classification time: p50={clf_p50:.3f}ms, p90={clf_p90:.3f}ms")
    print(f"  Total latency: p50={total_p50:.3f}ms, p90={total_p90:.3f}ms")
    print(f"  Model size: {total_size:.2f}MB (transformer={transformer_size:.2f}MB, ridge={ridge_size:.2f}MB)")
    print(f"  Macro F1: {mr_f1:.4f}")

    # ============ TST Resource Evaluation ============
    print(f"\n--- TST ---")

    # Load model. weights_only=False unpickles arbitrary objects, so the checkpoint
    # must come from this pipeline.
    checkpoint = torch.load(f'models/tst_fold{k}.pt', weights_only=False, map_location=device)
    model_cfg = checkpoint['model_config']

    model = TST(**model_cfg).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # Parameter count
    n_params = count_parameters(model)

    # FLOPs: optional (needs thop). Report the reason when it cannot be computed
    # instead of silently swallowing every exception.
    try:
        flops, _ = estimate_flops(model, (model_cfg['n_channels'], model_cfg['seq_len']))
    except Exception as exc:
        print(f"  ⚠️  FLOPs estimation unavailable: {type(exc).__name__}: {exc}")
        flops = 'N/A'

    # Latency test (the host-to-device copy of the sample is not timed)
    tensors = torch.load(f'interim/tensors_fold{k}.pt', weights_only=False)
    X_test = tensors['X_test']
    tst_sample = X_test[:1].to(device)

    cuda_sync = torch.cuda.synchronize if device.type == 'cuda' else None
    with torch.inference_mode():
        latencies = _time_calls_ms(lambda: model(tst_sample), N_REPEATS, sync=cuda_sync)

    latency_p50 = np.percentile(latencies, 50)
    latency_p90 = np.percentile(latencies, 90)

    # Peak memory (approximate): the counter is reset first, so the figure is the
    # highest allocation level reached during one forward pass.
    if device.type == 'cuda':
        torch.cuda.reset_peak_memory_stats()
        with torch.inference_mode():
            _ = model(tst_sample)
        peak_mem_mb = torch.cuda.max_memory_allocated() / (1024**2)
    else:
        peak_mem_mb = None

    # Model size
    tst_size = Path(f'models/tst_fold{k}.pt').stat().st_size / (1024**2)

    print(f"  #Parameters: {n_params:,}")
    print(f"  FLOPs: {flops}")
    print(f"  Latency: p50={latency_p50:.3f}ms, p90={latency_p90:.3f}ms")
    if peak_mem_mb is not None:
        print(f"  Peak memory: {peak_mem_mb:.2f}MB")
    print(f"  Model size: {tst_size:.2f}MB")
    print(f"  Macro F1: {tst_f1:.4f}")

    all_results.append({
        'fold': k,
        'test_subject': test_subj,
        'method': 'minirocket',
        'macro_f1': float(mr_f1),
        'transform_time_p50_ms': float(transform_p50),
        'transform_time_p90_ms': float(transform_p90),
        'clf_time_p50_ms': float(clf_p50),
        'clf_time_p90_ms': float(clf_p90),
        'total_latency_p50_ms': float(total_p50),
        'total_latency_p90_ms': float(total_p90),
        'model_size_mb': float(total_size),
        'params': None,
        'flops': None
    })

    all_results.append({
        'fold': k,
        'test_subject': test_subj,
        'method': 'tst',
        'macro_f1': float(tst_f1),
        'transform_time_p50_ms': None,
        'transform_time_p90_ms': None,
        'clf_time_p50_ms': None,
        'clf_time_p90_ms': None,
        'total_latency_p50_ms': float(latency_p50),
        'total_latency_p90_ms': float(latency_p90),
        'model_size_mb': float(tst_size),
        'params': int(n_params),
        'flops': str(flops)
    })

# Without any evaluated fold the DataFrame has no columns and the per-method
# filtering below would fail with an opaque KeyError('method'); fail with a clear
# message instead.
if not all_results:
    raise RuntimeError(
        "No folds were evaluated: check that logs/active_folds.json lists folds "
        "present in configs/splits.json"
    )

# Save results (one row per fold and method)
df_results = pd.DataFrame(all_results)
df_results.to_csv('logs/step19_resources.csv', index=False)

# Aggregated statistics: per-method means over folds
agg_results = []
for method in ['minirocket', 'tst']:
    method_data = df_results[df_results['method'] == method]

    agg = {
        'method': method,
        'macro_f1_mean': method_data['macro_f1'].mean(),
        'total_latency_p50_mean_ms': method_data['total_latency_p50_ms'].mean(),
        'total_latency_p90_mean_ms': method_data['total_latency_p90_ms'].mean(),
        'model_size_mean_mb': method_data['model_size_mb'].mean()
    }

    if method == 'minirocket':
        # Only MiniROCKET rows carry the transform / classification stage timings.
        agg['transform_time_p50_mean_ms'] = method_data['transform_time_p50_ms'].mean()
        agg['transform_time_p90_mean_ms'] = method_data['transform_time_p90_ms'].mean()
        agg['clf_time_p50_mean_ms'] = method_data['clf_time_p50_ms'].mean()
        agg['clf_time_p90_mean_ms'] = method_data['clf_time_p90_ms'].mean()
    else:
        # Parameter count and FLOPs are taken from the first TST row rather than averaged.
        agg['params'] = int(method_data['params'].iloc[0])
        agg['flops'] = method_data['flops'].iloc[0]

    agg_results.append(agg)

df_agg = pd.DataFrame(agg_results)
df_agg.to_csv('logs/step19_resources_agg.csv', index=False)

# Pareto plots: three trade-off panels that share one drawing recipe.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# (x column, y column, x label, y label, title), one entry per panel, left to right.
panel_specs = [
    ('total_latency_p50_ms', 'macro_f1', 'Latency p50 (ms)', 'Macro F1', 'F1 vs Latency'),
    ('model_size_mb', 'macro_f1', 'Model Size (MB)', 'Macro F1', 'F1 vs Model Size'),
    ('model_size_mb', 'total_latency_p50_ms', 'Model Size (MB)', 'Latency p50 (ms)', 'Latency vs Model Size'),
]

avg_mr = df_results[df_results['method'] == 'minirocket']
avg_tst = df_results[df_results['method'] == 'tst']
# Star markers for the per-method means, coloured to match the per-fold points.
mean_markers = [(avg_mr, 'C0', 'MiniROCKET (avg)'), (avg_tst, 'C1', 'TST (avg)')]

for ax, (x_col, y_col, x_label, y_label, panel_title) in zip(axes, panel_specs):
    # One point per fold for each method.
    for method in ['minirocket', 'tst']:
        data = df_results[df_results['method'] == method]
        ax.scatter(data[x_col], data[y_col],
                   label=method.upper(), s=100, alpha=0.7)

    for method_rows, colour, legend_label in mean_markers:
        ax.scatter(method_rows[x_col].mean(), method_rows[y_col].mean(),
                   marker='*', s=300, c=colour, edgecolors='black', linewidths=1.5, label=legend_label)

    ax.set_xlabel(x_label, fontsize=11)
    ax.set_ylabel(y_label, fontsize=11)
    ax.set_title(panel_title, fontsize=12, weight='bold')
    ax.legend()
    ax.grid(alpha=0.3)

plt.tight_layout()
plt.savefig('figures/step19_pareto.svg', format='svg', dpi=150)
plt.close()

# Save full report: hardware, measurement settings, per-fold rows and aggregates.
measurement_config = {
    'cpu_threads': 1,
    'batch_size': 1,
    'n_repeats': N_REPEATS,
    'platform': device.type,
}
method_notes = {
    'minirocket': 'Transform time + Classification time (both CPU single-thread)',
    'tst': 'End-to-end inference time',
    'fairness': 'MiniROCKET includes preprocessing, TST is raw end-to-end',
}
report = {
    'hardware': hw_info,
    'config': measurement_config,
    'per_fold_results': all_results,
    'aggregated_results': agg_results,
    'notes': method_notes,
}

with open('logs/step19_resources.json', 'w') as f:
    json.dump(report, f, indent=2)

# Console summary of what was written, followed by the aggregate table.
rule = '=' * 60
print("\n" + rule)
print(f"✓ Completed resource evaluation for {len(active_folds)} folds")
for artifact_line in (
    "✓ Per-fold results: logs/step19_resources.csv",
    "✓ Aggregated results: logs/step19_resources_agg.csv",
    "✓ JSON report: logs/step19_resources.json",
    "✓ Pareto plot: figures/step19_pareto.svg",
):
    print(artifact_line)
print("\nSummary:")
print(df_agg.to_string(index=False))
print(rule + "\n")

print("Step 19 completed\n" + rule)



Step 19: Latency/Resource Evaluation
Hardware information:
  cpu: x86_64
  gpu: NVIDIA L4
  ram_gb: 53.0
  python: 3.12.12
  torch: 2.8.0+cu126
  numpy: 1.26.4
  sklearn: 1.4.2
  sktime: 0.30.0


Fold 0: test subject=proband1

--- MiniROCKET ---
  Transform time: p50=4.423ms, p90=4.542ms
  Classification time: p50=0.086ms, p90=0.108ms
  Total latency: p50=4.509ms, p90=4.650ms
  Model size: 0.44MB (transformer=0.06MB, ridge=0.38MB)
  Macro F1: 0.6667

--- TST ---
  #Parameters: 210,120
  FLOPs: N/A
  Latency: p50=1.615ms, p90=1.644ms
  Peak memory: 21.33MB
  Model size: 2.04MB
  Macro F1: 0.6492

Fold 1: test subject=proband10

--- MiniROCKET ---
  Transform time: p50=4.428ms, p90=4.538ms
  Classification time: p50=0.082ms, p90=0.102ms
  Total latency: p50=4.510ms, p90=4.640ms
  Model size: 0.44MB (transformer=0.06MB, ridge=0.38MB)
  Macro F1: 0.8023

--- TST ---
  #Parameters: 210,120
  FLOPs: N/A
  Latency: p50=1.585ms, p90=1.652ms
  Peak memory: 21.41MB
  Model size: 2.04MB
  Macro