In [1]:
# RealWorld-HAR (RealWorld2016, University of Mannheim)
# Stage the download in a scratch directory; %cd makes it the working
# directory for every shell command below.
!mkdir -p /content/data/rwhar
%cd /content/data/rwhar

# Attempt HTTPS first (disabling certificate verification due to an SNI mismatch on the host); on failure, fall back to HTTP
# NOTE(review): neither path authenticates the server (--no-check-certificate
# on HTTPS, plain HTTP on the fallback) and nothing in this cell compares the
# archive against a published digest -- confirm integrity out of band if it
# matters. `-c` asks wget to resume a partially downloaded file.
!wget -c --no-check-certificate "https://wifo5-14.informatik.uni-mannheim.de/sensor/dataset/realworld2016/realworld2016_dataset.zip" -O realworld2016_dataset.zip || wget -c "http://wifo5-14.informatik.uni-mannheim.de/sensor/dataset/realworld2016/realworld2016_dataset.zip" -O realworld2016_dataset.zip

# Decompress and perform a brief inspection
# NOTE(review): unzip runs even when both wget attempts failed. -q silences
# the per-file listing and -o overwrites existing files without prompting.
!unzip -q -o realworld2016_dataset.zip
!echo "=== top-level ==="
!ls -lah
!echo "=== dirs (depth<=2) ==="
# Only the first 20 directories (sorted, at most two levels deep) are listed
!find . -maxdepth 2 -type d | sort | head -n 20

/content/data/rwhar
--2025-11-15 08:30:56--  https://wifo5-14.informatik.uni-mannheim.de/sensor/dataset/realworld2016/realworld2016_dataset.zip
Resolving wifo5-14.informatik.uni-mannheim.de (wifo5-14.informatik.uni-mannheim.de)... 134.155.98.56
Connecting to wifo5-14.informatik.uni-mannheim.de (wifo5-14.informatik.uni-mannheim.de)|134.155.98.56|:443... connected.
	requested host name ‘wifo5-14.informatik.uni-mannheim.de’.
HTTP request sent, awaiting response... 403 Forbidden
2025-11-15 08:30:57 ERROR 403: Forbidden.

--2025-11-15 08:30:57--  http://wifo5-14.informatik.uni-mannheim.de/sensor/dataset/realworld2016/realworld2016_dataset.zip
Resolving wifo5-14.informatik.uni-mannheim.de (wifo5-14.informatik.uni-mannheim.de)... 134.155.98.56
Connecting to wifo5-14.informatik.uni-mannheim.de (wifo5-14.informatik.uni-mannheim.de)|134.155.98.56|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3721016476 (3.5G) [application/zip]
Saving to: ‘realworld2016_dataset.zip’


2

In [2]:
# ================ Step 0: Project Initialization ================
import os
from datetime import datetime

# Create directory structure
dirs = ['data/raw', 'interim', 'proc', 'features', 'models', 'logs', 'figures', 'configs']
for d in dirs:
    os.makedirs(f'/content/{d}', exist_ok=True)
print("✓ Directory structure created")

# Git Initialization
%cd /content
!git init
!git config user.name "HAR-Project"
!git config user.email "har@project.local"
print("✓ Git repository initialized")

# Persist environment information
!pip freeze > logs/env.txt
print("✓ Environment dependencies saved to logs/env.txt")

# Persist random seed list and hardware information
import json
import subprocess

meta = {
    "timestamp": datetime.now().isoformat(),
    "random_seeds": [42, 123, 456, 789, 2024],  # predefined seeds
    "hardware": {
        "gpu": subprocess.getoutput("nvidia-smi --query-gpu=name --format=csv,noheader"),
        "cpu": subprocess.getoutput("cat /proc/cpuinfo | grep 'model name' | head -1").split(':')[1].strip(),
    }
}

with open('logs/init_meta.json', 'w') as f:
    json.dump(meta, f, indent=2)
print("✓ Metadata saved to logs/init_meta.json")

# Initial commit
!git add .
!git commit -m "init: project structure and environment"
git_hash = subprocess.getoutput("git rev-parse HEAD")
print(f"✓ Git commit hash: {git_hash[:8]}")


# ================ Step 1: Data Acquisition (Compliance) ================
# Move raw data to data/raw/ and retain structure.
# The staging directory is removed only after every entry has been moved, and
# only with rmdir (which refuses non-empty directories), so a failed move can
# never silently delete the downloaded dataset.
import shutil

staging_dir = '/content/data/rwhar'
raw_data_dir = '/content/data/raw'
os.makedirs(raw_data_dir, exist_ok=True)

if os.path.isdir(staging_dir):
    for entry in sorted(os.listdir(staging_dir)):
        destination = os.path.join(raw_data_dir, entry)
        if os.path.lexists(destination):
            # Do not merge or overwrite: leave both copies for manual review.
            raise FileExistsError(
                f"Refusing to overwrite existing {destination}; "
                f"resolve the conflict and re-run this cell")
        shutil.move(os.path.join(staging_dir, entry), destination)
    os.rmdir(staging_dir)
    print("✓ Raw data moved to data/raw/")
else:
    # Re-running the cell after a successful move is a no-op.
    print("✓ Nothing to move: /content/data/rwhar does not exist")

# Compute checksums
import hashlib

def calc_checksum(filepath, chunk_size=1024 * 1024):
    """Return the hexadecimal SHA-256 digest of a file's contents.

    The file is read in fixed-size chunks so that arbitrarily large files can
    be hashed without loading them into memory.

    Args:
        filepath: Path of the file to hash (anything accepted by ``open``).
        chunk_size: Number of bytes read per iteration. Must be positive; it
            affects only speed and memory use, never the resulting digest.

    Returns:
        The SHA-256 digest as a lowercase hex string.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size <= 0:
        # read(0) would end the loop immediately and hash nothing, while a
        # negative size would read the whole file into memory at once.
        raise ValueError("chunk_size must be a positive integer")
    h = hashlib.sha256()
    with open(filepath, 'rb') as f:
        # iter(callable, sentinel) keeps reading until read() returns b"" (EOF)
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

checksums = {}
for root, _, files in os.walk('/content/data/raw'):
    for f in files:
        path = os.path.join(root, f)
        rel_path = os.path.relpath(path, '/content/data/raw')
        checksums[rel_path] = calc_checksum(path)

with open('/content/logs/checksums.txt', 'w') as f:
    f.write(f"# RealWorld2016 dataset checksums (SHA256)\n")
    f.write(f"# Generated at: {datetime.now().isoformat()}\n\n")
    for path, sha in sorted(checksums.items()):
        f.write(f"{sha}  {path}\n")

print(f"✓ Computed checksums for {len(checksums)} files → logs/checksums.txt")

# Record data source
with open('/content/logs/data_source.txt', 'w') as f:
    f.write("RealWorld2016 Human Activity Recognition Dataset\n")
    f.write("=" * 50 + "\n")
    f.write("Source: University of Mannheim\n")
    f.write("URL: https://wifo5-14.informatik.uni-mannheim.de/sensor/dataset/realworld2016/\n")
    f.write("Citation: Sztyler, T., & Stuckenschmidt, H. (2016). On-body localization of wearable devices.\n")
    f.write(f"Downloaded: {datetime.now().isoformat()}\n")

print("✓ Data source recorded to logs/data_source.txt")

# Commit data acquisition records
!git add logs/
!git commit -m "data: add RealWorld2016 checksums and source"
print(f"\n{'='*60}\nProject initialization and data acquisition completed\n{'='*60}")

✓ Directory structure created
/content
[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /content/.git/
✓ Git repository initialized
✓ Environment dependencies saved to logs/env.txt
✓ Metadata saved to logs/init_meta.json
[master (root-commit) 51d5820] init: project structure and environment
 1837 files changed, 51719 insertions(+)
 create mode 100644 .config/.last_opt_in_prompt.yaml
 create mode 100644 .config/.last_survey_prompt.yaml
 create mode 100644 .config/.last_update_check.json
 create mode 100644 .config/active_co

In [3]:
# ================ Step 2: Sensor/Location Selection (Revised) ================
# Unpacks the per-sensor CSV archives, locates the waist accelerometer and
# gyroscope recordings, prints a short preview of one file per sensor, and
# writes a selection report to logs/sensor_selection.json.
import pandas as pd
from pathlib import Path
import json
import zipfile

print("Step 2: Sensor/Location Selection")
print("=" * 60)

raw_dir = Path('/content/data/raw')

# Decompress all zip files first
# The archive list is materialised before the loop starts, so zip files that
# only appear as a result of the extraction below are not visited.
print("Extracting sensor data...")
zip_files = list(raw_dir.rglob('*.zip'))
print(f"Found {len(zip_files)} zip files")

for zip_path in zip_files:
    if 'csv.zip' in zip_path.name:
        # Target is a sibling directory named after the archive minus ".zip"
        extract_dir = zip_path.parent / zip_path.stem
        # An existing directory counts as "already extracted"; a directory left
        # half-populated by an interrupted run is therefore not repaired.
        if not extract_dir.exists():
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(extract_dir)

print("✓ Extraction complete")

# Search for CSV files under acc and gyr directories
print("\nSearching for sensor directories...")
acc_dirs = list(raw_dir.rglob('acc_*_csv'))
gyr_dirs = list(raw_dir.rglob('gyr_*_csv'))

print(f"✓ Found {len(acc_dirs)} ACC directories")
print(f"✓ Found {len(gyr_dirs)} GYR directories")

# Sanity preview of the first accelerometer directory (order is whatever
# rglob yields; it is not sorted)
if acc_dirs:
    print(f"\nExample ACC directory: {acc_dirs[0].relative_to(raw_dir)}")
    sample_files = list(acc_dirs[0].glob('*.csv'))
    print(f"Number of files under {acc_dirs[0].name}: {len(sample_files)}")
    if sample_files:
        print(f"Example file: {sample_files[0].name}")

# Find all files containing "waist"
# The match is on the file name only and is case-sensitive.
waist_files = {'acc': [], 'gyr': []}

for acc_dir in acc_dirs:
    for f in acc_dir.glob('*waist*.csv'):
        waist_files['acc'].append(f)

for gyr_dir in gyr_dirs:
    for f in gyr_dir.glob('*waist*.csv'):
        waist_files['gyr'].append(f)

print(f"\n✓ Found Waist-ACC files: {len(waist_files['acc'])}")
print(f"✓ Found Waist-GYR files: {len(waist_files['gyr'])}")

# Display example files
if waist_files['acc']:
    print(f"\nExample ACC file: {waist_files['acc'][0].relative_to(raw_dir)}")
    sample_acc = pd.read_csv(waist_files['acc'][0])
    print(f"Columns: {list(sample_acc.columns)}")
    print(f"Shape: {sample_acc.shape}")
    print(sample_acc.head(3))

if waist_files['gyr']:
    print(f"\nExample GYR file: {waist_files['gyr'][0].relative_to(raw_dir)}")
    sample_gyr = pd.read_csv(waist_files['gyr'][0])
    print(f"Columns: {list(sample_gyr.columns)}")
    print(f"Shape: {sample_gyr.shape}")
    print(sample_gyr.head(3))

# Collect metadata
# Every waist CSV is read in full just to record its shape and column names.
waist_metadata = []
for sensor_type in ['acc', 'gyr']:
    for filepath in waist_files[sensor_type]:
        parts = filepath.parts
        # Subject id: first path component starting with "proband"
        # (raises IndexError if the path contains none)
        subject = [p for p in parts if p.startswith('proband')][0]
        # Activity: second "_"-separated token of the folder name,
        # e.g. "acc_jumping_csv" -> "jumping"
        activity = filepath.parent.name.split('_')[1]

        df = pd.read_csv(filepath)
        waist_metadata.append({
            'subject': subject,
            'activity': activity,
            'sensor': sensor_type,
            'original_path': str(filepath.relative_to(raw_dir)),
            'shape': list(df.shape),
            'columns': list(df.columns)
        })

# Persist selection report
# Only the first 10 metadata entries are written; the file counts cover all.
with open('/content/logs/sensor_selection.json', 'w') as f:
    json.dump({
        'selection': {
            'position': 'waist',
            'sensors': ['acc', 'gyr'],
            'channels': 6,
            'rationale': 'Single position to avoid domain shift; ACC+GYRO is the standard configuration for HAR'
        },
        'files_found': {
            'acc': len(waist_files['acc']),
            'gyr': len(waist_files['gyr'])
        },
        'metadata': waist_metadata[:10]
    }, f, indent=2)

print(f"\n✓ Selection report saved: logs/sensor_selection.json")

# The relative path assumes the working directory is still /content
# (set by `%cd /content` in the project-initialization cell).
!git add logs/sensor_selection.json
!git commit -m "data: select waist position with acc+gyr sensors"


# ================ Step 3: Column Alignment and Naming ================
# Defines the raw -> standard column mapping, saves it to configs/cols.json,
# and writes a Markdown schema report with a small missing-value spot check.
print("\n\nStep 3: Column Alignment and Naming")
print("=" * 60)

# Analyze column names
# Only the first three files per sensor are inspected.
acc_cols = set()
gyr_cols = set()

for filepath in waist_files['acc'][:3]:
    df = pd.read_csv(filepath)
    acc_cols.update(df.columns)

for filepath in waist_files['gyr'][:3]:
    df = pd.read_csv(filepath)
    gyr_cols.update(df.columns)

print(f"ACC column names: {sorted(acc_cols)}")
print(f"GYR column names: {sorted(gyr_cols)}")

# Define standard mapping
# Both sensors use the same raw column names, so the mapping is per sensor.
# The raw 'id' column is not listed and therefore keeps its name on rename.
standard_mapping = {
    'acc': {
        'attr_x': 'acc_x',
        'attr_y': 'acc_y',
        'attr_z': 'acc_z',
        'attr_time': 'timestamp'
    },
    'gyr': {
        'attr_x': 'gyro_x',
        'attr_y': 'gyro_y',
        'attr_z': 'gyro_z',
        'attr_time': 'timestamp'
    }
}

# NOTE(review): the units below are declared here, not derived from the data --
# confirm against the dataset documentation.
cols_config = {
    'standard_columns': ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z'],
    'units': {
        'acc_x': 'm/s²', 'acc_y': 'm/s²', 'acc_z': 'm/s²',
        'gyro_x': 'rad/s', 'gyro_y': 'rad/s', 'gyro_z': 'rad/s'
    },
    'mapping': standard_mapping,
    'timestamp_col': 'timestamp'
}

with open('/content/configs/cols.json', 'w') as f:
    json.dump(cols_config, f, indent=2)

print("\n✓ Column mapping configuration saved: configs/cols.json")

# Generate schema report
# The report is assembled as a list of Markdown fragments and written at once.
# `datetime` is not imported in this cell; it comes from the
# `from datetime import datetime` in the project-initialization cell.
report = [
    "# RealWorld2016 Data Schema Report\n\n",
    f"Generated at: {datetime.now().isoformat()}\n\n",
    "## Standard column definitions\n\n",
    "| Column | Unit | Description |\n|------|------|------|\n"
]

for col in cols_config['standard_columns']:
    unit = cols_config['units'][col]
    # Standard names are "<sensor>_<axis>": the prefix selects the sensor
    # label and the suffix gives the axis letter
    sensor = 'Accelerometer' if 'acc' in col else 'Gyroscope'
    axis = col.split('_')[1].upper()
    report.append(f"| {col} | {unit} | {sensor} {axis}-axis |\n")

report.append("\n## Original column mapping\n\n### Accelerometer\n")
for orig, std in standard_mapping['acc'].items():
    report.append(f"- `{orig}` → `{std}`\n")

report.append("\n### Gyroscope\n")
for orig, std in standard_mapping['gyr'].items():
    report.append(f"- `{orig}` → `{std}`\n")

# Missing-value statistics
# Spot check only: the first five files per sensor, not the whole dataset.
report.append("\n## Data quality checks\n\n")
for sensor in ['acc', 'gyr']:
    report.append(f"### {sensor.upper()} Missing values (sample of 5 files)\n\n")
    has_missing = False
    for fp in waist_files[sensor][:5]:
        df = pd.read_csv(fp)
        missing = df.isnull().sum()
        if missing.sum() > 0:
            # List only the columns that actually contain nulls
            report.append(f"- {fp.name}: {missing[missing > 0].to_dict()}\n")
            has_missing = True
    if not has_missing:
        report.append("- No missing values ✓\n")
    report.append("\n")

with open('/content/logs/schema_report.md', 'w') as f:
    f.writelines(report)

print("✓ Schema report saved: logs/schema_report.md")
print("\n" + "".join(report))

# Relative paths assume the working directory is /content
!git add configs/cols.json logs/schema_report.md
!git commit -m "data: standardize column names and units"

print(f"\n{'='*60}")
print("Steps 2–3 completed")
print(f"{'='*60}")

Step 2: Sensor/Location Selection
Extracting sensor data...
Found 1441 zip files
✓ Extraction complete

Searching for sensor directories...
✓ Found 120 ACC directories
✓ Found 120 GYR directories

Example ACC directory: proband13/data/acc_jumping_csv
Number of files under acc_jumping_csv: 7
Example file: acc_jumping_forearm.csv

✓ Found Waist-ACC files: 114
✓ Found Waist-GYR files: 114

Example ACC file: proband13/data/acc_jumping_csv/acc_jumping_waist.csv
Columns: ['id', 'attr_time', 'attr_x', 'attr_y', 'attr_z']
Shape: (5384, 5)
   id      attr_time     attr_x    attr_y    attr_z
0   1  1436984545021  19.607914 -0.734421  1.403002
1   2  1436984545041  18.750190 -1.446098  2.309806
2   3  1436984545060  18.218678 -1.485602  1.869871

Example GYR file: proband13/data/gyr_sitting_csv/Gyroscope_sitting_waist.csv
Columns: ['id', 'attr_time', 'attr_x', 'attr_y', 'attr_z']
Shape: (32267, 5)
   id      attr_time    attr_x    attr_y    attr_z
0   1  1436980333523 -0.016214 -0.016998 -0.00438

In [4]:
# ================ Step 4: Timeline Normalization (Final) ================
import numpy as np
import pandas as pd
from scipy import interpolate
import matplotlib.pyplot as plt
from pathlib import Path
import json
import zipfile

print("\n\nStep 4: Timeline Normalization")
print("=" * 60)

raw_dir = Path('/content/data/raw')

# Decompression: unpack acc_/gyr_ archives that contain a waist recording and
# have no extraction directory yet
print("Extracting waist data...")
for subject_dir in raw_dir.glob('proband*'):
    sensor_root = subject_dir / 'data'
    if not sensor_root.exists():
        continue
    for archive in sensor_root.glob('*_csv.zip'):
        if not archive.stem.startswith(('acc_', 'gyr_')):
            continue
        target = archive.parent / archive.stem
        if target.exists():
            continue
        with zipfile.ZipFile(archive, 'r') as bundle:
            members = bundle.namelist()
            if any('waist' in member.lower() for member in members):
                bundle.extractall(target)

# Scan: classify every waist CSV by the sensor prefix of its parent folder
waist_files = {'acc': [], 'gyr': []}
for csv_path in raw_dir.rglob('*.csv'):
    if 'waist' not in csv_path.name.lower():
        continue
    folder = csv_path.parent.name
    if folder.startswith('acc_'):
        waist_files['acc'].append(csv_path)
    elif folder.startswith('gyr_'):
        waist_files['gyr'].append(csv_path)

print(f"✓ ACC: {len(waist_files['acc'])}, GYR: {len(waist_files['gyr'])}")

# Improved pairing: directory mapping + same-name preference
def find_gyr_for_acc(acc_path):
    """Locate the gyroscope CSV that corresponds to an accelerometer CSV.

    The gyroscope folder is the sibling of the accelerometer folder with
    ``acc_`` replaced by ``gyr_``. Inside it, candidates are tried in order:

    1. the accelerometer file name with ``acc_`` replaced by ``gyr_``;
    2. the accelerometer file name with its ``acc_`` prefix replaced by
       ``Gyroscope_`` (the naming the extracted gyroscope files use, e.g.
       ``Gyroscope_sitting_waist.csv``);
    3. as a last resort, the alphabetically first ``*waist*.csv``.

    Args:
        acc_path: ``pathlib.Path`` of the accelerometer CSV.

    Returns:
        The matching gyroscope ``Path``, or ``None`` when the gyroscope folder
        is missing or contains no waist CSV.
    """
    gyr_dir = acc_path.parent.parent / acc_path.parent.name.replace('acc_', 'gyr_')
    if not gyr_dir.exists():
        return None

    candidate_names = [acc_path.name.replace('acc_', 'gyr_')]
    if acc_path.name.startswith('acc_'):
        # Without this candidate the same-name lookup never matches, and
        # multi-part recordings would all pair with the first sorted file.
        candidate_names.append('Gyroscope_' + acc_path.name[len('acc_'):])

    for name in candidate_names:
        cand = gyr_dir / name
        if cand.exists():
            return cand

    cands = sorted(gyr_dir.glob('*waist*.csv'))
    return cands[0] if cands else None

# Build (acc_path, gyr_path, proband, activity) tuples. Accelerometer files
# without a gyroscope counterpart are dropped without a message.
file_pairs = []
for acc_path in waist_files['acc']:
    gyr_path = find_gyr_for_acc(acc_path)
    if not gyr_path:
        continue
    # Subject id: first path component starting with "proband"
    # (next() raises StopIteration if there is none)
    proband = next(p for p in acc_path.parts if p.startswith('proband'))
    # Activity: second "_"-separated token of the folder name,
    # e.g. "acc_jumping_csv" -> "jumping"
    activity = acc_path.parent.name.split('_')[1]
    file_pairs.append((acc_path, gyr_path, proband, activity))

print(f"✓ File pairs: {len(file_pairs)}")

# Column-rename mapping (raw attr_* names -> standard names) saved in Step 3
with open('/content/configs/cols.json', 'r') as f:
    cols_config = json.load(f)

# TARGET_FS: resampling rate in Hz; the processing loop also reuses it as the
# minimum number of merged rows required to accept a merge.
# MAX_GAP_MS: a sample interval above this (ms) starts a new segment.
# MIN_DURATION_S: segments shorter than this (s) are discarded.
TARGET_FS = 50
MAX_GAP_MS = 200
MIN_DURATION_S = 1.0
interim_dir = Path('/content/interim')
interim_dir.mkdir(exist_ok=True)

def detect_time_unit(df, col='timestamp'):
    """Guess the unit of a timestamp column and convert it to nanoseconds.

    The guess is based on the median positive step between the 200 smallest
    timestamps: each supported unit has an (exclusive) range of plausible
    steps, and the first range containing the median wins.

    Args:
        df: DataFrame holding the timestamp column.
        col: Name of the timestamp column.

    Returns:
        ``(timestamps_in_ns, unit)`` where ``unit`` is one of ``'s'``,
        ``'ms'``, ``'us'`` or ``'ns'``; for ``'ns'`` the column is returned
        unchanged. ``(None, None)`` when there is no positive step or the
        median step falls outside every range.
    """
    earliest = df[col].sort_values().iloc[:200].values
    steps = np.diff(earliest)
    steps = steps[steps > 0]
    if len(steps) == 0:
        return None, None
    dt = np.median(steps)

    # (exclusive lower bound, exclusive upper bound, factor to ns, unit label);
    # a factor of None means the values are already nanoseconds
    bands = (
        (0.01, 5, 1e9, 's'),
        (10, 100, 1e6, 'ms'),
        (10000, 100000, 1e3, 'us'),
        (1e7, 1e8, None, 'ns'),
    )
    for lower, upper, factor, unit in bands:
        if lower < dt < upper:
            converted = df[col] if factor is None else df[col] * factor
            return converted, unit
    return None, None

# Per-pair processing: convert timestamps to nanoseconds, merge the two sensor
# streams (with three progressively looser fallbacks), split at large gaps,
# resample each segment onto a uniform TARGET_FS grid and write one CSV per
# proband/activity into interim/.
all_stats = []
skipped = []

for idx, (acc_path, gyr_path, proband, activity) in enumerate(file_pairs):
    print(f"\n[{idx+1}/{len(file_pairs)}] {proband}/{activity}")

    acc_df = pd.read_csv(acc_path).rename(columns=cols_config['mapping']['acc'])
    gyr_df = pd.read_csv(gyr_path).rename(columns=cols_config['mapping']['gyr'])

    acc_ts_ns, acc_unit = detect_time_unit(acc_df)
    gyr_ts_ns, gyr_unit = detect_time_unit(gyr_df)

    if acc_ts_ns is None or gyr_ts_ns is None:
        print(f"  ⚠️ Skipped: unable to determine timestamp unit")
        skipped.append(f"{proband}_{activity}")
        continue

    # Keep only time + signal columns; merge_asof needs keys sorted, and
    # duplicate timestamps are collapsed to their first occurrence
    acc_df['timestamp_ns'] = acc_ts_ns
    gyr_df['timestamp_ns'] = gyr_ts_ns
    acc_df = acc_df[['timestamp_ns', 'acc_x', 'acc_y', 'acc_z']].sort_values('timestamp_ns').drop_duplicates('timestamp_ns')
    gyr_df = gyr_df[['timestamp_ns', 'gyro_x', 'gyro_y', 'gyro_z']].sort_values('timestamp_ns').drop_duplicates('timestamp_ns')

    df = None
    merge_mode = 'absolute'
    merge_tol = None
    offset_ns = 0

    # Adaptive tolerance
    # Tolerances are tried from tightest to loosest; the first one that leaves
    # at least TARGET_FS matched rows is accepted (no match-rate requirement).
    for tol_ms in [10, 30, 50, 100]:
        tol_ns = int(tol_ms * 1e6)
        df_try = pd.merge_asof(acc_df, gyr_df, on='timestamp_ns', direction='nearest', tolerance=tol_ns).dropna()
        if len(df_try) >= TARGET_FS:
            df = df_try
            merge_tol = tol_ms
            break

    # Fallback 1: relative time (relaxed thresholds)
    # Both streams are re-based to start at 0, i.e. they are assumed to have
    # started recording simultaneously.
    if df is None:
        for tol_ms in [10, 30, 50]:
            acc_tmp = acc_df.copy()
            gyr_tmp = gyr_df.copy()
            acc_tmp['t_rel'] = acc_tmp['timestamp_ns'] - acc_tmp['timestamp_ns'].iloc[0]
            gyr_tmp['t_rel'] = gyr_tmp['timestamp_ns'] - gyr_tmp['timestamp_ns'].iloc[0]

            df_try = pd.merge_asof(acc_tmp.sort_values('t_rel'), gyr_tmp.sort_values('t_rel'),
                                   on='t_rel', direction='nearest', tolerance=int(tol_ms*1e6)).dropna()

            if len(df_try) > 1:
                # Quality gates: 99th-percentile sample interval (ms) and the
                # share of the shorter stream that found a partner
                p99 = (df_try['t_rel'].diff() / 1e6).quantile(0.99)
                match_rate = len(df_try) / max(1, min(len(acc_df), len(gyr_df)))

                if len(df_try) >= TARGET_FS and p99 <= 40 and match_rate >= 0.5:
                    # From here on 'timestamp_ns' holds relative time
                    df = df_try.rename(columns={'t_rel': 'timestamp_ns'})
                    merge_mode = 'relative'
                    merge_tol = tol_ms
                    break

    # Fallback 2: offset search (broaden range and thresholds)
    # Shift the gyroscope clock from -3000 ms to +3000 ms in 50 ms steps and
    # keep the shift that yields the most matches at a fixed 30 ms tolerance.
    if df is None:
        best_df, best_matches, best_offset = None, -1, 0
        for offset_ms in range(-3000, 3001, 50):
            gyr_shift = gyr_df.copy()
            gyr_shift['timestamp_ns'] = gyr_shift['timestamp_ns'] + int(offset_ms * 1e6)
            df_try = pd.merge_asof(acc_df, gyr_shift, on='timestamp_ns',
                                   direction='nearest', tolerance=int(30*1e6)).dropna()
            if len(df_try) > best_matches:
                best_df, best_matches, best_offset = df_try, len(df_try), offset_ms

        if best_matches >= TARGET_FS and best_df is not None and len(best_df) > 1:
            p99 = (best_df['timestamp_ns'].diff() / 1e6).quantile(0.99)
            match_rate = best_matches / max(1, min(len(acc_df), len(gyr_df)))

            if p99 <= 40 and match_rate >= 0.5:
                df = best_df
                merge_mode = 'offset_search'
                merge_tol = 30
                offset_ns = int(best_offset * 1e6)

    # Fallback 3: intersection window resampling
    # Interpolates both streams onto one uniform grid over their overlapping
    # time range (needs >= 1 s of overlap). This path writes its output and
    # `continue`s, so it bypasses the gap segmentation below: the result is a
    # single segment, and its stats entry has no 'merge_tolerance_ms' key.
    if df is None:
        t0 = max(acc_df['timestamp_ns'].iloc[0], gyr_df['timestamp_ns'].iloc[0])
        t1 = min(acc_df['timestamp_ns'].iloc[-1], gyr_df['timestamp_ns'].iloc[-1])

        if t1 - t0 >= 1e9:
            STEP_NS = int(1e9 / TARGET_FS)
            # Grid covers [t0, t1): the stop value is exclusive
            t_grid = np.arange(t0, t1, STEP_NS, dtype=np.int64)

            acc_interp = interpolate.interp1d(acc_df['timestamp_ns'].values,
                                              acc_df[['acc_x', 'acc_y', 'acc_z']].values,
                                              axis=0, kind='linear', bounds_error=True)
            gyr_interp = interpolate.interp1d(gyr_df['timestamp_ns'].values,
                                              gyr_df[['gyro_x', 'gyro_y', 'gyro_z']].values,
                                              axis=0, kind='linear', bounds_error=True)

            acc_vals = acc_interp(t_grid)
            gyr_vals = gyr_interp(t_grid)

            df = pd.DataFrame({
                'timestamp': t_grid,
                'segment_id': 0,
                'proband': proband,
                'activity': activity,
                'acc_x': acc_vals[:, 0], 'acc_y': acc_vals[:, 1], 'acc_z': acc_vals[:, 2],
                'gyro_x': gyr_vals[:, 0], 'gyro_y': gyr_vals[:, 1], 'gyro_z': gyr_vals[:, 2]
            })

            out_name = f"{proband}_{activity}_waist.csv"
            df.to_csv(interim_dir / out_name, index=False)

            all_stats.append({
                'file': out_name,
                'proband': proband,
                'activity': activity,
                'acc_unit': acc_unit,
                'gyr_unit': gyr_unit,
                'merge_mode': 'intersection',
                'segments': 1,
                'samples': len(df)
            })

            print(f"  {acc_unit}/{gyr_unit}, intersection, 1 segment, {len(df)} samples")
            continue

    if df is None or len(df) < TARGET_FS:
        print(f"  ⚠️ Skipped: merge failed")
        skipped.append(f"{proband}_{activity}")
        continue

    df = df.reset_index(drop=True)
    # Interval to the previous merged sample in ms (NaN for the first row)
    df['dt_ms'] = df['timestamp_ns'].diff() / 1e6

    # Segmentation
    # Row k with dt_ms > MAX_GAP_MS becomes the first row of a new segment; the
    # leading NaN compares False and so never triggers a split.
    gaps = df['dt_ms'].values
    large_gap_idx = np.where(gaps > MAX_GAP_MS)[0]
    split_points = [0] + large_gap_idx.tolist() + [len(df)]

    segments = []
    for i in range(len(split_points) - 1):
        seg = df.iloc[split_points[i]:split_points[i + 1]].copy()
        if len(seg) > 1:
            duration_s = (seg['timestamp_ns'].iloc[-1] - seg['timestamp_ns'].iloc[0]) / 1e9
            if duration_s >= MIN_DURATION_S:
                segments.append(seg)

    if len(segments) == 0:
        print(f"  ⚠️ Skipped: no valid segments")
        skipped.append(f"{proband}_{activity}")
        continue

    # Resampling
    # Each surviving segment is linearly interpolated onto its own uniform
    # grid; segment_id values are renumbered from 0 over the kept segments.
    STEP_NS = int(1e9 / TARGET_FS)
    all_resampled = []
    for seg_id, seg in enumerate(segments):
        t_start = seg['timestamp_ns'].iloc[0]
        t_end = seg['timestamp_ns'].iloc[-1]
        # "+ 1" makes the stop inclusive so a grid point landing exactly on
        # t_end is kept; no point lies beyond t_end (bounds_error=True below)
        t_grid = np.arange(t_start, t_end + 1, STEP_NS, dtype=np.int64)

        df_seg = pd.DataFrame({
            'timestamp': t_grid,
            'segment_id': seg_id,
            'proband': proband,
            'activity': activity
        })
        for col in ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']:
            f = interpolate.interp1d(seg['timestamp_ns'], seg[col], kind='linear', bounds_error=True)
            df_seg[col] = f(t_grid)

        all_resampled.append(df_seg)

    df_final = pd.concat(all_resampled, ignore_index=True)

    # NOTE(review): the name depends only on proband and activity, so two pairs
    # sharing both would overwrite each other's output -- confirm this cannot
    # happen for the files being scanned.
    out_name = f"{proband}_{activity}_waist.csv"
    df_final.to_csv(interim_dir / out_name, index=False)

    stat = {
        'file': out_name,
        'proband': proband,
        'activity': activity,
        'acc_unit': acc_unit,
        'gyr_unit': gyr_unit,
        'merge_mode': merge_mode,
        'merge_tolerance_ms': merge_tol,
        'segments': len(segments),
        'samples': len(df_final)
    }
    if merge_mode == 'offset_search':
        stat['offset_ns'] = offset_ns

    all_stats.append(stat)

    mode_str = f"{merge_mode}" + (f"(Δ={offset_ns/1e6:.0f}ms)" if merge_mode=='offset_search' else '')
    print(f"  {acc_unit}/{gyr_unit}, {mode_str}, {len(segments)} segments, {len(df_final)} samples")

print(f"\n✓ Completed {len(all_stats)} files")
if skipped:
    print(f"⚠️ Skipped {len(skipped)}: {skipped}")

# Plotting
# Histogram of sample intervals for the first successfully processed pair.
# The merged frame is rebuilt here with a fixed 100 ms tolerance, regardless
# of which merge mode the processing loop actually used for that pair.
if all_stats:
    first_file = all_stats[0]
    first_pair = [(p[0], p[1], p[2], p[3]) for p in file_pairs if p[2] == first_file['proband'] and p[3] == first_file['activity']][0]

    acc_df = pd.read_csv(first_pair[0]).rename(columns=cols_config['mapping']['acc'])
    gyr_df = pd.read_csv(first_pair[1]).rename(columns=cols_config['mapping']['gyr'])
    # Unit detection already succeeded for this pair in the processing loop
    # (otherwise it would not be in all_stats), so the results are not None
    acc_ts_ns, _ = detect_time_unit(acc_df)
    gyr_ts_ns, _ = detect_time_unit(gyr_df)
    acc_df['timestamp_ns'] = acc_ts_ns
    gyr_df['timestamp_ns'] = gyr_ts_ns
    acc_df = acc_df[['timestamp_ns', 'acc_x', 'acc_y', 'acc_z']].sort_values('timestamp_ns').drop_duplicates('timestamp_ns')
    gyr_df = gyr_df[['timestamp_ns', 'gyro_x', 'gyro_y', 'gyro_z']].sort_values('timestamp_ns').drop_duplicates('timestamp_ns')

    df = pd.merge_asof(acc_df, gyr_df, on='timestamp_ns', direction='nearest', tolerance=int(100*1e6)).dropna()
    intervals = df['timestamp_ns'].diff() / 1e6

    fig, ax = plt.subplots(figsize=(10, 4))
    # Only intervals below 100 ms are binned, so the MAX_GAP_MS marker drawn
    # below sits to the right of all plotted data
    ax.hist(intervals[intervals < 100], bins=100, edgecolor='black', linewidth=0.5)
    ax.axvline(20, color='red', linestyle='--', label='Ideal (50Hz=20ms)')
    ax.axvline(MAX_GAP_MS, color='orange', linestyle='--', label=f'Threshold ({MAX_GAP_MS}ms)')
    ax.set_xlabel('Sampling Interval (ms)')
    ax.set_ylabel('Count')
    ax.set_title(f'Sampling Interval Distribution - {first_pair[2]}/{first_pair[3]}')
    ax.legend()
    ax.grid(alpha=0.3)
    plt.tight_layout()
    plt.savefig('/content/figures/step4_interval_hist.png', dpi=150)
    plt.close()

# Per-file statistics plus the list of skipped proband/activity pairs
with open('/content/logs/step4_summary.json', 'w') as f:
    json.dump({'files': all_stats, 'skipped': skipped}, f, indent=2)

# Relative paths assume the working directory is /content; note that the
# resampled CSVs under interim/ are committed as well
!git add figures/ logs/step4_*.json interim/
!git commit -m "preproc: final time normalization with all fallbacks"

print(f"\n{'='*60}\nStep 4 completed\n{'='*60}")



Step 4: Timeline Normalization
Extracting waist data...
✓ ACC: 114, GYR: 114
✓ File pairs: 114

[1/114] proband13/jumping
  ms/ms, absolute, 2 segments, 5370 samples

[2/114] proband13/lying
  ms/ms, absolute, 23 segments, 31336 samples

[3/114] proband13/climbingdown
  ms/ms, absolute, 20 segments, 21127 samples

[4/114] proband13/sitting
  ms/ms, absolute, 24 segments, 31261 samples

[5/114] proband13/standing
  ms/ms, absolute, 35 segments, 32877 samples

[6/114] proband13/walking
  ms/ms, absolute, 20 segments, 31882 samples

[7/114] proband13/running
  ms/ms, absolute, 21 segments, 29961 samples

[8/114] proband13/climbingup
  ms/ms, absolute, 23 segments, 29031 samples

[9/114] proband9/jumping
  ms/ms, absolute, 4 segments, 4976 samples

[10/114] proband9/lying
  ms/ms, absolute, 15 segments, 30587 samples

[11/114] proband9/climbingdown
  ms/ms, absolute, 18 segments, 24302 samples

[12/114] proband9/sitting
  ms/ms, absolute, 24 segments, 31473 samples

[13/114] proband9/sta

In [5]:
# ================ Step 5: Gravity Removal / Detrending (Batch Processing) ================
import numpy as np
import pandas as pd
from scipy.signal import butter, filtfilt
import matplotlib.pyplot as plt
from pathlib import Path
import json

print("\n\nStep 5: Gravity Removal / Detrending")
print("=" * 60)

# Input: resampled CSVs written by Step 4; output: filtered CSVs of the same name
interim_dir = Path('/content/interim')
proc_dir = Path('/content/proc')
proc_dir.mkdir(exist_ok=True)

# TARGET_FS: sampling rate (Hz) passed to the filter; it must match the rate
# Step 4 resampled to.
# CUTOFF_HZ: high-pass cutoff (Hz) applied to the accelerometer axes.
TARGET_FS = 50
CUTOFF_HZ = 0.3

def highpass_filter(data, cutoff, fs, order=3):
    """Third-order Butterworth high-pass filter"""
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
    b, a = butter(order, normal_cutoff, btype='high', analog=False)
    return filtfilt(b, a, data)

# Process all files
# For every resampled CSV: high-pass the accelerometer axes and remove the
# mean from the gyroscope axes, one segment at a time, then save the result
# under proc/ with the same file name.
interim_files = sorted(interim_dir.glob('*.csv'))
print(f"Found {len(interim_files)} files")

all_static_means = []

for idx, filepath in enumerate(interim_files):
    print(f"\n[{idx+1}/{len(interim_files)}] {filepath.name}")

    df = pd.read_csv(filepath)
    print(f"  Original: {df.shape}, {df['segment_id'].nunique()} segments")

    processed_segments = []

    # Filter per segment (filtering across a time gap would smear the
    # discontinuity into both neighbouring segments)
    for seg_id, seg_df in df.groupby('segment_id'):
        seg_df = seg_df.copy()

        # Accelerometer high-pass filtering
        for axis in ['x', 'y', 'z']:
            col = f'acc_{axis}'
            seg_df[col] = highpass_filter(seg_df[col].values, CUTOFF_HZ, TARGET_FS, order=3)

        # Gyroscope mean removal
        for axis in ['x', 'y', 'z']:
            col = f'gyro_{axis}'
            seg_df[col] = seg_df[col] - seg_df[col].mean()

        processed_segments.append(seg_df)

    df_filtered = pd.concat(processed_segments, ignore_index=True)

    # Validate static segment (from the longest segment)
    longest_seg = df_filtered.groupby('segment_id').size().idxmax()
    seg_for_verify = df_filtered[df_filtered['segment_id'] == longest_seg].reset_index(drop=True)

    window_size = TARGET_FS * 2
    acc_mag = np.sqrt(seg_for_verify['acc_x']**2 + seg_for_verify['acc_y']**2 + seg_for_verify['acc_z']**2)
    rolling_std = acc_mag.rolling(window_size).std()
    if rolling_std.notna().any():
        # rolling() labels each window by its LAST sample, so the quietest
        # window ends at idxmin(); step back to its first sample before slicing.
        static_idx = int(rolling_std.idxmin()) - window_size + 1
        static_seg = seg_for_verify.iloc[static_idx:static_idx + window_size]
    else:
        # Segment shorter than one window: no rolling value exists, so fall
        # back to the whole segment instead of failing on a NaN index.
        static_idx = 0
        static_seg = seg_for_verify

    # After gravity removal the mean over the quietest window should be ~0
    static_means = {f'acc_{ax}': static_seg[f'acc_{ax}'].mean() for ax in ['x', 'y', 'z']}
    all_static_means.append({'file': filepath.name, **static_means})

    # Save
    df_filtered.to_csv(proc_dir / filepath.name, index=False)
    print(f"  ✓ {len(df_filtered)} samples → proc/{filepath.name}")

print(f"\n✓ Completed {len(interim_files)} files")

# Plot verification figure for the first file
# Shows the three filtered accelerometer axes of the longest segment and
# shades the quietest two-second window, whose mean should be close to zero.
if interim_files:
    first_file = interim_files[0]
    df = pd.read_csv(proc_dir / first_file.name)
    longest_seg = df.groupby('segment_id').size().idxmax()
    seg = df[df['segment_id'] == longest_seg].reset_index(drop=True)

    window_size = TARGET_FS * 2
    acc_mag = np.sqrt(seg['acc_x']**2 + seg['acc_y']**2 + seg['acc_z']**2)
    rolling_std = acc_mag.rolling(window_size).std()
    if rolling_std.notna().any():
        # rolling() labels each window by its LAST sample; convert the label
        # of the quietest window into the index of its first sample.
        static_idx = int(rolling_std.idxmin()) - window_size + 1
    else:
        # Segment shorter than one window: use it from the start
        static_idx = 0
    static_seg = seg.iloc[static_idx:static_idx + window_size]

    fig, axes = plt.subplots(3, 1, figsize=(12, 8), sharex=True)
    time_sec = np.arange(len(seg)) / TARGET_FS

    # Shaded span matches the rows actually averaged for the legend value
    static_t = static_idx / TARGET_FS
    static_duration = len(static_seg) / TARGET_FS

    for i, axis in enumerate(['x', 'y', 'z']):
        ax = axes[i]
        col = f'acc_{axis}'
        ax.plot(time_sec, seg[col], linewidth=0.5, alpha=0.7)
        ax.axhline(0, color='red', linestyle='--', linewidth=1, alpha=0.5)

        static_mean = static_seg[col].mean()
        ax.axvspan(static_t, static_t + static_duration, color='green', alpha=0.2,
                   label=f'Static (mean={static_mean:.4f})')

        ax.set_ylabel(f'ACC {axis.upper()} (m/s²)')
        ax.grid(alpha=0.3)
        ax.legend(loc='upper right')

    axes[-1].set_xlabel('Time (s)')
    axes[0].set_title(f'Detrended Signal - {first_file.name} (segment {longest_seg})')
    plt.tight_layout()
    plt.savefig('/content/figures/step5_detrend_verify.png', dpi=150)
    plt.close()
    print(f"\n✓ Verification figure: figures/step5_detrend_verify.png")

# Save parameters
# Records the filter settings used above. Only the static-window means of the
# first five files are stored as a sample.
filter_params = {
    'acc_highpass': {'cutoff_hz': CUTOFF_HZ, 'order': 3, 'filter_type': 'Butterworth'},
    'gyro_detrend': 'mean_removal',
    'sampling_rate': TARGET_FS,
    'filtering_method': 'per_segment',
    'files_processed': len(interim_files),
    'static_means_samples': all_static_means[:5]
}

with open('/content/logs/step5_filter_params.json', 'w') as f:
    json.dump(filter_params, f, indent=2)

# Same effect as the `!git ...` lines used in earlier cells, written as plain
# Python calls; relative paths assume the working directory is /content
get_ipython().system('git add figures/step5_detrend_verify.png logs/step5_filter_params.json proc/')
get_ipython().system('git commit -m "preproc: batch filtering for all files"')

print(f"\n{'='*60}\nStep 5 completed\n{'='*60}")



Step 5: Gravity Removal / Detrending
Found 112 files

[1/112] proband10_climbingdown_waist.csv
  Original: (21216, 10), 20 segments
  ✓ 21216 samples → proc/proband10_climbingdown_waist.csv

[2/112] proband10_climbingup_waist.csv
  Original: (22201, 10), 21 segments
  ✓ 22201 samples → proc/proband10_climbingup_waist.csv

[3/112] proband10_jumping_waist.csv
  Original: (5193, 10), 1 segments
  ✓ 5193 samples → proc/proband10_jumping_waist.csv

[4/112] proband10_lying_waist.csv
  Original: (31164, 10), 22 segments
  ✓ 31164 samples → proc/proband10_lying_waist.csv

[5/112] proband10_running_waist.csv
  Original: (31071, 10), 31 segments
  ✓ 31071 samples → proc/proband10_running_waist.csv

[6/112] proband10_sitting_waist.csv
  Original: (30836, 10), 32 segments
  ✓ 30836 samples → proc/proband10_sitting_waist.csv

[7/112] proband10_standing_waist.csv
  Original: (31946, 10), 27 segments
  ✓ 31946 samples → proc/proband10_standing_waist.csv

[8/112] proband10_walking_waist.csv
  Origin

In [6]:
# ================ Step 6: Class Mapping ================
import pandas as pd
from pathlib import Path
import json

# Banner for this step
print("\n\nStep 6: Class Mapping", "=" * 60, sep="\n")

# Sampling rate of the processed signals and the directory they live in
TARGET_FS = 50
proc_dir = Path('/content/proc')

# Canonical class list; the position of a name in this list is its integer label,
# so the order must stay fixed across folds.
STANDARD_CLASSES = [
    'walking', 'running', 'sitting', 'standing',
    'lying', 'stairs_up', 'stairs_down', 'jumping',
]

# Dataset activity name -> standard class name.
# Only the two stair activities are renamed; all others map to themselves.
# Insertion order is kept because the mapping is serialized to JSON later in this step.
activity_mapping = {'climbingdown': 'stairs_down', 'climbingup': 'stairs_up'}
activity_mapping.update(
    (name, name)
    for name in ('jumping', 'lying', 'running', 'sitting', 'standing', 'walking')
)

# Sliding-window geometry (shared with the later windowing / feature steps)
WINDOW_SEC, OVERLAP = 3, 0.5
MIN_WINDOWS_THRESHOLD = 50          # classes with fewer windows are flagged as "tail"
WINDOW_SAMPLES = int(TARGET_FS * WINDOW_SEC)
STRIDE_SAMPLES = int(WINDOW_SAMPLES * (1 - OVERLAP))

print(f"Sliding window: {WINDOW_SEC}s ({WINDOW_SAMPLES} samples), overlap {OVERLAP*100:.0f}%, stride {STRIDE_SAMPLES}")

# Scan the processed files and count, per file, how many sliding windows fit inside
# each segment (a window never spans a segment boundary).
proc_files = sorted(proc_dir.glob('*.csv'))
print(f"\nFound {len(proc_files)} files")

activity_stats = {}            # original activity name -> {'samples', 'windows', 'files'}
proband_class_matrix = {}      # proband -> {standard class -> window count}
unmapped_activities = set()    # activities found on disk that activity_mapping does not cover

for filepath in proc_files:
    df = pd.read_csv(filepath)

    # A file without rows has no first row to read the labels from; skip it explicitly
    # instead of failing on `.iloc[0]`.
    if df.empty:
        print(f"  ⚠️ Skipping empty file: {filepath.name}")
        continue

    # Prefer the label columns; otherwise fall back to the "<proband>_<activity>_..." file name
    stem_parts = filepath.stem.split('_')
    activity = df['activity'].iloc[0] if 'activity' in df.columns else stem_parts[1]
    proband = df['proband'].iloc[0] if 'proband' in df.columns else stem_parts[0]

    # Windows per segment: 1 + (len - window) // stride for every segment long enough
    # to hold at least one window. int(...) keeps the total a plain Python int so the
    # coverage matrix stays JSON-serializable when it is dumped later in this step.
    seg_lengths = df.groupby('segment_id').size()
    usable = seg_lengths[seg_lengths >= WINDOW_SAMPLES]
    n_windows = int((1 + (usable - WINDOW_SAMPLES) // STRIDE_SAMPLES).sum())

    # Accumulate statistics under the original activity name
    per_activity = activity_stats.setdefault(activity, {'samples': 0, 'windows': 0, 'files': 0})
    per_activity['samples'] += len(df)
    per_activity['windows'] += n_windows
    per_activity['files'] += 1

    # Build the proband × class matrix (only for activities that have a standard class)
    std_act = activity_mapping.get(activity)
    if std_act is None:
        unmapped_activities.add(activity)
        continue
    proband_row = proband_class_matrix.setdefault(proband, {c: 0 for c in STANDARD_CLASSES})
    proband_row[std_act] += n_windows

# Make the previously silent exclusion visible
if unmapped_activities:
    print(f"\n⚠️ Activities without a standard-class mapping (excluded from class statistics): "
          f"{sorted(unmapped_activities)}")

# Report the raw per-activity tallies in alphabetical order
print("\nOriginal activity statistics:")
for act, stats in sorted(activity_stats.items(), key=lambda item: item[0]):
    print(f"  {act:15s}: {stats['files']:2d} files, {stats['samples']:6d} samples, {stats['windows']:4d} windows")

# Fold the original activities into the 8 standard classes
mapped_stats = {c: {'windows': 0, 'samples': 0, 'files': 0, 'original_names': []}
                for c in STANDARD_CLASSES}
tail_classes_original = []

for orig_act, stats in activity_stats.items():
    std_act = activity_mapping.get(orig_act)
    if std_act is None:
        # No standard class for this activity: it contributes nothing here
        continue

    bucket = mapped_stats[std_act]
    for field in ('windows', 'samples', 'files'):
        bucket[field] += stats[field]
    if orig_act not in bucket['original_names']:
        bucket['original_names'].append(orig_act)

    # Tail check at the level of the original activity name
    if stats['windows'] < MIN_WINDOWS_THRESHOLD:
        tail_classes_original.append({'original': orig_act, 'mapped': std_act, 'windows': stats['windows']})

# Tail check at the level of the standard classes: a class is included exactly
# when it reaches the minimum window count, otherwise it is a tail class.
included_flags = {c: mapped_stats[c]['windows'] >= MIN_WINDOWS_THRESHOLD for c in STANDARD_CLASSES}
tail_standard_classes = [c for c in STANDARD_CLASSES if not included_flags[c]]

print("\nStatistics for the 8 standard classes:")
for std_act in STANDARD_CLASSES:
    stats = mapped_stats[std_act]
    # "[MISSING]" (no windows at all) takes precedence over "[TAIL]"
    if stats['windows'] == 0:
        status = " [MISSING]"
    elif std_act in tail_standard_classes:
        status = " [TAIL]"
    else:
        status = ""
    print(f"  {std_act:15s}: {stats['files']:2d} files, {stats['samples']:6d} samples, {stats['windows']:4d} windows{status}")

# Integer label = position of the class in STANDARD_CLASSES (and the inverse lookup)
label_to_id = dict(zip(STANDARD_CLASSES, range(len(STANDARD_CLASSES))))
id_to_label = {idx: name for name, idx in label_to_id.items()}

print("\nLabel encoding:")
for idx, name in id_to_label.items():
    print(f"  {idx}: {name}")

# Coverage table: one row per proband, one column per standard class
# (column headers are the first four characters of each class name)
print("\nProband × Class coverage (number of windows):")
print(f"{'Proband':<12}" + ''.join(f"{c[:4]:>6}" for c in STANDARD_CLASSES))
for p in sorted(proband_class_matrix.keys()):
    row = proband_class_matrix[p]
    print(f"{p:<12}" + ''.join(f"{row[c]:>6}" for c in STANDARD_CLASSES))

# Assemble the class configuration from its two nested sections
window_config = {
    'window_size_sec': WINDOW_SEC,
    'window_samples': WINDOW_SAMPLES,
    'overlap': OVERLAP,
    'stride_samples': STRIDE_SAMPLES,
    'sampling_rate_hz': TARGET_FS
}

class_statistics = {
    # Per-class stats with the integer label appended under 'id'
    'per_class': {c: dict(mapped_stats[c], id=label_to_id[c]) for c in STANDARD_CLASSES},
    'tail_classes_original': tail_classes_original,
    'tail_standard_classes': tail_standard_classes,
    'included_flags': included_flags,
    'min_windows_threshold': MIN_WINDOWS_THRESHOLD,
    'proband_coverage': proband_class_matrix
}

# Note: json serializes the integer keys of id_to_label as strings
classes_config = {
    'standard_classes': STANDARD_CLASSES,
    'num_classes': len(STANDARD_CLASSES),
    'label_to_id': label_to_id,
    'id_to_label': id_to_label,
    'activity_mapping': activity_mapping,
    'window_config': window_config,
    'statistics': class_statistics
}

Path('/content/configs/classes.json').write_text(json.dumps(classes_config, indent=2))

print(f"\n✓ Class configuration saved: configs/classes.json")

if tail_standard_classes:
    print(f"\n⚠️ Tail classes at the standard level (windows < {MIN_WINDOWS_THRESHOLD}): {tail_standard_classes}")

included_classes = [c for c in STANDARD_CLASSES if included_flags[c]]
print(f"✓ Classes included for training ({len(included_classes)}/{len(STANDARD_CLASSES)}): {included_classes}")

# Version the configuration file
for git_cmd in ('git add configs/classes.json',
                'git commit -m "data: add standard-level tail classes and inclusion flags"'):
    get_ipython().system(git_cmd)

banner = '=' * 60
print(f"\n{banner}\nStep 6 completed\n{banner}")



Step 6: Class Mapping
Sliding window: 3s (150 samples), overlap 50%, stride 75

Found 112 files

Original activity statistics:
  climbingdown   : 12 files, 284118 samples, 3425 windows
  climbingup     : 12 files, 357605 samples, 4331 windows
  jumping        : 15 files,  70663 samples,  842 windows
  lying          : 14 files, 436907 samples, 5343 windows
  running        : 15 files, 518843 samples, 6230 windows
  sitting        : 14 files, 433818 samples, 5259 windows
  standing       : 15 files, 459881 samples, 5574 windows
  walking        : 15 files, 468686 samples, 5618 windows

Statistics for the 8 standard classes:
  walking        : 15 files, 468686 samples, 5618 windows
  running        : 15 files, 518843 samples, 6230 windows
  sitting        : 14 files, 433818 samples, 5259 windows
  standing       : 15 files, 459881 samples, 5574 windows
  lying          : 14 files, 436907 samples, 5343 windows
  stairs_up      : 12 files, 357605 samples, 4331 windows
  stairs_down    : 

In [7]:
# ================ Step 7: LOSO Subject Splits ================
import pandas as pd
from pathlib import Path
import json

print("\n\nStep 7: LOSO Subject Splits")
print("=" * 60)

proc_dir = Path('/content/proc')

# Scan all files and record which subject each file belongs to
proc_files = sorted(proc_dir.glob('*.csv'))
print(f"Found {len(proc_files)} files")

file_subject_map = {}

for filepath in proc_files:
    # Only the first data row is needed to identify the subject, so read just that
    # row instead of loading every sample of every file.
    df = pd.read_csv(filepath, nrows=1)

    # Use the 'proband' column when it has a value; otherwise (column missing, or a
    # header-only file) fall back to the "<proband>_..." file-name prefix.
    if 'proband' in df.columns and not df.empty:
        subject = df['proband'].iloc[0]
    else:
        subject = filepath.stem.split('_')[0]
    file_subject_map[filepath.name] = subject

# Plain lexicographic sort ('proband1', 'proband10', ..., 'proband9'). Fold ids are
# derived from this order, so it is kept stable on purpose.
subjects = sorted(set(file_subject_map.values()))
print(f"\n✓ Total subjects: {len(subjects)}")
print(f"Subject list: {subjects}")

# One fold per subject: that subject is held out for testing, all others train
loso_splits = []

for fold_id, test_subject in enumerate(subjects):
    train_subjects = list(filter(lambda s: s != test_subject, subjects))

    fold_record = dict(
        fold=fold_id,
        test_subject=test_subject,
        train_subjects=train_subjects,
        n_train=len(train_subjects),
        n_test=1,
    )
    loso_splits.append(fold_record)

    print(f"\nFold {fold_id}: Test={test_subject}, Train={train_subjects}")

# Flat CSV view of the folds: same fields, with the training subjects joined
# into a single comma-separated string
splits_csv = [
    {**split, 'train_subjects': ','.join(split['train_subjects'])}
    for split in loso_splits
]

df_splits = pd.DataFrame(splits_csv)
df_splits.to_csv('/content/logs/splits.csv', index=False)
print(f"\n✓ Splits saved: logs/splits.csv")
print("\n" + df_splits.to_string(index=False))

# JSON copy of the split definition, loaded by the later steps
splits_config = dict(
    split_method='LOSO',
    n_folds=len(subjects),
    subjects=subjects,
    file_subject_map=file_subject_map,
    folds=loso_splits,
)

Path('/content/configs/splits.json').write_text(json.dumps(splits_config, indent=2))

print(f"\n✓ Split configuration saved: configs/splits.json")

# Sanity check: no subject is used as the test subject more than once
held_out = [split['test_subject'] for split in loso_splits]
test_subjects_count = pd.Series(held_out).value_counts()
assert (test_subjects_count == 1).all(), "Each subject should appear exactly once as the test set"
print(f"\n✓ Validation passed: each subject appears exactly once as the test set")

# Version the split files
for git_cmd in ('git add logs/splits.csv configs/splits.json',
                'git commit -m "split: create LOSO folds (leave-one-subject-out)"'):
    get_ipython().system(git_cmd)

banner = '=' * 60
print(f"\n{banner}\nStep 7 completed\n{banner}")



Step 7: LOSO Subject Splits
Found 112 files

✓ Total subjects: 15
Subject list: ['proband1', 'proband10', 'proband11', 'proband12', 'proband13', 'proband14', 'proband15', 'proband2', 'proband3', 'proband4', 'proband5', 'proband6', 'proband7', 'proband8', 'proband9']

Fold 0: Test=proband1, Train=['proband10', 'proband11', 'proband12', 'proband13', 'proband14', 'proband15', 'proband2', 'proband3', 'proband4', 'proband5', 'proband6', 'proband7', 'proband8', 'proband9']

Fold 1: Test=proband10, Train=['proband1', 'proband11', 'proband12', 'proband13', 'proband14', 'proband15', 'proband2', 'proband3', 'proband4', 'proband5', 'proband6', 'proband7', 'proband8', 'proband9']

Fold 2: Test=proband11, Train=['proband1', 'proband10', 'proband12', 'proband13', 'proband14', 'proband15', 'proband2', 'proband3', 'proband4', 'proband5', 'proband6', 'proband7', 'proband8', 'proband9']

Fold 3: Test=proband12, Train=['proband1', 'proband10', 'proband11', 'proband13', 'proband14', 'proband15', 'proban

In [8]:
# ================ Step 8: Sliding Windowing and Label Assignment ================
import numpy as np
import pandas as pd
from pathlib import Path
import json
from collections import defaultdict

print("\n\nStep 8: Sliding Windowing and Label Assignment")
print("=" * 60)

# Load configuration
with open('/content/configs/classes.json', 'r') as f:
    classes_cfg = json.load(f)

with open('/content/configs/splits.json', 'r') as f:
    splits_cfg = json.load(f)

proc_dir = Path('/content/proc')
features_dir = Path('/content/features')
features_dir.mkdir(parents=True, exist_ok=True)

# Window parameters.
# Step 6 stores the window geometry in classes.json under 'window_config'; reading it
# here keeps the windows generated below in agreement with the window counts computed
# there. The literals are only fallbacks for a config file that lacks that section.
window_cfg = classes_cfg.get('window_config', {})
WINDOW_SEC = window_cfg.get('window_size_sec', 3)
OVERLAP = window_cfg.get('overlap', 0.5)
TARGET_FS = window_cfg.get('sampling_rate_hz', 50)
WINDOW_SAMPLES = int(TARGET_FS * WINDOW_SEC)
STRIDE_SAMPLES = int(WINDOW_SAMPLES * (1 - OVERLAP))
DOMINANT_THRESHOLD = 0.8   # minimum share of a window that its most frequent label must cover

# The windowing loop steps through each segment with range(..., STRIDE_SAMPLES);
# fail early with a clear message instead of an opaque range() error.
if STRIDE_SAMPLES < 1:
    raise ValueError(
        f"Invalid window configuration: stride is {STRIDE_SAMPLES} samples "
        f"(window={WINDOW_SAMPLES} samples, overlap={OVERLAP})"
    )

label_to_id = classes_cfg['label_to_id']

print(f"Window parameters: {WINDOW_SEC}s ({WINDOW_SAMPLES} samples), overlap {OVERLAP*100:.0f}%, stride {STRIDE_SAMPLES}")
print(f"Dominant-label threshold: {DOMINANT_THRESHOLD*100:.0f}%\n")

# Process each file to generate all windows
proc_files = sorted(proc_dir.glob('*.csv'))
print(f"Processing {len(proc_files)} files...\n")

# Sensor channels copied into every window record
SENSOR_COLUMNS = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']

all_windows = []          # one dict per kept window (metadata + raw channel samples)
discarded_windows = 0     # windows whose most frequent label covers < DOMINANT_THRESHOLD

for file_idx, filepath in enumerate(proc_files):
    df = pd.read_csv(filepath)

    # A file without rows has no labels to read; skip it instead of failing on `.iloc[0]`
    if df.empty:
        print(f"[{file_idx+1}/{len(proc_files)}] {filepath.name}: skipped (empty file)")
        continue

    subject = df['proband'].iloc[0]
    activity = df['activity'].iloc[0]
    std_label = classes_cfg['activity_mapping'].get(activity, activity)
    if std_label not in label_to_id:
        # Same exception type as the bare dictionary lookup, but says which file is affected
        raise KeyError(
            f"Activity '{activity}' in {filepath.name} has no standard class in configs/classes.json"
        )
    label_id = label_to_id[std_label]

    file_windows = 0
    for seg_id, seg_df in df.groupby('segment_id'):
        seg_len = len(seg_df)

        # Segments shorter than one window cannot produce any window
        if seg_len < WINDOW_SAMPLES:
            continue

        # Pull the needed columns out of the DataFrame once per segment; each window
        # below is then a cheap positional slice of these arrays.
        seg_labels = seg_df['activity'].values
        seg_channels = {col: seg_df[col].values for col in SENSOR_COLUMNS}

        for start_idx in range(0, seg_len - WINDOW_SAMPLES + 1, STRIDE_SAMPLES):
            end_idx = start_idx + WINDOW_SAMPLES

            # Share of the window covered by its most frequent raw activity label
            window_labels = seg_labels[start_idx:end_idx]
            _, counts = np.unique(window_labels, return_counts=True)
            dominant_ratio = counts.max() / len(window_labels)

            if dominant_ratio < DOMINANT_THRESHOLD:
                discarded_windows += 1
                continue

            # Save window: metadata first, then the six channels as plain lists
            window_data = {
                'subject': subject,
                'activity': std_label,
                'label': label_id,
                'file': filepath.name,
                'segment_id': seg_id,
                'start_idx': start_idx,
                'dominant_ratio': dominant_ratio
            }
            for col in SENSOR_COLUMNS:
                window_data[col] = seg_channels[col][start_idx:end_idx].tolist()

            all_windows.append(window_data)
            file_windows += 1

    print(f"[{file_idx+1}/{len(proc_files)}] {filepath.name}: {file_windows} windows ({std_label}, {subject})")

print(f"\n✓ Total windows: {len(all_windows)}")
print(f"✓ Discarded windows: {discarded_windows} (dominant label < {DOMINANT_THRESHOLD*100:.0f}%)")

# Metadata table: every window field except the six raw sensor channels
sensor_keys = ('acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z')
meta_rows = []
for w in all_windows:
    meta_rows.append({key: value for key, value in w.items() if key not in sensor_keys})
windows_meta = pd.DataFrame(meta_rows)

# Window id = "<file>:<segment_id>:<start_idx>"
segment_text = windows_meta['segment_id'].astype(str)
start_text = windows_meta['start_idx'].astype(str)
windows_meta['window_id'] = windows_meta['file'] + ':' + segment_text + ':' + start_text

windows_meta.to_csv(features_dir / 'windows_meta.csv', index=False)
print(f"\n✓ Global window metadata: features/windows_meta.csv")

# Full window records (metadata + samples) as one JSON document
with open(features_dir / 'windows_raw.json', 'w') as raw_file:
    json.dump(all_windows, raw_file)
print(f"✓ Raw window data: features/windows_raw.json")

# Per-fold train/test labelling of the global window table
print("\n" + "="*60)
print("Generate train/test splits per fold:")
print("="*60)

per_fold_totals = []

for fold in splits_cfg['folds']:
    k, test_subj = fold['fold'], fold['test_subject']

    # Copy of the metadata with the fold id and a 'split' column:
    # windows of the held-out subject are 'test', everything else is 'train'
    fold_meta = windows_meta.assign(
        fold=k,
        split=np.where(windows_meta['subject'] == test_subj, 'test', 'train'),
    )
    fold_meta.to_csv(features_dir / f'windows_meta_fold{k}.csv', index=False)

    # Window counts per (split, activity, subject) for this fold
    stats = (
        fold_meta
        .groupby(['split', 'activity', 'subject'])
        .size()
        .reset_index(name='windows')
    )
    stats.to_csv(f'/content/logs/window_stats_fold{k}.csv', index=False)

    # 'split' only takes the two values assigned above, so train is the complement of test
    is_test = fold_meta['split'] == 'test'
    n_test = int(is_test.sum())
    n_train = int((~is_test).sum())

    per_fold_totals.append({
        'fold': k,
        'test_subject': test_subj,
        'n_train_windows': n_train,
        'n_test_windows': n_test,
        'n_total': n_train + n_test
    })

    print(f"Fold {k}: Train={n_train}, Test={n_test}, test subject={test_subj}")

# One row per fold with the train/test window totals
df_fold_totals = pd.DataFrame(per_fold_totals)
df_fold_totals.to_csv('/content/logs/window_fold_totals.csv', index=False)
print(f"\n✓ Fold-level summary: logs/window_fold_totals.csv")

# Global windowing summary: totals, parameters, and counts per class / per subject
def _windows_per(column):
    """Return {value of `column`: number of windows} over the global metadata table."""
    return windows_meta.groupby(column)['window_id'].count().to_dict()

window_params = {
    'window_size_sec': WINDOW_SEC,
    'window_samples': WINDOW_SAMPLES,
    'overlap': OVERLAP,
    'stride_samples': STRIDE_SAMPLES,
    'dominant_threshold': DOMINANT_THRESHOLD
}

summary = {
    'total_windows': len(all_windows),
    'discarded_windows': discarded_windows,
    'window_params': window_params,
    'per_class_totals': _windows_per('activity'),
    'per_subject_totals': _windows_per('subject')
}

Path('/content/logs/window_summary.json').write_text(json.dumps(summary, indent=2))

print("\nGlobal statistics:")
print(f"  Per class: {summary['per_class_totals']}")
print(f"  Per subject: {summary['per_subject_totals']}")

# Version the window artifacts and logs
for git_cmd in ('git add features/ logs/window_*.csv logs/window_*.json',
                'git commit -m "feature: windowing with per-fold train/test splits"'):
    get_ipython().system(git_cmd)

banner = '=' * 60
print(f"\n{banner}\nStep 8 completed\n{banner}")



Step 8: Sliding Windowing and Label Assignment
Window parameters: 3s (150 samples), overlap 50%, stride 75
Dominant-label threshold: 80%

Processing 112 files...

[1/112] proband10_climbingdown_waist.csv: 254 windows (stairs_down, proband10)
[2/112] proband10_climbingup_waist.csv: 264 windows (stairs_up, proband10)
[3/112] proband10_jumping_waist.csv: 68 windows (jumping, proband10)
[4/112] proband10_lying_waist.csv: 384 windows (lying, proband10)
[5/112] proband10_running_waist.csv: 367 windows (running, proband10)
[6/112] proband10_sitting_waist.csv: 366 windows (sitting, proband10)
[7/112] proband10_standing_waist.csv: 388 windows (standing, proband10)
[8/112] proband10_walking_waist.csv: 372 windows (walking, proband10)
[9/112] proband11_climbingdown_waist.csv: 293 windows (stairs_down, proband11)
[10/112] proband11_climbingup_waist.csv: 367 windows (stairs_up, proband11)
[11/112] proband11_jumping_waist.csv: 53 windows (jumping, proband11)
[12/112] proband11_lying_waist.csv: 396

In [9]:
# ================ Step 9: Per-Fold Standardization (Performance-Optimized) ================
import numpy as np
import pandas as pd
from pathlib import Path
import json

print("\n\nStep 9: Per-Fold Standardization (z-score)", "=" * 60, sep="\n")

# Inputs produced by the previous steps: the LOSO split definition and the raw windows
splits_cfg = json.loads(Path('/content/configs/splits.json').read_text())
all_windows = json.loads(Path('/content/features/windows_raw.json').read_text())

# Working directories
proc_dir = Path('/content/proc')
features_dir = Path('/content/features')

# Channels to standardize and the lower bound applied to each channel's std
EPS = 1e-8
CHANNELS = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']

print(f"Channels: {CHANNELS}")
print(f"Total windows: {len(all_windows)}\n")

# Filled with one entry per fold by the standardization loop
scaler_summary = []

# Materialize every window once as a dense float32 tensor so each fold can be
# standardized with NumPy array operations instead of per-sample Python loops.
if not all_windows:
    raise ValueError("No windows found in features/windows_raw.json; run Step 8 before Step 9")

window_tensor = np.array([[w[ch] for ch in CHANNELS] for w in all_windows],
                         dtype=np.float32)                          # [N, C, T]
window_subjects = np.array([w['subject'] for w in all_windows])
window_activities = np.array([w['activity'] for w in all_windows])
window_label_ids = np.array([w['label'] for w in all_windows], dtype=np.int32)

for fold in splits_cfg['folds']:
    k = fold['fold']
    test_subj = fold['test_subject']

    print(f"\nFold {k}: test subject={test_subj}")

    # Row i of the fold metadata describes all_windows[i] (both were written in the same order)
    fold_meta = pd.read_csv(features_dir / f'windows_meta_fold{k}.csv')
    assert len(all_windows) == len(fold_meta), f"Window count mismatch: {len(all_windows)} vs {len(fold_meta)}"

    is_train = (fold_meta['split'] == 'train').values
    is_test = (fold_meta['split'] == 'test').values
    in_fold = is_train | is_test          # rows with any other split value are left out
    n_train, n_test = int(is_train.sum()), int(is_test.sum())

    print(f"  Train windows: {n_train}, Test windows: {n_test}")

    # Fit the scaler on the training windows only (one mean/std per channel, pooled
    # over all training samples); the std is floored at EPS to avoid division by zero.
    scaler_params = {}
    for c, ch in enumerate(CHANNELS):
        train_values = window_tensor[is_train, c, :].reshape(-1)
        mean = float(train_values.mean())
        std = float(max(train_values.std(), EPS))
        scaler_params[ch] = {'mean': mean, 'std': std}

    print(f"  Scaler parameters:")
    for ch in CHANNELS:
        print(f"    {ch}: mean={scaler_params[ch]['mean']:.4f}, std={scaler_params[ch]['std']:.4f}")

    # Apply the training-set scaler to every window; everything stays float32
    means = np.array([scaler_params[ch]['mean'] for ch in CHANNELS], dtype=np.float32)
    stds = np.array([scaler_params[ch]['std'] for ch in CHANNELS], dtype=np.float32)
    normalized = (window_tensor - means[None, :, None]) / stds[None, :, None]   # [N, C, T]

    # Post-standardization validation: training set (expected mean ≈ 0, std ≈ 1)
    print(f"  Training-set validation after standardization:")
    for c, ch in enumerate(CHANNELS):
        train_norm = normalized[is_train, c, :].reshape(-1)
        mean_val = np.mean(train_norm)
        std_val = np.std(train_norm)
        print(f"    {ch}: mean={mean_val:.6f}, std={std_val:.6f}")

    # Post-standardization validation: test set (mean only; skipped when there are no test windows)
    print(f"  Test-set validation after standardization:")
    for c, ch in enumerate(CHANNELS):
        test_norm = normalized[is_test, c, :].reshape(-1)
        if test_norm.size:
            mean_val = np.mean(test_norm)
            print(f"    {ch}: mean={mean_val:.6f}")

    # Persist scaler parameters
    scaler_file = proc_dir / f'scaler_fold{k}.npz'
    np.savez(scaler_file, **{f'{ch}_mean': scaler_params[ch]['mean'] for ch in CHANNELS},
                          **{f'{ch}_std': scaler_params[ch]['std'] for ch in CHANNELS})

    # Persist standardized windows as NPZ (float32); each channel is stored as [N, T]
    norm_file = features_dir / f'windows_normalized_fold{k}.npz'
    np.savez_compressed(norm_file,
                        window_ids=np.array(fold_meta['window_id'].tolist())[in_fold],
                        subjects=window_subjects[in_fold],
                        activities=window_activities[in_fold],
                        labels=window_label_ids[in_fold],
                        splits=np.array(fold_meta['split'].tolist())[in_fold],
                        **{ch: normalized[in_fold, c, :] for c, ch in enumerate(CHANNELS)})

    print(f"  ✓ Saved: {scaler_file.name}, {norm_file.name}")

    scaler_summary.append({
        'fold': k,
        'test_subject': test_subj,
        'n_train': n_train,
        'n_test': n_test,
        'scaler_params': scaler_params
    })

# Write the per-fold scaler summary collected by the loop above
Path('/content/logs/scaler_summary.json').write_text(json.dumps(scaler_summary, indent=2))

divider = '=' * 60
print(f"\n{divider}")
for report_line in (
    f"✓ Completed standardization across {len(splits_cfg['folds'])} folds",
    "✓ Scaler parameters: proc/scaler_fold*.npz",
    "✓ Standardized data: features/windows_normalized_fold*.npz (NPZ/float32)",
    "✓ Summary: logs/scaler_summary.json",
):
    print(report_line)

# Version the scaler files, the standardized windows, and the summary
for git_cmd in ('git add proc/scaler_fold*.npz features/windows_normalized_fold*.npz logs/scaler_summary.json',
                'git commit -m "preproc: optimized z-score with NPZ storage and validation"'):
    get_ipython().system(git_cmd)

print(f"\n{divider}\nStep 9 completed\n{divider}")



Step 9: Per-Fold Standardization (z-score)
Channels: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
Total windows: 36622


Fold 0: test subject=proband1
  Train windows: 34727, Test windows: 1895
  Scaler parameters:
    acc_x: mean=-0.0001, std=3.8156
    acc_y: mean=0.0000, std=1.8273
    acc_z: mean=0.0001, std=2.0051
    gyro_x: mean=-0.0001, std=0.5433
    gyro_y: mean=-0.0000, std=0.6868
    gyro_z: mean=-0.0001, std=0.3573
  Training-set validation after standardization:
    acc_x: mean=0.000000, std=1.000000
    acc_y: mean=0.000000, std=1.000000
    acc_z: mean=-0.000000, std=1.000000
    gyro_x: mean=0.000000, std=1.000000
    gyro_y: mean=0.000000, std=1.000000
    gyro_z: mean=0.000000, std=1.000000
  Test-set validation after standardization:
    acc_x: mean=-0.000124
    acc_y: mean=0.000252
    acc_z: mean=0.000556
    gyro_x: mean=0.001704
    gyro_y: mean=-0.000234
    gyro_z: mean=0.000859
  ✓ Saved: scaler_fold0.npz, windows_normalized_fold0.npz

Fold 1:

In [11]:
# =============================================
# Step 10: Training and Evaluation of our in-house lightweight rTsfNet variant
# =============================================
import os, json, random, math, warnings
warnings.filterwarnings("ignore")

# !pip -q install "tensorflow==2.15.1"   # Uncomment in Colab if needed
import numpy as np
import pandas as pd
from pathlib import Path

import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import (
    Dense, Dropout, LayerNormalization, LeakyReLU,
    Layer, Lambda, Flatten, GlobalAveragePooling1D, Activation
)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2

from sklearn.metrics import f1_score, accuracy_score

# ---- Reproducibility: seed TensorFlow, NumPy and Python's `random` with the same value ----
SEED = 42
for seed_fn in (tf.random.set_seed, np.random.seed, random.seed):
    seed_fn(SEED)

print("\n\nStep 10: Model Training and Evaluation (our self-developed lightweight version of rTsfNet)")
print("=" * 76)

# ==================== Hyper-parameters ====================
# Signal / architecture
FS = 50.0                 # sampling rate in Hz
IMU_ROT_HEADS = 2         # number of 3D rotation heads
USE_ORIG_INPUT = True     # also feed the original input (+L2) as one stream
MLP_BASE = 128            # base width of the classification head
MLP_DEPTH = 3             # number of layers in the classification head
DROPOUT = 0.5

# Optimization
LR = 1e-3
WEIGHT_DECAY = 1e-6
BATCH_SIZE = 32

# Training schedule
BOOTSTRAP_EPOCHS = 150
TOTAL_EPOCHS = 350
PATIENCE = 50

# ==================== Directories and configuration ====================
BASE = Path('/content')
features_dir = BASE / 'features'
models_dir = BASE / 'models'
models_dir.mkdir(parents=True, exist_ok=True)

# Class and split definitions written by the earlier steps
classes_cfg = json.loads((BASE / 'configs/classes.json').read_text())
splits_cfg = json.loads((BASE / 'configs/splits.json').read_text())

n_classes = classes_cfg['num_classes']
print(f"\nNumber of classes: {n_classes}")
print(f"Class list: {classes_cfg['standard_classes']}")

# ==================== Data loading ====================
def load_fold_data(fold_k, features_dir: Path):
    """Load the standardized windows of one fold and split them into train and test.

    Args:
        fold_k: Fold id; the file read is ``windows_normalized_fold{fold_k}.npz``.
        features_dir: Directory containing the per-fold NPZ files (``Path`` or ``str``).

    Returns:
        ``(X_train, y_train, X_test, y_test)`` where each ``X`` stacks the six sensor
        channels on the last axis (``[N, T, 6]``, channel order acc_x, acc_y, acc_z,
        gyro_x, gyro_y, gyro_z) and each ``y`` holds the matching entries of ``labels``.

    Raises:
        FileNotFoundError: If the fold's NPZ file does not exist.
        KeyError: If the archive lacks one of the expected arrays.
    """
    channel_keys = ('acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z')
    npz_file = Path(features_dir) / f'windows_normalized_fold{fold_k}.npz'

    # The context manager closes the underlying archive once the arrays are in memory.
    # NOTE(review): allow_pickle=True lets a crafted file execute code on load. It is kept
    # for compatibility with existing archives; only load files produced by this pipeline.
    with np.load(npz_file, allow_pickle=True) as data:
        X = np.stack([data[key] for key in channel_keys], axis=-1)  # [N, T, 6]
        y = data['labels']
        splits = data['splits']

    train_mask = splits == 'train'
    test_mask = splits == 'test'

    return X[train_mask], y[train_mask], X[test_mask], y[test_mask]

# ==================== TSF (time/frequency-domain) layer ====================
class TSFFeatureLayer(Layer):
    """
    Per-channel time/frequency summary features of a window.

    Input: [B, T, C]  Output: [B, C, 19]

    Feature order along the last output axis:
      time domain (12):     mean, std (+eps), max, min, peak-to-peak, rms, energy,
                            skewness, kurtosis, zero-crossing rate, lag-1 and lag-2
                            autocorrelation ratios
      frequency domain (7): spectral centroid, normalized spectral entropy, spectral
                            flatness, soft-peak frequency, and relative band power in
                            0.5–3 Hz, 3–8 Hz and 8–15 Hz

    NOTE(review): the frequency axis is built as F evenly spaced values from 0 to fs/2.
    With the 150-sample windows at 50 Hz used in this notebook the band edges 3, 8 and
    15 Hz coincide with bin frequencies, so which band those bins fall into may depend
    on floating-point rounding — confirm if exact band membership matters.
    """
    def __init__(self, fs=50.0, **kwargs):
        super().__init__(**kwargs)
        self.eps = 1e-8
        self.fs = float(fs)

    def get_config(self):
        config = super().get_config()
        config.update({'fs': self.fs})
        return config

    def call(self, x):  # x: [B, T, C]
        def over_time(reduce_fn, tensor):
            # Reduce along axis 1 (time, or frequency bins) and keep it as a size-1 axis
            return reduce_fn(tensor, axis=1, keepdims=True)

        def lag_ratio(lag):
            # sum(x[t] * x[t+lag]) / (sum(x[t]^2) + eps), with t over the first T-lag samples
            head = x[:, :-lag, :]
            tail = x[:, lag:, :]
            return over_time(tf.reduce_sum, head * tail) / (over_time(tf.reduce_sum, tf.square(head)) + self.eps)

        # ---------- Time domain ----------
        mean = over_time(tf.reduce_mean, x)
        std = over_time(tf.math.reduce_std, x) + self.eps

        maxv = over_time(tf.reduce_max, x)
        minv = over_time(tf.reduce_min, x)
        ptp = maxv - minv

        squared = tf.square(x)
        rms = tf.sqrt(over_time(tf.reduce_mean, squared))
        energy = over_time(tf.reduce_sum, squared)

        # Standardized moments
        z = (x - mean) / std
        skew = over_time(tf.reduce_mean, tf.pow(z, 3))
        kurt = over_time(tf.reduce_mean, tf.pow(z, 4))

        # Zero-crossing rate: a sign flip changes tf.sign by 2, hence the division
        signs = tf.sign(x)
        flips = tf.abs(signs[:, 1:, :] - signs[:, :-1, :])
        zcr = over_time(tf.reduce_mean, flips) / 2.0

        ar1 = lag_ratio(1)
        ar2 = lag_ratio(2)

        # ---------- Frequency domain ----------
        centered = x - mean
        spectrum = tf.signal.rfft(tf.transpose(centered, [0, 2, 1]))             # [B, C, F]
        power = tf.transpose(tf.square(tf.abs(spectrum)) + self.eps, [0, 2, 1])  # [B, F, C]

        n_bins = tf.shape(power)[1]
        freqs = tf.linspace(0.0, tf.cast(self.fs, tf.float32) / 2.0, n_bins)     # [F]
        freqs = tf.reshape(freqs, [1, n_bins, 1])                                # [1, F, 1]

        total_power = over_time(tf.reduce_sum, power)                            # [B, 1, C]
        p = power / (total_power + self.eps)                                     # normalized spectrum

        centroid = over_time(tf.reduce_sum, p * freqs)                           # [B, 1, C]
        entropy = -over_time(tf.reduce_sum, p * tf.math.log(p + self.eps)) / \
                  (tf.math.log(tf.cast(n_bins, tf.float32) + self.eps))

        # Flatness = geometric mean / arithmetic mean of the power spectrum
        geo = tf.exp(over_time(tf.reduce_mean, tf.math.log(power)))
        ari = over_time(tf.reduce_mean, power)
        flatness = geo / (ari + self.eps)

        # Soft peak: softmax-weighted average frequency (weights from power scaled by temp)
        temp = 10.0
        weights = tf.nn.softmax(power * temp, axis=1)                            # [B, F, C]
        soft_peak = over_time(tf.reduce_sum, weights * freqs)                    # [B, 1, C]

        def band_power(low, high):
            # Share of total power in the half-open band [low, high)
            in_band = tf.cast((freqs >= low) & (freqs < high), tf.float32)
            return over_time(tf.reduce_sum, power * in_band) / (total_power + self.eps)

        bp1, bp2, bp3 = [band_power(low, high)
                         for low, high in ((0.5, 3.0), (3.0, 8.0), (8.0, 15.0))]

        feats = [mean, std, maxv, minv, ptp, rms, energy, skew, kurt, zcr, ar1, ar2,
                 centroid, entropy, flatness, soft_peak, bp1, bp2, bp3]   # each [B, 1, C]
        stacked = tf.concat(feats, axis=1)                                # [B, Fnum, C]
        return tf.transpose(stacked, [0, 2, 1])                           # [B, C, Fnum]

# ==================== 3D rotation (multi-head, independent) ====================
class Multihead3DRotation(Layer):
    """
    Learnable multi-head 3D rotation for 6-channel IMU windows.

    Input:  [B, T, 6] where channels 0-2 are treated as ACC and 3-5 as GYR.
    Output: a Python list with head_nums tensors, each of shape [B, T, 6].

    A shared MLP over the time-averaged input feeds head_nums independent
    Dense(4, tanh) heads. Each head emits 3 axis components plus 1 angle, which
    are converted into one rotation matrix per sample and applied to both
    sensor triples.

    Implementation notes: the identity matrix is materialised with tile so that
    R keeps shape [B, 3, 3], and the rotation is applied with matmul + transpose
    rather than einsum.
    """
    def __init__(self, head_nums=2, base_kn=64, param_depth=2, **kwargs):
        super().__init__(**kwargs)
        # Hyper-parameters (also exported by get_config)
        self.head_nums = head_nums
        self.base_kn = base_kn
        self.param_depth = param_depth
        # Added to the axis norm before dividing
        self.eps = 1e-8

        # Sub-layers: temporal pooling -> shared MLP -> one 4-unit head per rotation
        self.gap = GlobalAveragePooling1D()
        self.mlp = [Dense(self.base_kn, activation='relu') for _ in range(self.param_depth)]
        self.out_heads = [Dense(4, activation='tanh') for _ in range(self.head_nums)]

    def get_config(self):
        """Return the base layer config extended with this layer's constructor arguments."""
        return {
            **super().get_config(),
            'head_nums': self.head_nums,
            'base_kn': self.base_kn,
            'param_depth': self.param_depth,
        }

    def compute_output_shape(self, input_shape):
        """One output shape per head; every head keeps the input's shape."""
        shapes = []
        for _ in range(self.head_nums):
            shapes.append(tf.TensorShape(input_shape))
        return shapes

    def _axis_angle_to_R(self, axis_raw, angle_raw):
        """
        Build rotation matrices from an unnormalised axis and a scaled angle.

        axis_raw:  [B,3] raw axis components (normalised here).
        angle_raw: [B,1] in (-1,1); multiplied by pi to obtain the angle.
        Returns R: [B,3,3] = cos*I + (1 - cos)*u u^T + sin*K, with K the
        cross-product (skew-symmetric) matrix of the unit axis u.
        """
        unit = axis_raw / (tf.norm(axis_raw, axis=-1, keepdims=True) + self.eps)
        batch = tf.shape(unit)[0]
        theta = angle_raw * math.pi                                       # [B,1]

        ux = unit[:, 0]
        uy = unit[:, 1]
        uz = unit[:, 2]
        zero = tf.zeros_like(ux)

        # Row-major entries of the skew-symmetric matrix, then folded to 3x3
        skew_flat = tf.stack([zero, -uz, uy,
                              uz, zero, -ux,
                              -uy, ux, zero], axis=-1)                    # [B,9]
        skew = tf.reshape(skew_flat, [batch, 3, 3])                       # [B,3,3]

        # Explicitly batched identity (tile) so every term is [B,3,3]
        eye3 = tf.eye(3, dtype=unit.dtype)                                # [3,3]
        eye = tf.tile(tf.expand_dims(eye3, 0), [batch, 1, 1])             # [B,3,3]

        # Outer product u u^T
        col = tf.expand_dims(unit, -1)                                    # [B,3,1]
        outer = tf.matmul(col, col, transpose_b=True)                     # [B,3,3]

        cos_t = tf.reshape(tf.cos(theta), [-1, 1, 1])                     # [B,1,1]
        sin_t = tf.reshape(tf.sin(theta), [-1, 1, 1])                     # [B,1,1]

        return cos_t * eye + (1.0 - cos_t) * outer + sin_t * skew         # [B,3,3]

    def call(self, x):   # x: [B, T, 6]
        """Rotate the ACC and GYR triples of x once per head; returns a list of [B, T, 6] tensors."""
        acc = x[:, :, :3]
        gyr = x[:, :, 3:6]

        # Shared representation: time-average, then the MLP stack
        h = self.gap(x)                                                   # [B, 6]
        for dense in self.mlp:
            h = dense(h)

        # The [B,3,T] views do not depend on the head, so build them once
        acc_t = tf.transpose(acc, [0, 2, 1])                              # [B,3,T]
        gyr_t = tf.transpose(gyr, [0, 2, 1])                              # [B,3,T]

        outputs = []
        for head in self.out_heads:
            params = head(h)                                              # [B, 4]
            axis_part = params[:, :3]
            angle_part = tf.expand_dims(params[:, 3], -1)                 # [B,1]
            R = self._axis_angle_to_R(axis_part, angle_part)              # [B,3,3]

            # R @ v via matmul on the transposed signal, then back to [B,T,3]
            acc_rot = tf.transpose(tf.matmul(R, acc_t), [0, 2, 1])        # [B,T,3]
            gyr_rot = tf.transpose(tf.matmul(R, gyr_t), [0, 2, 1])        # [B,T,3]

            outputs.append(tf.concat([acc_rot, gyr_rot], axis=-1))        # [B,T,6]
        return outputs

# ==================== Utility: concatenate L2 channels ====================
def add_l2_channels(x):     # x: [B, T, 6]
    """
    Append the per-timestep L2 magnitude of the ACC and GYR triples as two extra channels.

    Args:
        x: tensor of shape [B, T, 6]; channels 0-2 are treated as ACC, 3-5 as GYR.

    Returns:
        Tensor of shape [B, T, 8]: the input followed by |acc| and |gyr|.

    The forward values are the plain Euclidean norms. The norm is computed with
    the nested-`tf.where` pattern because the derivative of sqrt at 0 is
    infinite: an all-zero triple would otherwise produce inf * 0 = NaN in
    back-propagation, and that NaN would reach every upstream trainable weight.
    """
    def _safe_l2(v):
        # Squared norm over the channel axis, kept as [B, T, 1]
        sq = tf.reduce_sum(tf.square(v), axis=-1, keepdims=True)
        nonzero = tf.not_equal(sq, 0.0)
        # Inner where: sqrt never sees 0, so its gradient is always finite.
        guarded = tf.where(nonzero, sq, tf.ones_like(sq))
        # Outer where: restore the exact value 0 (with zero gradient) for zero vectors.
        return tf.where(nonzero, tf.sqrt(guarded), tf.zeros_like(sq))

    acc = x[:, :, :3]
    gyr = x[:, :, 3:6]
    return tf.concat([x, _safe_l2(acc), _safe_l2(gyr)], axis=-1)  # [B, T, 8]

# ==================== Main body of rTsfNet ====================
def r_tsf_net(x_shape, n_classes,
              learning_rate=1e-3, base_kn=128, depth=3, dropout_rate=0.5,
              imu_rot_heads=2, fs=50.0, use_orig_input=True):
    """
    Build and compile the rTsfNet classifier.

    Pipeline: input -> multi-head 3D rotation -> (+L2 magnitude channels per stream)
    -> channel-wise concat -> TSFFeatureLayer -> Flatten -> MLP -> softmax.

    Args:
        x_shape: shape of the full data array, batch dimension first; only
            x_shape[1:] (i.e. [T, 6]) is used for the model input.
        n_classes: number of output classes.
        learning_rate: learning rate for Adam (amsgrad enabled).
        base_kn: base width of the MLP head; block k has base_kn * 2**k units.
        depth: number of Dense/LayerNorm/LeakyReLU/Dropout blocks (k = depth-1 .. 0).
        dropout_rate: dropout rate applied after every MLP block.
        imu_rot_heads: number of rotation heads (one rotated stream per head).
        fs: sampling frequency forwarded to TSFFeatureLayer.
        use_orig_input: if True, the unrotated input is kept as an extra stream.

    Returns:
        A compiled Keras Model (sparse categorical cross-entropy, accuracy metric).

    Raises:
        ValueError: if no stream would be produced (no rotation heads and the
            original input disabled).

    Note: L2 regularisation strength is read from the module-level WEIGHT_DECAY.
    """
    if not use_orig_input and imu_rot_heads < 1:
        raise ValueError("r_tsf_net needs at least one stream: set use_orig_input=True or imu_rot_heads >= 1")

    inputs = Input(shape=x_shape[1:])     # [T, 6]
    x = inputs

    # Multi-head 3D rotation (independent heads); its internal MLP width is fixed at 64
    # and is intentionally independent of base_kn.
    rot_layer = Multihead3DRotation(head_nums=imu_rot_heads, base_kn=64, param_depth=2, name='multihead_rot')
    rotated_list = rot_layer(x)   # list of [B, T, 6]

    # Choose whether to keep the original input
    streams = []
    if use_orig_input:
        streams.append(Lambda(add_l2_channels, name='orig_plus_l2')(x))
    for i, xr in enumerate(rotated_list):
        streams.append(Lambda(add_l2_channels, name=f'rot{i}_plus_l2')(xr))

    # Concatenate all streams along the channel dimension: 8 channels per stream
    concat_streams = Lambda(lambda lst: tf.concat(lst, axis=-1), name='concat_streams')(streams)

    # TSF feature extraction: output shape [B, C_total, F]
    tsf = TSFFeatureLayer(fs=fs, name='tsf')(concat_streams)

    # Flatten + pure MLP classification head (LayerNorm epsilon = 1e-7 to match the official implementation)
    z = Flatten(name='flatten')(tsf)
    for k in range(depth-1, -1, -1):
        # Width comes from the base_kn argument (previously the global MLP_BASE was used,
        # which silently ignored the parameter).
        z = Dense(base_kn*(2**k), kernel_regularizer=l2(WEIGHT_DECAY), name=f'fc_{k}')(z)
        z = LayerNormalization(epsilon=1e-7, name=f'ln_{k}')(z)
        z = LeakyReLU(name=f'lrelu_{k}')(z)
        z = Dropout(dropout_rate, name=f'drop_{k}')(z)

    logits = Dense(n_classes, kernel_regularizer=l2(WEIGHT_DECAY), name='logits')(z)
    # Softmax is forced to float32 regardless of the layer dtype policy in effect
    probs  = Activation('softmax', dtype='float32', name='softmax')(logits)

    model = Model(inputs, probs, name='rTsfNet_officially_aligned_fixed')

    opt = Adam(learning_rate=learning_rate, amsgrad=True)
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=opt,
        metrics=['accuracy']
    )
    return model

def _history_to_df(hist_obj):
    """Convert a Keras History object to a DataFrame and standardize acc/val_acc to accuracy/val_accuracy."""
    d = dict(hist_obj.history)
    if 'acc' in d and 'accuracy' not in d:
        d['accuracy'] = d.pop('acc')
    if 'val_acc' in d and 'val_accuracy' not in d:
        d['val_accuracy'] = d.pop('val_acc')
    return pd.DataFrame(d)

# ==================== Loop over all folds for training ====================
# Per-fold training driver. For every fold it builds a fresh model, trains it in two
# stages, evaluates on that fold's test data, and writes weights, training history and
# a results JSON into models_dir. One summary dict per fold is collected in all_results.
#
# NOTE(review): in both stages the fold's test data is passed as validation_data, and in
# stage 2 it drives EarlyStopping (restore_best_weights=True) and ReduceLROnPlateau. The
# metrics reported below are computed on that same X_test, so model selection has seen the
# test set and the reported numbers are likely optimistic — confirm this is intended.
all_results = []

# NOTE(review): the fold count is hard-coded to 15; presumably this equals
# len(splits_cfg['folds']) — confirm before reusing with another split file.
for FOLD_TO_TRAIN in range(0, 15):
    print(f"\nTraining fold {FOLD_TO_TRAIN} (test subject: {splits_cfg['folds'][FOLD_TO_TRAIN]['test_subject']})")
    print(f"Bootstrap epochs: {BOOTSTRAP_EPOCHS}, total epochs: {TOTAL_EPOCHS}, patience: {PATIENCE}")
    print("=" * 76)

    # Load this fold's train/test arrays
    X_train, y_train, X_test, y_test = load_fold_data(FOLD_TO_TRAIN, features_dir)
    print(f"Train set: {X_train.shape}, test set: {X_test.shape}")

    # A new, freshly initialised model is built for every fold
    model = r_tsf_net(
        x_shape=X_train.shape,
        n_classes=n_classes,
        learning_rate=LR,
        base_kn=MLP_BASE,
        depth=MLP_DEPTH,
        dropout_rate=DROPOUT,
        imu_rot_heads=IMU_ROT_HEADS,
        fs=FS,
        use_orig_input=USE_ORIG_INPUT
    )

    print(f"\nTotal number of model parameters: {model.count_params():,}")
    model.summary(line_length=140)

    # Stage 1: fixed number of epochs, no callbacks (no early stopping, no LR schedule)
    print(f"\nStage 1: Bootstrap training ({BOOTSTRAP_EPOCHS} epochs)...")
    history1 = model.fit(
        X_train, y_train,
        batch_size=BATCH_SIZE,
        epochs=BOOTSTRAP_EPOCHS,
        validation_data=(X_test, y_test),
        verbose=1
    )

    # Stage 2: keep training the same model for the remaining epoch budget, now with
    # early stopping on val_accuracy and LR halving on a val_loss plateau
    print(f"\nStage 2: Full training (additional {TOTAL_EPOCHS - BOOTSTRAP_EPOCHS} epochs)...")
    early_stop = EarlyStopping(monitor='val_accuracy', patience=PATIENCE, restore_best_weights=True, verbose=1)
    reduce_lr  = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=20, min_lr=1e-6, verbose=1)

    history2 = model.fit(
        X_train, y_train,
        batch_size=BATCH_SIZE,
        epochs=TOTAL_EPOCHS - BOOTSTRAP_EPOCHS,
        validation_data=(X_test, y_test),
        callbacks=[early_stop, reduce_lr],
        verbose=1
    )

    # Final evaluation: class with the highest predicted probability per test window
    y_pred = model.predict(X_test, verbose=0)
    y_hat  = np.argmax(y_pred, axis=1)

    test_acc         = accuracy_score(y_test, y_hat)
    test_f1_macro    = f1_score(y_test, y_hat, average='macro')
    test_f1_weighted = f1_score(y_test, y_hat, average='weighted')

    print("\n" + "="*76)
    print(f"Fold {FOLD_TO_TRAIN} final evaluation:")
    print(f"  Accuracy: {test_acc*100:.2f}%")
    print(f"  LOSO Macro-F1: {test_f1_macro:.4f}")
    print(f"  Weighted F1: {test_f1_weighted:.4f}")
    print("="*76)

    # Persist weights only (not the full model)
    model_path = models_dir / f'model_fold{FOLD_TO_TRAIN}.weights.h5'
    model.save_weights(model_path)
    print(f"\n✓ Model weights saved to: {model_path}")

    # Merge both stages into one history table; stage-2 epochs continue the stage-1 numbering
    h1_df = _history_to_df(history1)
    h1_df['epoch'] = np.arange(1, len(h1_df)+1)
    h1_df['phase'] = 'bootstrap'

    h2_df = _history_to_df(history2)
    h2_df['epoch'] = np.arange(len(h1_df)+1, len(h1_df)+len(h2_df)+1)
    h2_df['phase'] = 'stage2'

    # sort=True asks pandas to sort the column axis if the two frames' columns are not
    # already aligned; a column present in only one phase is NaN for the other phase's rows
    hist_df = pd.concat([h1_df, h2_df], ignore_index=True, sort=True)

    # Move 'epoch' and 'phase' to the front, keep the remaining columns in their current order
    front_cols = [c for c in ['epoch', 'phase'] if c in hist_df.columns]
    hist_df = hist_df[front_cols + [c for c in hist_df.columns if c not in front_cols]]

    hist_csv = models_dir / f'history_fold{FOLD_TO_TRAIN}.csv'
    hist_df.to_csv(hist_csv, index=False)
    print(f"✓ Training history saved to: {hist_csv}")

    # Per-fold JSON: metrics plus the hyper-parameters used for this run
    results = {
        'fold': FOLD_TO_TRAIN,
        'test_subject': splits_cfg['folds'][FOLD_TO_TRAIN]['test_subject'],
        'accuracy': float(test_acc),
        'macro_f1': float(test_f1_macro),
        'weighted_f1': float(test_f1_weighted),
        'history_rows': int(len(hist_df)),
        'config': {
            'fs': FS, 'imu_rot_heads': IMU_ROT_HEADS, 'mlp_base': MLP_BASE,
            'mlp_depth': MLP_DEPTH, 'dropout': DROPOUT, 'lr': LR, 'weight_decay': WEIGHT_DECAY,
            'use_orig_input': USE_ORIG_INPUT, 'epochs': TOTAL_EPOCHS, 'bootstrap': BOOTSTRAP_EPOCHS,
            'patience': PATIENCE, 'batch_size': BATCH_SIZE
        }
    }
    with open(models_dir / f'fold{FOLD_TO_TRAIN}_results.json', 'w') as f:
        json.dump(results, f, indent=2)
    # The message prints a fixed 'models/' prefix, not the actual models_dir path
    print(f"✓ Evaluation results saved to: models/fold{FOLD_TO_TRAIN}_results.json")

    # Metrics-only copy of the fold result, used for the cross-fold summary
    all_results.append({
        'fold': FOLD_TO_TRAIN,
        'test_subject': splits_cfg['folds'][FOLD_TO_TRAIN]['test_subject'],
        'accuracy': float(test_acc),
        'macro_f1': float(test_f1_macro),
        'weighted_f1': float(test_f1_weighted)
    })

    # Reset Keras global state before the next fold builds its model
    tf.keras.backend.clear_session()

# Cross-fold summary: print the per-fold table and the unweighted means over folds,
# then write the table to all_folds_summary.csv in models_dir.
rule = "=" * 76

print("\n" + rule)
print("All folds have finished training! Summary of results:")
print(rule)

summary_df = pd.DataFrame(all_results)
print(summary_df)

# Column means over folds (each fold counts equally)
mean_accuracy = summary_df['accuracy'].mean()
mean_macro_f1 = summary_df['macro_f1'].mean()
mean_weighted_f1 = summary_df['weighted_f1'].mean()
print(f"\nMean accuracy: {mean_accuracy*100:.2f}%")
print(f"Mean Macro-F1: {mean_macro_f1:.4f}")
print(f"Mean Weighted-F1: {mean_weighted_f1:.4f}")

summary_csv = models_dir / 'all_folds_summary.csv'
summary_df.to_csv(summary_csv, index=False)
print("\n✓ Summary results saved to: models/all_folds_summary.csv")

print("\n" + rule + "\nStep 10 finished\n" + rule)



Step 10: Model Training and Evaluation (our self-developed lightweight version of rTsfNet)

Number of classes: 8
Class list: ['walking', 'running', 'sitting', 'standing', 'lying', 'stairs_up', 'stairs_down', 'jumping']

Training fold 0 (test subject: proband1)
Bootstrap epochs: 150, total epochs: 350, patience: 50
Train set: (34727, 150, 6), test set: (1895, 150, 6)

Total number of model parameters: 406,160



Stage 1: Bootstrap training (150 epochs)...
Epoch 1/150
[1m1086/1086[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 15ms/step - accuracy: 0.5796 - loss: 1.1600 - val_accuracy: 0.7926 - val_loss: 0.7043
Epoch 2/150
[1m1086/1086[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7691 - loss: 0.6585 - val_accuracy: 0.7900 - val_loss: 0.7086
Epoch 3/150
[1m1086/1086[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7960 - loss: 0.5900 - val_accuracy: 0.8100 - val_loss: 0.6505
Epoch 4/150
[1m1086/1086[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8108 - loss: 0.5542 - val_accuracy: 0.8005 - val_loss: 0.6686
Epoch 5/150
[1m1086/1086[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8202 - loss: 0.5211 - val_accuracy: 0.7879 - val_loss: 0.7062
Epoch 6/150
[1m1086/1086[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8274 - loss: 0.5018 - val


Stage 1: Bootstrap training (150 epochs)...
Epoch 1/150
[1m1068/1068[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 20ms/step - accuracy: 0.5734 - loss: 1.1663 - val_accuracy: 0.7962 - val_loss: 0.6273
Epoch 2/150
[1m1068/1068[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7726 - loss: 0.6631 - val_accuracy: 0.7978 - val_loss: 0.6049
Epoch 3/150
[1m1068/1068[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7991 - loss: 0.5884 - val_accuracy: 0.7665 - val_loss: 0.6895
Epoch 4/150
[1m1068/1068[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8131 - loss: 0.5458 - val_accuracy: 0.7958 - val_loss: 0.6210
Epoch 5/150
[1m1068/1068[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8259 - loss: 0.5188 - val_accuracy: 0.8287 - val_loss: 0.5276
Epoch 6/150
[1m1068/1068[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8314 - loss: 0.4972 - val


Stage 1: Bootstrap training (150 epochs)...
Epoch 1/150
[1m1064/1064[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 21ms/step - accuracy: 0.5753 - loss: 1.1491 - val_accuracy: 0.7643 - val_loss: 0.5863
Epoch 2/150
[1m1064/1064[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7699 - loss: 0.6564 - val_accuracy: 0.7853 - val_loss: 0.5592
Epoch 3/150
[1m1064/1064[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7970 - loss: 0.5825 - val_accuracy: 0.8089 - val_loss: 0.5405
Epoch 4/150
[1m1064/1064[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8118 - loss: 0.5475 - val_accuracy: 0.8221 - val_loss: 0.5081
Epoch 5/150
[1m1064/1064[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8234 - loss: 0.5154 - val_accuracy: 0.8267 - val_loss: 0.5104
Epoch 6/150
[1m1064/1064[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8271 - loss: 0.4998 - val


Stage 1: Bootstrap training (150 epochs)...
Epoch 1/150
[1m1071/1071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 17ms/step - accuracy: 0.5754 - loss: 1.1616 - val_accuracy: 0.8483 - val_loss: 0.4908
Epoch 2/150
[1m1071/1071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7653 - loss: 0.6772 - val_accuracy: 0.8297 - val_loss: 0.4939
Epoch 3/150
[1m1071/1071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7922 - loss: 0.5987 - val_accuracy: 0.8640 - val_loss: 0.4130
Epoch 4/150
[1m1071/1071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8106 - loss: 0.5554 - val_accuracy: 0.8564 - val_loss: 0.4382
Epoch 5/150
[1m1071/1071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8208 - loss: 0.5303 - val_accuracy: 0.8661 - val_loss: 0.3971
Epoch 6/150
[1m1071/1071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8303 - loss: 0.5029 - val


Stage 1: Bootstrap training (150 epochs)...
Epoch 1/150
[1m1064/1064[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 21ms/step - accuracy: 0.5715 - loss: 1.1656 - val_accuracy: 0.8297 - val_loss: 0.5579
Epoch 2/150
[1m1064/1064[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7538 - loss: 0.6901 - val_accuracy: 0.8463 - val_loss: 0.4891
Epoch 3/150
[1m1064/1064[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7873 - loss: 0.6130 - val_accuracy: 0.8474 - val_loss: 0.4745
Epoch 4/150
[1m1064/1064[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7990 - loss: 0.5699 - val_accuracy: 0.8613 - val_loss: 0.4494
Epoch 5/150
[1m1064/1064[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8129 - loss: 0.5445 - val_accuracy: 0.8575 - val_loss: 0.4493
Epoch 6/150
[1m1064/1064[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8243 - loss: 0.5157 - val


Stage 1: Bootstrap training (150 epochs)...
Epoch 1/150
[1m1088/1088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 18ms/step - accuracy: 0.5761 - loss: 1.1659 - val_accuracy: 0.7907 - val_loss: 0.6272
Epoch 2/150
[1m1088/1088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7636 - loss: 0.6828 - val_accuracy: 0.7989 - val_loss: 0.7249
Epoch 3/150
[1m1088/1088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7904 - loss: 0.6042 - val_accuracy: 0.7924 - val_loss: 0.7140
Epoch 4/150
[1m1088/1088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8055 - loss: 0.5632 - val_accuracy: 0.7831 - val_loss: 0.7151
Epoch 5/150
[1m1088/1088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8188 - loss: 0.5281 - val_accuracy: 0.7700 - val_loss: 0.8135
Epoch 6/150
[1m1088/1088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8293 - loss: 0.5028 - val


Stage 1: Bootstrap training (150 epochs)...
Epoch 1/150
[1m1063/1063[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 20ms/step - accuracy: 0.5797 - loss: 1.1582 - val_accuracy: 0.7777 - val_loss: 0.6425
Epoch 2/150
[1m1063/1063[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7596 - loss: 0.6839 - val_accuracy: 0.7796 - val_loss: 0.5998
Epoch 3/150
[1m1063/1063[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7918 - loss: 0.6028 - val_accuracy: 0.7188 - val_loss: 0.7087
Epoch 4/150
[1m1063/1063[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8050 - loss: 0.5671 - val_accuracy: 0.7643 - val_loss: 0.6041
Epoch 5/150
[1m1063/1063[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8198 - loss: 0.5259 - val_accuracy: 0.7701 - val_loss: 0.6395
Epoch 6/150
[1m1063/1063[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8278 - loss: 0.5084 - val


Stage 1: Bootstrap training (150 epochs)...
Epoch 1/150
[1m1073/1073[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 17ms/step - accuracy: 0.5686 - loss: 1.1738 - val_accuracy: 0.8024 - val_loss: 0.5480
Epoch 2/150
[1m1073/1073[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7635 - loss: 0.6790 - val_accuracy: 0.7989 - val_loss: 0.5484
Epoch 3/150
[1m1073/1073[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7889 - loss: 0.6048 - val_accuracy: 0.8172 - val_loss: 0.5364
Epoch 4/150
[1m1073/1073[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8063 - loss: 0.5568 - val_accuracy: 0.8229 - val_loss: 0.5320
Epoch 5/150
[1m1073/1073[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8168 - loss: 0.5401 - val_accuracy: 0.8202 - val_loss: 0.5114
Epoch 6/150
[1m1073/1073[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8236 - loss: 0.5200 - val


Stage 1: Bootstrap training (150 epochs)...
Epoch 1/150
[1m1060/1060[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 18ms/step - accuracy: 0.5726 - loss: 1.1628 - val_accuracy: 0.7915 - val_loss: 0.5654
Epoch 2/150
[1m1060/1060[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7630 - loss: 0.6858 - val_accuracy: 0.8085 - val_loss: 0.5700
Epoch 3/150
[1m1060/1060[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7939 - loss: 0.5976 - val_accuracy: 0.8218 - val_loss: 0.5727
Epoch 4/150
[1m1060/1060[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8077 - loss: 0.5580 - val_accuracy: 0.8185 - val_loss: 0.6463
Epoch 5/150
[1m1060/1060[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8193 - loss: 0.5267 - val_accuracy: 0.8118 - val_loss: 0.6598
Epoch 6/150
[1m1060/1060[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8278 - loss: 0.5020 - val


Stage 1: Bootstrap training (150 epochs)...
Epoch 1/150
[1m1075/1075[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 16ms/step - accuracy: 0.5772 - loss: 1.1535 - val_accuracy: 0.6787 - val_loss: 0.9792
Epoch 2/150
[1m1075/1075[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7766 - loss: 0.6527 - val_accuracy: 0.6916 - val_loss: 0.8812
Epoch 3/150
[1m1075/1075[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8002 - loss: 0.5814 - val_accuracy: 0.6951 - val_loss: 0.8108
Epoch 4/150
[1m1075/1075[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8168 - loss: 0.5460 - val_accuracy: 0.6925 - val_loss: 0.8318
Epoch 5/150
[1m1075/1075[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8268 - loss: 0.5203 - val_accuracy: 0.7187 - val_loss: 0.7179
Epoch 6/150
[1m1075/1075[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8361 - loss: 0.4963 - val


Stage 1: Bootstrap training (150 epochs)...
Epoch 1/150
[1m1053/1053[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 17ms/step - accuracy: 0.5773 - loss: 1.1599 - val_accuracy: 0.6702 - val_loss: 0.8740
Epoch 2/150
[1m1053/1053[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.7735 - loss: 0.6520 - val_accuracy: 0.6770 - val_loss: 0.8084
Epoch 3/150
[1m1053/1053[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.8053 - loss: 0.5683 - val_accuracy: 0.6866 - val_loss: 0.7720
Epoch 4/150
[1m1053/1053[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.8162 - loss: 0.5365 - val_accuracy: 0.6849 - val_loss: 0.7780
Epoch 5/150
[1m1053/1053[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8319 - loss: 0.5056 - val_accuracy: 0.6903 - val_loss: 0.7790
Epoch 6/150
[1m1053/1053[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8362 - loss: 0.4905 - val


Stage 1: Bootstrap training (150 epochs)...
Epoch 1/150
[1m1066/1066[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 16ms/step - accuracy: 0.5724 - loss: 1.1660 - val_accuracy: 0.7262 - val_loss: 0.6551
Epoch 2/150
[1m1066/1066[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7657 - loss: 0.6760 - val_accuracy: 0.7112 - val_loss: 0.6836
Epoch 3/150
[1m1066/1066[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7931 - loss: 0.6010 - val_accuracy: 0.7519 - val_loss: 0.5929
Epoch 4/150
[1m1066/1066[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8090 - loss: 0.5570 - val_accuracy: 0.7661 - val_loss: 0.5799
Epoch 5/150
[1m1066/1066[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8186 - loss: 0.5335 - val_accuracy: 0.8009 - val_loss: 0.5292
Epoch 6/150
[1m1066/1066[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8286 - loss: 0.5088 - val


Stage 1: Bootstrap training (150 epochs)...
Epoch 1/150
[1m1082/1082[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 23ms/step - accuracy: 0.5620 - loss: 1.1900 - val_accuracy: 0.8072 - val_loss: 0.6040
Epoch 2/150
[1m1082/1082[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7653 - loss: 0.6773 - val_accuracy: 0.8337 - val_loss: 0.5178
Epoch 3/150
[1m1082/1082[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.7913 - loss: 0.6065 - val_accuracy: 0.8277 - val_loss: 0.4972
Epoch 4/150
[1m1082/1082[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.8107 - loss: 0.5581 - val_accuracy: 0.8347 - val_loss: 0.5224
Epoch 5/150
[1m1082/1082[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8232 - loss: 0.5259 - val_accuracy: 0.8387 - val_loss: 0.4946
Epoch 6/150
[1m1082/1082[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8330 - loss: 0.5042 - val


Stage 1: Bootstrap training (150 epochs)...
Epoch 1/150
[1m1054/1054[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 9ms/step - accuracy: 0.5863 - loss: 1.1087 - val_accuracy: 0.4755 - val_loss: 1.8349
Epoch 2/150
[1m1054/1054[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7847 - loss: 0.6160 - val_accuracy: 0.4537 - val_loss: 1.8420
Epoch 3/150
[1m1054/1054[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8109 - loss: 0.5528 - val_accuracy: 0.4385 - val_loss: 1.9262
Epoch 4/150
[1m1054/1054[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8242 - loss: 0.5134 - val_accuracy: 0.4388 - val_loss: 1.9962
Epoch 5/150
[1m1054/1054[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8310 - loss: 0.4957 - val_accuracy: 0.4354 - val_loss: 1.9094
Epoch 6/150
[1m1054/1054[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8392 - loss: 0.4760 - val_


Stage 1: Bootstrap training (150 epochs)...
Epoch 1/150
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 17ms/step - accuracy: 0.5815 - loss: 1.1505 - val_accuracy: 0.7294 - val_loss: 0.6595
Epoch 2/150
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7710 - loss: 0.6743 - val_accuracy: 0.7892 - val_loss: 0.5525
Epoch 3/150
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7962 - loss: 0.5966 - val_accuracy: 0.8098 - val_loss: 0.5090
Epoch 4/150
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8063 - loss: 0.5649 - val_accuracy: 0.8008 - val_loss: 0.5297
Epoch 5/150
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8175 - loss: 0.5341 - val_accuracy: 0.8229 - val_loss: 0.4887
Epoch 6/150
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8273 - loss: 0.5108 - val