In [None]:
# Setup imports
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from gaitsetpy.dataset import PhysioNetLoader
from gaitsetpy.features import PhysioNetFeatureExtractor
from gaitsetpy.classification.models import RandomForestModel

# Location of the PhysioNet files, relative to the notebook's working directory.
_data_path_parts = ('data', 'physionet')
DATA_DIR = os.path.join(*_data_path_parts)
print(f"Data dir: {DATA_DIR}")


In [None]:
# 1) Load dataset
# Read every recording found under DATA_DIR, then fetch the labels from the
# loader after loading has finished.
loader = PhysioNetLoader()
data, names = loader.load_data(DATA_DIR)
labels = loader.get_labels()

# 'Co' and 'Pt' are the two label strings counted for the summary line.
n_controls = labels.count('Co')
n_patients = labels.count('Pt')
print(f"Loaded {len(data)} files. Controls={n_controls} Patients={n_patients}")

# Preview the first recording only when something was actually loaded.
if data:
    first_recording = data[0]
    display(first_recording.head())


In [None]:
# 2) Create sliding windows
# One entry is returned per input recording; entries that carry a 'metadata'
# dict contribute their 'num_windows' count to the overall total.
windows = loader.create_sliding_windows(data, names, window_size=600, step_size=100)
print(f"Window sets created: {len(windows)}")
if windows:
    total_windows = 0
    for window_set in windows:
        if 'metadata' in window_set:
            total_windows += window_set['metadata']['num_windows']
    print(f"Total windows: {total_windows}")


In [None]:
# 3) Extract features
# Run the extractor over each windowed recording and keep the result together
# with the recording name and whatever metadata the windowing step attached.
extractor = PhysioNetFeatureExtractor(verbose=True)
all_features = []
for window_set in windows:
    # Entries without a 'windows' key have nothing to extract from.
    if 'windows' not in window_set:
        continue
    recording_windows = window_set['windows']
    sampling_frequency = loader.metadata['sampling_frequency']
    feats = extractor.extract_features(recording_windows, fs=sampling_frequency)
    record = {
        'name': window_set['name'],
        'features': feats,
        'metadata': window_set.get('metadata', {})
    }
    all_features.append(record)
print(f"Extracted from {len(all_features)} files")


In [None]:
# 4) Prepare classification data
def _collapse_feature(values):
    """Reduce one feature entry to a single scalar for the sample vector.

    Parameters
    ----------
    values : list, tuple, numpy.ndarray or scalar
        The raw value stored under one feature name.

    Returns
    -------
    scalar
        The mean over all (one-level flattened) elements when ``values`` is a
        sequence, ``0`` when the sequence holds no elements at all, and
        ``values`` itself when it is not a sequence.
    """
    sequence_types = (list, tuple, np.ndarray)
    if not isinstance(values, sequence_types):
        return values
    flat = []
    for item in values:
        if isinstance(item, sequence_types):
            # np.ravel copes with nested arrays of any rank (including 0-d).
            flat.extend(np.ravel(item))
        else:
            flat.append(item)
    # Avoid np.mean([]) -> NaN plus a RuntimeWarning; treat "no data" as 0,
    # the same value used for an empty top-level list.
    return np.mean(flat) if flat else 0


# Build one sample per (recording, sensor entry): each feature is collapsed to
# a scalar and the recording's label is repeated for every sensor entry.
X = []
y = []
for file_features in all_features:
    file_name = file_features['name']
    metadata = file_features.get('metadata', {})
    # Prefer the label stored in metadata; otherwise fall back to the
    # 'Co' substring test on the recording name.
    label = metadata.get('label', 'Co' if 'Co' in file_name else 'Pt')
    for sensor_features in file_features['features']:
        vec = [_collapse_feature(v) for v in sensor_features['features'].values()]
        if vec:
            X.append(vec)
            y.append(label)

X = np.array(X)
y = np.array(y)
print(f"X shape: {X.shape}, y: {dict(zip(*np.unique(y, return_counts=True)))}")


In [None]:
# 5) Train RandomForest and evaluate
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

def _to_feature_dicts(features_2d, annotations):
    """Wrap a 2-D feature matrix and its labels in the dict layout passed to the model.

    Each column ``i`` of ``features_2d`` becomes a list under the key ``f{i}``;
    the labels are stored as a list under ``'annotations'``. The result is a
    single-element list so it can be handed to ``rf.train`` / ``rf.evaluate``.
    """
    columns = {f'f{i}': features_2d[:, i].tolist() for i in range(features_2d.shape[1])}
    return [{'name': 'sample', 'features': columns, 'annotations': annotations.tolist()}]


if len(X) > 0:
    # Pin the class order once so the confusion matrix, its tick labels and the
    # classification report always agree, even if one class is absent from the
    # test split.
    class_names = ['Co', 'Pt']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

    # Fit the scaler on the training split only, then reuse it for the test split.
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)
    X_test_s = scaler.transform(X_test)

    rf = RandomForestModel()
    feature_dicts_train = _to_feature_dicts(X_train_s, y_train)
    feature_dicts_test = _to_feature_dicts(X_test_s, y_test)

    # The split was already done above, so the model is asked not to split again.
    rf.train(feature_dicts_train, test_size=0.0, validation_split=False)
    metrics = rf.evaluate(feature_dicts_test, detailed_report=True)
    print(f"Accuracy: {metrics['accuracy']:.3f}")

    # Predict once and reuse the result for both the matrix and the report.
    # NOTE(review): assumes rf.model expects columns in f0..fN order - confirm.
    y_pred = rf.model.predict(X_test_s)

    cm = confusion_matrix(y_test, y_pred, labels=class_names)
    fig, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names, ax=ax)
    ax.set_title('Confusion Matrix - PhysioNet')
    ax.set_ylabel('True')
    ax.set_xlabel('Pred')
    fig.tight_layout()
    plt.show()

    print("\nClassification report:\n",
          classification_report(y_test, y_pred, labels=class_names, target_names=class_names))
else:
    print("No samples to train.")
