In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# PARAMETERS
SAMPLING_RATE = 256               # Hz
WINDOW_DURATION_SEC = 20          # each window is 20 seconds
WINDOW_SIZE = SAMPLING_RATE * WINDOW_DURATION_SEC
STRIDE = WINDOW_SIZE // 2         # 50% overlap
PROB_THRESHOLD = 0.75             # for identifying high-risk windows

# === Step 1: Load Data ===
file_path = r"C:\Users\Narula\Downloads\chbmit_preprocessed_data.csv"
df = pd.read_csv(file_path)

X_raw = df.drop(columns=['Outcome']).values
y_raw = df['Outcome'].values

# === Step 2: Create Sliding Windows ===
X_windows = []
y_labels = []
window_indices = []

for start in range(0, len(X_raw) - WINDOW_SIZE, STRIDE):
    end = start + WINDOW_SIZE
    X_window = X_raw[start:end]
    
    # Simple feature: mean across each channel over time
    X_feat = X_window.mean(axis=0)
    X_windows.append(X_feat)

    # Label the window as 1 if seizure appears during or shortly after
    future_window = y_raw[end:end + WINDOW_SIZE]  # look ahead
    label = int(np.any(future_window == 1))
    y_labels.append(label)

    window_indices.append((start, end))

X_windows = np.array(X_windows)
y_labels = np.array(y_labels)

# === Step 3: Train/Test Split ===
X_train, X_test, y_train, y_test, idx_train, idx_test = train_test_split(
    X_windows, y_labels, window_indices, test_size=0.2, random_state=42, stratify=y_labels)

# === Step 4: Train Random Forest ===
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# === Step 5: Predict and Evaluate ===
y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]

print("=== Windowed Classification Report ===")
print(classification_report(y_test, y_pred, target_names=["No Seizure Soon", "Seizure Likely Soon"]))

# === Step 6: Analyze High-Risk Windows ===
risk_df = pd.DataFrame({
    'start': [start for (start, end) in idx_test],
    'end': [end for (start, end) in idx_test],
    'probability': y_probs,
    'true_label': y_test
})

# Filter and sort by high seizure probability
high_risk_windows = risk_df[risk_df['probability'] >= PROB_THRESHOLD].sort_values(by='probability', ascending=False)

print(f"\n=== High-Risk Time Windows (Prob ≥ {PROB_THRESHOLD}) ===")
true_positives = 0
false_positives = 0

for _, row in high_risk_windows.iterrows():
    start_idx = int(row['start'])
    end_idx = int(row['end'])
    start_min = start_idx / SAMPLING_RATE / 60
    end_min = end_idx / SAMPLING_RATE / 60
    actual_window = y_raw[start_idx:end_idx]

    if np.any(actual_window == 1):
        match = "✅ SEIZURE"
        true_positives += 1
    else:
        match = "❌ NO SEIZURE"
        false_positives += 1

    print(f"From {start_min:.2f} min to {end_min:.2f} min — "
          f"Prob: {row['probability']:.2f} — {match}")

# === Step 7: Summary ===
print("\n=== Summary ===")
print(f"Total High-Risk Windows: {len(high_risk_windows)}")
print(f"True Positives (correct seizure forecasts): {true_positives}")
print(f"False Positives (false alarms): {false_positives}")


=== Windowed Classification Report ===
                     precision    recall  f1-score   support

    No Seizure Soon       0.84      0.67      0.74        81
Seizure Likely Soon       0.73      0.88      0.80        83

           accuracy                           0.77       164
          macro avg       0.79      0.77      0.77       164
       weighted avg       0.79      0.77      0.77       164


=== High-Risk Time Windows (Prob ≥ 0.75) ===
From 84.17 min to 84.50 min — Prob: 0.85 — ✅ SEIZURE
From 107.17 min to 107.50 min — Prob: 0.84 — ✅ SEIZURE
From 92.83 min to 93.17 min — Prob: 0.84 — ✅ SEIZURE
From 133.67 min to 134.00 min — Prob: 0.84 — ✅ SEIZURE
From 109.17 min to 109.50 min — Prob: 0.83 — ✅ SEIZURE
From 112.33 min to 112.67 min — Prob: 0.83 — ✅ SEIZURE
From 104.17 min to 104.50 min — Prob: 0.81 — ✅ SEIZURE
From 88.17 min to 88.50 min — Prob: 0.81 — ✅ SEIZURE
From 26.83 min to 27.17 min — Prob: 0.80 — ❌ NO SEIZURE
From 120.50 min to 120.83 min — Prob: 0.80 — ✅ SEIZURE
F