# Rolling Window Analysis: PRE to POST Decision Period (Dynamic Feature Extraction)

This notebook analyzes how model performance changes as we incrementally expand the analysis window from the PRE-decision period into the POST-decision period.

**Key Difference from Static Version:**
- This notebook **dynamically extracts POST features** for each rolling window
- Each window (0-0.2s, 0-0.4s, etc.) gets its own POST physiology features
- Slower but provides true rolling window analysis

**Analysis Strategy:**
1. Load PRE features (pre-extracted)
2. For each window, dynamically extract POST physiology (pupil) features from the preprocessing files; behavior and gaze features always come from the PRE period
3. Train fusion model with PRE + window-specific POST features
4. Visualize how performance evolves with increasing POST window size

In [1]:
import sys
sys.path.append('../..')

import json
import pickle
import re
import traceback
import warnings
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.impute import SimpleImputer
warnings.filterwarnings('ignore')

# Import project utilities
from src.utils.io import save_results
from src.utils.config import get_model_params
from src.models.fusion import weighted_late_fusion
from src.visualization.plots import set_style

# Seed NumPy's global RNG and apply the project plotting style before any analysis runs.
np.random.seed(42)
set_style('whitegrid')

# Announce the run with a banner and a start timestamp.
banner_rule = '=' * 80
started_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"\n{banner_rule}")
print("ROLLING WINDOW ANALYSIS (DYNAMIC): PRE → POST DECISION PERIOD")
print(f"{banner_rule}\n")
print(f"Analysis started: {started_at}")


ROLLING WINDOW ANALYSIS (DYNAMIC): PRE → POST DECISION PERIOD

Analysis started: 2026-01-09 14:11:54


## 1. Define Rolling Windows

In [2]:
# Rolling-window configuration (all times are seconds relative to the decision/submit at 0.0)
PRE_START = -2.0
PRE_END = 0.0
POST_STEP = 0.2  # Increment size for POST expansion
POST_MAX = 2.0   # Maximum POST time

# Build the POST endpoints from an integer step count instead of np.arange with a
# float step: float accumulation in arange can add/drop the last endpoint and yields
# values such as 0.6000000000000001. The small tolerance keeps an exact multiple
# (e.g. 2.0 / 0.2) from being floored one step short. Endpoints are plain Python
# floats so they print and serialise cleanly.
n_post_windows = int(np.floor(POST_MAX / POST_STEP + 1e-9))
post_endpoints = [round(float(POST_STEP * step), 10) for step in range(1, n_post_windows + 1)]

pre_span_text = f'{PRE_START:.1f} to {PRE_END:.1f}s'

# Window 1 is the PRE-only baseline; every later window adds POST data from 0.0 up to
# a growing endpoint while keeping the same PRE span.
windows = [{
    'name': 'PRE Only',
    'pre_window': (PRE_START, PRE_END),
    'post_window': None,
    'description': f'Baseline: PRE-decision only ({pre_span_text})'
}]

for post_end in post_endpoints:
    windows.append({
        'name': f'PRE+POST[0→{post_end:.1f}s]',
        'pre_window': (PRE_START, PRE_END),
        'post_window': (0.0, post_end),
        'description': f'PRE ({pre_span_text}) + POST (0.0 to {post_end:.1f}s)'
    })

print(f"Created {len(windows)} rolling windows:")
print("\nWindow Details:")
print("-" * 80)
for i, w in enumerate(windows):
    print(f"{i+1:2d}. {w['name']:25s} | {w['description']}")
print("-" * 80)

Created 11 rolling windows:

Window Details:
--------------------------------------------------------------------------------
 1. PRE Only                  | Baseline: PRE-decision only (-2.0 to 0.0s)
 2. PRE+POST[0→0.2s]          | PRE (-2.0 to 0.0s) + POST (0.0 to 0.2s)
 3. PRE+POST[0→0.4s]          | PRE (-2.0 to 0.0s) + POST (0.0 to 0.4s)
 4. PRE+POST[0→0.6s]          | PRE (-2.0 to 0.0s) + POST (0.0 to 0.6s)
 5. PRE+POST[0→0.8s]          | PRE (-2.0 to 0.0s) + POST (0.0 to 0.8s)
 6. PRE+POST[0→1.0s]          | PRE (-2.0 to 0.0s) + POST (0.0 to 1.0s)
 7. PRE+POST[0→1.2s]          | PRE (-2.0 to 0.0s) + POST (0.0 to 1.2s)
 8. PRE+POST[0→1.4s]          | PRE (-2.0 to 0.0s) + POST (0.0 to 1.4s)
 9. PRE+POST[0→1.6s]          | PRE (-2.0 to 0.0s) + POST (0.0 to 1.6s)
10. PRE+POST[0→1.8s]          | PRE (-2.0 to 0.0s) + POST (0.0 to 1.8s)
11. PRE+POST[0→2.0s]          | PRE (-2.0 to 0.0s) + POST (0.0 to 2.0s)
-------------------------------------------------------------------------------

## 2. Load PRE Features

PRE features are pre-extracted and loaded once.

In [3]:
# Read the pickled PRE-decision feature bundle once and unpack the pieces used downstream.
print("Loading PRE-decision features...")
pre_features_path = Path('../../data/results/features_PRE/extracted_features_PRE.pkl')
with pre_features_path.open('rb') as pkl_file:
    pre_data = pickle.load(pkl_file)

pre_df, pre_physio_cols, pre_behavior_cols, pre_gaze_cols = (
    pre_data[key] for key in ('merged_df', 'physio_cols', 'behavior_cols', 'gaze_cols')
)

# Short summary of what was loaded.
pre_feature_summary = (
    f"{len(pre_physio_cols)} physio + {len(pre_behavior_cols)} behavior + {len(pre_gaze_cols)} gaze"
)
print(f"  PRE trials: {len(pre_df)}")
print(f"  PRE subjects: {pre_df['subject_id'].nunique()}")
print(f"  PRE features: {pre_feature_summary}")
print(f"\n✓ PRE features loaded")

Loading PRE-decision features...
  PRE trials: 12511
  PRE subjects: 97
  PRE features: 13 physio + 7 behavior + 20 gaze

✓ PRE features loaded


## 3. Define Dynamic POST Feature Extraction

Extract POST physiology features for a specific time window from preprocessing files.

In [4]:
# Minimum number of non-NaN samples a trial needs overall, and inside the requested window.
_MIN_VALID_SAMPLES = 20
_MIN_WINDOW_SAMPLES = 5

# Column layout of the returned frame. Declared explicitly so that an empty result
# still exposes the merge keys and the *_post_window feature columns.
_POST_WINDOW_COLUMNS = [
    'trial_id',
    'subject_id',
    'pupil_mean_post_window',
    'pupil_std_post_window',
    'pupil_slope_post_window',
    'time_to_peak_post_window',
    'pupil_cv_post_window',
    'pupil_velocity_mean_post_window',
    'pupil_max_dilation_rate_post_window',
    'pupil_max_constriction_rate_post_window',
    'pupil_acceleration_std_post_window',
    'pct_time_dilating_post_window',
    'num_dilation_peaks_post_window',
    'eye_asymmetry_post_window',
    'eye_asymmetry_std_post_window',
]


def _find_raw_json(raw_dir, subject_id):
    """Return the raw JSON path matching subject_id, or None when there is no match."""
    id_parts = subject_id.split('_')
    # Escape each part so regex metacharacters in an ID are matched literally;
    # underscores still act as "anything in between", as before.
    pattern = re.compile('.*'.join(re.escape(part) for part in id_parts))
    # Sorted so the chosen file does not depend on filesystem listing order.
    candidates = sorted(raw_dir.glob(f"*{id_parts[-1]}.json"))
    return next((path for path in candidates if pattern.search(path.name)), None)


def _post_window_features(time_s, pupil, pupil_left, pupil_right):
    """
    Compute the 13 POST pupil features from samples already restricted to the window.

    The caller must pass at least _MIN_WINDOW_SAMPLES samples, so the first and
    second differences below are never empty.
    """
    # NOTE(review): velocity/acceleration are per-sample differences (not divided by
    # the sample interval) and are taken after NaN samples were dropped - confirm this
    # matches how the PRE features were computed.
    velocity = np.diff(pupil)
    acceleration = np.diff(velocity)
    pupil_mean = np.mean(pupil)
    pupil_std = np.std(pupil)
    eye_difference = pupil_left - pupil_right

    return {
        'pupil_mean_post_window': pupil_mean,
        'pupil_std_post_window': pupil_std,
        'pupil_slope_post_window': np.polyfit(time_s, pupil, 1)[0],
        'time_to_peak_post_window': time_s[np.argmax(pupil)] - time_s[0],
        # Coefficient of variation is undefined for a zero mean; report 0 in that case.
        'pupil_cv_post_window': pupil_std / np.abs(pupil_mean) if pupil_mean != 0 else 0,
        'pupil_velocity_mean_post_window': np.mean(np.abs(velocity)),
        'pupil_max_dilation_rate_post_window': np.max(velocity),
        'pupil_max_constriction_rate_post_window': np.abs(np.min(velocity)),
        'pupil_acceleration_std_post_window': np.std(acceleration),
        'pct_time_dilating_post_window': np.mean(velocity > 0),
        # Counts upward sign changes of the velocity.
        # NOTE(review): that marks where dilation starts rather than where the pupil
        # peaks - confirm the intended definition before changing it.
        'num_dilation_peaks_post_window': np.sum(np.diff(np.sign(velocity)) > 0),
        'eye_asymmetry_post_window': np.nanmean(np.abs(eye_difference)),
        'eye_asymmetry_std_post_window': np.nanstd(eye_difference),
    }


def extract_post_physio_for_window(time_window, baseline_method='t3_stable_pre_decision'):
    """
    Extract POST physiology features for a specific time window.

    Every preprocessing JSON is paired with its raw JSON, and each trial that was
    baselined successfully and was submitted contributes one row of pupil features
    computed on samples with ``start < t <= end``.

    Parameters
    ----------
    time_window : tuple
        (start, end) in seconds relative to submit (e.g., (0.0, 0.2))
    baseline_method : str
        Baseline correction method name

    Returns
    -------
    pd.DataFrame
        DataFrame with trial_id, subject_id, and POST physiology features.
        When no trial qualifies the frame is empty but still has all columns.

    Raises
    ------
    KeyError
        If ``baseline_method`` is missing from a trial's ``methods`` entry.
    """
    window_start, window_end = time_window
    preprocessing_dir = Path('../../data/results/preprocessing_outputs/preprocessing')
    raw_dir = Path('../../data/raw/json')

    all_features = []

    for preprocessed_file in sorted(preprocessing_dir.glob('preprocessing_*.json')):
        with open(preprocessed_file, 'r', encoding='utf-8') as f:
            preprocessed = json.load(f)

        subject_id = preprocessed['subject_id']

        raw_path = _find_raw_json(raw_dir, subject_id)
        if raw_path is None:
            continue

        with open(raw_path, 'r', encoding='utf-8') as f:
            raw_trials = json.load(f)['trials']

        for trial_id, trial_data in preprocessed['trial_data'].items():
            method_data = trial_data['methods'][baseline_method]
            if method_data['success'] != True:
                continue

            # Trial ids are 1-based. Reject ids outside the raw trial list so that a
            # non-positive id cannot wrap around to a trial at the end of the list.
            trial_index = int(trial_id) - 1
            if not 0 <= trial_index < len(raw_trials):
                continue
            if not raw_trials[trial_index]['gamble details']['submitted']:
                continue

            # dtype=float turns JSON nulls into NaN instead of producing object arrays.
            time_aligned = np.array(trial_data['time_relative_to_submit'], dtype=float)
            pupil_avg = np.array(method_data['pupil_avg_baselined'], dtype=float)
            pupil_L = np.array(method_data['pupil_L_baselined'], dtype=float)
            pupil_R = np.array(method_data['pupil_R_baselined'], dtype=float)

            # Keep only samples where the averaged pupil trace is valid.
            valid_mask = ~np.isnan(pupil_avg)
            if valid_mask.sum() < _MIN_VALID_SAMPLES:
                continue

            # Restrict to the requested window (start exclusive, end inclusive).
            window_mask = valid_mask & (time_aligned > window_start) & (time_aligned <= window_end)
            if window_mask.sum() < _MIN_WINDOW_SAMPLES:
                continue

            features = {
                'trial_id': f"{trial_id}_{subject_id}",
                'subject_id': subject_id,
            }
            features.update(_post_window_features(
                time_aligned[window_mask],
                pupil_avg[window_mask],
                pupil_L[window_mask],
                pupil_R[window_mask],
            ))
            all_features.append(features)

    return pd.DataFrame(all_features, columns=_POST_WINDOW_COLUMNS)

print("✓ POST feature extraction function defined")

✓ POST feature extraction function defined


## 4. Run Rolling Window Analysis

For each window, extract POST features dynamically and train the model.

In [None]:
MERGE_KEYS = ['trial_id', 'subject_id']
modality_names = ['Physiology', 'Behavior', 'Gaze']


def _impute_mean(frame, columns):
    """Return frame[columns] as an array with NaNs replaced by the column mean."""
    # NOTE(review): the means are computed over all trials before weighted_late_fusion
    # splits the data; if that function cross-validates, consider imputing inside folds.
    return SimpleImputer(strategy='mean').fit_transform(frame[list(columns)])


rolling_results = []

print(f"\n{'='*80}")
print("RUNNING ROLLING WINDOW ANALYSIS (DYNAMIC)")
print(f"{'='*80}\n")

for i, window in enumerate(windows):
    print(f"\nWindow {i+1}/{len(windows)}: {window['name']}")
    print(f"  {window['description']}")
    print("-" * 80)

    post_window = window['post_window']
    post_physio_cols_window = []

    if post_window is None:
        # PRE only - use pre_df directly
        combined_df = pre_df.copy()
    else:
        # PRE + POST - dynamically extract POST features
        print(f"  Extracting POST features for window {post_window}...")
        post_df_window = extract_post_physio_for_window(post_window)
        print(f"    Extracted {len(post_df_window)} POST trials")

        # Without POST rows the merge below has nothing to join on; skip this window
        # instead of aborting the whole loop.
        if post_df_window.empty:
            print("  ✗ No POST features extracted for this window - skipping")
            continue

        # A repeated (trial_id, subject_id) on the POST side would multiply rows in
        # the merge, so keep only the first occurrence.
        n_duplicate_keys = int(post_df_window.duplicated(subset=MERGE_KEYS).sum())
        if n_duplicate_keys:
            print(f"    ⚠ Dropping {n_duplicate_keys} POST rows with duplicate trial keys")
            post_df_window = post_df_window.drop_duplicates(subset=MERGE_KEYS)

        # Inner merge: only trials that have both PRE and POST features are kept, so
        # POST windows may be evaluated on fewer trials than the PRE-only baseline.
        combined_df = pre_df.merge(post_df_window, on=MERGE_KEYS, how='inner')
        print(f"    Merged to {len(combined_df)} trials with both PRE and POST")

        if combined_df.empty:
            print("  ✗ No trials with both PRE and POST features - skipping")
            continue

        post_physio_cols_window = [c for c in post_df_window.columns if c.endswith('_post_window')]

    # Physiology = PRE columns plus this window's POST columns (none for PRE-only);
    # behavior and gaze always come from the PRE period.
    combined_physio_cols = list(pre_physio_cols) + post_physio_cols_window
    X_physio = _impute_mean(combined_df, combined_physio_cols)
    X_behavior = _impute_mean(combined_df, pre_behavior_cols)
    X_gaze = _impute_mean(combined_df, pre_gaze_cols)

    if post_window is None:
        print(f"  Using PRE features only: {len(pre_physio_cols)} physio")
    else:
        print(f"  Using PRE+POST features: {len(pre_physio_cols)} PRE + {len(post_physio_cols_window)} POST = {X_physio.shape[1]} total")

    y = combined_df['outcome'].values
    subjects = combined_df['subject_id'].values

    print(f"  Feature shapes: Physio={X_physio.shape}, Behavior={X_behavior.shape}, Gaze={X_gaze.shape}")

    # Run weighted late fusion
    X_modalities = [X_physio, X_behavior, X_gaze]

    try:
        results = weighted_late_fusion(
            X_modalities, y, subjects, modality_names,
            fusion_method='weighted'
        )
    except Exception as e:
        # A failure in one window is reported and must not stop the remaining windows.
        print(f"  ✗ Error: {e}")
        traceback.print_exc()
        continue

    rolling_results.append({
        'window_id': i,
        'window_name': window['name'],
        # Plain float so the saved table carries no NumPy scalar types in this column.
        'post_end_time': float(post_window[1]) if post_window else 0.0,
        'accuracy': results['accuracy_mean'],
        'accuracy_sem': results['accuracy_sem'],
        'accuracy_std': results['accuracy_std'],
        'f1_score': results['f1_mean'],
        'f1_sem': results['f1_sem'],
        'physio_weight': results['weights'][0],
        'behavior_weight': results['weights'][1],
        'gaze_weight': results['weights'][2],
        'n_subjects': results['n_subjects'],
        'n_trials': results['n_trials'],
        'n_physio_features': X_physio.shape[1],
        'invest_ratio': np.mean(y)
    })

    print(f"  ✓ Accuracy: {results['accuracy_mean']:.4f} ± {results['accuracy_sem']:.4f}")
    print(f"    F1-Score: {results['f1_mean']:.4f} ± {results['f1_sem']:.4f}")
    print(f"    Weights: Physio={results['weights'][0]:.3f}, Behavior={results['weights'][1]:.3f}, Gaze={results['weights'][2]:.3f}")

results_df = pd.DataFrame(rolling_results)

print(f"\n{'='*80}")
print(f"✓ Analysis complete: {len(results_df)} windows analyzed")
print(f"{'='*80}")


RUNNING ROLLING WINDOW ANALYSIS (DYNAMIC)


Window 1/11: PRE Only
  Baseline: PRE-decision only (-2.0 to 0.0s)
--------------------------------------------------------------------------------
  Using PRE features only: 13 physio
  Feature shapes: Physio=(12511, 13), Behavior=(12511, 7), Gaze=(12511, 20)
  ✓ Accuracy: 0.6931 ± 0.0133
    F1-Score: 0.6794 ± 0.0157
    Weights: Physio=0.031, Behavior=0.967, Gaze=0.002

Window 2/11: PRE+POST[0→0.2s]
  PRE (-2.0 to 0.0s) + POST (0.0 to 0.2s)
--------------------------------------------------------------------------------
  Extracting POST features for window (0.0, np.float64(0.2))...
    Extracted 11467 POST trials
    Merged to 12511 trials with both PRE and POST
  Using PRE+POST features: 13 PRE + 13 POST = 26 total
  Feature shapes: Physio=(12511, 26), Behavior=(12511, 7), Gaze=(12511, 20)
  ✓ Accuracy: 0.6940 ± 0.0133
    F1-Score: 0.6808 ± 0.0156
    Weights: Physio=0.029, Behavior=0.968, Gaze=0.002

Window 3/11: PRE+POST[0→0.4s]
  PR

## 5. Visualize Results

In [None]:
# Create comprehensive visualization
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('Rolling Window Analysis: PRE → POST Decision Period (Dynamic)', 
             fontsize=16, fontweight='bold', y=0.995)

# Subplot 1: Accuracy over time
ax1 = axes[0, 0]
ax1.plot(results_df['post_end_time'], results_df['accuracy'], 
         marker='o', linewidth=2, markersize=6, color='#2E86AB', label='Accuracy')
ax1.fill_between(results_df['post_end_time'],
                  results_df['accuracy'] - results_df['accuracy_sem'],
                  results_df['accuracy'] + results_df['accuracy_sem'],
                  alpha=0.2, color='#2E86AB')
ax1.axhline(y=0.5, color='gray', linestyle='--', linewidth=1, alpha=0.5, label='Chance')
ax1.axvline(x=0.0, color='red', linestyle='--', linewidth=1.5, alpha=0.7, label='Decision Point')
ax1.set_xlabel('POST-Decision End Time (seconds)', fontsize=11, fontweight='bold')
ax1.set_ylabel('Accuracy', fontsize=11, fontweight='bold')
ax1.set_title('A. Model Accuracy vs. POST Window Extension', fontsize=12, fontweight='bold')
ax1.legend(loc='best')
ax1.grid(True, alpha=0.3)

# Subplot 2: F1-Score over time
ax2 = axes[0, 1]
ax2.plot(results_df['post_end_time'], results_df['f1_score'], 
         marker='s', linewidth=2, markersize=6, color='#A23B72', label='F1-Score')
ax2.fill_between(results_df['post_end_time'],
                  results_df['f1_score'] - results_df['f1_sem'],
                  results_df['f1_score'] + results_df['f1_sem'],
                  alpha=0.2, color='#A23B72')
ax2.axvline(x=0.0, color='red', linestyle='--', linewidth=1.5, alpha=0.7, label='Decision Point')
ax2.set_xlabel('POST-Decision End Time (seconds)', fontsize=11, fontweight='bold')
ax2.set_ylabel('F1-Score', fontsize=11, fontweight='bold')
ax2.set_title('B. F1-Score vs. POST Window Extension', fontsize=12, fontweight='bold')
ax2.legend(loc='best')
ax2.grid(True, alpha=0.3)

# Subplot 3: Modality weights over time
ax3 = axes[1, 0]
ax3.plot(results_df['post_end_time'], results_df['physio_weight'], 
         marker='o', linewidth=2, markersize=6, label='Physiology', color='#F18F01')
ax3.plot(results_df['post_end_time'], results_df['behavior_weight'], 
         marker='s', linewidth=2, markersize=6, label='Behavior', color='#6A994E')
ax3.plot(results_df['post_end_time'], results_df['gaze_weight'], 
         marker='^', linewidth=2, markersize=6, label='Gaze', color='#BC4B51')
ax3.axvline(x=0.0, color='red', linestyle='--', linewidth=1.5, alpha=0.7, label='Decision Point')
ax3.set_xlabel('POST-Decision End Time (seconds)', fontsize=11, fontweight='bold')
ax3.set_ylabel('Modality Weight', fontsize=11, fontweight='bold')
ax3.set_title('C. Modality Contributions Over Time', fontsize=12, fontweight='bold')
ax3.legend(loc='best')
ax3.grid(True, alpha=0.3)
ax3.set_ylim([-0.05, 1.05])

# Subplot 4: Number of physio features over time
ax4 = axes[1, 1]
ax4.plot(results_df['post_end_time'], results_df['n_physio_features'], 
         marker='D', linewidth=2, markersize=6, color='#8338EC', label='Physio Features')
ax4.axvline(x=0.0, color='red', linestyle='--', linewidth=1.5, alpha=0.7, label='Decision Point')
ax4.set_xlabel('POST-Decision End Time (seconds)', fontsize=11, fontweight='bold')
ax4.set_ylabel('Number of Physiology Features', fontsize=11, fontweight='bold')
ax4.set_title('D. Feature Count per Window', fontsize=12, fontweight='bold')
ax4.legend(loc='best')
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\n✓ Visualization complete")

## 6. Statistical Analysis

In [None]:
print(f"\n{'='*80}")
print("STATISTICAL ANALYSIS")
print(f"{'='*80}\n")

if results_df.empty:
    # Every window failed or was skipped, so there is nothing to compare.
    print("✗ No window results available - skipping statistical analysis")
else:
    # Identify the baseline by its window_id rather than by row position, so a failed
    # PRE-only run cannot make a POST window pose as the baseline.
    pre_rows = results_df[results_df['window_id'] == 0]
    post_rows = results_df[results_df['window_id'] != 0]

    # Compare PRE-only vs. best POST window
    print("1. PRE-only vs. Best POST Window:")
    if pre_rows.empty or post_rows.empty:
        print("   ✗ Skipped: needs the PRE-only baseline and at least one POST window")
    else:
        pre_only_acc = pre_rows.iloc[0]['accuracy']
        pre_only_sem = pre_rows.iloc[0]['accuracy_sem']

        # idxmax returns an index label, so it is paired with .loc; the search is
        # limited to POST windows so the baseline cannot be picked as "best".
        best_idx = post_rows['accuracy'].idxmax()
        best_window = post_rows.loc[best_idx]

        improvement = best_window['accuracy'] - pre_only_acc
        improvement_pct = 100 * improvement / pre_only_acc if pre_only_acc else float('nan')

        print(f"   PRE-only:      Accuracy = {pre_only_acc:.4f} ± {pre_only_sem:.4f}")
        print(f"   Best window:   {best_window['window_name']}")
        print(f"                  Accuracy = {best_window['accuracy']:.4f} ± {best_window['accuracy_sem']:.4f}")
        print(f"   Improvement:   {improvement:.4f} ({improvement_pct:.2f}%)")

    # Trend analysis over POST windows only. With two points the correlation is
    # always ±1, so at least three windows are required.
    if len(post_rows) >= 3:
        post_times = post_rows['post_end_time'].values
        post_accs = post_rows['accuracy'].values

        r, p = stats.pearsonr(post_times, post_accs)

        print(f"\n2. Trend Analysis (POST windows):")
        print(f"   Pearson r = {r:.4f}, p = {p:.4f}")
        if p < 0.05:
            trend = "positive" if r > 0 else "negative"
            print(f"   ✓ Significant {trend} trend detected")
        else:
            print(f"   ✗ No significant trend")

    print(f"\n3. Summary:")
    print(f"   Accuracy range: {results_df['accuracy'].min():.4f} to {results_df['accuracy'].max():.4f}")
    print(f"   Variability: {results_df['accuracy'].std():.4f}")

print(f"\n{'='*80}")

## 7. Save Results

In [None]:
# Write the per-window results table to the analysis output folder, creating it if needed.
output_dir = Path('../../data/results/analysis_outputs_PRE')
output_dir.mkdir(parents=True, exist_ok=True)

output_file = output_dir.joinpath('rolling_window_post_decision_dynamic.csv')
save_results(results_df, str(output_file))

# Closing banner with the finish timestamp.
closing_rule = '=' * 80
finished_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"\n✓ Results saved to: {output_file}")
print(f"\n{closing_rule}")
print("ANALYSIS COMPLETE")
print(f"{closing_rule}")
print(f"Finished: {finished_at}")