## Imports (common)

In [None]:
import os
import numpy as np
import gc
import h5py
import scipy.io
import scipy.signal as sgl
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score, balanced_accuracy_score # Added balanced_accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV, cross_val_score
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization, LSTM, GRU
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from scipy.stats import skew, kurtosis
from scipy.signal import welch
import neurokit2 as nk
# from imblearn.over_sampling import SMOTE # Not using SMOTE currently

## Data Loading & Preprocessing

In [13]:
# --- Constants for Windowing ---
FS = 128
WINDOW_SEC = 2 # Try 2 seconds first
STEP_SEC = 1   # Correspondingly smaller step (e.g., 50% overlap)
WINDOW_SAMPLES = int(WINDOW_SEC * FS) # Will be 256
STEP_SAMPLES = int(STEP_SEC * FS)     # Will be 128


# --- Data Loading Function ---
def load_patient_preprocessed_data(patient_number):
    # Keep this function as is
    base_dir = r"C:\Users\ferri\Downloads\PoliTO\Tesi\DSs\Emotion-Stress\AMIGOS"
    file_path = os.path.join(
        base_dir, "Data preprocessed",
        f"Data_Preprocessed_P{patient_number:02d}",
        f"Data_Preprocessed_P{patient_number:02d}.mat"
    )
    data = scipy.io.loadmat(file_path)
    return data

# --- Preprocessing functions for pipeline ---
def process_trial_signal(signal, target_length=None, fs=512):
    """
    Convert a trial's raw signal into a 2D array [channels, time].
    Applies transpose fix, downsampling, filtering, baseline removal.
    """
    # Convert to float32.
    signal = np.array(signal, dtype=np.float32)

    # --- ADD TRANSPOSE STEP ---
    # Check if dimensions look like (samples, channels) and transpose if needed
    if signal.ndim == 2 and signal.shape[0] > signal.shape[1]:
         # Heuristic: If rows > columns, assume it's (samples, channels)
         print(f"    Transposing input signal from {signal.shape} to {signal.T.shape}") # Add print
         signal = signal.T # Transpose to (channels, samples)
    # --- END TRANSPOSE STEP ---

    # Check if signal is empty AFTER potential transpose
    if signal.size == 0:
        return np.empty((0,0), dtype=np.float32)

    # If the signal is 1D, reshape to (1, length).
    # This should happen AFTER transpose if original was (L, 1)
    if signal.ndim == 1:
        signal = signal[None, :] # Now shape (1, L)

    # Downsampling parameters.
    N = 4
    lowcut, highcut = 1.0, 45.0
    desired_fs = 128 # This is the FS used for feature extraction later
    down_factor = fs // desired_fs

    # 1) Downsample each channel. (Now iterates correctly over channels)
    downsampled = []
    for ch_data in signal: # ch_data should now be one channel's time series
        if ch_data.size == 0: continue
        ch_data_down = ch_data[::down_factor]
        downsampled.append(ch_data_down)

    # If no channels had data, return an empty array.
    if not downsampled:
        return np.empty((0,0), dtype=np.float32)

    # Stack downsampled channels. Pad if necessary (though simple slicing should yield same length)
    try:
        max_len_down = max(len(ch) for ch in downsampled)
        padded_downsampled = []
        for ch in downsampled:
             pad_width = max_len_down - len(ch)
             if pad_width > 0:
                  padded_ch = np.pad(ch, (0, pad_width), mode='edge')
                  padded_downsampled.append(padded_ch)
             else:
                  padded_downsampled.append(ch)
        signal = np.vstack([ch[None, :] for ch in padded_downsampled])
    except ValueError as e:
         print(f"Error during vstack after downsampling: {e}. Lengths: {[len(ch) for ch in downsampled]}")
         return np.empty((0,0), dtype=np.float32) # Return empty on error

    # 2) Bandpass filter design.
    nyquist = 0.5 * desired_fs
    b, a = sgl.butter(N=4, Wn=[lowcut/nyquist, highcut/nyquist], btype='band')
    min_len_filt = 3 * (max(len(a), len(b)) - 1) # Renamed variable

    # Filter each channel; if too short, skip filtering.
    filtered = []
    for ch_data in signal:
        if len(ch_data) < min_len_filt:
            ch_data_filt = ch_data  # Fallback: leave unfiltered.
        else:
            try:
                 ch_data_filt = sgl.filtfilt(b, a, ch_data)
            except ValueError as e: # Add specific error handling for filtfilt
                 print(f"    filtfilt error: {e} on data length {len(ch_data)}. Skipping filter.")
                 ch_data_filt = ch_data # Fallback if filtfilt fails
        filtered.append(ch_data_filt)
    signal = np.vstack([ch[None, :] for ch in filtered])

    # 3) Baseline removal (subtract mean from each channel).
    baseline_removed = []
    for ch_data in signal:
        mean_val = np.mean(ch_data) if ch_data.size > 0 else 0
        ch_data_bs = ch_data - mean_val
        baseline_removed.append(ch_data_bs)
    signal = np.vstack([ch[None, :] for ch in baseline_removed])

    # 4) Padding/Truncation - Keep disabled if target_length is None
    if target_length is not None:
        processed = []
        for ch_data in signal:
            ch_len = len(ch_data)
            if ch_len == 0:
                proc = np.zeros(target_length, dtype=np.float32)
            elif ch_len < target_length:
                pad_width = target_length - ch_len
                proc = np.pad(ch_data, (0, pad_width), mode='edge')
            else:
                proc = ch_data[:target_length]
            processed.append(proc.astype(np.float32))
        signal = np.vstack([p[None, :] for p in processed])

    return signal.astype(np.float32)


def split_into_modalities(signal_window):
    # Keep this function as is, it works on a window [channels, time]
    # If the signal is 1D, assume it represents a single modality (e.g., ECG).
    if signal_window.ndim == 1:
        # Reshape if 1D (e.g., single channel passed)
        signal_window = signal_window[None, :]
        if signal_window.shape[0] == 1: # Assume it's ECG if only one channel
            return {"ecg": signal_window.flatten()}
        else: # Should not happen if input is always [channels, time]
             return {}

    elif signal_window.ndim == 2:
        # If multi-channel, split into ECG, GSR, and EEG as desired.
        # Adjust indices based on your actual channel order in 'joined_data'
        # Assuming: 0=ECG, 1=GSR, 2=EEG (as per your original code)
        modalities = {}
        if signal_window.shape[0] > 0:
            modalities["ecg"] = signal_window[0, :]
        if signal_window.shape[0] > 1:
            modalities["gsr"] = signal_window[1, :]
        if signal_window.shape[0] > 2:
             # Assuming EEG is just the 3rd channel for simplicity here
             # If multiple EEG channels, you'd handle them differently in extract_features
            modalities["eeg"] = signal_window[2, :]
        return modalities
    else:
        # Handle unexpected dimensions
        print(f"Warning: Unexpected signal dimension {signal_window.ndim}")
        return {}


def discretize_label(label):
    # Keep this function as is
    flat_label = np.array(label).flatten()  # Ensure label is 1D.
    if flat_label.size == 2:
        valence, arousal = flat_label
    elif flat_label.size >= 3:
        valence, arousal = flat_label[1], flat_label[2]
    else:
        return "Unknown"

    if valence < 0 and arousal < 0:
        return "Low valence, Low arousal"
    elif valence < 0 and arousal >= 0:
        return "Low valence, High arousal"
    elif valence >= 0 and arousal < 0:
        return "High valence, Low arousal"
    else:
        return "High valence, High arousal"

# --- Feature Extraction Functions ---
def extract_features(signals_dict, fs=FS): # Use FS=128 defined above
    # Keep this function as is, it extracts features from a dictionary of signals (now windowed signals)
    """
    Extract features for multiple signals (ECG, GSR, EEG) from a dictionary.
    If advanced processing (e.g., HRV from ECG) fails, falls back to basic statistics.
    **Consider adding more advanced features here later.**
    """
    feat_list = []
    # --- ECG Features (Consider adding more HRV: nk.hrv_frequency, nk.hrv_nonlinear) ---
    if 'ecg' in signals_dict:
        ecg_signal = np.array(signals_dict['ecg']).flatten()
        # Check length against minimum needed by neurokit processing steps
        min_len_nk = 30 # Arbitrary minimum, adjust based on nk requirements for short signals
        if len(ecg_signal) < min_len_nk:
             # Fallback for very short windows
            basic_stats = [np.mean(ecg_signal), np.std(ecg_signal), np.min(ecg_signal), np.max(ecg_signal), np.median(ecg_signal)]
            ecg_feats = basic_stats + [0.0] * (10 - len(basic_stats)) # Pad to expected length
        else:
            try:
                ecg_cleaned = nk.ecg_clean(ecg_signal, sampling_rate=fs)
                _, rpeaks = nk.ecg_peaks(ecg_cleaned, sampling_rate=fs, correct_artifacts=True) # Added correct_artifacts
                # Check if enough R-peaks were detected for HRV analysis
                if len(rpeaks.get("ECG_R_Peaks", [])) < 3: # Need at least a few peaks for HRV
                    raise ValueError("Not enough R-peaks detected for HRV.")
                hrv = nk.hrv(rpeaks, sampling_rate=fs, show=False)
                # Select a consistent set of HRV features (adjust as needed)
                feature_names = [
                    "RMSSD", "SDNN", "pNN50", "pNN20",
                    "LFHF", # Use LFHF instead of LF/HF if available
                    "HF", "LF", "VLF", "SDSD", "IQRNN" # Replaced HRV_TI with IQRNN
                 ]
                ecg_feats = []
                # Check HRV columns, handle potential missing columns or NaNs
                hrv_cols = hrv.columns
                for name in feature_names:
                     # Handle different naming conventions (e.g., LF/HF vs LFHF)
                    actual_name = name
                    if name == "LFHF" and "LF/HF" in hrv_cols:
                        actual_name = "LF/HF"
                    elif name == "LF/HF" and "LFHF" in hrv_cols:
                         actual_name = "LFHF"

                    if actual_name in hrv_cols and not np.isnan(hrv[actual_name].values[0]):
                         ecg_feats.append(hrv[actual_name].values[0])
                    else:
                         ecg_feats.append(0.0) # Use 0.0 for missing/NaN features

                 # Ensure we always have 10 features
                if len(ecg_feats) < 10:
                    ecg_feats.extend([0.0] * (10 - len(ecg_feats)))
                elif len(ecg_feats) > 10:
                     ecg_feats = ecg_feats[:10] # Truncate if more than 10 somehow

            except Exception as e:
                # Fallback: Compute basic statistics if advanced features fail.
                # print(f"Warning: ECG feature extraction failed for a window: {e}") # Optional warning
                basic_stats = [
                     np.mean(ecg_signal) if len(ecg_signal) > 0 else 0.0,
                     np.std(ecg_signal) if len(ecg_signal) > 1 else 0.0,
                     np.min(ecg_signal) if len(ecg_signal) > 0 else 0.0,
                     np.max(ecg_signal) if len(ecg_signal) > 0 else 0.0,
                     np.median(ecg_signal) if len(ecg_signal) > 0 else 0.0
                 ]
                # Pad to reach length 10.
                ecg_feats = basic_stats + [0.0] * (10 - len(basic_stats))
        feat_list.append(np.array(ecg_feats))

    # --- GSR Features (Consider nk.eda_phasic features: SCR_Peaks_Amplitude_Mean, etc.) ---
    if 'gsr' in signals_dict:
        gsr_signal = np.array(signals_dict['gsr']).flatten()
        if len(gsr_signal) > 2: # Need at least 3 points for stats like skew/kurtosis
            try:
                # Use nk.eda_process for more robust feature extraction
                eda_signals, info = nk.eda_process(gsr_signal, sampling_rate=fs)
                num_scr_peaks = len(info["SCR_Peaks"])
                mean_scr_amp = np.mean(eda_signals["SCR_Amplitude"]) if len(info["SCR_Peaks"]) > 0 else 0.0
                mean_scl = np.mean(eda_signals["EDA_Tonic"])
                std_scl = np.std(eda_signals["EDA_Tonic"])

                # Replace basic stats with these more informative ones + some basic stats
                gsr_feats = [
                     mean_scl, # Mean Tonic component
                     std_scl, # Std Dev Tonic
                     num_scr_peaks, # Number of SCR peaks in window
                     mean_scr_amp, # Mean SCR Amplitude
                     kurtosis(gsr_signal),
                     skew(gsr_signal),
                     np.std(gsr_signal) # Keep overall std dev
                 ]
            except Exception as e:
                 # Fallback to basic stats if eda_process fails
                 # print(f"Warning: GSR feature extraction failed: {e}") # Optional
                 num_scr_peaks = 0.0 # Default if failed
                 gsr_feats = [
                     np.mean(gsr_signal), np.std(gsr_signal),
                     np.min(gsr_signal), np.max(gsr_signal),
                     kurtosis(gsr_signal) if len(gsr_signal)>3 else 0.0,
                     skew(gsr_signal) if len(gsr_signal)>3 else 0.0,
                     num_scr_peaks
                 ]
        else:
             gsr_feats = [0.0] * 7 # Default for too short signal
        feat_list.append(np.array(gsr_feats))

    # --- EEG Features (Consider specific band powers: Alpha, Beta, Gamma, Theta, Delta & Ratios) ---
    if 'eeg' in signals_dict:
        eeg_data = np.array(signals_dict['eeg'])
        if eeg_data.ndim == 1:
            eeg_data = eeg_data[None, :]  # Ensure 2D shape [channels, time]

        all_channels_feats = []
        for ch in range(eeg_data.shape[0]):
            channel_signal = eeg_data[ch, :]
            # Need enough data points for reliable PSD calculation with Welch
            min_len_welch = 256 # Or adjust nperseg based on window size
            if len(channel_signal) < min_len_welch:
                 ch_feats = [0.0] * 8 # Default value for all EEG features if too short
            else:
                try:
                    # Hjorth parameters
                    activity = np.var(channel_signal)
                    diff_signal = np.diff(channel_signal)
                    mobility = np.sqrt(np.var(diff_signal) / (activity + 1e-8))
                    diff_diff_signal = np.diff(diff_signal)
                    complexity = np.sqrt(np.var(diff_diff_signal) / (np.var(diff_signal) + 1e-8)) / (mobility + 1e-8)

                    # Band power using Welch
                    # Adjust nperseg based on window length, ensure it's not longer than signal
                    nperseg = min(min_len_welch, len(channel_signal))
                    freqs, psd = welch(channel_signal, fs=fs, nperseg=nperseg)

                    def bandpower(f, pxx, fmin, fmax):
                        idx = np.logical_and(f >= fmin, f <= fmax)
                        if not np.any(idx): return 0.0 # Handle case where band is outside freq range
                         # Use trapezoid integration
                        return np.trapz(pxx[idx], x=f[idx])

                    # Define frequency bands
                    delta = bandpower(freqs, psd, 1, 4)
                    theta = bandpower(freqs, psd, 4, 8)
                    alpha = bandpower(freqs, psd, 8, 13) # Adjusted alpha range
                    beta  = bandpower(freqs, psd, 13, 30) # Adjusted beta range
                    gamma = bandpower(freqs, psd, 30, 45) # Limited gamma to match filtering

                    # Combine Hjorth and band powers
                    ch_feats = [activity, mobility, complexity, delta, theta, alpha, beta, gamma]

                except Exception as e:
                    # print(f"Warning: EEG feature extraction failed for channel {ch}: {e}") # Optional
                    ch_feats = [0.0] * 8 # Default if any error occurs

            all_channels_feats.append(ch_feats)

        # Average features across channels (if multiple EEG channels were processed)
        # If only one channel, this just returns that channel's features
        eeg_feats = np.mean(all_channels_feats, axis=0)
        feat_list.append(eeg_feats)

    # ---------- Combine all features ----------
    if len(feat_list) == 0:
        # Return array of zeros with expected total feature dimension
        # (10 ECG + 7 GSR + 8 EEG = 25 features)
        return np.zeros(25)
    # Concatenate features from all modalities processed
    combined_features = np.concatenate(feat_list)

    # Ensure consistent length (e.g., if one modality was missing)
    expected_len = 25 # Update this if you change the number of features per modality
    if len(combined_features) < expected_len:
        combined_features = np.pad(combined_features, (0, expected_len - len(combined_features)), 'constant')
    elif len(combined_features) > expected_len:
        combined_features = combined_features[:expected_len] # Should not happen with careful implementation

    return combined_features


# --- NEW FUNCTION: Build Windowed Dataset ---
def build_windowed_dataset(joined_data, labels_array):
    X_list = []
    y_list = []
    n_trials = joined_data.shape[1]

    print(f"Processing {n_trials} trials for windowing...")

    for i in range(n_trials):
        # 1. Get preprocessed signal for the *entire* trial
        #    Set target_length=None as windowing handles length implicitly
        trial_signal_processed = process_trial_signal(joined_data[0, i], target_length=None, fs=512)

        # --- ADD DEBUG PRINTS ---
        print(f"  Trial {i}: Processed signal shape = {trial_signal_processed.shape}")
        if trial_signal_processed.shape[1] > 0: # Avoid errors if shape is (n, 0)
            print(f"            Signal length = {trial_signal_processed.shape[1]} samples. Required >= {WINDOW_SAMPLES}")
        # --- END DEBUG PRINTS ---

        if trial_signal_processed.size == 0 or trial_signal_processed.shape[1] < WINDOW_SAMPLES:
            print(f"  Trial {i}: Skipping - Signal empty or too short.") # Simplified message
            continue # Skip trial if too short for even one window

        # 2. Get the corresponding label for this trial
        lbl = np.array(labels_array[0, i]).squeeze()
        if lbl.size < 3:
            # print(f"Warning: Trial {i} does not have enough label data. Skipping trial.")
            continue
        selected_label = lbl[1:3]  # use only valence and arousal
        discrete_label = discretize_label(selected_label)
        if discrete_label == "Unknown":
             # print(f"Warning: Trial {i} has unknown label. Skipping trial.")
             continue

        # 3. Iterate through windows of the trial signal
        num_samples_trial = trial_signal_processed.shape[1]
        for start in range(0, num_samples_trial - WINDOW_SAMPLES + 1, STEP_SAMPLES):
            end = start + WINDOW_SAMPLES
            signal_window = trial_signal_processed[:, start:end] # Shape [channels, WINDOW_SAMPLES]

            # 4. Extract features for this window
            signals_dict = split_into_modalities(signal_window)
            if not signals_dict: # Check if split failed
                 continue
            window_features = extract_features(signals_dict, fs=FS)

            # 5. Append features and the trial's label
            X_list.append(window_features)
            y_list.append(discrete_label)

    if len(X_list) == 0:
        print("Warning: No windows generated from the data.")
        return None, None

    X_array = np.vstack(X_list)
    y_array = np.array(y_list)
    print(f"Generated {X_array.shape[0]} windows from {n_trials} trials.")
    return X_array, y_array


# --- NEW FUNCTION: Load all patient data using windowing ---
def load_all_patients_windowed_data(num_patients=40):
    X_list_all = []
    y_list_all = []
    for patient in range(1, num_patients + 1):
        print(f"\nLoading and windowing patient {patient}...")
        try:
            data = load_patient_preprocessed_data(patient)
            joined_data = data['joined_data']
            labels_array = data['labels_ext_annotation']

            # Use the new windowed function
            X_patient_windows, y_patient_windows = build_windowed_dataset(joined_data, labels_array)

            if X_patient_windows is not None and y_patient_windows is not None:
                X_list_all.append(X_patient_windows)
                y_list_all.append(y_patient_windows)
            else:
                 print(f"No windows generated for patient {patient}.")
            # Clean up memory
            del data, joined_data, labels_array, X_patient_windows, y_patient_windows
            gc.collect()
        except Exception as e:
            print(f"Error processing patient {patient}: {e}")


    if not X_list_all: # Check if the list is empty
        raise ValueError("No windowed data loaded for any patient.")

    X_all_windows = np.vstack(X_list_all)
    y_all_windows = np.concatenate(y_list_all)

    print(f"\nTotal windows loaded from all patients: {X_all_windows.shape[0]}")
    return X_all_windows, y_all_windows

# --- Remove old loading functions not needed now ---
# def build_patient_data(joined_data, label_array): ... # Removed
# def load_all_patients_data(num_patients=40): ... # Removed
# def load_all_patients_raw_signal(num_patients=40, target_length=None): ... # Removed
# def pad_trials(trials, pad_mode='constant', constant_values=0): ... # Removed (padding within windowing/feature extraction if needed)


## Load and Prepare Data

In [None]:
# --- Load the NEW windowed data ---
print("Loading windowed data for all patients...")
# Set num_patients lower (e.g., 5) for initial testing to speed things up
X_windows, y_windows_desc = load_all_patients_windowed_data(num_patients=40) # ADJUST number of patients!

# --- Prepare labels for windowed data ---
print("\nPreparing labels...")
unique_labels_w = np.unique(y_windows_desc)
label_to_int_w = {label: idx for idx, label in enumerate(unique_labels_w)}
y_int_windows = np.array([label_to_int_w[label] for label in y_windows_desc])
# y_cat_windows = to_categorical(y_int_windows) # Use if needed for specific models like Keras NN

print(f"Window data shape: {X_windows.shape}")
print(f"Window labels shape: {y_int_windows.shape}")
print(f"Class distribution in windowed data: {np.bincount(y_int_windows)}")

# --- Split windowed data ---
# Stratify based on the window labels
X_train_w, X_test_w, y_train_labels_w, y_test_labels_w, y_int_train_w, y_int_test_w = train_test_split(
    X_windows, y_windows_desc, y_int_windows,
    test_size=0.2, random_state=42, stratify=y_int_windows
)

print(f"Train window data shape: {X_train_w.shape}")
print(f"Test window data shape: {X_test_w.shape}")
print(f"Class distribution in train windows: {np.bincount(y_int_train_w)}")
print(f"Class distribution in test windows: {np.bincount(y_int_test_w)}")

## Run Statistical Feature Extraction + ML classifiers

In [16]:
import os
import numpy as np
import gc
import h5py
import scipy.io
import scipy.signal as sgl
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score, GridSearchCV
from sklearn.feature_selection import VarianceThreshold # Consider using feature selection later
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score, balanced_accuracy_score # Make sure balanced is imported
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
# from sklearn.decomposition import PCA # Not using PCA now
# from imblearn.over_sampling import SMOTE # Not using SMOTE now
import xgboost as xgb # Using the updated XGBoost
# import lightgbm as lgb # Still removed as requested previously
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)


# --- Preprocessing for Windowed Features ---
# Apply Imputation and Scaling to the windowed data
# These variables (X_train_w, X_test_w, y_int_train_w, y_int_test_w etc.)
# should be available from the previous cell execution.
print("Preprocessing windowed data...")

# Imputation - Fit on training windows ONLY
# Handle case where X_train_w might be empty if loading failed
if 'X_train_w' in locals() and X_train_w.size > 0:
    imputer_w = SimpleImputer(strategy='mean').fit(X_train_w)
    X_train_w_imputed = imputer_w.transform(X_train_w)
    X_test_w_imputed = imputer_w.transform(X_test_w) # Use the same imputer for test set
    print("Imputation complete.")

    # Check for NaNs/Infs after imputation (important sanity check)
    if np.any(np.isnan(X_train_w_imputed)) or np.any(np.isinf(X_train_w_imputed)):
        print("Warning: NaNs or Infs found in training data AFTER imputation! Replacing with 0.")
        X_train_w_imputed = np.nan_to_num(X_train_w_imputed, nan=0.0, posinf=0.0, neginf=0.0)
    if np.any(np.isnan(X_test_w_imputed)) or np.any(np.isinf(X_test_w_imputed)):
        print("Warning: NaNs or Infs found in test data AFTER imputation! Replacing with 0.")
        X_test_w_imputed = np.nan_to_num(X_test_w_imputed, nan=0.0, posinf=0.0, neginf=0.0)

    # Global normalization - Fit on training windows ONLY
    scaler_w = StandardScaler().fit(X_train_w_imputed)
    X_train_final = scaler_w.transform(X_train_w_imputed) # Use this for training/CV
    X_test_final = scaler_w.transform(X_test_w_imputed)   # Use this for final testing
    y_train_final = y_int_train_w                         # Integer labels for training
    y_test_final = y_int_test_w                           # Integer labels for testing
    print("Scaling complete.")
    print(f"Final training data shape: {X_train_final.shape}")
    print(f"Final test data shape: {X_test_final.shape}")

    # --- Define Cross-Validation Strategy (Stratified) ---
    cv_strategy = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    # --- Define Classifiers (Using class_weight='balanced') ---
    classifiers = {
        # "Logistic Regression": LogisticRegression(max_iter=2000, class_weight='balanced', random_state=42, solver='liblinear'),
        "SVM": SVC(probability=True, class_weight='balanced', random_state=42, cache_size=700), # Added cache_size
        "KNN": KNeighborsClassifier(n_neighbors=5), # Consider larger n_neighbors based on CV
        # "Decision Tree": DecisionTreeClassifier(class_weight='balanced', random_state=42, max_depth=20), # Limit depth
        "Random Forest": RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42, n_jobs=-1, max_depth=20), # Limit depth initially
        "Gradient Boosting": GradientBoostingClassifier(n_estimators=100, random_state=42, max_depth=5), # Limit depth
        "Extra Trees": ExtraTreesClassifier(n_estimators=100, class_weight='balanced', random_state=42, n_jobs=-1, max_depth=20), # Limit depth initially
        # "XGBoost": xgb.XGBClassifier(objective='multi:softprob', eval_metric='mlogloss', use_label_encoder=False, random_state=42, n_jobs=-1), # Keep commented if still issues
    }

    # --- Optional: Initial Cross-Validation (on WINDOWED imbalanced train data) ---
    results_cv = {}
    print("\nEvaluating classifiers with 5-fold Stratified cross-validation (on WINDOWED train data):")
    for clf_name, clf in classifiers.items():
        try:
            # Use the windowed X_train_final, y_train_final and stratified CV
            # Increase CV timeout if needed for larger data
            cv_scores = cross_val_score(clf, X_train_final, y_train_final, cv=cv_strategy, scoring='balanced_accuracy', n_jobs=-1, error_score='raise') # Add error_score='raise'
            results_cv[clf_name] = np.mean(cv_scores)
            print(f"{clf_name}: Mean Balanced Accuracy = {np.mean(cv_scores):.3f} (+/- {np.std(cv_scores):.3f})")
        except Exception as e:
            print(f"{clf_name} encountered an error during cross-validation: {e}")


    # --- Hyperparameter Tuning using GridSearchCV ---
    print("\n--- Starting Hyperparameter Tuning ---")
    # Adjusted grids slightly, focusing on parameters likely affected by more data
    param_grids = {
        "Extra Trees": {
            'n_estimators': [200, 400], # Increased estimators
            'max_depth': [30, 40, None], # Deeper trees might be possible
            'min_samples_split': [2, 5],
            'min_samples_leaf': [1, 3],
            'class_weight': ['balanced']
        },
        "Random Forest": {
            'n_estimators': [200, 400],
            'max_depth': [30, 40, None],
            'min_samples_split': [2, 5],
            'min_samples_leaf': [1, 3],
            'class_weight': ['balanced']
        },
        "SVM": {
            'C': [1, 10, 50], # Keep C range reasonable
            'gamma': ['scale', 0.01], # Scale is often good, added 0.01
            'kernel': ['rbf'],
            'class_weight': ['balanced']
         },
         "KNN": {
            'n_neighbors': [9, 15, 21, 29] # Explore more neighbors
         }
        # Add XGBoost back here if it's working
    }

    best_estimators = {}
    best_cv_scores = {}

    # Select models for tuning based on the NEW CV results on windowed data
    if results_cv:
        sorted_cv = sorted(results_cv.items(), key=lambda item: item[1], reverse=True)
        # Adjust threshold based on observed CV results
        tune_threshold = max(0.30, np.mean(list(results_cv.values())) - 0.05) if results_cv else 0.30
        models_to_tune = [name for name, score in sorted_cv if name in param_grids and score >= tune_threshold]
        print(f"\nModels selected for tuning based on initial CV (Windowed Data, Balanced Acc >= {tune_threshold:.3f}): {models_to_tune}")
    else:
        print("\nNo initial CV results available, attempting to tune all models defined in param_grids.")
        models_to_tune = [name for name in param_grids if name in classifiers]


    for name in models_to_tune:
        if name in classifiers and name in param_grids:
            print(f"\nTuning {name}...")
            clf = classifiers[name]
            # Use windowed data and stratified CV for tuning
            # Reduce verbosity if output is too long
            grid_search = GridSearchCV(estimator=clf, param_grid=param_grids[name],
                                       cv=cv_strategy, # Use stratified CV strategy
                                       n_jobs=-1, verbose=1, scoring='balanced_accuracy')
            try:
                # Fit on windowed training data
                grid_search.fit(X_train_final, y_train_final)
                best_estimators[name] = grid_search.best_estimator_
                best_cv_scores[name] = grid_search.best_score_
                print(f"Best parameters for {name}: {grid_search.best_params_}")
                print(f"Best CV balanced accuracy for {name} (on windowed data): {grid_search.best_score_:.4f}")
            except Exception as e:
                print(f"GridSearchCV for {name} failed: {e}")
        else:
             print(f"Skipping tuning for {name} (not in classifiers or param_grids defined for tuning)")


    # --- Select the overall best model based on tuning CV score ---
    if best_estimators: # Check if any tuning was successful
        valid_scores = {name: score for name, score in best_cv_scores.items() if score is not None and not np.isnan(score)}
        if valid_scores:
            best_model_name = max(valid_scores, key=valid_scores.get)
            best_overall_model = best_estimators[best_model_name]
            print(f"\n--- Best Overall Model based on Tuning CV Score (Windowed Data): {best_model_name} ({valid_scores[best_model_name]:.4f}) ---")

            # --- Final Evaluation on Test Set using the BEST Tuned Model (Windowed Data) ---
            print(f"\nEvaluating best model ({best_model_name}) on the test windows...")
            # Predict on the scaled test set windows (X_test_final)
            predictions = best_overall_model.predict(X_test_final)
            final_acc = accuracy_score(y_test_final, predictions) # Evaluate against test window labels
            final_balanced_acc = balanced_accuracy_score(y_test_final, predictions)

            print(f"\nFinal {best_model_name} Accuracy on Test Windows: {final_acc:.4f}")
            print(f"Final {best_model_name} Balanced Accuracy on Test Windows: {final_balanced_acc:.4f}") # Key metric for window performance
            print("\nConfusion Matrix (Test Windows):")
            print(confusion_matrix(y_test_final, predictions))
            print("\nClassification Report (Test Windows):")
            # Ensure label_to_int_w exists from the previous cell
            if 'label_to_int_w' in locals():
                 target_names_w = [label for label, idx in sorted(label_to_int_w.items(), key=lambda item: item[1])]
                 print(classification_report(y_test_final, predictions, target_names=target_names_w, digits=3))
            else:
                 print("Warning: label_to_int_w not found. Cannot print target names in report.")
                 print(classification_report(y_test_final, predictions, digits=3))

            # Note: This evaluation is per-window. To get per-trial accuracy, you would need
            # to group predictions by trial and use a voting/aggregation strategy.
        else:
            print("\nNo models completed tuning successfully with a valid score (Windowed Data). Cannot evaluate final performance.")
    else:
        print("\nNo models were successfully tuned or tuning list was empty (Windowed Data). Cannot evaluate final performance.")

else:
     print("\nError: Windowed training data ('X_train_w') not found or is empty. Cannot proceed with ML pipeline.")
     # Add any necessary cleanup or exit logic here if required

Preprocessing windowed data...
Imputation complete.
Scaling complete.
Final training data shape: (45554, 25)
Final test data shape: (11389, 25)

Evaluating classifiers with 5-fold Stratified cross-validation (on WINDOWED train data):
SVM: Mean Balanced Accuracy = 0.462 (+/- 0.009)
KNN: Mean Balanced Accuracy = 0.321 (+/- 0.004)
Random Forest: Mean Balanced Accuracy = 0.366 (+/- 0.005)
Gradient Boosting: Mean Balanced Accuracy = 0.324 (+/- 0.004)
Extra Trees: Mean Balanced Accuracy = 0.492 (+/- 0.014)

--- Starting Hyperparameter Tuning ---

Models selected for tuning based on initial CV (Windowed Data, Balanced Acc >= 0.343): ['Extra Trees', 'SVM', 'Random Forest']

Tuning Extra Trees...
Fitting 5 folds for each of 24 candidates, totalling 120 fits
Best parameters for Extra Trees: {'class_weight': 'balanced', 'max_depth': 30, 'min_samples_leaf': 3, 'min_samples_split': 2, 'n_estimators': 400}
Best CV balanced accuracy for Extra Trees (on windowed data): 0.4853

Tuning SVM...
Fitting 5 