## Imports (common)

In [14]:
import os
import numpy as np
import scipy.io
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization, LSTM, GRU
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from scipy.signal import butter, filtfilt



## Data Loading & Preprocessing

In [None]:
# --- Data Loading Function ---
def load_patient_preprocessed_data(patient_number):
    base_dir = r"C:\Users\ferri\Downloads\PoliTO\Tesi\DSs\Emotion-Stress\AMIGOS"
    file_path = os.path.join(
        base_dir, "Data preprocessed",
        f"Data_Preprocessed_P{patient_number:02d}",
        f"Data_Preprocessed_P{patient_number:02d}.mat"
    )
    data = scipy.io.loadmat(file_path)
    return data


# Fixed target length for each trial signal
TARGET_LENGTH = 1000

# --- Preprocessing functions for CNN pipeline ---

def process_trial_signals_together(trial_data, target_length=1000, fs=128):
    signal_types = ['EEG', 'ECG', 'GSR']
    processed_signals = []

    for idx, signal_type in enumerate(signal_types):
        raw_signal = np.array(trial_data[idx], dtype=float).squeeze()
        filtered_signal = filter_signal(raw_signal, signal_type, fs)
        
        # resize or pad the signal
        if len(filtered_signal) < target_length:
            pad_width = target_length - len(filtered_signal)
            filtered_signal = np.pad(filtered_signal, (0, pad_width), mode='constant')
        else:
            filtered_signal = filtered_signal[:target_length]
        
        processed_signals.append(filtered_signal)

    # Combine signals into a single multi-channel array (shape: target_length x num_signals)
    return np.vstack(processed_signals).T

def discretize_label(label):
    """
    Convert a continuous label [1, valence, arousal] into a descriptive class.
    The first element is ignored.
    """
    if label.size < 3:
        return "Unknown"
    valence = label[1]
    arousal = label[2]
    if valence < 0 and arousal < 0:
        return "Low valence, Low arousal"
    elif valence < 0 and arousal >= 0:
        return "Low valence, High arousal"
    elif valence >= 0 and arousal < 0:
        return "High valence, Low arousal"
    else:
        return "High valence, High arousal"

# --- Feature Extraction Functions ---
def extract_features(signal):
    # Flatten the signal and check if it's empty.
    signal = signal.flatten()
    if signal.size == 0:
        return np.zeros(5)
    features = {
        "mean": np.mean(signal),
        "std": np.std(signal),
        "max": np.max(signal),
        "min": np.min(signal),
        "median": np.median(signal)
    }
    return np.array(list(features.values()))

def build_patient_data(joined_data, label_array):
    X_list = []
    y_list = []
    n_trials = joined_data.shape[1]
    for i in range(n_trials):
        trial_data = joined_data[0, i]
        trial_data = np.array(trial_data, dtype=float).squeeze()
        features = extract_features(trial_data)
        
        lbl = np.array(label_array[0, i]).squeeze()
        if lbl.size == 0:
            print(f"Warning: Trial {i} has empty label. Skipping trial.")
            continue
        if lbl.ndim == 2:
            lbl_processed = np.mean(lbl, axis=0)
        elif lbl.ndim == 1:
            lbl_processed = lbl
        else:
            lbl_processed = lbl.flatten()[0]
        discrete_label = discretize_label(lbl_processed)
        
        X_list.append(features)
        y_list.append(discrete_label)
    if len(X_list) == 0:
        return None, None
    return np.vstack(X_list), np.array(y_list)

def load_all_patients_data(num_patients=40):
    X_list = []
    y_list = []
    for patient in range(1, num_patients+1):
        print(f"Loading patient {patient}")
        data = load_patient_preprocessed_data(patient)
        joined_data = data['joined_data']
        labels_array = data['labels_ext_annotation']
        X_patient, y_patient = build_patient_data(joined_data, labels_array)
        if X_patient is not None and y_patient is not None:
            X_list.append(X_patient)
            y_list.append(y_patient)
    if len(X_list) == 0:
        raise ValueError("No patient data loaded.")
    X_all = np.vstack(X_list)
    y_all = np.concatenate(y_list)
    return X_all, y_all

def butter_bandpass(lowcut, highcut, fs, order=4):
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    return b, a

def butter_lowpass(cutoff, fs, order=4):
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
    b, a = butter(order, normal_cutoff, btype='low')
    return b, a

def filter_signal(data, signal_type, fs=128):
    nyquist = 0.5 * fs
    if signal_type == 'EEG':
        low, high = 1 / nyquist, 40 / nyquist
        b, a = butter(4, [low, high], btype='band')
    elif signal_type == 'ECG':
        low, high = 0.5 / nyquist, 40 / nyquist
        b, a = butter(4, [low, high], btype='band')
    elif signal_type == 'GSR':
        cutoff = 2 / nyquist
        b, a = butter(4, cutoff, btype='low')
    else:
        raise ValueError("Unknown signal type.")

    padlen = 3 * max(len(a), len(b))
    if len(data) <= padlen:
        return data
    else:
        return filtfilt(b, a, data)

def load_all_patients_raw_signal(num_patients=40, target_length=1000, fs=128):
    X_list, y_list = [], []

    for patient in range(1, num_patients + 1):
        data = load_patient_preprocessed_data(patient)
        joined_data = data['joined_data']
        labels_array = data['labels_ext_annotation']

        for trial in range(joined_data.shape[1]):
            trial_data = joined_data[0, trial]
            lbl = np.array(labels_array[0, trial]).squeeze()

            if lbl.size == 0:
                print(f"Skipping empty label trial {trial}")
                continue

            if lbl.ndim == 2:
                lbl_processed = np.mean(lbl, axis=0)
            else:
                lbl_processed = lbl.flatten()

            discrete_label = discretize_label(lbl_processed)

            processed_signal = process_trial_signals_together(trial_data, target_length, fs)

            X_list.append(processed_signal)
            y_list.append(discrete_label)

    if not X_list:
        raise ValueError("No data loaded!")

    return np.array(X_list), np.array(y_list)

def butter_highpass(cutoff, fs, order=4):
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
    b, a = butter(order, normal_cutoff, btype='high')
    return b, a

def apply_filter(data, filter_type='bandpass', lowcut=None, highcut=None, cutoff=None, fs=128, order=4):
    if filter_type == 'bandpass':
        b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    elif filter_type == 'lowpass':
        b, a = butter_lowpass(cutoff, fs, order=order)
    elif filter_type == 'highpass':
        b, a = butter_highpass(cutoff, fs, order=order)
    else:
        raise ValueError("Invalid filter type. Choose 'bandpass', 'lowpass', or 'highpass'.")

    filtered_data = filtfilt(b, a, data)
    return filtered_data


## Load and Prepare Data

In [17]:
X_raw, y_desc = load_all_patients_data(num_patients=40)
unique_labels = np.unique(y_desc)
label_to_int = {label: idx for idx, label in enumerate(unique_labels)}
y_int = np.array([label_to_int[label] for label in y_desc])
num_classes = len(unique_labels)

# Split once (used by all models)
X_train, X_test, y_train, y_test, y_int_train, y_int_test = train_test_split(
    X_raw, to_categorical(y_int), y_int, test_size=0.2, random_state=42, stratify=y_int
)


Loading patient 1
Loading patient 2
Loading patient 3
Loading patient 4
Loading patient 5
Loading patient 6
Loading patient 7
Loading patient 8
Loading patient 9
Loading patient 10
Loading patient 11
Loading patient 12
Loading patient 13
Loading patient 14
Loading patient 15
Loading patient 16
Loading patient 17
Loading patient 18
Loading patient 19
Loading patient 20
Loading patient 21
Loading patient 22
Loading patient 23
Loading patient 24
Loading patient 25
Loading patient 26
Loading patient 27
Loading patient 28
Loading patient 29
Loading patient 30
Loading patient 31
Loading patient 32
Loading patient 33
Loading patient 34
Loading patient 35
Loading patient 36
Loading patient 37
Loading patient 38
Loading patient 39
Loading patient 40


## Feature Extraction

In [18]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization, LSTM, GRU
from tensorflow.keras.callbacks import EarlyStopping

# CNN feature extraction function
def extract_cnn_features(X_train, y_train, X_test, num_classes):
    input_shape = X_train.shape[1:]
    inputs = Input(shape=input_shape)
    x = Conv1D(16, kernel_size=5, activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(pool_size=2)(x)
    x = Conv1D(32, kernel_size=5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(pool_size=2)(x)
    x = Conv1D(64, kernel_size=5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(pool_size=2)(x)
    features = Flatten(name='features')(x)
    x = Dense(128, activation='relu')(features)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2,
              callbacks=[early_stop], verbose=1)

    extractor = Model(inputs=model.input, outputs=model.get_layer('features').output)
    return extractor.predict(X_train), extractor.predict(X_test)

# LSTM feature extraction function
def extract_lstm_features(X_train, y_train, X_test, num_classes):
    input_shape = X_train.shape[1:]
    inputs = Input(shape=input_shape)
    x = LSTM(64, return_sequences=True)(inputs)
    x = Dropout(0.2)(x)
    x = LSTM(64)(x)
    features = Dense(128, activation='relu', name='features')(x)
    x = Dropout(0.5)(features)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2,
              callbacks=[early_stop], verbose=1)

    extractor = Model(inputs=model.input, outputs=model.get_layer('features').output)
    return extractor.predict(X_train), extractor.predict(X_test)

# GRU feature extraction function
def extract_gru_features(X_train, y_train, X_test, num_classes):
    input_shape = X_train.shape[1:]
    inputs = Input(shape=input_shape)
    x = GRU(64, return_sequences=True)(inputs)
    x = Dropout(0.2)(x)
    x = GRU(64)(x)
    features = Dense(128, activation='relu', name='features')(x)
    x = Dropout(0.5)(features)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2,
              callbacks=[early_stop], verbose=1)

    extractor = Model(inputs=model.input, outputs=model.get_layer('features').output)
    return extractor.predict(X_train), extractor.predict(X_test)

## Model Training and Evaluation Functions

In [19]:
def train_evaluate_classifiers(X_tr, X_te, y_tr, y_te):
    classifiers = {
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "SVM": SVC(probability=True),
        "KNN": KNeighborsClassifier(),
        "Decision Tree": DecisionTreeClassifier(),
        "Random Forest": RandomForestClassifier(n_estimators=100)
    }

    for clf_name, clf in classifiers.items():
        clf.fit(X_tr, y_tr)
        pred = clf.predict(X_te)
        acc = accuracy_score(y_te, pred)
        print(f"{clf_name}: Accuracy = {acc:.3f}")
        print(confusion_matrix(y_te, pred))
        print(classification_report(y_te, pred))
        try:
            prob = clf.predict_proba(X_te)
            auc = roc_auc_score(to_categorical(y_te), prob, multi_class="ovr", average="macro")
            print("AUC:", auc)
        except Exception as e:
            print("AUC Error:", e)
            

# --- Build CNN Model ---
def create_cnn_model(input_shape, num_classes):
    model = Sequential()
    model.add(Conv1D(filters=16, kernel_size=5, activation='relu', input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(filters=32, kernel_size=5, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(filters=64, kernel_size=5, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def create_lstm_model(input_shape, num_classes):
    model = Sequential()
    model.add(LSTM(64, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(64))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model

def create_gru_model(input_shape, num_classes):
    model = Sequential()
    model.add(GRU(64, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(GRU(64))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model

## Run Statistical Feature Extraction + ML classifiers

In [7]:
# Correct feature extraction:
X_train_stat = np.array([extract_features(signal) for signal in X_train])
X_test_stat = np.array([extract_features(signal) for signal in X_test])

# Impute missing values
imputer = SimpleImputer(strategy='mean').fit(X_train_stat)
X_train_stat_imputed = imputer.transform(X_train_stat)
X_test_stat_imputed = imputer.transform(X_test_stat)

# Standardize features
scaler = StandardScaler().fit(X_train_stat_imputed)
X_train_stat_scaled = scaler.transform(X_train_stat_imputed)
X_test_stat_scaled = scaler.transform(X_test_stat_imputed)

# Train and evaluate classifiers
train_evaluate_classifiers(X_train_stat_scaled, X_test_stat_scaled, y_int_train, y_int_test)


Logistic Regression: Accuracy = 0.669
[[ 0  0  0 14]
 [ 0  0  0 30]
 [ 0  0  0  5]
 [ 0  0  0 99]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.00      0.00      0.00        30
           2       0.00      0.00      0.00         5
           3       0.67      1.00      0.80        99

    accuracy                           0.67       148
   macro avg       0.17      0.25      0.20       148
weighted avg       0.45      0.67      0.54       148

AUC: 0.4447409467617899
SVM: Accuracy = 0.669
[[ 0  0  0 14]
 [ 0  0  0 30]
 [ 0  0  0  5]
 [ 0  0  0 99]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.00      0.00      0.00        30
           2       0.00      0.00      0.00         5
           3       0.67      1.00      0.80        99

    accuracy                           0.67       148
   macro avg       0.17      0.25

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest: Accuracy = 0.615
[[ 0  0  0 14]
 [ 1  1  0 28]
 [ 0  0  0  5]
 [ 1  7  1 90]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.12      0.03      0.05        30
           2       0.00      0.00      0.00         5
           3       0.66      0.91      0.76        99

    accuracy                           0.61       148
   macro avg       0.20      0.24      0.20       148
weighted avg       0.46      0.61      0.52       148

AUC: 0.5669143802914266


##  CNN Feature Extraction + ML classifiers

In [20]:
# Load and preprocess signals (EEG, ECG, GSR together)
X_signal, y_signal_desc = load_all_patients_raw_signal(num_patients=40, target_length=TARGET_LENGTH)

# Encode labels
unique_labels = np.unique(y_signal_desc)
label_to_int = {label: idx for idx, label in enumerate(unique_labels)}
y_int_signal = np.array([label_to_int[label] for label in y_signal_desc])
y_cat_signal = to_categorical(y_int_signal)

# Check shape for CNN/LSTM/GRU (samples, timesteps, channels)
print("X_signal shape:", X_signal.shape)  # should be (samples, target_length, 3)

# Split into training and test sets
X_train, X_test, y_train, y_test, y_int_train, y_int_test = train_test_split(
    X_signal, y_cat_signal, y_int_signal,
    test_size=0.2, random_state=42, stratify=y_int_signal
)

# Run CNN feature extraction + ML classifiers
train_features, test_features = extract_cnn_features(X_train, y_train, X_test, len(unique_labels))
train_evaluate_classifiers(train_features, test_features, y_int_train, y_int_test)


Skipping empty label trial 0
Skipping empty label trial 1
Skipping empty label trial 2
Skipping empty label trial 3
Skipping empty label trial 4
Skipping empty label trial 5
Skipping empty label trial 6
Skipping empty label trial 7
Skipping empty label trial 8
Skipping empty label trial 9
Skipping empty label trial 10
Skipping empty label trial 11
Skipping empty label trial 12
Skipping empty label trial 13
Skipping empty label trial 14
Skipping empty label trial 15
Skipping empty label trial 16
Skipping empty label trial 17
Skipping empty label trial 18
Skipping empty label trial 19
Skipping empty label trial 0
Skipping empty label trial 1
Skipping empty label trial 2
Skipping empty label trial 3
Skipping empty label trial 4
Skipping empty label trial 5
Skipping empty label trial 6
Skipping empty label trial 7
Skipping empty label trial 8
Skipping empty label trial 9
Skipping empty label trial 10
Skipping empty label trial 11
Skipping empty label trial 12
Skipping empty label trial 13


ABNORMAL: .

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Logistic Regression: Accuracy = 0.095
[[14  0  0  0]
 [30  0  0  0]
 [ 5  0  0  0]
 [99  0  0  0]]
              precision    recall  f1-score   support

           0       0.09      1.00      0.17        14
           1       0.00      0.00      0.00        30
           2       0.00      0.00      0.00         5
           3       0.00      0.00      0.00        99

    accuracy                           0.09       148
   macro avg       0.02      0.25      0.04       148
weighted avg       0.01      0.09      0.02       148

AUC: 0.5
SVM: Accuracy = 0.669
[[ 0  0  0 14]
 [ 0  0  0 30]
 [ 0  0  0  5]
 [ 0  0  0 99]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.00      0.00      0.00        30
           2       0.00      0.00      0.00         5
           3       0.67      1.00      0.80        99

    accuracy                           0.67       148
   macro avg       0.17      0.25      0.20     

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


KNN: Accuracy = 0.669
[[ 0  0  0 14]
 [ 0  0  0 30]
 [ 0  0  0  5]
 [ 0  0  0 99]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.00      0.00      0.00        30
           2       0.00      0.00      0.00         5
           3       0.67      1.00      0.80        99

    accuracy                           0.67       148
   macro avg       0.17      0.25      0.20       148
weighted avg       0.45      0.67      0.54       148

AUC: 0.5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Decision Tree: Accuracy = 0.669
[[ 0  0  0 14]
 [ 0  0  0 30]
 [ 0  0  0  5]
 [ 0  0  0 99]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.00      0.00      0.00        30
           2       0.00      0.00      0.00         5
           3       0.67      1.00      0.80        99

    accuracy                           0.67       148
   macro avg       0.17      0.25      0.20       148
weighted avg       0.45      0.67      0.54       148

AUC: 0.5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest: Accuracy = 0.669
[[ 0  0  0 14]
 [ 0  0  0 30]
 [ 0  0  0  5]
 [ 0  0  0 99]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.00      0.00      0.00        30
           2       0.00      0.00      0.00         5
           3       0.67      1.00      0.80        99

    accuracy                           0.67       148
   macro avg       0.17      0.25      0.20       148
weighted avg       0.45      0.67      0.54       148

AUC: 0.5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## LSTM Feature Extraction + ML classifiers

In [21]:
# Load and preprocess signals (EEG, ECG, GSR together)
X_signal, y_signal_desc = load_all_patients_raw_signal(num_patients=40, target_length=TARGET_LENGTH)

# Encode labels
unique_labels = np.unique(y_signal_desc)
label_to_int = {label: idx for idx, label in enumerate(unique_labels)}
y_int_signal = np.array([label_to_int[label] for label in y_signal_desc])
y_cat_signal = to_categorical(y_int_signal)

# Verify shape for LSTM input (samples, timesteps, channels)
print("X_signal shape:", X_signal.shape)  # Should be (samples, TARGET_LENGTH, 3)

# Split into training and test sets
X_train, X_test, y_train, y_test, y_int_train, y_int_test = train_test_split(
    X_signal, y_cat_signal, y_int_signal,
    test_size=0.2, random_state=42, stratify=y_int_signal
)

# Run LSTM feature extraction + ML classifiers
train_features, test_features = extract_lstm_features(X_train, y_train, X_test, len(unique_labels))
train_evaluate_classifiers(train_features, test_features, y_int_train, y_int_test)


Skipping empty label trial 0
Skipping empty label trial 1
Skipping empty label trial 2
Skipping empty label trial 3
Skipping empty label trial 4
Skipping empty label trial 5
Skipping empty label trial 6
Skipping empty label trial 7
Skipping empty label trial 8
Skipping empty label trial 9
Skipping empty label trial 10
Skipping empty label trial 11
Skipping empty label trial 12
Skipping empty label trial 13
Skipping empty label trial 14
Skipping empty label trial 15
Skipping empty label trial 16
Skipping empty label trial 17
Skipping empty label trial 18
Skipping empty label trial 19
Skipping empty label trial 0
Skipping empty label trial 1
Skipping empty label trial 2
Skipping empty label trial 3
Skipping empty label trial 4
Skipping empty label trial 5
Skipping empty label trial 6
Skipping empty label trial 7
Skipping empty label trial 8
Skipping empty label trial 9
Skipping empty label trial 10
Skipping empty label trial 11
Skipping empty label trial 12
Skipping empty label trial 13


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

## GRU Feature Extraction + ML classifiers

In [22]:
# Load and preprocess signals (EEG, ECG, GSR together)
X_signal, y_signal_desc = load_all_patients_raw_signal(num_patients=40, target_length=TARGET_LENGTH)

# Encode labels
unique_labels = np.unique(y_signal_desc)
label_to_int = {label: idx for idx, label in enumerate(unique_labels)}
y_int_signal = np.array([label_to_int[label] for label in y_signal_desc])
y_cat_signal = to_categorical(y_int_signal)

# Verify shape for GRU input (samples, timesteps, channels)
print("X_signal shape:", X_signal.shape)  # Should be (samples, TARGET_LENGTH, 3)

# Split into training and test sets
X_train, X_test, y_train, y_test, y_int_train, y_int_test = train_test_split(
    X_signal, y_cat_signal, y_int_signal,
    test_size=0.2, random_state=42, stratify=y_int_signal
)

# Run GRU feature extraction + ML classifiers
train_features, test_features = extract_gru_features(X_train, y_train, X_test, len(unique_labels))
train_evaluate_classifiers(train_features, test_features, y_int_train, y_int_test)

Skipping empty label trial 0
Skipping empty label trial 1
Skipping empty label trial 2
Skipping empty label trial 3
Skipping empty label trial 4
Skipping empty label trial 5
Skipping empty label trial 6
Skipping empty label trial 7
Skipping empty label trial 8
Skipping empty label trial 9
Skipping empty label trial 10
Skipping empty label trial 11
Skipping empty label trial 12
Skipping empty label trial 13
Skipping empty label trial 14
Skipping empty label trial 15
Skipping empty label trial 16
Skipping empty label trial 17
Skipping empty label trial 18
Skipping empty label trial 19
Skipping empty label trial 0
Skipping empty label trial 1
Skipping empty label trial 2
Skipping empty label trial 3
Skipping empty label trial 4
Skipping empty label trial 5
Skipping empty label trial 6
Skipping empty label trial 7
Skipping empty label trial 8
Skipping empty label trial 9
Skipping empty label trial 10
Skipping empty label trial 11
Skipping empty label trial 12
Skipping empty label trial 13


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

## CNN/LSTM/GRU end-to-end model evaluation

In [23]:
# Load and preprocess signals (EEG, ECG, GSR together)
X_signal, y_signal_desc = load_all_patients_raw_signal(num_patients=40, target_length=TARGET_LENGTH)

# Encode labels
unique_labels = np.unique(y_signal_desc)
label_to_int = {label: idx for idx, label in enumerate(unique_labels)}
y_int_signal = np.array([label_to_int[label] for label in y_signal_desc])
y_cat_signal = to_categorical(y_int_signal)

# Verify final data shape for CNN/LSTM/GRU (samples, timesteps, channels=3)
print("X_signal shape:", X_signal.shape)  # expected (samples, TARGET_LENGTH, 3)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X_signal, y_cat_signal, test_size=0.2, random_state=42, stratify=y_int_signal
)

# Consistent input shape across models
input_shape = X_train.shape[1:]

# --- CNN standalone ---
print("\n--- CNN standalone ---")
cnn_model = create_cnn_model(input_shape, len(unique_labels))
cnn_model.fit(
    X_train, y_train, epochs=10, batch_size=32, validation_split=0.2,
    callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)]
)
cnn_eval = cnn_model.evaluate(X_test, y_test, verbose=0)
print("CNN evaluation (loss, accuracy):", cnn_eval)

# --- LSTM standalone ---
print("\n--- LSTM standalone ---")
lstm_model = create_lstm_model(input_shape, len(unique_labels))
lstm_model.fit(
    X_train, y_train, epochs=10, batch_size=32, validation_split=0.2,
    callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)]
)
lstm_eval = lstm_model.evaluate(X_test, y_test, verbose=0)
print("LSTM evaluation (loss, accuracy):", lstm_eval)

# --- GRU standalone ---
print("\n--- GRU standalone ---")
gru_model = create_gru_model(input_shape, len(unique_labels))
gru_model.fit(
    X_train, y_train, epochs=10, batch_size=32, validation_split=0.2,
    callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)]
)
gru_eval = gru_model.evaluate(X_test, y_test, verbose=0)
print("GRU evaluation (loss, accuracy):", gru_eval)


Skipping empty label trial 0
Skipping empty label trial 1
Skipping empty label trial 2
Skipping empty label trial 3
Skipping empty label trial 4
Skipping empty label trial 5
Skipping empty label trial 6
Skipping empty label trial 7
Skipping empty label trial 8
Skipping empty label trial 9
Skipping empty label trial 10
Skipping empty label trial 11
Skipping empty label trial 12
Skipping empty label trial 13
Skipping empty label trial 14
Skipping empty label trial 15
Skipping empty label trial 16
Skipping empty label trial 17
Skipping empty label trial 18
Skipping empty label trial 19
Skipping empty label trial 0
Skipping empty label trial 1
Skipping empty label trial 2
Skipping empty label trial 3
Skipping empty label trial 4
Skipping empty label trial 5
Skipping empty label trial 6
Skipping empty label trial 7
Skipping empty label trial 8
Skipping empty label trial 9
Skipping empty label trial 10
Skipping empty label trial 11
Skipping empty label trial 12
Skipping empty label trial 13


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step - accuracy: 0.0773 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0837 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0841 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0715 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.1090 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0535 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 7/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/s

  super().__init__(**kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 302ms/step - accuracy: 0.5300 - loss: 1.3833 - val_accuracy: 0.6134 - val_loss: 1.3758
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 282ms/step - accuracy: 0.7088 - loss: 1.3702 - val_accuracy: 0.6134 - val_loss: 1.3653
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 284ms/step - accuracy: 0.6672 - loss: 1.3596 - val_accuracy: 0.6134 - val_loss: 1.3553
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 280ms/step - accuracy: 0.6838 - loss: 1.3464 - val_accuracy: 0.6134 - val_loss: 1.3453
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 278ms/step - accuracy: 0.6919 - loss: 1.3336 - val_accuracy: 0.6134 - val_loss: 1.3356
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 282ms/step - accuracy: 0.6918 - loss: 1.3221 - val_accuracy: 0.6134 - val_loss: 1.3263
Epoch 7/10
[1m15/15[0m [32m━━━━━━━━━