## Imports (common)

In [1]:
import os
import numpy as np
import scipy.io
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization, LSTM, GRU
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical

## Data Loading & Preprocessing

In [9]:

# --- Data Loading Function ---
def load_patient_preprocessed_data(patient_number):
    base_dir = r"C:\Users\ferri\Downloads\PoliTO\Tesi\DSs\Emotion-Stress\AMIGOS"
    file_path = os.path.join(
        base_dir, "Data preprocessed",
        f"Data_Preprocessed_P{patient_number:02d}",
        f"Data_Preprocessed_P{patient_number:02d}.mat"
    )
    data = scipy.io.loadmat(file_path)
    return data


# Fixed target length for each trial signal
TARGET_LENGTH = 1000

# --- Preprocessing functions for CNN pipeline ---

def process_trial_signal(signal, target_length=TARGET_LENGTH):
    """
    Convert a trial's raw signal into a 1D array of fixed length.
    If the signal is shorter than target_length, pad with zeros;
    if longer, truncate.
    """
    signal = np.array(signal, dtype=float).squeeze()
    if signal.size == 0:
        return np.zeros(target_length)
    if len(signal) < target_length:
        pad_width = target_length - len(signal)
        signal = np.pad(signal, (0, pad_width), mode='constant')
    else:
        signal = signal[:target_length]
    return signal


def discretize_label(label):
    """
    Convert a continuous label [1, valence, arousal] into a descriptive class.
    The first element is ignored.
    """
    if label.size < 3:
        return "Unknown"
    valence = label[1]
    arousal = label[2]
    if valence < 0 and arousal < 0:
        return "Low valence, Low arousal"
    elif valence < 0 and arousal >= 0:
        return "Low valence, High arousal"
    elif valence >= 0 and arousal < 0:
        return "High valence, Low arousal"
    else:
        return "High valence, High arousal"

# --- Feature Extraction Functions ---
def extract_features(signal):
    # Flatten the signal and check if it's empty.
    signal = signal.flatten()
    if signal.size == 0:
        return np.zeros(5)
    features = {
        "mean": np.mean(signal),
        "std": np.std(signal),
        "max": np.max(signal),
        "min": np.min(signal),
        "median": np.median(signal)
    }
    return np.array(list(features.values()))

def build_dataset(joined_data, labels_array, target_length=1000):
    """
    Processes raw joined_data and labels_array to build dataset (X, y).

    Parameters:
        joined_data (np.array): Raw data array from patient data.
        labels_array (np.array): Corresponding labels array.
        target_length (int): Fixed length for signal processing.

    Returns:
        X (np.array): Processed signals, shape: (n_trials, target_length)
        y (np.array): Corresponding discretized labels
    """
    X_list = []
    y_list = []
    n_trials = joined_data.shape[1]

    for i in range(n_trials):
        trial_data = joined_data[0, i]
        signal = process_trial_signal(trial_data, target_length)

        lbl = np.array(labels_array[0, i]).squeeze()
        if lbl.size == 0:
            print(f"Warning: Trial {i} has empty label. Skipping trial.")
            continue

        if lbl.ndim == 2:
            lbl_processed = np.mean(lbl, axis=0)
        elif lbl.ndim == 1:
            lbl_processed = lbl
        else:
            lbl_processed = lbl.flatten()[0]

        discrete_label = discretize_label(lbl_processed)
        X_list.append(signal)
        y_list.append(discrete_label)

    if len(X_list) == 0:
        return None, None

    X_array = np.vstack([x[None, :] for x in X_list])
    y_array = np.array(y_list)

    return X_array, y_array


def build_patient_data(joined_data, label_array):
    X_list = []
    y_list = []
    n_trials = joined_data.shape[1]
    for i in range(n_trials):
        trial_data = joined_data[0, i]
        trial_data = np.array(trial_data, dtype=float).squeeze()
        features = extract_features(trial_data)
        
        lbl = np.array(label_array[0, i]).squeeze()
        if lbl.size == 0:
            print(f"Warning: Trial {i} has empty label. Skipping trial.")
            continue
        if lbl.ndim == 2:
            lbl_processed = np.mean(lbl, axis=0)
        elif lbl.ndim == 1:
            lbl_processed = lbl
        else:
            lbl_processed = lbl.flatten()[0]
        discrete_label = discretize_label(lbl_processed)
        
        X_list.append(features)
        y_list.append(discrete_label)
    if len(X_list) == 0:
        return None, None
    return np.vstack(X_list), np.array(y_list)

def load_all_patients_data(num_patients=40):
    X_list = []
    y_list = []
    for patient in range(1, num_patients+1):
        print(f"Loading patient {patient}")
        data = load_patient_preprocessed_data(patient)
        joined_data = data['joined_data']
        labels_array = data['labels_ext_annotation']
        X_patient, y_patient = build_patient_data(joined_data, labels_array)
        if X_patient is not None and y_patient is not None:
            X_list.append(X_patient)
            y_list.append(y_patient)
    if len(X_list) == 0:
        raise ValueError("No patient data loaded.")
    X_all = np.vstack(X_list)
    y_all = np.concatenate(y_list)
    return X_all, y_all

# Load raw signals for CNN/LSTM/GRU
def load_all_patients_raw_signal(num_patients=40, target_length=1000):
    X_list, y_list = [], []
    for patient in range(1, num_patients + 1):
        data = load_patient_preprocessed_data(patient)
        joined_data = data['joined_data']
        labels_array = data['labels_ext_annotation']
        
        X_patient, y_patient = build_dataset(joined_data, labels_array, target_length=target_length)
        
        if X_patient is not None:
            X_list.append(X_patient)
            y_list.append(y_patient)

    if not X_list:
        raise ValueError("No data loaded!")

    X_all = np.vstack(X_list)
    y_all = np.concatenate(y_list)

    return X_all, y_all

## Load and Prepare Data

In [3]:
X_raw, y_desc = load_all_patients_data(num_patients=40)
unique_labels = np.unique(y_desc)
label_to_int = {label: idx for idx, label in enumerate(unique_labels)}
y_int = np.array([label_to_int[label] for label in y_desc])
num_classes = len(unique_labels)

# Split once (used by all models)
X_train, X_test, y_train, y_test, y_int_train, y_int_test = train_test_split(
    X_raw, to_categorical(y_int), y_int, test_size=0.2, random_state=42, stratify=y_int
)


Loading patient 1
Loading patient 2
Loading patient 3
Loading patient 4
Loading patient 5
Loading patient 6
Loading patient 7
Loading patient 8
Loading patient 9
Loading patient 10
Loading patient 11
Loading patient 12
Loading patient 13
Loading patient 14
Loading patient 15
Loading patient 16
Loading patient 17
Loading patient 18
Loading patient 19
Loading patient 20
Loading patient 21
Loading patient 22
Loading patient 23
Loading patient 24
Loading patient 25
Loading patient 26
Loading patient 27
Loading patient 28
Loading patient 29
Loading patient 30
Loading patient 31
Loading patient 32
Loading patient 33
Loading patient 34
Loading patient 35
Loading patient 36
Loading patient 37
Loading patient 38
Loading patient 39
Loading patient 40


## Feature Extraction

In [14]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization, LSTM, GRU
from tensorflow.keras.callbacks import EarlyStopping

# CNN feature extraction function
def extract_cnn_features(X_train, y_train, X_test, num_classes):
    input_shape = X_train.shape[1:]
    inputs = Input(shape=input_shape)
    x = Conv1D(16, kernel_size=5, activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(pool_size=2)(x)
    x = Conv1D(32, kernel_size=5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(pool_size=2)(x)
    x = Conv1D(64, kernel_size=5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(pool_size=2)(x)
    features = Flatten(name='features')(x)
    x = Dense(128, activation='relu')(features)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2,
              callbacks=[early_stop], verbose=1)

    extractor = Model(inputs=model.input, outputs=model.get_layer('features').output)
    return extractor.predict(X_train), extractor.predict(X_test)

# LSTM feature extraction function
def extract_lstm_features(X_train, y_train, X_test, num_classes):
    input_shape = X_train.shape[1:]
    inputs = Input(shape=input_shape)
    x = LSTM(64, return_sequences=True)(inputs)
    x = Dropout(0.2)(x)
    x = LSTM(64)(x)
    features = Dense(128, activation='relu', name='features')(x)
    x = Dropout(0.5)(features)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2,
              callbacks=[early_stop], verbose=1)

    extractor = Model(inputs=model.input, outputs=model.get_layer('features').output)
    return extractor.predict(X_train), extractor.predict(X_test)

# GRU feature extraction function
def extract_gru_features(X_train, y_train, X_test, num_classes):
    input_shape = X_train.shape[1:]
    inputs = Input(shape=input_shape)
    x = GRU(64, return_sequences=True)(inputs)
    x = Dropout(0.2)(x)
    x = GRU(64)(x)
    features = Dense(128, activation='relu', name='features')(x)
    x = Dropout(0.5)(features)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2,
              callbacks=[early_stop], verbose=1)

    extractor = Model(inputs=model.input, outputs=model.get_layer('features').output)
    return extractor.predict(X_train), extractor.predict(X_test)

## Model Training and Evaluation Functions

In [5]:
def train_evaluate_classifiers(X_tr, X_te, y_tr, y_te):
    classifiers = {
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "SVM": SVC(probability=True),
        "KNN": KNeighborsClassifier(),
        "Decision Tree": DecisionTreeClassifier(),
        "Random Forest": RandomForestClassifier(n_estimators=100)
    }

    for clf_name, clf in classifiers.items():
        clf.fit(X_tr, y_tr)
        pred = clf.predict(X_te)
        acc = accuracy_score(y_te, pred)
        print(f"{clf_name}: Accuracy = {acc:.3f}")
        print(confusion_matrix(y_te, pred))
        print(classification_report(y_te, pred))
        try:
            prob = clf.predict_proba(X_te)
            auc = roc_auc_score(to_categorical(y_te), prob, multi_class="ovr", average="macro")
            print("AUC:", auc)
        except Exception as e:
            print("AUC Error:", e)
            

# --- Build CNN Model ---
def create_cnn_model(input_shape, num_classes):
    model = Sequential()
    model.add(Conv1D(filters=16, kernel_size=5, activation='relu', input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(filters=32, kernel_size=5, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(filters=64, kernel_size=5, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def create_lstm_model(input_shape, num_classes):
    model = Sequential()
    model.add(LSTM(64, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(64))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model

def create_gru_model(input_shape, num_classes):
    model = Sequential()
    model.add(GRU(64, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(GRU(64))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model

## Run Statistical Feature Extraction + ML classifiers

In [7]:
# Correct feature extraction:
X_train_stat = np.array([extract_features(signal) for signal in X_train])
X_test_stat = np.array([extract_features(signal) for signal in X_test])

# Impute missing values
imputer = SimpleImputer(strategy='mean').fit(X_train_stat)
X_train_stat_imputed = imputer.transform(X_train_stat)
X_test_stat_imputed = imputer.transform(X_test_stat)

# Standardize features
scaler = StandardScaler().fit(X_train_stat_imputed)
X_train_stat_scaled = scaler.transform(X_train_stat_imputed)
X_test_stat_scaled = scaler.transform(X_test_stat_imputed)

# Train and evaluate classifiers
train_evaluate_classifiers(X_train_stat_scaled, X_test_stat_scaled, y_int_train, y_int_test)


Logistic Regression: Accuracy = 0.669
[[ 0  0  0 14]
 [ 0  0  0 30]
 [ 0  0  0  5]
 [ 0  0  0 99]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.00      0.00      0.00        30
           2       0.00      0.00      0.00         5
           3       0.67      1.00      0.80        99

    accuracy                           0.67       148
   macro avg       0.17      0.25      0.20       148
weighted avg       0.45      0.67      0.54       148

AUC: 0.4447409467617899
SVM: Accuracy = 0.669
[[ 0  0  0 14]
 [ 0  0  0 30]
 [ 0  0  0  5]
 [ 0  0  0 99]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.00      0.00      0.00        30
           2       0.00      0.00      0.00         5
           3       0.67      1.00      0.80        99

    accuracy                           0.67       148
   macro avg       0.17      0.25

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest: Accuracy = 0.608
[[ 1  1  0 12]
 [ 1  2  0 27]
 [ 0  0  0  5]
 [ 4  6  2 87]]
              precision    recall  f1-score   support

           0       0.17      0.07      0.10        14
           1       0.22      0.07      0.10        30
           2       0.00      0.00      0.00         5
           3       0.66      0.88      0.76        99

    accuracy                           0.61       148
   macro avg       0.26      0.25      0.24       148
weighted avg       0.51      0.61      0.54       148

AUC: 0.5954855713614884


##  CNN Feature Extraction + ML classifiers

In [12]:
# Corrected reshaping for CNN/LSTM/GRU input
X_signal, y_signal_desc = load_all_patients_raw_signal(num_patients=40, target_length=TARGET_LENGTH)

# Encode labels
unique_labels = np.unique(y_signal_desc)
label_to_int = {label: idx for idx, label in enumerate(unique_labels)}
y_int_signal = np.array([label_to_int[label] for label in y_signal_desc])
y_cat_signal = to_categorical(y_int_signal)

# Correct reshape for Conv1D/LSTM/GRU (samples, timesteps, channels)
X_signal = X_signal.reshape(X_signal.shape[0], TARGET_LENGTH, -1) 

# Split into training and test sets
X_train, X_test, y_train, y_test, y_int_train, y_int_test = train_test_split(
    X_signal, y_cat_signal, y_int_signal,
    test_size=0.2, random_state=42, stratify=y_int_signal
)

# Run CNN feature extraction and ML classifiers
train_features, test_features = extract_cnn_features(X_train, y_train, X_test, len(unique_labels))
train_evaluate_classifiers(train_features, test_features, y_int_train, y_int_test)

Epoch 1/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 33ms/step - accuracy: 0.0908 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 2/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.0661 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 3/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0797 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 4/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0810 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 5/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0931 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 6/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0777 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 7/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

ABNORMAL: .

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Logistic Regression: Accuracy = 0.095
[[14  0  0  0]
 [30  0  0  0]
 [ 5  0  0  0]
 [99  0  0  0]]
              precision    recall  f1-score   support

           0       0.09      1.00      0.17        14
           1       0.00      0.00      0.00        30
           2       0.00      0.00      0.00         5
           3       0.00      0.00      0.00        99

    accuracy                           0.09       148
   macro avg       0.02      0.25      0.04       148
weighted avg       0.01      0.09      0.02       148

AUC: 0.5
SVM: Accuracy = 0.669
[[ 0  0  0 14]
 [ 0  0  0 30]
 [ 0  0  0  5]
 [ 0  0  0 99]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.00      0.00      0.00        30
           2       0.00      0.00      0.00         5
           3       0.67      1.00      0.80        99

    accuracy                           0.67       148
   macro avg       0.17      0.25      0.20     

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


KNN: Accuracy = 0.669
[[ 0  0  0 14]
 [ 0  0  0 30]
 [ 0  0  0  5]
 [ 0  0  0 99]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.00      0.00      0.00        30
           2       0.00      0.00      0.00         5
           3       0.67      1.00      0.80        99

    accuracy                           0.67       148
   macro avg       0.17      0.25      0.20       148
weighted avg       0.45      0.67      0.54       148

AUC: 0.5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Decision Tree: Accuracy = 0.669
[[ 0  0  0 14]
 [ 0  0  0 30]
 [ 0  0  0  5]
 [ 0  0  0 99]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.00      0.00      0.00        30
           2       0.00      0.00      0.00         5
           3       0.67      1.00      0.80        99

    accuracy                           0.67       148
   macro avg       0.17      0.25      0.20       148
weighted avg       0.45      0.67      0.54       148

AUC: 0.5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest: Accuracy = 0.669
[[ 0  0  0 14]
 [ 0  0  0 30]
 [ 0  0  0  5]
 [ 0  0  0 99]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.00      0.00      0.00        30
           2       0.00      0.00      0.00         5
           3       0.67      1.00      0.80        99

    accuracy                           0.67       148
   macro avg       0.17      0.25      0.20       148
weighted avg       0.45      0.67      0.54       148

AUC: 0.5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## LSTM Feature Extraction + ML classifiers

In [15]:
# Corrected reshaping for LSTM input
X_signal, y_signal_desc = load_all_patients_raw_signal(num_patients=40, target_length=TARGET_LENGTH)

# Encode labels
unique_labels = np.unique(y_signal_desc)
label_to_int = {label: idx for idx, label in enumerate(unique_labels)}
y_int_signal = np.array([label_to_int[label] for label in y_signal_desc])
y_cat_signal = to_categorical(y_int_signal)

# Reshape correctly for LSTM (samples, timesteps, channels)
X_signal = X_signal.reshape(X_signal.shape[0], TARGET_LENGTH, -1)  # ✅ fixed here

# Split into training and test sets
X_train, X_test, y_train, y_test, y_int_train, y_int_test = train_test_split(
    X_signal, y_cat_signal, y_int_signal,
    test_size=0.2, random_state=42, stratify=y_int_signal
)

# Run LSTM feature extraction and ML classifiers
train_features, test_features = extract_lstm_features(X_train, y_train, X_test, len(unique_labels))
train_evaluate_classifiers(train_features, test_features, y_int_train, y_int_test)


Epoch 1/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 292ms/step - accuracy: 0.5748 - loss: 1.3401 - val_accuracy: 0.6134 - val_loss: 1.3760
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 265ms/step - accuracy: 0.6482 - loss: 1.3719 - val_accuracy: 0.6134 - val_loss: 1.3657
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 266ms/step - accuracy: 0.6831 - loss: 1.3589 - val_accuracy: 0.6134 - val_loss: 1.3554
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 265ms/step - accuracy: 0.6883 - loss: 1.3467 - val_accuracy: 0.6134 - val_loss: 1.3455
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 269ms/step - accuracy: 0.6745 - loss: 1.3363 - val_accuracy: 0.6134 - val_loss: 1.3358
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 280ms/step - accuracy: 0.6973 - loss: 1.3217 - val_accuracy: 0.6134 - val_loss: 1.3261
Epoch 7/10
[1m15/15[0m [3

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

## GRU Feature Extraction + ML classifiers

In [16]:
# Corrected reshaping for GRU input
X_signal, y_signal_desc = load_all_patients_raw_signal(num_patients=40, target_length=TARGET_LENGTH)

# Encode labels
unique_labels = np.unique(y_signal_desc)
label_to_int = {label: idx for idx, label in enumerate(unique_labels)}
y_int_signal = np.array([label_to_int[label] for label in y_signal_desc])
y_cat_signal = to_categorical(y_int_signal)

# Reshape correctly for GRU (samples, timesteps, channels)
X_signal = X_signal.reshape(X_signal.shape[0], TARGET_LENGTH, -1)  # ✅ fixed here

# Split into training and test sets
X_train, X_test, y_train, y_test, y_int_train, y_int_test = train_test_split(
    X_signal, y_cat_signal, y_int_signal,
    test_size=0.2, random_state=42, stratify=y_int_signal
)

# Run GRU feature extraction and ML classifiers
train_features, test_features = extract_gru_features(X_train, y_train, X_test, len(unique_labels))
train_evaluate_classifiers(train_features, test_features, y_int_train, y_int_test)


Epoch 1/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 323ms/step - accuracy: 0.5440 - loss: 1.3842 - val_accuracy: 0.6134 - val_loss: 1.3757
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 304ms/step - accuracy: 0.6967 - loss: 1.3704 - val_accuracy: 0.6134 - val_loss: 1.3654
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 301ms/step - accuracy: 0.6756 - loss: 1.3591 - val_accuracy: 0.6134 - val_loss: 1.3553
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 300ms/step - accuracy: 0.6859 - loss: 1.3466 - val_accuracy: 0.6134 - val_loss: 1.3454
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 303ms/step - accuracy: 0.6796 - loss: 1.3351 - val_accuracy: 0.6134 - val_loss: 1.3358
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 299ms/step - accuracy: 0.6983 - loss: 1.3207 - val_accuracy: 0.6134 - val_loss: 1.3262
Epoch 7/10
[1m15/15[0m [3

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

## CNN/LSTM/GRU end-to-end model evaluation

In [None]:
# Load and preprocess raw signals correctly once for all models
X_signal, y_signal_desc = load_all_patients_raw_signal(num_patients=40, target_length=TARGET_LENGTH)

# Encode labels
unique_labels = np.unique(y_signal_desc)
label_to_int = {label: idx for idx, label in enumerate(unique_labels)}
y_int_signal = np.array([label_to_int[label] for label in y_signal_desc])
y_cat_signal = to_categorical(y_int_signal)

# Reshape for models: CNN/LSTM/GRU require shape (samples, timesteps, channels)
X_signal = X_signal.reshape(X_signal.shape[0], TARGET_LENGTH, -1)  # ✅ consistent shape

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X_signal, y_cat_signal, test_size=0.2, random_state=42, stratify=y_int_signal
)

input_shape = X_train.shape[1:]

# --- CNN standalone ---
cnn_model = create_cnn_model(input_shape, len(unique_labels))
cnn_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2,
              callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)])
cnn_eval = cnn_model.evaluate(X_test, y_test, verbose=0)
print("CNN standalone evaluation (loss, accuracy):", cnn_eval)

# --- LSTM standalone ---
lstm_model = create_lstm_model(input_shape, len(unique_labels))
lstm_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2,
               callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)])
lstm_eval = lstm_model.evaluate(X_test, y_test, verbose=0)
print("LSTM standalone evaluation (loss, accuracy):", lstm_eval)

# --- GRU standalone ---
gru_model = create_gru_model(input_shape, len(unique_labels))
gru_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2,
              callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)])
gru_eval = gru_model.evaluate(X_test, y_test, verbose=0)
print("GRU standalone evaluation (loss, accuracy):", gru_eval)


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step - accuracy: 0.0831 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 2/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0918 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 3/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0798 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 4/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0830 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 5/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0864 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 6/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.0788 - loss: nan - val_accuracy: 0.1345 - val_loss: nan
Epoch 7/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/s

  super().__init__(**kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 276ms/step - accuracy: 0.5612 - loss: 1.4529 - val_accuracy: 0.6134 - val_loss: 1.3759
Epoch 2/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 254ms/step - accuracy: 0.6937 - loss: 1.3706 - val_accuracy: 0.6134 - val_loss: 1.3655
Epoch 3/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 261ms/step - accuracy: 0.6646 - loss: 1.3597 - val_accuracy: 0.6134 - val_loss: 1.3556
Epoch 4/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 255ms/step - accuracy: 0.7105 - loss: 1.3451 - val_accuracy: 0.6134 - val_loss: 1.3456
Epoch 5/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 263ms/step - accuracy: 0.6795 - loss: 1.3353 - val_accuracy: 0.6134 - val_loss: 1.3360
Epoch 6/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 264ms/step - accuracy: 0.6899 - loss: 1.3233 - val_accuracy: 0.6134 - val_loss: 1.3265
Epoch 7/50
[1m 6/15[0m [32m━━━━━━━━

KeyboardInterrupt: 