## Imports

In [1]:
import os
import numpy as np
import gc
import scipy.io
import scipy.signal as sgl
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import  Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from scipy.stats import skew, kurtosis
from scipy.signal import welch
import neurokit2 as nk

## Data Loading & Preprocessing

In [2]:

# --- Data Loading Function ---
def load_patient_preprocessed_data(patient_number, base_dir=None):
    """
    Load the preprocessed MATLAB file of one participant.

    The file is expected at
    ``<base_dir>/Data preprocessed/Data_Preprocessed_PXX/Data_Preprocessed_PXX.mat``
    where ``XX`` is ``patient_number`` zero-padded to two digits.

    Args:
        patient_number: Integer participant id used to build the file name.
        base_dir: Root folder of the dataset. When None, the original
            author's local dataset folder is used (kept for backward
            compatibility with existing calls).

    Returns:
        The dictionary produced by ``scipy.io.loadmat`` for that file.
    """
    if base_dir is None:
        # Historical default; pass base_dir explicitly on any other machine.
        base_dir = r"C:\Users\ferri\Downloads\PoliTO\Tesi\DSs\Emotion-Stress\AMIGOS"
    stem = f"Data_Preprocessed_P{patient_number:02d}"
    file_path = os.path.join(base_dir, "Data preprocessed", stem, f"{stem}.mat")
    return scipy.io.loadmat(file_path)

# --- Preprocessing functions for pipeline ---
def process_trial_signal(signal, target_length=None, fs=512):
    """
    Convert one trial into a float32 array laid out as [channels, time].

    Steps, in order:
      1. Downsample every row from ``fs`` towards 128 Hz by keeping every
         ``fs // 128``-th sample.
      2. Zero-phase band-pass (4th-order Butterworth, 1-45 Hz). Rows that
         are too short for ``filtfilt`` are left unfiltered.
      3. Subtract each row's mean.
      4. If ``target_length`` is given, pad (repeating the last sample) or
         truncate the time axis to exactly that many samples.

    Args:
        signal: 1D (single channel) or 2D array-like; each row is treated
            as one channel.
        target_length: Number of samples to keep after downsampling, or
            None to keep the full length (pad later).
        fs: Sampling rate of the input in Hz.

    Returns:
        float32 array of shape (channels, time); shape (0, 0) when the
        input is empty.
    """
    signal = np.array(signal, dtype=np.float32)

    if signal.size == 0:
        return np.empty((0, 0), dtype=np.float32)

    # A 1D input is a single channel.
    if signal.ndim == 1:
        signal = signal[None, :]

    lowcut, highcut = 1.0, 45.0
    desired_fs = 128
    # Never let the step drop to 0 (fs < desired_fs): a zero slice step raises.
    down_factor = max(1, int(fs // desired_fs))

    # 1) Downsample all channels at once.
    # NOTE(review): plain slicing applies no anti-alias filter before
    # decimation; kept as-is because changing it alters every output value.
    signal = signal[:, ::down_factor]

    # 2) Band-pass filter.
    nyquist = 0.5 * desired_fs
    b, a = sgl.butter(N=4, Wn=[lowcut / nyquist, highcut / nyquist], btype='band')

    # filtfilt pads each end with 3 * max(len(a), len(b)) samples by default
    # and requires the input to be strictly longer than that padding.
    padlen = 3 * max(len(a), len(b))
    if signal.shape[-1] > padlen:
        signal = sgl.filtfilt(b, a, signal, axis=-1)
    # else: too short to filter safely; leave unfiltered.

    # 3) Baseline removal (per-channel mean).
    signal = signal - np.mean(signal, axis=-1, keepdims=True)

    # 4) Padding/truncation along time.
    if target_length is not None:
        n_time = signal.shape[-1]
        if n_time < target_length:
            signal = np.pad(signal, ((0, 0), (0, target_length - n_time)), mode='edge')
        else:
            signal = signal[:, :target_length]

    return signal.astype(np.float32)

def split_into_modalities(signal):
    """
    Split a processed trial into a dict of named modalities.

    A 1D input is returned as ``{"ecg": signal}``. For a multi-row input,
    rows 0, 1 and 2 are returned under "ecg", "gsr" and "eeg"; any further
    rows are ignored. When fewer than three rows exist, only the
    modalities that are present are returned instead of raising IndexError.

    NOTE(review): the row-to-modality mapping is an assumption of this
    function - confirm it against the channel order of the dataset.
    """
    if signal.ndim == 1:
        return {"ecg": signal}

    names = ("ecg", "gsr", "eeg")
    available = names[:signal.shape[0]]
    return {name: signal[row, :] for row, name in enumerate(available)}

def discretize_label(label):
    """
    Map a numeric label onto one of four valence/arousal quadrant names.

    The label is flattened first. With exactly two values they are read as
    (valence, arousal); with three or more, the values at positions 1 and 2
    are used. Anything shorter yields "Unknown". The quadrant boundary is 0
    on both axes, with 0 itself counted as "High".
    """
    values = np.array(label).flatten()
    count = values.size
    if count < 2:
        return "Unknown"

    # Two values: use them as-is; otherwise skip the leading entry.
    offset = 0 if count == 2 else 1
    valence = values[offset]
    arousal = values[offset + 1]

    quadrants = (
        (valence < 0 and arousal < 0, "Low valence, Low arousal"),
        (valence < 0 and arousal >= 0, "Low valence, High arousal"),
        (valence >= 0 and arousal < 0, "High valence, Low arousal"),
    )
    for matched, name in quadrants:
        if matched:
            return name
    # Everything else (including non-comparable values such as NaN).
    return "High valence, High arousal"

# --- Feature Extraction Functions ---
# For each of the 10 HRV features: the column names to look up, in order.
# nk.hrv reports its indices under "HRV_"-prefixed column names (for example
# HRV_RMSSD, HRV_LFHF, HRV_HTI), so the bare names alone would never match;
# they are kept as first candidates for backward compatibility.
_HRV_FEATURE_COLUMNS = (
    ("RMSSD", "HRV_RMSSD"),
    ("SDNN", "HRV_SDNN"),
    ("pNN50", "HRV_pNN50"),
    ("pNN20", "HRV_pNN20"),
    ("LF/HF", "HRV_LFHF"),
    ("HF", "HRV_HF"),
    ("LF", "HRV_LF"),
    ("VLF", "HRV_VLF"),
    ("HRV_TI", "HRV_HTI"),
    ("SDSD", "HRV_SDSD"),
)


def _bandpower(freqs, psd, fmin, fmax):
    """Integrate ``psd`` over the closed band [fmin, fmax] (trapezoid rule)."""
    in_band = np.logical_and(freqs >= fmin, freqs <= fmax)
    # np.trapezoid only exists on NumPy >= 2.0; older releases name it np.trapz.
    integrate = getattr(np, "trapezoid", None) or np.trapz
    return integrate(psd[in_band], x=freqs[in_band])


def _ecg_features(ecg_signal, fs):
    """Return 10 ECG features: HRV indices, or basic statistics on failure."""
    if len(ecg_signal) <= 18:
        return [0.0] * 10  # Not enough data for advanced features.
    try:
        ecg_cleaned = nk.ecg_clean(ecg_signal, sampling_rate=fs)
        _, rpeaks = nk.ecg_peaks(ecg_cleaned, sampling_rate=fs)
        if len(rpeaks.get("ECG_R_Peaks", [])) == 0:
            raise ValueError("No R-peaks detected.")
        hrv = nk.hrv(rpeaks, sampling_rate=fs, show=False)
        feats = []
        for candidates in _HRV_FEATURE_COLUMNS:
            value = 0.0  # Missing or NaN features are encoded as 0.
            for column in candidates:
                if column in hrv.columns:
                    found = hrv[column].values[0]
                    if not np.isnan(found):
                        value = found
                    break
            feats.append(value)
        return feats
    except Exception:
        # Deliberate best-effort: any failure in the HRV pipeline falls back
        # to five basic statistics, zero-padded to length 10.
        basic_stats = [
            np.mean(ecg_signal),
            np.std(ecg_signal),
            np.min(ecg_signal),
            np.max(ecg_signal),
            np.median(ecg_signal),
        ]
        return basic_stats + [0.0] * (10 - len(basic_stats))


def _gsr_features(gsr_signal, fs):
    """Return 7 GSR features: basic statistics plus the SCR peak count."""
    if len(gsr_signal) <= 2:
        return [0.0] * 7
    try:
        eda_cleaned = nk.eda_clean(gsr_signal, sampling_rate=fs)
        eda_peaks, _ = nk.eda_peaks(eda_cleaned, sampling_rate=fs)
        num_scr_peaks = eda_peaks.get("SCR_Peaks", np.array([0])).sum()
    except Exception:
        # Peak detection is optional; the statistics below are still useful.
        num_scr_peaks = 0.0
    return [
        np.mean(gsr_signal),
        np.std(gsr_signal),
        np.min(gsr_signal),
        np.max(gsr_signal),
        kurtosis(gsr_signal),
        skew(gsr_signal),
        num_scr_peaks,
    ]


def _eeg_channel_features(channel_signal, fs):
    """Return 6 features of one EEG channel: Hjorth parameters and band powers."""
    if len(channel_signal) < 2:
        return [0.0] * 6
    first_diff = np.diff(channel_signal)
    activity = np.var(channel_signal)
    # The 1e-8 terms guard against division by zero on flat signals.
    mobility = np.std(first_diff) / (np.std(channel_signal) + 1e-8)
    complexity = (np.std(np.diff(first_diff)) / (np.std(first_diff) + 1e-8)) / (mobility + 1e-8)
    freqs, psd = welch(channel_signal, fs=fs, nperseg=min(256, len(channel_signal)))
    alpha = _bandpower(freqs, psd, 8, 14)
    beta = _bandpower(freqs, psd, 14, 30)
    gamma = _bandpower(freqs, psd, 30, 50)
    return [activity, mobility, complexity, alpha, beta, gamma]


def extract_features(signals, fs=128):
    """
    Extract one flat feature vector from a dict of signals.

    Recognised keys and what each contributes, in this order:
      - 'ecg': 10 values (HRV indices; basic statistics if HRV fails;
        zeros if the signal has 18 samples or fewer).
      - 'gsr': 7 values (mean, std, min, max, kurtosis, skew, SCR peak
        count; zeros if the signal has 2 samples or fewer).
      - 'eeg': 6 values (Hjorth activity/mobility/complexity and
        alpha/beta/gamma band power), averaged across channels.

    Args:
        signals: Dict mapping modality name to a 1D signal ('eeg' may
            also be 2D with one row per channel).
        fs: Sampling rate in Hz of the given signals.

    Returns:
        1D NumPy array with the concatenated features of the modalities
        present, or ``np.zeros(10)`` when none of the keys is present.
    """
    feat_list = []

    if 'ecg' in signals:
        ecg_signal = np.array(signals['ecg']).flatten()
        feat_list.append(np.array(_ecg_features(ecg_signal, fs)))

    if 'gsr' in signals:
        gsr_signal = np.array(signals['gsr']).flatten()
        feat_list.append(np.array(_gsr_features(gsr_signal, fs)))

    if 'eeg' in signals:
        eeg_data = np.array(signals['eeg'])
        if eeg_data.ndim == 1:
            eeg_data = eeg_data[None, :]  # Ensure 2D shape.
        all_channels_feats = [
            _eeg_channel_features(eeg_data[ch, :], fs)
            for ch in range(eeg_data.shape[0])
        ]
        # Average across channels.
        feat_list.append(np.mean(all_channels_feats, axis=0))

    if len(feat_list) == 0:
        return np.zeros(10)
    return np.concatenate(feat_list)

def build_dataset(joined_data, labels_array, target_length=None):
    """
    Build a feature matrix and label vector from one participant's trials.

    For every trial ``i`` the signal in ``joined_data[0, i]`` is processed,
    split into modalities and turned into a feature vector; the label in
    ``labels_array[0, i]`` is turned into a quadrant name. Trials with an
    empty signal, or with fewer than three label values, are skipped with
    a printed warning.

    Args:
        joined_data: 2D object array; trials are indexed as ``[0, i]``.
        labels_array: 2D object array of per-trial labels, same indexing.
        target_length: Forwarded to ``process_trial_signal``.

    Returns:
        ``(X, y)`` with one row of features and one label per kept trial,
        or ``(None, None)`` when no trial was kept.
    """
    X_list = []
    y_list = []
    n_trials = joined_data.shape[1]

    for i in range(n_trials):
        signal = process_trial_signal(joined_data[0, i], target_length)
        if signal.size == 0 or signal.shape[0] == 0:
            print(f"Warning: Trial {i} has an empty signal. Skipping trial.")
            continue

        # Validate the label before the (expensive) feature extraction so
        # that trials which will be discarded do not pay for it.
        lbl = np.array(labels_array[0, i]).squeeze()
        if lbl.size < 3:
            print(f"Warning: Trial {i} does not have enough label data. Skipping trial.")
            continue
        # Use only the second and third label entries.
        # NOTE(review): assumes lbl is 1D after squeeze - TODO confirm.
        discrete_label = discretize_label(lbl[1:3])

        feats = extract_features(split_into_modalities(signal), fs=128)

        X_list.append(feats)
        y_list.append(discrete_label)

    if not X_list:
        return None, None

    return np.vstack(X_list), np.array(y_list)

# Gather per-trial feature vectors and labels across participants.
def load_all_patients_raw_signal(num_patients=40, target_length=None):
    """
    Load participants 1..num_patients and stack their datasets.

    Each participant's file is loaded, passed through ``build_dataset``
    and, unless that returned no trials, added to the result.

    Returns:
        ``(X_all, y_all)``: the vertically stacked per-participant feature
        matrices and the concatenated label arrays.

    Raises:
        ValueError: if no participant contributed any trial.
    """
    collected = []
    for patient in range(1, num_patients + 1):
        mat = load_patient_preprocessed_data(patient)
        trials = mat['joined_data']
        annotations = mat['labels_ext_annotation']

        features, labels = build_dataset(trials, annotations, target_length=target_length)
        if features is None:
            continue
        collected.append((features, labels))

    if len(collected) == 0:
        raise ValueError("No data loaded!")

    feature_blocks, label_blocks = zip(*collected)
    return np.vstack(feature_blocks), np.concatenate(label_blocks)

def pad_trials(trials, pad_mode='constant', constant_values=0):
    """
    Pad a list of 2D arrays (channels, time) to a common shape and stack them.

    Every trial is padded at the end of both axes up to the largest
    channel count and the largest time length found among the trials.

    Args:
        trials: Non-empty sequence of 2D arrays.
        pad_mode: Any ``mode`` accepted by ``np.pad``.
        constant_values: Fill value; only used when ``pad_mode`` is
            'constant'.

    Returns:
        Array of shape (n_trials, max_channels, max_time).

    Raises:
        ValueError: if ``trials`` is empty.
    """
    trials = list(trials)
    if not trials:
        raise ValueError("pad_trials() needs at least one trial.")

    # Determine maximum dimensions among all trials.
    max_channels = max(trial.shape[0] for trial in trials)
    max_time = max(trial.shape[1] for trial in trials)

    # np.pad rejects constant_values for every mode other than 'constant',
    # so only forward it when it applies.
    pad_kwargs = {'constant_values': constant_values} if pad_mode == 'constant' else {}

    padded_trials = []
    for trial in trials:
        ch, t = trial.shape
        if ch < max_channels or t < max_time:
            trial = np.pad(
                trial,
                ((0, max_channels - ch), (0, max_time - t)),
                mode=pad_mode,
                **pad_kwargs,
            )
        padded_trials.append(trial)
    return np.stack(padded_trials, axis=0)

def get_trial_lengths(num_patients=40):
    """
    Report the processed length of every trial of participants 1..num_patients.

    Each trial is run through ``process_trial_signal`` without a target
    length, so the reported value is the time dimension after that
    processing. Empty trials are reported and left out of the result.

    Returns:
        Dict mapping participant number to the list of its trial lengths.
    """
    lengths_by_patient = {}
    for patient in range(1, num_patients + 1):
        print(f"\nPatient {patient}:")
        trials = load_patient_preprocessed_data(patient)['joined_data']
        kept = []
        for i in range(trials.shape[1]):
            # No target_length: keep whatever length the trial really has.
            processed = process_trial_signal(trials[0, i], target_length=None)
            has_samples = processed.size != 0 and processed.shape[1] != 0
            if not has_samples:
                print(f"  Trial {i}: empty signal")
                continue
            # processed is [channels, time]; axis 1 is the length.
            trial_length = processed.shape[1]
            kept.append(trial_length)
            print(f"  Trial {i}: length = {trial_length}")
        lengths_by_patient[patient] = kept
    return lengths_by_patient

def load_all_patients_raw_signal_deep_chunked(num_patients=40, target_length=None):
    """
    Load processed signals and quadrant labels for participants 1..num_patients.

    Participants are handled one at a time and their raw file contents are
    released before the next one is loaded. Trials with an empty label or
    an empty processed signal are skipped with a printed warning.

    Args:
        num_patients: Highest participant number to load (starting at 1).
        target_length: Forwarded to ``process_trial_signal``; when None,
            trials keep their own length and are zero-padded to the
            longest one.

    Returns:
        ``(X_all, y_all)`` where ``X_all`` has shape
        (n_trials, channels, time) with channels zero-padded to the largest
        channel count, and ``y_all`` holds one label string per trial.

    Raises:
        ValueError: if no trial was loaded.
    """
    all_X = []
    all_y = []
    for patient in range(1, num_patients + 1):
        print(f"Processing patient {patient}...")
        data = load_patient_preprocessed_data(patient)
        joined_data = data['joined_data']
        labels_array = data['labels_ext_annotation']
        n_trials = joined_data.shape[1]
        for i in range(n_trials):
            # Process label first.
            lbl = np.array(labels_array[0, i]).squeeze()
            if lbl.size == 0:
                print(f"Warning: Patient {patient} Trial {i} has empty label. Skipping trial.")
                continue
            if lbl.ndim == 2:
                # Several label rows: average them column-wise.
                lbl_processed = np.mean(lbl, axis=0)
            elif lbl.ndim == 1:
                lbl_processed = lbl
            else:
                lbl_processed = lbl.flatten()[0]
            discrete_label = discretize_label(lbl_processed)

            # process_trial_signal already returns float32.
            signal = process_trial_signal(joined_data[0, i], target_length)
            if signal.shape[0] == 0:
                # An empty (0, 0) result cannot be stacked with real trials.
                print(f"Warning: Patient {patient} Trial {i} has an empty signal. Skipping trial.")
                continue
            all_X.append(signal)
            all_y.append(discrete_label)

        # Free this participant's data before loading the next one.
        del data, joined_data, labels_array
        gc.collect()

    if not all_X:
        raise ValueError("No patient data loaded.")

    # With a fixed target_length only the channel axis can differ; without
    # it both axes can. pad_trials handles both cases with zero padding.
    X_all = pad_trials(all_X, pad_mode='constant', constant_values=0)
    y_all = np.array(all_y)
    return X_all, y_all

## Load data from patients

In [3]:
# Cell purpose: load every participant's processed signals, encode the
# labels and create the train/test split shared by all models below.

# Get and print trial lengths for each patient.
# trial_lengths = get_trial_lengths(num_patients=40)

# For deep learning we want a fixed target length.
# NOTE(review): target_length counts samples after process_trial_signal has
# downsampled towards 128 Hz, so 5 keeps only the first 5 samples of each
# trial - confirm this is intentional.
target_length = 5 

# Note: load_all_patients_raw_signal_deep_chunked loops over patient numbers starting at 1.
X_raw, y_raw = load_all_patients_raw_signal_deep_chunked(num_patients=40, target_length=target_length)
# X_raw has shape (n_trials, channels, target_length)

# For deep models (Conv1D), we interpret the time dimension as the sequence length.
# Rearrange the input to shape (n_trials, target_length, channels)
X_dl = np.transpose(X_raw, (0, 2, 1))

# Encode string labels into integers and then one-hot vectors.
# np.unique returns the label names sorted, so the integer codes follow
# that sorted order.
unique_labels = np.unique(y_raw)
label_to_int = {label: idx for idx, label in enumerate(unique_labels)}
y_int = np.array([label_to_int[label] for label in y_raw])
y_cat = to_categorical(y_int)

# Split into training and test sets.
# NOTE(review): the split is made over individual trials, so trials of the
# same participant can end up in both sets - confirm this is acceptable.
X_train, X_test, y_train, y_test = train_test_split(
    X_dl, y_cat, test_size=0.2, random_state=42, stratify=y_cat
)

# Get input shape details.
timesteps = X_train.shape[1]   # target_length
num_channels = X_train.shape[2]
num_classes = y_cat.shape[1]

print("Input shape:", X_train.shape)
print("Number of classes:", num_classes)

Processing patient 1...
Processing patient 2...
Processing patient 3...
Processing patient 4...
Processing patient 5...
Processing patient 6...
Processing patient 7...
Processing patient 8...
Processing patient 9...
Processing patient 10...
Processing patient 11...
Processing patient 12...
Processing patient 13...
Processing patient 14...
Processing patient 15...
Processing patient 16...
Processing patient 17...
Processing patient 18...
Processing patient 19...
Processing patient 20...
Processing patient 21...
Processing patient 22...
Processing patient 23...
Processing patient 24...
Processing patient 25...
Processing patient 26...
Processing patient 27...
Processing patient 28...
Processing patient 29...
Processing patient 30...
Processing patient 31...
Processing patient 32...
Processing patient 33...
Processing patient 34...
Processing patient 35...
Processing patient 36...
Processing patient 37...
Processing patient 38...
Processing patient 39...
Processing patient 40...
Input sha

## Model 1: Pure CNN

In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, GlobalMaxPooling1D, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# 1D CNN: three convolutional blocks followed by a dense classifier.
model_cnn = Sequential([
    # Declare the input with an explicit Input layer; passing input_shape
    # to the first layer is deprecated for Sequential models in Keras 3
    # and triggers a UserWarning.
    Input(shape=(timesteps, num_channels)),

    # First convolutional block
    Conv1D(filters=32, kernel_size=3, activation='relu', padding='same'),
    BatchNormalization(),
    Conv1D(filters=32, kernel_size=3, activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.25),

    # Second convolutional block
    Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'),
    BatchNormalization(),
    Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.25),

    # Third convolutional block
    Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'),
    BatchNormalization(),
    Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'),
    BatchNormalization(),
    # Global pooling instead of a third MaxPooling1D, to avoid reducing the
    # time dimension below 1.
    GlobalMaxPooling1D(),
    Dropout(0.25),

    # Classification block
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model_cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_cnn.summary()

# Callbacks: stop when val_loss stalls for 10 epochs (restoring the best
# weights) and halve the learning rate after 5 stalled epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

# Train the model; 20% of the training set is held out for validation.
history_cnn = model_cnn.fit(
    X_train, y_train,
    epochs=30,
    batch_size=16,
    validation_split=0.2,
    callbacks=[early_stop, reduce_lr]
)

# Evaluate the model on the test set
loss_cnn, acc_cnn = model_cnn.evaluate(X_test, y_test)
print("CNN Test Accuracy:", acc_cnn)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 227ms/step - accuracy: 0.6294 - loss: 1.4633 - val_accuracy: 0.6555 - val_loss: 1.2263 - learning_rate: 0.0010
Epoch 2/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 213ms/step - accuracy: 0.6546 - loss: 1.1847 - val_accuracy: 0.6555 - val_loss: 1.0833 - learning_rate: 0.0010
Epoch 3/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 212ms/step - accuracy: 0.6773 - loss: 1.0384 - val_accuracy: 0.6555 - val_loss: 0.9935 - learning_rate: 0.0010
Epoch 4/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 203ms/step - accuracy: 0.6914 - loss: 0.9383 - val_accuracy: 0.6555 - val_loss: 0.9652 - learning_rate: 0.0010
Epoch 5/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 205ms/step - accuracy: 0.6947 - loss: 0.8872 - val_accuracy: 0.6555 - val_loss: 0.9566 - learning_rate: 0.0010
Epoch 6/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6

## Model 2: RNN with LSTM

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, BatchNormalization, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Build an improved RNN model with stacked bidirectional LSTMs
model_rnn = Sequential([
    # Declare the input with an explicit Input layer; passing input_shape
    # to the first layer is deprecated for Sequential models in Keras 3.
    Input(shape=(timesteps, num_channels)),

    # First Bidirectional LSTM layer returns sequences
    Bidirectional(LSTM(64, return_sequences=True)),
    BatchNormalization(),
    Dropout(0.3),

    # Second LSTM layer (can be unidirectional now)
    LSTM(64, return_sequences=False),
    BatchNormalization(),
    Dropout(0.3),

    # Fully connected classification block
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model_rnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_rnn.summary()

# Callbacks: stop when val_loss stalls for 5 epochs (restoring the best
# weights) and halve the learning rate after 3 stalled epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

# Train the model; 20% of the training set is held out for validation.
history_rnn = model_rnn.fit(
    X_train, y_train,
    epochs=30,
    batch_size=16,
    validation_split=0.2,
    callbacks=[early_stop, reduce_lr]
)

# Evaluate the model on the test set
loss_rnn, acc_rnn = model_rnn.evaluate(X_test, y_test)
print("RNN Test Accuracy:", acc_rnn)

Epoch 1/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 914ms/step - accuracy: 0.4557 - loss: 1.8267 - val_accuracy: 0.6555 - val_loss: 1.2343 - learning_rate: 0.0010
Epoch 2/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 875ms/step - accuracy: 0.6715 - loss: 1.1905 - val_accuracy: 0.6555 - val_loss: 1.1157 - learning_rate: 0.0010
Epoch 3/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 872ms/step - accuracy: 0.6804 - loss: 1.0618 - val_accuracy: 0.6555 - val_loss: 1.0308 - learning_rate: 0.0010
Epoch 4/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 866ms/step - accuracy: 0.6627 - loss: 1.0067 - val_accuracy: 0.6555 - val_loss: 0.9864 - learning_rate: 0.0010
Epoch 5/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 867ms/step - accuracy: 0.6525 - loss: 0.9788 - val_accuracy: 0.6555 - val_loss: 0.9657 - learning_rate: 0.0010
Epoch 6/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

## Model 3: RNN with GRU

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, GRU, Dense, Dropout, BatchNormalization, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Build an improved GRU-based RNN model
model_rnn_gru = Sequential([
    # Declare the input with an explicit Input layer; passing input_shape
    # to the first layer is deprecated for Sequential models in Keras 3.
    Input(shape=(timesteps, num_channels)),

    # First Bidirectional GRU layer to capture both forward and backward temporal patterns.
    Bidirectional(GRU(64, return_sequences=True)),
    BatchNormalization(),
    Dropout(0.3),

    # Second GRU layer for further temporal feature extraction.
    GRU(64, return_sequences=False),
    BatchNormalization(),
    Dropout(0.3),

    # Fully connected block for classification.
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model_rnn_gru.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_rnn_gru.summary()

# Callbacks: stop when val_loss stalls for 5 epochs (restoring the best
# weights) and halve the learning rate after 3 stalled epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

# Train the model; 20% of the training set is held out for validation.
history_rnn_gru = model_rnn_gru.fit(
    X_train, y_train,
    epochs=30,
    batch_size=16,
    validation_split=0.2,
    callbacks=[early_stop, reduce_lr]
)

# Evaluate the model on the test set
loss_gru, acc_gru = model_rnn_gru.evaluate(X_test, y_test)
print("RNN+GRU Test Accuracy:", acc_gru)

Epoch 1/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 730ms/step - accuracy: 0.4826 - loss: 1.8101 - val_accuracy: 0.6555 - val_loss: 1.2291 - learning_rate: 0.0010
Epoch 2/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 689ms/step - accuracy: 0.7083 - loss: 1.1875 - val_accuracy: 0.6555 - val_loss: 1.1181 - learning_rate: 0.0010
Epoch 3/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 682ms/step - accuracy: 0.6724 - loss: 1.0950 - val_accuracy: 0.6555 - val_loss: 1.0382 - learning_rate: 0.0010
Epoch 4/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 678ms/step - accuracy: 0.6832 - loss: 0.9883 - val_accuracy: 0.6555 - val_loss: 0.9888 - learning_rate: 0.0010
Epoch 5/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 680ms/step - accuracy: 0.7164 - loss: 0.9117 - val_accuracy: 0.6555 - val_loss: 0.9685 - learning_rate: 0.0010
Epoch 6/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 