In [16]:
from sklearn.model_selection import GroupKFold
import scipy.io as sio
import neurokit2 as nk
import pandas as pd
import numpy as np
import time
import gc # For garbage collection

# Deep Learning Imports for DCCA
import tensorflow as tf
tf.config.run_functions_eagerly(True)
from tensorflow.keras.layers import Layer # Import base Layer class
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Conv1D, BatchNormalization, ReLU,
                                     MaxPooling1D, GlobalAveragePooling1D, Dense,
                                     Dropout, Concatenate)
from tensorflow.keras.optimizers import Adam

In [2]:
# Define path to the dataset
path = "C:\\Users\\ferri\\Downloads\\PoliTO\\Tesi\\DSs\\Emotion-Stress\\DREAMER.mat" # Ensure this path is correct
raw = sio.loadmat(path)

# Parameters for DCCA data
EEG_SAMPLING_RATE = 128 # Hz, specific to DREAMER EEG
EEG_CHANNELS = 14
EEG_SAMPLES_PER_TRIAL = 128 # Corresponds to 1 second of EEG data (128 Hz * 1s)

ECG_SAMPLING_RATE = 256 # Hz for DREAMER ECG
ECG_SAMPLES_PER_TRIAL = 256 # Corresponds to 1 second of ECG data (256 Hz * 1s)
ECG_CHANNELS = 2

In [3]:
def feat_extract_ECG_for_DCCA(raw_data, sampling_rate=ECG_SAMPLING_RATE, fixed_length=ECG_SAMPLES_PER_TRIAL):
    """Extracts ECG time-series data for DCCA."""
    extracted_data_ts = []
    n_participants = 23
    n_videos = 18

    for participant in range(n_participants):
        for video in range(n_videos):
            basl_left = raw_data["DREAMER"][0, 0]["Data"][0, participant]["ECG"][0, 0]\
                                     ["baseline"][0, 0][video, 0][:, 0]
            stim_left = raw_data["DREAMER"][0, 0]["Data"][0, participant]["ECG"][0, 0]\
                                     ["stimuli"][0, 0][video, 0][:, 0]
            basl_right = raw_data["DREAMER"][0, 0]["Data"][0, participant]["ECG"][0, 0]\
                                      ["baseline"][0, 0][video, 0][:, 1]
            stim_right = raw_data["DREAMER"][0, 0]["Data"][0, participant]["ECG"][0, 0]\
                                      ["stimuli"][0, 0][video, 0][:, 1]

            try:
                signals_b_l, _ = nk.ecg_process(basl_left, sampling_rate=sampling_rate)
                signals_s_l, _ = nk.ecg_process(stim_left, sampling_rate=sampling_rate)
                signals_b_r, _ = nk.ecg_process(basl_right, sampling_rate=sampling_rate)
                signals_s_r, _ = nk.ecg_process(stim_right, sampling_rate=sampling_rate)

                ecg_clean_left = signals_s_l["ECG_Clean"].values - np.mean(signals_b_l["ECG_Clean"].values)
                ecg_clean_right = signals_s_r["ECG_Clean"].values - np.mean(signals_b_r["ECG_Clean"].values)
            except Exception as e:
                # print(f"Neurokit processing error for P{participant+1} V{video+1} ECG: {e}. Using zeros.")
                ecg_clean_left = np.zeros(fixed_length if fixed_length else sampling_rate) # fallback
                ecg_clean_right = np.zeros(fixed_length if fixed_length else sampling_rate)

            sample_time_series = np.stack([ecg_clean_left, ecg_clean_right], axis=-1)

            if fixed_length is not None:
                T = sample_time_series.shape[0]
                if T > fixed_length:
                    sample_time_series = sample_time_series[:fixed_length, :]
                elif T < fixed_length:
                    pad_width = fixed_length - T
                    sample_time_series = np.pad(sample_time_series, ((0, pad_width), (0, 0)), mode='constant')
            
            extracted_data_ts.append(sample_time_series)
            
    return np.array(extracted_data_ts)

def feat_extract_EEG_for_DCCA(raw_data, sampling_rate=EEG_SAMPLING_RATE, fixed_length=EEG_SAMPLES_PER_TRIAL):
    """Extracts EEG time-series data for DCCA (shape: n_samples, fixed_length, n_channels)."""
    extracted_data_eeg_ts = []
    n_participants = 23
    n_videos = 18
    n_channels = EEG_CHANNELS

    for participant in range(n_participants):
        for video in range(n_videos):
            channels_data_single_trial = [] 
            for i in range(n_channels):
                basl = raw_data["DREAMER"][0, 0]["Data"][0, participant]["EEG"][0, 0]\
                                     ["baseline"][0, 0][video, 0][:, i]
                stim = raw_data["DREAMER"][0, 0]["Data"][0, participant]["EEG"][0, 0]\
                                     ["stimuli"][0, 0][video, 0][:, i]
                corrected_signal = stim - np.mean(basl) 
                channels_data_single_trial.append(corrected_signal)

            sample_time_series_unpadded = np.stack(channels_data_single_trial, axis=-1)

            current_fixed_length = fixed_length
            T_orig = sample_time_series_unpadded.shape[0]
            
            if T_orig > current_fixed_length:
                sample_time_series = sample_time_series_unpadded[:current_fixed_length, :]
            elif T_orig < current_fixed_length:
                pad_width_time = current_fixed_length - T_orig
                sample_time_series = np.pad(sample_time_series_unpadded, ((0, pad_width_time), (0, 0)), mode='constant')
            else:
                sample_time_series = sample_time_series_unpadded
            
            extracted_data_eeg_ts.append(sample_time_series)

    return np.array(extracted_data_eeg_ts) 


def participant_affective(raw_data):
    a = np.zeros((23, 18, 9), dtype=object)
    for participant in range(0, 23):
        for video in range(0, 18):
            a[participant, video, 0] = (raw_data["DREAMER"][0, 0]["Data"]
                                        [0, participant]["Age"][0][0][0])
            a[participant, video, 1] = (raw_data["DREAMER"][0, 0]["Data"]
                                        [0, participant]["Gender"][0][0][0])
            a[participant, video, 2] = int(participant+1)
            a[participant, video, 3] = int(video+1)
            a[participant, video, 4] = ["Searching for Bobby Fischer", "D.O.A.", "The Hangover", "The Ring", "300", "National Lampoon\'s VanWilder", "Wall-E", "Crash", "My Girl", "The Fly", "Pride and Prejudice", "Modern Times", "Remember the Titans", "Gentlemans Agreement", "Psycho", "The Bourne Identitiy", "The Shawshank Redemption", "The Departed"][video]
            a[participant, video, 5] = ["calmness", "surprise", "amusement", "fear", "excitement", "disgust", "happiness", "anger", "sadness", "disgust", "calmness", "amusement", "happiness", "anger", "fear", "excitement", "sadness", "surprise"][video]
            a[participant, video, 6] = int(raw_data["DREAMER"][0, 0]["Data"] [0, participant]["ScoreValence"] [0, 0][video, 0])
            a[participant, video, 7] = int(raw_data["DREAMER"][0, 0]["Data"] [0, participant]["ScoreArousal"] [0, 0][video, 0])
            a[participant, video, 8] = int(raw_data["DREAMER"][0, 0]["Data"] [0, participant]["ScoreDominance"] [0, 0][video, 0])
    b = pd.DataFrame(a.reshape((23*18, a.shape[2])), columns=["age", "gender", "participant", "video", "video_name", "target_emotion", "valence", "arousal", "dominance"])
    for col in ["age", "participant", "video", "valence", "arousal", "dominance"]: b[col] = b[col].astype(int)
    b["gender"] = b["gender"].astype(str)
    return b

## DCCA Fusion Model Evaluation

In [4]:
# --- Get Labels and Groups ---
df_participant_affective = participant_affective(raw)
data_filtered_df = df_participant_affective.loc[
    (df_participant_affective['target_emotion'] == 'anger') |
    (df_participant_affective['target_emotion'] == 'fear') |
    (df_participant_affective['target_emotion'] == 'calmness')
].copy()

idx_filter = data_filtered_df.index
y_labels = data_filtered_df['target_emotion'].map({'anger': 1, 'fear': 1, 'calmness': 0}).values
groups_labels = data_filtered_df['participant'].values

# --- Data Prep for DCCA Fusion Model ---
print("\nExtracting data for DCCA Fusion Model...")

# EEG Data for DCCA
X_eeg_unfiltered = feat_extract_EEG_for_DCCA(raw, fixed_length=EEG_SAMPLES_PER_TRIAL)
X_eeg_for_dcca = X_eeg_unfiltered[idx_filter] 
if np.isnan(X_eeg_for_dcca).any():
    print("Warning: NaNs found in X_eeg_for_dcca. Replacing with 0.")
    X_eeg_for_dcca = np.nan_to_num(X_eeg_for_dcca, nan=0.0)
print(f"Shape of X_eeg_for_dcca: {X_eeg_for_dcca.shape}")

# ECG Data for DCCA
X_ecg_unfiltered = feat_extract_ECG_for_DCCA(raw, fixed_length=ECG_SAMPLES_PER_TRIAL)
X_ecg_for_dcca = X_ecg_unfiltered[idx_filter] 
if np.isnan(X_ecg_for_dcca).any():
    print("Warning: NaNs found in X_ecg_for_dcca. Replacing with 0.")
    X_ecg_for_dcca = np.nan_to_num(X_ecg_for_dcca, nan=0.0)
print(f"Shape of X_ecg_for_dcca: {X_ecg_for_dcca.shape}")

X_dcca_inputs = [X_eeg_for_dcca, X_ecg_for_dcca]


Extracting data for DCCA Fusion Model...
Shape of X_eeg_for_dcca: (138, 128, 14)
Shape of X_ecg_for_dcca: (138, 256, 2)


In [23]:
# --- DCCA Loss Layer Definition ---
class DccaLossLayer(Layer):
    def __init__(self, dcca_weight=0.5, epsilon=1e-9, name="dcca_loss_layer", **kwargs):
        super().__init__(name=name, **kwargs)
        self.dcca_weight = dcca_weight
        self.epsilon = epsilon

    def call(self, inputs):
        o1, o2 = inputs

        N = tf.cast(tf.shape(o1)[0], tf.float32)

        def compute_loss():
            o1_centered = o1 - tf.reduce_mean(o1, axis=0, keepdims=True)
            o2_centered = o2 - tf.reduce_mean(o2, axis=0, keepdims=True)
            denominator = (N - 1.0) + self.epsilon # Ensure N-1.0 for sample covariance
            cross_cov = tf.matmul(o1_centered, o2_centered, transpose_a=True) / denominator
            singular_values = tf.linalg.svd(cross_cov, compute_uv=False)
            loss = -tf.reduce_sum(singular_values)
            return loss

        raw_loss = tf.cond(tf.greater(N, 1.0), # Use tf.greater for explicit boolean tensor
                           true_fn=compute_loss,
                           false_fn=lambda: tf.constant(0.0, dtype=tf.float32))
        
        weighted_loss = self.dcca_weight * raw_loss
        self.add_loss(weighted_loss)
        
        # Return one of the inputs, making it behave like an identity layer for data flow.
        # This avoids Keras treating a scalar output specially in a way that might cause issues.
        return o1 # Or inputs (the list [o1, o2]), or any other valid tensor from inputs.

    def compute_output_shape(self, input_shape):
        # If returning o1, the output shape is the shape of o1.
        # input_shape is a list of two shapes: [shape_o1, shape_o2]
        return input_shape[0]

    def get_config(self):
        config = super().get_config()
        config.update({
            "dcca_weight": self.dcca_weight,
            "epsilon": self.epsilon,
        })
        return config

# --- DCCA Model Definition ---
def create_dcca_fusion_model(input_shape_eeg, input_shape_ecg, projection_dim=32, num_classes=1, dcca_weight=0.5):
    input_eeg = Input(shape=input_shape_eeg, name='eeg_input')
    input_ecg = Input(shape=input_shape_ecg, name='ecg_input')

    x_eeg = Conv1D(filters=16, kernel_size=7, padding='same', activation='relu')(input_eeg)
    x_eeg = BatchNormalization()(x_eeg); x_eeg = MaxPooling1D(pool_size=2)(x_eeg)
    x_eeg = Conv1D(filters=32, kernel_size=5, padding='same', activation='relu')(x_eeg)
    x_eeg = BatchNormalization()(x_eeg); x_eeg = MaxPooling1D(pool_size=2)(x_eeg)
    eeg_pooled_features = GlobalAveragePooling1D(name='eeg_pooled_features')(x_eeg)

    x_ecg = Conv1D(filters=16, kernel_size=7, padding='same', activation='relu')(input_ecg)
    x_ecg = BatchNormalization()(x_ecg); x_ecg = MaxPooling1D(pool_size=2)(x_ecg)
    x_ecg = Conv1D(filters=32, kernel_size=5, padding='same', activation='relu')(x_ecg)
    x_ecg = BatchNormalization()(x_ecg); x_ecg = MaxPooling1D(pool_size=2)(x_ecg)
    ecg_pooled_features = GlobalAveragePooling1D(name='ecg_pooled_features')(x_ecg)

    eeg_projected = Dense(projection_dim, activation=None, name='eeg_projection')(eeg_pooled_features)
    ecg_projected = Dense(projection_dim, activation=None, name='ecg_projection')(ecg_pooled_features)

    # DccaLossLayer adds loss as a side effect. Its direct output is now eeg_projected (or o1).
    # This output is not used further in this specific branch, which is fine.
    _ = DccaLossLayer(dcca_weight=dcca_weight, name='dcca_loss_calculator')([eeg_projected, ecg_projected])

    combined_features = Concatenate()([eeg_pooled_features, ecg_pooled_features])
    x_clf = Dense(64, activation='relu')(combined_features)
    x_clf = Dropout(0.5)(x_clf)
    output_classification = Dense(num_classes, activation='sigmoid' if num_classes == 1 else 'softmax', name='classification_output')(x_clf)

    model = Model(inputs=[input_eeg, input_ecg], outputs=output_classification)
    return model

# --- Training and Evaluation Loop for DL Models ---
def run_dl_model_cv(model_fn, X_data_list, y_data, groups_data, model_name,
                    n_splits=5, epochs=25, batch_size=16, compile_kwargs=None, model_kwargs=None):
    print(f"\n--- Evaluating: {model_name} ---")
    cv_dl = GroupKFold(n_splits=n_splits)
    scores, runtimes = [], []

    if model_kwargs is None:
        model_kwargs = {}
    X_for_split = X_data_list[0]

    for fold, (train_idx, test_idx) in enumerate(cv_dl.split(X_for_split, y_data, groups_data)):
        print(f"{model_name} - Fold {fold+1}/{n_splits}...")
        X_train = [x_mod[train_idx] for x_mod in X_data_list]
        X_test  = [x_mod[test_idx]  for x_mod in X_data_list]
        y_train, y_test = y_data[train_idx], y_data[test_idx]

        # clear any previous TF graph & free memory
        tf.keras.backend.clear_session()
        gc.collect()

        # instantiate a fresh model
        model = model_fn(**model_kwargs)

        # compile: either clone your optimizer or fall back to a new Adam each fold
        if compile_kwargs:
            # shallow-copy so we don't mutate the user's dict
            ck = compile_kwargs.copy()

            if 'optimizer' in ck:
                opt = ck.pop('optimizer')
                # re-create a fresh optimizer instance from its config
                ck['optimizer'] = type(opt).from_config(opt.get_config())

            model.compile(**ck)
        elif not model._is_compiled:
            model.compile(
                optimizer=Adam(learning_rate=0.001),
                loss='binary_crossentropy',
                metrics=['accuracy'],
                run_eagerly=True
            )

        # fit + time it
        start_time = time.time()
        history = model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_test, y_test),
            verbose=0
        )
        runtime = time.time() - start_time

        # evaluate
        loss_val, accuracy = model.evaluate(X_test, y_test, verbose=0)
        scores.append(accuracy)
        runtimes.append(runtime)

        best_val_acc_fold = max(history.history.get('val_accuracy', [0]))
        print(f"Fold {fold+1}: Acc={accuracy:.4f}, Time={runtime:.2f}s. "
              f"Best val_acc: {best_val_acc_fold:.4f}")

    # summarize
    mean_acc = np.mean(scores) if scores else np.nan
    std_acc  = np.std(scores)  if scores else np.nan
    mean_rt  = np.mean(runtimes) if runtimes else np.nan

    print(f"{model_name} CV Results: Mean Acc = {mean_acc:.4f} "
          f"+/- {std_acc:.4f}, Mean Runtime = {mean_rt:.2f}s")

    return [model_name, mean_acc, std_acc, mean_rt]


In [24]:
# --- Run DCCA Fusion Model ---
dl_results = []
N_SPLITS_DL = 3
EPOCHS_DL = 25
BATCH_SIZE_DL = 16

dcca_model_kwargs = {
    'input_shape_eeg': (EEG_SAMPLES_PER_TRIAL, EEG_CHANNELS),
    'input_shape_ecg': (ECG_SAMPLES_PER_TRIAL, ECG_CHANNELS),
    'projection_dim': 32,
    'num_classes': 1,
    'dcca_weight': 0.3
}

compile_kwargs = {
    'optimizer': Adam(learning_rate=1e-3),
    'loss': 'binary_crossentropy',
    'metrics': ['accuracy'],
    'run_eagerly': True
}

dl_results.append(run_dl_model_cv(
    create_dcca_fusion_model,
    X_dcca_inputs,
    y_labels,
    groups_labels,
    "DCCA_Fusion_EEG_ECG",
    n_splits=N_SPLITS_DL,
    epochs=EPOCHS_DL,
    batch_size=BATCH_SIZE_DL,
    model_kwargs=dcca_model_kwargs,
    compile_kwargs=compile_kwargs
))



--- Evaluating: DCCA_Fusion_EEG_ECG ---
DCCA_Fusion_EEG_ECG - Fold 1/3...




Fold 1: Acc=0.6250, Time=16.78s. Best val_acc: 0.6667
DCCA_Fusion_EEG_ECG - Fold 2/3...
Fold 2: Acc=0.6458, Time=16.55s. Best val_acc: 0.7083
DCCA_Fusion_EEG_ECG - Fold 3/3...
Fold 3: Acc=0.5238, Time=17.34s. Best val_acc: 0.6905
DCCA_Fusion_EEG_ECG CV Results: Mean Acc = 0.5982 +/- 0.0533, Mean Runtime = 16.89s


In [25]:
# --- Consolidate and Save Results ---
results_dcca_df = pd.DataFrame(dl_results, columns=['name', 'mean_score', 'std_score', 'mean_runtime'])
print("\nDCCA Fusion Model CV Results:")
print(results_dcca_df)

results_dcca_df.to_csv('dcca_fusion_model_results.csv', index=False)

print("\n--- Script Finished ---")


DCCA Fusion Model CV Results:
                  name  mean_score  std_score  mean_runtime
0  DCCA_Fusion_EEG_ECG    0.598214   0.053295     16.891401

--- Script Finished ---
