In [1]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Conv1D, BatchNormalization, MaxPooling1D, Flatten, Dense, Input, LSTM, Dropout
from tensorflow.keras.models import Model
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import classification_report, confusion_matrix, f1_score
import seaborn as sns
from scipy.signal import welch
from scipy.stats import entropy, skew, kurtosis
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import LabelBinarizer

2024-12-14 15:22:31.579323: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-14 15:22:31.579430: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-14 15:22:31.718755: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:

def read_data(filename):
    with open(filename, 'rb') as f:
        data = pickle.load(f, encoding='latin1')
    return data

files = [f"s{n:02d}" for n in range(1, 33)]

labels, data = [], []

for file in files:
    filepath = f"/kaggle/input/deap-dataset/data_preprocessed_python/{file}.dat"
    d = read_data(filepath)
    labels.append(d['labels'])
    data.append(d['data'])

labels = np.array(labels)
data = np.array(data)

print("Labels shape: ", labels.shape)
print("Data shape: ", data.shape)

eeg_data = data[:, :, :32, -7680:]
print("EEG data shape: ", eeg_data.shape)

Labels shape:  (32, 40, 4)
Data shape:  (32, 40, 40, 8064)
EEG data shape:  (32, 40, 32, 7680)


In [4]:
def extract_psd_feature(data):
    psd_features = []
    for channel_data in data:
        f, psd = welch(channel_data, fs=128)
        psd_features.append(psd)
    return np.array(psd_features)

def extract_entropy_feature(data):
    entropy_features = []
    for channel_data in data:
        prob_dist, _ = np.histogram(channel_data, bins=128, density=True)
        prob_dist = prob_dist / np.sum(prob_dist)
        entropy_features.append(entropy(prob_dist))
    return np.array(entropy_features)

def extract_sd_feature(data):
    return np.std(data, axis=1)

def extract_mean_feature(data):
    return np.mean(data, axis=1)
def extract_variance_feature(data):
    return np.var(data, axis=1)

def extract_skewness_feature(data):
    return skew(data, axis=1)

def extract_kurtosis_feature(data):
    return kurtosis(data, axis=1)

def preprocess_data(eeg_data, labels, feature_type):
    data_features, data_labels, data_labels_valence = [], [], []
    
    for subject in range(eeg_data.shape[0]):
        for trial in range(eeg_data.shape[1]):
            trial_data = eeg_data[subject, trial, :, :]
            if feature_type == 'psd':
                features = extract_psd_feature(trial_data)
            elif feature_type == 'entropy':
                features = extract_entropy_feature(trial_data)
            elif feature_type == 'sd':
                features = extract_sd_feature(trial_data)
            elif feature_type == 'mean':
                features = extract_mean_feature(trial_data)
            elif feature_type == 'variance':
                features = extract_variance_feature(trial_data)
            elif feature_type == 'skewness':
                features = extract_skewness_feature(trial_data)
            elif feature_type == 'kurtosis':
                features = extract_kurtosis_feature(trial_data)
            else:
                raise ValueError("Unknown feature type")
            
            data_features.append(features)
            arousal_label = labels[subject, trial, 0]
            data_labels.append(1 if arousal_label > 5 else 0)
            valence_label = labels[subject, trial, 1]
            data_labels_valence.append(1 if valence_label > 5 else 0)
    
    return np.array(data_features), np.array(data_labels), np.array(data_labels_valence)

In [5]:
feature_types = ['psd', 'entropy', 'sd', 'mean', 'variance', 'skewness', 'kurtosis']

all_features = {}
for feature_type in feature_types:
    features, labels_processed, labels_processed_valence = preprocess_data(eeg_data, labels, feature_type)
    all_features[feature_type] = (features, labels_processed, labels_processed_valence)
    print(f"{feature_type.capitalize()} Features shape: ", features.shape)
    print(f"{feature_type.capitalize()} Arousal Labels shape: ", labels_processed.shape)
    print(f"{feature_type.capitalize()} Valence Labels shape: ", labels_processed_valence.shape)


Psd Features shape:  (1280, 32, 129)
Psd Arousal Labels shape:  (1280,)
Psd Valence Labels shape:  (1280,)
Entropy Features shape:  (1280, 32)
Entropy Arousal Labels shape:  (1280,)
Entropy Valence Labels shape:  (1280,)
Sd Features shape:  (1280, 32)
Sd Arousal Labels shape:  (1280,)
Sd Valence Labels shape:  (1280,)
Mean Features shape:  (1280, 32)
Mean Arousal Labels shape:  (1280,)
Mean Valence Labels shape:  (1280,)
Variance Features shape:  (1280, 32)
Variance Arousal Labels shape:  (1280,)
Variance Valence Labels shape:  (1280,)
Skewness Features shape:  (1280, 32)
Skewness Arousal Labels shape:  (1280,)
Skewness Valence Labels shape:  (1280,)
Kurtosis Features shape:  (1280, 32)
Kurtosis Arousal Labels shape:  (1280,)
Kurtosis Valence Labels shape:  (1280,)


In [6]:
# CNN + LSTM
def create_cnn_lstm_model(input_shape):
    inputs = Input(shape=input_shape)
    
    # 1st Conv Block
    x = Conv1D(filters=128, kernel_size=3, strides=1, padding='same', activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(pool_size=2)(x)
    
    # 2nd Conv Block
    x = Conv1D(filters=128, kernel_size=3, strides=1, padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(pool_size=2)(x)
    
    # 3rd Conv Block 
    x = Conv1D(filters=256, kernel_size=3, strides=1, padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(pool_size=2)(x)
    
    # LSTM
    x = LSTM(256, return_sequences=False)(x)
    
    x = Dense(units=512, activation='relu')(x)
    x = Dropout(0.5)(x)  
    x = Dense(units=256, activation='relu')(x)
    
    outputs = Dense(units=1, activation='sigmoid')(x)
    
    
    model = Model(inputs, outputs)
    return model

feature_types = ['psd']

In [7]:

X_all = []
y_all = []

for feature_type in feature_types:
    features, arousal_labels, _ = all_features[feature_type]
    
    for channel in range(features.shape[2]):
        channel_data = features[:, :, channel]  
        X_all.append(channel_data)
        y_all.append(arousal_labels)

X_all = np.concatenate(X_all, axis=0)  
y_all = np.concatenate(y_all, axis=0)  

print("Concatenated Features Shape: ", X_all.shape)
print("Concatenated Arousal Labels Shape: ", y_all.shape)


kf = KFold(n_splits=5, shuffle=True, random_state=42)

Concatenated Features Shape:  (165120, 32)
Concatenated Arousal Labels Shape:  (165120,)


In [40]:
import os
import wandb

# Đọc API key từ biến môi trường
api_key = os.environ.get("88aea2f639fada32b4501731dc07e972dc732128")
wandb.login(key=api_key)
wandb.init(project="Recognition Emotion", config={
               "learning_rate": 0.001,
               "epochs": 150,
               "batch_size": 32,
               "feature_type": "psd", # Sẽ được cập nhật sau
               "model": "CNN-LSTM"
           })
config={
               "learning_rate": 0.001,
               "epochs": 70,
               "batch_size": 32,
               "feature_type": "psd", # Sẽ được cập nhật sau
               "model": "CNN-LSTM"
           }



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁
fold,▁▁▃▅▆█
test/f1_score,▄▆▇▁█▆
test/fold,▁▁▃▅▆█
train/accuracy,▁▂█▄▄▂
train/loss,█▆▁█▄▆
val/accuracy,▁▁▃██▆
val/loss,█▇▃▂▁▂

0,1
epoch,0.0
fold,4.0
test/f1_score,0.71024
test/fold,4.0
train/accuracy,0.5561
train/loss,0.68553
val/accuracy,0.5642
val/loss,0.6813


In [47]:
def train_and_evaluate(model, X_train, y_train, X_test, y_test):
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    history = model.fit(X_train, y_train, epochs=config["epochs"], batch_size=32, validation_data=(X_test, y_test), verbose=1)

    for epoch in range(config["epochs"]):
        wandb.log({
            "epoch": epoch,
            "train/loss": history.history['loss'][epoch],
            "train/accuracy": history.history['accuracy'][epoch],
            "val/loss": history.history['val_loss'][epoch],
            "val/accuracy": history.history['val_accuracy'][epoch],
            "fold": fold
        })
    y_pred = model.predict(X_test)
    y_pred = (y_pred > 0.5).astype(int)

    f1 = f1_score(y_test, y_pred)
    
    test_score = model.evaluate(X_test, y_test)

    wandb.log({"test/f1_score": f1, "test/fold": fold})

    
    return test_score, f1


In [48]:
# from wandb.keras import WandbCallback

In [49]:
for fold, (train_idx, val_idx) in enumerate(kf.split(X_all)):
    print(f"Training fold {fold + 1}")
    
    X_train, X_val = X_all[train_idx], X_all[val_idx]
    y_train, y_val = y_all[train_idx], y_all[val_idx]
    
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
    
    # CNN + LSTM
    input_shape = (X_train.shape[1], 1)
    model = create_cnn_lstm_model(input_shape)
    
    test_score, f1 = train_and_evaluate(model, X_train, y_train, X_val, y_val)
    
    print(f"Fold {fold + 1} Test Score: ", test_score)
    print(f"Fold {fold + 1} F1 Score: ", f1)

Training fold 1
Epoch 1/70
[1m4128/4128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 7ms/step - accuracy: 0.5494 - loss: 0.6904 - val_accuracy: 0.5759 - val_loss: 0.6807
Epoch 2/70
[1m4128/4128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 7ms/step - accuracy: 0.5822 - loss: 0.6726 - val_accuracy: 0.5610 - val_loss: 0.6889
Epoch 3/70
[1m4128/4128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 7ms/step - accuracy: 0.5937 - loss: 0.6608 - val_accuracy: 0.5659 - val_loss: 0.6877
Epoch 4/70
[1m4128/4128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 7ms/step - accuracy: 0.6078 - loss: 0.6485 - val_accuracy: 0.5568 - val_loss: 0.6868
Epoch 5/70
[1m4128/4128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 7ms/step - accuracy: 0.6147 - loss: 0.6394 - val_accuracy: 0.5659 - val_loss: 0.6869
Epoch 6/70
[1m4128/4128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 7ms/step - accuracy: 0.6280 - loss: 0.6275 - val_accuracy: 0.5720 - val_loss: 0