In [11]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint

import pandas as pd
import os
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

In [12]:
# Read the pre-split feature matrices and label tables from the data folder.
data_path = "../../data"

# Features are converted to plain NumPy arrays right away.
X_train, X_test = (
    pd.read_csv(os.path.join(data_path, csv_name)).values
    for csv_name in ("X_train.csv", "X_test.csv")
)

# Labels stay as DataFrames so individual "<trait>_bin" columns can be
# selected by name later on.
y_train_org, y_test_org = (
    pd.read_csv(os.path.join(data_path, csv_name))
    for csv_name in ("y_train.csv", "y_test.csv")
)

# Traits that are iterated over when training.
traits = [
    'Extraversion',
    'Agreeableness',
    'Conscientiousness',
    'Emotional Stability',
    'Openness',
]

In [13]:
# Make sure the output folders exist before anything is written to them.
results_path = "../../results"
specific_results_path = os.path.join(results_path, "cnn_classification")

# os.makedirs creates missing intermediate directories, so a single call on
# the nested path also creates `results_path` itself.
os.makedirs(specific_results_path, exist_ok=True)

In [14]:
def calc_roc_auc(y_true, y_pred):
    """Return one-vs-rest ROC AUC scores, one per class present in ``y_true``.

    For every distinct label in ``y_true`` both inputs are binarised
    (1 where the entry equals the label, 0 otherwise) and the pair is passed
    to ``roc_auc_score``.

    Note that ``y_pred`` is compared against the label with ``==``, i.e. it is
    treated as hard class predictions rather than probabilities/scores.

    Args:
        y_true: Array-like of ground-truth class labels.
        y_pred: Array-like of predicted class labels, aligned with ``y_true``.

    Returns:
        list: ROC AUC values ordered like ``np.unique(y_true)`` (sorted labels).
    """
    truth = np.asarray(y_true)
    predicted = np.asarray(y_pred)

    # Labels that only ever occur in `y_pred` are not scored: the set of
    # classes is taken from the ground truth alone.
    class_labels = np.unique(truth)

    return [
        roc_auc_score(
            (truth == cls).astype(int),
            (predicted == cls).astype(int),
        )
        for cls in class_labels
    ]

In [15]:
# String bin -> integer class id. This is identical for every trait, so it is
# built once outside the loop.
label_mapping = {'negative': 0, 'neutral': 1, 'positive': 2}

# Derive the class count from the mapping. (Counting np.unique() on the
# one-hot encoded targets would give 2 -- the values 0 and 1 -- not 3.)
num_classes = len(label_mapping)

for trait in traits:
    print(f"Processing {trait}")
    trait_bin = trait + "_bin"

    # Integer-encode the "<trait>_bin" column, then one-hot encode it so it
    # matches the softmax output / categorical cross-entropy loss below.
    # An unknown label raises KeyError here instead of being silently dropped.
    y_train = np.array([label_mapping[label] for label in y_train_org[trait_bin]])
    y_test = np.array([label_mapping[label] for label in y_test_org[trait_bin]])
    y_train = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
    y_test = tf.keras.utils.to_categorical(y_test, num_classes=num_classes)

    # Keep only the full model from the epoch with the highest validation
    # accuracy; it is reloaded after training for the test evaluation.
    checkpoint = ModelCheckpoint("best_model.h5",
                                 monitor='val_accuracy',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='max',
                                 save_weights_only=False)

    # Define the model. Despite the "cnn" naming used for the results folder,
    # this is a fully connected network: it contains only Dense and Dropout
    # layers, no convolutions.
    model = keras.Sequential()

    # One input value per feature column of X_train.
    model.add(layers.Input(shape=(X_train.shape[1],)))

    # Hidden layers
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dropout(0.3))  # Dropout layer for regularization
    model.add(layers.Dense(32, activation='relu'))
    model.add(layers.Dense(16, activation='relu'))

    # Output layer: one unit per class, softmax activation
    model.add(layers.Dense(num_classes, activation='softmax'))

    # Compile the model
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',  # targets are one-hot encoded
                  metrics=['accuracy'])

    # Display the model summary
    model.summary()

    # Train the model
    batch_size = 128
    epochs = 500

    # A tenth of the training data is held out as the validation set whose
    # accuracy the checkpoint callback monitors.
    history = model.fit(X_train, y_train,
                        epochs=epochs,
                        batch_size=batch_size,
                        validation_split=0.1,
                        callbacks=[checkpoint])  # Pass the checkpoint callback

    # Load the checkpointed model with the highest validation accuracy
    best_model = tf.keras.models.load_model("best_model.h5")

    # Evaluate the model on the test data
    test_loss, test_accuracy = best_model.evaluate(X_test, y_test)
    print(f"Test Accuracy: {test_accuracy}")

    # NOTE(review): this unconditional break stops after the first trait, so
    # the remaining entries of `traits` are never trained. It looks like a
    # debugging shortcut -- remove it to process every trait. Be aware that
    # "best_model.h5" is then overwritten on each iteration.
    break

Processing Extraversion
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_10 (Dense)            (None, 128)               2560      
                                                                 
 dense_11 (Dense)            (None, 64)                8256      


                                                                 
 dropout_2 (Dropout)         (None, 64)                0         
                                                                 
 dense_12 (Dense)            (None, 32)                2080      
                                                                 
 dense_13 (Dense)            (None, 16)                528       
                                                                 
 dense_14 (Dense)            (None, 3)                 51        
                                                                 
Total params: 13475 (52.64 KB)
Trainable params: 13475 (52.64 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/500
 1/14 [=>............................] - ETA: 2s - loss: 1.0777 - accuracy: 0.4609
Epoch 1: val_accuracy improved from -inf to 0.52406, saving model to best_model.h5
Epoch 2/500
 1/14 [=>............................] - ETA: 0s

  saving_api.save_model(


Epoch 8/500
 1/14 [=>............................] - ETA: 0s - loss: 1.0079 - accuracy: 0.5078
Epoch 8: val_accuracy did not improve from 0.54545
Epoch 9/500
 1/14 [=>............................] - ETA: 0s - loss: 0.9557 - accuracy: 0.6016
Epoch 9: val_accuracy did not improve from 0.54545
Epoch 10/500
 1/14 [=>............................] - ETA: 0s - loss: 0.9720 - accuracy: 0.5469
Epoch 10: val_accuracy did not improve from 0.54545
Epoch 11/500
 1/14 [=>............................] - ETA: 0s - loss: 0.9343 - accuracy: 0.5938
Epoch 11: val_accuracy did not improve from 0.54545
Epoch 12/500
 1/14 [=>............................] - ETA: 0s - loss: 0.9860 - accuracy: 0.5312
Epoch 12: val_accuracy did not improve from 0.54545
Epoch 13/500
 1/14 [=>............................] - ETA: 0s - loss: 0.9085 - accuracy: 0.6016
Epoch 13: val_accuracy did not improve from 0.54545
Epoch 14/500
 1/14 [=>............................] - ETA: 0s - loss: 0.9809 - accuracy: 0.5156
Epoch 14: val_accura