In [1]:
#Master_Thesis for Michel Laji 2024
#Creating a EEG classifying naive transformer model using the dataset from kaggel (https://www.kaggle.com/datasets/harunshimanto/epileptic-seizure-recognition/code)

#Libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
import sklearn
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, roc_curve, confusion_matrix, accuracy_score
from sklearn.utils import class_weight
import torch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from tensorflow.keras.regularizers import l2

2024-03-28 11:21:45.431885: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# Load training data
train_data = pd.read_csv("train_eeg.csv")
x_train = train_data.iloc[:, :-1].values  # Features: all columns except the last
y_train = train_data.iloc[:, -1].values   # Labels: the last column

# Load test data
test_data = pd.read_csv("test_eeg.csv")
x_test = test_data.iloc[:, :-1].values  # Features
y_test = test_data.iloc[:, -1].values   # Labels

# Load validation data
val_data = pd.read_csv("validation_eeg.csv")
x_val = val_data.iloc[:, :-1].values  # Features
y_val = val_data.iloc[:, -1].values   # Labels

# Reshape features for a model expecting 3D inputs, e.g., (samples, timesteps, features)
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))
x_val = x_val.reshape((x_val.shape[0], x_val.shape[1], 1))

print(np.shape(x_train))

(8050, 178, 1)


In [3]:
#Creating the encoder and Fedd Forward part of the transformer
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Attention and Normalization
    x = layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(inputs, inputs)
    x = layers.Dropout(dropout)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    res = x + inputs
    
    # Feed Forward part of the transformer
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu", kernel_regularizer=l1_reg)(res)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1, padding='same', kernel_regularizer=l1_reg)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    return (x + res)

In [4]:
def build_model(input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units, dropout=0, mlp_dropout=0):
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu", kernel_regularizer=l1_reg)(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(1, activation="sigmoid", kernel_regularizer=l1_reg)(x)
    return keras.Model(inputs, outputs)

In [5]:
# Training parameters
num_epochs = 3
batch_size = 10
learning_rate = 0.0001


In [6]:
#Define the L2 Regularization
lambda_l2 =0.01
l1_reg = regularizers.l2(lambda_l2)

def get_regularization_type(regularizer):
    if isinstance(regularizer, regularizers.L1):
        return "L1"
    elif isinstance(regularizer, regularizers.L2):
        return "L2"
    else:
        return "Unknown"
regularization_type = get_regularization_type(l2_reg)
lambda_value = lambda_l2

#Early stopping
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0,  # Minimum change in the monitored quantity to qualify as an improvement.
    patience=10,  # Number of epochs with no improvement after which training will be stopped.
    verbose=0,  # Verbosity mode.
    mode='auto',  # Direction of improvement.
    baseline=None,  # Value to reach.
    restore_best_weights=False,  # Restore model weights from the epoch with the best value of the monitored quantity.
    start_from_epoch=600,  # Epoch from which to start monitoring. This line was missing a comma at its end.
)

# Building the model
input_shape = x_train.shape[1:]

key_dim = 64
model = build_model(
    input_shape, 
    head_size=256, 
    num_heads=4, 
    ff_dim=4, 
    num_transformer_blocks=4,
    mlp_units=[128], 
    dropout=0.025, 
    mlp_dropout=0.4)

# Compiling the model
model.compile(
    loss=BinaryCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=learning_rate), 
    metrics=["accuracy"]
)


# Displaying the model summary
model.summary()

# Compute class weights
class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weight_dict = dict(enumerate(class_weights))

# Define the model checkpoint
model_checkpoint = ModelCheckpoint(
    filepath=f"Model_{regularization_type}_{lambda_value}_best.h5",
    monitor="val_loss",
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode="auto",
    save_freq="epoch"
)

# Training the model
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    batch_size=batch_size,
    epochs=num_epochs,
    class_weight=class_weight_dict,
    callbacks=[early_stopping, model_checkpoint]  # Include model_checkpoint here
)

# Get the last epoch
last_epoch = len(history.history['loss'])


# After training, you can get the training and validation losses like this:
train_losses = history.history['loss']
val_losses = history.history['val_loss']

# Print the final epoch's losses
print(f'Epoch {num_epochs}/{num_epochs}, Training Loss: {train_losses[-1]}')
print(f'Epoch {num_epochs}/{num_epochs}, Validation Loss: {val_losses[-1]}')



2024-03-28 11:21:50.645673: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31075 MB memory:  -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:61:00.0, compute capability: 7.0
2024-03-28 11:21:50.646293: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 31075 MB memory:  -> device: 1, name: Tesla V100-SXM2-32GB, pci bus id: 0000:8a:00.0, compute capability: 7.0


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 178, 1)]     0           []                               
                                                                                                  
 multi_head_attention (MultiHea  (None, 178, 1)      7169        ['input_1[0][0]',                
 dAttention)                                                      'input_1[0][0]']                
                                                                                                  
 dropout (Dropout)              (None, 178, 1)       0           ['multi_head_attention[0][0]']   
                                                                                                  
 layer_normalization (LayerNorm  (None, 178, 1)      2           ['dropout[0][0]']            

 mbda)                                                            'tf.__operators__.add_5[0][0]'] 
                                                                                                  
 conv1d_6 (Conv1D)              (None, 178, 4)       8           ['tf.__operators__.add_6[0][0]'] 
                                                                                                  
 dropout_7 (Dropout)            (None, 178, 4)       0           ['conv1d_6[0][0]']               
                                                                                                  
 conv1d_7 (Conv1D)              (None, 178, 1)       5           ['dropout_7[0][0]']              
                                                                                                  
 layer_normalization_7 (LayerNo  (None, 178, 1)      2           ['conv1d_7[0][0]']               
 rmalization)                                                                                     
          

2024-03-28 11:21:57.093695: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8401
2024-03-28 11:21:57.630685: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x14afb4c4cda0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-03-28 11:21:57.630708: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0
2024-03-28 11:21:57.630713: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (1): Tesla V100-SXM2-32GB, Compute Capability 7.0
2024-03-28 11:21:57.634788: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-03-28 11:21:57.722776: I tensorflow/compiler/jit/xla_compilation_cache.cc:477] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/3
Epoch 3/3
Epoch 3/3, Training Loss: 38.337371826171875
Epoch 3/3, Validation Loss: 33.14757537841797


In [7]:
# Evaluating the model
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=1)

# Storing the true and predicted labels
true_labels_test = []
predicted_labels_test = []
test_losses = []
probabilities_list = []

# Making predictions
y_pred_prob = model.predict(x_test)
y_pred = np.where(y_pred_prob > 0.5, 1, 0)

# Calculate precision, recall, accuracy and F1 score
precision = precision_score(y_test, y_pred, average='weighted', zero_division=1)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=1)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=1)
overall_accuracy = accuracy_score(y_test, y_pred)





In [8]:
# Calculate AUC and plot ROC curve
def sigmoid(scores):
    exp_scores = np.exp(-scores)
    return 1/ (1 + np.exp(-scores))
    #return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

y_pred_prob_softmax = y_pred_prob

# Calculate AUC
auc = roc_auc_score(y_test, y_pred_prob)

# Calculate ROC curve
fpr, tpr, _ = roc_curve(y_test, y_pred_prob)

print(f'AUC: {auc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Loss: {test_loss:.4f}')

# Plot ROC curve
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='blue', lw=2, label=f'AUC = {auc:.2f}')
plt.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--', label='Random Chance')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='lower right')

# Save the ROC plot in the current working directory with the name 'AUC.png'
plt.savefig(f'AUC_{regularization_type}_{lambda_value}_{last_epoch}.png')


AUC: 0.7526
Precision: 0.8253
Recall: 0.5213
F1 Score: 0.5542
Test Accuracy: 0.5213
Test Loss: 32.1108


In [9]:
with open('model_evaluation_results__{regularization_type}_{lambda_value}_{last_epoch}.txt', 'w') as file:
    file.write(f"Test Loss = {test_loss:.4f}\n")
    file.write(f"Test Accuracy = {test_accuracy:.4f}\n")
    file.write(f"Precision = {precision:.4f}\n")
    file.write(f"Recall = {recall:.4f}\n")
    file.write(f"F1 Score = {f1:.4f}\n")
    file.write(f"Overall Accuracy = {overall_accuracy:.4f}\n")

In [10]:
plt.figure()
plt.plot(history.history['loss'], label='Train')
plt.plot(history.history['val_loss'], label='Validation')
plt.title('Training vs Validation loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(loc='upper right')

# Save the plot in the current working directory with the name plot_1.png
plt.savefig('Training vs Validation loss_{regularization_type}_{lambda_value}_{last_epoch}.png')

In [11]:
# Example data
x = np.linspace(0.1, 10, 100)
y = np.log(x)

plt.figure()
plt.plot(x, y)
plt.xscale('log')
plt.title('Logarithmic scale')
plt.xlabel('Logarithmic X')
plt.ylabel('Y')

filename = f"Logscale_{regularization_type}_{lambda_value}_{last_epoch}.png"
plt.savefig(filename)

plt.show()

print(f"Plot saved as {filename}")

Plot saved as Logscale_L1_0.01_3.png
