In [1]:
import tensorflow as tf
gpus = tf.config.list_physical_devices("GPU")
if gpus:
    for gpu in gpus:
        print("Found a GPU with the name:", gpu)
else:
    print("Failed to detect a GPU.")


Found a GPU with the name: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [2]:
import numpy as np
import os
import glob

In [3]:
# files = np.sort(glob.glob("./EEG FOLDER/*"))
# print("Total number of files: ", len(files))
# print("Showing first 10 files...")
# files[:10]

In [4]:
# Recursively glob for CSV files within each subdirectory of the EEG FOLDER
files = glob.glob('./EEG FOLDER/*/*.csv', recursive=True)
files.sort()  # Optional: sort the files for consistency

In [5]:
print("Total number of files: ", len(files))
print("Showing first 10 files...")
print(files[:10])


Total number of files:  360
Showing first 10 files...
['./EEG FOLDER\\Fear\\cz_eeg_data_11.csv', './EEG FOLDER\\Fear\\cz_eeg_data_15.csv', './EEG FOLDER\\Fear\\cz_eeg_data_17.csv', './EEG FOLDER\\Fear\\cz_eeg_data_18.csv', './EEG FOLDER\\Fear\\cz_eeg_data_2.csv', './EEG FOLDER\\Fear\\cz_eeg_data_5.csv', './EEG FOLDER\\Fear\\ha_eeg_data_11.csv', './EEG FOLDER\\Fear\\ha_eeg_data_15.csv', './EEG FOLDER\\Fear\\ha_eeg_data_17.csv', './EEG FOLDER\\Fear\\ha_eeg_data_18.csv']


In [6]:
import pandas as pd
import re            # To match regular expression for extracting labels

In [7]:
glob.glob("./EEG FOLDER/*")

['./EEG FOLDER\\Fear',
 './EEG FOLDER\\Happy',
 './EEG FOLDER\\Neutral',
 './EEG FOLDER\\Sad']

In [8]:
glob.glob("./EEG FOLDER/Neutral/*")[:10] # Showing first 10 files of Neutral folder

['./EEG FOLDER/Neutral\\cz_eeg_data_21.csv',
 './EEG FOLDER/Neutral\\cz_eeg_data_23.csv',
 './EEG FOLDER/Neutral\\cz_eeg_data_4.csv',
 './EEG FOLDER/Neutral\\cz_eeg_data_6.csv',
 './EEG FOLDER/Neutral\\cz_eeg_data_7.csv',
 './EEG FOLDER/Neutral\\cz_eeg_data_9.csv',
 './EEG FOLDER/Neutral\\ha_eeg_data_21.csv',
 './EEG FOLDER/Neutral\\ha_eeg_data_23.csv',
 './EEG FOLDER/Neutral\\ha_eeg_data_4.csv',
 './EEG FOLDER/Neutral\\ha_eeg_data_6.csv']

In [9]:
glob.glob("./EEG FOLDER/Sad/*")[:10]  #Sad

['./EEG FOLDER/Sad\\cz_eeg_data_1.csv',
 './EEG FOLDER/Sad\\cz_eeg_data_10.csv',
 './EEG FOLDER/Sad\\cz_eeg_data_12.csv',
 './EEG FOLDER/Sad\\cz_eeg_data_13.csv',
 './EEG FOLDER/Sad\\cz_eeg_data_14.csv',
 './EEG FOLDER/Sad\\cz_eeg_data_8.csv',
 './EEG FOLDER/Sad\\ha_eeg_data_1.csv',
 './EEG FOLDER/Sad\\ha_eeg_data_10.csv',
 './EEG FOLDER/Sad\\ha_eeg_data_12.csv',
 './EEG FOLDER/Sad\\ha_eeg_data_13.csv']

In [10]:
glob.glob("./EEG FOLDER/Fear/*")[:10]  #Fear

['./EEG FOLDER/Fear\\cz_eeg_data_11.csv',
 './EEG FOLDER/Fear\\cz_eeg_data_15.csv',
 './EEG FOLDER/Fear\\cz_eeg_data_17.csv',
 './EEG FOLDER/Fear\\cz_eeg_data_18.csv',
 './EEG FOLDER/Fear\\cz_eeg_data_2.csv',
 './EEG FOLDER/Fear\\cz_eeg_data_5.csv',
 './EEG FOLDER/Fear\\ha_eeg_data_11.csv',
 './EEG FOLDER/Fear\\ha_eeg_data_15.csv',
 './EEG FOLDER/Fear\\ha_eeg_data_17.csv',
 './EEG FOLDER/Fear\\ha_eeg_data_18.csv']

In [11]:
glob.glob("./EEG FOLDER/Happy/*")[:10]  #Happy

['./EEG FOLDER/Happy\\cz_eeg_data_16.csv',
 './EEG FOLDER/Happy\\cz_eeg_data_19.csv',
 './EEG FOLDER/Happy\\cz_eeg_data_20.csv',
 './EEG FOLDER/Happy\\cz_eeg_data_22.csv',
 './EEG FOLDER/Happy\\cz_eeg_data_24.csv',
 './EEG FOLDER/Happy\\cz_eeg_data_3.csv',
 './EEG FOLDER/Happy\\ha_eeg_data_16.csv',
 './EEG FOLDER/Happy\\ha_eeg_data_19.csv',
 './EEG FOLDER/Happy\\ha_eeg_data_20.csv',
 './EEG FOLDER/Happy\\ha_eeg_data_22.csv']

In [12]:
def data_generator(file_list, batch_size=20):
    i = 0
    while True:
        if i * batch_size >= len(file_list):  # This loop is used to run the generator indefinitely.
            i = 0
            np.random.shuffle(file_list)
        else:
            file_chunk = file_list[i * batch_size:(i + 1) * batch_size]
            data = []
            labels = []
            label_classes = ["Neutral", "Sad", "Fear", "Happy"]
            for file_path in file_chunk:
                directory_path = os.path.dirname(file_path)
                label = os.path.basename(directory_path)  # This should correctly extract the folder name as the label
                label_index = label_classes.index(label)  # Get the index of the label in label_classes
                temp = pd.read_csv(file_path)
                temp = temp.iloc[:, 1:].values.reshape(400, 62, 1)
                data.append(temp)
                labels.append(label_index)
            data = np.asarray(data).reshape(-1, 400, 62, 1)
            labels = np.asarray(labels)
            yield data, labels
            i += 1


In [13]:
generated_data = data_generator(files, batch_size = 100)

In [14]:
num = 0
for data, labels in generated_data:
    print(data.shape, labels.shape)
    print(labels, "<--Labels")  # Just to see the lables
    print()
    num = num + 1
    if num > 5: break

(100, 400, 62, 1) (100,)
[2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3] <--Labels

(100, 400, 62, 1) (100,)
[3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] <--Labels

(100, 400, 62, 1) (100,)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] <--Labels

(60, 400, 62, 1) (60,)
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] <--Labels

(100, 400, 62, 1) (100,)
[1 3 1 0 2 2 1 2 0 3 0 3 2 0 0 0 1 1 1 3 1 0 3 3 0 1 3 3 1 2 2 2 1 2 2 2 2
 0 1 2 3 1 2 3 1 0 3 3 

In [15]:
import tensorflow as tf


In [22]:
def tf_data_generator(file_list, batch_size = 20):
    i = 0
    while True:
        if i*batch_size >= len(file_list):  
            i = 0
            np.random.shuffle(file_list)
        else:
            file_chunk = file_list[i*batch_size:(i+1)*batch_size] 
            data = []
            labels = []
            label_classes = tf.constant(["Neutral", "Sad", "Fear", "Happy"]) # This line has changed.
            for file in file_chunk:
                directory_path = os.path.dirname(file)
                label = os.path.basename(directory_path)  # This should correctly extract the folder name as the label
                label_index = tf.where(tf.equal(label_classes, label))  # Find index of label in label_classes
                label_index = tf.squeeze(label_index)  # Remove extra dimensions
                temp = pd.read_csv(open(file,'r'))
                temp = temp.iloc[:, 1:].values  # Convert DataFrame to numpy array after dropping the first column

                # Normalize each feature to zero mean and unit variance
                mean = np.mean(temp, axis=0)
                std = np.std(temp, axis=0)
                normalized_temp = (temp - mean) / (std + 1e-8)  # Adding epsilon to avoid division by zero

                # Reshape for the model
                normalized_temp = normalized_temp.reshape(400, 62, 1)
                data.append(normalized_temp)
           
                labels.append(label_index)

            data = np.asarray(data).reshape(-1,400,62,1)
            labels = np.asarray(labels)
            yield data, labels
            i = i + 1

In [55]:
check_data = tf_data_generator(files, batch_size = 10)

In [56]:
num = 0
for data, labels in check_data:
    print(data.shape, labels.shape)
    print(labels, "<--Labels")
    print()
    num = num + 1
    if num > 5: break


(10, 400, 62, 1) (10,)
[2 0 1 3 1 2 1 1 1 3] <--Labels

(10, 400, 62, 1) (10,)
[2 2 0 3 1 2 1 2 2 2] <--Labels

(10, 400, 62, 1) (10,)
[0 0 1 2 0 0 1 0 3 3] <--Labels

(10, 400, 62, 1) (10,)
[2 3 3 0 2 0 0 1 2 1] <--Labels

(10, 400, 62, 1) (10,)
[1 0 0 1 2 1 0 1 1 1] <--Labels

(10, 400, 62, 1) (10,)
[2 3 2 0 1 2 0 0 2 2] <--Labels



In [113]:
batch_size = 15
dataset = tf.data.Dataset.from_generator(tf_data_generator,args= [files, batch_size],output_types = (tf.float32, tf.float32),
                                                output_shapes = ((None,400,62,1),(None,)))

In [114]:
# Check whether dataset works or not.
num = 0
for data, labels in dataset:
    print(data.shape, labels.shape)
    print(labels)
    print()
    num = num + 1
    if num > 7: break


(15, 400, 62, 1) (15,)
tf.Tensor([2. 0. 1. 3. 1. 2. 1. 1. 1. 3. 2. 2. 0. 3. 1.], shape=(15,), dtype=float32)

(15, 400, 62, 1) (15,)
tf.Tensor([2. 1. 2. 2. 2. 0. 0. 1. 2. 0. 0. 1. 0. 3. 3.], shape=(15,), dtype=float32)

(15, 400, 62, 1) (15,)
tf.Tensor([2. 3. 3. 0. 2. 0. 0. 1. 2. 1. 1. 0. 0. 1. 2.], shape=(15,), dtype=float32)

(15, 400, 62, 1) (15,)
tf.Tensor([1. 0. 1. 1. 1. 2. 3. 2. 0. 1. 2. 0. 0. 2. 2.], shape=(15,), dtype=float32)

(15, 400, 62, 1) (15,)
tf.Tensor([3. 1. 2. 2. 1. 3. 2. 2. 3. 1. 0. 0. 2. 2. 1.], shape=(15,), dtype=float32)

(15, 400, 62, 1) (15,)
tf.Tensor([3. 0. 3. 3. 0. 1. 3. 0. 3. 2. 2. 2. 2. 0. 0.], shape=(15,), dtype=float32)

(15, 400, 62, 1) (15,)
tf.Tensor([0. 0. 1. 2. 1. 2. 2. 0. 0. 1. 0. 0. 3. 3. 3.], shape=(15,), dtype=float32)

(15, 400, 62, 1) (15,)
tf.Tensor([1. 3. 1. 3. 0. 3. 3. 3. 0. 1. 1. 0. 2. 1. 2.], shape=(15,), dtype=float32)



In [115]:
# Building data pipeline and training CNN model¶
import shutil

In [116]:
Neutral_files = glob.glob("./EEG FOLDER/Neutral/*")
Sad_files = glob.glob("./EEG FOLDER/Sad/*")
Fear_files = glob.glob("./EEG FOLDER/Fear/*")
Happy_files = glob.glob("./EEG FOLDER/Happy/*")


In [117]:
from sklearn.model_selection import train_test_split


In [147]:
Neutral_train, Neutral_test = train_test_split(Neutral_files, test_size = .20, random_state = 5)
Sad_train, Sad_test = train_test_split(Sad_files, test_size = .20, random_state = 54)
Fear_train, Fear_test = train_test_split(Fear_files, test_size = .20, random_state = 543)
Happy_train, Happy_test = train_test_split(Happy_files, test_size = .20, random_state = 5432)


In [148]:
Neutral_train, Neutral_val = train_test_split(Neutral_train, test_size = .10, random_state = 1)
Sad_train, Sad_val = train_test_split(Sad_train, test_size = .10, random_state = 12)
Fear_train, Fear_val = train_test_split(Fear_train, test_size = .10, random_state = 123)
Happy_train, Happy_val = train_test_split(Happy_train, test_size = .10, random_state = 1234)


In [149]:
train_file_names = Neutral_train + Sad_train + Fear_train + Happy_train 
validation_file_names = Neutral_val + Sad_val + Fear_val + Happy_val
test_file_names = Neutral_test + Sad_test + Fear_test + Happy_test 

In [150]:
print("Number of train_files:" ,len(train_file_names))
print("Number of validation_files:" ,len(validation_file_names))
print("Number of test_files:" ,len(test_file_names))

Number of train_files: 256
Number of validation_files: 32
Number of test_files: 72


In [168]:
batch_size = 18
train_dataset = tf.data.Dataset.from_generator(tf_data_generator, args = [train_file_names, batch_size], 
                                              output_shapes = ((None,400,62,1),(None,)),
                                              output_types = (tf.float32, tf.float32))

validation_dataset = tf.data.Dataset.from_generator(tf_data_generator, args = [validation_file_names, batch_size],
                                                   output_shapes = ((None,400,62,1),(None,)),
                                                   output_types = (tf.float32, tf.float32))

test_dataset = tf.data.Dataset.from_generator(tf_data_generator, args = [test_file_names, batch_size],
                                             output_shapes = ((None,400,62,1),(None,)),
                                             output_types = (tf.float32, tf.float32))

In [169]:
# Now create the model.
from tensorflow.keras import layers, callbacks

In [175]:
# model = tf.keras.Sequential([
#     layers.Conv2D(16, 3, activation = "relu", input_shape = (400,62,1)),
#     layers.MaxPool2D(2),
#     layers.Conv2D(62, 3, activation = "relu"),
#     layers.MaxPool2D(2),
#     layers.Flatten(),
#     layers.Dense(16, activation = "relu"),
#     layers.Dense(5, activation = "softmax")
# ])
# model.summary()




model = tf.keras.Sequential([
    layers.Conv2D(32, 3, activation="relu", input_shape=(400, 62, 1)),
    layers.MaxPool2D(2),
    layers.Conv2D(64, 3, activation="relu"),
    layers.MaxPool2D(2),
    layers.Conv2D(128, 3, activation="relu"),
    layers.MaxPool2D(2),
    layers.Conv2D(256, 3, activation="relu"),
    layers.MaxPool2D(2),
    layers.Flatten(),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(64, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(5, activation="softmax")
])


# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Define early stopping callback
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',  # Monitor validation loss
    patience=5,          # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True  # Restore weights from the epoch with the best validation loss
)


model.summary()

Model: "sequential_19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_74 (Conv2D)          (None, 398, 60, 32)       320       
                                                                 
 max_pooling2d_68 (MaxPoolin  (None, 199, 30, 32)      0         
 g2D)                                                            
                                                                 
 conv2d_75 (Conv2D)          (None, 197, 28, 64)       18496     
                                                                 
 max_pooling2d_69 (MaxPoolin  (None, 98, 14, 64)       0         
 g2D)                                                            
                                                                 
 conv2d_76 (Conv2D)          (None, 96, 12, 128)       73856     
                                                                 
 max_pooling2d_70 (MaxPoolin  (None, 48, 6, 128)     

In [176]:
# Compile the model.
model.compile(loss = "sparse_categorical_crossentropy", optimizer = "adam", metrics = ["accuracy"])


In [177]:
steps_per_epoch = int(np.ceil(len(train_file_names)/batch_size))
validation_steps = int(np.ceil(len(validation_file_names)/batch_size))
steps = int(np.ceil(len(test_file_names)/batch_size))
print("steps_per_epoch = ", steps_per_epoch)
print("validation_steps = ", validation_steps)
print("steps = ", steps)

steps_per_epoch =  15
validation_steps =  2
steps =  4


In [178]:
model.fit(train_dataset, validation_data = validation_dataset, steps_per_epoch = steps_per_epoch,
         validation_steps = validation_steps, epochs = 100, callbacks=[early_stopping])



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100


<keras.callbacks.History at 0x28b3c7e8460>

In [181]:
test_loss, test_accuracy = model.evaluate(test_dataset, steps = 10)



In [164]:
print("Test loss: ", test_loss)
print("Test accuracy:", test_accuracy)

Test loss:  1.726000189781189
Test accuracy: 0.1875
