# Mount to Drive

In [2]:
# Mount Google Drive at /content/drive; the dataset and saved-model paths used
# later in this notebook live under that mount point. `google.colab` is only
# importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Import required libraries

In [3]:
import pandas as pd
import numpy as np
import os
import re
from matplotlib import pyplot as plt
import scipy.io as sio
import numpy as np
import pickle as pickle
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import normalize
from sklearn.metrics import confusion_matrix, classification_report, f1_score
from scipy.signal import spectrogram
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn import metrics
from scipy import signal
import os
import math

import tensorflow as tf
import keras
from tensorflow.keras.utils import to_categorical
# from keras.wrappers.scikit_learn import KerasClassifier
import keras.backend as K
from keras.models import Sequential, load_model
from keras.layers import Activation, Dropout, Flatten, Dense, LSTM, Conv2D, MaxPooling2D, RepeatVector
import warnings
warnings.filterwarnings('ignore')

# Data Pre-processing

In [4]:
# Directory (on the mounted Drive) that is scanned for the EEG recording CSV files.
dataset_path = '/content/drive/My Drive/Major/auditory-evoked-potential-eeg-biometric-dataset-1.0.0/Filtered_Data/'
# CSV column names of the EEG channels to use; each channel is loaded,
# featurised and classified separately.
channel_names = ['P4', 'Cz', 'F8', 'T7']  # Specify the EEG channel names

In [5]:
def load_eeg_column_data(directory, column_name, min_rows=24000, n_samples=23800):
    """Load one EEG channel from every usable CSV recording in ``directory``.

    A recording is used only if its file name contains a subject id of the
    form ``s<digits>_ex``, it has at least ``min_rows`` rows, and it contains
    ``column_name``. Every other ``.csv`` file is skipped with a printed
    message; non-CSV files are ignored silently.

    Parameters
    ----------
    directory : str
        Folder that is scanned (non-recursively) for ``.csv`` files.
    column_name : str
        Name of the CSV column (EEG channel) to extract.
    min_rows : int, optional
        Minimum number of rows a recording must have to be used.
    n_samples : int, optional
        Number of leading samples kept from each recording, so that all
        recordings have the same length.

    Returns
    -------
    data : numpy.ndarray
        One row per accepted recording, holding its first ``n_samples``
        values of ``column_name``.
    labels : numpy.ndarray
        Zero-based subject label (subject id minus one) for each row of
        ``data``.
    """
    data = []
    labels = []

    # os.listdir() returns entries in arbitrary order. Sorting makes the row
    # order deterministic, so separate calls for different channels yield
    # rows (and labels) in the same order.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.csv'):
            continue

        # Resolve the label before reading the file: a recording whose subject
        # cannot be identified must not be added with a bogus label (it used
        # to end up as -1, which one-hot encoding maps onto the last class).
        match = re.search(r's(\d+)_ex', filename)
        if match is None:
            print(f"No subject id found in file name {filename}. Skipping...")
            continue
        label = int(match.group(1)) - 1

        df = pd.read_csv(os.path.join(directory, filename))

        # Recordings that are too short are dropped rather than padded.
        if len(df) < min_rows:
            print(f"Skipping file {filename} as it doesn't have enough rows.")
            continue

        if column_name not in df.columns:
            print(f"Column '{column_name}' not found in file {filename}. Skipping...")
            continue

        # Truncate to a common length so the rows stack into a 2-D array.
        data.append(df[column_name].to_numpy()[:n_samples])
        labels.append(label)

    return np.array(data), np.array(labels)

In [6]:
# Load each channel separately. Every call re-scans the dataset folder, so the
# labels returned for each channel are kept and compared instead of discarded.
eeg_Data_P4, labels = load_eeg_column_data(dataset_path, channel_names[0])
eeg_Data_Cz, labels_Cz = load_eeg_column_data(dataset_path, channel_names[1])
eeg_Data_F8, labels_F8 = load_eeg_column_data(dataset_path, channel_names[2])
eeg_Data_T7, labels_T7 = load_eeg_column_data(dataset_path, channel_names[3])

# The voting step combines row i of every channel, and `labels` (from the first
# channel) is used as ground truth for all of them. That is only valid if every
# channel produced the same recordings in the same order.
for channel_name, channel_labels in zip(channel_names[1:], (labels_Cz, labels_F8, labels_T7)):
    assert np.array_equal(labels, channel_labels), (
        f"Recordings loaded for channel {channel_name} are not aligned with "
        f"channel {channel_names[0]}"
    )

Skipping file s03_ex06.csv as it doesn't have enough rows.
Skipping file s20_ex01_s03.csv as it doesn't have enough rows.
Skipping file s19_ex01_s03.csv as it doesn't have enough rows.
Skipping file s03_ex06.csv as it doesn't have enough rows.
Skipping file s20_ex01_s03.csv as it doesn't have enough rows.
Skipping file s19_ex01_s03.csv as it doesn't have enough rows.
Skipping file s03_ex06.csv as it doesn't have enough rows.
Skipping file s20_ex01_s03.csv as it doesn't have enough rows.
Skipping file s19_ex01_s03.csv as it doesn't have enough rows.
Skipping file s03_ex06.csv as it doesn't have enough rows.
Skipping file s20_ex01_s03.csv as it doesn't have enough rows.
Skipping file s19_ex01_s03.csv as it doesn't have enough rows.


In [24]:
# Convert the integer subject labels into one-hot (categorical) vectors.
# The number of classes is inferred by to_categorical from the largest label.
Y = to_categorical(labels)

Y.shape

(237, 20)

# Feature Extraction

In [8]:
# Spectrogram
def extract_features(eeg_Data, fs=200, nperseg=400, noverlap=200, n_channels=3):
    """Turn each 1-D EEG recording into an image-like spectrogram tensor.

    For every recording the complex short-time spectrum is computed with
    ``scipy.signal.spectrogram(..., mode='complex')`` and its real part is
    stored. The same 2-D map is replicated across ``n_channels`` identical
    channels in the last axis.

    Parameters
    ----------
    eeg_Data : sequence of 1-D arrays
        Recordings of equal length (one per row).
    fs : float, optional
        Sampling frequency passed to ``spectrogram``.
    nperseg : int, optional
        Segment length passed to ``spectrogram``.
    noverlap : int, optional
        Overlap between segments passed to ``spectrogram``.
    n_channels : int, optional
        Number of identical copies stored in the last axis.

    Returns
    -------
    numpy.ndarray
        Float array of shape
        ``(len(eeg_Data), nperseg // 2 + 1, n_segments, n_channels)``.
        For empty input the first and third dimensions are 0.
    """
    size_dataset = len(eeg_Data)
    f_size = nperseg // 2 + 1  # number of one-sided frequency bins
    if size_dataset == 0:
        # Nothing to transform; the time dimension is unknown without a sample.
        return np.empty((0, f_size, 0, n_channels))

    t_size = (np.size(eeg_Data[0]) - noverlap) // (nperseg - noverlap)
    X_full = np.empty((size_dataset, f_size, t_size, n_channels))

    for i in range(size_dataset):
        # One spectrogram per recording is enough: all channels hold the same map.
        _, _, Sxx = spectrogram(eeg_Data[i], fs, nperseg=nperseg,
                                noverlap=noverlap, mode='complex')
        # Only the real part is kept. Previously this happened implicitly when
        # the complex result was cast into the float array; it is now explicit.
        # NOTE(review): presumably the saved models were trained on these
        # real-part features, so the values are deliberately unchanged - confirm.
        X_full[i] = np.real(Sxx)[:, :, np.newaxis]

    return X_full

In [13]:
# Build the spectrogram feature tensor of every channel in one pass.
X_full_P4, X_full_Cz, X_full_F8, X_full_T7 = (
    extract_features(channel_data)
    for channel_data in (eeg_Data_P4, eeg_Data_Cz, eeg_Data_F8, eeg_Data_T7)
)

# Display the shape of one tensor as a sanity check.
X_full_T7.shape

(237, 201, 118, 3)

In [14]:
# Scale every channel's feature tensor by 1/255, in place.
# NOTE: this cell is not idempotent - running it again divides the already
# scaled arrays a second time. Re-run the feature extraction cell first.
for features in (X_full_P4, X_full_Cz, X_full_F8, X_full_T7):
    features /= 255

# Load Models

In [15]:
models_path = '/content/drive/My Drive/saved_models'

# Collect the HDF5 model files found in the models folder.
model_files = []
for filename in os.listdir(models_path):
    if filename.endswith('.h5'):
        model_files.append(filename)

# Map channel name -> loaded Keras model. The channel name is the second
# underscore-separated token of the file name, with everything from '.h5' removed.
loaded_models = {}
for filename in model_files:
    channel = filename.split('_')[1].partition('.h5')[0]
    model_path = os.path.join(models_path, filename)
    loaded_models[channel] = load_model(model_path)

print("Models loaded!!")
# Show which channels received a model.
print(loaded_models)

Models loaded!!
{'P4': <keras.src.engine.sequential.Sequential object at 0x7ef95ca21870>, 'Cz': <keras.src.engine.sequential.Sequential object at 0x7ef8be1034f0>, 'F8': <keras.src.engine.sequential.Sequential object at 0x7ef8be072bc0>, 'T7': <keras.src.engine.sequential.Sequential object at 0x7ef8b33356c0>}


# Get predictions

In [16]:
# For each channel, run its own model on that channel's features and reduce
# the per-class scores to the index of the highest-scoring class.
pred_Y_P4 = loaded_models["P4"].predict(X_full_P4).argmax(axis=1)

pred_Y_Cz = loaded_models["Cz"].predict(X_full_Cz).argmax(axis=1)

pred_Y_F8 = loaded_models["F8"].predict(X_full_F8).argmax(axis=1)

pred_Y_T7 = loaded_models["T7"].predict(X_full_T7).argmax(axis=1)



In [27]:
# Convert the one-hot (categorical) labels back to integer class indices.
label_Y = Y.argmax(axis=1)

# Voting

In [36]:
import numpy as np

def hard_voting(predictions, threshold=0.75):
    """Combine class predictions by qualified-majority (hard) voting.

    Parameters
    ----------
    predictions : array-like of non-negative int
        Class labels predicted by the individual voters. Lists and tuples are
        accepted as well as NumPy arrays.
    threshold : float, optional
        Minimum fraction of voters that must agree on the most frequent label
        for it to be accepted.

    Returns
    -------
    int
        The most frequent label if at least ``threshold`` of the voters chose
        it, otherwise ``-1`` ("no decision"). ``-1`` is also returned when
        there are no predictions at all.

    Raises
    ------
    ValueError
        Raised by ``np.bincount`` if any prediction is negative.
    """
    # Normalise to an array so list input is handled like array input.
    predictions = np.asarray(predictions)
    if predictions.size == 0:
        # No voters, hence no decision (argmax of an empty count would raise).
        return -1

    # A single bincount gives both the winning label and its vote count.
    # On ties argmax picks the smallest label.
    counts = np.bincount(predictions)
    majority_label = counts.argmax()
    majority_count = counts[majority_label]

    if majority_count >= threshold * predictions.size:
        return majority_label
    return -1


In [37]:
# Arrange the per-channel predictions as one row of four votes per recording.
channel_votes = np.column_stack([pred_Y_P4, pred_Y_Cz, pred_Y_F8, pred_Y_T7])

# Apply hard voting to every recording; -1 marks "no sufficient agreement".
voted_Y = np.array([hard_voting(channel_votes[i]) for i in range(len(label_Y))])

# Plot metrics

In [38]:
# Per-class precision/recall/F1 of the voted predictions against the true labels.
# The -1 row (support 0) comes from recordings where voting returned "no decision".
print(classification_report(label_Y, voted_Y))

              precision    recall  f1-score   support

          -1       0.00      0.00      0.00         0
           0       1.00      0.92      0.96        12
           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00        11
           3       1.00      1.00      1.00        12
           4       0.92      1.00      0.96        12
           5       1.00      0.92      0.96        12
           6       1.00      1.00      1.00        12
           7       1.00      1.00      1.00        12
           8       1.00      1.00      1.00        12
           9       1.00      1.00      1.00        12
          10       1.00      1.00      1.00        12
          11       1.00      1.00      1.00        12
          12       1.00      0.92      0.96        12
          13       1.00      1.00      1.00        12
          14       1.00      1.00      1.00        12
          15       1.00      1.00      1.00        12
          16       1.00    