I utilized the SVM implementation specifically tailored for EEG data from the following GitHub repository: https://github.com/jayavardhanravi/EEG-Data-predection/blob/master/mypart1.py. This resource provided invaluable support for my SVM analysis.

In [None]:
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import os

In [None]:
ec_data_dir = "/content/drive/MyDrive/TD-BRAIN/training_data/data/EC"
eo_data_dir = "/content/drive/MyDrive/TD-BRAIN/training_data/data/EO"
ec_eeg_data = np.load(os.path.join(ec_data_dir, "normalized_epoch_eeg_data.npy"))
eo_eeg_data = np.load(os.path.join(eo_data_dir, "normalized_epoch_eeg_data.npy"))

In [None]:
print(ec_eeg_data.shape)
print(eo_eeg_data.shape)

(4356, 1, 32, 4975)
(4344, 1, 32, 4975)


In [None]:
ec_labels_dir = "/content/drive/MyDrive/TD-BRAIN/training_data/data/EC"
eo_labels_dir = "/content/drive/MyDrive/TD-BRAIN/training_data/data/EO"
ec_eeg_labels = np.load(os.path.join(ec_labels_dir, "labels_data.npy"))
eo_eeg_labels = np.load(os.path.join(eo_labels_dir, "labels_data.npy"))

In [None]:
print(ec_eeg_labels.shape)
print(eo_eeg_labels.shape)

(4356, 2)
(4344, 2)


In [None]:
for label in ec_eeg_labels:
  sample_id = label[0]
  if sample_id not in eo_eeg_labels[:, 0]:
        index_to_remove = np.where(ec_eeg_labels[:, 0] == sample_id)[0]
        ec_eeg_labels = np.delete(ec_eeg_labels, index_to_remove, axis=0)
        ec_eeg_data = np.delete(ec_eeg_data, index_to_remove, axis=0)
print(ec_eeg_labels.shape)
print(ec_eeg_data.shape)

eeg_data = np.concatenate((ec_eeg_data[:, 0], eo_eeg_data[:, 0]), axis=1)
eeg_data.shape

eeg_labels = ec_eeg_labels

(4344, 2)
(4344, 1, 32, 4975)


In [None]:
healthy_count, mdd_count = 0, 0
for sample in eeg_labels:
  if sample[1] == "MDD":
      mdd_count += 1
  else:
      healthy_count += 1

print(f"Number of MDD patient: {mdd_count}")
print(f"Number of Healthy patient: {healthy_count}")

Number of MDD patient: 3780
Number of Healthy patient: 564


### **Converting the labels to binary**
1 -> MDD

0 -> HEALTHY

# **Model**

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt

In [None]:
ll = ec_eeg_labels
encountered_sample_ids = {}
sample_ids_with_more_than_12_entries = []

for index, sample_id in enumerate(ll):
    sample_id_tuple = tuple(sample_id)
    count = encountered_sample_ids.get(sample_id_tuple, 0)
    count += 1
    encountered_sample_ids[sample_id_tuple] = count
    if count > 12:
        sample_ids_with_more_than_12_entries.append((sample_id_tuple, index))

indices_to_remove = [index for _, index in sample_ids_with_more_than_12_entries]
ec_eeg_labels = [sample for i, sample in enumerate(ec_eeg_labels) if i not in indices_to_remove]
eeg_data = [data for i, data in enumerate(eeg_data) if i not in indices_to_remove]
print("Length of filtered ec_eeg_labels:", len(ec_eeg_labels))
print("Length of filtered eeg_data:", len(eeg_data))


###### Undersampling and preparing training data ########
ll = ec_eeg_labels
unique_sample_id = []
encountered_sample_ids = set()
print(len(ll))
for sample_id in ll:
    sample_id_tuple = tuple(sample_id)
    if sample_id_tuple not in encountered_sample_ids:
        unique_sample_id.append(sample_id)
        encountered_sample_ids.add(sample_id_tuple)
print(len(unique_sample_id))

num_samples_minority = 47
indices_maj = [index for index, sample in enumerate(unique_sample_id) if sample[1] == "MDD"]
indices_min = [index for index, sample in enumerate(unique_sample_id) if sample[1] == "HEALTHY"]
undersampled = np.random.choice(indices_maj, num_samples_minority, replace=False)

balanced_data_indices = np.concatenate([indices_min, undersampled])
# print(unique_sample_id)
balanced_unique_sample_id = [unique_sample_id[i] for i in balanced_data_indices]

# Extract all unique sample IDs from train_unique_sample_id
unique_sample_ids = [sample_id[0] for sample_id in balanced_unique_sample_id]
print(len(unique_sample_ids))
# Extract all indices from eeg_labels for sample IDs in train_unique_sample_id
indices = []
for i, sample_id in enumerate(ec_eeg_labels):
  # print(sample_id[0])
  if sample_id[0] in unique_sample_ids:
        indices.append(i)

# Convert indices to a NumPy array
indices = np.array(indices)
X_train = []
y_train = []
for i in indices:
    X_train.append(eeg_data[i])
    y_train.append(eeg_labels[i])

X_train = np.array(X_train)
y_train = np.array(y_train)

# Shuffle together with their indices
permutation = np.random.permutation(len(X_train))
X_train = X_train[permutation]
y_train = y_train[permutation]

print(X_train.shape)
# print(y_train)

sample_ids = []
for sample in y_train:
  sample_ids.append(sample[0])
sample_ids = np.array(sample_ids)
l = np.array([1 if label[1] == "MDD" else 0 for label in y_train])

Length of filtered ec_eeg_labels: 4248
Length of filtered eeg_data: 4248
4248
354
94
(1128, 64, 4975)


# results sigmoid

In [None]:
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

class EEGClassifier:
    def __init__(self):
        # Model geïnitialiseerd met de beste parameters
        self.model = SVC(kernel='sigmoid', C=0.4)

    def train(self, X_train, y_train):
        # Flatten X_train voor SVM gebruik
        X_train_flattened = X_train.reshape(X_train.shape[0], -1)

        # Extract alleen de labels van y_train
        y_labels = np.array([label[1] for label in y_train])  # Selecteer de tweede kolom, die de labels bevat

        # Bereken klasse gewichten
        class_weights = compute_class_weight('balanced', classes=np.unique(y_labels), y=y_labels)
        class_weight_dict = dict(zip(np.unique(y_labels), class_weights))

        # Stel klasse gewichten in en train het model
        self.model.class_weight = class_weight_dict
        self.model.fit(X_train_flattened, y_labels)

    def evaluate(self, X_test, y_test):
        # Flatten X_test voor SVM gebruik
        X_test_flattened = X_test.reshape(X_test.shape[0], -1)
        # Extract alleen de labels van y_test
        y_labels = np.array([label[1] for label in y_test])  # Selecteer de tweede kolom, die de labels bevat
        y_pred = self.model.predict(X_test_flattened)

        return {
            'accuracy': accuracy_score(y_labels, y_pred),
            'precision': precision_score(y_labels, y_pred, average='macro'),
            'recall': recall_score(y_labels, y_pred, average='macro'),
            'f1_score': f1_score(y_labels, y_pred, average='macro'),
            'confusion_matrix': confusion_matrix(y_labels, y_pred)
        }

def main():
    X_train_flattened = X_train.reshape(X_train.shape[0], -1)

    num_splits = 5
    cv = KFold(n_splits=num_splits, shuffle=True, random_state=42)

    classifier = EEGClassifier()

    for fold_idx, (train_index, val_index) in enumerate(cv.split(X_train_flattened), 1):
        print(f"Fold {fold_idx}:")

        X_train_fold = X_train_flattened[train_index]
        y_train_fold = y_train[train_index]
        X_val_fold = X_train_flattened[val_index]
        y_val_fold = y_train[val_index]

        classifier.train(X_train_fold, y_train_fold)

        # Evalueren van de training data
        train_metrics = classifier.evaluate(X_train_fold, y_train_fold)
        print(f'Training Results - Accuracy: {train_metrics["accuracy"]}, Precision: {train_metrics["precision"]}, Recall: {train_metrics["recall"]}, F1 Score: {train_metrics["f1_score"]}')

        # Evalueren van de validatie data
        val_metrics = classifier.evaluate(X_val_fold, y_val_fold)
        print(f'Validation Results - Accuracy: {val_metrics["accuracy"]}, Precision: {val_metrics["precision"]}, Recall: {val_metrics["recall"]}, F1 Score: {val_metrics["f1_score"]}')
        print('Validation Confusion Matrix:')
        print(val_metrics['confusion_matrix'])
        print()

if __name__ == "__main__":
    main()


Fold 1:
Training Results - Accuracy: 0.8137472283813747, Precision: 0.7934795618348816, Recall: 0.8068345656443654, F1 Score: 0.7987804878048781
Validation Results - Accuracy: 0.5486725663716814, Precision: 0.5280353200883002, Recall: 0.5253736114440981, F1 Score: 0.5213455149501661
Validation Confusion Matrix:
[[35 62]
 [40 89]]

Fold 2:
Training Results - Accuracy: 0.7727272727272727, Precision: 0.7684282563314822, Recall: 0.7856641359761497, F1 Score: 0.7679817005306394
Validation Results - Accuracy: 0.5221238938053098, Precision: 0.5221238938053098, Recall: 0.526077725468476, F1 Score: 0.5032967032967033
Validation Confusion Matrix:
[[37 32]
 [76 81]]

Fold 3:
Training Results - Accuracy: 0.6851441241685144, Precision: 0.729023548043721, Recall: 0.7345778486903338, F1 Score: 0.6849892523893144
Validation Results - Accuracy: 0.49557522123893805, Precision: 0.5419788168431929, Recall: 0.541507024265645, F1 Score: 0.4955357142857143
Validation Confusion Matrix:
[[57 24]
 [90 55]]

Fol

# Results rbf

In [None]:
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

class EEGClassifier:
    def __init__(self):
        # Model geïnitialiseerd met de beste parameters
        self.model = SVC(kernel='rbf', C=0.4)

    def train(self, X_train, y_train):
        # Flatten X_train voor SVM gebruik
        X_train_flattened = X_train.reshape(X_train.shape[0], -1)

        # Extract alleen de labels van y_train
        y_labels = np.array([label[1] for label in y_train])  # Selecteer de tweede kolom, die de labels bevat

        # Bereken klasse gewichten
        class_weights = compute_class_weight('balanced', classes=np.unique(y_labels), y=y_labels)
        class_weight_dict = dict(zip(np.unique(y_labels), class_weights))

        # Stel klasse gewichten in en train het model
        self.model.class_weight = class_weight_dict
        self.model.fit(X_train_flattened, y_labels)

    def evaluate(self, X_test, y_test):
        # Flatten X_test voor SVM gebruik
        X_test_flattened = X_test.reshape(X_test.shape[0], -1)
        # Extract alleen de labels van y_test
        y_labels = np.array([label[1] for label in y_test])  # Selecteer de tweede kolom, die de labels bevat
        y_pred = self.model.predict(X_test_flattened)

        return {
            'accuracy': accuracy_score(y_labels, y_pred),
            'precision': precision_score(y_labels, y_pred, average='macro'),
            'recall': recall_score(y_labels, y_pred, average='macro'),
            'f1_score': f1_score(y_labels, y_pred, average='macro'),
            'confusion_matrix': confusion_matrix(y_labels, y_pred)
        }

def main():
    # Hier moet je eigen data laden en aanpassen
    # Bijvoorbeeld:
    # X_train, y_train = load_your_data()

    X_train_flattened = X_train.reshape(X_train.shape[0], -1)

    num_splits = 5
    cv = KFold(n_splits=num_splits, shuffle=True, random_state=42)

    classifier = EEGClassifier()

    for fold_idx, (train_index, val_index) in enumerate(cv.split(X_train_flattened), 1):
        print(f"Fold {fold_idx}:")

        X_train_fold = X_train_flattened[train_index]
        y_train_fold = y_train[train_index]
        X_val_fold = X_train_flattened[val_index]
        y_val_fold = y_train[val_index]

        classifier.train(X_train_fold, y_train_fold)

        # Evalueren van de training data
        train_metrics = classifier.evaluate(X_train_fold, y_train_fold)
        print(f'Training Results - Accuracy: {train_metrics["accuracy"]}, Precision: {train_metrics["precision"]}, Recall: {train_metrics["recall"]}, F1 Score: {train_metrics["f1_score"]}')

        # Evalueren van de validatie data
        val_metrics = classifier.evaluate(X_val_fold, y_val_fold)
        print(f'Validation Results - Accuracy: {val_metrics["accuracy"]}, Precision: {val_metrics["precision"]}, Recall: {val_metrics["recall"]}, F1 Score: {val_metrics["f1_score"]}')
        print('Validation Confusion Matrix:')
        print(val_metrics['confusion_matrix'])
        print()

if __name__ == "__main__":
    main()


Fold 1:
Training Results - Accuracy: 0.6929046563192904, Precision: 0.7360733835452937, Recall: 0.7496558778243861, F1 Score: 0.6921384116641757
Validation Results - Accuracy: 0.5088495575221239, Precision: 0.5377844950213371, Recall: 0.5339646767361944, F1 Score: 0.5037094189566146
Validation Confusion Matrix:
[[69 28]
 [83 46]]

Fold 2:
Training Results - Accuracy: 0.7827050997782705, Precision: 0.7952633481384651, Recall: 0.8124354883499165, F1 Score: 0.781232676011721
Validation Results - Accuracy: 0.5442477876106194, Precision: 0.5752403846153846, Recall: 0.5866795901412352, F1 Score: 0.5376487178977893
Validation Confusion Matrix:
[[48 21]
 [82 75]]

Fold 3:
Training Results - Accuracy: 0.7361419068736141, Precision: 0.7893805309734514, Recall: 0.7930434782608695, F1 Score: 0.7361094721107702
Validation Results - Accuracy: 0.504424778761062, Precision: 0.5834063686824422, Recall: 0.5729246487867177, F1 Score: 0.5012610340479193
Validation Confusion Matrix:
[[66 15]
 [97 48]]

Fol