I utilized the SVM implementation specifically tailored for EEG data from the following GitHub repository: https://github.com/jayavardhanravi/EEG-Data-predection/blob/master/mypart1.py. This resource provided invaluable support for my SVM analysis.

In [None]:
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import os

In [None]:
ec_data_dir = "/content/drive/MyDrive/TD-BRAIN/training_data/data/EC"
eo_data_dir = "/content/drive/MyDrive/TD-BRAIN/training_data/data/EO"
ec_eeg_data = np.load(os.path.join(ec_data_dir, "normalized_epoch_eeg_data.npy"))
eo_eeg_data = np.load(os.path.join(eo_data_dir, "normalized_epoch_eeg_data.npy"))

In [None]:
print(ec_eeg_data.shape)
print(eo_eeg_data.shape)

(4356, 1, 32, 4975)
(4344, 1, 32, 4975)


In [None]:
ec_labels_dir = "/content/drive/MyDrive/TD-BRAIN/training_data/data/EC"
eo_labels_dir = "/content/drive/MyDrive/TD-BRAIN/training_data/data/EO"
ec_eeg_labels = np.load(os.path.join(ec_labels_dir, "labels_data.npy"))
eo_eeg_labels = np.load(os.path.join(eo_labels_dir, "labels_data.npy"))

In [None]:
print(ec_eeg_labels.shape)
print(eo_eeg_labels.shape)

(4356, 2)
(4344, 2)


In [None]:
for label in ec_eeg_labels:
  sample_id = label[0]
  if sample_id not in eo_eeg_labels[:, 0]:
        index_to_remove = np.where(ec_eeg_labels[:, 0] == sample_id)[0]
        ec_eeg_labels = np.delete(ec_eeg_labels, index_to_remove, axis=0)
        ec_eeg_data = np.delete(ec_eeg_data, index_to_remove, axis=0)
print(ec_eeg_labels.shape)
print(ec_eeg_data.shape)

eeg_data = np.concatenate((ec_eeg_data[:, 0], eo_eeg_data[:, 0]), axis=1)
eeg_data.shape

eeg_labels = ec_eeg_labels

(4344, 2)
(4344, 1, 32, 4975)


In [None]:
healthy_count, mdd_count = 0, 0
for sample in eeg_labels:
  if sample[1] == "MDD":
      mdd_count += 1
  else:
      healthy_count += 1

print(f"Number of MDD participants: {mdd_count}")
print(f"Number of Healthy participants: {healthy_count}")

Number of MDD participants: 3780
Number of Healthy participants: 564


#Extracting data for female participants

In [None]:
df_participants = pd.read_pickle('/content/drive/MyDrive/TD-BRAIN/TDBRAIN_participants_V2_data/df_participants.pkl')
female_count = 0

eeg_data_female = []
eeg_label_female = []

for i, labels in enumerate(eeg_labels):
     sample_id = labels[0]
     index = df_participants.loc[df_participants['participants_ID'] == sample_id].index
     if [value for value in df_participants.loc[index, 'gender']][0] == 1:
       eeg_data_female.append(eeg_data[i])
       eeg_label_female.append(eeg_labels[i])

eeg_data_female = np.array(eeg_data_female)
eeg_label_female = np.array(eeg_label_female)

print(eeg_data_female.shape, eeg_label_female.shape)

(1932, 64, 4975) (1932, 2)


In [None]:
healthy_count_female, mdd_count_female = 0, 0
for sample in eeg_label_female:
  if sample[1] == "MDD":
      mdd_count_female += 1
  else:
      healthy_count_female += 1

print(f"Number of MDD female participants: {mdd_count_female}")
print(f"Number of Healthy female participants: {healthy_count_female}")

Number of MDD female participants: 1740
Number of Healthy female participants: 192


# **Model**

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.utils import resample, shuffle

In [None]:
ll = eeg_label_female
encountered_sample_ids = {}
sample_ids_with_more_than_12_entries = []

for index, sample_id in enumerate(ll):
    sample_id_tuple = tuple(sample_id)
    count = encountered_sample_ids.get(sample_id_tuple, 0)
    count += 1
    encountered_sample_ids[sample_id_tuple] = count
    if count > 12:
        sample_ids_with_more_than_12_entries.append((sample_id_tuple, index))

indices_to_remove = [index for _, index in sample_ids_with_more_than_12_entries]
eeg_label_female = [sample for i, sample in enumerate(eeg_label_female) if i not in indices_to_remove]
eeg_data_female = [data for i, data in enumerate(eeg_data_female) if i not in indices_to_remove]
print("Length of filtered eeg_label_female:", len(eeg_label_female))
print("Length of filtered eeg_data_female:", len(eeg_data_female))


###### Undersampling and preparing training data ########
ll = eeg_label_female
unique_sample_id = []
encountered_sample_ids = set()
print(len(ll))
for sample_id in ll:
    sample_id_tuple = tuple(sample_id)
    if sample_id_tuple not in encountered_sample_ids:
        unique_sample_id.append(sample_id)
        encountered_sample_ids.add(sample_id_tuple)
print(len(unique_sample_id))

num_samples_minority = 16
indices_maj = [index for index, sample in enumerate(unique_sample_id) if sample[1] == "MDD"]
indices_min = [index for index, sample in enumerate(unique_sample_id) if sample[1] == "HEALTHY"]
undersampled = np.random.choice(indices_maj, num_samples_minority, replace=False)

balanced_data_indices = np.concatenate([indices_min, undersampled])
# print(unique_sample_id)
balanced_unique_sample_id = [unique_sample_id[i] for i in balanced_data_indices]

# Extract all unique sample IDs from train_unique_sample_id
unique_sample_ids = [sample_id[0] for sample_id in balanced_unique_sample_id]
print(len(unique_sample_ids))
# Extract all indices from eeg_labels for sample IDs in train_unique_sample_id
indices = []
for i, sample_id in enumerate(eeg_label_female):
  # print(sample_id[0])
  if sample_id[0] in unique_sample_ids:
        indices.append(i)

# Convert indices to a NumPy array
indices = np.array(indices)
X_train = []
y_train = []
for i in indices:
    X_train.append(eeg_data_female[i])
    y_train.append(eeg_label_female[i])

X_train = np.array(X_train)
y_train = np.array(y_train)

# Shuffle together with their indices
permutation = np.random.permutation(len(X_train))
X_train = X_train[permutation]
y_train = y_train[permutation]

print(X_train.shape)
# print(y_train)

sample_ids = []
for sample in y_train:
  sample_ids.append(sample[0])
sample_ids = np.array(sample_ids)
l = np.array([1 if label[1] == "MDD" else 0 for label in y_train])

Length of filtered eeg_label_female: 1920
Length of filtered eeg_data_female: 1920
1920
160
32
(384, 64, 4975)


# Results sigmoid

In [None]:
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

class EEGClassifier:
    def __init__(self):
        # Model geïnitialiseerd met de beste parameters
        self.model = SVC(kernel='sigmoid', C=0.4)

    def train(self, X_train, y_train):
        # Flatten X_train voor SVM gebruik
        X_train_flattened = X_train.reshape(X_train.shape[0], -1)

        # Extract alleen de labels van y_train
        y_labels = np.array([label[1] for label in y_train])  # Selecteer de tweede kolom, die de labels bevat

        # Bereken klasse gewichten
        class_weights = compute_class_weight('balanced', classes=np.unique(y_labels), y=y_labels)
        class_weight_dict = dict(zip(np.unique(y_labels), class_weights))

        # Stel klasse gewichten in en train het model
        self.model.class_weight = class_weight_dict
        self.model.fit(X_train_flattened, y_labels)

    def evaluate(self, X_test, y_test):
        # Flatten X_test voor SVM gebruik
        X_test_flattened = X_test.reshape(X_test.shape[0], -1)
        # Extract alleen de labels van y_test
        y_labels = np.array([label[1] for label in y_test])  # Selecteer de tweede kolom, die de labels bevat
        y_pred = self.model.predict(X_test_flattened)

        return {
            'accuracy': accuracy_score(y_labels, y_pred),
            'precision': precision_score(y_labels, y_pred, average='macro'),
            'recall': recall_score(y_labels, y_pred, average='macro'),
            'f1_score': f1_score(y_labels, y_pred, average='macro'),
            'confusion_matrix': confusion_matrix(y_labels, y_pred)
        }

def main():
    # Hier moet je eigen data laden en aanpassen
    # Bijvoorbeeld:
    # X_train, y_train = load_your_data()

    X_train_flattened = X_train.reshape(X_train.shape[0], -1)

    num_splits = 5
    cv = KFold(n_splits=num_splits, shuffle=True, random_state=42)

    classifier = EEGClassifier()

    for fold_idx, (train_index, val_index) in enumerate(cv.split(X_train_flattened), 1):
        print(f"Fold {fold_idx}:")

        X_train_fold = X_train_flattened[train_index]
        y_train_fold = y_train[train_index]
        X_val_fold = X_train_flattened[val_index]
        y_val_fold = y_train[val_index]

        classifier.train(X_train_fold, y_train_fold)

        # Evalueren van de training data
        train_metrics = classifier.evaluate(X_train_fold, y_train_fold)
        print(f'Training Results - Accuracy: {train_metrics["accuracy"]}, Precision: {train_metrics["precision"]}, Recall: {train_metrics["recall"]}, F1 Score: {train_metrics["f1_score"]}')

        # Evalueren van de validatie data
        val_metrics = classifier.evaluate(X_val_fold, y_val_fold)
        print(f'Validation Results - Accuracy: {val_metrics["accuracy"]}, Precision: {val_metrics["precision"]}, Recall: {val_metrics["recall"]}, F1 Score: {val_metrics["f1_score"]}')
        print('Validation Confusion Matrix:')
        print(val_metrics['confusion_matrix'])
        print()

if __name__ == "__main__":
    main()


Fold 1:
Training Results - Accuracy: 0.7937915742793792, Precision: 0.7830207344096234, Recall: 0.8137474229065228, F1 Score: 0.7852391948837948
Validation Results - Accuracy: 0.45132743362831856, Precision: 0.45809792843691155, Recall: 0.45374220374220375, F1 Score: 0.44148596938775514
Validation Confusion Matrix:
[[33 41]
 [21 18]]

Fold 2:
Training Results - Accuracy: 0.8203991130820399, Precision: 0.8045272435897436, Recall: 0.8323713323713324, F1 Score: 0.8107172650352079
Validation Results - Accuracy: 0.4247787610619469, Precision: 0.43742118537200503, Recall: 0.4303508771929825, F1 Score: 0.41581165990614816
Validation Confusion Matrix:
[[31 44]
 [21 17]]

Fold 3:
Training Results - Accuracy: 0.7694013303769401, Precision: 0.7770390070921986, Recall: 0.8125138895062003, F1 Score: 0.7641972329472329
Validation Results - Accuracy: 0.40707964601769914, Precision: 0.4349593495934959, Recall: 0.4362126245847176, F1 Score: 0.40689385037211123
Validation Confusion Matrix:
[[22 48]
 [19

# Results rbf

In [None]:
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

class EEGClassifier:
    def __init__(self):
        # Model geïnitialiseerd met de beste parameters
        self.model = SVC(kernel='rbf', C=0.4)

    def train(self, X_train, y_train):
        # Flatten X_train voor SVM gebruik
        X_train_flattened = X_train.reshape(X_train.shape[0], -1)

        # Extract alleen de labels van y_train
        y_labels = np.array([label[1] for label in y_train])  # Selecteer de tweede kolom, die de labels bevat

        # Bereken klasse gewichten
        class_weights = compute_class_weight('balanced', classes=np.unique(y_labels), y=y_labels)
        class_weight_dict = dict(zip(np.unique(y_labels), class_weights))

        # Stel klasse gewichten in en train het model
        self.model.class_weight = class_weight_dict
        self.model.fit(X_train_flattened, y_labels)

    def evaluate(self, X_test, y_test):
        # Flatten X_test voor SVM gebruik
        X_test_flattened = X_test.reshape(X_test.shape[0], -1)
        # Extract alleen de labels van y_test
        y_labels = np.array([label[1] for label in y_test])  # Selecteer de tweede kolom, die de labels bevat
        y_pred = self.model.predict(X_test_flattened)

        return {
            'accuracy': accuracy_score(y_labels, y_pred),
            'precision': precision_score(y_labels, y_pred, average='macro'),
            'recall': recall_score(y_labels, y_pred, average='macro'),
            'f1_score': f1_score(y_labels, y_pred, average='macro'),
            'confusion_matrix': confusion_matrix(y_labels, y_pred)
        }

def main():
    # Hier moet je eigen data laden en aanpassen
    # Bijvoorbeeld:
    # X_train, y_train = load_your_data()

    X_train_flattened = X_train.reshape(X_train.shape[0], -1)

    num_splits = 5
    cv = KFold(n_splits=num_splits, shuffle=True, random_state=42)

    classifier = EEGClassifier()

    for fold_idx, (train_index, val_index) in enumerate(cv.split(X_train_flattened), 1):
        print(f"Fold {fold_idx}:")

        X_train_fold = X_train_flattened[train_index]
        y_train_fold = y_train[train_index]
        X_val_fold = X_train_flattened[val_index]
        y_val_fold = y_train[val_index]

        classifier.train(X_train_fold, y_train_fold)

        # Evalueren van de training data
        train_metrics = classifier.evaluate(X_train_fold, y_train_fold)
        print(f'Training Results - Accuracy: {train_metrics["accuracy"]}, Precision: {train_metrics["precision"]}, Recall: {train_metrics["recall"]}, F1 Score: {train_metrics["f1_score"]}')

        # Evalueren van de validatie data
        val_metrics = classifier.evaluate(X_val_fold, y_val_fold)
        print(f'Validation Results - Accuracy: {val_metrics["accuracy"]}, Precision: {val_metrics["precision"]}, Recall: {val_metrics["recall"]}, F1 Score: {val_metrics["f1_score"]}')
        print('Validation Confusion Matrix:')
        print(val_metrics['confusion_matrix'])
        print()

if __name__ == "__main__":
    main()


Fold 1:
Training Results - Accuracy: 0.758957654723127, Precision: 0.7720868409393, Recall: 0.7606979113601631, F1 Score: 0.7567874272010962
Validation Results - Accuracy: 0.4675324675324675, Precision: 0.44565217391304346, Recall: 0.45426829268292684, F1 Score: 0.43675289919714544
Validation Confusion Matrix:
[[ 9 27]
 [14 27]]

Fold 2:
Training Results - Accuracy: 0.739413680781759, Precision: 0.8119015047879616, Recall: 0.7523627075351214, F1 Score: 0.7294200599330161
Validation Results - Accuracy: 0.6493506493506493, Precision: 0.6274509803921569, Recall: 0.5921985815602837, F1 Score: 0.586597733147743
Validation Confusion Matrix:
[[10 20]
 [ 7 40]]

Fold 3:
Training Results - Accuracy: 0.7947882736156352, Precision: 0.8139415340604327, Recall: 0.7983391385608699, F1 Score: 0.7928097783586325
Validation Results - Accuracy: 0.5844155844155844, Precision: 0.5731523378582202, Recall: 0.5663474692202461, F1 Score: 0.5631205673758866
Validation Confusion Matrix:
[[14 20]
 [12 31]]

Fold