I utilized the SVM implementation specifically tailored for EEG data from the following GitHub repository: https://github.com/jayavardhanravi/EEG-Data-predection/blob/master/mypart1.py. This resource provided invaluable support for my SVM analysis.

In [None]:
from google.colab import drive

# Mount Google Drive into the Colab runtime; the data paths used further down
# all live under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import os

In [None]:
# Folders holding the two recording conditions (EC and EO sub-directories).
ec_data_dir = "/content/drive/MyDrive/TD-BRAIN/training_data/data/EC"
eo_data_dir = "/content/drive/MyDrive/TD-BRAIN/training_data/data/EO"

# Both folders store their epochs under the same file name, so load them in one pass.
ec_eeg_data, eo_eeg_data = (
    np.load(os.path.join(condition_dir, "normalized_epoch_eeg_data.npy"))
    for condition_dir in (ec_data_dir, eo_data_dir)
)

In [None]:
# Show the EC shape, then the EO shape, one per line.
print(ec_eeg_data.shape, eo_eeg_data.shape, sep="\n")

(4356, 1, 32, 4975)
(4344, 1, 32, 4975)


In [None]:
# Label files sit in the same EC / EO folders as the epoch arrays.
ec_labels_dir = "/content/drive/MyDrive/TD-BRAIN/training_data/data/EC"
eo_labels_dir = "/content/drive/MyDrive/TD-BRAIN/training_data/data/EO"

# Same file name in both folders; load EC first, then EO.
ec_eeg_labels, eo_eeg_labels = (
    np.load(os.path.join(condition_dir, "labels_data.npy"))
    for condition_dir in (ec_labels_dir, eo_labels_dir)
)

In [None]:
# Keep only the EC rows whose participant ID (label column 0) also occurs in
# the EO labels. One vectorised mask replaces the original per-row loop, which
# re-scanned and re-copied the full EC array for every unmatched ID.
ec_has_eo_match = np.isin(ec_eeg_labels[:, 0], eo_eeg_labels[:, 0])
ec_eeg_labels = ec_eeg_labels[ec_has_eo_match]
ec_eeg_data = ec_eeg_data[ec_has_eo_match]
print(ec_eeg_labels.shape)
print(ec_eeg_data.shape)

# The concatenation below pairs EC row i with EO row i and reuses the EC labels
# for the combined row, so both label sets must list the same participant IDs
# in the same order. Fail loudly instead of silently mixing participants.
if not np.array_equal(ec_eeg_labels[:, 0], eo_eeg_labels[:, 0]):
    raise ValueError(
        "EC and EO recordings are not aligned row-by-row: participant IDs differ "
        f"(EC rows: {len(ec_eeg_labels)}, EO rows: {len(eo_eeg_labels)})."
    )

# Drop the singleton axis 1 of each array and stack the two conditions along
# the next axis (axis 1 of the sliced arrays).
eeg_data = np.concatenate((ec_eeg_data[:, 0], eo_eeg_data[:, 0]), axis=1)
# The original cell evaluated `eeg_data.shape` mid-cell, where the value is
# discarded; print it so the shape is actually shown.
print(eeg_data.shape)

eeg_labels = ec_eeg_labels

(4344, 2)
(4344, 1, 32, 4975)


In [None]:
# Tally label rows: those whose second column equals "MDD" versus all others.
# NOTE(review): these are counts of rows in eeg_labels, not of unique
# participant IDs, even though the messages say "participants".
mdd_count = sum(1 for label_row in eeg_labels if label_row[1] == "MDD")
healthy_count = len(eeg_labels) - mdd_count

print(f"Number of MDD participants: {mdd_count}")
print(f"Number of Healthy participants: {healthy_count}")

Number of MDD participants: 3780
Number of Healthy participants: 564


## Extracting data for male participants

In [None]:
# NOTE(security): pd.read_pickle can execute arbitrary code stored in the
# file; only load participant tables that come from a trusted source.
df_participants = pd.read_pickle('/content/drive/MyDrive/TD-BRAIN/TDBRAIN_participants_V2_data/df_participants.pkl')

# Gender code selected by this section.
# NOTE(review): the section title says "male"; confirm against the dataset
# codebook that code 0 really denotes male participants.
MALE_GENDER_CODE = 0

# Build the participant-ID -> gender lookup once instead of scanning the whole
# table for every row. setdefault keeps the first table row per ID, matching
# the original "[0]" selection when an ID is listed more than once.
gender_by_id = {}
for participant_id, gender in zip(df_participants['participants_ID'], df_participants['gender']):
    gender_by_id.setdefault(participant_id, gender)

# The original code died with a bare IndexError when an ID was absent from the
# participant table; report which IDs are missing instead.
missing_ids = sorted({label[0] for label in eeg_labels if label[0] not in gender_by_id})
if missing_ids:
    raise KeyError(f"Participant IDs missing from df_participants: {missing_ids}")

male_mask = np.array(
    [gender_by_id[label[0]] == MALE_GENDER_CODE for label in eeg_labels],
    dtype=bool,
)

# Boolean-mask indexing copies the selected rows once; the original built a
# Python list of rows and then copied it again with np.array.
eeg_data_male = eeg_data[male_mask]
eeg_label_male = eeg_labels[male_mask]

# The original initialised male_count to 0 and never updated it; store the
# number of selected rows so the name carries a meaningful value.
male_count = int(male_mask.sum())

print(eeg_data_male.shape, eeg_label_male.shape)

(2412, 64, 4975) (2412, 2)


In [None]:
# Tally the selected label rows: second column equal to "MDD" versus all others.
# NOTE(review): these are counts of rows in eeg_label_male, not of unique
# participant IDs, even though the messages say "participants".
mdd_count_male = sum(1 for label_row in eeg_label_male if label_row[1] == "MDD")
healthy_count_male = len(eeg_label_male) - mdd_count_male

print(f"Number of MDD male participants: {mdd_count_male}")
print(f"Number of Healthy male participants: {healthy_count_male}")

Number of MDD male participants: 2040
Number of Healthy male participants: 372


# Model

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.utils import resample, shuffle

In [None]:
# Maximum number of rows kept per (participant ID, label) pair; any further
# rows of the same pair are dropped, in order of appearance.
MAX_ROWS_PER_PARTICIPANT = 12

# Work on arrays so rows can be dropped with one boolean mask.
eeg_label_male = np.asarray(eeg_label_male)
eeg_data_male = np.asarray(eeg_data_male)

ll = eeg_label_male
encountered_sample_ids = {}  # (ID, label) -> number of rows seen so far
sample_ids_with_more_than_12_entries = []
keep_row = np.ones(len(ll), dtype=bool)

for index, sample_id in enumerate(ll):
    sample_id_tuple = tuple(sample_id)
    count = encountered_sample_ids.get(sample_id_tuple, 0) + 1
    encountered_sample_ids[sample_id_tuple] = count
    if count > MAX_ROWS_PER_PARTICIPANT:
        sample_ids_with_more_than_12_entries.append((sample_id_tuple, index))
        keep_row[index] = False

indices_to_remove = [index for _, index in sample_ids_with_more_than_12_entries]

# A boolean mask replaces the original `i not in indices_to_remove` list scans,
# which were quadratic in the number of rows, and keeps the results as NumPy
# arrays rather than Python lists of rows.
eeg_label_male = eeg_label_male[keep_row]
eeg_data_male = eeg_data_male[keep_row]
print("Length of filtered eeg_label_male:", len(eeg_label_male))
print("Length of filtered eeg_data_male:", len(eeg_data_male))


###### Undersampling and preparing training data ########
# Seed for the random undersampling and shuffling below, so that a fresh
# Restart & Run All selects the same participants and row order every time.
UNDERSAMPLING_SEED = 42
undersampling_rng = np.random.default_rng(UNDERSAMPLING_SEED)

# Collect each distinct (participant ID, label) row once, in first-seen order.
ll = eeg_label_male
unique_sample_id = []
encountered_sample_ids = set()
print(len(ll))
for sample_id in ll:
    sample_id_tuple = tuple(sample_id)
    if sample_id_tuple not in encountered_sample_ids:
        unique_sample_id.append(sample_id)
        encountered_sample_ids.add(sample_id_tuple)
print(len(unique_sample_id))

# Balance at participant level: keep every "HEALTHY" entry and draw the same
# number of "MDD" entries at random. The draw size follows the data instead of
# the previously hard-coded 31, and is capped by the number of "MDD" entries so
# the draw cannot fail when that class is the smaller one.
indices_maj = [index for index, sample in enumerate(unique_sample_id) if sample[1] == "MDD"]
indices_min = [index for index, sample in enumerate(unique_sample_id) if sample[1] == "HEALTHY"]
num_samples_minority = min(len(indices_min), len(indices_maj))
undersampled = undersampling_rng.choice(indices_maj, num_samples_minority, replace=False)

# Explicit integer dtype keeps the result usable as list indices even when one
# of the two parts is empty.
balanced_data_indices = np.concatenate([
    np.asarray(indices_min, dtype=int),
    np.asarray(undersampled, dtype=int),
])
balanced_unique_sample_id = [unique_sample_id[i] for i in balanced_data_indices]

# Extract all unique sample IDs from balanced_unique_sample_id
unique_sample_ids = [sample_id[0] for sample_id in balanced_unique_sample_id]
print(len(unique_sample_ids))

# Row positions in eeg_label_male that belong to a selected participant.
# A set makes each membership test O(1) instead of scanning the ID list.
selected_ids = set(unique_sample_ids)
indices = np.array(
    [i for i, sample_id in enumerate(eeg_label_male) if sample_id[0] in selected_ids],
    dtype=int,
)

# Shuffle the row positions first and gather once, so the selected data is
# copied a single time rather than gathered and then re-copied for shuffling.
permutation = undersampling_rng.permutation(len(indices))
shuffled_indices = indices[permutation]
X_train = np.array([eeg_data_male[i] for i in shuffled_indices])
y_train = np.array([eeg_label_male[i] for i in shuffled_indices])

print(X_train.shape)

# Per-row participant IDs and binary targets (1 = "MDD", 0 = anything else),
# in the same order as X_train.
sample_ids = np.array([sample[0] for sample in y_train])
l = np.array([1 if label[1] == "MDD" else 0 for label in y_train])


Length of filtered eeg_label_male: 2328
Length of filtered eeg_data_male: 2328
2328
194
62
(744, 64, 4975)



# Results

In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight

class EEGClassifier:
    """SVM classifier for multi-dimensional EEG samples.

    Every sample is flattened to a 1-D feature vector before it is handed to
    scikit-learn's SVC. Class weights are balanced on the training labels.

    Args:
        C: Regularisation strength passed to SVC.
        kernel: Kernel name passed to SVC.
    """

    def __init__(self, C=0.4, kernel='sigmoid'):
        # Hyperparameters live here only; train() previously repeated them.
        self.C = C
        self.kernel = kernel
        # Unfitted placeholder; train() replaces it with a freshly fitted model.
        self.model = self._build_model()

    def _build_model(self):
        """Create an unfitted SVC with the configured hyperparameters."""
        # class_weight='balanced' yields the same weights as the former manual
        # compute_class_weight('balanced', ...) dictionary.
        return SVC(C=self.C, kernel=self.kernel, class_weight='balanced')

    @staticmethod
    def _flatten(X):
        """Reshape (n_samples, ...) into (n_samples, n_features) for the SVM."""
        return X.reshape(X.shape[0], -1)

    def train(self, X_train, y_train):
        """Fit a fresh SVM on the given samples.

        Args:
            X_train: Array whose first axis indexes samples.
            y_train: Class label for every sample in X_train.
        """
        # A new estimator per call, so nothing carries over between folds.
        self.model = self._build_model()
        self.model.fit(self._flatten(X_train), y_train)

    def evaluate(self, X_test, y_test):
        """Predict on X_test and score the predictions against y_test.

        Returns:
            dict with the keys 'accuracy', 'precision', 'recall', 'f1_score'
            and 'confusion_matrix'.
        """
        y_pred = self.model.predict(self._flatten(X_test))

        # zero_division=0 returns the same 0.0 as the default setting when a
        # score is undefined, without emitting a warning on every fold.
        return {
            'accuracy': accuracy_score(y_test, y_pred),
            'precision': precision_score(y_test, y_pred, zero_division=0),
            'recall': recall_score(y_test, y_pred, zero_division=0),
            'f1_score': f1_score(y_test, y_pred, zero_division=0),
            'confusion_matrix': confusion_matrix(y_test, y_pred)
        }

def main():
    """Run 5-fold cross-validation of EEGClassifier and print per-fold metrics.

    Reads the notebook globals X_train (samples), l (binary labels) and
    sample_ids (participant ID per sample). Folds are stratified on the label
    and grouped by participant ID, so rows of one participant never appear in
    both the training and the validation part of a fold. The previous
    row-level StratifiedKFold allowed that overlap, which leaks participant
    information into validation.
    """
    # Imported locally because the notebook's import cells only bring in
    # StratifiedKFold. Requires scikit-learn >= 1.0.
    from sklearn.model_selection import StratifiedGroupKFold

    classifier = EEGClassifier()
    num_splits = 5
    cv = StratifiedGroupKFold(n_splits=num_splits, shuffle=True, random_state=42)

    for fold_idx, (train_index, val_index) in enumerate(cv.split(X_train, l, groups=sample_ids), 1):
        print(f"Fold {fold_idx}:")

        X_train_fold = X_train[train_index]
        y_train_fold = l[train_index]
        X_val_fold = X_train[val_index]
        y_val_fold = l[val_index]

        # train() builds a fresh model, so folds do not influence each other.
        classifier.train(X_train_fold, y_train_fold)

        train_metrics = classifier.evaluate(X_train_fold, y_train_fold)
        print(f'Training Results - Accuracy: {train_metrics["accuracy"]}, Precision: {train_metrics["precision"]}, Recall: {train_metrics["recall"]}, F1 Score: {train_metrics["f1_score"]}')

        val_metrics = classifier.evaluate(X_val_fold, y_val_fold)
        print(f'Validation Results - Accuracy: {val_metrics["accuracy"]}, Precision: {val_metrics["precision"]}, Recall: {val_metrics["recall"]}, F1 Score: {val_metrics["f1_score"]}')
        print('Validation Confusion Matrix:')
        print(val_metrics['confusion_matrix'])
        print()

if __name__ == "__main__":
    main()

Fold 1:
Training Results - Accuracy: 0.746218487394958, Precision: 0.8348623853211009, Recall: 0.6127946127946128, F1 Score: 0.7067961165048543
Validation Results - Accuracy: 0.48322147651006714, Precision: 0.4782608695652174, Recall: 0.29333333333333333, F1 Score: 0.36363636363636365
Validation Confusion Matrix:
[[50 24]
 [53 22]]

Fold 2:
Training Results - Accuracy: 0.7966386554621848, Precision: 0.7666666666666667, Recall: 0.8518518518518519, F1 Score: 0.8070175438596491
Validation Results - Accuracy: 0.4429530201342282, Precision: 0.45454545454545453, Recall: 0.5333333333333333, F1 Score: 0.49079754601226994
Validation Confusion Matrix:
[[26 48]
 [35 40]]

Fold 3:
Training Results - Accuracy: 0.7781512605042017, Precision: 0.8705357142857143, Recall: 0.6543624161073825, F1 Score: 0.7471264367816092
Validation Results - Accuracy: 0.5302013422818792, Precision: 0.5454545454545454, Recall: 0.32432432432432434, F1 Score: 0.4067796610169491
Validation Confusion Matrix:
[[55 20]
 [50 24