In [1]:
import sklearn
import mne
import numpy as np
import glob
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


In [7]:
def split_train_test_path_list(data_path, file_name_template, train_ratio):
    file_list = sorted(glob.glob(os.path.join(data_path, file_name_template)))
    np.random.shuffle(file_list)
    split_id = int(len(file_list) * train_ratio)

    train_list = file_list[:split_id]
    test_list = file_list[split_id:]

    return train_list, test_list


In [29]:
def read_eeg_epochs(train_list, test_list):
    epochs_train_list = []
    epochs_test_list = []

    for file_path in train_list:
        with mne.utils.use_log_level("ERROR"):
            epochs_train = mne.read_epochs(file_path, preload=True)
            epochs_train_list.append(epochs_train)

    for file_path in test_list:
        with mne.utils.use_log_level("ERROR"):
            epochs_test = mne.read_epochs(file_path, preload=True)
            epochs_test_list.append(epochs_test)

    epochs_train = mne.concatenate_epochs(epochs_train_list)
    epochs_test = mne.concatenate_epochs(epochs_test_list)

    return epochs_train, epochs_test

In [43]:
def get_X_and_Y_from_epochs(epochs_train, epochs_test, picks=None, t_min = -0.2, t_max = 0.5):

    epochs_up_train_list = epochs_train["up"].get_data(picks=picks, tmin=t_min, tmax=t_max).reshape(len(epochs_train["up"]), -1)
    epochs_inv_train_list = epochs_train["inv"].get_data(picks=picks, tmin=t_min, tmax=t_max).reshape(len(epochs_train["inv"]), -1)

    labels_up_train = [0] * len(epochs_up_train_list)
    labels_inv_train = [1] * len(epochs_inv_train_list)

    X_train = np.concatenate((epochs_up_train_list, epochs_inv_train_list), axis=0)
    y_train = np.concatenate((labels_up_train, labels_inv_train), axis=0)

    epochs_up_test_list = epochs_test["up"].get_data(picks=picks, tmin=t_min, tmax=t_max).reshape(len(epochs_test["up"]), -1)
    epochs_inv_test_list = epochs_test["inv"].get_data(picks=picks, tmin=t_min, tmax=t_max).reshape(len(epochs_test["inv"]), -1)

    labels_up_test = [0] * len(epochs_up_test_list)
    labels_inv_test = [1] * len(epochs_inv_test_list)

    X_test = np.concatenate((epochs_up_test_list, epochs_inv_test_list), axis=0)
    y_test = np.concatenate((labels_up_test, labels_inv_test), axis=0)


    return X_train, X_test, y_train, y_test

In [45]:
def train_and_test_model(X_train, X_test, y_train, y_test):
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    model = LogisticRegression(max_iter=10000)
    model.fit(X_train, y_train)

    # predict test data
    y_test_pred = model.predict(X_test)
    test_score = accuracy_score(y_test, y_test_pred)

    # predict train data
    y_train_pred = model.predict(X_train)
    train_score = accuracy_score(y_train, y_train_pred)

    print(f"test_score: {test_score:.4f}")
    print(f"train_score: {train_score:.4f}")

In [46]:
dir_path = 'D:\studia\magisterka\dane EEG\BADANIE_POLITYCZNE_2022_eeg_bdfy\EEG_preprocessed'
file_name_template = "s*.bdf-epo.fif"
train_ratio = 0.8

train_list, test_list = split_train_test_path_list(dir_path, file_name_template, train_ratio)
epochs_train, epochs_test = read_eeg_epochs(train_list, test_list)
X_train, X_test, y_train, y_test = get_X_and_Y_from_epochs(epochs_train, epochs_test )
train_and_test_model(X_train, X_test, y_train, y_test)


Not setting metadata
18583 matching events found
Applying baseline correction (mode: mean)
Not setting metadata
4753 matching events found
Applying baseline correction (mode: mean)
test_score: 0.7671
train_score: 0.9094
