In [1]:
import os
import warnings

# Move the working directory up to the project root when the notebook is
# started from a nested `Notebooks` / `Notebooks/Modeling` folder.
# Only the LAST component of the current directory is inspected (instead of
# splitting the absolute path on '/'): this is independent of the OS path
# separator and keeps the cell idempotent - re-running it from the project
# root is a no-op even when some ancestor directory happens to be called
# "Modeling" or "Notebooks".
for _nested_dir in ('Modeling', 'Notebooks'):
    if os.path.basename(os.getcwd()) == _nested_dir:
        os.chdir('..')

# Absolute path of the (possibly just changed) working directory.
project_root = os.path.abspath("")

# NOTE(review): this silences every warning category for the whole kernel
# session, including convergence/deprecation warnings from later cells.
warnings.filterwarnings('ignore')
# The dataset class is imported only after the chdir above
# (presumably so that the `Scripts` package is resolved from the project root - confirm).
from Scripts.Data_Loader import EIRDataset

EIR_Dataset = EIRDataset('./Generated/Data_Train/', task_type='geometric', n_jobs=72) # task type can be `geometric` or `random` or `all`

Processing exp_paths: 100%|██████████| 31/31 [00:00<00:00, 153.96it/s]
Loading .fif files: 100%|██████████| 434/434 [00:19<00:00, 21.73it/s]


In [2]:
def resample_df(EIR_Dataset: EIRDataset, freq: int):
    """Call ``.resample(freq)`` on the EEG part of every dataset item.

    Every item is fetched by integer index and unpacked into exactly five
    fields; only the first one (the EEG sample) is used. The value returned by
    ``resample`` is discarded and the function itself returns ``None``.

    NOTE(review): this only has a lasting effect if ``EIR_Dataset[i]`` hands
    out the stored objects (not copies) and ``resample`` works in place -
    confirm against the dataset implementation.

    Args:
        EIR_Dataset: indexable dataset supporting ``len()``.
        freq: target sampling frequency passed to ``resample``.
    """
    n_items = len(EIR_Dataset)
    for idx in range(n_items):
        eeg_sample, _eye_sample, _metadata, _label, _img = EIR_Dataset[idx]
        eeg_sample.resample(freq)


resample_df(EIR_Dataset, 256)

In [3]:
import numpy as np
# Load the precomputed Morlet spectrum records from the .npz archive.
# Records are stored under keys suffixed with a running index
# (`power_0`, `phase_0`, ..., `power_1`, ...); reading stops at the first
# index for which no `power_<i>` key exists.
results_arr = []
# The archive is opened through a context manager so that the underlying
# file handle is closed deterministically (np.load on an .npz returns a lazy
# NpzFile; each `loaded[key]` access below reads that entry from the archive).
with np.load(os.path.join(project_root, 'Generated', 'Spectrums', 'exec_morlets.npz')) as loaded:
    i = 0
    while f'power_{i}' in loaded:
        # Field order inside a record:
        # [power, phase, subject_id, trial_id, gender, handiness, age, label, img, task_type]
        results_arr.append([
            loaded[f'power_{i}'],
            loaded[f'phase_{i}'],
            int(loaded[f'subject_id_{i}']),
            int(loaded[f'trial_id_{i}']),
            str(loaded[f'gender_{i}']),
            str(loaded[f'handiness_{i}']),
            int(loaded[f'age_{i}']),
            int(loaded[f'label_{i}']),
            loaded[f'img_{i}'],
            str(loaded[f'task_type_{i}']),
        ])
        i += 1

# Expose the fields of the first record as module-level names
# (raises IndexError when the archive contained no records).
power, phase, s_id, t_id, gender, handiness, age, label, img, task_type = results_arr[0]
del loaded

In [4]:
# Longest time extent (last axis) kept from each power/phase array.
max_len = 309

# Only the first len(EIR_Dataset) records are used; any surplus records in
# `results_arr` are ignored.
usable_records = results_arr[:len(EIR_Dataset)]

# Record layout: index 0 = power, 1 = phase, 2 = subject id, 3 = trial id, 7 = label.
power_wav = np.array([record[0][:, :, :max_len] for record in usable_records])
phase_wav = np.array([record[1][:, :, :max_len] for record in usable_records])
subj = [record[2] for record in usable_records]
trial = [record[3] for record in usable_records]
label_wav = [record[7] for record in usable_records]
# Declared but never filled in this cell.
tasks = []

In [5]:
# Swap axes 1 and 2 of the 4-D wavelet arrays and flatten them to 3-D.
# NOTE(review): the target shape is built from the sizes of the ORIGINAL
# (pre-transpose) axes - axis 1 is kept and axes 2 and 3 are merged - although
# after the transpose the former axis 2 comes first. Unless axes 1 and 2 have
# the same length this is not a plain "merge the two trailing axes" of the
# transposed array, so values of different rows get interleaved. Confirm the
# intended layout (either drop the transpose or reshape with the transposed
# shape); behaviour is left unchanged here.
_power_shape = power_wav.shape
power_wav_combined = np.reshape(
    np.transpose(power_wav, (0, 2, 1, 3)),
    (_power_shape[0], _power_shape[1], _power_shape[2] * _power_shape[3]),
)

_phase_shape = phase_wav.shape
phase_wav_combined = np.reshape(
    np.transpose(phase_wav, (0, 2, 1, 3)),
    (_phase_shape[0], _phase_shape[1], _phase_shape[2] * _phase_shape[3]),
)

In [6]:
import numpy as np
import scipy
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from pyriemann.estimation import XdawnCovariances
from pyriemann.tangentspace import TangentSpace
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn import metrics
import matplotlib.pyplot as plt
from pyriemann.estimation import Covariances
from typing import Union

n_components = 4


class ModelCovariance:
    """Covariance -> tangent space -> classifier model over one or more feature groups.

    With ``feature_groups == 1`` the model is a plain three-step pipeline
    (covariance estimator, ``TangentSpace``, classifier) fed with a single
    array. Otherwise the input is a sequence with one array per feature group;
    every group gets its own covariance estimator and tangent-space projection
    (see ``ParallelCovariances``) and the resulting feature vectors are
    concatenated before the classifier.
    """

    class ParallelCovariances(BaseEstimator, TransformerMixin):
        """Fit one covariance + tangent-space pipeline per feature group.

        Args:
            n_filters: ``nfilter`` for ``XdawnCovariances`` groups; either a
                single value shared by all groups or a list/tuple indexed by
                group position. Ignored for other covariance classes.
            covariance_type: list/tuple with one covariance estimator class per
                group, or a single class that is used for every group.
        """

        def __init__(self, n_filters, covariance_type: Union[list, type] = Covariances):
            self.covariance_type = covariance_type
            self.n_filters = n_filters
            self.cov = []
            self.cov_pipeline = []

            # A list/tuple fixes the number of groups, so the estimators are
            # built immediately. A single class is expanded in fit(), once the
            # number of groups is known from the data.
            if isinstance(covariance_type, (list, tuple)):
                self.cov = self._build_estimators(len(covariance_type))

        def _filters_for(self, group_idx):
            """Return the Xdawn filter count configured for one group."""
            if isinstance(self.n_filters, (list, tuple)):
                return self.n_filters[group_idx]
            return self.n_filters

        def _build_estimators(self, n_groups):
            """Instantiate one covariance estimator per feature group."""
            if isinstance(self.covariance_type, (list, tuple)):
                cov_types = list(self.covariance_type)
            else:
                cov_types = [self.covariance_type] * n_groups

            estimators = []
            for group_idx, cov_type in enumerate(cov_types):
                if cov_type is XdawnCovariances:
                    estimators.append(cov_type(nfilter=self._filters_for(group_idx),
                                               estimator='lwf', xdawn_estimator='lwf'))
                else:
                    estimators.append(cov_type(estimator='lwf'))
            return estimators

        def fit(self, X, y):
            """Fit the per-group pipelines.

            Args:
                X: sequence with one array per feature group.
                y: labels shared by all groups.

            Raises:
                ValueError: if ``X`` has more groups than configured estimators.
            """
            n_groups = len(X)
            if not isinstance(self.covariance_type, (list, tuple)):
                self.cov = self._build_estimators(n_groups)
            if n_groups > len(self.cov):
                raise ValueError(
                    f"Got {n_groups} feature groups but only {len(self.cov)} covariance estimators are configured.")

            self.cov_pipeline = []
            for group_idx in range(n_groups):
                group_pipeline = Pipeline([
                    ('covariance', self.cov[group_idx]),
                    ('tangent', TangentSpace())])
                group_pipeline.fit(X[group_idx], y)
                self.cov_pipeline.append(group_pipeline)
            return self

        def transform(self, X):
            """Project every group and concatenate the features along axis 1.

            Raises:
                ValueError: if the number of groups differs from the number of
                    fitted pipelines (also covers calling before ``fit``).
            """
            if len(X) != len(self.cov_pipeline):
                raise ValueError(
                    f"Got {len(X)} feature groups but {len(self.cov_pipeline)} pipelines were fitted.")
            projected = [self.cov_pipeline[group_idx].transform(X[group_idx])
                         for group_idx in range(len(X))]
            return np.concatenate(projected, axis=1)

    def __init__(self, feature_groups: int = 3, n_filters: Union[list, int, None] = 4, covariances: Union[list, type] = XdawnCovariances,
                 Classifier: Union[list, BaseEstimator, None] = None):
        """Build the underlying sklearn pipeline.

        Args:
            feature_groups: number of input feature groups.
            n_filters: Xdawn filter count; an int is broadcast to all groups.
            covariances: covariance estimator class, or one class per group.
            Classifier: final estimator. ``None`` (default) creates a fresh
                ``LogisticRegression(C=3, class_weight='balanced', max_iter=300)``
                per model, so that models never share one estimator instance
                through a default argument.
        """
        if Classifier is None:
            Classifier = LogisticRegression(C=3, class_weight='balanced', max_iter=300)

        self.feature_groups = feature_groups
        self.covariances = covariances
        self.n_filters = n_filters
        if feature_groups == 1:
            # Single group: parameters are expected as scalars; a one-element
            # list/tuple is tolerated and unwrapped.
            cov_class = covariances
            if isinstance(cov_class, (list, tuple)) and len(cov_class) == 1:
                cov_class = cov_class[0]
            filters = n_filters
            if isinstance(filters, (list, tuple)) and len(filters) == 1:
                filters = filters[0]

            if cov_class is XdawnCovariances:
                cov = cov_class(filters, estimator='lwf', xdawn_estimator='lwf')
            else:
                cov = cov_class(estimator='lwf')
            self.pipeline = Pipeline([
                ('cov', cov),
                ('tang', TangentSpace()),
                ('clf', Classifier)])
        else:
            # Several groups: scalar parameters are broadcast to every group;
            # when lists are passed their length must cover the number of
            # groups actually given to fit().
            if isinstance(n_filters, int):
                self.n_filters = [n_filters] * feature_groups
            if isinstance(covariances, type):
                self.covariances = [covariances] * feature_groups
            self.pipeline = (Pipeline([
                ('cov', self.ParallelCovariances(self.n_filters, self.covariances)),
                ('clf', Classifier)]))

    def get_pipeline(self):
        """Return the underlying sklearn ``Pipeline``."""
        return self.pipeline

    def fit(self, X, y):
        """Fit the pipeline and return ``self``."""
        self.pipeline.fit(X, y)
        return self

    def predict(self, X):
        """Return the pipeline's class predictions for ``X``."""
        return self.pipeline.predict(X)

    def _roc_auc_scores(self, X, preds):
        """Return the scores handed to ``roc_auc_score``.

        ROC AUC is a ranking metric, so continuous scores are used whenever
        the pipeline offers them: the second ``predict_proba`` column for a
        two-column output, else a one-dimensional ``decision_function``.
        Otherwise the hard predictions are used as a fallback.
        """
        if hasattr(self.pipeline, 'predict_proba'):
            proba = np.asarray(self.pipeline.predict_proba(X))
            if proba.ndim == 2 and proba.shape[1] == 2:
                return proba[:, 1]
        if hasattr(self.pipeline, 'decision_function'):
            scores = np.asarray(self.pipeline.decision_function(X))
            if scores.ndim == 1:
                return scores
        return preds

    def evaluate(self, X, true_y, metric: Union[str, list] = 'accuracy'):
        """Score the fitted pipeline on ``X``.

        Args:
            X: input in the same layout that was used for ``fit``.
            true_y: ground-truth labels.
            metric: one metric name, or a list/tuple/set of names out of
                ``'accuracy'``, ``'precision'``, ``'recall'``, ``'f1'``
                (the latter three weighted-averaged) and ``'roc_auc'``.

        Returns:
            A single score for a string ``metric``, otherwise a dict
            ``{name: score}``.

        Raises:
            ValueError: for an unknown metric name.
            TypeError: if ``metric`` is neither a string nor a list/tuple/set.
        """
        preds = self.pipeline.predict(X)

        # Scorers are wrapped in lambdas so that only requested metrics are computed.
        scorers = {
            'accuracy': lambda: metrics.accuracy_score(true_y, preds),
            'precision': lambda: metrics.precision_score(true_y, preds, average='weighted'),
            'recall': lambda: metrics.recall_score(true_y, preds, average='weighted'),
            'f1': lambda: metrics.f1_score(true_y, preds, average='weighted'),
            'roc_auc': lambda: metrics.roc_auc_score(true_y, self._roc_auc_scores(X, preds)),
        }

        def score(name):
            scorer = scorers.get(name) if isinstance(name, str) else None
            if scorer is None:
                raise ValueError(f"Unknown metric: {name}")
            return scorer()

        if isinstance(metric, str):
            return score(metric)
        if isinstance(metric, (list, tuple, set)):
            return {name: score(name) for name in metric}
        raise TypeError(f"metric must be a str or a list of str, got {type(metric).__name__}")

In [None]:
import numpy as np
from itertools import product
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm
from sklearn import metrics
from Scripts import Selectors_From_Dataset as sel

# Per-subject evaluation: for every subject run stratified k-fold CV with
# several fold counts and score five model variants on each fold.
splits = [3, 5, 7, 9]

metrics_list = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']

# Model id -> model. The ids also select the matching entry of
# `inputs_train` / `inputs_test` inside the fold loop.
models = {
                1: ModelCovariance(feature_groups=1, n_filters=4,
                                    covariances=XdawnCovariances,
                                    Classifier=LogisticRegression(C=3, class_weight='balanced', max_iter=300)),
                2: ModelCovariance(feature_groups=3, n_filters=4,
                                    covariances=[XdawnCovariances, Covariances, Covariances],
                                    Classifier=LogisticRegression(C=3, class_weight='balanced', max_iter=300)),
                3: ModelCovariance(feature_groups=2, n_filters=4,
                                    covariances=[XdawnCovariances, Covariances],
                                    Classifier=LogisticRegression(C=3, class_weight='balanced', max_iter=300)),
                4: ModelCovariance(feature_groups=2, n_filters=4,
                                    covariances=[XdawnCovariances, Covariances],
                                    Classifier=LogisticRegression(C=3, class_weight='balanced', max_iter=300)),
                5: ModelCovariance(feature_groups=2, n_filters=4,
                                    covariances=[Covariances, Covariances],
                                    Classifier=LogisticRegression(C=3, class_weight='balanced', max_iter=300)),
            }

# One result list per model id.
results_storage = {model_id: [] for model_id in models}

s_ids = np.unique(np.array([result[2] for result in results_arr]))

# Debug switch: restrict the run to the first three subjects.
CUT_IDS = False
if CUT_IDS:
    s_ids = s_ids[:3]

# Subject id of every row of the wavelet arrays (`subj` is built together
# with `power_wav` / `phase_wav`, one entry per row).
subject_of_row = np.asarray(subj)

for s_id in s_ids:
    print(f"Subject ID: {s_id}")
    X, img, y = sel.get_sample(EIR_Dataset, [s_id])

    # The fold indices below are positions inside THIS subject's sample, so
    # the wavelet arrays (which hold all subjects) must first be narrowed to
    # the subject's rows; indexing the global arrays with fold indices would
    # pair the labels with rows of other trials/subjects.
    # Assumes get_sample returns the subject's trials in the same order as
    # the wavelet rows - TODO confirm against Selectors_From_Dataset.
    subject_rows = np.flatnonzero(subject_of_row == s_id)
    if len(subject_rows) != len(X):
        print(f"ERROR AT: subject_id={s_id}.\n"
              f"ERROR: {len(X)} samples but {len(subject_rows)} wavelet rows for this subject.")
        continue
    power_wav_subject = power_wav_combined[subject_rows]
    phase_wav_subject = phase_wav_combined[subject_rows]

    for n_splits in tqdm(splits):
        # Best effort: a failure (e.g. n_splits larger than every class
        # count) is reported and the next fold count is tried.
        try:
            k_fold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

            for fold, (train_index, test_index) in enumerate(k_fold.split(X, y), start=1):
                X_train, X_test = X[train_index], X[test_index]
                Y_train, Y_test = y[train_index], y[test_index]
                power_wav_train, power_wav_test = power_wav_subject[train_index], power_wav_subject[test_index]
                phase_wav_train, phase_wav_test = phase_wav_subject[train_index], phase_wav_subject[test_index]

                inputs_train = {
                    1: X_train,
                    2: [X_train, power_wav_train, phase_wav_train],
                    3: [X_train, power_wav_train],
                    4: [X_train, phase_wav_train],
                    5: [power_wav_train, phase_wav_train]
                }

                inputs_test = {
                    1: X_test,
                    2: [X_test, power_wav_test, phase_wav_test],
                    3: [X_test, power_wav_test],
                    4: [X_test, phase_wav_test],
                    5: [power_wav_test, phase_wav_test]
                }

                # Train and evaluate every model on this fold.
                for i in models:
                    models[i].fit(inputs_train[i], Y_train)
                    results = models[i].evaluate(inputs_test[i], Y_test, metrics_list)

                    # The fold number comes from the split enumeration; the
                    # storage list accumulates over subjects and fold counts,
                    # so its length cannot be used to derive it.
                    storage_item = {
                        'subject_id': int(s_id),
                        'n_splits': n_splits,
                        'fold': fold}
                    for metric in metrics_list:
                        storage_item[metric] = results[metric]

                    results_storage[i].append(storage_item)

        except Exception as e:
            print(f"ERROR AT: n_splits={n_splits}.\nERROR: {e}")

Subject ID: 1


  0%|          | 0/4 [00:00<?, ?it/s]

	N Splits: 3


 25%|██▌       | 1/4 [02:59<08:57, 179.13s/it]

	N Splits: 5


100%|██████████| 4/4 [08:19<00:00, 125.00s/it]


	N Splits: 7
ERROR AT: n_splits=7.
ERROR: n_splits=7 cannot be greater than the number of members in each class.
	N Splits: 9
ERROR AT: n_splits=9.
ERROR: n_splits=9 cannot be greater than the number of members in each class.
Subject ID: 2


  0%|          | 0/4 [00:00<?, ?it/s]

	N Splits: 3


 25%|██▌       | 1/4 [02:45<08:15, 165.24s/it]

	N Splits: 5


 50%|█████     | 2/4 [08:11<08:40, 260.03s/it]

	N Splits: 7


100%|██████████| 4/4 [16:05<00:00, 241.25s/it]


	N Splits: 9
ERROR AT: n_splits=9.
ERROR: n_splits=9 cannot be greater than the number of members in each class.
Subject ID: 3


  0%|          | 0/4 [00:00<?, ?it/s]

	N Splits: 3


 25%|██▌       | 1/4 [02:21<07:04, 141.57s/it]

	N Splits: 5


 50%|█████     | 2/4 [06:46<07:08, 214.32s/it]

	N Splits: 7


100%|██████████| 4/4 [13:22<00:00, 200.63s/it]


	N Splits: 9
ERROR AT: n_splits=9.
ERROR: n_splits=9 cannot be greater than the number of members in each class.
Subject ID: 4


  0%|          | 0/4 [00:00<?, ?it/s]

	N Splits: 3


 25%|██▌       | 1/4 [02:38<07:56, 158.73s/it]

	N Splits: 5


100%|██████████| 4/4 [07:29<00:00, 112.27s/it]


	N Splits: 7
ERROR AT: n_splits=7.
ERROR: n_splits=7 cannot be greater than the number of members in each class.
	N Splits: 9
ERROR AT: n_splits=9.
ERROR: n_splits=9 cannot be greater than the number of members in each class.
Subject ID: 5


  0%|          | 0/4 [00:00<?, ?it/s]

	N Splits: 3


 25%|██▌       | 1/4 [02:20<07:02, 140.98s/it]

	N Splits: 5


100%|██████████| 4/4 [06:42<00:00, 100.73s/it]


	N Splits: 7
ERROR AT: n_splits=7.
ERROR: n_splits=7 cannot be greater than the number of members in each class.
	N Splits: 9
ERROR AT: n_splits=9.
ERROR: n_splits=9 cannot be greater than the number of members in each class.
Subject ID: 6


  0%|          | 0/4 [00:00<?, ?it/s]

	N Splits: 3


 25%|██▌       | 1/4 [02:41<08:03, 161.31s/it]

	N Splits: 5


100%|██████████| 4/4 [07:11<00:00, 107.76s/it]


	N Splits: 7
ERROR AT: n_splits=7.
ERROR: n_splits=7 cannot be greater than the number of members in each class.
	N Splits: 9
ERROR AT: n_splits=9.
ERROR: n_splits=9 cannot be greater than the number of members in each class.
Subject ID: 7


  0%|          | 0/4 [00:00<?, ?it/s]

	N Splits: 3


 25%|██▌       | 1/4 [02:23<07:09, 143.20s/it]

	N Splits: 5


In [None]:
import json
import os

# Persist the per-subject results as JSON, creating the target folder on demand.
output_path = './Generated/Results/per_subject_results.json'
results_dir = os.path.dirname(output_path)
os.makedirs(results_dir, exist_ok=True)

# Human-readable label for every model id.
model_labels = {}
for model_id in models:
    model_labels[model_id] = f"Model_{model_id}"

# NOTE(review): `data_size` is the length of whatever `X` is bound to when
# this cell runs; confirm this is the size that should be reported.
report = {
    'models': model_labels,
    'metrics': metrics_list,
    'data_size': len(X),
    'results': results_storage
}

with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(report, f, indent=4, ensure_ascii=False)

In [None]:
import numpy as np
from itertools import product
from sklearn.model_selection import LeavePGroupsOut
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm
from sklearn import metrics
from Scripts import Selectors_From_Dataset as sel

# Cross-subject evaluation: Leave-P-Groups-Out over subjects for several P,
# scoring five model variants on every split.
groups_out_variants = [1, 2, 3]

metrics_list = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']

# Model id -> model. The ids also select the matching entry of
# `inputs_train` / `inputs_test` inside the split loop.
models = {
                1: ModelCovariance(feature_groups=1, n_filters=4,
                                    covariances=XdawnCovariances,
                                    Classifier=LogisticRegression(C=3, class_weight='balanced', max_iter=300)),
                2: ModelCovariance(feature_groups=3, n_filters=4,
                                    covariances=[XdawnCovariances, Covariances, Covariances],
                                    Classifier=LogisticRegression(C=3, class_weight='balanced', max_iter=300)),
                3: ModelCovariance(feature_groups=2, n_filters=4,
                                    covariances=[XdawnCovariances, Covariances],
                                    Classifier=LogisticRegression(C=3, class_weight='balanced', max_iter=300)),
                4: ModelCovariance(feature_groups=2, n_filters=4,
                                    covariances=[XdawnCovariances, Covariances],
                                    Classifier=LogisticRegression(C=3, class_weight='balanced', max_iter=300)),
                5: ModelCovariance(feature_groups=2, n_filters=4,
                                    covariances=[Covariances, Covariances],
                                    Classifier=LogisticRegression(C=3, class_weight='balanced', max_iter=300)),
            }

# One result list per model id.
results_storage = {model_id: [] for model_id in models}

s_ids = np.unique(np.array([result[2] for result in results_arr]))

# Debug switch: restrict the run to the first three subjects.
CUT_IDS = False
if CUT_IDS:
    s_ids = s_ids[:3]

# Pass a flat list of subject ids (the same calling convention as the
# single-subject call `get_sample(EIR_Dataset, [s_id])`), not a list that
# wraps the whole id array.
X, img, y = sel.get_sample(EIR_Dataset, list(s_ids))

# Keep only the wavelet rows of the selected subjects so that features and
# groups stay aligned with X when CUT_IDS is enabled.
# Assumes get_sample returns trials in the same order as the wavelet rows -
# TODO confirm against Selectors_From_Dataset.
_subject_of_row = np.asarray(subj)
_selected_rows = np.isin(_subject_of_row, s_ids)
groups = _subject_of_row[_selected_rows]
power_wav_selected = power_wav_combined[_selected_rows]
phase_wav_selected = phase_wav_combined[_selected_rows]
if len(groups) != len(X):
    raise ValueError(
        f"Sample/wavelet mismatch: {len(X)} samples but {len(groups)} wavelet rows for the selected subjects.")

n_subjects = len(np.unique(groups))

for groups_out in groups_out_variants:
    # LeavePGroupsOut needs strictly more groups than it leaves out.
    if groups_out >= n_subjects:
        print(f"ERROR AT: groups_out={groups_out}.\nERROR: only {n_subjects} subjects available.")
        continue

    lpgo = LeavePGroupsOut(n_groups=groups_out)
    # The number of splits is "n_subjects choose groups_out", which equals
    # the number of subjects only for groups_out == 1.
    n_lpgo_splits = lpgo.get_n_splits(groups=groups)
    for train_index, test_index in tqdm(lpgo.split(X, y, groups=groups), total=n_lpgo_splits, desc="LPGO"):
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = y[train_index], y[test_index]
        power_wav_train, power_wav_test = power_wav_selected[train_index], power_wav_selected[test_index]
        phase_wav_train, phase_wav_test = phase_wav_selected[train_index], phase_wav_selected[test_index]

        inputs_train = {
            1: X_train,
            2: [X_train, power_wav_train, phase_wav_train],
            3: [X_train, power_wav_train],
            4: [X_train, phase_wav_train],
            5: [power_wav_train, phase_wav_train]
        }

        inputs_test = {
            1: X_test,
            2: [X_test, power_wav_test, phase_wav_test],
            3: [X_test, power_wav_test],
            4: [X_test, phase_wav_test],
            5: [power_wav_test, phase_wav_test]
        }

        # `groups` is an ndarray, so it can be indexed with the index array
        # (a plain list cannot). All held-out subjects are recorded because
        # a split holds out `groups_out` of them.
        test_subject = int(groups[test_index][0])
        test_subjects = [int(subject_id) for subject_id in np.unique(groups[test_index])]

        # Train and evaluate every model on this split; a failing model is
        # reported and skipped without aborting the run.
        for i in models:
            try:
                models[i].fit(inputs_train[i], Y_train)
                results = models[i].evaluate(inputs_test[i], Y_test, metrics_list)
                storage_item = {
                    'test_subject_id': test_subject,
                    'test_subject_ids': test_subjects,
                    'subjects_in_validation': groups_out
                }
                for metric in metrics_list:
                    storage_item[metric] = results.get(metric, 0.0)
                results_storage[i].append(storage_item)

            except Exception as e:
                print(f"ERROR AT: groups_out={groups_out}, test_subjects={test_subjects}, model={i}.\nERROR: {e}")

In [None]:
import json
import os

# Persist the cross-subject results as JSON, creating the target folder on demand.
output_path = './Generated/Results/cross_subject_results.json'
results_dir = os.path.dirname(output_path)
os.makedirs(results_dir, exist_ok=True)

# Human-readable label for every model id.
model_labels = {}
for model_id in models:
    model_labels[model_id] = f"Model_{model_id}"

report = {
    'models': model_labels,
    'metrics': metrics_list,
    'data_size': len(X),
    'results': results_storage
}

with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(report, f, indent=4, ensure_ascii=False)