In [1]:
import os
import sys
from pathlib import Path

# Put the directory two levels above the current working directory on the
# import path (presumably the repository root, so the `src` package imported
# in the next cell resolves). Appending is skipped when it is already listed.
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [2]:
from src.data.smr_datamodule import SMR_Data
from src.utils.device import print_data_info

In [3]:
# --- Data-loading configuration ---
# Root folder of the SMR recordings. The original Windows location stays the
# default; set the SMR_DATA_DIR environment variable to point the notebook at
# the data on another machine without editing this cell.
data_dir = Path(os.environ.get("SMR_DATA_DIR") or "D:\\SMR\\")
task_name = "LR"
subject_sessions_dict = {"S5": "all"}  # subject -> sessions to load
loading_data_mode = "within_subject"

# Time-interval, frequency-band and channel selections are handed to SMR_Data
# unchanged; their exact syntax is defined by that class.
ival = "2s:11s:3ms"
bands = [8, 13]
chans = ['C?,~*3,~*4', 'FC*,~*3,~*4']  # alternative used during development: "*"

# Per the loader log, noisy channels are averaged with this many neighbours.
fallback_neighbors = 4
transform = None
normalize = {"norm_type": "std", "norm_axis": 0}  # alternative: None (no normalization)

process_noisy_channels = True
ignore_noisy_sessions = False

trial_type = "valid"

# Build the data module from the configuration above.
smr_datamodule = SMR_Data(
    data_dir=data_dir,
    task_name=task_name,
    trial_type=trial_type,
    subject_sessions_dict=subject_sessions_dict,
    loading_data_mode=loading_data_mode,
    ival=ival,
    bands=bands,
    chans=chans,
    fallback_neighbors=fallback_neighbors,
    transform=transform,
    normalize=normalize,
    process_noisy_channels=process_noisy_channels,
    ignore_noisy_sessions=ignore_noisy_sessions,
)

# Look up the sessions available for the requested subjects; the call logs
# the sessions it finds.
subjects_sessions_path_dict = smr_datamodule.collect_subject_sessions(subject_sessions_dict)

INFO:root:Found sessions: ['Session_1', 'Session_2', 'Session_3', 'Session_4', 'Session_5', 'Session_6', 'Session_7'] for subject S5


In [4]:
# Load, preprocess and normalize the configured sessions. In the recorded run
# (log below) this reads all 7 sessions of S5, reduces each session from
# (trials, 62, 12000) to (trials, 10, 3000), stacks them into 943 trials and
# rescales the result to ~zero mean / unit std.
smr_datamodule.prepare_dataloader_1()

INFO:root:Found sessions: ['Session_1', 'Session_2', 'Session_3', 'Session_4', 'Session_5', 'Session_6', 'Session_7'] for subject S5
INFO:root:Subject S5 loading...
INFO:root:Loading session Session_1 ...
INFO:root:Noisy channels found: [24.0], each channel will be averaged with 4 neighbors


noisechan [24.0]


INFO:root:Preprocessing the data ...
INFO:root:Data shape before preprocessing: (103, 62, 12000)
INFO:root:Data shape after preprocessing: (103, 10, 3000)
INFO:root:1/7 sessions loaded
INFO:root:Loading session Session_2 ...


noisechan None


INFO:root:Preprocessing the data ...
INFO:root:Data shape before preprocessing: (135, 62, 12000)
INFO:root:Data shape after preprocessing: (135, 10, 3000)
INFO:root:2/7 sessions loaded
INFO:root:Loading session Session_3 ...


noisechan None


INFO:root:Preprocessing the data ...
INFO:root:Data shape before preprocessing: (133, 62, 12000)
INFO:root:Data shape after preprocessing: (133, 10, 3000)
INFO:root:3/7 sessions loaded
INFO:root:Loading session Session_4 ...


noisechan None


INFO:root:Preprocessing the data ...
INFO:root:Data shape before preprocessing: (136, 62, 12000)
INFO:root:Data shape after preprocessing: (136, 10, 3000)
INFO:root:4/7 sessions loaded
INFO:root:Loading session Session_5 ...


noisechan None


INFO:root:Preprocessing the data ...
INFO:root:Data shape before preprocessing: (149, 62, 12000)
INFO:root:Data shape after preprocessing: (149, 10, 3000)
INFO:root:5/7 sessions loaded
INFO:root:Loading session Session_6 ...


noisechan None


INFO:root:Preprocessing the data ...
INFO:root:Data shape before preprocessing: (138, 62, 12000)
INFO:root:Data shape after preprocessing: (138, 10, 3000)
INFO:root:6/7 sessions loaded
INFO:root:Loading session Session_7 ...


noisechan None


INFO:root:Preprocessing the data ...
INFO:root:Data shape before preprocessing: (149, 62, 12000)
INFO:root:Data shape after preprocessing: (149, 10, 3000)
INFO:root:7/7 sessions loaded
  mask |= (ar1 == a)


(103, 10, 3000)
(238, 10, 3000)
(371, 10, 3000)
(507, 10, 3000)
(656, 10, 3000)
(794, 10, 3000)


INFO:root:Preparing data...
INFO:root:Train data info:


(943, 10, 3000)
data shape:  (943, 10, 3000)
max timepoints:  645.13037109375
min timepoints:  -525.9212036132812
mean timepoints:  0.0985091103180395


INFO:root:Normalizing the data...


std timepoints:  10.404910593014362


INFO:root:Train data info:


data shape:  (943, 10, 3000)
max timepoints:  30.64146242969836
min timepoints:  -30.649377589425654
mean timepoints:  5.2242095807034583e-20
std timepoints:  0.9999999999999994


In [5]:
# Keep a handle on the prepared training set; every experiment cell below
# cross-validates on this object (features) and its `.y` attribute (labels).
train_data = smr_datamodule.train_data

In [6]:
# Sanity check: print shape and max/min/mean/std of the training data.
print_data_info(train_data)

data shape:  (943, 10, 3000)
max timepoints:  30.64146242969836
min timepoints:  -30.649377589425654
mean timepoints:  5.2242095807034583e-20
std timepoints:  0.9999999999999994


In [7]:
import numpy as np

# Class-balance check: distinct labels and how many trials carry each
# (recorded run: 494 trials of class 0, 449 of class 1).
np.unique(train_data.y, return_counts=True)

(array([0, 1]), array([494, 449], dtype=int64))

In [8]:
# Folder that receives the CSV result files written by the experiment cells.
run_path = "./LR_results/run_4/"
# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before any results are written.
os.makedirs(run_path, exist_ok=True)

## BBCpy toolbox

## CSP 

In [9]:
import pandas as pd
from bbcpy.pipeline import make_pipeline
import bbcpy.functions.helpers as helpers
from bbcpy.functions.base import ImportFunc
from bbcpy.functions.spatial import CSP
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# Values of the CSP `excllev` argument to compare; None is the baseline run
# in which no level is passed.
excllev_values = [None, 1.2, 1.8, 2, 4, 8, 10, 20, 100]

# Number of cross-validation folds
n_splits = 6

# Variance along axis 2 of the CSP output (assumed to be the time axis —
# TODO confirm); followed by np.log it forms the feature vector for LDA.
var = ImportFunc(np.var, axis=2)

# Contiguous, unshuffled folds.
cv = KFold(n_splits=n_splits, shuffle=False)

# One [excllev, fold, score] row per fold and per excllev value.
results = []

for excllev in excllev_values:
    # CSP -> variance -> log -> LDA, rebuilt for every excllev value
    csp_pipeline = make_pipeline(
        CSP(n_cmps=4, excllev=excllev, scoring=helpers.evscoring_medvar, select=helpers.evselect_directorscut),
        var,
        np.log,
        LDA(),
    )

    # error_score='raise' surfaces a failing fold instead of recording NaN.
    scores = cross_val_score(csp_pipeline, train_data, train_data.y, cv=cv, error_score='raise')

    # Folds are numbered from 1 in the stored table.
    results.extend([excllev, fold, score] for fold, score in enumerate(scores, start=1))

# Per-fold scores
results_df = pd.DataFrame(results, columns=['excllev', 'fold', 'score'])
file_name = 'excllev_results_csp_bbcpy.csv'
results_df.to_csv(os.path.join(run_path, file_name), index=False)

# Mean and standard deviation per excllev value. excllev=None is stored as NaN,
# and groupby drops NaN keys by default, which would silently omit the baseline
# from the summary; dropna=False keeps it.
summary_df = results_df.groupby('excllev', dropna=False)[['score']].agg(['mean', 'std'])
file_name = 'excllev_summary_csp_bbcpy.csv'
summary_df.to_csv(os.path.join(run_path, file_name))

# Print the results
print(results_df)




    excllev  fold     score
0       NaN     1  0.651899
1       NaN     2  0.611465
2       NaN     3  0.592357
3       NaN     4  0.611465
4       NaN     5  0.598726
5       NaN     6  0.579618
6       1.2     1  0.696203
7       1.2     2  0.624204
8       1.2     3  0.745223
9       1.2     4  0.713376
10      1.2     5  0.694268
11      1.2     6  0.694268
12      1.8     1  0.575949
13      1.8     2  0.617834
14      1.8     3  0.796178
15      1.8     4  0.770701
16      1.8     5  0.700637
17      1.8     6  0.719745
18      2.0     1  0.632911
19      2.0     2  0.630573
20      2.0     3  0.757962
21      2.0     4  0.700637
22      2.0     5  0.687898
23      2.0     6  0.719745
24      4.0     1  0.537975
25      4.0     2  0.585987
26      4.0     3  0.675159
27      4.0     4  0.668790
28      4.0     5  0.738854
29      4.0     6  0.624204
30      8.0     1  0.487342
31      8.0     2  0.636943
32      8.0     3  0.700637
33      8.0     4  0.585987
34      8.0     5  0



## Riemann 

In [10]:
import pyriemann
import sklearn
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

import bbcpy
from bbcpy.functions.artireject import AverageVariance
from bbcpy.pipeline import make_pipeline
from bbcpy.functions.base import ImportFunc


In [11]:
# Contiguous, unshuffled 6-fold cross-validation.
cv = KFold(n_splits=6, shuffle=False)

# Values of the AverageVariance `excllev` argument to compare; None is the
# baseline run in which no level is passed.
excllev_values = [None, 1.2, 1.8, 2, 4, 8, 10, 20, 100]
# Covariance estimator name shared by AverageVariance and the covariance step
# (presumably Ledoit-Wolf shrinkage — TODO confirm against bbcpy).
estimator = 'lwf'

# The covariance step does not depend on excllev, so it is built once and
# reused by every pipeline.
Cov = ImportFunc(bbcpy.functions.statistics.cov, estimator=estimator)

# One [excllev, fold, score] row per fold and per excllev value.
results = []

for excllev in excllev_values:
    # AverageVariance -> covariance matrices -> SVC with log-Euclidean metric.
    # (The previous stand-alone AverageVariance(...) call built an object that
    # was immediately discarded, so it has been removed.)
    riemann_pipeline = make_pipeline(
        AverageVariance(excllev=excllev, estimator=estimator),
        Cov,
        pyriemann.classification.SVC(metric='logeuclid'),
    )

    # error_score='raise' surfaces a failing fold instead of recording NaN.
    scores = cross_val_score(riemann_pipeline, train_data, train_data.y, cv=cv, error_score='raise')

    # Folds are numbered from 1 in the stored table.
    results.extend([excllev, fold, score] for fold, score in enumerate(scores, start=1))




In [12]:
# Per-fold scores of the Riemann experiment
results_df = pd.DataFrame(results, columns=['excllev', 'fold', 'score'])
file_name = 'excllev_results_Riemann_bbcpy.csv'
results_df.to_csv(os.path.join(run_path, file_name), index=False)

# Mean and standard deviation per excllev value. excllev=None is stored as NaN,
# and groupby drops NaN keys by default, which would silently omit the baseline
# from the summary; dropna=False keeps it.
summary_df = results_df.groupby('excllev', dropna=False)[['score']].agg(['mean', 'std'])
file_name = 'excllev_summary_Riemann_bbcpy.csv'
summary_df.to_csv(os.path.join(run_path, file_name))

# Print the results
print(results_df)

    excllev  fold     score
0       NaN     1  0.734177
1       NaN     2  0.707006
2       NaN     3  0.770701
3       NaN     4  0.840764
4       NaN     5  0.885350
5       NaN     6  0.777070
6       1.2     1  0.721519
7       1.2     2  0.713376
8       1.2     3  0.821656
9       1.2     4  0.828025
10      1.2     5  0.840764
11      1.2     6  0.821656
12      1.8     1  0.715190
13      1.8     2  0.700637
14      1.8     3  0.802548
15      1.8     4  0.821656
16      1.8     5  0.840764
17      1.8     6  0.789809
18      2.0     1  0.740506
19      2.0     2  0.713376
20      2.0     3  0.815287
21      2.0     4  0.834395
22      2.0     5  0.847134
23      2.0     6  0.783439
24      4.0     1  0.734177
25      4.0     2  0.719745
26      4.0     3  0.777070
27      4.0     4  0.834395
28      4.0     5  0.859873
29      4.0     6  0.777070
30      8.0     1  0.721519
31      8.0     2  0.707006
32      8.0     3  0.770701
33      8.0     4  0.828025
34      8.0     5  0

## Tangent Riemann

In [13]:
# Contiguous, unshuffled 6-fold cross-validation.
cv = KFold(n_splits=6, shuffle=False)

# Values of the AverageVariance `excllev` argument to compare; None is the
# baseline run in which no level is passed.
excllev_values = [None, 1.2, 1.8, 2, 4, 8, 10, 20, 100]
# Covariance estimator name shared by AverageVariance and the covariance step
# (presumably Ledoit-Wolf shrinkage — TODO confirm against bbcpy).
estimator = 'lwf'

# The covariance step does not depend on excllev, so it is built once and
# reused by every pipeline.
Cov = ImportFunc(bbcpy.functions.statistics.cov, estimator=estimator)

# One [excllev, fold, score] row per fold and per excllev value.
results = []

for excllev in excllev_values:
    # AverageVariance -> covariance matrices -> tangent-space projection -> SVC.
    # (The previous stand-alone AverageVariance(...) call built an object that
    # was immediately discarded, so it has been removed.)
    Triemann_pipeline = make_pipeline(
        AverageVariance(excllev=excllev, estimator=estimator),
        Cov,
        pyriemann.tangentspace.TangentSpace(),
        sklearn.svm.SVC(),
    )

    # error_score='raise' surfaces a failing fold instead of recording NaN.
    scores = cross_val_score(Triemann_pipeline, train_data, train_data.y, cv=cv, error_score='raise')

    # Folds are numbered from 1 in the stored table.
    results.extend([excllev, fold, score] for fold, score in enumerate(scores, start=1))




In [14]:
# Per-fold scores of the tangent-space Riemann experiment
results_df = pd.DataFrame(results, columns=['excllev', 'fold', 'score'])
file_name = 'excllev_results_TRiemann_bbcpy.csv'
results_df.to_csv(os.path.join(run_path, file_name), index=False)

# Mean and standard deviation per excllev value. excllev=None is stored as NaN,
# and groupby drops NaN keys by default, which would silently omit the baseline
# from the summary; dropna=False keeps it.
summary_df = results_df.groupby('excllev', dropna=False)[['score']].agg(['mean', 'std'])
file_name = 'excllev_summary_TRiemann_bbcpy.csv'
summary_df.to_csv(os.path.join(run_path, file_name))

# Print the results
print(results_df)

    excllev  fold     score
0       NaN     1  0.683544
1       NaN     2  0.643312
2       NaN     3  0.751592
3       NaN     4  0.783439
4       NaN     5  0.853503
5       NaN     6  0.707006
6       1.2     1  0.689873
7       1.2     2  0.636943
8       1.2     3  0.719745
9       1.2     4  0.764331
10      1.2     5  0.821656
11      1.2     6  0.675159
12      1.8     1  0.689873
13      1.8     2  0.662420
14      1.8     3  0.783439
15      1.8     4  0.751592
16      1.8     5  0.828025
17      1.8     6  0.707006
18      2.0     1  0.689873
19      2.0     2  0.687898
20      2.0     3  0.802548
21      2.0     4  0.745223
22      2.0     5  0.834395
23      2.0     6  0.738854
24      4.0     1  0.683544
25      4.0     2  0.649682
26      4.0     3  0.751592
27      4.0     4  0.751592
28      4.0     5  0.834395
29      4.0     6  0.713376
30      8.0     1  0.683544
31      8.0     2  0.649682
32      8.0     3  0.751592
33      8.0     4  0.783439
34      8.0     5  0

In [15]:
# NOTE(review): this cell cannot run in this notebook as written. It reads
# `raw`, `epochs`, `epochs_data`, `epochs_data_train`, `labels`, `cv_split`,
# `csp`, `lda` and `plt`, none of which are defined or imported in the cells
# above (the recorded output is `NameError: name 'raw' is not defined`).
# It looks like a sliding-window CSP + LDA evaluation pasted from an
# MNE-style workflow — TODO: define these objects from `train_data` or
# remove the cell.
sfreq = raw.info["sfreq"]
w_length = int(sfreq * 0.5)  # running classifier: window length
w_step = int(sfreq * 0.1)  # running classifier: window step size
# Start sample of every window along axis 2 of `epochs_data`.
w_start = np.arange(0, epochs_data.shape[2] - w_length, w_step)

# One list of per-window scores for each cross-validation split.
scores_windows = []

for train_idx, test_idx in cv_split:
    y_train, y_test = labels[train_idx], labels[test_idx]

    # Fit the spatial filters on the training trials of this split.
    X_train = csp.fit_transform(epochs_data_train[train_idx], y_train)
    # NOTE(review): this X_test is overwritten inside the window loop below
    # before it is ever used.
    X_test = csp.transform(epochs_data_train[test_idx])

    # fit classifier
    lda.fit(X_train, y_train)

    # running classifier: test classifier on sliding window
    score_this_window = []
    for n in w_start:
        # Score the fitted CSP + LDA on samples [n, n + w_length) of the test trials.
        X_test = csp.transform(epochs_data[test_idx][:, :, n: (n + w_length)])
        score_this_window.append(lda.score(X_test, y_test))
    scores_windows.append(score_this_window)

# Plot scores over time
# Window centres converted from samples to seconds and offset by epochs.tmin.
w_times = (w_start + w_length / 2.0) / sfreq + epochs.tmin

plt.figure()
# Average the per-window scores over the cross-validation splits (axis 0).
plt.plot(w_times, np.mean(scores_windows, 0), label="Score")
plt.axvline(0, linestyle="--", color="k", label="Onset")
plt.axhline(0.5, linestyle="-", color="k", label="Chance")
plt.xlabel("time (s)")
plt.ylabel("classification accuracy")
plt.title("Classification score over time")
plt.legend(loc="lower right")
plt.show()

NameError: name 'raw' is not defined