In [58]:
import math
import os
import pickle
import random
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

# Put the parent directory on sys.path so the project-local `datasets` package
# imported right below can be resolved when the notebook runs from its own folder.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from datasets.preprocess_utils import * 
from datasets.utils import * 

In [3]:
# Input/output locations of the SMR dataset, all resolved relative to `module_path`.
RAW_DATA_PATH = os.path.join(module_path, "data/SMR/raw")
PREPROCESS_DATA_PATH = os.path.join(module_path, "data/SMR/processed")
STATS_SUBJECTS_PATH = os.path.join(module_path, "data/SMR/stats/subjects")
STATS_TRIALS_PATH = os.path.join(module_path, "data/SMR/stats/trials")

# makedirs (not mkdir) so the intermediate "stats" directory is created as well;
# exist_ok makes the cell safe to re-run.
for stats_dir in (STATS_SUBJECTS_PATH, STATS_TRIALS_PATH):
    os.makedirs(stats_dir, exist_ok=True)

In [None]:
# Alternative layout: raw data and outputs live next to the project directory.
# NOTE: running this cell re-points the path constants set by the previous cell.
RAW_DATA_PATH = os.path.join(module_path, "../SMR_2021_RAW_DATA")
PREPROCESS_DATA_PATH = os.path.join(module_path, "../processed")
STATS_SUBJECTS_PATH = os.path.join(module_path, "../processed/subjects")
STATS_TRIALS_PATH = os.path.join(module_path, "../processed/trials")

# makedirs creates any missing parent and, with exist_ok, tolerates re-runs.
for out_dir in (PREPROCESS_DATA_PATH, STATS_SUBJECTS_PATH, STATS_TRIALS_PATH):
    os.makedirs(out_dir, exist_ok=True)

In [7]:
# Load the EEG data found under RAW_DATA_PATH; `outdir` receives the processed-data directory
# (presumably where load_all_eeg_data stores its results — confirm in the datasets package).
subjects_eeg_data_dict = load_all_eeg_data(RAW_DATA_PATH , outdir=PREPROCESS_DATA_PATH) 

Subject S4 : Loading data of Session_3 from 1/1 sessions  

In [59]:
# Collect the per-trial information of every subject/session; `outdir` receives the trial-stats
# directory (presumably where the per-session .pkl files are written — confirm in the helper).
subjects_trial_info_dict = load_subjects_trials_stats(RAW_DATA_PATH, outdir=STATS_TRIALS_PATH)

In [5]:
# Collect the per-subject information; `outdir` receives the subject-stats directory
# (presumably where the per-subject .pkl files are written — confirm in the helper).
subjects_info_dict = load_subjects_info(RAW_DATA_PATH, outdir=STATS_SUBJECTS_PATH)

In [26]:
# Tally the subjects by their "handsport" entry (the recorded output shows Y/N counts).
get_subjects_stats(subjects_info_dict, filters="handsport")

{'Y': 0, 'N': 3}

In [34]:
def list_all_files(data_path, pattern="*.mat"):
    """Group the files of a directory by the part of their name before the first "_".

    Only the directory itself is searched (no recursion). File names are split
    once on "_" after the extension has been dropped:

    * ``S1_Session_10.pkl`` -> ``result["S1"]["Session_10"] = <path>``
    * ``S1.pkl``            -> ``result["S1"] = <path>``

    Matches are processed in sorted order so that the returned dict (and anything
    that merges files in that order) does not depend on the order in which the
    filesystem happens to list the directory.

    Args:
        data_path: Directory to search.
        pattern: Glob pattern selecting the files, ``"*.mat"`` by default.

    Returns:
        dict: ``{prefix: Path}`` for names without "_" and
        ``{prefix: {suffix: Path}}`` for names with one. Using both naming
        schemes for the same prefix in one directory is not supported.
    """
    group_files = {}
    for f in sorted(Path(data_path).glob(pattern)):
        prefix, _, suffix = f.stem.partition("_")
        if "_" in f.stem:
            # First file of a prefix creates its sub-dict, later ones extend it.
            group_files.setdefault(prefix, {})[suffix] = f
        else:
            group_files[prefix] = f

    return group_files

In [35]:
# Sanity check: show how the .pkl files in the subject-stats directory are grouped.
list_all_files(STATS_SUBJECTS_PATH, pattern="*.pkl")

{'S1': PosixPath('/media/alioo/database/02_MA/MA/code/bbcpy_AutoML/data/SMR/stats/subjects/S1.pkl'),
 'S3': PosixPath('/media/alioo/database/02_MA/MA/code/bbcpy_AutoML/data/SMR/stats/subjects/S3.pkl'),
 'S4': PosixPath('/media/alioo/database/02_MA/MA/code/bbcpy_AutoML/data/SMR/stats/subjects/S4.pkl')}

In [36]:
def load_dict_pkl(path_name):
    """Unpickle every ``*.pkl`` file of a directory and merge the results into one dict.

    The files are discovered through ``list_all_files``; both of its layouts are
    handled, i.e. a prefix mapped directly to a path and a prefix mapped to a
    ``{suffix: path}`` dict.

    Args:
        path_name: Directory holding the pickle files.

    Returns:
        dict: All unpickled dicts merged with ``dict.update``. If two files carry
        the same top-level key, the value read later replaces the earlier one.

    Warning:
        ``pickle.load`` can execute arbitrary code; only use it on trusted files.
    """
    res_dict = {}
    for entry in list_all_files(path_name, pattern="*.pkl").values():
        # Normalise both layouts to an iterable of paths.
        pkl_paths = entry.values() if isinstance(entry, dict) else (entry,)
        for pkl_path in pkl_paths:
            with open(pkl_path, "rb") as f:
                res_dict.update(pickle.load(f))
    return res_dict

In [39]:
# NOTE(review): the AttributeError recorded below mentions `.keys()`, which this statement
# does not call — the stored output looks stale and should be regenerated.
load_dict_pkl(STATS_TRIALS_PATH).values()

AttributeError: 'dict_values' object has no attribute 'keys'

In [23]:
def load_dict_pkl(path_name):
    """Unpickle every ``*.pkl`` file of a directory and merge the results into one dict.

    ``list_all_files`` maps a prefix either directly to a path (``S1.pkl``) or to
    a ``{suffix: path}`` dict (``S1_Session_1.pkl``). Both layouts are handled
    here; opening the entry directly would fail for the nested layout used by the
    trial statistics.

    Args:
        path_name: Directory holding the pickle files.

    Returns:
        dict: All unpickled dicts merged with ``dict.update``. If two files carry
        the same top-level key, the value read later replaces the earlier one.

    Warning:
        ``pickle.load`` can execute arbitrary code; only use it on trusted files.
    """
    res_dict = {}
    group_files = list_all_files(path_name, pattern="*.pkl")
    for subject_entry in group_files.values():
        if isinstance(subject_entry, dict):
            pkl_paths = list(subject_entry.values())
        else:
            pkl_paths = [subject_entry]
        for pkl_path in pkl_paths:
            with open(pkl_path, "rb") as f:
                res_dict.update(pickle.load(f))
    return res_dict

In [46]:
# Read the statistics back from the .pkl files in the two stats directories.
# Note that `subjects_info_dict` re-binds the name assigned by load_subjects_info earlier.
trial_info_dict = load_dict_pkl(STATS_TRIALS_PATH)
subjects_info_dict = load_dict_pkl(STATS_SUBJECTS_PATH)

In [50]:
# Tally the subjects by their "handedness" entry (the recorded output shows R/L counts).
get_subjects_stats(subjects_info_dict, filters="handedness")

{'R': 3, 'L': 0}

In [45]:
# Inspect the raw trial information of one session; in the recorded output 'result' holds 1., 0. and nan.
trial_info_dict["S1"]["Session_10"]

{'result': array([nan,  1., nan,  0.,  0., nan,  0.,  1., nan,  1.,  1., nan,  1.,
         1.,  1.,  1.,  0.,  1.,  1.,  1.,  1.,  1.,  0.,  0., nan,  1.,
         1.,  0.,  1.,  1., nan,  1.,  1.,  1.,  1., nan,  1., nan,  1.,
        nan,  0.,  1., nan,  1.,  1.,  1.,  1.,  1.,  0.,  1.,  1.,  1.,
         1.,  1.,  1.,  1.,  1.,  0.,  0., nan,  0.,  1., nan,  0., nan,
         1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,
        nan,  1.,  1.,  1.,  1.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        nan,  1.,  1., nan, nan,  1.,  1., nan,  1.,  1.,  1.,  1.,  1.,
         1.,  1.,  1.,  1.,  1.,  1.,  0.,  1.,  1.,  1.,  1., nan,  1.,
         1.,  1., nan,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
         1.,  1., nan,  1., nan,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
         1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,  0.,  1.,  1.,  1.,
         1.,  0., nan,  1.,  1.,  0.,  1.,  1.,  1.,  1.,  0.,  0.,  0.,
         0.,  0.,  1.,  1.,  1., nan, nan

In [56]:
def get_trial_stats(trial_info_dict):
    """Summarise trial outcomes and durations for every session of every subject.

    Args:
        trial_info_dict: ``{subject: {session_id: values}}`` where ``values``
            provides a "result" sequence (truthy, falsy or NaN per trial) and a
            "triallength" sequence.

    Returns:
        dict: ``{subject: {session_id: stats}}`` with, per session, the number of
        "true", "false" and "error" (NaN) results plus "mean_triallength" and
        "std_triallength" over all trials of that session.
    """
    trial_stat_dict = {}
    for subject, sessions in trial_info_dict.items():
        per_session = {}
        trial_stat_dict[subject] = per_session
        for session_id, values in sessions.items():
            outcomes = np.array(values["result"])
            # NaN compares unequal to both True and False, so it only shows up in "error".
            n_true = int(np.count_nonzero(outcomes == True))
            n_false = int(np.count_nonzero(outcomes == False))
            n_error = int(np.count_nonzero(np.isnan(outcomes)))

            durations = np.array(values["triallength"])
            per_session[session_id] = {
                "true": n_true,
                "false": n_false,
                "error": n_error,
                "mean_triallength": np.mean(durations),
                "std_triallength": np.std(durations),
            }

    return trial_stat_dict

In [57]:
# Per-session outcome counts and trial-length statistics for all loaded subjects.
get_trial_stats(trial_info_dict)

{'S1': {'Session_10': {'true': 292,
   'false': 94,
   'error': 64,
   'mean_triallength': 3.147822222222222,
   'std_triallength': 1.7807756523354135},
  'Session_1': {'true': 214,
   'false': 122,
   'error': 114,
   'mean_triallength': 3.6125333333333334,
   'std_triallength': 1.9549220569856203}},
 'S3': {'Session_6': {'true': 178,
   'false': 155,
   'error': 117,
   'mean_triallength': 3.827377777777778,
   'std_triallength': 1.8168154714455595}},
 'S4': {'Session_3': {'true': 260,
   'false': 135,
   'error': 55,
   'mean_triallength': 2.5747555555555555,
   'std_triallength': 1.8266031759714905}}}

In [15]:
# Group the trial-stat pickles by subject and session.
# NOTE(review): the lists printed in the recorded output are not produced by the
# list_all_files shown in this notebook (it contains no print) — output looks stale.
group_files = list_all_files(STATS_TRIALS_PATH, pattern="*.pkl")

['S1', 'Session_10']
['S1', 'Session_1']
['S4', 'Session_3']
['S3', 'Session_6']


In [16]:
# Display the nested {subject: {session: path}} grouping built in the previous cell.
group_files

{'S1': {'Session_10': PosixPath('/media/alioo/database/02_MA/MA/code/bbcpy_AutoML/data/SMR/stats/trials/S1_Session_10.pkl'),
  'Session_1': PosixPath('/media/alioo/database/02_MA/MA/code/bbcpy_AutoML/data/SMR/stats/trials/S1_Session_1.pkl')},
 'S4': {'Session_3': PosixPath('/media/alioo/database/02_MA/MA/code/bbcpy_AutoML/data/SMR/stats/trials/S4_Session_3.pkl')},
 'S3': {'Session_6': PosixPath('/media/alioo/database/02_MA/MA/code/bbcpy_AutoML/data/SMR/stats/trials/S3_Session_6.pkl')}}

In [None]:
# Size of the object scaled by 1e6 (presumably bytes -> MB; confirm what get_size returns).
# NOTE(review): `subjects_dict` is not assigned in any cell visible here.
get_size(subjects_dict) / 1000000

In [5]:
# Size of the trial-info dict scaled by 1e6 (presumably bytes -> MB; confirm what get_size returns).
get_size(subjects_trial_info_dict) / 1000000

0.076112

In [8]:
# Same measurement as the previous cell, recorded in a different run (the stored outputs differ).
get_size(subjects_trial_info_dict) / 1000000

0.570675

In [7]:
# List the fields stored under "trial_info" for one session.
subjects_trial_info_dict["S1"]["Session_1"]["trial_info"].keys()

dict_keys(['tasknumber', 'runnumber', 'trialnumber', 'targetnumber', 'triallength', 'targethitnumber', 'resultind', 'result', 'forcedresult', 'artifact'])

In [None]:
# Grouped bar chart: for every 'class_name' three bars are drawn side by side,
# taken from the 'test', 'vali' and 'train' columns of `data`.
# NOTE(review): ColumnDataSource, figure, dodge and show are not imported in any cell
# visible here, and neither `data` nor `num_class` is assigned before this cell.
source = ColumnDataSource(data=data)

# y_range is fixed to (0, 18086) — TODO confirm this limit fits the plotted data.
p = figure(x_range=list(data.keys()), y_range=(0, 18086), height=250,
           title="subject gender ", toolbar_location=None, tools="")

# dodge shifts each bar group horizontally (-0.25 / 0.0 / +0.25) so the three bars do not overlap.
p.vbar(x=dodge('class_name', -0.25, range=p.x_range), top='test', width=0.2, source=source,
       color="#c9d9d3", legend_label="test")

p.vbar(x=dodge('class_name', 0.0, range=p.x_range), top='vali', width=0.2, source=source,
       color="#718dbf", legend_label="vali")

p.vbar(x=dodge('class_name', 0.25, range=p.x_range), top='train', width=0.2, source=source,
       color="#e84d60", legend_label="train")

show(p)

# Turn the counts into a DataFrame and append a per-row total.
num_class = pd.DataFrame.from_dict(num_class)
num_class["total"] = num_class.sum(axis=1)

In [None]:

# For every subject and session, store the indices of the trials whose result is
# true, false or NaN ("error").
trial_dict = {}
for subject, sessions in sessions_dict.items():
    trial_dict[subject] = {}
    for session_id, data in sessions.items():
        outcomes = np.array(data["trial_info"]["result"])
        true_trials_idx = np.where(outcomes == True)[0]
        false_trials_idx = np.where(outcomes == False)[0]
        error_trials_idx = np.where(np.isnan(outcomes))[0]
        trial_dict[subject][session_id] = {
            "true": true_trials_idx,
            "false": false_trials_idx,
            "error": error_trials_idx,
        }

In [None]:
def get_trial_stats(session_dict):
    """Summarise trial outcomes and durations of a single session.

    Note: this re-uses the name of the multi-subject ``get_trial_stats`` defined
    earlier in the notebook; whichever cell ran last determines which one is bound.

    Args:
        session_dict: Mapping with a "trial_info" entry that provides a "result"
            sequence (truthy, falsy or NaN per trial) and a "triallength" sequence.

    Returns:
        dict: Counts under "true", "false" and "error" (NaN results) and the
        "mean_triallength" / "std_triallength" over all trials.
    """
    info = session_dict["trial_info"]
    outcomes = np.array(info["result"])
    # NaN compares unequal to both True and False, so it is only counted as "error".
    stats = {
        "true": int(np.count_nonzero(outcomes == True)),
        "false": int(np.count_nonzero(outcomes == False)),
        "error": int(np.count_nonzero(np.isnan(outcomes))),
    }
    durations = np.array(info["triallength"])
    stats["mean_triallength"] = np.mean(durations)
    stats["std_triallength"] = np.std(durations)
    return stats

In [None]:
# Display the per-session index arrays collected in `trial_dict`.
trial_dict

In [None]:
# Grouped bar chart: for every 'class_name' three bars are drawn side by side,
# taken from the 'test', 'vali' and 'train' columns of `data`.
# NOTE(review): ColumnDataSource, figure, dodge and show are not imported in any cell
# visible here, and `data`, `class_names_dict` and `num_class` are not assigned before this cell.
source = ColumnDataSource(data=data)

# y_range is fixed to (0, 18086) — TODO confirm this limit fits the plotted data.
p = figure(x_range=list(class_names_dict.keys()), y_range=(0, 18086), height=250,
           title="Classes counts by Train/Dev/Test split ", toolbar_location=None, tools="")

# dodge shifts each bar group horizontally (-0.25 / 0.0 / +0.25) so the three bars do not overlap.
p.vbar(x=dodge('class_name', -0.25, range=p.x_range), top='test', width=0.2, source=source,
       color="#c9d9d3", legend_label="test")

p.vbar(x=dodge('class_name', 0.0, range=p.x_range), top='vali', width=0.2, source=source,
       color="#718dbf", legend_label="vali")

p.vbar(x=dodge('class_name', 0.25, range=p.x_range), top='train', width=0.2, source=source,
       color="#e84d60", legend_label="train")

show(p)

# Turn the counts into a DataFrame and append a per-row total.
num_class = pd.DataFrame.from_dict(num_class)
num_class["total"] = num_class.sum(axis=1)

## Prepare data  

In [None]:
# Show the channel labels (compared against 'C3'/'C4' further down).
# NOTE(review): `clab` is not assigned in any cell visible here.
clab

In [None]:
# Show the trial information of the session under inspection.
# NOTE(review): `trial_info` is not assigned in any cell visible here.
trial_info

In [None]:
# Per-trial "result" entries; NaN entries are treated as error trials by the cells below.
trial_info["result"]

In [None]:
# Per-trial "targethitnumber" entries (presumably the target that was actually hit — confirm).
trial_info["targethitnumber"]

In [None]:
def 

In [None]:
# Line up the 'targetnumber', 'targethitnumber' and 'result' entries trial by trial
# so they can be compared side by side.
trial_results = list(zip(trial_info["targetnumber"], trial_info["targethitnumber"],trial_info["result"]))
trial_results

In [None]:
# Split the trial indices by outcome; NaN results form their own "error" group.
trial_outcomes = np.array(trial_info["result"])
true_trials_idx = np.where(trial_outcomes == True)[0]
false_trials_idx = np.where(trial_outcomes == False)[0]
error_trials_idx = np.where(np.isnan(trial_outcomes))[0]

In [None]:
# One duration histogram per outcome group. The three copy-pasted blocks are folded
# into a single loop so that bins and styling cannot drift apart.
# The duration of a trial is read from the last element of the last entry of its
# `timepoints` record (presumably the final time stamp — confirm the layout).
kwargs = dict(alpha=0.5, bins=100, density=False, stacked=True)
for outcome_idx, outcome_label in (
    (true_trials_idx, "successful"),
    (false_trials_idx, "failed"),
    (error_trials_idx, "error"),
):
    trial_len = [t[-1][-1] for t in timepoints[outcome_idx]]
    plt.hist(trial_len, **kwargs)
    plt.title(f"Duration Distribution of {outcome_label} trials")
    plt.show()

In [None]:
# "triallength" values of the successful trials only.
np.array(trial_info["triallength"])[true_trials_idx]

In [None]:
# Shape of every entry of the first successful trial, paired with the label at the same
# position in `clab` (assumes a trial iterates channel by channel in `clab` order — TODO confirm).
[(f.shape,l) for f,l in zip(data[true_trials_idx][0], clab)]

In [None]:
# Pick channel `chan` from the successful trials selected by `id_rand`.
# NOTE(review): `chan` is not assigned in any cell visible here and `id_rand` is only
# created in a later cell, so this cell fails on a fresh top-to-bottom run.
trials = [data[true_trials_idx][idx][chan] for idx in id_rand]
len(trials)

In [None]:
# Plot the C3 and C4 signals of randomly chosen successful trials, one trial per subplot.
N_TRIALS_TO_PLOT = 20
PLOTS_PER_ROW = 5
PLOTS_PER_COL = math.ceil(N_TRIALS_TO_PLOT / PLOTS_PER_ROW)

# randrange(n) draws uniformly from 0..n-1. The former `randint(0, n) - 1` could
# produce -1, which wraps around to the last trial and made it twice as likely.
id_rand = [random.randrange(len(true_trials_idx)) for _ in range(N_TRIALS_TO_PLOT)]

# Row positions of the two channels inside a trial.
c3 = np.where(clab == 'C3')[0][0]
c4 = np.where(clab == 'C4')[0][0]

fig, axs = plt.subplots(PLOTS_PER_COL, PLOTS_PER_ROW, figsize=(20, 10), constrained_layout=True)
# axs.flat walks the grid row by row, which replaces the manual i/j bookkeeping.
for ax, idx in zip(axs.flat, id_rand):
    # Index the single trial directly instead of copying all successful trials each time.
    sampled_trial = data[true_trials_idx[idx]]
    ax.plot(sampled_trial[c3], label="C3")
    ax.plot(sampled_trial[c4], label="C4")
    # Legend per subplot; a single plt.legend() only annotated the last axes.
    ax.legend()
plt.show()

In [None]:
# Plot every channel of the first successful trial, one subplot per channel.
PLOTS_PER_ROW = 5
# Size the grid from the number of channel labels instead of a hard-coded 62.
PLOTS_PER_COL = math.ceil(len(clab) / PLOTS_PER_ROW)
fig, axs = plt.subplots(PLOTS_PER_COL, PLOTS_PER_ROW, figsize=(20, 20),
                        constrained_layout=True, squeeze=False)
trial = data[true_trials_idx[0]]

# ravel() lists the axes row by row, matching the former manual i/j bookkeeping.
axes = axs.ravel()
channels = list(zip(trial, clab))
for ax, (t_chan, l_chan) in zip(axes, channels):
    ax.plot(t_chan)
    ax.set_title(l_chan)

# Hide the unused axes that pad the last row of the grid.
for ax in axes[len(channels):]:
    ax.set_visible(False)

plt.show()

In [None]:
# Plot the first successful trial (first index in true_trials_idx).
trial = data[true_trials_idx[0]]
plot_3dSurface_and_heatmap(trial,clab)

In [None]:
# Plot the first failed trial (first index in false_trials_idx).
trial = data[false_trials_idx[0]]
plot_3dSurface_and_heatmap(trial,clab)

In [None]:
# Plot the first error trial (first index in error_trials_idx, i.e. a NaN result).
trial = data[error_trials_idx[0]]
plot_3dSurface_and_heatmap(trial,clab)

In [None]:
import os 
import sys
# Repeats the sys.path setup from the first cell of the notebook.
# NOTE(review): the `from` statement that follows is incomplete, so this cell cannot run as is.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from

In [None]:
import numpy as np
import scipy as sp
from matplotlib import pyplot as plt

def map(eegdata, v=None, clim='minmax', cb_label=''):
    '''
    Draw an interpolated scalp map of one value per channel on the current figure.

    Note: the function name shadows Python's built-in ``map`` in this notebook;
    it is kept because existing cells call it under this name.

    Usage:
        map(eegdata, v, clim='minmax', cb_label='')
    Parameters:
        eegdata: object whose ``eegdata.chans.mnt`` is a 2D array of channel
          coordinates (channels x 2)
        v:   a 1D vector (channels); required although the signature defaults it to None
        clim: limits of color code, either
          'minmax' to use the minimum and maximum of the data
          'sym' to make limits symmetrical around zero, or
          a two element vector giving specific values
        cb_label: label for the colorbar
    Raises:
        ValueError: if ``v`` is missing or ``clim`` is an unknown keyword
    '''
    if v is None:
        raise ValueError("map() needs the per-channel values `v` to plot")

    # Imported explicitly: `import scipy as sp` alone does not guarantee that the
    # `interpolate` submodule is loaded on every SciPy version.
    from scipy.interpolate import Rbf

    v = np.asarray(v)
    mnt = eegdata.chans.mnt

    # interpolate between channels on a 100 x 100 grid spanning [-1, 1] x [-1, 1]
    xi, yi = np.linspace(-1, 1, 100), np.linspace(-1, 1, 100)
    xi, yi = np.meshgrid(xi, yi)
    rbf = Rbf(mnt[:, 0], mnt[:, 1], v, function='linear')
    zi = rbf(xi, yi)

    # mask area outside of the scalp: grid cells at least r index steps away
    # from the grid centre (a, b) are blanked out
    a, b, n, r = 50, 50, 100, 50
    mask_y, mask_x = np.ogrid[-a:n - a, -b:n - b]
    mask = mask_x * mask_x + mask_y * mask_y >= r * r
    zi[mask] = np.nan

    # Check for a keyword first: comparing an array-like `clim` with a string
    # is ambiguous inside an `if`.
    if isinstance(clim, str):
        if clim == 'minmax':
            vmin = v.min()
            vmax = v.max()
        elif clim == 'sym':
            vmax = np.absolute(v).max()
            vmin = -vmax
        else:
            raise ValueError("clim must be 'minmax', 'sym' or a two element vector")
    else:
        vmin = clim[0]
        vmax = clim[1]

    plt.imshow(zi, vmin=vmin, vmax=vmax, origin='lower', extent=[-1, 1, -1, 1], cmap='jet')
    plt.colorbar(shrink=.5, label=cb_label)
    # Channel positions as black crosses; vmin/vmax are not passed because they
    # have no effect when a fixed color is used.
    plt.scatter(mnt[:, 0], mnt[:, 1], c='k', marker='+')
    plt.axis('off')

In [None]:
# NOTE(review): `map` is called without the per-channel values `v`, and it reads
# `eegdata.chans.mnt` — confirm that an entry of `data` provides that attribute.
trial = data[true_trials_idx[0]]
map(trial)

### Split trials into small snippets
### Different sampling rate