# Process data

In this notebook, we split each raw time series into one time series per condition.

In [3]:
# import necessary libraries
import pickle as pk
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import mne
import numpy as np
import pandas as pd
import seaborn as sb
from mne import io
from rich import inspect, pretty, print
from rich.progress import track
from seaborn import heatmap
from sklearn.metrics.pairwise import pairwise_distances
from tda import windowing
from tqdm import tqdm

# Input/output locations, relative to the notebook's working directory.
data_dir = "../data/"
out_dir = "../data/preprocessing/"

# Let rich handle the display of values echoed at the end of a cell.
pretty.install()

# Make sure both folders exist before anything is read from or written to them.
for folder in (out_dir, data_dir):
    Path(folder).mkdir(parents=True, exist_ok=True)

## Load data and divide into one window per condition

In [4]:
# Collect the `.set` recordings of the two groups. `Path` is already imported
# in the first cell, so it is not re-imported here.
# `sorted` makes the processing order deterministic (glob order is not
# guaranteed) and already returns a list, so no extra `list(...)` is needed.
p = Path()
files_lows = sorted(p.glob(f"{data_dir}raw/ICApruned_lows/*.set"))
files_highs = sorted(p.glob(f"{data_dir}raw/ICApruned_highs/*.set"))

In [9]:
n_windows = 6  # one per condition
# Other settings noted during development, as (number of windows, window length):
# 2160, 0.1 sec  -- NOTE(review): if 6 windows last 3 min each, 2160 equal
#                   windows would last 0.5 sec, not 0.1 sec; confirm this entry
# 6, 3 min

In [10]:
# Condition label of each consecutive window, in recording order.
# NOTE(review): assumes every recording holds the six conditions back to back
# in this order and with equal duration -- confirm against the protocol.
conditions = ["ROE", "RCE", "IND1", "IND2", "NH", "POST"]

# One entry per (subject, condition), keyed "<subject>_<condition index>".
sixcondition_data = dict()

# Pair each file with its group once, instead of searching `files_lows` for
# every file inside the loop. Order is unchanged: highs first, then lows.
labelled_files = [(f, "high") for f in files_highs] + [
    (f, "low") for f in files_lows
]

# ignore runtime warnings
with warnings.catch_warnings():
    warnings.simplefilter(action="ignore", category=RuntimeWarning)
    # loop through each file in the list of EEG files
    for file_idx, group in track(labelled_files):
        # extract information from the filename; the subject id is the token
        # after the first underscore of the file stem
        filename = file_idx.name.split(".")[0]
        subject = filename.split("_")[1]

        # load the EEG data using mne (read the signal only once)
        raw_EEG = mne.io.read_raw_eeglab(file_idx, preload=True, verbose=False)
        series = raw_EEG.get_data()
        chs = raw_EEG.ch_names

        # One window per condition. The count comes from the label list rather
        # than the global `n_windows`, which a later cell redefines to
        # n_cond * 24; re-running this cell afterwards would otherwise produce
        # windows without a condition label.
        mini_serie = windowing(series, n_windows=len(conditions))

        for cond_idx, condition in enumerate(conditions):
            key = f"{subject}_{cond_idx}"
            sixcondition_data[key] = {
                "file": filename,
                "group": group,
                "subject": subject,
                "condition": condition,
                "series": mini_serie[cond_idx],
                "channels": chs,
            }

sixcondition_data_df = pd.DataFrame.from_dict(sixcondition_data, orient="index")

In [11]:
# Display the per-condition table: one row per "<subject>_<condition index>" key.
sixcondition_data_df

Unnamed: 0,file,group,subject,condition,series,channels
01_0,subject_01,high,01,ROE,"[[1.0079775810241698e-05, 2.074808597564697e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
01_1,subject_01,high,01,RCE,"[[3.207792043685913e-06, 3.530672192573547e-07...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
01_2,subject_01,high,01,IND1,"[[-4.318517208099365e-06, -6.90339183807373e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
01_3,subject_01,high,01,IND2,"[[1.0585275888442992e-06, 4.3018770217895506e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
01_4,subject_01,high,01,NH,"[[-4.6376829147338864e-06, -6.676363945007324e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
...,...,...,...,...,...,...
31_1,subject_31,low,31,RCE,"[[3.0984611511230466e-06, -4.248315393924713e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
31_2,subject_31,low,31,IND1,"[[8.021036148071288e-06, 6.956083297729492e-06...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
31_3,subject_31,low,31,IND2,"[[7.2614860534667964e-06, 6.4224123954772945e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
31_4,subject_31,low,31,NH,"[[4.52498197555542e-08, 3.130119144916534e-07,...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."


### Save to file

In [12]:
# NOTE(review): the "series" column holds array objects, which `to_csv` writes
# as their text representation (presumably abbreviated for long arrays), and
# `index=False` drops the "<subject>_<condition index>" keys. The pickle below
# is the copy to reload the signals from -- confirm the CSV is only for browsing.
sixcondition_data_df.to_csv(f"{out_dir}EEG_per_condition_df.csv", index=False)

# Use a context manager so the file is flushed and closed right after the dump.
with open(f"{out_dir}EEG_per_condition_dict.pkl", "wb") as pkl_file:
    pk.dump(sixcondition_data, pkl_file)

## Other format with more than one window per condition

In [13]:
n_cond = 6  # number of conditions
n_windows_per_cond = 24  # windows cut out of each condition
n_windows = n_cond * n_windows_per_cond  # 6 conditions x 24 windows = 144 in total

# Reference settings, as (total number of windows, window length):
# 1080, 1 sec
# 216, 5 sec -> 36 points per condition
# 72, 15 sec -> 12 points per condition
# 36, 30 sec
# 18, 1 min
# 6, 3 min

In [14]:
# Load the raw per-subject dictionary. This file is not written by the cells
# shown in this notebook -- presumably an earlier preprocessing step creates it.
# NOTE: unpickling can execute arbitrary code; only load files produced by
# this project.
with open(f"{data_dir}preprocessing/EEG_raw_dict.pkl", "rb") as pkl_file:
    EEG_raw_dict = pk.load(pkl_file)

In [47]:
# One entry per key of EEG_raw_dict, holding the subject name, the group and
# a "window i" / "channels i" pair for each of the n_windows windows.
cropped_data = dict()

for key in track(EEG_raw_dict):
    record = EEG_raw_dict[key]

    chs = record["channels"]
    mini_serie = windowing(record["series"], n_windows=n_windows)

    # Insertion order fixes the column order of the DataFrame built below:
    # name, group, window 0, channels 0, window 1, channels 1, ...
    subject_windows = {"name": record["subject"], "group": record["group"]}
    for win_idx in range(n_windows):
        # windows no. are not reset at each condition
        subject_windows[f"window {win_idx}"] = mini_serie[win_idx]
        # the same channel list is attached to every window
        subject_windows[f"channels {win_idx}"] = chs

    cropped_data[key] = subject_windows

cropped_df = pd.DataFrame.from_dict(cropped_data, orient="index")

In [48]:
# Display the windowed table: one row per EEG_raw_dict entry, with a
# "window i" / "channels i" column pair for every window index.
cropped_df

Unnamed: 0,name,group,window 0,channels 0,window 1,channels 1,window 2,channels 2,window 3,channels 3,...,window 139,channels 139,window 140,channels 140,window 141,channels 141,window 142,channels 142,window 143,channels 143
1,1,high,"[[1.0079775810241698e-05, 2.074808597564697e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[1.3241842985153198e-06, 3.6938107013702392e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-6.752930164337158e-06, -5.468021869659423e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-1.852786660194397e-06, -2.630286514759064e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...",...,"[[-7.747294902801513e-06, -6.766506671905517e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[5.545677661895752e-06, 6.070427417755127e-06...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-6.31473159790039e-06, -8.36799430847168e-06...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-8.531312942504882e-07, -7.097363471984863e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-1.4296892881393432e-06, -3.62955904006958e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
2,2,high,"[[-5.6332302093505856e-06, -1.6634254455566406...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-1.2639561653137207e-05, -9.31490135192871e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-1.1723330497741699e-05, -1.1805062294006346...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-2.2863028049468994e-06, 3.901846170425415e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...",...,"[[-1.0808560371398925e-05, -1.6357721328735352...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[6.191649436950683e-06, 1.489131736755371e-05...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-6.44160509109497e-06, -9.230107307434082e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-8.020853996276855e-07, 1.2306674003601073e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[8.705867767333985e-06, 2.9634950160980223e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
3,3,high,"[[-1.7440322041511534e-07, -5.314565658569335e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[4.903311252593994e-06, 2.921401262283325e-06...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[1.0305203437805175e-05, 7.177838325500488e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[5.722846508026123e-06, 4.9653825759887694e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...",...,"[[-4.075170993804932e-06, -5.052158832550048e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-2.5536935329437256e-06, -3.529302358627319e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[2.123786211013794e-06, 9.227144241333008e-06...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[1.5367966890335082e-06, -3.538686633110046e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[2.6394767761230467e-06, 1.4854261875152586e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
4,4,high,"[[4.982749938964844e-06, 1.2489630699157714e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[2.8586821556091306e-06, 2.564746856689453e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[2.8552670478820798e-06, 1.1212499141693115e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-5.468360424041748e-06, -4.633989810943603e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...",...,"[[-3.1404192447662353e-06, 3.082125425338745e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-1.3410489082336425e-05, -1.7722192764282225...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-1.3577830791473387e-06, 1.7371333837509154e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[8.379056930541992e-06, 7.834229946136474e-06...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[7.441905140876769e-07, 1.9905624389648438e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
5,5,high,"[[-4.6205735206604e-06, -6.309326648712158e-06...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[1.6692761182785034e-06, 4.512409687042236e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[1.3297271728515625e-06, 1.373063325881958e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[9.637398719787598e-07, -3.3380502462387084e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...",...,"[[-8.589941024780273e-06, -8.550137519836425e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-3.1671690940856933e-06, -3.299576759338379e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-9.277089834213256e-07, -2.0248174667358397e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-3.0536788702011105e-07, -1.0054768323898314...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-3.226569652557373e-06, -1.8805655241012572e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
6,6,high,"[[1.0261098146438598e-06, 5.474274635314941e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-2.011466324329376e-07, 1.1779037714004516e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[5.952425003051757e-06, 2.2660846710205077e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[1.6540329456329345e-06, 1.4942810535430907e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...",...,"[[-2.7324655652046203e-07, -2.4974264204502103...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[4.249783515930176e-06, 4.308763980865478e-06...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[3.6241965293884277e-06, 5.110436677932739e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[2.7010819911956784e-06, 9.88293707370758e-07...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-5.034029483795166e-06, -4.364974498748779e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
7,7,high,"[[-1.3466087579727172e-06, -7.661070823669434e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-2.318782567977905e-06, -3.1857087612152096e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-1.3608199357986449e-06, 1.1454241722822188e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[2.3155006408691406e-05, 2.4853385925292968e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...",...,"[[-4.010418891906738e-06, -3.229462385177612e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[3.7724411487579342e-06, 9.026182293891907e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[4.6830028295516967e-07, 3.031900405883789e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[1.1302752494812012e-06, -3.427347183227539e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[2.9987516403198243e-06, 1.4897261559963225e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
8,8,high,"[[1.0785425186157227e-05, 6.277435302734375e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-2.4604690074920655e-06, -1.7324730157852173...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-6.524005889892578e-06, -6.3777918815612794e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-2.1167325973510742e-07, 5.115290284156799e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...",...,"[[-2.364850044250488e-06, -1.2851983308792114e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[1.8227086067199706e-06, 2.3999476432800293e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-6.6929292678833e-07, -2.151191234588623e-06...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[1.3690019845962525e-06, 2.4886655807495116e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-1.8364074230194092e-06, -1.0061696767807006...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
9,9,high,"[[2.7631943225860596e-06, 4.006883144378662e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[1.670936793088913e-07, 1.0217688083648682e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[1.3503886461257934e-06, -2.471467712894082e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-9.695415496826172e-06, -8.90003776550293e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...",...,"[[1.1658614873886108e-06, 2.8236633539199827e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-6.037352561950684e-06, -5.061247825622558e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-5.186368465423584e-06, -8.125234603881836e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[5.143777847290039e-06, 3.935801029205322e-06...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[1.1731331795454025e-07, -1.4248335361480713e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."
10,10,high,"[[-5.56261920928955e-06, -1.734308624267578e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[1.0917755126953125e-05, 1.0570602416992188e-...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[1.5651550292968748e-05, 7.720638751983642e-0...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[7.94191026687622e-06, 2.0900239944458005e-06...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...",...,"[[-6.0591597557067865e-06, -8.917227745056152e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[3.832475280761719e-05, 4.370281982421875e-05...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-2.000552749633789e-05, -1.1940963745117188e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-1.565192222595215e-05, -1.4536626815795897e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P...","[[-7.214339733123779e-06, -1.9010976791381835e...","[F7, F3, FZ, F4, F8, C3, C4, T7, CZ, T8, P3, P..."


In [51]:
# Report the window size, using the windows of the last subject processed by
# the loop above (`mini_serie` keeps its value from the final iteration).
# NOTE(review): 125 is the samples-per-second factor implied by the
# points -> seconds conversion; confirm it matches the recordings' sampling
# rate (e.g. raw_EEG.info["sfreq"]).
sfreq = 125
sh = mini_serie[0].shape
print(f"{sh[1]} points per window, ie {sh[1] / sfreq} sec")
# Use n_cond rather than a literal 6, as the file name built below does.
print(f"{n_windows} windows, {n_windows / n_cond} windows per condition")

In [52]:
# save to a CSV file
file_name = f"EEG_{int(n_windows/n_cond)}wind_per_condition"
print(file_name)

cropped_df.to_csv(f"{out_dir}{file_name}_df.csv", index=False)
pk.dump(cropped_data, open(f"{out_dir}{file_name}_dict.pkl", "wb"))

## Compute correlations

In [19]:
# empty dictionary to store EEG data
sixcondition_corr = dict()

# loop through each file in the list of EEG files
for key in track(sixcondition_data):
    # extract information from the dictionary
    subject = sixcondition_data[key]["subject"]
    group = sixcondition_data[key]["group"]
    serie = sixcondition_data[key]["series"]
    condition = sixcondition_data[key]["condition"]
    ch = sixcondition_data[key]["channels"]
    raw_data = pd.DataFrame(serie.T, columns=ch)
    corr = raw_data.corr()

    if group not in sixcondition_corr:
        sixcondition_corr[group] = dict()
    if subject not in sixcondition_corr[group]:
        sixcondition_corr[group][subject] = dict()
    if condition not in sixcondition_corr[group][subject]:
        sixcondition_corr[group][subject][condition] = dict()

    sixcondition_corr[group][subject][condition] = corr

In [20]:
# save to file
# NOTE(review): this goes to data_dir, whereas the other outputs of this
# notebook go to out_dir -- confirm the location is intended.
# Use a context manager so the file is flushed and closed right after the dump.
with open(f"{data_dir}EEG_per_condition_corr.pkl", "wb") as pkl_file:
    pk.dump(sixcondition_corr, pkl_file)