In [2]:
# Setup
from mne.time_frequency import psd_multitaper
import mne
import os.path as op
import numpy as np
import pandas as pd
import pickle
import ast
import copy

# Brain-area -> EEG channel-name lookup.
# Two-letter keys combine hemisphere (L/R) or midline (Z) with the region
# (F, C, P, O); 'all' lists every channel that appears in any of the areas.
ba_list = {
    'LF': ['Fp1', 'F3', 'F7', 'AF3', 'F1', 'F5', 'FT7'],
    'LC': ['C3', 'T7', 'FC1', 'FC3', 'FC5', 'C1', 'C5'],
    'LP': ['P3', 'P7', 'CP1', 'CP3', 'CP5', 'TP7', 'P1', 'P5'],
    'LO': ['O1', 'PO3'],
    'RF': ['Fp2', 'F4', 'F8', 'AF4', 'F2', 'F6', 'FT8'],
    'RC': ['C4', 'T8', 'FC2', 'FC4', 'FC6', 'C2', 'C6'],
    'RP': ['P4', 'P8', 'CP2', 'CP4', 'CP6', 'TP8', 'P2', 'P6'],
    'RO': ['O2', 'PO4'],
    'FZ': ['Fpz', 'Fz'],
    'CZ': ['Cz', 'CPz'],
    'PZ': ['Pz', 'POz'],
    'OZ': ['Oz', 'Iz'],
    'all': [
        'Fp1', 'Fp2', 'F3', 'F4', 'C3', 'C4', 'P3', 'P4',
        'O1', 'O2', 'F7', 'F8', 'T7', 'T8', 'P7', 'P8',
        'Fpz', 'Fz', 'Cz', 'CPz', 'Pz', 'POz', 'Oz', 'Iz',
        'AF3', 'AF4', 'F1', 'F2', 'F5', 'F6',
        'FC1', 'FC2', 'FC3', 'FC4', 'FC5', 'FC6', 'FT7', 'FT8',
        'C1', 'C2', 'C5', 'C6',
        'CP1', 'CP2', 'CP3', 'CP4', 'CP5', 'CP6', 'TP7', 'TP8',
        'P1', 'P2', 'P5', 'P6', 'PO3', 'PO4',
    ],
}

# Band -> (start, stop) pair used to slice the frequency axis of the PSD array.
# These are positions in the frequency array, NOT frequency values in Hz.
frequencies = {
    'delta': (0, 2),
    'theta': (2, 4),
    'alpha': (4, 6),
    'beta': (6, 15),
    'gamma': (15, 20),
    'broadband': (0, 20),
}


# Root folder of the cleaned-epoch derivatives.
data_dir = '/Users/yeganeh/Codes/otka-preprocessing/data/Main-study/derivatives/mne-bids-pipeline'

# Subject ids handled by the PSD computation cell.
subjects = ['01', '02', '03', '22']

# Task labels: two baselines, then four induction and four experience blocks.
tasks = (
    [f'baseline{block}' for block in (1, 2)]
    + [f'induction{block}' for block in range(1, 5)]
    + [f'experience{block}' for block in range(1, 5)]
)

In [None]:
# Compute multitaper PSDs for every subject/task and aggregate them per
# brain area and frequency band.
#
# Outputs:
#   psd_unaggregated : dict '<sub>-<task>' -> raw PSD array returned by psd_multitaper
#   freq_dict        : left empty (the line that fills it is still disabled)
#   psd_total        : DataFrame, one row per '<sub>-<task>', one column per '<area>-<band>'
psd_unaggregated = {}
freq_dict = {}
psd_rows = []  # single-row frames, concatenated once after the loop

for n_sub in subjects:
    for task in tasks:
        epoch_name = f'sub-{n_sub}_ses-01_task-{task}_proc-clean_epo.fif'
        epoch_path = op.join(data_dir, f'sub-{n_sub}/ses-01/eeg/{epoch_name}')
        # open clean epochs
        epoch = mne.read_epochs(epoch_path)

        # calculate psd for broadband and all channels
        psds, freqs = psd_multitaper(epoch, fmin=1, fmax=40, picks='all', n_jobs=-2)
        psd_unaggregated[f'{n_sub}-{task}'] = psds
        # NOTE(review): freq_dict is pickled by the save cell but never filled.
        # freq_dict[f'{n_sub}-{task}'] = freqs

        # transform to dB, then average over the epoch dimension
        psd_transformed = (10. * np.log10(psds)).mean(0)

        # Channel names without the bad ones. A new list is built so that the
        # list returned by epoch.ch_names is not modified in place.
        # Assumes the PSD rows follow this order, i.e. that the bad channels
        # are absent from `psds` -- TODO confirm for picks='all'.
        bads = epoch.info['bads']
        ch_nam = [ch for ch in epoch.ch_names if ch not in bads]

        # calculate psds for different frequency bands across different brain areas
        psd_aggregated = {}
        for key, area_channels in ba_list.items():
            # Filter into a fresh list: calling .remove() on ba_list[key] would
            # permanently drop this subject's bad channels for all later subjects.
            channels = [ch for ch in area_channels if ch not in bads]
            rows = np.asarray([ch_nam.index(ch) for ch in channels], dtype=int)
            area_psd = psd_transformed[rows]  # (n_area_channels, n_freqs)
            # aggregate over different frequency bands
            for k, v in frequencies.items():
                if len(channels) == 0:
                    # every channel of this area is bad for this recording
                    psd_aggregated[f'{key}-{k}'] = np.nan
                else:
                    psd_aggregated[f'{key}-{k}'] = area_psd[:, v[0]:v[1]].mean(0).mean(0)

        psd_rows.append(pd.DataFrame(psd_aggregated, index=[f'{n_sub}-{task}']))

# DataFrame.append no longer exists in pandas 2.x; concatenate once instead.
psd_total = pd.concat(psd_rows) if psd_rows else pd.DataFrame()
          

In [284]:
# Concatenate the two dataframes and the two dictionaries, then save them.
#
# The totals written below were only defined by lines that had been commented
# out, so this cell raised NameError on a fresh kernel. They are restored here.
# NOTE(review): `psd_total_first` and `psd_unaggregated_first` (the earlier
# batch) are not created anywhere in the visible notebook; they must already
# exist in the kernel. The later merge cell reads subjects 01-03, 22 and 26-50
# from psd_total_ch2.csv, which suggests the earlier batch holds 26-50 -- confirm.
psds_aggregated_total = pd.concat([psd_total, psd_total_first])
# On duplicate keys the entry from the earlier batch wins (same as `a | b`).
psds_unaggregated_total = {**psd_unaggregated, **psd_unaggregated_first}

# save both aggregated and unaggregated data
with open('psd_unaggragated_ch2.pkl', 'wb') as handle:
    pickle.dump(psds_unaggregated_total, handle)

with open('freq_unaggragated_ch2.pkl', 'wb') as handle:
    pickle.dump(freq_dict, handle)

# `docs_dir` instead of `dir`, so the builtin dir() is not shadowed
docs_dir = '/Users/yeganeh/Codes/otka-preprocessing/docs'
fname = op.join(docs_dir, 'psd_total_ch2.csv')
psds_aggregated_total.to_csv(fname)

In [7]:
# Re-analysis of the first chunk (part 1): load the inputs.

# Unaggregated PSDs saved earlier.
# pickle.load can execute arbitrary code: only open files you produced yourself.
with open('psd_total_unaggregated.pkl', 'rb') as handle:
    psds_ch1 = pickle.load(handle)

# ids map (source of the bad-channel lists): keep only the bad_channels column,
# indexed by bids_id, for the labels 4 through 25.
fname = '/Users/yeganeh/Codes/otka-preprocessing/docs/ids_map.xlsx'
ids_map = (
    pd.read_excel(fname, header=1)[['bids_id', 'bad_channels']]
    .set_index('bids_id')
    .loc[4:25]
)

# Only the entries whose key contains 'all-broadband' are needed.
keys = list(psds_ch1.keys())
psds_ch1_all = {name: psds_ch1[name] for name in keys if 'all-broadband' in name}



In [43]:
# Re-analysis of the first chunk (part 2): aggregate the stored PSDs per brain
# area and frequency band, then write the result to psd_total_ch1.csv.
psd_unaggregated = {}
freq_dict = {}
subjects = ['04','05','06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '23', '24', '25']

# open a bids file and take its channel names
epoch_name = f'sub-01_ses-01_task-baseline1_proc-clean_epo.fif'
epoch_path = op.join(data_dir, f'sub-01/ses-01/eeg/{epoch_name}')  # not `dir`: keep the builtin usable
epoch = mne.read_epochs(epoch_path)
ch_nam_orig = epoch.ch_names

psd_rows = []  # single-row frames, concatenated once after the loop
for n_sub in subjects:
    # bad channels are stored in the ids map as the string form of a Python list
    bads = ast.literal_eval(ids_map.loc[int(n_sub), 'bad_channels'])
    unknown = [ch for ch in bads if ch not in ch_nam_orig]
    if unknown:
        # the previous list.remove() call failed here as well, just less clearly
        raise ValueError(f'sub-{n_sub}: bad channels not in the channel list: {unknown}')
    # Assumes the stored PSD rows follow the reference channel order with this
    # subject's bad channels removed -- TODO confirm against the chunk-1 run.
    ch_nam = [ch for ch in ch_nam_orig if ch not in bads]

    for task in tasks:
        psds = psds_ch1_all[f'{n_sub}-{task}-all-broadband'][0]
        # transform to dB, then average over the epoch dimension
        psd_transformed = (10. * np.log10(psds)).mean(0)

        # calculate psds for different frequency bands across different brain areas
        psd_aggregated = {}
        for key, area_channels in ba_list.items():
            # Filter into a fresh list: calling .remove() on ba_list[key] would
            # permanently drop this subject's bad channels for all later subjects.
            channels = [ch for ch in area_channels if ch not in bads]
            rows = np.asarray([ch_nam.index(ch) for ch in channels], dtype=int)
            area_psd = psd_transformed[rows]  # (n_area_channels, n_freqs)
            # aggregate over different frequency bands
            for k, v in frequencies.items():
                if len(channels) == 0:
                    # every channel of this area is bad for this subject
                    psd_aggregated[f'{key}-{k}'] = np.nan
                else:
                    psd_aggregated[f'{key}-{k}'] = area_psd[:, v[0]:v[1]].mean(0).mean(0)

        psd_rows.append(pd.DataFrame(psd_aggregated, index=[f'{n_sub}-{task}']))

# DataFrame.append no longer exists in pandas 2.x; concatenate once instead.
psd_total = pd.concat(psd_rows) if psd_rows else pd.DataFrame()

fname = '/Users/yeganeh/Codes/otka-preprocessing/docs/psd_total_ch1.csv'
psd_total.to_csv(fname)

Reading /Users/yeganeh/Codes/otka-preprocessing/data/Main-study/derivatives/mne-bids-pipeline/sub-01/ses-01/eeg/sub-01_ses-01_task-baseline1_proc-clean_epo.fif ...
Isotrak not found
    Found the data of interest:
        t =       0.00 ...     500.00 ms
        0 CTF compensation matrices available
Adding metadata with 2 columns
Replacing existing metadata with 2 columns
534 matching events found
No baseline correction applied
0 projection items activated


In [None]:
# Concatenate all data into one table, ordered by subject id.

# second chunk (subjects 01-03, 22 and 26-50)
fname = '/Users/yeganeh/Codes/otka-preprocessing/docs/psd_total_ch2.csv'
psd_ch2 = pd.read_csv(fname, index_col='Unnamed: 0')

# First chunk (subjects 04-21 and 23-25). It is read back from the CSV written
# by the re-analysis cell instead of using the in-memory `psd_total`, because
# that name is also assigned by the chunk-2 PSD cell and may hold other subjects.
fname = '/Users/yeganeh/Codes/otka-preprocessing/docs/psd_total_ch1.csv'
psd_ch1 = pd.read_csv(fname, index_col='Unnamed: 0')

# label slices are inclusive on both ends
psd_kol = pd.concat([
    psd_ch2.loc['01-baseline1':'03-experience4'],
    psd_ch1.loc['04-baseline1':'21-experience4'],
    psd_ch2.loc['22-baseline1':'22-experience4'],
    psd_ch1.loc['23-baseline1':'25-experience4'],
    psd_ch2.loc['26-baseline1':'50-experience4'],
])

# save
fname = '/Users/yeganeh/Codes/otka-preprocessing/docs/psd_total_all.csv'
psd_kol.to_csv(fname)

In [4]:
# Reload the merged PSD table from disk and preview its first five rows.
fname = '/Users/yeganeh/Codes/otka-preprocessing/docs/psd_total_all.csv'
psd_kol = pd.read_csv(filepath_or_buffer=fname, index_col='Unnamed: 0')
psd_kol.head(n=5)

Unnamed: 0,LF-delta,LF-theta,LF-alpha,LF-beta,LF-gamma,LF-broadband,LC-delta,LC-theta,LC-alpha,LC-beta,...,OZ-alpha,OZ-beta,OZ-gamma,OZ-broadband,all-delta,all-theta,all-alpha,all-beta,all-gamma,all-broadband
01-baseline1,-85.88893,-86.360071,-88.318054,-91.059635,-92.36899,-90.125789,-86.976865,-87.024191,-87.761523,-89.070576,...,-91.260226,-95.580217,-99.464436,-95.114853,-86.425469,-86.218668,-87.470322,-90.958777,-93.706882,-90.369616
01-baseline2,-85.629249,-85.630366,-87.109018,-89.621359,-90.989416,-88.913829,-86.523887,-86.166771,-86.806887,-88.438584,...,-89.494268,-92.433987,-94.90619,-92.161567,-86.125577,-85.516238,-86.544726,-90.030033,-92.990935,-89.579903
01-induction1,-86.057693,-86.440278,-88.128399,-89.850513,-90.405692,-89.096791,-87.152303,-87.118829,-87.687305,-88.310231,...,-91.827773,-96.481848,-101.026757,-96.009599,-86.76288,-86.450303,-87.568596,-90.316335,-92.550534,-89.858162
01-induction2,-86.40078,-86.960309,-89.175798,-93.764504,-97.174768,-92.741408,-86.981856,-87.472497,-89.303543,-93.697076,...,-91.396282,-95.448264,-99.34494,-95.008926,-86.779248,-86.738926,-88.330787,-93.129393,-97.949408,-92.580475
01-induction3,-85.75157,-86.062471,-88.094745,-93.030126,-96.814364,-92.058027,-86.491307,-86.459067,-87.450949,-91.108375,...,-90.253606,-94.018598,-97.29007,-93.62254,-86.102442,-85.776875,-87.119594,-92.238595,-97.169881,-91.699729


In [5]:
# Reshape psd_kol: one row per participant and trial ('NN-trialK'), one column
# per '<area>-<band>-<condition>'. Baseline rows are skipped in this pass.
psd_reformed = pd.DataFrame()

for ind in psd_kol.index:
    # labels look like '01-induction3': chars 0-1 = participant, last char = trial
    condition = ind[3:-1]
    if condition == 'baseline':
        continue
    row_label = ind[:2] + '-' + 'trial' + ind[-1]
    # put each data point in the corresponding place
    for column in psd_kol.columns:
        psd_reformed.loc[row_label, column + '-' + condition] = psd_kol.loc[ind, column]

In [6]:
# Add the baseline values: each baseline measurement of a participant is
# copied to that participant's rows from 'NN-trial1' through 'NN-trial4'.
for ind in psd_kol.index:
    if 'baseline' not in ind:
        continue
    first_trial = ind[:2] + '-' + 'trial1'
    last_trial = ind[:2] + '-' + 'trial4'
    for column in psd_kol.columns:
        a = psd_kol.loc[ind, column]
        # new column name keeps the baseline number, e.g. '<area>-<band>-baseline1'
        psd_reformed.loc[first_trial:last_trial, column + '-' + ind[3:]] = list(np.repeat(a, 4))

  self.obj[key] = infer_fill_value(value)


In [17]:
# Add behavioral measures: reshape the wide per-participant sheet into a long
# table with one row per participant and trial.

# A dedicated name is used for the docs folder so that the EEG `data_dir`
# defined in the setup cell is not overwritten (the PSD cells read from it).
docs_dir = '/Users/yeganeh/Codes/otka-preprocessing/docs'
fname = op.join(docs_dir, 'data_with_hypnotizability.xlsx')

data = pd.read_excel(fname, header=1, index_col='index')

# reform to long format
n_trials = 4
sub_ids = data.index.tolist()
# every participant id repeated once per trial: id1, id1, id1, id1, id2, ...
index = [sub_id for sub_id in sub_ids for _ in range(n_trials)]

# Per-trial variables are stored as '<name>_1' ... '<name>_4'. Take the '_1'
# columns (except the time*/tsz* ones) and strip only the trailing suffix.
columns = [
    name[:-2]
    for name in data.columns
    if name.endswith('_1') and not name.startswith(('time', 'tsz'))
]

df_ = pd.DataFrame(index=index, columns=columns)
df_ = df_.fillna(0)

# Each participant id labels n_trials rows of df_, so a single .loc assignment
# writes the values of trials 1..4 in order.
for sub_id in sub_ids:
    for col in columns:
        df_.loc[sub_id, col] = [
            data.loc[sub_id, f'{col}_{trial}'] for trial in range(1, n_trials + 1)
        ]

# add other variables to this new dataset (one value per participant, repeated per trial)
numeric_col = ['attitude_towards_hypnosis', 'effective_of_hypnoanalgesia', 'knowledge_level_on_hypnosis',
               'motivation_to_get_hypnotized', 'hypnotizability_total', 'datetime']
df_2 = data[numeric_col]
for col in df_2.columns:
    df_[col] = df_2[col].repeat(n_trials)
# df_.procedure_type = df_.procedure_type.astype('category').cat.codes
# df_.description_type = pd.get_dummies(df_.description_type).values.astype('float64')
# df_.trial_type = df_.trial_type.astype('category').cat.codes
# df_.drop(columns='experiences_freetext', inplace=True)
# df_.head()

In [18]:
# Display the long-format behavioral table.
# NOTE(review): the rendered output contains participants' free-text answers
# (experiences_freetext) -- consider clearing it before sharing the notebook.
df_

Unnamed: 0,description_type,expectancy,experiences_freetext,hypnosis_depth,procedure_type,trial_type,attitude_towards_hypnosis,effective_of_hypnoanalgesia,knowledge_level_on_hypnosis,motivation_to_get_hypnotized,hypnotizability_total,datetime
2122611,control,0,"nagyon ellazult voltam, az erzes hasonlitott a...",4,embedded,sham,3,8,6,8,3.0,2021-02-26 12:01:16
2122611,hypnosis,10,ugy ereztem hogy teljesen ellazult vagyok. az ...,6,confusion,True,3,8,6,8,3.0,2021-02-26 12:01:16
2122611,control,1,"ellazult voltam, relaxalt. az izmaim kozul csa...",6,relaxation,True,3,8,6,8,3.0,2021-02-26 12:01:16
2122611,hypnosis,9,"a feher zaj felduhitett, akkor ficeregtem is. ...",3,whitenoise,sham,3,8,6,8,3.0,2021-02-26 12:01:16
2122614,control,2,"Nem igazan tudtam, hogy mit kellene tennem, cs...",0,whitenoise,sham,4,9,6,8,5.0,2021-02-26 15:45:58
...,...,...,...,...,...,...,...,...,...,...,...,...
216814,hypnosis,5,"Semmi különös nem volt.\r\nÚgy érzem, nem tudo...",0,confusion,True,4,5,8,8,1.0,2021-06-08 15:16:08
2161011,control,3,"kényelmetlen a pozíció, fekve biztosan hatásos...",0,relaxation,True,5,10,8,10,5.0,2021-06-10 12:08:37
2161011,hypnosis,8,"lehet, hogy elkerülte a figyelmem, de én nem h...",0,embedded,sham,5,10,8,10,5.0,2021-06-10 12:08:37
2161011,hypnosis,7,na végre! ez volt eddig a leghatásosabb. a be...,7,confusion,True,5,10,8,10,5.0,2021-06-10 12:08:37


In [None]:
# Join the behavioral table and the PSD table on the behavioral participant id.

# open ids map: bids_id -> (behavioral_id, datetime), labels up to 50, duplicate rows removed
fname = '/Users/yeganeh/Codes/otka-preprocessing/docs/ids_map.xlsx'
ids_map = (
    pd.read_excel(fname, header=1, index_col='bids_id')[['behavioral_id', 'datetime']]
    .loc[:50]
    .drop_duplicates()
)

# Re-label the PSD rows with behavioral ids, each id repeated four times.
# NOTE(review): this is positional -- it presumes psd_reformed holds four
# consecutive rows per participant in the same order as ids_map; confirm.
list0 = [beh_id for beh_id in ids_map.loc[:, 'behavioral_id'] for _ in range(4)]
psd_reformed['index'] = list0
psd_reformed.set_index('index', inplace=True)

# Re-index the ids map by behavioral id and pick the matching behavioral rows,
# in ids-map order.
ids_map = ids_map.reset_index().set_index('behavioral_id')
a = list(ids_map.index)
df_new = df_.loc[a]

# Side-by-side concat of the behavioral and PSD columns.
df_psd = pd.concat([df_new, psd_reformed], axis=1)

In [44]:
# Add baseline-corrected columns: for every '<area>-<band>' column, the
# induction and experience values minus the baseline1 value.

## names of columns
col_name = list(psd_kol.columns)
# or:
# col_name = [list(ba_list.keys())[i] + '-' + list(frequencies.keys())[j] for i in range(len(ba_list)) for j in range(len(frequencies))]

diff = {
    f'{col}-{task}-diff': df_psd[f'{col}-{task}'] - df_psd[f'{col}-baseline1']
    for task in ['induction', 'experience']
    for col in col_name
}

df_diff = pd.DataFrame.from_dict(diff)
df_total = pd.concat([df_psd, df_diff], axis=1)

# save the full table (behavioral + PSD + differences)
fname = '/Users/yeganeh/Codes/otka-preprocessing/docs/data_with_psds.csv'
df_total.to_csv(fname)

In [None]:
#
# Outline of a talk: we want to develop a computational model of hypnosis that incorporates all the components of hypnosis (this seems to be a mission in the field; see Banyayi's and Jensen's papers).
# Instead of "first part" and "second part": first part: which features are the most important for our model. Second part: a multibrain model of hypnosis.
# Intro: what we know from the literature (from my previous talks).
# Design of the study.
# To answer this question we designed this analysis pipeline (it is only a first step towards building a model of hypnosis); other features should be added to this exploratory analysis.
# (Open science: the code is completely automated and therefore reproducible.)

# create the diagram and add it there