In [None]:
# the percentage of dropped bad epochs
from pathlib import Path

import numpy as np
import mne

# Tally, over every cleaned recording, how many epochs were originally
# created and how many are still present after cleaning.
full_length = 0  # total number of drop_log entries across all files
kept = 0         # total number of epochs still present (len(epochs))

for epoch_path in sorted(Path('data/clean_data/').glob('*.fif')):
    epochs = mne.read_epochs(epoch_path, verbose=0)
    full_length += len(epochs.drop_log)
    kept += len(epochs)

if full_length == 0:
    # Without this guard an empty folder ends in an opaque ZeroDivisionError.
    raise FileNotFoundError("no epochs found in 'data/clean_data/' (*.fif)")

# Percentage of dropped epochs. Scale to percent first and round last, so
# the displayed value does not pick up floating-point noise from `* 100`.
round(100 * (1 - kept / full_length), 1)

In [1]:
"""run the whole analysis at once"""

# imports
import pandas as pd
from xarray_creator import _cut_noisy
# from preprocessing import _make_montage
# from run_ica import run_ica
import numpy as np
# from autoreject import get_rejection_threshold
# from preprocessing import _epochs_to_continuous
import matplotlib.pyplot as plt
import mne_bids
import mne

# # uploads
# ids_map = pd.read_excel('docs/ids_map.xlsx', header=1, index_col='behavioral_id')
# ids_map = ids_map.loc[:2132614].drop_duplicates('bids_id')
# ids_map = ids_map[['bad_channels','bids_id', 'language', 'true_hyp_ind']]
# ids_map.set_index('bids_id', inplace=True)
# ids_map = ids_map.astype({'true_hyp_ind':int})  # change type of true hypnosis column from float to intger


In [4]:
def _make_montage(path='data/raw/plb-hyp-live2131111.vhdr'):
    """
    Build a montage from the channel positions of a BrainVision recording.

    Parameters
    ----------
    path : str
        Path to the BrainVision header (.vhdr) file.

    Returns
    -------
    pos
        The object returned by ``mne.channels.make_dig_montage`` for the
        mapping channel name -> position (the 'ECG' channel is left out).

    """
    raw = mne.io.read_raw_brainvision(path, verbose=False, misc=['ECG'])
    # a short segment is enough here; cropping keeps data loading fast
    raw.crop(1, 10)
    raw.load_data()
    raw.set_channel_types({'ECG': 'ecg'})

    # Work on a separate list: removing 'ECG' from raw.info['ch_names']
    # itself would alter the raw object.
    names = list(raw.info['ch_names'])
    names.remove('ECG')

    positions = raw._get_channel_positions()

    # Disabled experiment kept for reference: FCz (online reference) could be
    # added by mirroring the CPz position along the y-axis.

    name_to_position = {
        name: position for name, position in zip(names, positions)
    }
    return mne.channels.make_dig_montage(name_to_position)

In [25]:
def make_continious_data(subject, bids_root='data/BIDS_data', tasks='all'):
    """
    Concatenate the recordings of one subject stored in BIDS format.

    The 'baseline1' recording (session '01') is always read first; every
    recording listed in ``tasks`` is then appended to it in the given order.

    Parameters
    ----------
    subject : str
        BIDS subject label passed to ``mne_bids.BIDSPath``.
    bids_root : str
        Root folder of the BIDS dataset.
    tasks : 'all' | str | list of str
        Task names to append after 'baseline1'. 'all' selects the four
        experience recordings followed by 'baseline2'. A single task name
        may be given as a plain string.

    Returns
    -------
    raw
        The 'baseline1' raw object with the other recordings appended.

    """
    session = '01'
    if tasks == 'all':
        tasks = [
            'experience1', 'experience2', 'experience3', 'experience4',
            'baseline2'
        ]
    elif isinstance(tasks, str):
        # a bare string would otherwise be iterated character by character
        tasks = [tasks]

    # open baseline1
    bids_path = mne_bids.BIDSPath(subject=subject, session=session, task='baseline1', root=bids_root)
    raw = mne_bids.read_raw_bids(bids_path, verbose=False)
    raw = _cut_noisy(raw, 'baseline1', 'hun')
    raw.info['bads'] = []

    for task in tasks:
        bids_path = mne_bids.BIDSPath(subject=subject, session=session, task=task, root=bids_root)
        raw_temp = mne_bids.read_raw_bids(bids_path, verbose=False)
        raw_temp.info['bads'] = []
        # only baseline/experience recordings are passed through _cut_noisy
        if task[:-1] in ['baseline', 'experience']:
            print(task)
            raw_temp = _cut_noisy(raw_temp, task, 'hun')
        raw.append(raw_temp)

    return raw

In [None]:
# open data
from mne.preprocessing import ICA
from run_ica import run_ica

# Every recording of the session gets its own ICA run.
tasks = [
    'baseline1',
    'induction1', 'induction2', 'induction3', 'induction4',
    'experience1', 'experience2', 'experience3', 'experience4',
    'baseline2',
]

sub = '01'

for task in tasks:
    path = mne_bids.BIDSPath(
        subject=sub,
        session='01',
        task=task,
        root='data/BIDS_data_test',
    )
    raw = mne_bids.read_raw_bids(path, extra_params={'preload': True})

    # induction recordings skip _cut_noisy; baseline/experience do not
    if task[:-1] in ('experience', 'baseline'):
        raw = _cut_noisy(raw, task, 'hun')

    # Band-pass here; run_ica is called with filter_beforeICA=False below,
    # presumably so that it does not filter a second time.
    raw.filter(1, 42)

    run_ica(
        raw,
        sub,
        task,
        filter_beforeICA=False,
        n_components=30,
        random_state=97,
        show_plot=True,
        report=True,
    )

In [None]:
def extract_psds_freatures(path='docs/1.psd_unaggragated_2nd_analysis.pkl',
                           n_epochs=60,
                           out_path='start_end_features.csv'):
    """
    Summarise the start and end of each unaggregated PSD entry as one number.

    For every key in the pickled dictionary, the first and the last
    ``n_epochs`` items along the first axis are averaged over all of their
    elements (``.mean()`` without an axis). The result is written to a CSV
    file and returned.

    Parameters
    ----------
    path : str
        Pickle file holding a dict of array-like values
        (assumed to be numpy arrays with epochs on the first axis -- TODO confirm).
    n_epochs : int
        Number of leading / trailing items to average. Must be >= 1.
    out_path : str | None
        Where to write the CSV. ``None`` skips writing.

    Returns
    -------
    features : dict
        ``<key>_start_allbroadband`` and ``<key>_end_allbroadband`` entries.

    Raises
    ------
    ValueError
        If ``n_epochs`` is smaller than 1.

    """
    import pickle

    if n_epochs < 1:
        # v[-0:] would silently select the WHOLE array instead of nothing
        raise ValueError(f'n_epochs must be >= 1, got {n_epochs}')

    # NOTE(security): pickle.load can execute arbitrary code -- only open
    # files that come from a trusted source.
    with open(path, 'rb') as handle:
        psds_unagg = pickle.load(handle)

    features = {}
    for k, v in psds_unagg.items():
        features[k+'_start_allbroadband'] = v[:n_epochs].mean()
        features[k+'_end_allbroadband'] = v[-n_epochs:].mean()

    if out_path is not None:
        features_csv = pd.DataFrame.from_dict(features, orient='index')
        features_csv.to_csv(out_path)

    return features

# Run the extraction with its default arguments and show the returned
# feature dict as the cell output.
a = extract_psds_freatures()
a

In [23]:
def extract_psds_freatures(path='psd_unaggragated_2nd_analysis.pkl',
                   n_epochs=60):
    """
    Average the start and the end of each unaggregated PSD entry.

    For every key in the pickled dictionary, the first and the last
    ``n_epochs`` items along the first axis are averaged over that axis
    (``.mean(0)``), so the remaining axes are preserved.

    Parameters
    ----------
    path : str
        Pickle file holding a dict of array-like values
        (assumed to be numpy arrays with epochs on the first axis -- TODO confirm).
    n_epochs : int
        Number of leading / trailing items to average. Must be >= 1.

    Returns
    -------
    features : dict
        ``<key>_start`` and ``<key>_end`` entries.

    Raises
    ------
    ValueError
        If ``n_epochs`` is smaller than 1.

    """
    # imported locally: nothing else in this cell brings `pickle` into scope
    import pickle

    if n_epochs < 1:
        # v[-0:] would silently select the WHOLE array instead of nothing
        raise ValueError(f'n_epochs must be >= 1, got {n_epochs}')

    # NOTE(security): pickle.load can execute arbitrary code -- only open
    # files that come from a trusted source.
    with open(path, 'rb') as handle:
        psds_unagg = pickle.load(handle)

    features = {}
    for k, v in psds_unagg.items():
        features[k+'_start'] = v[:n_epochs].mean(0)
        features[k+'_end'] = v[-n_epochs:].mean(0)

    return features

# Run the extraction with its default arguments and keep the returned
# feature dict in `a`.
a = extract_psds_freatures()

{'52-baseline1_start': array([[2.07489112e-12, 1.35350345e-12, 7.86594658e-13, ...,
         2.12174399e-13, 1.93719270e-13, 2.63079833e-13],
        [1.48188005e-11, 7.14691701e-12, 2.68839863e-12, ...,
         1.99800736e-13, 2.17513783e-13, 1.70989203e-13],
        [2.86676444e-12, 1.94435727e-12, 1.18618219e-12, ...,
         9.79403013e-14, 9.17916050e-14, 8.96356848e-14],
        ...,
        [2.83367902e-12, 2.20236480e-12, 1.22544734e-12, ...,
         7.81582275e-14, 7.60842850e-14, 5.55472724e-14],
        [2.06532008e-12, 1.40067811e-12, 6.68129210e-13, ...,
         3.80705505e-13, 3.64940632e-13, 3.10151437e-13],
        [1.55371227e-12, 1.66060758e-12, 1.38392968e-12, ...,
         4.70733056e-14, 4.02433493e-14, 5.12135396e-14]]),
 '52-baseline1_end': array([[2.47034956e-12, 1.32013845e-12, 8.37966112e-13, ...,
         2.14885560e-13, 1.95559213e-13, 2.22087326e-13],
        [2.01393019e-11, 1.11849341e-11, 4.12484776e-12, ...,
         1.56455191e-13, 1.56324802e-13, 

In [None]:
from calculateDispersion import amplitude_vector, dispersion_report
# _make_montage() is called with its defaults, so the result is the same for
# every recording: build it once instead of once per loop iteration.
pos = _make_montage()

ampVectors = []
for sub in ['01', '02']:
    for task in ['baseline1', 'experience1']:
        bids_path = mne_bids.BIDSPath(subject=sub, session='01', task=task, root='data/BIDS_data')
        raw = mne_bids.read_raw_bids(bids_path, verbose=False)
        # Pass the task actually being processed; this was hard-coded to
        # 'baseline1' even for the 'experience1' recordings.
        raw = _cut_noisy(raw, task, 'hun')
        raw.load_data().filter(0.5, 42, h_trans_bandwidth='auto')
        raw.set_montage(pos)
        ampVectors.append(amplitude_vector(raw, raw.ch_names))

# dispersion_report receives a list of collections; there is one collection here
amp_coll = [ampVectors]
dispersion_report(amp_coll, pos,
                  fname='test', fname_report='test.html',
                  caption=['DV before ICA and Autoreject'], save=False)

In [10]:
# Containers for the recordings and their fitted ICA solutions. They must
# exist before the loop appends to them (NameError on a fresh kernel otherwise).
raws = []
icas = []

for subj in range(4):
    # load_data is called with subj + 1, i.e. subjects 1..4, run 3
    fname = mne.datasets.eegbci.load_data(subj + 1, runs=[3])[0]
    raw = mne.io.read_raw_edf(fname).load_data().resample(50)

    # EEGBCI channel names need to be standardized before they can be
    # matched against the 'standard_1005' montage.
    mne.datasets.eegbci.standardize(raw)
    raw.set_montage('standard_1005')
    # high-pass filter a copy, so the stored `raw` stays unfiltered
    raw_filt = raw.copy().filter(l_freq=1., h_freq=None)
    # fit ICA, using low max_iter for speed
    ica = ICA(n_components=30, max_iter=100, random_state=97)
    ica.fit(raw_filt, verbose='error')
    raws.append(raw)
    icas.append(ica)


Extracting parameters from data/BIDS_data/sub-32/ses-01/eeg/sub-32_ses-01_task-baseline1_eeg.vhdr...
Setting channel info structure...
Reading events from data/BIDS_data/sub-32/ses-01/eeg/sub-32_ses-01_task-baseline1_events.tsv.
Reading channel info from data/BIDS_data/sub-32/ses-01/eeg/sub-32_ses-01_task-baseline1_channels.tsv.
Reading 0 ... 300000  =      0.000 ...   300.000 secs...
EEG channel type selected for re-referencing
Applying a custom ('EEG',) reference.
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter

['ECG']
Consider setting the channel types to be of EEG/sEEG/ECoG/DBS/fNIRS using inst.set_channel_types before calling inst.set_montage, or omit these channels when creating your montage.
  raw = mne.io.read_raw_brainvision(path, verbose=False, misc=['ECG'])
  raw.set_channel_types({'ECG': 'ecg'})
  raw2.set_montage(pos)
['EOG1', 'EOG2']
Consider setting the channel types to be of EEG/sEEG/ECoG/DBS/fNIRS using inst.set_channel_types before calling inst.set_montage, or omit these channels when creating your montage.
  raw2.set_montage(pos)


Selecting by number: 30 components
Fitting ICA took 11.7s.
Using EOG channels: EOG1, EOG2
EOG channel index for this subject is: [26 27]
Filtering the data to remove DC offset to help distinguish blinks from saccades
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 10000 samples (10.000 sec)

Now detecting blinks and generating corresponding events
Found 16 significant peaks
Number of EOG events detected: 16
Not setting metadata
Not setting metadata
16 matching events found
No baseline correction applied
Loading data for 16 events and 1001 original time points ...
1 bad epochs dropp

[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.


    Using multitaper spectrum estimation with 7 DPSS windows
    Using multitaper spectrum estimation with 7 DPSS windows
    Using multitaper spectrum estimation with 7 DPSS windows
Not setting metadata
Not setting metadata
150 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
Not setting metadata
Not setting metadata
150 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
Not setting metadata
Not setting metadata
150 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
    Using multitaper spectrum estimation with 7 DPSS windows
Not setting metadata
Not setting metadata
150 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
    Using multitaper spectrum estimation with 7 DPSS windows
Not setting metadata
Not setting metadata
150 matching events found
No baseline correction applied


[Parallel(n_jobs=8)]: Done  10 tasks      | elapsed:    8.8s


    Using multitaper spectrum estimation with 7 DPSS windows
Not setting metadata
Not setting metadata
150 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
    Using multitaper spectrum estimation with 7 DPSS windows
Not setting metadata
Not setting metadata
150 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
    Using multitaper spectrum estimation with 7 DPSS windows
Not setting metadata
Not setting metadata
150 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
    Using multitaper spectrum estimation with 7 DPSS windows
Not setting metadata
Not setting metadata
150 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
    Using multitaper spectrum estimation with 7 DPSS windows
Not setting metadata
Not setting metadata
150 matching events found
No baseline correction applied


[Parallel(n_jobs=8)]: Done  30 out of  30 | elapsed:   18.1s remaining:    0.0s
[Parallel(n_jobs=8)]: Done  30 out of  30 | elapsed:   18.1s finished


Saving report to : /Users/yeganeh/Codes/otka-preprocessing/data/ica/reports/sub-test_ses-01_task-baseline1_report-ica.html
Writing ICA solution to /Users/yeganeh/Codes/otka-preprocessing/data/ica/fitted_icas/sub-test_ses-01_task-baseline1_ica.fif...
Applying ICA for sub-test...
Applying ICA to Raw instance
    Transforming to ICA space (30 components)
    Zeroing out 1 ICA component
    Projecting back using 58 PCA components
Not setting metadata
Not setting metadata
300 matching events found
No baseline correction applied
0 projection items activated
Loading data for 300 events and 1000 original time points ...
0 bad epochs dropped
Estimating rejection dictionary for eeg
Estimating rejection dictionary for eog
    Rejecting  epoch based on EEG : ['F3', 'F4', 'C3', 'C4', 'F7', 'F8', 'T8', 'Fz', 'Cz', 'AF4', 'F1', 'F2', 'F5', 'FC1', 'FC2', 'FC3', 'FC4', 'FC5', 'FC6', 'FT8', 'C1', 'C2', 'C5', 'C6', 'CP2', 'CP3', 'CP4', 'CP5', 'CP6']
    Rejecting  epoch based on EEG : ['F4', 'C3', 'C4', 

array([  5,   6,  19,  20,  36,  37,  59,  60,  61,  62,  83,  85, 115,
       116, 144, 145, 146, 185, 186, 236, 237, 238, 266, 267])

In [3]:
import pandas as pd
# read codebook and master data from GDrive
cb = pd.read_excel('codebook.xlsx', index_col='Variable name', header=0)
data = pd.read_excel('PLB_HYP_data_MASTER.xlsx', header=0, index_col=0)

# create a new codebook: one row per column of the master data, with the
# same fields as the old codebook
cb2 = pd.DataFrame(index=data.columns, columns=cb.columns)

# fill in information from the old codebook
for idx in data.columns:
    if idx in cb.index:
        cb2.loc[idx] = cb.loc[idx, :]

# fill in information of the power data (label slice: from the first PSD
# variable to the last row)
first_psd = 'LF_delta_baseline1' # first psd feature in the codebook
cb2.loc[first_psd:, 'Measurement unit'] = 'numeric'
cb2.loc[first_psd:, 'Allowed values'] = 'EEG power in dB scale'

# reset index
cb2.reset_index(inplace=True)
cb2.rename(columns={'index': 'Variable name'}, inplace=True)

# get the index of the first psd feature
psd_idx = cb2.index[cb2['Variable name'] == first_psd][0]

# brain area names
brain_area_dict = {
    'LF': 'left frontal',
    'LC': 'left central',
    'LP': 'left parietal',
    'LO': 'left occipital',
    'RF': 'right frontal',
    'RC': 'right central',
    'RP': 'right parietal',
    'RO': 'right occipital',
    'FZ': 'frontal midline',
    'CZ': 'central midline',
    'PZ': 'parietal midline',
    'OZ': 'occipital midline',
}

sections_dict = {
    'baseline1': 'first baseline',
    'baseline2': 'second baseline',
    'experience1': 'experience section of trial one',
    'experience2': 'experience section of trial two',
    'experience3': 'experience section of trial three',
    'experience4': 'experience section of trial four',
    'induction1': 'induction section of trial one',
    'induction2': 'induction section of trial two',
    'induction3': 'induction section of trial three',
    'induction4': 'induction section of trial four',
}


def _describe_psd_variable(name):
    """Turn '<area>_<band>_<section>' into a one-sentence description."""
    x = name.split('_')
    return f'{x[1]} power at the {x[2]} of the study averaged across {x[0]} channels.'


# create description for the power data
psd_descriptions = cb2.loc[psd_idx:, 'Variable name'].apply(_describe_psd_variable)

# Expand the abbreviations ONLY in the generated PSD descriptions. Running
# the replacements over the whole 'Description' column would also rewrite
# text copied from the old codebook wherever it happens to contain one of
# the keys as a substring.
for mapping in (brain_area_dict, sections_dict):
    for key, value in mapping.items():
        psd_descriptions = psd_descriptions.str.replace(key, value, case=True)

cb2.loc[psd_idx:, 'Description'] = psd_descriptions

# upload new codebook in the GDrive
# NOTE(review): to_excel also writes the integer index as a first column by
# default; pass index=False if that column is not wanted.
cb2.to_excel('codebook data dictionary for PLB_HYP_data_MASTER_V2.xlsx')

### Export an MNE object in EEGLAB format.

In [None]:
epochs = mne.read_epochs('data/clean_data/sub-01_ses-01_task-baseline1_proc-clean_epo.fif')
epochs.pick_channels(['F1', 'F2'])

# Join the epochs end to end along the time axis:
# (n_epochs, n_channels, n_times) -> (n_channels, n_epochs * n_times)
data = np.hstack(epochs.get_data())

# Take channel names and sampling rate from the epochs themselves, so the
# info always matches the row order and the true sampling rate of `data`
# (these were hard-coded to ['F1', 'F2'] and 1000).
info = mne.create_info(ch_names=epochs.ch_names,
                       ch_types=['eeg'] * len(epochs.ch_names),
                       sfreq=epochs.info['sfreq'])

simulated_raw = mne.io.RawArray(data, info)
mne.export.export_raw('test', simulated_raw, fmt='eeglab')