## [Working title]

Authors: Mitchell J. S., Anijärv, T.E., Hermens, D.F.

Code created by Toomas Erik Anijärv on 15.05.2023

You are free to use this or any other code from this repository for your own projects and publications. Citation or reference to the repository is not required, but would be much appreciated (see more on README.md).

In [1]:
# Import packages
import mne, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import neurokit2 as nk

# Set default directory
# NOTE: this is an absolute, machine-specific path; adjust it to the local clone of the repository.
os.chdir('/Users/tanijarv/Documents/GitHub/EEG-pyline')
# Only show MNE messages at 'error' level
mne.set_log_level('error')

# Import functions
# NOTE(review): these look like packages resolved relative to the working directory,
# so the os.chdir() call above presumably has to run before these imports - confirm.
import signal_processing.pre_process as pre_process
import basic.arrange_data as arrange

Define these variables!

In [2]:
# Root folder holding the raw EEG recordings
raw_folder = 'Data/Raw/OKTOS/Eyes Closed'
# Root folder receiving the cleaned epochs files
clean_folder = 'Data/Clean/OKTOS/Eyes Closed'
# Folder in which the results are saved
results_folder = 'Results/OKTOS_complexity'

# One sub-folder per experiment (i.e. timepoint or group); each is joined onto
# the raw/clean folders above when files are read or written
exp_folder = [
    'Baseline',
    '6 weeks',
    '10 weeks',
]
# Timepoint code found in the subject file name -> label used in the results table
dict_timepoint = {
    '00A': 'PRE',
    '06D': 'POST',
    '07A': 'FUP',
}

### PRE-PROCESSING

Run this to set the pre-processing parameters (montage, EOG and stimulus channels, reference, epoch duration and filter design).

In [None]:
# Electrode layout name (passed to mne.channels.make_standard_montage in the pre-processing loop)
montage = 'biosemi32'
# External channels EXG1..EXG8 (EOG + mastoid channels)
eog_channels = ['EXG{}'.format(number) for number in range(1, 9)]
# Channel carrying the event markers
stimulus_channel = 'Status'
# Re-referencing scheme ('average' = average across all channels)
reference = 'average'
# Epochs duration used when splitting the signal into epochs
epochs_duration = 5

# Parameters for filter design (0.5-50 Hz band-pass, zero-phase FIR with Hamming window)
filter_design = {
    'l_freq': 0.5,
    'h_freq': 50,
    'filter_length': 'auto',
    'method': 'fir',
    'l_trans_bandwidth': 'auto',
    'h_trans_bandwidth': 'auto',
    'phase': 'zero',
    'fir_window': 'hamming',
    'fir_design': 'firwin',
}

Run this to pre-process the raw EEG files to clean epochs objects.

In [None]:
for exp in exp_folder:
    # Get directories of raw EEG files and set export directory for clean files
    dir_inprogress = os.path.join(raw_folder,exp)
    export_dir = os.path.join(clean_folder,exp)
    file_dirs, subject_names = arrange.read_files(dir_inprogress,'.bdf')

    # Loop through all the subjects' directories (EEG files directories)
    for i in range(len(file_dirs)):
        # Read in the raw EEG data
        raw = mne.io.read_raw_bdf(file_dirs[i], infer_types=True, eog=eog_channels,
                                stim_channel=stimulus_channel)

        # Set the right montage (Biosemi32) and set reference as average across all channels
        raw = raw.set_montage(mne.channels.make_standard_montage(montage)).load_data()\
                .set_eeg_reference(ref_channels=reference)

        # Find event markers for the start and end of resting state recordings
        events = mne.find_events(raw, stim_channel=stimulus_channel, consecutive=False, output='offset')
        # If there is 3 events, then crop the signal by the first and last event point
        if len(events) >= 3:
            tminmax = [events[0][0]/raw.info['sfreq'], events[-1][0]/raw.info['sfreq']]
            # If there is more than 3, warn the user (as probably requires manual processing)
            if len(events) > 3:
                warnings.warn('\nMore than 3 event points found for {}\n'.format(subject_names[i]))
        # If there is 1 or 2 event points, check whether they are start or end points or similar to each
        elif len(events) == 1 or len(events) == 2:
            warnings.warn('\nOnly 1 or 2 event point(s) found for {}\n'.format(subject_names[i]))

            if events[0][0] > 100000:
                tminmax = [0, events[0][0]/raw.info['sfreq']]
            else:
                tminmax = [events[0][0]/raw.info['sfreq'], None]
        else:
            tminmax = None
            warnings.warn('\nNO event points found for {}\n'.format(subject_names[i]))

        # Use the markers to crop to EEG signal to leave only the actual resting state
        if tminmax != None:
            cropped_raw = raw.crop(tmin=tminmax[0], tmax=tminmax[1])
            print(('Event markers are following:\n{}\nStarting point: {} s\nEnding point: {} s\n'
            'Total duration: {} s').format(events, tminmax[0], tminmax[1], tminmax[1]-tminmax[0]))
            # Warn if signal length is not what it is expected for a single condition
            if (230 <= tminmax[1]-tminmax[0] <= 250) != True:
                warnings.warn('\nRaw signal length is not between 230-250s for {}\n'.format(subject_names[i]))
        else:
            cropped_raw = raw
            print('Signal NOT cropped.')
        cropped_raw = cropped_raw.drop_channels(stimulus_channel)
        
        # Filter the signal with bandpass filter and remove EOG artefacts with SSP
        filt = pre_process.filter_raw_data(cropped_raw, filter_design, line_remove=None,
                                           eog_channels=eog_channels, plot_filt=False,
                                           savefig=False, verbose=False)

        # Run artefact rejection function (including AutoReject algorithm) and split to 5-sec epochs
        %matplotlib inline
        epochs = pre_process.artefact_rejection(filt, subject_names[i], epo_duration=epochs_duration,
                                                verbose=False)

        # (Optional) For displaying interactive EEG plots
        #%matplotlib qt
        #epochs.plot(n_channels=32,n_epochs=1)

        # Save the cleaned EEG file as .fif file
        try:
            os.makedirs(export_dir)
        except FileExistsError:
            pass
        try:
            mne.Epochs.save(epochs, fname='{}/{}_clean-epo.fif'.format(export_dir,
                                                                    subject_names[i]),
                                                                    overwrite=True)
        except FileExistsError:
            pass

### ANALYSIS

Study we are replicating:
- https://www.nature.com/articles/s41386-023-01586-4#Sec19

Lempel-Ziv Complexity (LZC)
- https://neuropsychology.github.io/NeuroKit/functions/complexity.html#neurokit2.complexity.complexity_lempelziv
- Lempel, A., & Ziv, J. (1976). On the complexity of finite sequences. IEEE Transactions on information theory, 22(1), 75-81. https://doi.org/10.1109/TIT.1976.1055501
- Zhang, Y., Hao, J., Zhou, C., & Chang, K. (2009). Normalized Lempel-Ziv complexity and its application in bio-sequence analysis. Journal of mathematical chemistry, 46(4), 1203-1212. https://doi.org/10.1007/s10910-008-9512-2

Multiscale Sample Entropy (MSE)
- https://neuropsychology.github.io/NeuroKit/functions/complexity.html#entropy-multiscale

"Due to the sensitivity of sample entropy to signal length we computed MSE on non-overlapping 4 s epochs and averaged across the epochs to achieve the final MSE estimate [50]. MSE was estimated using 20 scale factors, m of 2, and r of 0.5."

dimension (m) : default is 3, paper uses 2

tolerance (r) : default is 0.2*signal std, paper uses 0.5

In [15]:
# Keyword arguments forwarded to nk.complexity_lempelziv()
lzc_args = {'symbolize': 'median'}
# Keyword arguments forwarded to nk.entropy_multiscale(); 'scale' also sets the
# number of per-scale MSE columns written to the results table.
# NOTE(review): the paper quoted in this notebook uses 20 scale factors and r = 0.5,
# whereas scale=10 and tolerance='sd' are set here - confirm this is intended.
mse_args = {'method': 'MSEn', 'scale': 10, 'dimension': 2, 'tolerance': 'sd'}

In [30]:
# Compute Lempel-Ziv complexity (LZC) and multiscale sample entropy (MSE) for every
# subject at every experiment (i.e., timepoint). Each measure is computed per channel
# and per epoch, averaged over epochs, then averaged over channels. The result is one
# row per subject file in `df`, with the file-derived name in the 'Subject' column.
n_scales = mse_args['scale']
exp_frames = []
for exp in exp_folder:
    # Get directories of clean EEG files for the current timepoint
    dir_inprogress = os.path.join(clean_folder, exp)
    file_dirs, subject_names = arrange.read_files(dir_inprogress, '_clean-epo.fif')
    n_files = len(file_dirs)

    # Loop through all the subjects' directories (EEG files directories)
    df_exp = pd.DataFrame(index=subject_names)
    for i in range(n_files):
        subject_name = subject_names[i]
        # Read the clean data from the disk
        print('{}: {} ({}/{})'.format(exp, subject_name, i+1, n_files))
        epochs = mne.read_epochs(fname='{}/{}_clean-epo.fif'.format(dir_inprogress, subject_name),
                                 verbose=False)

        # Resample the data to 256 Hz & convert to dataframe
        epochs = epochs.resample(sfreq=256)
        df_epochs = epochs.to_data_frame()
        ch_names = epochs.info['ch_names']

        # Slice the dataframe once per epoch (in order of first appearance) so the
        # row selection is not repeated for every channel and every measure
        epoch_frames = [df_epochs[df_epochs['epoch'] == epo]
                        for epo in df_epochs['epoch'].unique()]

        ### Lempel-Ziv complexity

        # Go through all the channels signals
        lzc_i = []
        for ch in ch_names:
            # Calculate LZC for every epoch of the current channel signal
            lzc_ch = []
            for epo_df in epoch_frames:
                lzc_epo, info = nk.complexity_lempelziv(epo_df[ch], **lzc_args)
                lzc_ch.append(lzc_epo)
            # Average all epochs' LZC values to get a single value for the channel & add to list
            lzc_i.append(np.mean(lzc_ch))
        # Average all the channels' LZC values to get a single value for the subject
        df_exp.loc[subject_name, 'LZC'] = np.mean(lzc_i)

        ### Multiscale Sample Entropy

        # Go through all the channels signals
        mse_i = []
        mse_vals_i = np.zeros(shape=(len(ch_names), n_scales))
        for c, ch in enumerate(ch_names):
            # Go through all epochs in the current channel signal
            mse_ch = []
            mse_vals_epo = []
            for epo_df in epoch_frames:
                # Calculate MSE measures for the current epoch
                mse_epo, info = nk.entropy_multiscale(epo_df[ch].to_numpy(), **mse_args)
                # Keep the total and the per-scale MSE values of the current epoch
                mse_ch.append(mse_epo)
                mse_vals_epo.append(info.get('Value'))
            # Average all epochs' per-scale MSE values for the channel
            mse_vals_i[c] = np.mean(mse_vals_epo, axis=0)
            # Average all epochs' MSE totals to get a single value for the channel & add to list
            mse_i.append(np.mean(mse_ch))
        # Average all the channels' MSE totals & per-scale values to get global values
        mse_vals_i_mean = np.mean(mse_vals_i, axis=0)
        # Add total MSE to dataframe for the subject
        df_exp.loc[subject_name, 'MSE (total)'] = np.mean(mse_i)
        # Add all scales' MSE values to dataframe for the subject
        for scl in range(n_scales):
            df_exp.loc[subject_name, 'MSE (scale={})'.format(scl+1)] = mse_vals_i_mean[scl]

    # Keep the current timepoint data; all timepoints are concatenated once after the loop
    exp_frames.append(df_exp)

# Build the master dataframe (empty if there were no experiment folders)
df = pd.concat(exp_frames) if exp_frames else pd.DataFrame()
df.index.names = ['Subject']
df = df.reset_index()

Files in Data/Clean/OKTOS/Eyes Closed/10 weeks read in: 25
10 weeks: OKTOS_0001_07A_EC (1/25)
10 weeks: OKTOS_0002_07A_EC (2/25)
10 weeks: OKTOS_0003_07A_EC (3/25)
10 weeks: OKTOS_0006_07A_EC (4/25)
10 weeks: OKTOS_0007_07A_EC (5/25)
10 weeks: OKTOS_0008_07A_EC (6/25)
10 weeks: OKTOS_0010_07A_EC (7/25)
10 weeks: OKTOS_0011_07A_EC (8/25)
10 weeks: OKTOS_0012_07A_EC (9/25)
10 weeks: OKTOS_0015_07A_EC (10/25)
10 weeks: OKTOS_0016_07A_EC (11/25)
10 weeks: OKTOS_0018_07A_EC (12/25)
10 weeks: OKTOS_0020_07A_EC (13/25)
10 weeks: OKTOS_0022_07A_EC (14/25)
10 weeks: OKTOS_0023_07A_EC (15/25)
10 weeks: OKTOS_0025_07A_EC (16/25)
10 weeks: OKTOS_0026_07A_EC (17/25)
10 weeks: OKTOS_0027_07A_EC (18/25)
10 weeks: OKTOS_0028_07A_EC (19/25)
10 weeks: OKTOS_0029_07A_EC (20/25)
10 weeks: OKTOS_0031_07A_EC (21/25)
10 weeks: OKTOS_0033_07A_EC (22/25)
10 weeks: OKTOS_0035_07A_EC (23/25)
10 weeks: OKTOS_0038_07A_EC (24/25)
10 weeks: OKTOS_0040_07A_EC (25/25)


In [None]:
# Extract subject ID and timepoint code from the full subject name: the name is split
# from the right on its last two underscores, e.g. 'OKTOS_0001_07A_EC' gives subject
# 'OKTOS_0001' and timepoint code '07A'. Done column-wise, so it does not depend on
# the dataframe's index labels.
name_parts = df['Subject'].str.rsplit('_', n=2)
df['Timepoint'] = name_parts.str[1]
df['Subject'] = name_parts.str[0]

# Move 'Timepoint' right after 'Subject' and translate the codes using the dictionary
df.insert(1, 'Timepoint', df.pop('Timepoint'))
df['Timepoint'] = df['Timepoint'].replace(dict_timepoint)

display(df)
# df.to_excel('{}/df_complexity-entropy.xlsx'.format(results_folder))

Read in the data again.

In [36]:
# Load the complexity/entropy table from the results folder; the first
# spreadsheet column is used as the dataframe index
results_path = '{}/df_complexity-entropy.xlsx'.format(results_folder)
df = pd.read_excel(results_path, index_col=0)

In [119]:
# Count infinite entries in all columns from the third one onwards
np.isinf(df.iloc[:, 2:]).to_numpy().sum()

31

In [120]:
# Work on a copy in which +/- infinity is replaced by NaN (df itself is left unchanged)
df_edit = df.replace(to_replace=[np.inf, -np.inf], value=np.nan)

In [166]:
# fig, ax = plt.subplots(1, 1, dpi=150)

# sns.pointplot(ax=ax, data=df_edit[df_edit['Timepoint']=='PRE'].iloc[:,4:].values,
#               color='blue')
# sns.pointplot(ax=ax, data=df_edit[df_edit['Timepoint']=='POST'].iloc[:,4:].values,
#               color='red')
# sns.pointplot(ax=ax, data=df_edit[df_edit['Timepoint']=='FUP'].iloc[:,4:].values,
#               color='green')
# ax.set_title('Multiscale Sample Entropy')
# ax.set_xlabel('Timescale')
# ax.set_ylabel('Entropy')
# plt.show()

In [152]:
# Display the table in which infinite values have been replaced by NaN
df_edit

Unnamed: 0,Subject,Timepoint,LZC,MSE (total),MSE (scale=1),MSE (scale=2),MSE (scale=3),MSE (scale=4),MSE (scale=5),MSE (scale=6),...,MSE (scale=11),MSE (scale=12),MSE (scale=13),MSE (scale=14),MSE (scale=15),MSE (scale=16),MSE (scale=17),MSE (scale=18),MSE (scale=19),MSE (scale=20)
0,OKTOS_0001,PRE,0.477966,1.467906,0.906817,1.401992,1.474729,1.493229,1.529703,1.578235,...,1.655106,1.634830,1.611169,1.585287,1.549661,1.527281,1.510485,1.503016,1.497446,1.499831
1,OKTOS_0002,PRE,0.538633,1.516030,1.016817,1.565576,1.630699,1.640029,1.652695,1.670641,...,1.606246,1.584452,1.567727,1.591623,1.603369,1.584285,1.569502,1.550875,1.532057,1.536094
2,OKTOS_0003,PRE,0.570050,1.521496,1.171548,1.652550,1.639487,1.594472,1.586869,1.588997,...,1.599142,1.606012,1.605915,1.615304,1.615496,1.614237,1.609809,1.642306,1.634673,
3,OKTOS_0006,PRE,0.552972,1.624186,0.974551,1.585539,1.731536,1.770351,1.804497,1.829811,...,1.845743,1.790133,1.708886,1.654550,1.622263,1.604276,1.594863,1.600312,1.556908,1.531307
4,OKTOS_0007,PRE,0.508891,1.597617,0.816100,1.412260,1.659043,1.797514,1.870841,1.889867,...,1.741755,1.694685,1.675033,1.666897,1.647119,1.620575,1.607568,1.584804,1.576162,1.574493
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,OKTOS_0031,FUP,0.470064,1.579555,0.800644,1.322206,1.498823,1.604386,1.711489,1.807005,...,1.800240,1.716433,1.664358,1.648731,1.659885,1.661897,1.624053,1.608268,1.538461,1.507364
71,OKTOS_0033,FUP,0.490996,1.557328,0.880827,1.472835,1.626219,1.676141,1.708379,1.739904,...,1.699366,1.661753,1.627617,1.615237,1.633440,1.636774,1.616048,1.612088,1.608200,1.569560
72,OKTOS_0035,FUP,0.487237,1.585709,0.894085,1.435820,1.555549,1.593765,1.626122,1.667724,...,1.774987,1.752743,1.743886,1.738927,1.734334,1.733714,1.712461,,1.695346,
73,OKTOS_0038,FUP,0.533490,1.588763,0.881977,1.508484,1.727495,1.800826,1.821965,1.832794,...,1.710213,1.626191,1.607298,1.617685,1.641516,1.642842,1.605853,1.557861,1.525615,1.477517


In [167]:
# var = 'LZC'

# muted_colors = sns.color_palette('muted')
# strp_args = dict(x='Timepoint', y=var, jitter=True, size=6)

# fig, axs = plt.subplots(figsize=(5, 5), layout='tight', dpi=150)

# strp_resp = sns.stripplot(ax=axs, data=df_edit, **strp_args)

# pnt_plt = sns.pointplot(ax=axs, data=df_edit,
#                             x='Timepoint', y=var,
#                             errorbar=None, dodge=False, color='black', 
#                             markers=['v', '^'], linestyles=['--', '-'])