### (WIP) Resting State EEG: entropy and complexity features
Time series analysis of resting state EEG signal using various entropy and complexity measures.

In [1]:
# Import packages
import os, mne
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import neurokit2 as nk

# Set default directory: every relative path used later in this notebook
# (e.g. 'Data/Clean/', 'Results/') is resolved against this working directory.
# NOTE(review): this is an absolute, machine-specific path -- it has to be edited
# before the notebook can run on another computer.
os.chdir('/Users/tanijarv/Documents/GitHub/EEG-pyline')
# Restrict MNE's logging to the 'error' level to keep the cell outputs short
mne.set_log_level('error')

# Import functions
# Project-local module; presumably it is importable only because the working
# directory was changed above, so this import must stay after os.chdir -- confirm.
import basic.arrange_data as arrange

Define these variables!

In [2]:
# --- Input / output locations (relative paths) ---
# Clean epochs files are read from this folder
clean_folder = 'Data/Clean/'
# Results are saved to this folder
results_foldername = 'Results/'

# --- Experiment selection ---
# Sub-folder of the experiment (i.e. timepoint or group) inside the clean folder,
# followed by the acronym used for that experiment condition
exp_folder, exp_condition = 'OKTOS/Eyes Closed/Baseline', 'EC_00'

Run this to set the folders for data.

In [3]:
# Build the path of the folder holding this experiment's clean EEG files
# (clean_folder + exp_folder). No export directory is created or set in this cell.
dir_inprogress = os.path.join(clean_folder,exp_folder)
# arrange.read_files is a project helper; presumably it returns the file paths and
# the subject names of every file in the folder ending with '_clean-epo.fif'
# -- confirm against basic/arrange_data.py
file_dirs, subject_names = arrange.read_files(dir_inprogress,'_clean-epo.fif')

Files in Data/Clean/OKTOS/Eyes Closed/Baseline read in: 1


Study we are replicating:
- https://www.nature.com/articles/s41386-023-01586-4#Sec19

Lempel-Ziv Complexity (LZC)
- https://neuropsychology.github.io/NeuroKit/functions/complexity.html#neurokit2.complexity.complexity_lempelziv
- Lempel, A., & Ziv, J. (1976). On the complexity of finite sequences. IEEE Transactions on information theory, 22(1), 75-81. https://doi.org/10.1109/TIT.1976.1055501
- Zhang, Y., Hao, J., Zhou, C., & Chang, K. (2009). Normalized Lempel-Ziv complexity and its application in bio-sequence analysis. Journal of mathematical chemistry, 46(4), 1203-1212. https://doi.org/10.1007/s10910-008-9512-2

Multiscale Sample Entropy (MSE)
- https://neuropsychology.github.io/NeuroKit/functions/complexity.html#entropy-multiscale

"Due to the sensitivity of sample entropy to signal length we computed MSE on non-overlapping 4 s epochs and averaged across the epochs to achieve the final MSE estimate [50]. MSE was estimated using 20 scale factors, m of 2, and r of 0.5."

dimension (m) : default is 3, paper uses 2

tolerance (r) : default is 0.2*signal std, paper uses 0.5

In [97]:
# Keyword arguments for the Lempel-Ziv complexity computation
lzc_args = {'symbolize': 'median'}

# Keyword arguments for the multiscale entropy computation:
# 20 scale factors and an embedding dimension (m) of 2.
# NOTE(review): tolerance is given as the method name 'sd', not as a number; the
# notes in this notebook quote r = 0.5 for the replicated paper -- confirm that
# 'sd' yields the intended tolerance.
mse_args = {
    'method': 'MSEn',
    'scale': 20,
    'dimension': 2,
    'tolerance': 'sd',
}

In [98]:
# Loop through all the subjects' directories (EEG files directories).
# For every subject, each measure is computed per channel and per epoch, averaged
# across epochs (one value per channel) and then across channels (one value per
# subject). The results are collected in `df`, one row per subject.
df = pd.DataFrame(index=subject_names)
for i in range(len(file_dirs)):
    # Read the clean data from the disk
    print('\n{} in progress:'.format(subject_names[i]))
    epochs = mne.read_epochs(fname='{}/{}_clean-epo.fif'.format(dir_inprogress, subject_names[i]),
                             verbose=False)

    # Resample the data to 256 Hz & convert to dataframe
    epochs = epochs.resample(sfreq=256)
    df_epochs = epochs.to_data_frame()

    # Split the dataframe into one sub-frame per epoch ONCE per subject, instead of
    # re-filtering the whole dataframe for every (channel, epoch) pair in both loops
    # below. sort=False keeps the epochs in order of first appearance, i.e. the same
    # order as df_epochs['epoch'].unique().
    epoch_frames = list(df_epochs.groupby('epoch', sort=False))

    ### Lempel-Ziv complexity

    # Go through all the channels signals
    lzc_i = []
    for ch in epochs.info['ch_names']:
        # Go through all epochs in the current channel signal
        lzc_ch = []
        for epo, epo_frame in epoch_frames:
            # Calculate Lempel-Ziv Complexity (LZC) for the current epoch
            epo_signal = epo_frame[ch]
            lzc_epo, info = nk.complexity_lempelziv(epo_signal, **lzc_args)
            lzc_ch.append(lzc_epo)
        # Average all epochs' LZC values to get a single value for the channel & add to list
        lzc_i.append(np.mean(lzc_ch))
    # Average all the channels' LZC values to get a single value for the subject & add to master dataframe
    lzc_i_mean = np.mean(lzc_i)
    df.loc[subject_names[i], 'LZC'] = lzc_i_mean

    ### Multiscale Sample Entropy

    # Go through all the channels signals
    mse_i = []
    # One row per channel, one column per scale factor
    mse_vals_i = np.zeros(shape=(len(epochs.info['ch_names']), mse_args['scale']))
    for c, ch in enumerate(epochs.info['ch_names']):
        # Go through all epochs in the current channel signal
        mse_ch = []
        mse_vals_epo = []
        for epo, epo_frame in epoch_frames:
            # Calculate Multiscale Sample Entropy (MSE) measures for the current epoch
            epo_signal = epo_frame[ch]
            mse_epo, info = nk.entropy_multiscale(epo_signal.to_numpy(), **mse_args)
            # Get the total and scales' MSE values for the current epoch & add to list including all epochs
            mse_ch.append(mse_epo)
            mse_vals_epo.append(info.get('Value'))
        # Average all epochs' MSE values for every channel for the subject
        # NOTE(review): np.mean propagates NaN, so a single undefined per-scale value
        # in any epoch makes that scale NaN for the channel -- confirm this is intended.
        mse_vals_i[c] = np.mean(mse_vals_epo, axis=0)
        # Average all epochs' MSE totals to get a single value for the channel & add to list
        mse_i.append(np.mean(mse_ch))
    # Average all the channels' MSE totals & values to get global value
    mse_i_mean = np.mean(mse_i)
    mse_vals_i_mean = np.mean(mse_vals_i, axis=0)
    # Add total MSE to dataframe for the subject
    df.loc[subject_names[i], 'MSE (total)'] = mse_i_mean
    # Add all scales' MSE values to dataframe for the subject
    for scl in range(mse_args['scale']):
        df.loc[subject_names[i], 'MSE (scale={})'.format(scl+1)] = mse_vals_i_mean[scl]

display(df)


OKTOS_0002_00A_EC in progress:


Unnamed: 0,LZC,MSE (total),MSE (scale=1),MSE (scale=2),MSE (scale=3),MSE (scale=4),MSE (scale=5),MSE (scale=6),MSE (scale=7),MSE (scale=8),...,MSE (scale=11),MSE (scale=12),MSE (scale=13),MSE (scale=14),MSE (scale=15),MSE (scale=16),MSE (scale=17),MSE (scale=18),MSE (scale=19),MSE (scale=20)
OKTOS_0002_00A_EC,0.538633,1.51603,1.016817,1.565576,1.630699,1.640029,1.652695,1.670641,1.684769,1.684857,...,1.606246,1.584452,1.567727,1.591623,1.603369,1.584285,1.569502,1.550875,1.532057,1.536094
