# Feature Extraction


- Author: Elmo Chavez
- Date: 20-Aug-2023

**Description**

> [...]


## Read the Datasets


Libraries


In [1]:
import numpy as np
import pandas as pd
import mne
import os

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
current_directory = os.getcwd()  # Get the current working directory
parent_directory = os.path.dirname(current_directory)  # Get the parent directory

In [3]:
import sys
sys.path.append(parent_directory)
import utils as feeg

Set the path to read the data


In [4]:
from tkinter import Tk
from tkinter.filedialog import askdirectory

root = Tk()
root.withdraw()

path = askdirectory()

root.quit()
root.destroy()

2023-08-26 23:24:38.508 python[83265:1050073] +[CATransaction synchronize] called within transaction


### Participants previosly selected


In [5]:
filename_part = 'participants_selected.csv'

df_participants = pd.read_csv(parent_directory+'/Training Datasets/'+filename_part)
df_participants.head()

Unnamed: 0,participant_id,Gender,Age,Group,MMSE
0,sub-002,1,78,0,22
1,sub-004,1,67,0,20
2,sub-005,0,70,0,22
3,sub-006,1,61,0,14
4,sub-007,1,79,0,20


### Read all the EEG Raw datasets


In [6]:
dir_list = df_participants['participant_id'].to_list()

print('Subjects found:', len(dir_list))
print('Subjects:', dir_list[:5])

raw_data = []

for l in dir_list:
  folder = path+'/derivatives/'+l+'/eeg/'
  f = l+'_task-eyesclosed_eeg.set'
  #print(l)
  r = mne.io.read_raw_eeglab(folder+f, preload=False, verbose='CRITICAL')
  participant_info = {'id': l}
  r.info['subject_info'] = participant_info
  raw_data.append(r)

print('EEG Files Loaded:',len(raw_data))

Subjects found: 51
Subjects: ['sub-002', 'sub-004', 'sub-005', 'sub-006', 'sub-007']
EEG Files Loaded: 51


## Feature Extraction


### 1. Feature Extracion with PSD Features by Frequency Bands


#### FP1


Calculate Features for each Subject


In [7]:
subject_features = []

for raw in raw_data:
    results = feeg.get_Features_PSD(raw, channels=['Fp1'])
    subject_features.append(results)

df_psd_bands = pd.DataFrame(subject_features)
df_psd_bands.head()

Unnamed: 0,delta_total_power,delta_relative_power,delta_average_power,delta_spectral_entropy,delta_peak_to_peak,delta_std_dev,delta_kurtosis,delta_skewness,theta_total_power,theta_relative_power,...,beta_kurtosis,beta_skewness,gamma_total_power,gamma_relative_power,gamma_average_power,gamma_spectral_entropy,gamma_peak_to_peak,gamma_std_dev,gamma_kurtosis,gamma_skewness
0,2.384185e-09,0.768581,9.727398e-13,5.64289e-08,0.000222,2.8e-05,-0.302043,0.034478,6.691856e-10,0.215723,...,-0.111933,0.003,5.473149e-11,0.017644,2.233027e-14,1.574884e-09,0.00017,2e-06,0.495481,0.00085
1,2.761436e-09,0.819641,1.126657e-12,6.488888e-08,0.000301,3e-05,-0.217582,-0.030682,5.747736e-10,0.170603,...,-0.029379,0.000894,1.225167e-10,0.036365,4.998641e-14,3.4391e-09,0.000229,4e-06,0.2598,-0.001878
2,2.843382e-09,0.798515,1.16009e-12,6.679794e-08,0.000382,3.1e-05,-0.026744,0.022067,6.366061e-10,0.17878,...,0.213,-0.011728,1.53499e-10,0.043108,6.262711e-14,4.26812e-09,0.000134,4e-06,0.801791,-0.00044
3,2.672633e-09,0.711554,1.090426e-12,6.290695e-08,0.00022,3e-05,-0.274343,0.018309,5.638689e-10,0.150123,...,-0.277512,0.051034,4.75668e-11,0.012664,1.94071e-14,1.381791e-09,8.2e-05,2e-06,-0.115277,-7.5e-05
4,2.767352e-09,0.821226,1.12907e-12,6.513636e-08,0.000216,3e-05,-0.337693,-0.087539,7.128454e-10,0.211541,...,-0.224022,0.018619,4.902646e-11,0.014549,2.000264e-14,1.424083e-09,0.000102,2e-06,-0.189164,0.001722


Merge with Participants Info


In [8]:
df_psd_features_fp1 = df_participants.merge(df_psd_bands, left_index=True, right_index=True)
df_psd_features_fp1.head()

Unnamed: 0,participant_id,Gender,Age,Group,MMSE,delta_total_power,delta_relative_power,delta_average_power,delta_spectral_entropy,delta_peak_to_peak,...,beta_kurtosis,beta_skewness,gamma_total_power,gamma_relative_power,gamma_average_power,gamma_spectral_entropy,gamma_peak_to_peak,gamma_std_dev,gamma_kurtosis,gamma_skewness
0,sub-002,1,78,0,22,2.384185e-09,0.768581,9.727398e-13,5.64289e-08,0.000222,...,-0.111933,0.003,5.473149e-11,0.017644,2.233027e-14,1.574884e-09,0.00017,2e-06,0.495481,0.00085
1,sub-004,1,67,0,20,2.761436e-09,0.819641,1.126657e-12,6.488888e-08,0.000301,...,-0.029379,0.000894,1.225167e-10,0.036365,4.998641e-14,3.4391e-09,0.000229,4e-06,0.2598,-0.001878
2,sub-005,0,70,0,22,2.843382e-09,0.798515,1.16009e-12,6.679794e-08,0.000382,...,0.213,-0.011728,1.53499e-10,0.043108,6.262711e-14,4.26812e-09,0.000134,4e-06,0.801791,-0.00044
3,sub-006,1,61,0,14,2.672633e-09,0.711554,1.090426e-12,6.290695e-08,0.00022,...,-0.277512,0.051034,4.75668e-11,0.012664,1.94071e-14,1.381791e-09,8.2e-05,2e-06,-0.115277,-7.5e-05
4,sub-007,1,79,0,20,2.767352e-09,0.821226,1.12907e-12,6.513636e-08,0.000216,...,-0.224022,0.018619,4.902646e-11,0.014549,2.000264e-14,1.424083e-09,0.000102,2e-06,-0.189164,0.001722


Save Training Dataset


In [9]:
filename = '3_psd_bands_features_fp1.csv'
df_psd_features_fp1.to_csv(parent_directory+'/Training Datasets/'+filename, index=False)

#### All Channels


Calculate Features for each Subject


In [10]:
subject_features = []
for raw in raw_data:
    results = feeg.get_Features_PSD(raw)
    subject_features.append(results)

df_psd_bands = pd.DataFrame(subject_features)
df_psd_bands.head()

Unnamed: 0,delta_total_power,delta_relative_power,delta_average_power,delta_spectral_entropy,delta_peak_to_peak,delta_std_dev,delta_kurtosis,delta_skewness,theta_total_power,theta_relative_power,...,beta_kurtosis,beta_skewness,gamma_total_power,gamma_relative_power,gamma_average_power,gamma_spectral_entropy,gamma_peak_to_peak,gamma_std_dev,gamma_kurtosis,gamma_skewness
0,4.640153e-08,0.762022,9.964038e-13,1e-06,0.000239,2.8e-05,-0.316303,0.023121,1.378083e-08,0.226314,...,-0.18198,-0.00444,7.183388e-10,0.011797,1.542526e-14,2.091591e-08,0.000262,2e-06,-9.2e-05,-0.000125
1,5.311004e-08,0.82136,1.140459e-12,1e-06,0.000353,3.1e-05,-0.219722,-0.017265,1.208507e-08,0.186899,...,-0.155691,-0.001708,1.670461e-09,0.025834,3.587066e-14,4.706557e-08,0.000229,3e-06,-0.042265,-0.000748
2,5.083291e-08,0.818591,1.091561e-12,1e-06,0.000493,3e-05,-0.097153,0.037445,1.159714e-08,0.186755,...,0.140466,-0.011447,1.358664e-09,0.021879,2.917528e-14,3.8489e-08,0.000211,3e-06,1.099518,0.001266
3,5.123265e-08,0.739437,1.100145e-12,1e-06,0.000237,3e-05,-0.283563,0.025701,1.078692e-08,0.155687,...,-0.204128,0.001165,1.158447e-09,0.01672,2.487593e-14,3.308492e-08,0.000103,3e-06,0.084531,-0.000323
4,4.966144e-08,0.825354,1.066405e-12,1e-06,0.000255,2.9e-05,-0.324209,-0.060279,1.231365e-08,0.204648,...,-0.234384,-0.001882,8.558305e-10,0.014224,1.837769e-14,2.488476e-08,0.000103,2e-06,-0.141931,0.000154


Merge with Participants Info


In [11]:
df_psd_features_all = df_participants.merge(df_psd_bands, left_index=True, right_index=True)
df_psd_features_all.head()

Unnamed: 0,participant_id,Gender,Age,Group,MMSE,delta_total_power,delta_relative_power,delta_average_power,delta_spectral_entropy,delta_peak_to_peak,...,beta_kurtosis,beta_skewness,gamma_total_power,gamma_relative_power,gamma_average_power,gamma_spectral_entropy,gamma_peak_to_peak,gamma_std_dev,gamma_kurtosis,gamma_skewness
0,sub-002,1,78,0,22,4.640153e-08,0.762022,9.964038e-13,1e-06,0.000239,...,-0.18198,-0.00444,7.183388e-10,0.011797,1.542526e-14,2.091591e-08,0.000262,2e-06,-9.2e-05,-0.000125
1,sub-004,1,67,0,20,5.311004e-08,0.82136,1.140459e-12,1e-06,0.000353,...,-0.155691,-0.001708,1.670461e-09,0.025834,3.587066e-14,4.706557e-08,0.000229,3e-06,-0.042265,-0.000748
2,sub-005,0,70,0,22,5.083291e-08,0.818591,1.091561e-12,1e-06,0.000493,...,0.140466,-0.011447,1.358664e-09,0.021879,2.917528e-14,3.8489e-08,0.000211,3e-06,1.099518,0.001266
3,sub-006,1,61,0,14,5.123265e-08,0.739437,1.100145e-12,1e-06,0.000237,...,-0.204128,0.001165,1.158447e-09,0.01672,2.487593e-14,3.308492e-08,0.000103,3e-06,0.084531,-0.000323
4,sub-007,1,79,0,20,4.966144e-08,0.825354,1.066405e-12,1e-06,0.000255,...,-0.234384,-0.001882,8.558305e-10,0.014224,1.837769e-14,2.488476e-08,0.000103,2e-06,-0.141931,0.000154


Save Training Dataset


In [12]:
filename = '3_psd_bands_features_all.csv'
df_psd_features_all.to_csv(parent_directory+'/Training Datasets/'+filename, index=False)

## 2. Feature Extraction with TFR Features by Frequency Bands


### FP1


Calculate Features for each Subject


In [13]:
subject_features = []

for raw in raw_data:
    results = feeg.get_Features_TFR(raw, channels=['Fp1'])
    subject_features.append(results)

df_tfr_bands = pd.DataFrame(subject_features)
df_tfr_bands.head()

Unnamed: 0,delta_total_power,delta_average_power,delta_peak_power,delta_std,delta_kurtosis,delta_skewness,theta_total_power,theta_average_power,theta_peak_power,theta_std,...,beta_peak_power,beta_std,beta_kurtosis,beta_skewness,gamma_total_power,gamma_average_power,gamma_peak_power,gamma_std,gamma_kurtosis,gamma_skewness
0,0.019382,1.615161e-07,6.714592e-07,2.170376e-07,-0.321085,1.224449,0.000745,6.20652e-09,1.37162e-08,2.218673e-09,...,3.612499e-09,2.619153e-10,18.534499,2.747591,9.5e-05,4.5317e-11,3.106797e-09,9.963583e-11,282.069647,12.964888
1,0.022172,1.847633e-07,7.40517e-07,2.480452e-07,-0.312793,1.226578,0.000732,6.103989e-09,1.653594e-08,3.012602e-09,...,3.277944e-09,1.794504e-10,5.03129,1.336889,0.000199,9.465428e-11,4.366202e-09,1.847765e-10,149.16825,8.822134
2,0.02254,1.878299e-07,7.92484e-07,2.495321e-07,-0.321326,1.217228,0.000807,6.72568e-09,1.850089e-08,3.373933e-09,...,2.240047e-09,2.304361e-10,3.158683,1.444451,0.000236,1.125425e-10,2.232037e-09,1.920152e-10,10.463997,2.696169
3,0.023753,1.979383e-07,8.168917e-07,2.73643e-07,-0.171644,1.268533,0.000717,5.974448e-09,1.50041e-08,3.012578e-09,...,7.074382e-09,1.431966e-09,3.228494,2.105543,8.9e-05,4.243826e-11,6.034054e-10,6.180919e-11,2.767625,1.847439
4,0.020673,1.722733e-07,7.22185e-07,2.24522e-07,-0.316826,1.209621,0.000905,7.538501e-09,1.716169e-08,3.570349e-09,...,1.580119e-09,2.337696e-10,1.041545,1.289885,8.8e-05,4.194346e-11,4.375251e-10,6.052442e-11,2.135301,1.736876


Merge with Participants Info


In [14]:
df_tfr_features_fp1 = df_participants.merge(df_psd_bands, left_index=True, right_index=True)
df_tfr_features_fp1.head()

Unnamed: 0,participant_id,Gender,Age,Group,MMSE,delta_total_power,delta_relative_power,delta_average_power,delta_spectral_entropy,delta_peak_to_peak,...,beta_kurtosis,beta_skewness,gamma_total_power,gamma_relative_power,gamma_average_power,gamma_spectral_entropy,gamma_peak_to_peak,gamma_std_dev,gamma_kurtosis,gamma_skewness
0,sub-002,1,78,0,22,4.640153e-08,0.762022,9.964038e-13,1e-06,0.000239,...,-0.18198,-0.00444,7.183388e-10,0.011797,1.542526e-14,2.091591e-08,0.000262,2e-06,-9.2e-05,-0.000125
1,sub-004,1,67,0,20,5.311004e-08,0.82136,1.140459e-12,1e-06,0.000353,...,-0.155691,-0.001708,1.670461e-09,0.025834,3.587066e-14,4.706557e-08,0.000229,3e-06,-0.042265,-0.000748
2,sub-005,0,70,0,22,5.083291e-08,0.818591,1.091561e-12,1e-06,0.000493,...,0.140466,-0.011447,1.358664e-09,0.021879,2.917528e-14,3.8489e-08,0.000211,3e-06,1.099518,0.001266
3,sub-006,1,61,0,14,5.123265e-08,0.739437,1.100145e-12,1e-06,0.000237,...,-0.204128,0.001165,1.158447e-09,0.01672,2.487593e-14,3.308492e-08,0.000103,3e-06,0.084531,-0.000323
4,sub-007,1,79,0,20,4.966144e-08,0.825354,1.066405e-12,1e-06,0.000255,...,-0.234384,-0.001882,8.558305e-10,0.014224,1.837769e-14,2.488476e-08,0.000103,2e-06,-0.141931,0.000154


Save Training Dataset


In [15]:
filename = '3_tfr_bands_features_fp1.csv'
df_tfr_features_fp1.to_csv(parent_directory+'/Training Datasets/'+filename, index=False)

### All Channels


Calculate Features for each Subject


In [16]:
subject_features = []

for raw in raw_data:
    results = feeg.get_Features_TFR(raw)
    subject_features.append(results)

df_tfr_bands = pd.DataFrame(subject_features)
df_tfr_bands.head()

Unnamed: 0,delta_total_power,delta_average_power,delta_peak_power,delta_std,delta_kurtosis,delta_skewness,theta_total_power,theta_average_power,theta_peak_power,theta_std,...,beta_peak_power,beta_std,beta_kurtosis,beta_skewness,gamma_total_power,gamma_average_power,gamma_peak_power,gamma_std,gamma_kurtosis,gamma_skewness
0,0.372864,1.635367e-07,6.793986e-07,2.183396e-07,-0.380218,1.207277,0.014983,6.571404e-09,1.520874e-08,2.441052e-09,...,7.070273e-09,2.974614e-10,13.23829,2.051895,0.001359,3.407242e-11,6.625042e-09,8.399591e-11,201.578651,9.283318
1,0.426927,1.872485e-07,8.207718e-07,2.51718e-07,-0.324383,1.224164,0.015233,6.681178e-09,1.728118e-08,3.015857e-09,...,4.785417e-09,2.567673e-10,7.084948,1.736381,0.002768,6.936999e-11,4.366202e-09,1.38756e-10,33.02651,3.377042
2,0.388216,1.7027e-07,8.233415e-07,2.238063e-07,-0.225915,1.237077,0.014764,6.475655e-09,2.767134e-08,3.217298e-09,...,6.679231e-09,3.029934e-10,4.48617,1.493655,0.002275,5.700572e-11,3.826836e-09,1.248895e-10,21.44859,3.514905
3,0.461865,2.025725e-07,8.538367e-07,2.810644e-07,-0.198528,1.263394,0.013639,5.982087e-09,1.674307e-08,3.015674e-09,...,1.116373e-08,1.111641e-09,3.09288,1.959567,0.002048,5.132567e-11,1.632791e-09,8.924213e-11,3.113488,1.85658
4,0.37557,1.647235e-07,7.22185e-07,2.149286e-07,-0.361349,1.19898,0.015531,6.811945e-09,1.898941e-08,3.216119e-09,...,1.5936e-09,2.140519e-10,0.923487,1.236433,0.001548,3.879602e-11,7.948744e-10,5.813473e-11,2.702966,1.784417


Merge with Participants Info


In [18]:
df_tfr_features_all = df_participants.merge(df_tfr_bands, left_index=True, right_index=True)
df_tfr_features_all.head()

Unnamed: 0,participant_id,Gender,Age,Group,MMSE,delta_total_power,delta_average_power,delta_peak_power,delta_std,delta_kurtosis,...,beta_peak_power,beta_std,beta_kurtosis,beta_skewness,gamma_total_power,gamma_average_power,gamma_peak_power,gamma_std,gamma_kurtosis,gamma_skewness
0,sub-002,1,78,0,22,0.372864,1.635367e-07,6.793986e-07,2.183396e-07,-0.380218,...,7.070273e-09,2.974614e-10,13.23829,2.051895,0.001359,3.407242e-11,6.625042e-09,8.399591e-11,201.578651,9.283318
1,sub-004,1,67,0,20,0.426927,1.872485e-07,8.207718e-07,2.51718e-07,-0.324383,...,4.785417e-09,2.567673e-10,7.084948,1.736381,0.002768,6.936999e-11,4.366202e-09,1.38756e-10,33.02651,3.377042
2,sub-005,0,70,0,22,0.388216,1.7027e-07,8.233415e-07,2.238063e-07,-0.225915,...,6.679231e-09,3.029934e-10,4.48617,1.493655,0.002275,5.700572e-11,3.826836e-09,1.248895e-10,21.44859,3.514905
3,sub-006,1,61,0,14,0.461865,2.025725e-07,8.538367e-07,2.810644e-07,-0.198528,...,1.116373e-08,1.111641e-09,3.09288,1.959567,0.002048,5.132567e-11,1.632791e-09,8.924213e-11,3.113488,1.85658
4,sub-007,1,79,0,20,0.37557,1.647235e-07,7.22185e-07,2.149286e-07,-0.361349,...,1.5936e-09,2.140519e-10,0.923487,1.236433,0.001548,3.879602e-11,7.948744e-10,5.813473e-11,2.702966,1.784417


Save Training Dataset


In [19]:
filename = '3_tfr_bands_features_all.csv'
df_tfr_features_all.to_csv(parent_directory+'/Training Datasets/'+filename, index=False)