# Feature Extraction - Statistical, TFR & PSD Approaches


- Author: Elmo Chavez
- Date: 01-Aug-2023


## Libraries


In [1]:
import pandas as pd
import numpy as np
import mne
import os

Import file with fuctions to get features.


In [2]:
import features_eeg as feeg

## Read the Data


Select folder with the Datasets


In [3]:
from tkinter import Tk
from tkinter.filedialog import askdirectory

root = Tk()
root.withdraw()

path = askdirectory()

root.quit()
root.destroy()

2023-08-04 09:41:39.798 python[16905:316750] +[CATransaction synchronize] called within transaction


Read Participant's info


In [4]:
filename_part = 'participants.tsv'
df_partipants = pd.read_csv(path+'/'+filename_part, sep='\t')
df_partipants.head()

Unnamed: 0,participant_id,Gender,Age,Group,MMSE
0,sub-001,F,57,A,16
1,sub-002,F,78,A,22
2,sub-003,M,70,A,14
3,sub-004,F,67,A,20
4,sub-005,M,70,A,22


In [5]:
df_partipants_ad_ftd = df_partipants[df_partipants['Group'].isin(['A','F'])]

# Encoding Gender, Groups (Classes)
gender_dict = {'M':0, 'F':1}
group_dict = {'A':0, 'F':1}

df_partipants_ad_ftd['Gender'].replace(gender_dict, inplace=True)
df_partipants_ad_ftd['Group'].replace(group_dict, inplace=True)

#df_partipants_ad_ftd['gender'] = df_partipants_ad_ftd['Gender'].replace(gender_dict)
#df_partipants_ad_ftd['group'] = df_partipants_ad_ftd['Group'].replace(group_dict)

print('Only Alzheimer and Fronto Temporal Dementia datasets:',len(df_partipants_ad_ftd))
df_partipants_ad_ftd.head()

Only Alzheimer and Fronto Temporal Dementia datasets: 59


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_partipants_ad_ftd['Gender'].replace(gender_dict, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_partipants_ad_ftd['Group'].replace(group_dict, inplace=True)


Unnamed: 0,participant_id,Gender,Age,Group,MMSE
0,sub-001,1,57,0,16
1,sub-002,1,78,0,22
2,sub-003,0,70,0,14
3,sub-004,1,67,0,20
4,sub-005,0,70,0,22


Reading files


In [6]:
items = os.listdir(path+'/derivatives/')
dir_list = [item for item in items if os.path.isdir(os.path.join(path, item))]
dir_list.sort()

print('Subjects found:',len(dir_list))
print('Subjects:', dir_list[:5])

eeg_data = []

for l in dir_list:
  folder = path+'/derivatives/'+l+'/eeg/'
  f = l+'_task-eyesclosed_eeg.set'
  #print(l)
  r = mne.io.read_raw_eeglab(folder+f, preload=False, verbose='CRITICAL')
  participant_info = {'id': l}
  r.info['subject_info'] = participant_info
  eeg_data.append(r)

print('EEG Files Loaded:',len(eeg_data))

Subjects found: 88
Subjects: ['sub-001', 'sub-002', 'sub-003', 'sub-004', 'sub-005']
EEG Files Loaded: 88


Preselect only AD & FTD with a least 600 seconds of time.

In [7]:
eeg_data_filtered = []
list_participants_id = []

for r in eeg_data:
  # Filtering EEG Data from only Alzheimer and Fronto Temporal Dementia subjects
  if r.info['subject_info']['id'] in df_partipants_ad_ftd['participant_id'].tolist():
    # Filtering EEG Datasets with at least 600 seconds of data recorded.
    if r.tmax >= 600:
      eeg_data_filtered.append(r)
      list_participants_id.append(r.info['subject_info']['id'])

df_partipants_ad_ftd = df_partipants_ad_ftd[df_partipants_ad_ftd['participant_id'].isin(list_participants_id)]
df_partipants_ad_ftd.reset_index(inplace=True, drop=True)

print('Participants Info:',len(df_partipants_ad_ftd))
print('EEG Data Subjects:',len(eeg_data_filtered))
df_partipants_ad_ftd.head()

Participants Info: 51
EEG Data Subjects: 51


Unnamed: 0,participant_id,Gender,Age,Group,MMSE
0,sub-002,1,78,0,22
1,sub-004,1,67,0,20
2,sub-005,0,70,0,22
3,sub-006,1,61,0,14
4,sub-007,1,79,0,20


In [8]:
del df_partipants, eeg_data, items, dir_list

# Epochs

In [9]:
sfreq = eeg_data_filtered[0].info['sfreq']

start_time = 0
end_time = 600

duration = 60.0
overlapping = 20.0

epochs = []

for raw in eeg_data_filtered:
  ep = mne.make_fixed_length_epochs(raw.copy().crop(tmin=start_time, tmax=end_time),
                                    duration=duration, overlap=overlapping, preload=True, verbose='CRITICAL')
  epochs.append(ep)

print('Epochs created:', len(epochs[0].events))
print('Subjects with Epochs:', len(epochs))

Epochs created: 14
Subjects with Epochs: 51


# Feature Extraction

## Statistical Approach

In [10]:
print(epochs[0].ch_names)

['Fp1', 'Fp2', 'F3', 'F4', 'C3', 'C4', 'P3', 'P4', 'O1', 'O2', 'F7', 'F8', 'T3', 'T4', 'T5', 'T6', 'Fz', 'Cz', 'Pz']


## TFR Approach

### Channel: FP1

In [12]:
channels = ['Fp1']
n_cycles = 5
frequencies = np.arange(1, 46, 1)# All frequencies between 1 - 45Hz
method = 'morlet'

subject_features = []
for ep in epochs:
    features = feeg.TFR_Features(ep.copy().pick(channels), freqs=frequencies, n_cycles=n_cycles, method='morlet')
    subject_features.append(features[0])
    
len(subject_features)

51

In [13]:
columns=subject_features[0].dtype.names
df_tfr_features = pd.DataFrame.from_records([item[0] for item in subject_features], columns=columns)
df_tfr_features.head()

Unnamed: 0,Fp1_mean,Fp1_standard_deviation,Fp1_variance,Fp1_peak_to_peak,Fp1_skewness,Fp1_kurtosis
0,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09
1,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09
2,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09
3,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09
4,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09


Save dataframe

In [15]:
filename = '3_tfr_'+method+'_fp1.csv'
df_tfr_features.to_csv(os.getcwd()+'/Training Datasets/'+filename, index=False)

### Channels: All

In [16]:
n_cycles = 5
frequencies = np.arange(1, 46, 1)# All frequencies between 1 - 45Hz
method = 'morlet'

subject_features = []
for ep in epochs:
    features = feeg.TFR_Features(ep, freqs=frequencies, n_cycles=n_cycles, method=method)
    subject_features.append(features[0])
    
len(subject_features)

51

In [17]:
columns=subject_features[0].dtype.names
df_tfr_features_all = pd.DataFrame.from_records([item[0] for item in subject_features], columns=columns)
df_tfr_features_all.head()

Unnamed: 0,Fp1_mean,Fp2_mean,F3_mean,F4_mean,C3_mean,C4_mean,P3_mean,P4_mean,O1_mean,O2_mean,...,O2_kurtosis,F7_kurtosis,F8_kurtosis,T3_kurtosis,T4_kurtosis,T5_kurtosis,T6_kurtosis,Fz_kurtosis,Cz_kurtosis,Pz_kurtosis
0,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09,...,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09,6.147795e-09
1,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09,...,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09,7.081915e-09
2,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09,...,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09,7.660626e-09
3,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09,...,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09,7.503065e-09
4,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09,...,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09,7.178072e-09


Save dataframe

In [18]:
filename = '3_tfr_'+method+'_all.csv'
df_tfr_features_all.to_csv(os.getcwd()+'/Training Datasets/'+filename, index=False)

## PSD Approach

### Channel: FP1

In [10]:
channels = ['Fp1']
method = 'multitaper'

subject_features = []
for ep in epochs:
    features = feeg.PSD_Features(ep.copy().pick(channels), 1, 45, method)
    subject_features.append(features[0])
    
len(subject_features)

51

In [11]:
columns=subject_features[0].dtype.names
df_psd_features_fp1 = pd.DataFrame.from_records(subject_features, columns=columns)
df_psd_features_fp1.head()

Unnamed: 0,Fp1_mean,Fp1_standard_deviation,Fp1_variance,Fp1_peak_to_peak,Fp1_skewness,Fp1_kurtosis
0,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09
1,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09
2,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09
3,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09
4,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09


Save to dataframe

In [13]:
filename = '3_psd_'+method+'_fp1.csv'
df_psd_features_fp1.to_csv(os.getcwd()+'/Training Datasets/'+filename, index=False)

### Channels: All

In [14]:
method = 'multitaper'

subject_features = []
for ep in epochs:
    features = feeg.PSD_Features(ep, 1, 45, method)
    subject_features.append(features[0])
    
len(subject_features)

51

In [17]:
columns=subject_features[0].dtype.names
df_psd_features_all = pd.DataFrame.from_records(subject_features, columns=columns)
df_psd_features_all.head()

Unnamed: 0,Fp1_mean,Fp2_mean,F3_mean,F4_mean,C3_mean,C4_mean,P3_mean,P4_mean,O1_mean,O2_mean,...,O2_kurtosis,F7_kurtosis,F8_kurtosis,T3_kurtosis,T4_kurtosis,T5_kurtosis,T6_kurtosis,Fz_kurtosis,Cz_kurtosis,Pz_kurtosis
0,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09,...,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09,3.639793e-09
1,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09,...,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09,4.138072e-09
2,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09,...,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09,4.384113e-09
3,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09,...,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09,4.591252e-09
4,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09,...,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09,4.193681e-09


In [18]:
filename = '3_psd_'+method+'_all.csv'
df_psd_features_all.to_csv(os.getcwd()+'/Training Datasets/'+filename, index=False)