# Feature Extraction using PSD without Frequency Bands

**Description**:\
Prepare the training dataset by computing Power Spectral Density (PSD) features with MNE and deriving the features for the classifier model, following the work done in [1. Preselect EEG Datasets](https://github.com/sobieddch90/mcd-udg-tfm-eeg-classification/blob/main/Feature%20Extraction/1.%20Preselect%20EEG%20Datasets.ipynb) but without splitting the spectrum into frequency bands.

**Author**: Elmo Chavez\
**Date**: November 25, 2023

## Libraries

In [1]:
import pandas as pd
import numpy as np
import sys
import os
import matplotlib.pyplot as plt

# Make the parent of the current working directory importable; presumably this
# is where the project-local `eeg_mne.py` lives (it is imported right after).
# `os.path.dirname('eeg_mne.py')` is always '', so the previous expression
# already reduced to the absolute path of '..' -- spell that out directly.
path_eeg_mne = os.path.abspath(os.pardir)
# Guard the append so re-running this cell does not stack duplicate entries.
if path_eeg_mne not in sys.path:
    sys.path.append(path_eeg_mne)
import eeg_mne

## Read Datasets

Set the Dataset Path

In [2]:
# Relative path to the `derivatives` folder of dataset ds004504; it is handed
# to eeg_mne.Epochs_Objects_MultiEEGs when the subjects are loaded.
path = '../ds004504/derivatives'

Participants Preselected

In [3]:
path_training = '../Training Dataset/'
file_part_selected = 'Participants_Selected.csv'

# Load the preselection table and keep only the rows whose `flag` is True.
df_participants_selected = (
    pd.read_csv(path_training + file_part_selected)
    .loc[lambda participants: participants['flag'].eq(True)]
)
# Ids of the retained participants, in table order.
subs_selected = df_participants_selected['participant_id'].tolist()
df_participants_selected.head()

Unnamed: 0,participant_id,Gender,Age,Group,MMSE,time_max,points,sfreq,flag
0,sub-001,0,57,0,16,599.798,299900,500.0,True
1,sub-002,0,78,0,22,793.098,396550,500.0,True
3,sub-004,0,67,0,20,706.098,353050,500.0,True
4,sub-005,1,70,0,22,804.098,402050,500.0,True
5,sub-006,0,61,0,14,632.398,316200,500.0,True


### Read All EEG Subjects using Windows (MNE.Epochs)

Get the windows (MNE `Epochs`) for every selected EEG dataset as a list, keeping all channels.

In [4]:
# Load the selected subjects found under `path` and window their recordings.
# NOTE(review): presumably returns a list with one MNE Epochs object per entry
# of `subs_selected`, in the same order -- later cells pair the two by position;
# confirm against eeg_mne.Epochs_Objects_MultiEEGs.
All_Subs_Windows = eeg_mne.Epochs_Objects_MultiEEGs(path, subs_selected)

EEG Raw Data readed: 44
Epochs (Windows) created for each EEG Data: 11


Show the content of the first subject's windows

In [5]:
# Bare expression so the notebook renders the summary of the first element.
All_Subs_Windows[0]

0,1
Number of events,11
Events,1: 11
Time range,0.000 – 59.998 s
Baseline,off


## Getting Features from All Subjects by using All the Channels

### Sample Features from One Subject

In [6]:
# Preview of the feature extractor on the first subject only.
# NOTE(review): this calls PSD_Features_from_Epochs, while the all-subjects loop
# calls PSD_Features_from_Epochs_2. The summary printed here reports 5 frequency
# bands, so this preview may not reflect the band-free features that are
# actually saved -- confirm which extractor is intended.
subsX1_features = eeg_mne.PSD_Features_from_Epochs(All_Subs_Windows[0])

Summary...
-Number of Windows: 11
-Number of Channels: 19
-Frequency Bands: 5
-Number of features computed: 6
--Total Features 6270


### Features for all the Subjects

In [7]:
# Each subject's epochs are matched to a participant id purely by position, so
# fail loudly if the two lists are out of step instead of labelling features
# with the wrong participant.
if len(All_Subs_Windows) != len(subs_selected):
    raise ValueError(
        'Expected one Epochs object per selected subject, got {} Epochs for {} subjects'.format(
            len(All_Subs_Windows), len(subs_selected)
        )
    )

# One feature record per subject, computed over all channels.
list_sub_features = [
    eeg_mne.PSD_Features_from_Epochs_2(epochs, sub_id=sub_id, show_summary=False)
    for epochs, sub_id in zip(All_Subs_Windows, subs_selected)
]

# One row per subject; reset_index is chained (not in place) so the cell can be
# re-run without mutating a frame that was already displayed.
df_feature_extraction_psd = pd.DataFrame(list_sub_features).reset_index(drop=True)

# Show results
print('Feature Extraction PSD (All Channels) structure:', df_feature_extraction_psd.shape)
df_feature_extraction_psd.head()

Feature Extraction PSD (All Channels) structure: (44, 1046)


Unnamed: 0,participant_id,w0_Fp1_total_power,w0_Fp2_total_power,w0_F3_total_power,w0_F4_total_power,w0_C3_total_power,w0_C4_total_power,w0_P3_total_power,w0_P4_total_power,w0_O1_total_power,...,w10_O2_peak_to_peak,w10_F7_peak_to_peak,w10_F8_peak_to_peak,w10_T3_peak_to_peak,w10_T4_peak_to_peak,w10_T5_peak_to_peak,w10_T6_peak_to_peak,w10_Fz_peak_to_peak,w10_Cz_peak_to_peak,w10_Pz_peak_to_peak
0,sub-001,1.857574e-10,1.715044e-10,1.553671e-10,1.525915e-10,1.489609e-10,1.503204e-10,1.54856e-10,1.592022e-10,1.689394e-10,...,9.812095e-11,9.25418e-11,9.506062e-11,9.39234e-11,9.227297e-11,9.685663e-11,9.354449e-11,9.271965e-11,9.158701e-11,9.473923e-11
1,sub-002,1.424642e-10,1.449379e-10,1.426341e-10,1.484438e-10,1.362669e-10,1.367632e-10,1.477555e-10,1.472636e-10,1.640167e-10,...,9.223662e-11,8.940322e-11,9.460847e-11,9.036469e-11,9.230526e-11,9.441398e-11,9.321173e-11,9.156009e-11,9.139001e-11,9.067818e-11
2,sub-004,1.732468e-10,1.551228e-10,1.868775e-10,1.647622e-10,1.571475e-10,1.513328e-10,1.594483e-10,1.569024e-10,1.621243e-10,...,1.032073e-10,1.105177e-10,1.003569e-10,1.078318e-10,9.765776e-11,1.002962e-10,9.608198e-11,8.806349e-11,8.88593e-11,9.607722e-11
3,sub-005,2.007722e-10,1.533125e-10,1.696497e-10,1.694597e-10,1.595504e-10,1.597895e-10,1.699709e-10,1.682184e-10,1.711858e-10,...,1.159105e-10,1.197711e-10,1.043527e-10,1.109896e-10,1.046087e-10,1.114938e-10,1.064269e-10,1.043931e-10,1.112914e-10,1.087879e-10
4,sub-006,2.117331e-10,2.002724e-10,2.107966e-10,1.979755e-10,1.802383e-10,1.741444e-10,1.946204e-10,1.960456e-10,2.022729e-10,...,9.32406e-11,9.477698e-11,9.350182e-11,9.569112e-11,9.373533e-11,9.496822e-11,9.409168e-11,9.256822e-11,9.179038e-11,9.327637e-11


Add participant attributes (Gender, Age, Group) to the feature table

In [8]:
# Attach participant-level attributes to the per-subject feature rows.
participant_cols = ['Gender', 'Age', 'Group']
df_feature_extraction_psd = (
    df_feature_extraction_psd
    # Drop attributes left over from a previous run of this cell; otherwise a
    # re-run would merge again and produce Gender_x / Gender_y style duplicates.
    .drop(columns=participant_cols, errors='ignore')
    .merge(
        df_participants_selected[['participant_id'] + participant_cols],
        how='inner',
        on='participant_id',
        # Every participant must occur exactly once on both sides; a duplicate
        # key would silently multiply rows in the training set.
        validate='one_to_one',
    )
)
df_feature_extraction_psd.tail()

Unnamed: 0,participant_id,w0_Fp1_total_power,w0_Fp2_total_power,w0_F3_total_power,w0_F4_total_power,w0_C3_total_power,w0_C4_total_power,w0_P3_total_power,w0_P4_total_power,w0_O1_total_power,...,w10_T3_peak_to_peak,w10_T4_peak_to_peak,w10_T5_peak_to_peak,w10_T6_peak_to_peak,w10_Fz_peak_to_peak,w10_Cz_peak_to_peak,w10_Pz_peak_to_peak,Gender,Age,Group
39,sub-084,1.424736e-10,1.337813e-10,1.470361e-10,1.451102e-10,1.421012e-10,1.362649e-10,1.513588e-10,1.477412e-10,1.507767e-10,...,7.729185e-11,7.015623e-11,6.894528e-11,6.822972e-11,6.947806e-11,6.614375e-11,6.764815e-11,0,71,1
40,sub-085,3.061583e-10,2.689046e-10,1.998686e-10,1.884589e-10,1.797993e-10,1.84718e-10,1.801466e-10,1.802067e-10,1.89953e-10,...,1.397911e-10,1.311803e-10,1.355792e-10,1.32189e-10,1.342118e-10,1.300826e-10,1.28185e-10,1,64,1
41,sub-086,4.239977e-10,4.62795e-10,3.766816e-10,1.22822e-09,1.95362e-10,2.107043e-10,2.164461e-10,1.983399e-10,3.638354e-10,...,1.580026e-10,1.321889e-10,3.050883e-10,3.894882e-10,1.391334e-10,1.589279e-10,3.152465e-10,1,49,1
42,sub-087,1.943504e-10,1.892681e-10,1.70063e-10,1.658963e-10,1.576892e-10,1.610719e-10,1.641199e-10,1.642195e-10,1.682132e-10,...,8.655511e-11,9.019565e-11,8.622377e-11,8.732775e-11,8.394204e-11,8.033266e-11,8.193399e-11,1,73,1
43,sub-088,2.068153e-10,2.124404e-10,2.027712e-10,1.959381e-10,1.860948e-10,1.83766e-10,1.914116e-10,1.917249e-10,2.08536e-10,...,9.490607e-11,9.470834e-11,9.948471e-11,1.080436e-10,9.775769e-11,9.688124e-11,9.743854e-11,1,55,1


### Save Features

In [9]:
# Destination folder and file name for the all-channels feature table.
path_training_dataset = '../Training Dataset/'
filename_features_psd = 'PSD_Features-All_Channels_no_FreqBands.csv'

# Write the table without the row index column.
features_csv_all_channels = os.path.join(path_training_dataset, filename_features_psd)
df_feature_extraction_psd.to_csv(features_csv_all_channels, index=False)

## Getting Features from All Subjects by using only the Fp1 Channel

### Sample Features from One Subject

In [10]:
# Preview of the feature extractor on the first subject, restricted to Fp1.
# The pick is applied to a copy, presumably so the epochs kept in
# All_Subs_Windows retain all their channels (assumes pick works in place -- confirm).
# NOTE(review): this calls PSD_Features_from_Epochs, while the all-subjects loop
# calls PSD_Features_from_Epochs_2; the summary printed here reports 5 frequency
# bands, so it may not reflect the band-free features that are saved -- confirm.
subsX1_features_fp1 = eeg_mne.PSD_Features_from_Epochs(All_Subs_Windows[0].copy().pick(['Fp1']))

Summary...
-Number of Windows: 11
-Number of Channels: 1
-Frequency Bands: 5
-Number of features computed: 6
--Total Features 330


### Features for all the Subjects with Fp1 Channel

In [11]:
# Each subject's epochs are matched to a participant id purely by position, so
# fail loudly if the two lists are out of step instead of labelling features
# with the wrong participant.
if len(All_Subs_Windows) != len(subs_selected):
    raise ValueError(
        'Expected one Epochs object per selected subject, got {} Epochs for {} subjects'.format(
            len(All_Subs_Windows), len(subs_selected)
        )
    )

# One feature record per subject, computed on the Fp1 channel only. The pick is
# applied to a copy so the objects stored in All_Subs_Windows are not modified.
list_sub_features_fp1 = [
    eeg_mne.PSD_Features_from_Epochs_2(
        epochs.copy().pick(['Fp1']), sub_id=sub_id, show_summary=False
    )
    for epochs, sub_id in zip(All_Subs_Windows, subs_selected)
]

# One row per subject; reset_index is chained (not in place) so the cell can be
# re-run without mutating a frame that was already displayed.
df_feature_extraction_psd_fp1 = pd.DataFrame(list_sub_features_fp1).reset_index(drop=True)

# Show results
print('Feature Extraction PSD (Fp1) structure:', df_feature_extraction_psd_fp1.shape)
df_feature_extraction_psd_fp1.head()

Feature Extraction PSD (Fp1) structure: (44, 56)


Unnamed: 0,participant_id,w0_Fp1_total_power,w1_Fp1_total_power,w2_Fp1_total_power,w3_Fp1_total_power,w4_Fp1_total_power,w5_Fp1_total_power,w6_Fp1_total_power,w7_Fp1_total_power,w8_Fp1_total_power,...,w1_Fp1_peak_to_peak,w2_Fp1_peak_to_peak,w3_Fp1_peak_to_peak,w4_Fp1_peak_to_peak,w5_Fp1_peak_to_peak,w6_Fp1_peak_to_peak,w7_Fp1_peak_to_peak,w8_Fp1_peak_to_peak,w9_Fp1_peak_to_peak,w10_Fp1_peak_to_peak
0,sub-001,1.857574e-10,1.948316e-10,2.060685e-10,1.662248e-10,1.944321e-10,1.969535e-10,1.874469e-10,1.73996e-10,1.98001e-10,...,1.090058e-10,1.166653e-10,8.962145e-11,1.092403e-10,1.097593e-10,1.054009e-10,9.573939e-11,1.145398e-10,9.599482e-11,1.041098e-10
1,sub-002,1.424642e-10,1.359549e-10,1.645943e-10,1.584541e-10,1.534958e-10,1.490433e-10,1.626773e-10,1.635715e-10,1.728898e-10,...,6.98639e-11,8.594488e-11,8.204958e-11,8.790972e-11,8.794643e-11,9.17786e-11,8.817886e-11,8.938846e-11,8.436417e-11,8.609361e-11
2,sub-004,1.732468e-10,2.14063e-10,1.985052e-10,2.103651e-10,1.507994e-10,1.521748e-10,1.705544e-10,1.910151e-10,2.104146e-10,...,1.268305e-10,1.106173e-10,1.248371e-10,8.254595e-11,8.489998e-11,1.052603e-10,1.13701e-10,1.337418e-10,1.043452e-10,9.469234e-11
3,sub-005,2.007722e-10,1.539208e-10,1.661521e-10,2.065676e-10,2.219515e-10,1.945157e-10,1.978618e-10,2.018495e-10,1.904321e-10,...,8.563951e-11,9.182207e-11,1.175106e-10,1.286069e-10,1.107821e-10,1.175773e-10,1.180435e-10,1.065615e-10,9.258535e-11,1.277758e-10
4,sub-006,2.117331e-10,1.939109e-10,1.777464e-10,1.796638e-10,1.811646e-10,1.763958e-10,2.010193e-10,2.119495e-10,2.139387e-10,...,1.044032e-10,9.625259e-11,9.537868e-11,9.767381e-11,8.845028e-11,1.026078e-10,1.112399e-10,1.047999e-10,8.052892e-11,9.140038e-11


Add participant attributes (Gender, Age, Group) to the feature table

In [12]:
# Attach participant-level attributes to the per-subject Fp1 feature rows.
participant_cols = ['Gender', 'Age', 'Group']
df_feature_extraction_psd_fp1 = (
    df_feature_extraction_psd_fp1
    # Drop attributes left over from a previous run of this cell; otherwise a
    # re-run would merge again and produce Gender_x / Gender_y style duplicates.
    .drop(columns=participant_cols, errors='ignore')
    .merge(
        df_participants_selected[['participant_id'] + participant_cols],
        how='inner',
        on='participant_id',
        # Every participant must occur exactly once on both sides; a duplicate
        # key would silently multiply rows in the training set.
        validate='one_to_one',
    )
)
df_feature_extraction_psd_fp1.tail()

Unnamed: 0,participant_id,w0_Fp1_total_power,w1_Fp1_total_power,w2_Fp1_total_power,w3_Fp1_total_power,w4_Fp1_total_power,w5_Fp1_total_power,w6_Fp1_total_power,w7_Fp1_total_power,w8_Fp1_total_power,...,w4_Fp1_peak_to_peak,w5_Fp1_peak_to_peak,w6_Fp1_peak_to_peak,w7_Fp1_peak_to_peak,w8_Fp1_peak_to_peak,w9_Fp1_peak_to_peak,w10_Fp1_peak_to_peak,Gender,Age,Group
39,sub-084,1.424736e-10,1.199984e-10,1.415234e-10,1.275687e-10,1.425373e-10,1.284932e-10,1.25039e-10,1.24497e-10,1.156987e-10,...,7.829739e-11,6.497999e-11,6.720975e-11,7.107629e-11,6.510702e-11,6.8008e-11,6.680846e-11,0,71,1
40,sub-085,3.061583e-10,3.675847e-10,2.729233e-10,2.365207e-10,2.451812e-10,2.955692e-10,3.720969e-10,2.243236e-10,2.221927e-10,...,1.262006e-10,1.586267e-10,1.899231e-10,1.225027e-10,1.144877e-10,1.356564e-10,1.509209e-10,1,64,1
41,sub-086,4.239977e-10,3.244957e-10,4.793038e-10,6.320889e-10,5.575417e-10,5.011688e-10,5.378486e-10,4.398371e-10,2.998827e-10,...,3.343851e-10,3.208517e-10,3.45087e-10,2.449123e-10,1.522879e-10,3.340264e-10,5.445242e-10,1,49,1
42,sub-087,1.943504e-10,1.941474e-10,1.988893e-10,1.937435e-10,1.975405e-10,2.019197e-10,2.000747e-10,1.981154e-10,2.139055e-10,...,1.204395e-10,1.232295e-10,1.245154e-10,1.07883e-10,1.220541e-10,1.270857e-10,9.141201e-11,1,73,1
43,sub-088,2.068153e-10,1.639495e-10,1.776092e-10,1.714845e-10,1.859389e-10,1.750707e-10,1.787806e-10,1.841715e-10,1.876831e-10,...,1.064834e-10,9.90377e-11,9.845374e-11,1.054194e-10,1.026852e-10,9.596785e-11,9.611898e-11,1,55,1


### Save Features

In [13]:
# Destination folder and file name for the Fp1-only feature table.
path_training_dataset = '../Training Dataset/'
filename_features_psd = 'PSD_Features-FP1_Channel_no_FreqBands.csv'

# Write the table without the row index column.
features_csv_fp1 = os.path.join(path_training_dataset, filename_features_psd)
df_feature_extraction_psd_fp1.to_csv(features_csv_fp1, index=False)