# Feature Extraction using PSD for Data Visualization with Tableau Tool

**Description**:\
Prepare the training dataset using the PSD (Power Spectral Density) method from MNE and compute the best features for the classifier model.

**Author**: Elmo Chavez\
**Date**: October 11, 2023

## Libraries

In [1]:
import pandas as pd
import numpy as np
import sys
import os
import matplotlib.pyplot as plt

# Make the project-local `eeg_mne` module importable.
# `os.path.dirname('eeg_mne.py')` is the empty string, so the joined path is
# just '..' and `path_eeg_mne` resolves to the parent of the current working
# directory (not to a location relative to this notebook file).
path_eeg_mne = os.path.abspath(os.path.join(os.path.dirname('eeg_mne.py'), '..'))
# Must run before the import below so that the lookup can find the module.
sys.path.append(path_eeg_mne)
import eeg_mne

## Read the Data

In [2]:
# Location of the EEG dataset (derivatives folder)
path = '../ds004504/derivatives'

# Folder and file name of the preselected-participants table
path_training = '../Training Dataset/'
file_part_selected = 'Participants_Selected.csv'

# Load the table and keep the participant ids as a plain Python list
df_participants_selected = pd.read_csv(os.path.join(path_training, file_part_selected))
subs_selected = df_participants_selected['participant_id'].tolist()
df_participants_selected.head()

Unnamed: 0,participant_id,Gender,Age,Group,MMSE,time_max,points,sfreq,flag
0,sub-001,0,57,0,16,599.798,299900,500.0,True
1,sub-002,0,78,0,22,793.098,396550,500.0,True
2,sub-003,1,70,0,14,306.098,153050,500.0,False
3,sub-004,0,67,0,20,706.098,353050,500.0,True
4,sub-005,1,70,0,22,804.098,402050,500.0,True


Preselect only the first 3 flagged subjects from each group.

In [3]:
# Up to three flagged participants per group, in table order
(
    df_participants_selected
    .loc[df_participants_selected['flag'].eq(True)]
    .groupby('Group')
    .head(3)
)

Unnamed: 0,participant_id,Gender,Age,Group,MMSE,time_max,points,sfreq,flag
0,sub-001,0,57,0,16,599.798,299900,500.0,True
1,sub-002,0,78,0,22,793.098,396550,500.0,True
3,sub-004,0,67,0,20,706.098,353050,500.0,True
65,sub-066,1,73,1,20,549.758,274880,500.0,True
66,sub-067,1,66,1,24,643.098,321550,500.0,True
67,sub-068,1,78,1,25,573.098,286550,500.0,True


In [4]:
# Healthy-control participants (rows where Group is 2)
df_participants_selected.loc[df_participants_selected['Group'].eq(2)].head()

Unnamed: 0,participant_id,Gender,Age,Group,MMSE,time_max,points,sfreq,flag
36,sub-037,1,57,2,30,777.098,388550,500.0,False
37,sub-038,1,62,2,30,891.398,445700,500.0,False
38,sub-039,1,70,2,30,850.078,425040,500.0,False
39,sub-040,1,61,2,30,965.738,482870,500.0,False
40,sub-041,0,77,2,30,886.098,443050,500.0,False


In [5]:
# Subject ids used for the visualization dataset:
#   1) the first three flagged participants of every group (the AD and FTD subjects),
#   2) followed by the first three healthy participants (Group 2) for comparison.
list_subjects_id = (
    df_participants_selected
    .loc[df_participants_selected['flag'].eq(True)]
    .groupby('Group')['participant_id']
    .head(3)
    .tolist()
    + df_participants_selected
    .loc[df_participants_selected['Group'].eq(2), 'participant_id']
    .head(3)
    .tolist()
)
list_subjects_id

['sub-001',
 'sub-002',
 'sub-004',
 'sub-066',
 'sub-067',
 'sub-068',
 'sub-037',
 'sub-038',
 'sub-039']

Read only the Preselected Subjects as Raw Datasets

In [6]:
# Same dataset folder as the earlier `path`, now written with a trailing slash.
path = '../ds004504/derivatives/'
# Load the raw EEG recordings of the preselected subjects through the
# project-local helper.
# NOTE(review): presumably returns one recording per id, in the same order as
# list_subjects_id — confirm in eeg_mne, since later cells pair them with zip().
raw_list = eeg_mne.EEG_Raw_Data_List(path, list_subjects_id)

EEG Raw Data readed: 9


In [7]:
# Display the first loaded recording as a quick sanity check.
raw_list[0]

0,1
Measurement date,Unknown
Experimenter,Unknown
Digitized points,22 points
Good channels,19 EEG
Bad channels,
EOG channels,Not available
ECG channels,Not available
Sampling frequency,500.00 Hz
Highpass,0.00 Hz
Lowpass,250.00 Hz


## Dataset for Visualization Insight

In [8]:
# Build one table per subject by pairing every raw recording with its subject id.
# NOTE(review): tmax=480 presumably limits each recording to its first 480 s —
# confirm in eeg_mne.Raw_by_Freq_Bands.
list_subs_df = [
    eeg_mne.Raw_by_Freq_Bands(raw, tmax=480, sub_id=sub_id)
    for raw, sub_id in zip(raw_list, list_subjects_id)
]

print(len(list_subs_df))

9


In [9]:
# Print the (rows, columns) shape of every per-subject table
for subject_frame in list_subs_df:
    print(subject_frame.shape)

(1200005, 22)
(1200005, 22)
(1200005, 22)
(1200005, 22)
(1200005, 22)
(1200005, 22)
(1200005, 22)
(1200005, 22)
(1200005, 22)


In [11]:
# Stack the per-subject tables row-wise and renumber the index from 0
df_all_freq_bands = pd.concat(objs=list_subs_df, axis=0, ignore_index=True)
df_all_freq_bands.shape

(10800045, 22)

## Save Result

In [18]:
# Destination folder and file name for the combined table
path_result = '../Vizualization Datasets/'
filename = 'Raws_by_Frequency_Bands.csv'
# Write the table as CSV without the row index
df_all_freq_bands.to_csv(os.path.join(path_result, filename), index=False)

This dataset won't be uploaded to the repository because of the large amount of storage space it requires.