In [None]:
## create eeg feature set
## 1 epoch of eeg has 3000 datapoints
## Instead of computing features over all 3000 datapoints at once, split each epoch into 5 segments of 600 datapoints
## create 1D vector of length 40: 8 statistical features (min, max, mean, median, Q1, Q3, IQR, SD) for each of the 5 segments

In [1]:
import numpy as np
import os
import pickle
import pandas as pd

from scipy import signal, stats
import pywt
from statistics import mode 


In [2]:
def get_statistical_features(eeg):
    """Summarise a sequence of EEG samples with eight descriptive statistics.

    Parameters
    ----------
    eeg : sequence of numbers
        The samples to summarise (a list or a 1-D array both work).

    Returns
    -------
    list
        ``[min, max, mean, median, 25th percentile, 75th percentile,
        inter-quartile range, standard deviation]`` in exactly that order.
        The mode is not included.
    """
    # Extremes use the Python builtins, so the returned values are elements
    # of the input itself.
    lowest, highest = min(eeg), max(eeg)

    # Central tendency.
    average = np.mean(eeg)
    middle = np.median(eeg)

    # Quartiles and the spread between them.
    lower_quartile = np.percentile(eeg, 25)
    upper_quartile = np.percentile(eeg, 75)
    quartile_range = stats.iqr(eeg)

    # Population standard deviation (numpy default, ddof=0).
    deviation = np.std(eeg)

    return [
        lowest,
        highest,
        average,
        middle,
        lower_quartile,
        upper_quartile,
        quartile_range,
        deviation,
    ]

In [3]:
def _segment_features(sig, segment_len):
    """Return the statistical features of consecutive slices of ``sig``, concatenated."""
    features = []
    # The final slice is shorter when len(sig) is not a multiple of segment_len.
    for start in range(0, len(sig), segment_len):
        features.extend(get_statistical_features(sig[start:start + segment_len]))
    return features


def get_eeg_feature_set(x, pid, day, segment_len=600,
                        stages=('wake', 'sleep_stage_1', 'sleep_stage_2')):
    """Build a per-epoch statistical feature table for one recording.

    Every epoch signal is cut into consecutive slices of ``segment_len``
    samples; the eight values from ``get_statistical_features`` are computed
    per slice and laid side by side to form one row.

    Parameters
    ----------
    x : mapping
        Indexed by stage name; each ``x[stage]`` is itself a mapping from an
        epoch identifier to that epoch's signal (anything supporting ``len``
        and slicing). Every name in ``stages`` must be present in ``x``.
    pid : scalar
        Value written to the ``patient_id`` column of every row.
    day : scalar
        Value written to the ``day`` column of every row.
    segment_len : int, optional
        Number of samples per slice. Defaults to 600.
    stages : sequence of str, optional
        Stage names to process, in row order. Defaults to wake, stage 1 and
        stage 2 sleep.

    Returns
    -------
    pandas.DataFrame
        One row per epoch. Integer-named feature columns come first, followed
        by ``patient_id``, ``day``, ``epoch`` and ``class`` (the stage name).

    Raises
    ------
    KeyError
        If a requested stage is missing from ``x``.
    """
    rows, epochs, labels = [], [], []

    # One pass per stage replaces the three hand-copied loops; stage order is
    # preserved so the resulting row order is the same as before.
    for stage in stages:
        for epoch_id, sig in x[stage].items():
            labels.append(stage)
            epochs.append(epoch_id)
            rows.append(_segment_features(sig, segment_len))

    df = pd.DataFrame(rows)
    df['patient_id'] = pid
    df['day'] = day
    epoch = pd.DataFrame(epochs, columns=['epoch'])
    label = pd.DataFrame(labels, columns=['class'])
    # Column-wise join: features + metadata, then epoch id, then class label.
    return pd.concat([df, epoch, label], axis=1)

In [4]:
# Directory holding the per-recording EEG pickle files.
# NOTE: this is a machine-specific absolute path; adjust it before running elsewhere.
file_location='C:/Users/DAR9KOR/Desktop/data/HEALTHCARE/EEG/data/eeg_epochs_dec12_2018/raw_data' 

# Switch the working directory so that relative paths used elsewhere in the
# notebook (e.g. the output pickle) resolve inside the data directory.
os.chdir(file_location)

files = os.listdir(file_location)
# os.listdir gives no ordering guarantee, so sort to keep the processing
# order (and therefore the row order of the feature table) reproducible.
file_list = sorted(entry for entry in files if entry.endswith('.pickle'))

In [None]:
## for 1 segment of 1 subject

#file_val = 'SC4001E0-EEG.pickle'
#pid = file_val[2:5]
#day = file_val[5:6]
#with open(file_val ,'rb') as f:
   # x = pickle.load(f)
    #dat = get_eeg_feature_set(x,pid,day)
#f.close()



In [5]:
# Collect one feature frame per recording and concatenate once at the end;
# growing a DataFrame inside the loop re-copies all earlier rows on every pass.
feature_frames = []

for file_val in file_list:
    print(file_val)
    # File names look like 'SC4001E0-PSG-EEG.pickle': zero-based positions
    # 2-4 are used as the patient id and position 5 as the day.
    patient_id = file_val[2:5]
    day = file_val[5:6]

    # NOTE: pickle.load can execute arbitrary code - only open trusted files.
    # The `with` block closes the file itself, so no explicit close() is needed.
    with open(file_val,'rb') as f:
        x = pickle.load(f)

    feature_frames.append(get_eeg_feature_set(x,patient_id,day))

# pd.concat raises on an empty list, so fall back to an empty frame when no
# pickle files were found.
if feature_frames:
    df_features = pd.concat(feature_frames, ignore_index = True)
else:
    df_features = pd.DataFrame()

SC4001E0-PSG-EEG.pickle
SC4002E0-PSG-EEG.pickle
SC4011E0-PSG-EEG.pickle
SC4012E0-PSG-EEG.pickle
SC4021E0-PSG-EEG.pickle
SC4022E0-PSG-EEG.pickle
SC4031E0-PSG-EEG.pickle
SC4032E0-PSG-EEG.pickle
SC4041E0-PSG-EEG.pickle
SC4042E0-PSG-EEG.pickle
SC4051E0-PSG-EEG.pickle
SC4052E0-PSG-EEG.pickle
SC4061E0-PSG-EEG.pickle
SC4062E0-PSG-EEG.pickle
SC4071E0-PSG-EEG.pickle
SC4072E0-PSG-EEG.pickle
SC4081E0-PSG-EEG.pickle
SC4082E0-PSG-EEG.pickle
SC4091E0-PSG-EEG.pickle
SC4092E0-PSG-EEG.pickle
SC4101E0-PSG-EEG.pickle
SC4102E0-PSG-EEG.pickle
SC4111E0-PSG-EEG.pickle
SC4112E0-PSG-EEG.pickle
SC4121E0-PSG-EEG.pickle
SC4122E0-PSG-EEG.pickle
SC4131E0-PSG-EEG.pickle
SC4141E0-PSG-EEG.pickle
SC4142E0-PSG-EEG.pickle
SC4151E0-PSG-EEG.pickle
SC4152E0-PSG-EEG.pickle
SC4161E0-PSG-EEG.pickle
SC4162E0-PSG-EEG.pickle
SC4171E0-PSG-EEG.pickle
SC4172E0-PSG-EEG.pickle
SC4181E0-PSG-EEG.pickle
SC4182E0-PSG-EEG.pickle
SC4191E0-PSG-EEG.pickle
SC4192E0-PSG-EEG.pickle


In [6]:
# Write the feature table to disk (relative to the current working directory).
# Alternative export, currently disabled:
#df_features.to_excel("eeg_statistical_feature_vector.xlsx")

# Use a context manager so the file handle is flushed and closed once the
# dump has finished.
with open('eeg_statistical_feature_vector.pkl','wb') as out_file:
    pickle.dump(df_features, out_file)

In [7]:
# Preview the first five rows of the feature table (head() defaults to n=5).
df_features.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,34,35,36,37,38,39,patient_id,day,epoch,class
0,-121.576557,76.190476,-0.416508,1.687912,-17.230769,24.240293,41.471062,36.58704,-114.637363,156.553846,...,6.245118,4.547985,-13.737729,25.013919,38.751648,31.30439,400,1,s1e1,wake
1,-94.945055,33.054945,-0.898657,0.703297,-6.704762,7.454945,14.159707,16.226144,-67.750916,40.275458,...,-1.548972,-4.547985,-25.295238,23.888645,49.183883,32.831691,400,1,s1e2,wake
2,-69.532601,77.221978,0.975082,-0.984615,-10.127473,12.635897,22.76337,21.954602,-62.874725,79.5663,...,0.635624,3.000733,-14.792674,20.700366,35.49304,27.585531,400,1,s1e3,wake
3,-58.561172,50.027839,-2.881641,-2.391209,-18.754579,13.761172,32.515751,22.161343,-43.651282,33.805128,...,-0.064078,0.468864,-11.229304,11.698168,22.927473,15.383105,400,1,s1e4,wake
4,-48.621245,37.74359,-0.967736,1.734799,-14.816117,13.034432,27.850549,18.337279,-33.336264,34.461538,...,0.517158,0.750183,-11.838828,10.549451,22.388278,21.240018,400,1,s1e5,wake


In [10]:
# Show every column label of the feature table: the integer-named feature
# columns followed by the patient_id, day, epoch and class columns.
df_features.columns

Index([           0,            1,            2,            3,            4,
                  5,            6,            7,            8,            9,
                 10,           11,           12,           13,           14,
                 15,           16,           17,           18,           19,
                 20,           21,           22,           23,           24,
                 25,           26,           27,           28,           29,
                 30,           31,           32,           33,           34,
                 35,           36,           37,           38,           39,
       'patient_id',        'day',      'epoch',      'class'],
      dtype='object')