In [195]:
from pandas import Series
from pandas import DataFrame
import pandas as pd
from matplotlib import pyplot
import os
import matplotlib.pyplot as plt
from pandas.plotting import parallel_coordinates
import numpy as np
from numpy.lib.stride_tricks import as_strided
import scipy.stats as stats

#  ML
from sklearn.model_selection import KFold
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix
import seaborn as sns

from sklearn.model_selection import cross_val_score
import sklearn.model_selection as ms

In [196]:
# test rolling windows

# overlapping function  
def windowed_view(arr, window, overlap): 
    arr = np.asarray(arr)
    window_step = window - overlap
    new_shape = arr.shape[:-1] + ((arr.shape[-1] - overlap) // window_step,
                                 window)
    new_strides = (arr.strides[:-1] + (window_step * arr.strides[-1],) +
                   arr.strides[-1:])
    return as_strided(arr, shape=new_shape, strides=new_strides)

# get processed date per channels per group funtion
def overlapData_pergroup_perChannels(groupIndex,channel,windowSize,overlap,dataFrameArr):
    # single channel
    signleGroup = dataFrameArr[groupIndex]
    c1Arr = signleGroup[channel].to_numpy()
    frame2Arr = c1Arr
    # print(frame2Arr)
    series_overlap = windowed_view(frame2Arr,windowSize,overlap) 
    print('The length of this data is ',len(series_overlap))
    return series_overlap



In [197]:
# read data from csv 
def preProcess_ForEach_Participant(fileName):
    df = pd.read_csv(fileName)
    # group data
    groupData = df.groupby(['Label'])
    groupData.first()
    dataFrameArr = []
    for name,group in groupData:
        dataFrameArr.append(group)
    return dataFrameArr




In [198]:
def get_features_each_channel(dataFrameArr,processedChannel,windowsize_group,overlapping):
    C1_2 = overlapData_pergroup_perChannels(1,processedChannel,windowsize_group,overlapping,dataFrameArr)
    C1_3 = overlapData_pergroup_perChannels(2,processedChannel,windowsize_group,overlapping,dataFrameArr)
    C1_4 = overlapData_pergroup_perChannels(3,processedChannel,windowsize_group,overlapping,dataFrameArr)
    C1_5 = overlapData_pergroup_perChannels(4,processedChannel,windowsize_group,overlapping,dataFrameArr)
    C1_6 = overlapData_pergroup_perChannels(5,processedChannel,windowsize_group,overlapping,dataFrameArr)
    C1_8 = overlapData_pergroup_perChannels(7,processedChannel,windowsize_group,overlapping,dataFrameArr)

    processed_Arr = []
    processed_Arr.append(C1_2)
    processed_Arr.append(C1_3)
    processed_Arr.append(C1_4)
    processed_Arr.append(C1_5)
    processed_Arr.append(C1_6)
    processed_Arr.append(C1_8)
    # print(len(processed_Arr))

    # mean
    mean_arr = []
    for data in processed_Arr:
        win_avg = np.mean(data,axis=-1)
        # print(len(win_avg))
        mean_arr.append(win_avg)

    # std
    # standard deviation
    std_arr = []
    for data in processed_Arr:
        win_std = np.std(data,axis=-1)
        # print(len(win_std))
        std_arr.append(win_std)

    # peak
    peak_arr = []
    for data in processed_Arr:
        win_max = np.max(data,axis=-1)
        # print(len(win_max))
        peak_arr.append(win_max)

    # AUC
    AUC_arr = []
    for data in processed_Arr:
        win_abc = np.abs(data)
        win_sum = np.sum(win_abc,axis=-1)
        # print(len(win_sum))
        AUC_arr.append(win_sum)

    # skewness
    skew_arr = []
    for data in processed_Arr:
        win_skew = stats.skew(data,axis=-1)
        # print(win_skew)
        skew_arr.append(win_skew)

    # Kurt
    kurt_arr = []
    for data in processed_Arr:
        win_kurt = stats.kurtosis(data,axis=-1)
        # print(win_skew)
        kurt_arr.append(win_kurt)

    # combination
    mean_com = np.concatenate([mean_arr[0],mean_arr[1],mean_arr[2],mean_arr[3],mean_arr[4],mean_arr[5]])
    std_com = np.concatenate([std_arr[0],std_arr[1],std_arr[2],std_arr[3],std_arr[4],std_arr[5]])
    peak_com = np.concatenate([peak_arr[0],peak_arr[1],peak_arr[2],peak_arr[3],peak_arr[4],peak_arr[5]])
    AUC_com = np.concatenate([AUC_arr[0],AUC_arr[1],AUC_arr[2],AUC_arr[3],AUC_arr[4],AUC_arr[5]])
    kurt_com = np.concatenate([kurt_arr[0],kurt_arr[1],kurt_arr[2],kurt_arr[3],kurt_arr[4],kurt_arr[5]])
    skew_com = np.concatenate([skew_arr[0],skew_arr[1],skew_arr[2],skew_arr[3],skew_arr[4],skew_arr[5]])
    Class_G2 = np.full(len(mean_arr[0]),2)
    Class_G3 = np.full(len(mean_arr[1]),3)
    Class_G4 = np.full(len(mean_arr[2]),4)
    Class_G5 = np.full(len(mean_arr[3]),5)
    Class_G7 = np.full(len(mean_arr[4]),7)
    Class_G8 = np.full(len(mean_arr[5]),8)
    class_allGroup = np.concatenate([Class_G2,Class_G3,Class_G4,Class_G5,Class_G7,Class_G8])

    Class_G2 = np.full(len(mean_arr[0]),1)
    Class_G3 = np.full(len(mean_arr[1]),1)
    Class_G4 = np.full(len(mean_arr[2]),2)
    Class_G5 = np.full(len(mean_arr[3]),2)
    Class_G7 = np.full(len(mean_arr[4]),1)
    Class_G8 = np.full(len(mean_arr[5]),1)
    class_allGroup2 = np.concatenate([Class_G2,Class_G3,Class_G4,Class_G5,Class_G7,Class_G8])

    Class_G2 = np.full(len(mean_arr[0]),1)
    Class_G3 = np.full(len(mean_arr[1]),1)
    Class_G4 = np.full(len(mean_arr[2]),2)
    Class_G5 = np.full(len(mean_arr[3]),2)
    Class_G7 = np.full(len(mean_arr[4]),3)
    Class_G8 = np.full(len(mean_arr[5]),3)
    class_allGroup3 = np.concatenate([Class_G2,Class_G3,Class_G4,Class_G5,Class_G7,Class_G8])

    Class_G2 = np.full(len(mean_arr[0]),1)
    Class_G3 = np.full(len(mean_arr[1]),2)
    Class_G4 = np.full(len(mean_arr[2]),2)
    Class_G5 = np.full(len(mean_arr[3]),3)
    Class_G7 = np.full(len(mean_arr[4]),3)
    Class_G8 = np.full(len(mean_arr[5]),4)
    class_allGroup4 = np.concatenate([Class_G2,Class_G3,Class_G4,Class_G5,Class_G7,Class_G8])

    # Normalization
    mean_com_z = stats.zscore(mean_com)
    std_com_z = stats.zscore(std_com)
    peak_com_z = stats.zscore(peak_com)
    AUC_com_z = stats.zscore(AUC_com)
    kurt_com_z = stats.zscore(kurt_com)
    skew_com_z = stats.zscore(skew_com)

    # convert to dataframe
    rawData = {'Group':class_allGroup, 'Mean':mean_com,'Std':std_com,'Peak':peak_com,'AUC':AUC_com,'Kurt':kurt_com,'Skew':skew_com}
    df_features = pd.DataFrame(rawData)
    df_features['Group2'] = class_allGroup2
    df_features['Group3'] = class_allGroup3
    df_features['Group4'] = class_allGroup4
    # print(df_features.head())
    # could but not output yet

    zData = {'Group':class_allGroup, 'Mean':mean_com_z,'Std':std_com_z,'Peak':peak_com_z,'AUC':AUC_com_z,'Kurt':kurt_com_z,'Skew':skew_com_z}
    df_features_z = pd.DataFrame(zData)
    df_features_z['Group2'] = class_allGroup2
    df_features_z['Group3'] = class_allGroup3
    df_features_z['Group4'] = class_allGroup4
    print(df_features_z)

    return df_features_z




In [199]:
participant = 3
fileName = './csv/p'+str(participant) +'.csv'
windowsize_group = 235 # 235 # 312
percentage = 0.85
overlapping = int(windowsize_group* percentage) #227 150 80 188 250
processedData = preProcess_ForEach_Participant(fileName)

saveFileName = './results/'+str(participant)+'/'

In [200]:
# get each channel for one participant

featuresAllChannels =[]
for index in range(1,19):
    # print("index is ------------",index)
    processedChannel = 'C'+str(index)
    featureData_perChannel = get_features_each_channel(processedData,processedChannel,windowsize_group,overlapping)
    featuresAllChannels.append(featureData_perChannel)
   
print(len(featuresAllChannels))


aN   NaN   NaN       1       3       4
199      8   NaN  NaN   NaN  NaN   NaN   NaN       1       3       4
200      8   NaN  NaN   NaN  NaN   NaN   NaN       1       3       4

[201 rows x 10 columns]
The length of this data is  28
The length of this data is  30
The length of this data is  44
The length of this data is  36
The length of this data is  26
The length of this data is  37
     Group      Mean       Std      Peak       AUC      Kurt      Skew  \
0        2  2.141697  0.517405  0.751332  0.980876 -0.407804 -1.023234   
1        2  1.255357  1.141245  0.751332  1.290319 -0.788653 -0.912167   
2        2  0.852644  0.933920  0.751332  0.959057 -0.591026 -0.768243   
3        2  0.617345  0.728289  0.749533  0.750439 -0.475531 -1.013692   
4        2 -0.002946  0.472700 -0.124942  0.438521 -0.673175 -0.896690   
..     ...       ...       ...       ...       ...       ...       ...   
196      8  1.323524 -0.772745 -0.419019 -0.380257 -0.805539 -0.545986   
197      8  1.587370

In [201]:
# c3 c8 c1 LF, #c12 c11 c17 Rf
C3 = featuresAllChannels[3]
C3R = C3.rename(columns={"Group":"Group6", "Mean":"Mean3","Std":"Std3","Peak":"Peak3","AUC":"AUC3","Kurt":"Kurt3", "Skew":"Skew3"})
# print(C3R)
C8 = featuresAllChannels[8]
C8R = C8.rename(columns={"Group":"Group6", "Mean":"Mean8","Std":"Std8","Peak":"Peak8","AUC":"AUC8","Kurt":"Kurt8", "Skew":"Skew8"})
C8RS = C8R.drop(['Group6', 'Group4','Group3','Group2'], axis=1)
# print(C8R)
# print(C8RS)

C1 = featuresAllChannels[1]
C1R = C1.rename(columns={"Group":"Group6", "Mean":"Mean1","Std":"Std1","Peak":"Peak1","AUC":"AUC1","Kurt":"Kurt1", "Skew":"Skew1"})
C1RS = C1R.drop(['Group6', 'Group4','Group3','Group2'], axis=1)
# print(C1R)
# print(C1RS)
# print(C1R)
CLF = pd.concat([C8RS,C1RS,C3R],axis=1, )
# print(CLF)

# dividerLF = 3
# if C3R['Mean3'].isnull().values.any():
#     dividerLF = dividerLF-1
# if C8R['Mean8'].isnull().values.any():
#     dividerLF = dividerLF-1
# if C1R['Mean1'].isnull().values.any():
#     dividerLF = dividerLF-1

CLF["Mean_R"] = CLF[['Mean1','Mean3','Mean8']].mean(axis=1) 
CLF["Std_R"] = CLF[['Std1','Std3','Std8']].mean(axis=1) 
CLF["Peak_R"] = CLF[['Peak1','Peak3','Peak8']].mean(axis=1) 
CLF["AUC_R"] = CLF[['AUC1','AUC3','AUC8']].mean(axis=1) 
CLF["Kurt_R"] = CLF[['Kurt1','Kurt3','Kurt8']].mean(axis=1) 
CLF["Skew_R"] = CLF[['Skew1','Skew3','Skew8']].mean(axis=1) 
# print(dividerLF)
# print(CLF)

# LF = 
# CLF = 

C12 = featuresAllChannels[12]
C12R = C12.rename(columns={"Group":"Group6", "Mean":"Mean12","Std":"Std12","Peak":"Peak12","AUC":"AUC12","Kurt":"Kurt12", "Skew":"Skew12"})
# print(C12R)
C11 = featuresAllChannels[11]
C11R = C11.rename(columns={"Group":"Group6", "Mean":"Mean11","Std":"Std11","Peak":"Peak11","AUC":"AUC11","Kurt":"Kurt11", "Skew":"Skew11"})
C11RS = C11R.drop(['Group6', 'Group4','Group3','Group2'], axis=1)
# print(C11R)
C17 = featuresAllChannels[17]
C17R = C17.rename(columns={"Group":"Group6", "Mean":"Mean17","Std":"Std17","Peak":"Peak17","AUC":"AUC17","Kurt":"Kurt17", "Skew":"Skew17"})
C17RS = C17R.drop(['Group6', 'Group4','Group3','Group2'], axis=1)

CRF = pd.concat([C11RS,C17RS,C12R],axis=1, )
# print(CRF)

# dividerRF = 3
# if C12R['Mean12'].isnull().values.any():
#     dividerRF = dividerRF-1
# if C11R['Mean11'].isnull().values.any():
#     dividerRF = dividerRF-1
# if C17R['Mean17'].isnull().values.any():
#     dividerRF = dividerRF-1

# print(dividerRF)

CRF["Mean_R"] = CRF[['Mean11','Mean17','Mean12']].mean(axis=1) 
CRF["Std_R"] = CRF[['Std11','Std17','Std12']].mean(axis=1) 
CRF["Peak_R"] = CRF[['Peak11','Peak17','Peak12']].mean(axis=1) 
CRF["AUC_R"] = CRF[['AUC11','AUC17','AUC12']].mean(axis=1) 
CRF["Kurt_R"] = CRF[['Kurt11','Kurt17','Kurt12']].mean(axis=1) 
CRF["Skew_R"] = CRF[['Skew11','Skew17','Skew12']].mean(axis=1) 

# print(CRF)




In [202]:
addLeft = CLF[['Mean_R','Std_R','Peak_R','AUC_R','Kurt_R','Skew_R']]
addLeft = addLeft.rename(columns={"Mean_R":"Mean_L", "Std_R":"Std_L","Peak_R":"Peak_L","AUC_R":"AUC_L","Kurt_R":"Kurt_L","Skew_R":"Skew_L"})

allFeaturers = pd.concat([addLeft,CRF],axis=1)
print(allFeaturers.summary())

AttributeError: 'DataFrame' object has no attribute 'summary'

In [0]:
from sklearn.model_selection import cross_validate
# cross validation in the other way
clf = svm.SVC(gamma=2, C=1)

X = allFeaturers[['Mean_R','Std_R','Peak_R','AUC_R','Kurt_R','Skew_R','Mean_L','Std_L','Peak_L','AUC_L','Kurt_L','Skew_L']]
y = allFeaturers['Group3']


scoring = {'acc': 'accuracy',
           'prec_macro': 'precision_macro',
           'rec_micro': 'recall_macro'}
scores = cross_validate(clf, X, y, scoring=scoring,
                         cv = ms.KFold(shuffle = True,n_splits=10), return_train_score=True)
print(scores.keys())
print("ACC ",scores['test_acc'].mean())  
print("REC ",scores['test_rec_micro'].mean())  
print("Pre ",scores['test_prec_macro'].mean())  

# print(scores)
