Link to Excel file of extracted features: https://drive.google.com/drive/u/1/folders/1ZMbW7HxNkBUAlnT-_lXLD4J_r0A778SJ

In [1]:
# import required packages
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler


In [2]:
# read in features csv file
df = pd.read_csv('ck_features.csv')
df

Unnamed: 0,Total Power,Abs LFP,Abs HFP,nLFP,nHFP,LF/HF,RMSSD,SDNN,label
0,11961.755298,3249.551607,8043.922163,0.271662,0.672470,0.403976,932.714731,54.825976,ck_pre
1,9211.352702,2022.717898,6829.397915,0.219590,0.741411,0.296178,803.437128,120.850089,ck_pre
2,9283.913716,2378.753676,6678.590063,0.256223,0.719372,0.356176,793.228617,162.223603,ck_pre
3,9994.154159,2431.402950,7008.622545,0.243283,0.701272,0.346916,865.589904,67.238339,ck_pre
4,9597.894148,1897.800009,7150.309660,0.197731,0.744987,0.265415,852.451001,70.600140,ck_pre
...,...,...,...,...,...,...,...,...,...
283,1160.095357,223.552939,813.610805,0.192702,0.701331,0.274766,735.611693,147.849426,ck_med
284,1420.536153,275.481168,1015.442748,0.193928,0.714831,0.271292,766.606858,156.710024,ck_med
285,1393.953672,196.719824,1092.517596,0.141124,0.783755,0.180061,779.975532,148.275939,ck_med
286,1267.482134,253.493676,906.193736,0.199998,0.714956,0.279735,760.283782,139.385959,ck_med


In [3]:
# target variables
Y = df['label'].values
# features
X = df.drop(labels = ['label'], axis = 1)

# Standardizing the features
X = StandardScaler().fit_transform(X)

In [4]:
arr = ["{}{}".format("PC", i) for i in np.arange(np.shape(df)[1]-1)+ 1]
(np.shape(df)[1]-1)+ 1
arr

['PC1', 'PC2', 'PC3', 'PC4', 'PC5', 'PC6', 'PC7', 'PC8']

In [5]:
from sklearn.decomposition import PCA
pca = PCA()
X_pca = pca.fit_transform(X)
pca_df = pd.DataFrame(X_pca, columns=arr)
dfp = pd.merge(df, pca_df, right_index=True, left_index=True)
print('Explained Variance Ratio')
for i in range(len(arr)):
    print('PC{}: {}'.format(i+1,pca.explained_variance_ratio_[i]))

Explained Variance Ratio
PC1: 0.46328155255576875
PC2: 0.3474578489050008
PC3: 0.12668020082217119
PC4: 0.051366234316070146
PC5: 0.007840200287478014
PC6: 0.002667613314629765
PC7: 0.0006881515790563938
PC8: 1.8198219824683278e-05


In [6]:
pca = PCA(.99)
X_pca = pca.fit_transform(X)
print('Explained Variance Ratio')
for i in range(len(arr)):
    print('PC{}: {}'.format(i+1,pca.explained_variance_ratio_[i]))

Explained Variance Ratio
PC1: 0.46328155255576875
PC2: 0.3474578489050008
PC3: 0.12668020082217119
PC4: 0.051366234316070146
PC5: 0.007840200287478014


IndexError: index 5 is out of bounds for axis 0 with size 5

In [8]:
from sklearn.decomposition import PCA
pca = PCA(0.99)
principalComponents = pca.fit_transform(X)
principalDf = pd.DataFrame(data = principalComponents
             , columns = ['PC1','PC2','PC3','PC4','PC5'])
principalDf

Unnamed: 0,PC1,PC2,PC3,PC4,PC5
0,3.578898,0.395738,-0.047779,0.014839,-0.042165
1,0.336555,-0.201751,1.707125,0.596749,-0.009439
2,1.249003,0.456976,2.923192,1.710269,-0.003426
3,1.895098,0.237179,0.107955,0.098686,-0.021157
4,0.187784,-0.567702,0.276168,-0.571162,-0.005510
...,...,...,...,...,...
283,-1.933990,2.119364,1.530949,1.577346,1.010596
284,-2.009835,2.035656,1.940617,1.466609,0.787319
285,-3.796804,0.768672,1.941153,0.378488,0.410919
286,-1.834836,2.069482,1.382685,1.389709,0.583992


In [9]:
finalDf = pd.concat([principalDf, df['label']], axis = 1)
finalDf

Unnamed: 0,PC1,PC2,PC3,PC4,PC5,label
0,3.578898,0.395738,-0.047779,0.014839,-0.042165,ck_pre
1,0.336555,-0.201751,1.707125,0.596749,-0.009439,ck_pre
2,1.249003,0.456976,2.923192,1.710269,-0.003426,ck_pre
3,1.895098,0.237179,0.107955,0.098686,-0.021157,ck_pre
4,0.187784,-0.567702,0.276168,-0.571162,-0.005510,ck_pre
...,...,...,...,...,...,...
283,-1.933990,2.119364,1.530949,1.577346,1.010596,ck_med
284,-2.009835,2.035656,1.940617,1.466609,0.787319,ck_med
285,-3.796804,0.768672,1.941153,0.378488,0.410919,ck_med
286,-1.834836,2.069482,1.382685,1.389709,0.583992,ck_med


In [10]:
# save to a csv file
finalDf.to_csv('ck_pca_features.csv',index=False)