In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import random
import os
from scipy.signal import butter, lfilter
from scipy.stats import skew, kurtosis
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans
from sklearn.svm import SVC

In [2]:
seed = 57
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)

In [3]:
x = pickle.load(open('x.pkl', 'rb'))
y = pickle.load(open('y.pkl', 'rb'))

In [4]:
x_normal = np.concatenate((x[:300], x[400:]), axis=0)
x_seizure = x[300:400]
print(x_normal.shape)
print(x_seizure.shape)
sampling_freq = 173.6 #based on info from website

(400, 4097)
(100, 4097)


In [5]:
b, a = butter(3, [0.5,40], btype='bandpass',fs=sampling_freq)

x_normal_filtered = np.array([lfilter(b,a,x_normal[ind,:]) for ind in range(x_normal.shape[0])])
x_seizure_filtered = np.array([lfilter(b,a,x_seizure[ind,:]) for ind in range(x_seizure.shape[0])])
print(x_normal.shape)
print(x_seizure.shape)

x_normal = x_normal_filtered
x_seizure = x_seizure_filtered

x = np.concatenate((x_normal,x_seizure))
y = np.concatenate((np.zeros((400,1)),np.ones((100,1))))

(400, 4097)
(100, 4097)


### 1.lets classify with only 1 feature with a decision tree and compute the accuracy to find out how much the feature is good solely

In [6]:
# Function to perform training with giniIndex.
def train_using_gini(X_train, X_test, y_train):
  
    # Creating the classifier object
    clf_gini = DecisionTreeClassifier(criterion = "gini",
            random_state = 100,max_depth=3, min_samples_leaf=5)
  
    # Performing training
    clf_gini.fit(X_train, y_train)
    return clf_gini

In [7]:
# Function to make predictions
def prediction(X_test, clf_object):
  
    # Predicton on test with giniIndex
    y_pred = clf_object.predict(X_test)
    print("Predicted values:")
    print(y_pred)
    return y_pred

In [8]:
# Function to calculate accuracy
def cal_accuracy(y_test, y_pred):
      
    print("Confusion Matrix: ",
        confusion_matrix(y_test, y_pred))
      
    print ("Accuracy : ",
    accuracy_score(y_test,y_pred)*100)
      
    print("Report : ",
    classification_report(y_test, y_pred))

In [9]:
arr1 = np.zeros((500,1))
arr = np.zeros((500,61))
ar = np.zeros((500, 15))
#mean, the first feature testing
for i in range(500):
    arr1[i,0] = np.mean(x[i])
    arr[i,0] = np.mean(x[i])
    ar[i,0] = np.mean(x[i])
    
X_train, X_test, y_train, y_test = train_test_split(arr1,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred_gini = prediction(X_test, clf_gini)
cal_accuracy(y_test, y_pred_gini)

Predicted values:
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0.]
Confusion Matrix:  [[72  2]
 [20  6]]
Accuracy :  78.0
Report :                precision    recall  f1-score   support

         0.0       0.78      0.97      0.87        74
         1.0       0.75      0.23      0.35        26

    accuracy                           0.78       100
   macro avg       0.77      0.60      0.61       100
weighted avg       0.77      0.78      0.73       100



In [10]:
arr2 = np.zeros((500,1))
#std
for i in range(500):
    arr2[i,0] = np.std(x[i])
    arr[i,1] = np.std(x[i])
    ar[i,1] = np.std(x[i])
    
X_train, X_test, y_train, y_test = train_test_split(arr2,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)
print ("Accuracy : ", accuracy_score(y_test,y_pred)*100)

Accuracy :  95.0


In [11]:
arr3 = np.zeros((500,1))
#max
for i in range(500):
    arr3[i,0] = max(x[i])
    arr[i,2] = max(x[i])
    ar[i,2] = max(x[i])
    
X_train, X_test, y_train, y_test = train_test_split(arr3,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)
print ("Accuracy : ", accuracy_score(y_test,y_pred)*100)

Accuracy :  95.0


In [12]:
arr4 = np.zeros((500,1))
#min
for i in range(500):
    arr4[i,0] = min(x[i])
    arr[i,3] = min(x[i])
    ar[i,3] = min(x[i])
    
X_train, X_test, y_train, y_test = train_test_split(arr4,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)
print ("Accuracy : ", accuracy_score(y_test,y_pred)*100)  

Accuracy :  95.0


In [13]:
arr5 = np.zeros((500,1))
#skewness
for i in range(500):
    arr5[i,0] = skew(x[i], axis=0, bias=True)
    arr[i,4] = skew(x[i], axis=0, bias=True)
    ar[i,4] = skew(x[i], axis=0, bias=True)
    
X_train, X_test, y_train, y_test = train_test_split(arr5,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)
print ("Accuracy : ", accuracy_score(y_test,y_pred)*100)  

Accuracy :  85.0


In [14]:
arr6 = np.zeros((500,1))
#kurtosis
for i in range(500):
    arr6[i,0] = kurtosis(x[i], fisher=True)
    arr[i,5] = kurtosis(x[i], fisher=True)
    ar[i,5] = kurtosis(x[i], fisher=True)
    
X_train, X_test, y_train, y_test = train_test_split(arr6,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)
print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

Accuracy :  78.0


In [15]:
arr7 = np.zeros((500,1))
#ptp
for i in range(500):
    arr7[i,0] = np.ptp(x[i])
    arr[i,6] = np.ptp(x[i])
    ar[i,6] = np.ptp(x[i])
    
X_train, X_test, y_train, y_test = train_test_split(arr7,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)
print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

Accuracy :  94.0


In [16]:
arr8 = np.zeros((500,1))
#twopp
for i in range(500):
    tmax = x[i].argmax()
    tmin = x[i].argmin()
    twopp = tmax - tmin
    arr8[i,0] = twopp
    arr[i,7] = twopp
    ar[i,7] = twopp
    
X_train, X_test, y_train, y_test = train_test_split(arr8,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)
print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

Accuracy :  73.0


In [17]:
arr9 = np.zeros((500,1))
#pps
for i in range(500):
    ptp = np.ptp(x[i])
    tmax = x[i].argmax()
    tmin = x[i].argmin()
    twopp = tmax - tmin
    arr9[i,0] = ptp/twopp
    arr[i,8] = ptp/twopp
    ar[i,8] = ptp/twopp
    
X_train, X_test, y_train, y_test = train_test_split(arr9,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)
print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

Accuracy :  77.0


In [18]:
def get_rms_acceleration(signal, frame_size, hop_length):
    rms = []
    for i in range(0, len(signal), hop_length):
        current_rms = np.sqrt(np.sum(signal[i:i+frame_size]**2)/frame_size)
        rms.append(current_rms)
    return rms

In [19]:
arr10 = np.zeros((500,9))
new_x = []
for i in range(500):
    new_x.append(get_rms_acceleration(x[i], 1024,512))
 
    for j in range(9):
        arr10[i,j] = new_x[i][j]
        arr[i,9+j] = new_x[i][j]
    ar[i,9] = np.mean(arr10[i])
    
X_train, X_test, y_train, y_test = train_test_split(arr10,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)
print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

Accuracy :  97.0


In [20]:
def get_margin_factor(signal, frame_size, hop_length):
    mar_fac = []
    for i in range(0, len(signal), hop_length):
        curr_mar_fac = np.max(np.abs(signal[i:i+frame_size])) / ((np.sum(np.sqrt(np.abs(signal[i:i+frame_size])))/ frame_size**2))
        mar_fac.append(curr_mar_fac)                             
    return mar_fac

In [21]:
arr11 = np.zeros((500,9))
new_x = []
for i in range(500):
    new_x.append(get_margin_factor(x[i], 1024,512))
    for j in range(9):
        arr11[i,j] = new_x[i][j]
        arr[i,18+j] = new_x[i][j]
    ar[i,10] = np.mean(arr11[i])
    
X_train, X_test, y_train, y_test = train_test_split(arr11,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)
print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

Accuracy :  93.0


In [22]:
def get_shape_factor(signal, frame_size, hop_length):
    fin_shape_fact = []
    for i in range(0, len(signal), hop_length):
        cur_shape_fact = np.sqrt(((np.sum(signal[i:i+frame_size]**2))/frame_size) / (np.sum(np.abs(signal[i:i+frame_size]))/frame_size))
        fin_shape_fact.append(cur_shape_fact)

    return fin_shape_fact

In [23]:
arr12 = np.zeros((500,9))
new_x = []
for i in range(500):
    new_x.append(get_shape_factor(x[i], 1024,512))
    
    for j in range(9):
        arr12[i,j] = new_x[i][j]
        arr[i,27+j] = new_x[i][j]
    ar[i,11] = np.mean(arr12[i])
        
X_train, X_test, y_train, y_test = train_test_split(arr12,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)
print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

Accuracy :  94.0


In [24]:
def get_impulse_factor(signal, frame_size, hop_length):
    impulse_factor = []
    for i in range(0, len(signal), hop_length):
        current_impls = max(np.abs(signal[i:i+frame_size]))/(np.sum(np.abs(signal[i:i+frame_size])/frame_size))
        impulse_factor.append(current_impls)
    return impulse_factor

In [25]:
arr13 = np.zeros((500,9))
new_x = []
for i in range(500):
    new_x.append(get_impulse_factor(x[i], 1024,512))
    
    for j in range(9):
        arr13[i,j] = new_x[i][j]
        arr[i,36+j] = new_x[i][j]
    ar[i,12] = np.mean(arr13[i])
        
X_train, X_test, y_train, y_test = train_test_split(arr13,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)
print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

Accuracy :  75.0


In [26]:
def get_third_freq(signal, frame_size, hop_length):
    third = []
    for i in range(0, len(signal), hop_length):
        L = len(signal[i:i+frame_size])
        y = abs(np.fft.fft(signal[i:i+frame_size]/L))[:int(L/2)]
        current_third = (np.sum((y - (np.sum(y)/frame_size))**3))/(frame_size * (np.sqrt((np.sum((y - (np.sum(y)/frame_size))**2))/(frame_size-1)))**3)
        third.append(current_third)
    return np.array(third)

In [27]:
arr14 = np.zeros((500,8))
new_x = []
for i in range(500):
    new_x.append(get_third_freq(x[i], 1024,512))
    
    for j in range(8):
        arr14[i,j] = new_x[i][j]
        arr[i,45+j] = new_x[i][j]
    ar[i,13] = np.mean(arr14[i])
        
X_train, X_test, y_train, y_test = train_test_split(arr14,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)
print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

  current_third = (np.sum((y - (np.sum(y)/frame_size))**3))/(frame_size * (np.sqrt((np.sum((y - (np.sum(y)/frame_size))**2))/(frame_size-1)))**3)


Accuracy :  74.0


In [28]:
def get_forth_freq(signal, frame_size, hop_length):
    forth = []
    for i in range(0, len(signal), hop_length):
        L = len(signal[i:i+frame_size])
        y = abs(np.fft.fft(signal[i:i+frame_size]/L))[:int(L/2)]
        current_forth = (np.sum((y - (np.sum(y)/frame_size))**4))/(frame_size * ((np.sum((y - (np.sum(y)/frame_size))**2))/(frame_size-1))**2)
        forth.append(current_forth)
    return np.array(forth)

In [29]:
arr15 = np.zeros((500,8))
new_x = []
for i in range(500):
    new_x.append(get_forth_freq(x[i], 1024,512))
    
    for j in range(8):
        arr15[i,j] = new_x[i][j]
        arr[i,53+j] = new_x[i][j]
    ar[i,14] = np.mean(arr15[i])

X_train, X_test, y_train, y_test = train_test_split(arr15,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)
print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

  current_forth = (np.sum((y - (np.sum(y)/frame_size))**4))/(frame_size * ((np.sum((y - (np.sum(y)/frame_size))**2))/(frame_size-1))**2)


Accuracy :  76.0


### 2.correlation of each pair of features 

In [30]:
df = pd.DataFrame(ar, columns = ['mean', 'std','max','min','skew','kurt','ptp','twopp','pps','rms','margin','shape', 'impulse','3f','4f'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()

print(sorted_mat)

pps     twopp     0.012514
twopp   pps       0.012514
        margin    0.013541
margin  twopp     0.013541
min     twopp     0.030808
                    ...   
        min       1.000000
max     max       1.000000
std     std       1.000000
3f      3f        1.000000
4f      4f        1.000000
Length: 225, dtype: float64


In [31]:
print(sorted_mat[0:10])

pps     twopp     0.012514
twopp   pps       0.012514
        margin    0.013541
margin  twopp     0.013541
min     twopp     0.030808
twopp   min       0.030808
mean    max       0.033498
max     mean      0.033498
pps     mean      0.049601
mean    pps       0.049601
dtype: float64


### 3.now, among those who have less correlations  

In [32]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = np.mean(x[i])
    feat[i,1] = np.max(x[i])
    
df = pd.DataFrame(feat, columns = ['mean','max'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

mean  max     0.033498
max   mean    0.033498
mean  mean    1.000000
max   max     1.000000
dtype: float64


In [33]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = np.max(x[i])
    feat[i,1] = np.std(x[i])
    
df = pd.DataFrame(feat, columns = ['max','std'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

max  std    0.927024
std  max    0.927024
max  max    1.000000
std  std    1.000000
dtype: float64


In [34]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = np.max(x[i])
    feat[i,1] = np.min(x[i])
    
df = pd.DataFrame(feat, columns = ['max','min'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

max  min    0.856563
min  max    0.856563
max  max    1.000000
min  min    1.000000
dtype: float64


In [35]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = np.max(x[i])
    feat[i,1] = np.ptp(x[i])
    
df = pd.DataFrame(feat, columns = ['max','ptp'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

max  ptp    0.958717
ptp  max    0.958717
max  max    1.000000
ptp  ptp    1.000000
dtype: float64


In [36]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = np.max(x[i])
    feat[i,1] = skew(x[i], axis=0, bias=True)
    
df = pd.DataFrame(feat, columns = ['max','skew'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

max   skew    0.147628
skew  max     0.147628
max   max     1.000000
skew  skew    1.000000
dtype: float64


In [37]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = np.mean(x[i])
    feat[i,1] = skew(x[i], axis=0, bias=True)
    
df = pd.DataFrame(feat, columns = ['mean','skew'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

mean  skew    0.185241
skew  mean    0.185241
mean  mean    1.000000
skew  skew    1.000000
dtype: float64


In [38]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = np.max(x[i])
    feat[i,1] = kurtosis(x[i], fisher=True)
    
df = pd.DataFrame(feat, columns = ['max','kurt'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

max   kurt    0.275252
kurt  max     0.275252
max   max     1.000000
kurt  kurt    1.000000
dtype: float64


In [39]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = np.mean(x[i])
    feat[i,1] = kurtosis(x[i], fisher=True)
    
df = pd.DataFrame(feat, columns = ['mean','kurt'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

mean  kurt    0.156201
kurt  mean    0.156201
mean  mean    1.000000
kurt  kurt    1.000000
dtype: float64


In [40]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = skew(x[i], axis=0, bias=True)
    feat[i,1] = kurtosis(x[i], fisher=True)
    
df = pd.DataFrame(feat, columns = ['skew','kurt'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

skew  kurt    0.153823
kurt  skew    0.153823
skew  skew    1.000000
kurt  kurt    1.000000
dtype: float64


In [41]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = np.max(x[i])
    ptp = np.ptp(x[i])
    tmax = x[i].argmax()
    tmin = x[i].argmin()
    twopp = tmax - tmin
    feat[i,1] = ptp/twopp
    
df = pd.DataFrame(feat, columns = ['max','pps'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

max  pps    0.134742
pps  max    0.134742
max  max    1.000000
pps  pps    1.000000
dtype: float64


In [42]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = np.mean(x[i])
    ptp = np.ptp(x[i])
    tmax = x[i].argmax()
    tmin = x[i].argmin()
    twopp = tmax - tmin
    feat[i,1] = ptp/twopp
    
df = pd.DataFrame(feat, columns = ['mean','pps'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

mean  pps     0.049601
pps   mean    0.049601
mean  mean    1.000000
pps   pps     1.000000
dtype: float64


In [43]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = skew(x[i], axis=0, bias=True)
    ptp = np.ptp(x[i])
    tmax = x[i].argmax()
    tmin = x[i].argmin()
    twopp = tmax - tmin
    feat[i,1] = ptp/twopp
    
df = pd.DataFrame(feat, columns = ['skew','pps'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

skew  pps     0.166648
pps   skew    0.166648
skew  skew    1.000000
pps   pps     1.000000
dtype: float64


In [44]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = kurtosis(x[i], fisher=True)
    ptp = np.ptp(x[i])
    tmax = x[i].argmax()
    tmin = x[i].argmin()
    twopp = tmax - tmin
    feat[i,1] = ptp/twopp
    
df = pd.DataFrame(feat, columns = ['kurt','pps'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

kurt  pps     0.073673
pps   kurt    0.073673
kurt  kurt    1.000000
pps   pps     1.000000
dtype: float64


In [45]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = np.max(x[i])
new_x = []
for i in range(500):  
    new_x.append(get_rms_acceleration(x[i], 1024,512))
    feat[i,1] = np.mean(new_x[i])
    
df = pd.DataFrame(feat, columns = ['max','rms'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

max  rms    0.926009
rms  max    0.926009
max  max    1.000000
rms  rms    1.000000
dtype: float64


In [46]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = np.max(x[i])
new_x = []
for i in range(500):  
    new_x.append(get_margin_factor(x[i], 1024,512))
    feat[i,1] = np.mean(new_x[i])
    
df = pd.DataFrame(feat, columns = ['max','margin'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

max     margin    0.682277
margin  max       0.682277
max     max       1.000000
margin  margin    1.000000
dtype: float64


In [47]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = np.max(x[i])
new_x = []
for i in range(500):  
    new_x.append(get_shape_factor(x[i], 1024,512))
    feat[i,1] = np.mean(new_x[i])
    
df = pd.DataFrame(feat, columns = ['max','shape'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

max    shape    0.938253
shape  max      0.938253
max    max      1.000000
shape  shape    1.000000
dtype: float64


In [48]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = np.max(x[i])
new_x = []
for i in range(500):  
    new_x.append(get_forth_freq(x[i], 1024,512))
    feat[i,1] = np.mean(new_x[i][0:-1])
    
df = pd.DataFrame(feat, columns = ['max','4f'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

  current_forth = (np.sum((y - (np.sum(y)/frame_size))**4))/(frame_size * ((np.sum((y - (np.sum(y)/frame_size))**2))/(frame_size-1))**2)


max  4f     0.16219
4f   max    0.16219
max  max    1.00000
4f   4f     1.00000
dtype: float64


In [49]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = np.mean(x[i])
new_x = []
for i in range(500):  
    new_x.append(get_forth_freq(x[i], 1024,512))
    feat[i,1] = np.mean(new_x[i][0:-1])
    
df = pd.DataFrame(feat, columns = ['mean','f4'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

  current_forth = (np.sum((y - (np.sum(y)/frame_size))**4))/(frame_size * ((np.sum((y - (np.sum(y)/frame_size))**2))/(frame_size-1))**2)


mean  f4      0.088334
f4    mean    0.088334
mean  mean    1.000000
f4    f4      1.000000
dtype: float64


In [50]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = skew(x[i], axis=0, bias=True)
new_x = []
for i in range(500):  
    new_x.append(get_forth_freq(x[i], 1024,512))
    feat[i,1] = np.mean(new_x[i][0:-1])
    
df = pd.DataFrame(feat, columns = ['skew','4f'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

  current_forth = (np.sum((y - (np.sum(y)/frame_size))**4))/(frame_size * ((np.sum((y - (np.sum(y)/frame_size))**2))/(frame_size-1))**2)


skew  4f      0.069601
4f    skew    0.069601
skew  skew    1.000000
4f    4f      1.000000
dtype: float64


In [51]:
feat = np.zeros((500,2))
for i in range(500):
    feat[i,0] = kurtosis(x[i], fisher=True)
new_x = []
for i in range(500):  
    new_x.append(get_forth_freq(x[i], 1024,512))
    feat[i,1] = np.mean(new_x[i][0:-1])
    
df = pd.DataFrame(feat, columns = ['kurt','4f'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

  current_forth = (np.sum((y - (np.sum(y)/frame_size))**4))/(frame_size * ((np.sum((y - (np.sum(y)/frame_size))**2))/(frame_size-1))**2)


kurt  4f      0.117544
4f    kurt    0.117544
kurt  kurt    1.000000
4f    4f      1.000000
dtype: float64


In [52]:
feat = np.zeros((500,2))
for i in range(500):
    ptp = np.ptp(x[i])
    tmax = x[i].argmax()
    tmin = x[i].argmin()
    twopp = tmax - tmin
    feat[i,0] = ptp/twopp
new_x = []
for i in range(500):  
    new_x.append(get_forth_freq(x[i], 1024,512))
    feat[i,1] = np.mean(new_x[i][0:-1])
    
df = pd.DataFrame(feat, columns = ['pps','4f'])
# Create correlation matrix
corr_mat = df.corr(method='pearson')

a = abs(corr_mat.unstack())
# Convert correlation matrix to 1-D Series and sort
sorted_mat = a.sort_values()
print(sorted_mat)

  current_forth = (np.sum((y - (np.sum(y)/frame_size))**4))/(frame_size * ((np.sum((y - (np.sum(y)/frame_size))**2))/(frame_size-1))**2)


pps  4f     0.080607
4f   pps    0.080607
pps  pps    1.000000
4f   4f     1.000000
dtype: float64


In [53]:
ar = np.zeros((500,6))
new_x = []
for i in range(500):
    ar[i,0] = max(x[i])
    ar[i,1] = np.mean(x[i])
    ar[i,2] = skew(x[i], axis=0, bias=True)
    ar[i,3] = kurtosis(x[i], fisher=True)
    
    ptp = np.ptp(x[i])
    tmax = x[i].argmax()
    tmin = x[i].argmin()
    twopp = tmax - tmin
    ar[i,4] = ptp/twopp
    
    new_x.append(get_forth_freq(x[i], 1024,512))
    ar[i,5] = np.mean(new_x[i][0:-1])

  current_forth = (np.sum((y - (np.sum(y)/frame_size))**4))/(frame_size * ((np.sum((y - (np.sum(y)/frame_size))**2))/(frame_size-1))**2)


### 4.lets see how much  feature has new information, and improve the classification 

In [54]:
ar = np.zeros((500,1))
for i in range(500):
    ar[i,0] = max(x[i])
    
X_train, X_test, y_train, y_test = train_test_split(ar,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 
for i in range(len(y_test)):
    if(y_pred[i]!=y_test[i]):
        print(i)

Accuracy :  95.0
17
32
80
92
95


In [55]:
ar = np.zeros((500,1))
for i in range(500):
    ar[i,0] = np.std(x[i])
    
X_train, X_test, y_train, y_test = train_test_split(ar,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 
for i in range(len(y_test)):
    if(y_pred[i]!=y_test[i]):
        print(i)

Accuracy :  95.0
1
47
63
73
95


In [56]:
ar = np.zeros((500,1))
for i in range(500):
    ar[i,0] = min(x[i])
    
X_train, X_test, y_train, y_test = train_test_split(ar,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 
for i in range(len(y_test)):
    if(y_pred[i]!=y_test[i]):
        print(i)

Accuracy :  95.0
5
17
38
73
91


In [57]:
ar = np.zeros((500,2))
for i in range(500):
    ar[i,0] = np.std(x[i])
    ar[i,1] = max(x[i])
    
X_train, X_test, y_train, y_test = train_test_split(ar,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 
for i in range(len(y_test)):
    if(y_pred[i]!=y_test[i]):
        print(i)

Accuracy :  99.0
95


In [58]:
ar = np.zeros((500,3))
for i in range(500):
    ar[i,0] = np.std(x[i])
    ar[i,1] = max(x[i])
    ar[i,2] = min(x[i])
    
X_train, X_test, y_train, y_test = train_test_split(ar,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 
for i in range(len(y_test)):
    if(y_pred[i]!=y_test[i]):
        print(i)

Accuracy :  99.0
95


In [59]:
ar = np.zeros((500,3))
for i in range(500):
    ar[i,0] = np.std(x[i])
    ar[i,1] = max(x[i])
    ar[i,2] = skew(x[i], axis=0, bias=True)
    
X_train, X_test, y_train, y_test = train_test_split(ar,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 
for i in range(len(y_test)):
    if(y_pred[i]!=y_test[i]):
        print(i)

Accuracy :  99.0
95


In [60]:
ar = np.zeros((500,3))
for i in range(500):
    ar[i,0] = np.std(x[i])
    ar[i,1] = max(x[i])
    ar[i,2] = np.ptp(x[i])
    
X_train, X_test, y_train, y_test = train_test_split(ar,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 
for i in range(len(y_test)):
    if(y_pred[i]!=y_test[i]):
        print(i)

Accuracy :  99.0
95


In [61]:
ar = np.zeros((500,3))
new_x = []
for i in range(500):
    ar[i,0] = np.std(x[i])
    ar[i,1] = max(x[i])
    new_x.append(get_rms_acceleration(x[i], 1024,512))
    ar[i,2] = np.mean(new_x[i])
    
X_train, X_test, y_train, y_test = train_test_split(ar,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 
for i in range(len(y_test)):
    if(y_pred[i]!=y_test[i]):
        print(i)    

Accuracy :  98.0
63
95


In [62]:
ar = np.zeros((500,3))
new_x = []
for i in range(500):
    ar[i,0] = np.std(x[i])
    ar[i,1] = max(x[i])
    new_x.append(get_margin_factor(x[i], 1024,512))
    ar[i,2] = np.mean(new_x[i])
    
X_train, X_test, y_train, y_test = train_test_split(ar,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 
for i in range(len(y_test)):
    if(y_pred[i]!=y_test[i]):
        print(i) 

Accuracy :  99.0
95


In [63]:
ar = np.zeros((500,3))
new_x = []
for i in range(500):
    ar[i,0] = np.std(x[i])
    ar[i,1] = max(x[i])
    new_x.append(get_shape_factor(x[i], 1024,512))
    ar[i,2] = np.mean(new_x[i])
    
X_train, X_test, y_train, y_test = train_test_split(ar,y,random_state=seed,test_size=0.2)
clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 
print ("f1 : ", f1_score(y_test,y_pred, average = None))
for i in range(len(y_test)):
    if(y_pred[i]!=y_test[i]):
        print(i) 

Accuracy :  99.0
f1 :  [0.99328859 0.98039216]
95


In [64]:
x_final = np.zeros((500,2))
for i in range(500):
    x_final[i,0] = np.std(x[i])
    x_final[i,1] = max(x[i])

# 5.using clustring to improve classification

In [65]:
clustering = AgglomerativeClustering().fit(x_final)

In [66]:
clustering = KMeans(n_clusters=2, random_state=0, n_init="auto").fit(x_final)

In [67]:
cluster1 = []
y1 =[]
cluster2 = []
y2 = []
for i in range(len(x_final)):
    if(clustering.labels_[i]==0):
        cluster1.append(x_final[i])
        y1.append(y[i])
    if(clustering.labels_[i]==1):
        cluster2.append(x_final[i]) 
        y2.append(y[i])

In [68]:
X_train, X_test, y_train, y_test = train_test_split(cluster1,y1,random_state=seed,test_size=0.2)

clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

Accuracy :  97.70114942528735


In [69]:
X_train, X_test, y_train, y_test = train_test_split(cluster2,y2,random_state=seed,test_size=0.2)

clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

Accuracy :  92.85714285714286


In [70]:
clf = SVC(kernel='linear')
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

  y = column_or_1d(y, warn=True)


Accuracy :  92.85714285714286


### 6.testing with different class groups

In [71]:
x = pickle.load(open('x.pkl', 'rb'))
y = pickle.load(open('y.pkl', 'rb'))

#ABCD E
x_normal = np.concatenate((x[:300], x[400:]), axis=0)
x_seizure = x[300:400]
print(x_normal.shape)
print(x_seizure.shape)
sampling_freq = 173.6 #based on info from website


b, a = butter(3, [0.5,40], btype='bandpass',fs=sampling_freq)

x_normal_filtered = np.array([lfilter(b,a,x_normal[ind,:]) for ind in range(x_normal.shape[0])])
x_seizure_filtered = np.array([lfilter(b,a,x_seizure[ind,:]) for ind in range(x_seizure.shape[0])])
print(x_normal.shape)
print(x_seizure.shape)

x_normal = x_normal_filtered
x_seizure = x_seizure_filtered

x = np.concatenate((x_normal,x_seizure))
y = np.concatenate((np.zeros((400,1)),np.ones((100,1))))

(400, 4097)
(100, 4097)
(400, 4097)
(100, 4097)


In [72]:
x_final = np.zeros((500,2))
for i in range(500):
    x_final[i,0] = np.std(x[i])
    x_final[i,1] = max(x[i])


X_train, X_test, y_train, y_test = train_test_split(x_final,y,random_state=seed,test_size=0.2)

clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

Accuracy :  99.0


In [73]:
x = pickle.load(open('x.pkl', 'rb'))
y = pickle.load(open('y.pkl', 'rb'))

#AC E
x_normal = np.concatenate((x[:100], x[200:300]), axis=0)
x_seizure = x[300:400]
sampling_freq = 173.6 #based on info from website


b, a = butter(3, [0.5,40], btype='bandpass',fs=sampling_freq)

x_normal_filtered = np.array([lfilter(b,a,x_normal[ind,:]) for ind in range(x_normal.shape[0])])
x_seizure_filtered = np.array([lfilter(b,a,x_seizure[ind,:]) for ind in range(x_seizure.shape[0])])

x_normal = x_normal_filtered
x_seizure = x_seizure_filtered

x = np.concatenate((x_normal,x_seizure))
y = np.concatenate((np.zeros((200,1)),np.ones((100,1))))

In [74]:
x_final = np.zeros((300,2))
for i in range(300):
    x_final[i,0] = np.std(x[i])
    x_final[i,1] = max(x[i])


X_train, X_test, y_train, y_test = train_test_split(x_final,y,random_state=seed,test_size=0.2)

clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

Accuracy :  93.33333333333333


In [75]:
x = pickle.load(open('x.pkl', 'rb'))
y = pickle.load(open('y.pkl', 'rb'))

#D E
x_normal = x[400:]
x_seizure = x[300:400]
sampling_freq = 173.6 #based on info from website


b, a = butter(3, [0.5,40], btype='bandpass',fs=sampling_freq)

x_normal_filtered = np.array([lfilter(b,a,x_normal[ind,:]) for ind in range(x_normal.shape[0])])
x_seizure_filtered = np.array([lfilter(b,a,x_seizure[ind,:]) for ind in range(x_seizure.shape[0])])

x_normal = x_normal_filtered
x_seizure = x_seizure_filtered

x = np.concatenate((x_normal,x_seizure))
y = np.concatenate((np.zeros((100,1)),np.ones((100,1))))

In [76]:
x_final = np.zeros((200,2))
for i in range(200):
    x_final[i,0] = np.std(x[i])
    x_final[i,1] = max(x[i])


X_train, X_test, y_train, y_test = train_test_split(x_final,y,random_state=seed,test_size=0.2)

clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

Accuracy :  100.0


In [77]:
x = pickle.load(open('x.pkl', 'rb'))
y = pickle.load(open('y.pkl', 'rb'))

#AB CD E
x_normal = x[:200]
x_epilepsy = np.concatenate((x[200:300], x[400:]), axis=0)
x_seizure = x[300:400]
sampling_freq = 173.6 #based on info from website


b, a = butter(3, [0.5,40], btype='bandpass',fs=sampling_freq)

x_normal_filtered = np.array([lfilter(b,a,x_normal[ind,:]) for ind in range(x_normal.shape[0])])
x_epilepsy_filtered = np.array([lfilter(b,a,x_epilepsy[ind,:]) for ind in range(x_epilepsy.shape[0])])
x_seizure_filtered = np.array([lfilter(b,a,x_seizure[ind,:]) for ind in range(x_seizure.shape[0])])
x_normal = x_normal_filtered
x_epilepsy = x_epilepsy_filtered
x_seizure = x_seizure_filtered

x = np.concatenate((x_normal,x_epilepsy,x_seizure))
y = np.concatenate((np.zeros((200,1)),np.zeros((200,1)),np.ones((100,1))))

In [78]:
x_final = np.zeros((500,2))
for i in range(500):
    x_final[i,0] = np.std(x[i])
    x_final[i,1] = max(x[i])


X_train, X_test, y_train, y_test = train_test_split(x_final,y,random_state=seed,test_size=0.2)

clf_gini = train_using_gini(X_train, X_test, y_train)
    
# Prediction using gini
y_pred = clf_gini.predict(X_test)

print ("Accuracy : ", accuracy_score(y_test,y_pred)*100) 

Accuracy :  99.0
