In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import glob, os
import datetime
%matplotlib inline

from sklearn.feature_extraction import DictVectorizer
from sklearn.preprocessing import LabelEncoder
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Functions
# Adding a target column
def generateTarget(row) :
    if row['trial_type'] == 'ADLs' :
        return 0
    if row['trial_type'] == 'Near_Falls' :
        return 0
    if row['trial_type'] == 'Falls' :
        return 1
    
# Adding a target column
def generateTarget2(row) :
    return 0

# We'll use this function to test our models from now on
def modelProcessing(X_train,y_train,X_test,y_test,model) :
    model.fit(X_train,y_train)
    y_pred = model.predict(X_test)
    
    tn, fp, fn, tp = confusion_matrix(y_test,y_pred).ravel()
    specificity = (tn / (fp + tn))*100
    sensitivity = (tp / (tp + fn))*100
    accuracy = ((tn+tp) / (tp + tn + fp + fn))
    print("Confusion matrix : ")
    print("TN : "+str(tn) + " FP : " +str(fp))
    print("FN : "+str(fn) + " TP : " +str(tp))
    print("")
    print("Accuracy : "+str(accuracy_score(y_test,y_pred)))
    print("Recall : " +str(recall_score(y_test,y_pred)))
    print("Precision : "+str(precision_score(y_test,y_pred)))
    print("F-measure :"+str(f1_score(y_test,y_pred)))
    print("Sensitivity : "+str(sensitivity))
    print("Specificity : "+str(specificity))
    

# Obtain a DF with the metrics and bodyparts you want
def filterCols(df,metrics,bodyparts,resultants=True) :
    # Metrics are Acceleration,Magnetic and Velocity (List of strings)
    # Bodyparts are waist,l.ankle,r.ankle,l.thigh,r.thigh,sternum,head (list of strings)
    # Resultants = true will get the resultants of the respective metrics
    groupcols = ['subject', 'trial_type', 'trial_subtype', 'trial_num','trial_num_original','time_datetime']
    metriccols = []
    bodycols = []
    
    for col in df.columns.values :
        for metric in metrics :
            if (metric in col) :
                metriccols.append(col)
            if (resultants) :
                if (metric.lower() in col) :
                    metriccols.append(col)
        for part in bodyparts :
            if (part in col) :
                bodycols.append(col)
    dfOut = df[groupcols + list(set(metriccols) & set(bodycols))]
    return dfOut

In [4]:
# Load full database
dfMain = pickle.load(open("../../../dataResultants/dataset_consolidated.p", "rb"))

In [None]:
# Only run one of the following cells at a time unless your computer has enough RAM
# Used for the overlapping window preprocessing
df = dfMain.copy()
df = dfMain.drop(['target','Time','time_seconds'],axis=1)

In [6]:
# Used for the resultant peak window preprocessing
df2 = dfMain.copy()
df2 = df2.drop(['target','Time','time_seconds'],axis=1)

In [5]:
# Used for the resultant difference window preprocessing
df3 = dfMain.copy()
df3 = df3.drop(['target','Time','time_seconds'])

In [5]:
# Used for the convolutional neural network approach
# Used for the resultant peak window preprocessing
df4 = dfMain.copy()
df4 = df4.drop(['target','Time','time_seconds'],axis=1)

In [6]:
# Get the data columns and separate them based on the sensor and the feature (accel, vel and magfield)

allcols = dfMain.columns.values

groupcols = ['subject', 'trial_type', 'trial_subtype', 'trial_num','trial_num_original','time_datetime']
waistcols = []
ranklecols = []
lanklecols = []
rthighcols = []
lthighcols = []
headcols = []
sternumcols = []
accelcols = []
velcols = []
magcols = []
meancols = []
resultantcols = []
varcols = []

for col in allcols : 
    if 'r.ankle' in col :
        ranklecols.append(col)
    if 'l.ankle' in col :
        lanklecols.append(col)
    if 'waist' in col :
        waistcols.append(col)
    if 'r.thigh' in col :
        rthighcols.append(col)
    if 'l.thigh' in col :
        lthighcols.append(col)
    if 'head' in col :
        headcols.append(col)
    if 'Velocity' in col :
        velcols.append(col)
    if 'Magnetic' in col :
        magcols.append(col)
    if 'Acceleration' in col :
        accelcols.append(col)
    if 'mean' in col :
        meancols.append(col)
    if 'var' in col :
        varcols.append(col)
    if 'resultant' in col :
        resultantcols.append(col)

In [94]:
df = df.drop(resultantcols,axis=1) # Drop the resultant columns from here

In [101]:
# group in intervals of 0.5 seconds, calculating the mean
df_window_mean = df.groupby(['subject','trial_type','trial_subtype','trial_num','trial_num_original',pd.Grouper(key='time_datetime', freq='500000us')]).mean()
df_window_mean = df_window_mean.reset_index()

# renaming the acceleration measurement columns, including a '_mean' in the end
for col in accelcols:
    df_window_mean.rename(columns={col: str(col+'_mean')}, inplace=True)

for col in velcols:
    df_window_mean.rename(columns={col: str(col+'_mean')}, inplace=True)
    
for col in magcols:
    df_window_mean.rename(columns={col: str(col+'_mean')}, inplace=True)

In [102]:
# group in intervals of 2 seconds, calculating the variance

df_window_variance = df.groupby(['subject','trial_type','trial_subtype','trial_num','trial_num_original',pd.Grouper(key='time_datetime', freq='500000us')]).var()
df_window_variance = df_window_variance.reset_index()

# renaming the acceleration measurement columns, including a '_variance' in the end

for col in accelcols : 
    df_window_variance.rename(columns={col: str(col+'_var')}, inplace=True)
    
for col in velcols : 
    df_window_variance.rename(columns={col: str(col+'_var')}, inplace=True)
    
for col in magcols : 
    df_window_variance.rename(columns={col: str(col+'_var')}, inplace=True)

In [104]:
# final dataframe, with all accelerometer columns (means and variances)
all_trials_window = pd.merge(df_window_mean, df_window_variance,on=['subject', 'trial_type', 'trial_subtype', 'trial_num','trial_num_original','time_datetime'])

# This dataframe will be used in case we decide to try different preprocessing steps
all_trials_window = all_trials_window.dropna(axis=0, how='any')

In [105]:
# Get the data columns and separate them based on the sensor and the feature (accel, vel and magfield)

allcols = all_trials_window.columns.values

groupcols = ['subject', 'trial_type', 'trial_subtype', 'trial_num','trial_num_original']
waistcols = []
ranklecols = []
lanklecols = []
rthighcols = []
lthighcols = []
headcols = []
sternumcols = []
accelcols = []
velcols = []
magcols = []
meancols = []
varcols = []

for col in allcols : 
    if 'r.ankle' in col :
        ranklecols.append(col)
    if 'l.ankle' in col :
        lanklecols.append(col)
    if 'waist' in col :
        waistcols.append(col)
    if 'r.thigh' in col :
        rthighcols.append(col)
    if 'l.thigh' in col :
        lthighcols.append(col)
    if 'head' in col :
        headcols.append(col)
    if 'Velocity' in col :
        velcols.append(col)
    if 'Magnetic' in col :
        magcols.append(col)
    if 'Acceleration' in col :
        accelcols.append(col)
    if 'mean' in col :
        meancols.append(col)
    if 'var' in col :
        varcols.append(col)

# 1. Overlapping windows around axis acceleration peaks
#### First we find the biggest peak in each acceleration axis (be it a maximum or minimum peak) and we create a window that spans from the smallest peak-1 second to the biggest peak + 1 second. This creates a window that takes into account all 3 peaks. 
#### We'll be using only the waist for these tests

In [107]:
# Get just waist acceleration columns, time and groupcols
dfWaistAccels = filterCols(all_trials_window,['Acceleration'],['waist'],True)

['subject' 'trial_type' 'trial_subtype' 'trial_num' 'trial_num_original'
 'time_datetime' 'waist Acceleration Z (m/s^2)_var'
 'waist Acceleration X (m/s^2)_var' 'waist Acceleration Z (m/s^2)_mean'
 'waist Acceleration X (m/s^2)_mean' 'waist Acceleration Y (m/s^2)_var'
 'waist Acceleration Y (m/s^2)_mean']


In [108]:

auxdf = dfWaistAccels

# Add absolute value of the acceleration means as new columns to auxdf
auxdf['AbsX'] = auxdf['waist Acceleration X (m/s^2)_mean'].abs()
auxdf['AbsY'] = auxdf['waist Acceleration Y (m/s^2)_mean'].abs()
auxdf['AbsZ'] = auxdf['waist Acceleration Z (m/s^2)_mean'].abs()

# Find the id of the rows with max absolute value for each axis
dfWaistAccels['YMax'] = auxdf.groupby(groupcols)['AbsY'].transform('idxmax')
dfWaistAccels['XMax'] = auxdf.groupby(groupcols)['AbsX'].transform('idxmax')
dfWaistAccels['ZMax'] = auxdf.groupby(groupcols)['AbsZ'].transform('idxmax')

# Find the max and min ids from the last section
dfWaistAccels['AxisMax'] = dfWaistAccels[["YMax", "XMax","ZMax"]].max(axis=1)
dfWaistAccels['AxisMin'] = dfWaistAccels[["YMax", "XMax","ZMax"]].min(axis=1)



In [109]:
# Creating the window for each subject,trialtype, subtype and number and combine them all into one single dataframe
dfList = []
for sub in dfWaistAccels['subject'].unique() :
    for trialtype in dfWaistAccels['trial_type'].unique() :
        for subtype in dfWaistAccels['trial_subtype'].unique() :
            for num in dfWaistAccels['trial_num'].unique() :
                aux1 = dfWaistAccels[(dfWaistAccels['subject'] == sub) & (dfWaistAccels['trial_type'] == trialtype) 
                    & (dfWaistAccels['trial_subtype'] == subtype) & (dfWaistAccels['trial_num'] == num)]
                aux2 = aux1[(aux1.index < aux1.AxisMax+2) & (aux1.index > aux1.AxisMin-2)]
                dfList.append(aux2)

fulldf = pd.concat(dfList)

In [110]:
# Dropping the columns we don't need anymore
fulldf = fulldf.drop(['XMax','ZMax','YMax','AxisMax','AxisMin','AbsX','AbsY','AbsZ'],axis=1)
    
fulldf['target'] = fulldf.apply (lambda row: generateTarget(row),axis=1)

In [111]:
y_train = fulldf[(fulldf['subject'] >= 6)]['target']
X_train = fulldf[(fulldf['subject'] >= 6)].drop(['trial_num_original',
                'trial_type', 'subject', 'trial_subtype', 'trial_num', 'time_datetime','target'],axis=1)
y_test = fulldf[(fulldf['subject'] < 6)]['target']
X_test = fulldf[(fulldf['subject'] < 6)].drop(['trial_num_original',
                'trial_type', 'subject', 'trial_subtype', 'trial_num', 'time_datetime','target'],axis=1)

clf = svm.SVC(decision_function_shape='ovo', cache_size=500000, coef0=0, C=1, gamma=0.01,  class_weight=None)
modelProcessing(X_train,y_train,X_test,y_test,clf)

Accuracy : 0.780831914376
Recall : 0.414669571532
Precision : 0.857357357357
F-measure :0.558981889378


###  Using only mean columns 

In [112]:
y_train = fulldf[(fulldf['subject'] >= 6)]['target']
X_train = fulldf[(fulldf['subject'] >= 6)].drop(['trial_num_original',
                'trial_type', 'subject', 'trial_subtype', 'trial_num', 'time_datetime','target'],axis=1)
X_train = X_train.drop(['waist Acceleration X (m/s^2)_var','waist Acceleration Y (m/s^2)_var',
                        'waist Acceleration Z (m/s^2)_var'],axis=1)
y_test = fulldf[(fulldf['subject'] < 6)]['target']
X_test = fulldf[(fulldf['subject'] < 6)].drop(['trial_num_original',
                'trial_type', 'subject', 'trial_subtype', 'trial_num', 'time_datetime','target'],axis=1)
X_test = X_test.drop(['waist Acceleration X (m/s^2)_var','waist Acceleration Y (m/s^2)_var',
                        'waist Acceleration Z (m/s^2)_var'],axis=1)

clf = svm.SVC(decision_function_shape='ovo', cache_size=500000, coef0=0, C=1, gamma=0.01,  class_weight=None)
modelProcessing(X_train,y_train,X_test,y_test,clf)

Accuracy : 0.778156166383
Recall : 0.366739288308
Precision : 0.926605504587
F-measure :0.525494276795


# 2. Resultant peak windows :

In [7]:
# group in intervals of 0.5 seconds, calculating the mean
df2_window_mean = df2.groupby(['subject','trial_type','trial_subtype','trial_num','trial_num_original',pd.Grouper(key='time_datetime', freq='500000us')]).mean()
df2_window_mean = df2_window_mean.reset_index()

# renaming the acceleration measurement columns, including a '_mean' in the end
for col in df2.columns.values :
    if ('Acceleration' in col) or ('Velocity' in col) or ('Magnetic' in col) or ('resultant' in col) :
        df2_window_mean.rename(columns={col: str(col+'_mean')}, inplace=True)

In [8]:
# group in intervals of 0.5 seconds, calculating the variance

df2_window_variance = df2.groupby(['subject','trial_type','trial_subtype','trial_num','trial_num_original',pd.Grouper(key='time_datetime', freq='500000us')]).var()
df2_window_variance = df2_window_variance.reset_index()

# renaming the acceleration measurement columns, including a '_variance' in the end
for col in df2.columns.values :
    if ('Acceleration' in col) or ('Velocity' in col) or ('Magnetic' in col) or ('resultant' in col) :
        df2_window_variance.rename(columns={col: str(col+'_var')}, inplace=True)
    

In [9]:
# final dataframe, with all accelerometer columns (means and variances)
df2_all_windows = pd.merge(df2_window_mean, df2_window_variance,on=['subject', 'trial_type', 'trial_subtype', 'trial_num','trial_num_original','time_datetime'])

# This dataframe will be used in case we decide to try different preprocessing steps
df2_all_windows = df2_all_windows.dropna(axis=0, how='any')

In [10]:
dfResWindows = filterCols(df2_all_windows,['Acceleration'],['waist'],True)
dfResWindows['target'] = dfResWindows.apply(lambda row: generateTarget2(row),axis=1)
print(dfResWindows.columns.values)

['subject' 'trial_type' 'trial_subtype' 'trial_num' 'trial_num_original'
 'time_datetime' 'waist Acceleration Y (m/s^2)_mean'
 'waist Acceleration Y (m/s^2)_var' 'waist resultant acceleration_var'
 'waist Acceleration Z (m/s^2)_var' 'waist Acceleration Z (m/s^2)_mean'
 'waist Acceleration X (m/s^2)_mean' 'waist Acceleration X (m/s^2)_var'
 'waist resultant acceleration_mean' 'target']


In [32]:
# # Creating the window for each subject,trialtype, subtype and number and combine them all into one single dataframe
df2List = []
for sub in dfResWindows['subject'].unique() :
    for trialtype in dfResWindows['trial_type'].unique() :
        for subtype in dfResWindows['trial_subtype'].unique() :
            for num in dfResWindows['trial_num'].unique() :
                aux1 = dfResWindows[(dfResWindows['subject'] == sub) & 
                                         (dfResWindows['trial_type'] == trialtype) & 
                                         (dfResWindows['trial_subtype'] == subtype) & 
                                         (dfResWindows['trial_num'] == num)]
                if (aux1.shape[0] > 0) :
                    if (trialtype == 'Falls') :
                        peak_index = aux1['waist resultant acceleration_mean'].idxmax()
                        for i in range(peak_index-4,peak_index+4) : # Add the target 1 to the window
                            aux1.set_value(i, 'target', 1)
                    df2List.append(aux1)

fulldf2 = pd.concat(df2List)

In [33]:
y_train = fulldf2[(fulldf2['subject'] >= 6)]['target']
X_train = fulldf2[(fulldf2['subject'] >= 6)].drop(['trial_num_original',
                'trial_type', 'subject', 'trial_subtype', 
                    'trial_num','target','time_datetime'],axis=1)

y_test = fulldf2[(fulldf2['subject'] < 6)]['target']
X_test = fulldf2[(fulldf2['subject'] < 6)].drop(['trial_num_original',
                'trial_type', 'subject', 'trial_subtype', 
                    'trial_num','target','time_datetime'],axis=1)

clf = svm.SVC(decision_function_shape='ovo', cache_size=500000, coef0=0, C=1, gamma=0.01,  class_weight=None)
modelProcessing(X_train,y_train,X_test,y_test,clf)

Confusion matrix : 
TN : 8627 FP : 114
FN : 509 TP : 331

Accuracy : 0.934975472289
Recall : 0.394047619048
Precision : 0.743820224719
F-measure :0.515175097276
Sensitivity : 39.4047619048
Specificity : 98.6958013957


# 3. Resultant Difference Window

In [7]:
# group in intervals of 0.5 seconds, calculating the mean
df3_window_mean = df3.groupby(['subject','trial_type','trial_subtype','trial_num','trial_num_original',pd.Grouper(key='time_datetime', freq='1000000us')]).mean()
df3_window_mean = df3_window_mean.reset_index()

# renaming the acceleration measurement columns, including a '_mean' in the end
for col in df3.columns.values :
    if ('Acceleration' in col) or ('Velocity' in col) or ('Magnetic' in col) or ('resultant' in col) :
        df3_window_mean.rename(columns={col: str(col+'_mean')}, inplace=True)

In [8]:
# group in intervals of 0.5 seconds, calculating the variance

df3_window_variance = df3.groupby(['subject','trial_type','trial_subtype','trial_num','trial_num_original',pd.Grouper(key='time_datetime', freq='1000000us')]).var()
df3_window_variance = df3_window_variance.reset_index()

# renaming the acceleration measurement columns, including a '_variance' in the end
for col in df3.columns.values :
    if ('Acceleration' in col) or ('Velocity' in col) or ('Magnetic' in col) or ('resultant' in col) :
        df3_window_variance.rename(columns={col: str(col+'_var')}, inplace=True)
    

In [9]:
# final dataframe, with all accelerometer columns (means and variances)
df3_all_windows = pd.merge(df3_window_mean, df3_window_variance,on=['subject', 'trial_type', 'trial_subtype', 'trial_num','trial_num_original','time_datetime'])

# This dataframe will be used in case we decide to try different preprocessing steps
df3_all_windows = df3_all_windows.dropna(axis=0, how='any')

In [10]:
df3ResWindows = filterCols(df3_all_windows,['Acceleration'],['waist'],True)
df3ResWindows['target'] = df3ResWindows.apply(lambda row: generateTarget2(row),axis=1)
print(df3ResWindows.columns.values)

['subject' 'trial_type' 'trial_subtype' 'trial_num' 'trial_num_original'
 'time_datetime' 'waist Acceleration Z (m/s^2)_var'
 'waist Acceleration X (m/s^2)_mean' 'waist resultant acceleration_var'
 'waist Acceleration X (m/s^2)_var' 'waist Acceleration Y (m/s^2)_var'
 'waist Acceleration Y (m/s^2)_mean' 'waist Acceleration Z (m/s^2)_mean'
 'waist resultant acceleration_mean' 'target']


In [11]:
# # Creating the window for each subject,trialtype, subtype and number and combine them all into one single dataframe
df3List = []
for sub in df3ResWindows['subject'].unique() :
    for trialtype in df3ResWindows['trial_type'].unique() :
        for subtype in df3ResWindows['trial_subtype'].unique() :
            for num in df3ResWindows['trial_num'].unique() :
                aux1 = df3ResWindows[(df3ResWindows['subject'] == sub) & 
                                         (df3ResWindows['trial_type'] == trialtype) & 
                                         (df3ResWindows['trial_subtype'] == subtype) & 
                                         (df3ResWindows['trial_num'] == num)]
                aux1['resultant_diff'] = df3ResWindows['waist resultant acceleration_mean'].diff().fillna(0)
                df3List.append(aux1)

df3resdiff3 = pd.concat(df3List)

In [13]:
print(df3resdiff3.columns.values)

['subject' 'trial_type' 'trial_subtype' 'trial_num' 'trial_num_original'
 'time_datetime' 'waist Acceleration Z (m/s^2)_var'
 'waist Acceleration X (m/s^2)_mean' 'waist resultant acceleration_var'
 'waist Acceleration X (m/s^2)_var' 'waist Acceleration Y (m/s^2)_var'
 'waist Acceleration Y (m/s^2)_mean' 'waist Acceleration Z (m/s^2)_mean'
 'waist resultant acceleration_mean' 'target' 'resultant_diff']


In [14]:
# # Creating the window for each subject,trialtype, subtype and number and combine them all into one single dataframe
# This takes a while to run (about at least 10-15mins)
df3List = []
for sub in df3resdiff3['subject'].unique() :
    for trialtype in df3resdiff3['trial_type'].unique() :
        for subtype in df3resdiff3['trial_subtype'].unique() :
            for num in df3resdiff3['trial_num'].unique() :
                aux1 = df3resdiff3[(df3resdiff3['subject'] == sub) & 
                                         (df3resdiff3['trial_type'] == trialtype) & 
                                         (df3resdiff3['trial_subtype'] == subtype) & 
                                         (df3resdiff3['trial_num'] == num)]
                if (aux1.shape[0] > 0) :
                    if (trialtype == 'Falls') :
                        peak_index = aux1['resultant_diff'].idxmax()
                        for i in range(peak_index-2,peak_index+2) : # Add the target 1 to the window
                            aux1.set_value(i, 'target', 1)
                    df3List.append(aux1)

fulldf3 = pd.concat(df3List)

In [20]:
y_train = fulldf3[(fulldf3['subject'] >= 6)]['target']
X_train = fulldf3[(fulldf3['subject'] >= 6)].drop(['trial_num_original',
                'trial_type', 'subject', 'trial_subtype', 
                    'trial_num','target','time_datetime'],axis=1)

y_test = fulldf3[(fulldf3['subject'] < 6)]['target']
X_test = fulldf3[(fulldf3['subject'] < 6)].drop(['trial_num_original',
                'trial_type', 'subject', 'trial_subtype', 
                    'trial_num','target','time_datetime'],axis=1)

clf = svm.SVC(decision_function_shape='ovo', cache_size=500000, coef0=0, C=1, gamma=0.01,  class_weight=None)
modelProcessing(X_train,y_train,X_test,y_test,clf)

Confusion matrix : 
TN : 4455 FP : 65
FN : 210 TP : 210

Accuracy : 0.944331983806
Recall : 0.5
Precision : 0.763636363636
F-measure :0.604316546763
Sensitivity : 50.0
Specificity : 98.5619469027


# Convolutional Neural Networks

In [9]:
df4 = filterCols(df4,['Acceleration'],['waist'],True)
df4['target'] = df4.apply(lambda row: generateTarget2(row),axis=1)
print(df4.columns.values)

['subject' 'trial_type' 'trial_subtype' 'trial_num' 'trial_num_original'
 'time_datetime' 'waist Acceleration Y (m/s^2)'
 'waist Acceleration Z (m/s^2)' 'waist Acceleration X (m/s^2)'
 'waist resultant acceleration' 'target']


In [11]:
# # Creating the window for each subject,trialtype, subtype and number and combine them all into one single dataframe
df4List = []
for sub in df4['subject'].unique() :
    for trialtype in df4['trial_type'].unique() :
        for subtype in df4['trial_subtype'].unique() :
            for num in df4['trial_num'].unique() :
                aux1 = df4[(df4['subject'] == sub) & 
                                         (df4['trial_type'] == trialtype) & 
                                         (df4['trial_subtype'] == subtype) & 
                                         (df4['trial_num'] == num)]
                if (aux1.shape[0] > 0) :
                    if (trialtype == 'Falls') :
                        peak_index = aux1['waist resultant acceleration'].idxmax()
                        for i in range(peak_index-256,peak_index+256) : # Add the target 1 to the window
                            aux1.set_value(i, 'target', 1)
                    df4List.append(aux1)

fulldf4 = pd.concat(df4List)

In [18]:
y_train = fulldf4[(fulldf4['subject'] >= 6)]['target']
X_train = fulldf4[(fulldf4['subject'] >= 6)].drop(['trial_num_original',
                'trial_type', 'subject', 'trial_subtype', 
                    'trial_num','target','time_datetime'],axis=1)

y_test = fulldf4[(fulldf4['subject'] < 6)]['target']
X_test = fulldf4[(fulldf4['subject'] < 6)].drop(['trial_num_original',
                'trial_type', 'subject', 'trial_subtype', 
                    'trial_num','target','time_datetime'],axis=1)

In [14]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Conv1D,MaxPooling1D, Flatten
from keras.optimizers import SGD

Using TensorFlow backend.


In [19]:
X_train = X_train.as_matrix()
X_test = X_test.as_matrix()

In [63]:
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
conv = Sequential()
conv.add(Conv1D(filters=4, kernel_size=1, input_shape = X_train.shape[1:4], activation = 'relu'))
conv.add(MaxPooling1D(4))
conv.add(Flatten())
conv.add(Dense(1, activation = 'sigmoid'))
sgd = SGD(lr = 0.1, momentum = 0.9, decay = 0, nesterov = False)
conv.compile(loss = 'binary_crossentropy', optimizer = sgd, metrics = ['accuracy'])
conv.fit(X_train, y_train, batch_size = 500, epochs = 50, verbose = 0)

ValueError: Error when checking input: expected conv1d_9_input to have shape (None, 10, 1) but got array with shape (595502, 4, 1)

In [58]:
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
metrics = conv.evaluate(X_test,y_test,verbose=0)
print("Loss : "+str(metrics[0]))
print("Accuracy : "+str(metrics[1]))

Loss : 0.302806179657
Accuracy : 0.909895825453


In [59]:
tp,tn,fp,fn = 0,0,0,0
i = 0
while i < X_test.shape[0] :
    elem = X_test[i:i+4]
    pred = conv.predict(elem)
    realVal = y_test[i:i+4]
    
    # Real value
    totalReal = 0
    y = -1
    for val in realVal : 
        totalReal += val
    totalReal = totalReal / 4
    if totalReal >= 0.5 : 
        y = 1
    else : 
        y = 0
        
    # Predicted value
    total = 0
    predVal = -1
    for num in pred : 
        total += num
    total = total / 4
    if (total >= 0.5) :
        predVal = 1
    else : 
        predVal = 0
        
    if (y == 1) and (predVal == 1) :
        tp += 1
    elif (y == 0) and (predVal == 0) :
        tn += 1
    elif (y == 1) and (predVal == 0) :
        fn += 1
    elif (y == 0) and (predVal == 1) :
        fp += 1
     
    i += 4

In [60]:
specificity = (tn / (fp + tn))*100
sensitivity = (tp / (tp + fn))*100
accuracy = ((tn+tp) / (tp + tn + fp + fn))
print("Confusion matrix : ")
print("TN : "+str(tn) + " FP : " +str(fp))
print("FN : "+str(fn) + " TP : " +str(tp))
print("")
print("Accuracy : "+str(accuracy))
print("Sensitivity : "+str(sensitivity))
print("Specificity : "+str(specificity))

Confusion matrix : 
TN : 135292 FP : 0
FN : 13425 TP : 0

Accuracy : 0.9097278724019446
Sensitivity : 0.0
Specificity : 100.0
