In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import glob, os
import datetime
%matplotlib inline

from sklearn.feature_extraction import DictVectorizer
from sklearn.preprocessing import LabelEncoder
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score



In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Load full database.
# The file is opened in a `with` block so the handle is closed as soon as the
# frame has been read.
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source.
with open("../../../dataResultants/dataset_consolidated.p", "rb") as dataset_file:
    df = pickle.load(dataset_file)

# # Load metadata
# meta = pickle.load(open("../../../data/metadata.p", "rb"))

In [4]:
# Keep a full copy of the loaded frame in df2 before trimming df.
df2 = df.copy()

# Remove the columns that df will not need.
df = df.drop(
    labels=['target', 'Time', 'time_seconds'],
    axis=1,
)
# df will be used for the overlapping window models

In [5]:
# Bucket the column names of df by sensor (waist, ankles, thighs, head) and by
# feature (acceleration, velocity, magnetic field, mean, var, resultant).
# Membership is decided by a substring match on the column name, so one column
# can land in several buckets.

allcols = df.columns.values

groupcols = ['subject', 'trial_type', 'trial_subtype', 'trial_num','trial_num_original','time_datetime']

# Sensor buckets
waistcols = [col for col in allcols if 'waist' in col]
ranklecols = [col for col in allcols if 'r.ankle' in col]
lanklecols = [col for col in allcols if 'l.ankle' in col]
rthighcols = [col for col in allcols if 'r.thigh' in col]
lthighcols = [col for col in allcols if 'l.thigh' in col]
headcols = [col for col in allcols if 'head' in col]
# NOTE(review): no substring is matched for the sternum, so this list stays empty.
sternumcols = []

# Feature buckets
accelcols = [col for col in allcols if 'Acceleration' in col]
velcols = [col for col in allcols if 'Velocity' in col]
magcols = [col for col in allcols if 'Magnetic' in col]
meancols = [col for col in allcols if 'mean' in col]
resultantcols = [col for col in allcols if 'resultant' in col]
varcols = [col for col in allcols if 'var' in col]

In [6]:
# Remove every column listed in resultantcols from df.
df = df.drop(labels=resultantcols, axis=1)

In [7]:
# Average every measurement over 0.5 second windows (500000 us) within each
# subject / trial combination, then turn the group keys back into columns.
half_second_bins = pd.Grouper(key='time_datetime', freq='500000us')
df_window_mean = (
    df.groupby(['subject','trial_type','trial_subtype','trial_num','trial_num_original', half_second_bins])
      .mean()
      .reset_index()
)

# Tag the acceleration, velocity and magnetic-field columns with a '_mean' suffix.
mean_names = {}
for measured in (accelcols, velcols, magcols):
    for col in measured:
        mean_names[col] = str(col+'_mean')
df_window_mean = df_window_mean.rename(columns=mean_names)

In [8]:
# Group in intervals of 0.5 seconds (freq='500000us', the same bin width used
# for the window means), calculating the variance of every measurement.

df_window_variance = df.groupby(['subject','trial_type','trial_subtype','trial_num','trial_num_original',pd.Grouper(key='time_datetime', freq='500000us')]).var()
df_window_variance = df_window_variance.reset_index()

# Rename the acceleration, velocity and magnetic-field columns, appending
# '_var' so they cannot collide with the '_mean' columns after merging.
var_names = {col: str(col+'_var') for col in list(accelcols) + list(velcols) + list(magcols)}
df_window_variance = df_window_variance.rename(columns=var_names)

In [9]:
# Join the per-window means and variances on the trial keys plus the window
# timestamp, giving one row per window with both kinds of column.
merge_keys = ['subject', 'trial_type', 'trial_subtype', 'trial_num','trial_num_original','time_datetime']
all_trials_window = df_window_mean.merge(df_window_variance, on=merge_keys)

# Discard every window that has a missing value in any column.
# (Kept as the base frame in case different preprocessing steps are tried.)
all_trials_window = all_trials_window.dropna(axis=0, how='any')

In [10]:
# Re-bucket the column names, this time from the windowed frame, by sensor and
# by feature. A column joins every bucket whose keyword appears in its name.

allcols = all_trials_window.columns.values

groupcols = ['subject', 'trial_type', 'trial_subtype', 'trial_num','trial_num_original']

waistcols, ranklecols, lanklecols = [], [], []
rthighcols, lthighcols, headcols = [], [], []
sternumcols = []  # NOTE(review): no keyword feeds this list, so it stays empty
accelcols, velcols, magcols = [], [], []
meancols, varcols = [], []

# keyword searched for in the column name -> list that collects the matches
keyword_buckets = (
    ('r.ankle', ranklecols),
    ('l.ankle', lanklecols),
    ('waist', waistcols),
    ('r.thigh', rthighcols),
    ('l.thigh', lthighcols),
    ('head', headcols),
    ('Velocity', velcols),
    ('Magnetic', magcols),
    ('Acceleration', accelcols),
    ('mean', meancols),
    ('var', varcols),
)

for col in allcols :
    for keyword, bucket in keyword_buckets :
        if keyword in col :
            bucket.append(col)

# Overlapping windows around axis acceleration peaks
#### First we find the largest-magnitude peak (whether positive or negative) on each acceleration axis, then we create a window that spans from 1 second before the earliest of those peaks to 1 second after the latest one. This creates a window that takes into account all 3 peaks.
#### We'll be using only the waist for these tests

In [11]:
# Get just waist acceleration columns, time and groupcols.
# The waist/acceleration intersection is built in accelcols order so the column
# order is the same on every run (a set intersection has no fixed order).
waist_accel_cols = [col for col in accelcols if col in waistcols]

# .copy() makes this an independent frame: later cells add columns to it, and
# doing that on a slice of all_trials_window is a chained assignment.
dfWaistAccels = all_trials_window[['time_datetime'] + groupcols + waist_accel_cols].copy()

In [12]:

# auxdf is a second name for the same frame (not a copy), so columns added
# through either name are visible through both.
auxdf = dfWaistAccels

# Absolute value of the mean waist acceleration on each axis
for axis in ('X', 'Y', 'Z'):
    auxdf['Abs' + axis] = auxdf['waist Acceleration ' + axis + ' (m/s^2)_mean'].abs()

# Within each groupcols group, the row id where each axis has its largest
# absolute value; the id is repeated on every row of the group.
for axis in ('Y', 'X', 'Z'):
    dfWaistAccels[axis + 'Max'] = auxdf.groupby(groupcols)['Abs' + axis].transform('idxmax')

# Largest and smallest of the three peak row ids
peak_rows = dfWaistAccels[["YMax", "XMax","ZMax"]]
dfWaistAccels['AxisMax'] = peak_rows.max(axis=1)
dfWaistAccels['AxisMin'] = peak_rows.min(axis=1)



In [14]:
# Create the window for each subject, trial type, subtype and number, and
# combine them all into one single dataframe.
#
# A single groupby visits only the combinations that actually occur, instead
# of filtering the whole frame once for every element of the Cartesian product
# of the unique values. Groups come out in order of first appearance.
trial_keys = ['subject', 'trial_type', 'trial_subtype', 'trial_num']

dfList = []
for _, trial in dfWaistAccels.groupby(trial_keys, sort=False):
    # Keep the rows whose id lies strictly between AxisMin-2 and AxisMax+2,
    # i.e. from one row before the earliest peak to one row after the latest.
    in_window = (trial.index < trial['AxisMax'] + 2) & (trial.index > trial['AxisMin'] - 2)
    dfList.append(trial[in_window])

fulldf = pd.concat(dfList)

In [15]:
# The peak-locating helper columns have served their purpose; remove them.
fulldf = fulldf.drop(
    labels=['XMax','ZMax','YMax','AxisMax','AxisMin','AbsX','AbsY','AbsZ'],
    axis=1,
)

# Adding a target column
def generateTarget(row) :
    """Return the binary label for one row, based on its 'trial_type'.

    'ADLs' and 'Near_Falls' map to 0, 'Falls' maps to 1. Any other value
    falls through and yields None.
    """
    kind = row['trial_type']
    if kind in ('ADLs', 'Near_Falls') :
        return 0
    if kind == 'Falls' :
        return 1
    return None
    
# Label every row by passing it through generateTarget.
fulldf['target'] = fulldf.apply(generateTarget, axis=1)

In [16]:
# We'll use this function to test our models from now on
def modelProcessing(X_train,y_train,X_test,y_test,model) :
    """Fit `model` on the training split and print its scores on the test split.

    The model is fitted in place with (X_train, y_train), used to predict
    X_test, and the accuracy, recall, precision and F-measure of those
    predictions against y_test are printed, one per line. Nothing is returned.
    """
    model.fit(X_train,y_train)
    y_pred = model.predict(X_test)

    # (printed label, scoring function) in the order the lines are emitted
    report = (
        ("Accuracy : ", accuracy_score),
        ("Recall : ", recall_score),
        ("Precision : ", precision_score),
        ("F-measure :", f1_score),
    )
    for label, metric in report :
        print(label + str(metric(y_test,y_pred)))

## Using all mean and variance columns as features, training on subjects 6-10 and testing on subjects 1-5

In [17]:
# Columns that identify a row or hold the label; everything else is a feature.
meta_cols = ['trial_num_original', 'trial_type', 'subject', 'trial_subtype',
             'trial_num', 'time_datetime', 'target']

# Split by subject: subjects >= 6 train the model, subjects < 6 test it.
train_rows = fulldf[fulldf['subject'] >= 6]
test_rows = fulldf[fulldf['subject'] < 6]

y_train = train_rows['target']
X_train = train_rows.drop(meta_cols, axis=1)
y_test = test_rows['target']
X_test = test_rows.drop(meta_cols, axis=1)

# cache_size is the kernel cache size in MB. The previous value of 500000
# allowed a cache of roughly 488 GB; 1000 MB keeps it bounded. The cache only
# affects training speed, not the fitted model.
clf = svm.SVC(decision_function_shape='ovo', cache_size=1000, coef0=0, C=1, gamma=0.01,  class_weight=None)
modelProcessing(X_train,y_train,X_test,y_test,clf)

Accuracy : 0.780831914376
Recall : 0.414669571532
Precision : 0.857357357357
F-measure :0.558981889378


## Using only mean columns 

In [18]:
# Columns that identify a row or hold the label.
meta_cols = ['trial_num_original', 'trial_type', 'subject', 'trial_subtype',
             'trial_num', 'time_datetime', 'target']
# Variance features, excluded here so that only the mean columns remain.
var_feature_cols = ['waist Acceleration X (m/s^2)_var',
                    'waist Acceleration Y (m/s^2)_var',
                    'waist Acceleration Z (m/s^2)_var']

# Split by subject: subjects >= 6 train the model, subjects < 6 test it.
train_rows = fulldf[fulldf['subject'] >= 6]
test_rows = fulldf[fulldf['subject'] < 6]

y_train = train_rows['target']
X_train = train_rows.drop(meta_cols + var_feature_cols, axis=1)
y_test = test_rows['target']
X_test = test_rows.drop(meta_cols + var_feature_cols, axis=1)

# cache_size is the kernel cache size in MB. The previous value of 500000
# allowed a cache of roughly 488 GB; 1000 MB keeps it bounded. The cache only
# affects training speed, not the fitted model.
clf = svm.SVC(decision_function_shape='ovo', cache_size=1000, coef0=0, C=1, gamma=0.01,  class_weight=None)
modelProcessing(X_train,y_train,X_test,y_test,clf)

Accuracy : 0.778156166383
Recall : 0.366739288308
Precision : 0.926605504587
F-measure :0.525494276795


# Resultant peak windows :
## Without making smaller windows first
#### Only with waist and acceleration for now

In [19]:
# Adding a target column
def generateTarget2(row) :
    """Return the default label 0 for any row; the row itself is not inspected."""
    default_label = 0
    return default_label

In [20]:
# Get just the waist resultant acceleration, the waist acceleration axes,
# time_seconds and groupcols from the full (un-windowed) copy df2.
# NOTE(review): groupcols is whatever an earlier cell last assigned; it carries
# 'time_datetime' along only if that list includes it.
resultantwaist = [col for col in resultantcols if "waist" in col and "acceleration" in col]

# Rebuild the feature buckets from df2's own columns.
accelcols = [col for col in df2.columns.values if "Acceleration" in col]
waistcols = [col for col in df2.columns.values if "waist" in col]

# Intersection kept in accelcols order so the column order is the same on
# every run (a set intersection has no fixed order).
waist_accel_cols = [col for col in accelcols if col in waistcols]

# .copy() gives an independent frame, so adding 'target' below is not a
# chained assignment on a slice of df2.
df2WaistResultant = df2[['time_seconds'] + groupcols + resultantwaist + waist_accel_cols].copy()

# Every row starts with target 0. A scalar assignment replaces the row-wise
# apply of generateTarget2, which returned 0 for every row anyway.
df2WaistResultant['target'] = 0

In [37]:

# 128 rows times 0.00781 s per row is approx 1 second.
# Meaning 256 rows up and 256 rows down is approx 2 seconds on each side of the peak.

In [21]:
# Label a window around the resultant-acceleration peak of every 'Falls' trial
# (one trial = subject, trial subtype and trial number) and combine the trials
# into one single dataframe.
#
# Changes from the previous nested-loop version:
#   * groupby visits only the trials that exist, instead of filtering the whole
#     frame for every combination of unique values;
#   * the label is written with a boolean mask through .loc instead of the
#     deprecated DataFrame.set_value, so row ids outside the trial are simply
#     not matched rather than being written to.
HALF_WINDOW_ROWS = 256  # rows labelled before the peak; peak + 255 rows after it

falls_only = df2WaistResultant[df2WaistResultant['trial_type'] == 'Falls']

df2List = []
for _, trial in falls_only.groupby(['subject', 'trial_subtype', 'trial_num'], sort=False):
    trial = trial.copy()  # independent frame, so the write below is not chained
    peak_index = trial['waist resultant acceleration'].idxmax()
    # Same half-open range as before: [peak - 256, peak + 256)
    in_window = ((trial.index >= peak_index - HALF_WINDOW_ROWS) &
                 (trial.index < peak_index + HALF_WINDOW_ROWS))
    trial.loc[in_window, 'target'] = 1  # Add the target 1 to the window
    df2List.append(trial)

fulldf2 = pd.concat(df2List)

In [None]:
# THIS PART OF THE CODE CAUSED THE KERNEL TO STOP AND RESTART
# NOTE(review): presumably a memory problem. fulldf2 holds un-windowed rows,
# and cache_size (the kernel cache size in MB) was set to 500000, which allows
# a cache of roughly 488 GB. It is capped at 1000 MB below.
# TODO confirm the cell now completes; fitting an SVC on this many rows may
# still be slow.

# Columns that identify a row or hold the label (or from which the label was
# derived). Only those actually present are dropped: 'time_datetime' is in
# fulldf2 only when groupcols carried it along.
meta_cols = ['trial_num_original', 'trial_type', 'subject', 'trial_subtype',
             'waist resultant acceleration', 'trial_num', 'time_seconds',
             'target', 'time_datetime']
present_meta_cols = [col for col in meta_cols if col in fulldf2.columns]

# Subjects >= 9 train the model, subjects < 2 test it.
train_rows = fulldf2[fulldf2['subject'] >= 9]
test_rows = fulldf2[fulldf2['subject'] < 2]

y_train = train_rows['target']
X_train = train_rows.drop(present_meta_cols, axis=1)

y_test = test_rows['target']
X_test = test_rows.drop(present_meta_cols, axis=1)

clf = svm.SVC(decision_function_shape='ovo', cache_size=1000, coef0=0, C=1, gamma=0.01,  class_weight=None)
modelProcessing(X_train,y_train,X_test,y_test,clf)