In [1]:
import pandas as pd

fall_dataset = pd.read_csv("../pre_processed_dataset.csv")
fall_dataset.head()

Unnamed: 0,subject,trial_type,trial_subtype,trial_num,time_datetime,waist Acceleration X (m/s^2)_mean,waist Acceleration Y (m/s^2)_mean,waist Acceleration Z (m/s^2)_mean,waist Acceleration X (m/s^2)_variance,waist Acceleration Y (m/s^2)_variance,waist Acceleration Z (m/s^2)_variance
0,1,ADLs,AS,trial1,2011-05-07 20:39:05.000,-9.485754,-0.413151,2.40259,0.212882,0.066554,0.387473
1,1,ADLs,AS,trial1,2011-05-07 20:39:07.500,-9.061236,-0.146159,-2.566998,4.085101,1.400227,8.418117
2,1,ADLs,AS,trial1,2011-05-07 20:39:10.000,-9.205295,-0.58301,-4.060932,7.310128,1.231231,1.979515
3,1,ADLs,AS,trial1,2011-05-07 20:39:12.500,-9.348276,-0.473525,-2.822297,7.434103,1.403955,2.246406
4,1,ADLs,AS,trial1,2011-05-07 20:39:15.000,-9.236891,-0.61412,-2.572829,5.628038,1.477972,6.185981


In [2]:
from sklearn.feature_extraction import DictVectorizer
from sklearn.preprocessing import LabelEncoder
import numpy as np

X = fall_dataset.drop(['trial_type', 'subject', 'trial_subtype', 'trial_num', 'time_datetime'], axis=1)
y = fall_dataset['trial_type'].values

le = LabelEncoder()
le.fit(['ADLs', 'Near_Falls', 'Falls'])
y = le.transform(y)
v = DictVectorizer(sparse=False)
X = X.values
X, y

(array([[ -9.48575426e+00,  -4.13150723e-01,   2.40259017e+00,
           2.12882201e-01,   6.65538192e-02,   3.87473157e-01],
        [ -9.06123550e+00,  -1.46158731e-01,  -2.56699811e+00,
           4.08510052e+00,   1.40022749e+00,   8.41811688e+00],
        [ -9.20529471e+00,  -5.83009612e-01,  -4.06093172e+00,
           7.31012817e+00,   1.23123084e+00,   1.97951487e+00],
        ..., 
        [ -9.02955157e+00,  -2.93286097e-01,  -2.44748599e+00,
           3.50228049e+00,   1.29646323e+00,   6.52206397e+00],
        [ -9.18225718e+00,  -9.49277256e-01,   3.06013741e+00,
           4.60111271e-02,   2.51660258e-02,   3.67029647e-01],
        [ -9.10446482e+00,  -1.03542865e+00,   3.44432050e+00,
           2.34900967e-03,   3.49081530e-03,   4.77366522e-03]]),
 array([0, 0, 0, ..., 2, 2, 2]))

### Doing multiclass classification and try to predict whether it is falls, near falls, or ADLS

In [3]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
from sklearn import svm
from sklearn.model_selection import GridSearchCV

clf = svm.SVC(decision_function_shape='ovo', cache_size=200, C=1000, gamma=0.01)
print(clf)
clf.fit(X_train, y_train) 
clf.score(X_test, y_test)

SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovo', degree=3, gamma=0.01, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)


0.6222479721900348

### Using [SVC](http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC)

In [5]:
clf = svm.SVC(cache_size=200, C=1000, gamma=0.01)
print(clf)
clf.fit(X_train, y_train) 
clf.score(X_test, y_test)

SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)


0.6222479721900348

In [8]:
parameters = {'kernel':['rbf'], 'C':[1, 5, 10, 50, 100, 1000], 'gamma': [1.0, 0.1, 0.3, 0.01, 0.001, 0.0001], 'decision_function_shape': ['ovo', 'ovr']}
svc = svm.SVC()
clf = GridSearchCV(svc, parameters, cv=10)
clf.fit(X_train, y_train)
print(clf.best_params_)
clf.score(X_test, y_test)

### Using [NuSVC](http://scikit-learn.org/stable/modules/generated/sklearn.svm.NuSVC.html#sklearn.svm.NuSVC)

In [7]:
parameters = {'kernel':['rbf'], 'tol':[0.1, 0.01, 0.001, 0.0001], 'gamma': [0.1, 0.01, 0.001, 'auto'], 'decision_function_shape': ['ovo', 'ovr']}
svc = svm.NuSVC()
clf = GridSearchCV(svc, parameters, cv=10)
clf.fit(X_train, y_train)
print(clf.best_params_)
clf.score(X_test, y_test)

{'decision_function_shape': 'ovo', 'gamma': 0.001, 'kernel': 'rbf', 'tol': 0.001}


0.56546929316338357

## Classifiy between fall vs not falls

In [6]:
classfication_coverted = np.vectorize( (lambda x: max(0, x-1)) )
y_train_binary = classfication_coverted(y_train)
y_test_binary = classfication_coverted(y_test)

parameters_svc = [
    {
        'kernel':['rbf'], 
        'C':[1, 5, 10, 50, 100, 1000], 
        'gamma': [1.0, 0.1, 0.3, 0.01, 0.001, 0.0001], 
        'decision_function_shape': ['ovo', 'ovr']
    },
    {
        'kernel': ['sigmoid'], 
        'coef0': [0.0, 1.0],
        'C': [1, 10, 100, 1000]
    }
]

parameters_linear = {
    'C':[1, 5, 10, 50, 100, 1000], 
    'dual': [False],
    'tol':[0.1, 0.01, 0.001, 0.0001],
    'fit_intercept': [True, False],
}

parameters_svc, parameters_linear


([{'C': [1, 5, 10, 50, 100, 1000],
   'decision_function_shape': ['ovo', 'ovr'],
   'gamma': [1.0, 0.1, 0.3, 0.01, 0.001, 0.0001],
   'kernel': ['rbf']},
  {'C': [1, 10, 100, 1000], 'coef0': [0.0, 1.0], 'kernel': ['sigmoid']}],
 {'C': [1, 5, 10, 50, 100, 1000],
  'dual': [False],
  'fit_intercept': [True, False],
  'tol': [0.1, 0.01, 0.001, 0.0001]})

In [15]:
print('[')
for i in y_test_binary:
    if i == 1:
        print(list(X_test[i]), ',')
print(']')

[
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.930775

[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525  0.88660068  5.8400903   0.13517084  0.03459784  0.06943125] ,
[ 7.93077525

In [7]:
y_test_binary

array([1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1,
       0, 0,

### Using [SVC](http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC)

In [8]:
svc = svm.SVC()
clf = GridSearchCV(svc, parameters_svc, cv=10)
clf.fit(X_train, y_train_binary)
print(clf.best_params_)
score = clf.score(X_test, y_test_binary)
print(score)

{'C': 50, 'decision_function_shape': 'ovo', 'gamma': 0.01, 'kernel': 'rbf'}
0.790266512167


In [15]:
import pickle
from sklearn.metrics import confusion_matrix

svm_model = open("svm_model.pickle","wb")
pickle.dump(clf, svm_model)
loaded_model = pickle.load( open( "svm_model.pickle", "rb" ) )
y_predict = loaded_model.predict(X_test)
tn, fp, fn, tp = confusion_matrix(y_test_binary, y_predict).ravel()
specificity = tn / (tn+fp)
sensivity =  tp / (tp+fn)
sensivity, specificity

(0.17910447761194029, 0.97583081570996977)

### Using [LinearSVC](http://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html#sklearn.svm.LinearSVC)

In [19]:
svc = svm.LinearSVC()
clf = GridSearchCV(svc, parameters_linear, cv=10)
clf.fit(X_train, y_train_binary)
print(clf.best_params_)
clf.score(X_test, y_test_binary)

{'C': 1, 'dual': False, 'fit_intercept': True, 'tol': 0.001}


0.76593279258400926