In [1]:
import os
import os.path
import pandas as pd
import numpy as np
import scipy
from scipy import stats

In [2]:
x = []
y = []
cnt = 0
ind = []
for dirpath, dirnames, filenames in os.walk("../SisFall_dataset"):
    for filename in [f for f in filenames if f.endswith(".txt")]:
        if(filename != "Readme.txt"):
            
            if filename[0] == "D":
                # ADL file
                df = pd.read_csv(os.path.join(dirpath, filename),header=None)
                ax = np.array(df[0])*32/8192
                ay = np.array(df[1])*32/8192
                az = np.array(df[2])*32/8192
                aax = ax[np.arange(0,ax.shape[0],8)]
                aay = ay[np.arange(0,ax.shape[0],8)]
                aaz = az[np.arange(0,ax.shape[0],8)]
                s = np.sqrt(aax**2 + aay**2 + aaz**2)
                for i in range(s.shape[0]):
                    if s[i] >= 1.6:
                        st = i - 38
                        ed =  i + 39
                        if st < 0 or ed >= s.shape[0]:
                            continue
                        
                        win = s[st:ed]
                        p = win/np.sum(win)
                        feature = np.array([np.min(win),np.max(win),np.mean(win),np.median(win)
                                  ,scipy.stats.iqr(win),np.var(win), np.std(win),np.mean(np.absolute(win - np.mean(win)))
                                   ,np.sqrt(np.mean(win**2)), scipy.stats.entropy(p)
                                    ,scipy.stats.skew(win),scipy.stats.kurtosis(win)
                                  ])

                        
                        x.append(feature)
                        y.append(1)


            else:
                # Fall Data
                df = pd.read_csv(os.path.join(dirpath, filename),header=None)
                ax = np.array(df[0])*32/8192
                ay = np.array(df[1])*32/8192
                az = np.array(df[2])*32/8192
                m = 0
                aax = ax[np.arange(m,ax.shape[0],8)]
                aay = ay[np.arange(m,ax.shape[0],8)]
                aaz = az[np.arange(m,ax.shape[0],8)]
                s = np.sqrt(aax**2 + aay**2 + aaz**2)
                i = np.argmax(s)
                st = i - 38
                ed =  i + 39

                if st < 0 or ed >= s.shape[0] or s[i] < 1.6:
                    continue

                win = s[st:ed]
                p = win/np.sum(win)
                feature = np.array([np.min(win),np.max(win),np.mean(win),np.median(win)
                                  ,scipy.stats.iqr(win),np.var(win), np.std(win),np.mean(np.absolute(win - np.mean(win)))
                                   ,np.sqrt(np.mean(win**2)), scipy.stats.entropy(p)
                                    ,scipy.stats.skew(win),scipy.stats.kurtosis(win)
                                  ])

                        
                x.append(feature)

                y.append(0)


In [3]:
X = np.array(x)
Y = np.array(y)

print(X.shape,Y.shape)
from collections import Counter
cnt =  Counter(Y)
print(cnt)

(78723, 12) (78723,)
Counter({1: 76925, 0: 1798})


In [5]:
from sklearn.model_selection import train_test_split
X_tr, X_test, y_tr, y_test = train_test_split(X, Y, test_size=0.2, random_state=32)

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
over = SMOTE(sampling_strategy=1)

steps = [('o', over)]
pipeline = Pipeline(steps=steps)
X_train, y_train = pipeline.fit_resample(X_tr, y_tr)
print(X_train.shape,y_train.shape)
from collections import Counter
cnt =  Counter(y_train)
print(cnt)

(123040, 12) (123040,)
Counter({1: 61520, 0: 61520})


In [6]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
clf = make_pipeline(StandardScaler(), SVC(kernel='rbf'))
clf.fit(X_train, y_train)


In [7]:
print(clf.score(X_test,y_test))

0.9702127659574468


In [8]:
from sklearn.metrics import confusion_matrix


predictions = clf.predict(X)

c = confusion_matrix(y, predictions)
print('Confusion matrix:\n', c)
print('Accuracy', (c[0, 0]+c[1, 1]) / (c[0, 1] + c[0, 0] + c[1, 1] + c[1, 0]))
print('sensitivity', c[0, 0] / (c[0, 1] + c[0, 0]))
print('specificity', c[1, 1] / (c[1, 1] + c[1, 0]))

Confusion matrix:
 [[ 1752    46]
 [ 2113 74812]]
Accuracy 0.9725747240323667
sensitivity 0.9744160177975528
specificity 0.9725316867078323


In [9]:
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(X_train, y_train)


In [10]:
print(neigh.score(X_test,y_test))

0.9932041918069229


In [11]:
from sklearn.metrics import confusion_matrix


predictions = neigh.predict(X)

# y_test_flat = np.argmax(y_test, axis=-1)
c = confusion_matrix(y, predictions)
print('Confusion matrix:\n', c)
print('Accuracy', (c[0, 0]+c[1, 1]) / (c[0, 1] + c[0, 0] + c[1, 1] + c[1, 0]))
print('sensitivity', c[0, 0] / (c[0, 1] + c[0, 0]))
print('specificity', c[1, 1] / (c[1, 1] + c[1, 0]))

Confusion matrix:
 [[ 1751    47]
 [   60 76865]]
Accuracy 0.9986408038311548
sensitivity 0.9738598442714127
specificity 0.9992200194995126


In [15]:

from xgboost import XGBClassifier
XGB = XGBClassifier(max_depth = 3, 
                        n_estimators = 5,
                        n_jobs = 5)
xgb = XGB.fit(X_train, y_train) 

# clf.fit(X_train, y_train)


In [16]:
from sklearn.metrics import accuracy_score
y_test_pred = xgb.predict(X)
accuracy_score(y,y_test_pred)

0.9623871041499943

In [17]:
from sklearn.metrics import confusion_matrix


c = confusion_matrix(y, y_test_pred)
print('Confusion matrix:\n', c)
print('Accuracy', (c[0, 0]+c[1, 1]) / (c[0, 1] + c[0, 0] + c[1, 1] + c[1, 0]))
print('sensitivity', c[0, 0] / (c[0, 1] + c[0, 0]))
print('specificity', c[1, 1] / (c[1, 1] + c[1, 0]))

Confusion matrix:
 [[ 1722    76]
 [ 2885 74040]]
Accuracy 0.9623871041499943
sensitivity 0.9577308120133482
specificity 0.9624959376015599
