In [77]:
# adding jerk features to stat features

In [2]:
import numpy as np

from sklearn import preprocessing, svm
from sklearn.metrics import confusion_matrix, classification_report, f1_score
import matplotlib.pyplot as plt

import scipy.stats as st
from scipy.fftpack import fft, fftfreq
from scipy.signal import argrelextrema
import operator

In [3]:
# load body acceleration raw signals 
# x axis
bx_train = np.loadtxt('./HARDataset/train/Inertial Signals/body_acc_x_train.txt')
bx_test = np.loadtxt('./HARDataset/test/Inertial Signals/body_acc_x_test.txt')
# y axis
by_train = np.loadtxt('./HARDataset/train/Inertial Signals/body_acc_y_train.txt')
by_test = np.loadtxt('./HARDataset/test/Inertial Signals/body_acc_y_test.txt')
# z axis
bz_train = np.loadtxt('./HARDataset/train/Inertial Signals/body_acc_z_train.txt')
bz_test = np.loadtxt('./HARDataset/test/Inertial Signals/body_acc_z_test.txt')

In [4]:
# load total acceleration raw signals 
# x axis
tx_train = np.loadtxt('./HARDataset/train/Inertial Signals/total_acc_x_train.txt')
tx_test = np.loadtxt('./HARDataset/test/Inertial Signals/total_acc_x_test.txt')
# y axis
ty_train = np.loadtxt('./HARDataset/train/Inertial Signals/total_acc_y_train.txt')
ty_test = np.loadtxt('./HARDataset/test/Inertial Signals/total_acc_y_test.txt')
# z axis
tz_train = np.loadtxt('./HARDataset/train/Inertial Signals/total_acc_z_train.txt')
tz_test = np.loadtxt('./HARDataset/test/Inertial Signals/total_acc_z_test.txt')

In [5]:
# load body gyroscope raw signals 
# x axis
gx_train = np.loadtxt('./HARDataset/train/Inertial Signals/body_gyro_x_train.txt')
gx_test = np.loadtxt('./HARDataset/test/Inertial Signals/body_gyro_x_test.txt')
# y axis
gy_train = np.loadtxt('./HARDataset/train/Inertial Signals/body_gyro_y_train.txt')
gy_test = np.loadtxt('./HARDataset/test/Inertial Signals/body_gyro_y_test.txt')
# z axis
gz_train = np.loadtxt('./HARDataset/train/Inertial Signals/body_gyro_z_train.txt')
gz_test = np.loadtxt('./HARDataset/test/Inertial Signals/body_gyro_z_test.txt')

In [6]:
bx_train.shape, bx_test.shape

((7352, 128), (2947, 128))

In [7]:
tx_train.shape, tx_test.shape

((7352, 128), (2947, 128))

In [8]:
gx_train.shape, gx_test.shape

((7352, 128), (2947, 128))

In [9]:
# load label vectors
y_train = np.loadtxt('./HARDataset/train/y_train.txt')
y_test = np.loadtxt('./HARDataset/test/y_test.txt')

In [10]:
y_train.shape, y_test.shape

((7352,), (2947,))

In [11]:
label_names = ['Walking', 'Walking upstairs', 'Walking downstairs', 'Sitting', 'Standing', 'Laying']

In [12]:
def stat_area_features(x, Te=1.0):
    # mean
    mean_ts = np.mean(x, axis=1).reshape(-1, 1)
    # max
    max_ts = np.amax(x, axis=1).reshape(-1, 1)
    # min
    min_ts = np.amin(x, axis=1).reshape(-1, 1)
    # std
    std_ts = np.std(x, axis=1).reshape(-1, 1)
    # skew
    skew_ts = st.skew(x, axis=1).reshape(-1, 1)
    # kurtosis
    kurtosis_ts = st.kurtosis(x, axis=1).reshape(-1, 1)
    # interquartile range
    iqr_ts = st.iqr(x, axis=1).reshape(-1, 1)
    # median absolute deviation
    mad_ts = np.median(np.sort(abs(x - np.mean(x, axis=1).reshape(-1, 1)), axis=1), axis=1).reshape(-1, 1)
    # area under curve
    area_ts = np.trapz(x, axis=1, dx=Te).reshape(-1, 1)
    # area under curve ** 2
    sq_area_ts = np.trapz(x ** 2, axis=1, dx=Te).reshape(-1, 1)
    
    return np.concatenate((mean_ts, max_ts, min_ts, std_ts, skew_ts, kurtosis_ts, iqr_ts, 
                           mad_ts, area_ts, sq_area_ts), axis=1)

In [13]:
# stats for train data

In [18]:
bx_train_stats = stat_area_features(bx_train)
by_train_stats = stat_area_features(by_train)
bz_train_stats = stat_area_features(bz_train)

In [19]:
bx_train_stats.shape, by_train_stats.shape, bz_train_stats.shape

((7352, 10), (7352, 10), (7352, 10))

In [20]:
tx_train_stats = stat_area_features(tx_train)
ty_train_stats = stat_area_features(ty_train)
tz_train_stats = stat_area_features(tz_train)

In [21]:
tx_train_stats.shape, ty_train_stats.shape, tz_train_stats.shape

((7352, 10), (7352, 10), (7352, 10))

In [22]:
gx_train_stats = stat_area_features(gx_train)
gy_train_stats = stat_area_features(gy_train)
gz_train_stats = stat_area_features(gz_train)

In [23]:
gx_train_stats.shape, gy_train_stats.shape, gz_train_stats.shape

((7352, 10), (7352, 10), (7352, 10))

In [20]:
# stats for test data

In [26]:
bx_test_stats = stat_area_features(bx_test)
by_test_stats = stat_area_features(by_test)
bz_test_stats = stat_area_features(bz_test)

In [27]:
bx_test_stats.shape, by_test_stats.shape, bz_test_stats.shape

((2947, 10), (2947, 10), (2947, 10))

In [28]:
tx_test_stats = stat_area_features(tx_test)
ty_test_stats = stat_area_features(ty_test)
tz_test_stats = stat_area_features(tz_test)

In [29]:
tx_test_stats.shape, ty_test_stats.shape, tz_test_stats.shape

((2947, 10), (2947, 10), (2947, 10))

In [30]:
gx_test_stats = stat_area_features(gx_test)
gy_test_stats = stat_area_features(gy_test)
gz_test_stats = stat_area_features(gz_test)

In [31]:
gx_test_stats.shape, gy_test_stats.shape, gz_test_stats.shape

((2947, 10), (2947, 10), (2947, 10))

In [13]:
# jerk for train data

In [34]:
bx_train_jerk = stat_area_features((bx_train[:, 1:] - bx_train[:, :-1])/1.0)
by_train_jerk = stat_area_features((by_train[:, 1:] - by_train[:, :-1])/1.0)
bz_train_jerk = stat_area_features((bz_train[:, 1:] - bz_train[:, :-1])/1.0)

In [35]:
bx_train_jerk.shape, by_train_jerk.shape, bz_train_jerk.shape

((7352, 10), (7352, 10), (7352, 10))

In [36]:
tx_train_jerk = stat_area_features((tx_train[:, 1:] - tx_train[:, :-1])/1.0)
ty_train_jerk = stat_area_features((ty_train[:, 1:] - ty_train[:, :-1])/1.0)
tz_train_jerk = stat_area_features((tz_train[:, 1:] - tz_train[:, :-1])/1.0)

In [37]:
tx_train_jerk.shape, ty_train_jerk.shape, tz_train_jerk.shape

((7352, 10), (7352, 10), (7352, 10))

In [38]:
gx_train_jerk = stat_area_features((gx_train[:, 1:] - gx_train[:, :-1])/1.0)
gy_train_jerk = stat_area_features((gy_train[:, 1:] - gy_train[:, :-1])/1.0)
gz_train_jerk = stat_area_features((gz_train[:, 1:] - gz_train[:, :-1])/1.0)

In [39]:
gx_train_jerk.shape, gy_train_jerk.shape, gz_train_jerk.shape

((7352, 10), (7352, 10), (7352, 10))

In [20]:
# jerk for test data

In [40]:
bx_test_jerk = stat_area_features((bx_test[:, 1:] - bx_test[:, :-1])/1.0)
by_test_jerk = stat_area_features((by_test[:, 1:] - by_test[:, :-1])/1.0)
bz_test_jerk = stat_area_features((bz_test[:, 1:] - bz_test[:, :-1])/1.0)

In [41]:
bx_test_jerk.shape, by_test_jerk.shape, bz_test_jerk.shape

((2947, 10), (2947, 10), (2947, 10))

In [42]:
tx_test_jerk = stat_area_features((tx_test[:, 1:] - tx_test[:, :-1])/1.0)
ty_test_jerk = stat_area_features((ty_test[:, 1:] - ty_test[:, :-1])/1.0)
tz_test_jerk = stat_area_features((tz_test[:, 1:] - tz_test[:, :-1])/1.0)

In [43]:
tx_test_jerk.shape, ty_test_jerk.shape, tz_test_jerk.shape

((2947, 10), (2947, 10), (2947, 10))

In [44]:
gx_test_jerk = stat_area_features((gx_test[:, 1:] - gx_test[:, :-1])/1.0)
gy_test_jerk = stat_area_features((gy_test[:, 1:] - gy_test[:, :-1])/1.0)
gz_test_jerk = stat_area_features((gz_test[:, 1:] - gz_test[:, :-1])/1.0)

In [45]:
gx_test_jerk.shape, gy_test_jerk.shape, gz_test_jerk.shape

((2947, 10), (2947, 10), (2947, 10))

In [53]:
# def frequency_domain_features(x, Te=1.0):
    
#     # figuring out scipy fft, fftfreq
#     # also figuring out jerk signal derivative at end of previous notebook
#     # Te bit just ain't working - not clear why
    
#     # as DFT coefficients and their corresponding frequencies are symetrical arrays with respect to
#     # the middle of the array, need to control for whether samples in x are odd or even to then split arrays
#     if x.shape[1]%2 == 0:
#         N = int(x.shape[1]/2)
#     else: 
#         N = int(x.shape[1]/2) - 1
#     xf = np.repeat_
    

In [28]:
# creating X_train and X_test 

In [46]:
X_train = np.concatenate((bx_train_stats, by_train_stats, bz_train_stats, 
                          tx_train_stats, ty_train_stats, tz_train_stats, 
                          gx_train_stats, gy_train_stats, gz_train_stats, 
                          bx_train_jerk, by_train_jerk, bz_train_jerk, 
                          tx_train_jerk, ty_train_jerk, tz_train_jerk, 
                          gx_train_jerk, gy_train_jerk, gz_train_jerk), axis=1)

In [47]:
X_test = np.concatenate((bx_test_stats, by_test_stats, bz_test_stats, 
                          tx_test_stats, ty_test_stats, tz_test_stats, 
                          gx_test_stats, gy_test_stats, gz_test_stats,
                          bx_test_jerk, by_test_jerk, bz_test_jerk, 
                          tx_test_jerk, ty_test_jerk, tz_test_jerk, 
                          gx_test_jerk, gy_test_jerk, gz_test_jerk), axis=1)

In [48]:
X_train.shape, X_test.shape

((7352, 180), (2947, 180))

In [49]:
# scaling data makes big differnce! to all! 

In [50]:
from sklearn.preprocessing import StandardScaler

In [51]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [52]:
#can one do a simple log reg first?
from sklearn.linear_model import LogisticRegression

In [53]:
logreg = LogisticRegression()

In [54]:
logreg.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [56]:
logreg.score(X_train, y_train) # adding jerk: 0.95 to 0.97

0.970076169749728

In [57]:
y_pred_logreg = logreg.predict(X_test)

In [59]:
print(classification_report(y_test, y_pred_logreg)) # adding jerk accuracy: 0.87 to 0.93

              precision    recall  f1-score   support

         1.0       0.92      0.90      0.91       496
         2.0       0.96      0.91      0.94       471
         3.0       0.88      0.96      0.92       420
         4.0       0.87      0.91      0.89       491
         5.0       0.92      0.88      0.90       532
         6.0       1.00      0.99      1.00       537

    accuracy                           0.93      2947
   macro avg       0.92      0.93      0.92      2947
weighted avg       0.93      0.93      0.93      2947



In [62]:
from sklearn import svm

svm = svm.SVC()

In [63]:
svm.fit(X_train, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [65]:
svm.score(X_train, y_train) # adding jerk 0.95 to 0.96

0.9635473340587595

In [66]:
y_pred_svm = svm.predict(X_test)

In [67]:
print(classification_report(y_test, y_pred_svm)) # adding jerk accuracy 0.88 to 0.91 

              precision    recall  f1-score   support

         1.0       0.93      0.93      0.93       496
         2.0       0.99      0.95      0.97       471
         3.0       0.92      0.96      0.94       420
         4.0       0.83      0.81      0.82       491
         5.0       0.84      0.84      0.84       532
         6.0       0.98      1.00      0.99       537

    accuracy                           0.91      2947
   macro avg       0.91      0.91      0.91      2947
weighted avg       0.91      0.91      0.91      2947



In [68]:
from sklearn.ensemble import RandomForestClassifier

In [69]:
rfc = RandomForestClassifier()

In [70]:
rfc.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [72]:
rfc.score(X_train, y_train) # 1.0 already without jerk 

1.0

In [73]:
y_pred_rfc = rfc.predict(X_test)

In [76]:
print(classification_report(y_test, y_pred_rfc)) # adding jerk  accuracy 0.86 to 0.88

              precision    recall  f1-score   support

         1.0       0.84      0.87      0.85       496
         2.0       0.85      0.85      0.85       471
         3.0       0.92      0.88      0.90       420
         4.0       0.77      0.97      0.86       491
         5.0       0.97      0.73      0.83       532
         6.0       1.00      1.00      1.00       537

    accuracy                           0.88      2947
   macro avg       0.89      0.88      0.88      2947
weighted avg       0.89      0.88      0.88      2947



In [52]:
# cool this got better - code still messy and can be improved - how? use functions? 
# add more features? perhaps 