In [1]:
# adding more features, esp fft related ones 

In [2]:
import numpy as np

from sklearn import preprocessing, svm
from sklearn.metrics import confusion_matrix, classification_report, f1_score
import matplotlib.pyplot as plt

import scipy.stats as st
from scipy.fftpack import fft, fftfreq
from scipy.signal import argrelextrema
import operator

In [3]:
# load body acceleration raw signals 
# x axis
body_acc_x_train = np.loadtxt('./HARDataset/train/Inertial Signals/body_acc_x_train.txt')
body_acc_x_test = np.loadtxt('./HARDataset/test/Inertial Signals/body_acc_x_test.txt')
# y axis
body_acc_y_train = np.loadtxt('./HARDataset/train/Inertial Signals/body_acc_y_train.txt')
body_acc_y_test = np.loadtxt('./HARDataset/test/Inertial Signals/body_acc_y_test.txt')
# z axis
body_acc_z_train = np.loadtxt('./HARDataset/train/Inertial Signals/body_acc_z_train.txt')
body_acc_z_test = np.loadtxt('./HARDataset/test/Inertial Signals/body_acc_z_test.txt')

In [4]:
# load total acceleration raw signals 
# x axis
total_acc_x_train = np.loadtxt('./HARDataset/train/Inertial Signals/total_acc_x_train.txt')
total_acc_x_test = np.loadtxt('./HARDataset/test/Inertial Signals/total_acc_x_test.txt')
# y axis
total_acc_y_train = np.loadtxt('./HARDataset/train/Inertial Signals/total_acc_y_train.txt')
total_acc_y_test = np.loadtxt('./HARDataset/test/Inertial Signals/total_acc_y_test.txt')
# z axis
total_acc_z_train = np.loadtxt('./HARDataset/train/Inertial Signals/total_acc_z_train.txt')
total_acc_z_test = np.loadtxt('./HARDataset/test/Inertial Signals/total_acc_z_test.txt')

In [5]:
# load body gyroscope raw signals 
# x axis
body_gyro_x_train = np.loadtxt('./HARDataset/train/Inertial Signals/body_gyro_x_train.txt')
body_gyro_x_test = np.loadtxt('./HARDataset/test/Inertial Signals/body_gyro_x_test.txt')
# y axis
body_gyro_y_train = np.loadtxt('./HARDataset/train/Inertial Signals/body_gyro_y_train.txt')
body_gyro_y_test = np.loadtxt('./HARDataset/test/Inertial Signals/body_gyro_y_test.txt')
# z axis
body_gyro_z_train = np.loadtxt('./HARDataset/train/Inertial Signals/body_gyro_z_train.txt')
body_gyro_z_test = np.loadtxt('./HARDataset/test/Inertial Signals/body_gyro_z_test.txt')

In [6]:
body_acc_x_train.shape, body_acc_x_test.shape

((7352, 128), (2947, 128))

In [7]:
total_acc_x_train.shape, total_acc_x_test.shape

((7352, 128), (2947, 128))

In [8]:
body_gyro_x_train.shape, body_gyro_x_test.shape

((7352, 128), (2947, 128))

In [9]:
# load label vectors
y_train = np.loadtxt('./HARDataset/train/y_train.txt')
y_test = np.loadtxt('./HARDataset/test/y_test.txt')

In [10]:
y_train.shape, y_test.shape

((7352,), (2947,))

In [11]:
label_names = ['Walking', 'Walking upstairs', 'Walking downstairs', 'Sitting', 'Standing', 'Laying']

In [12]:
def stat_area_features(x, Te=1.0):
    # mean
    mean_ts = np.mean(x, axis=1).reshape(-1, 1)
    # max
    max_ts = np.amax(x, axis=1).reshape(-1, 1)
    # min
    min_ts = np.amin(x, axis=1).reshape(-1, 1)
    # std
    std_ts = np.std(x, axis=1).reshape(-1, 1)
    # skew
    skew_ts = st.skew(x, axis=1).reshape(-1, 1)
    # kurtosis
    kurtosis_ts = st.kurtosis(x, axis=1).reshape(-1, 1)
    # interquartile range
    iqr_ts = st.iqr(x, axis=1).reshape(-1, 1)
    # median absolute deviation
    mad_ts = np.median(np.sort(abs(x - np.mean(x, axis=1).reshape(-1, 1)), axis=1), axis=1).reshape(-1, 1)
    # area under curve
    area_ts = np.trapz(x, axis=1, dx=Te).reshape(-1, 1)
    # area under curve ** 2
    sq_area_ts = np.trapz(x ** 2, axis=1, dx=Te).reshape(-1, 1)
    
    return np.concatenate((mean_ts, max_ts, min_ts, std_ts, skew_ts, kurtosis_ts, iqr_ts, 
                           mad_ts, area_ts, sq_area_ts), axis=1)

In [13]:
# stats for train data

In [14]:
body_acc_x_train_stats = stat_area_features(body_acc_x_train)
body_acc_y_train_stats = stat_area_features(body_acc_y_train)
body_acc_z_train_stats = stat_area_features(body_acc_z_train)

In [15]:
body_acc_x_train_stats.shape, body_acc_y_train_stats.shape, body_acc_z_train_stats.shape

((7352, 10), (7352, 10), (7352, 10))

In [16]:
total_acc_x_train_stats = stat_area_features(total_acc_x_train)
total_acc_y_train_stats = stat_area_features(total_acc_y_train)
total_acc_z_train_stats = stat_area_features(total_acc_z_train)

In [17]:
total_acc_x_train_stats.shape, total_acc_y_train_stats.shape, total_acc_z_train_stats.shape

((7352, 10), (7352, 10), (7352, 10))

In [18]:
body_gyro_x_train_stats = stat_area_features(body_gyro_x_train)
body_gyro_y_train_stats = stat_area_features(body_gyro_y_train)
body_gyro_z_train_stats = stat_area_features(body_gyro_z_train)

In [19]:
body_gyro_x_train_stats.shape, body_gyro_y_train_stats.shape, body_gyro_z_train_stats.shape

((7352, 10), (7352, 10), (7352, 10))

In [20]:
# stats for test data

In [21]:
body_acc_x_test_stats = stat_area_features(body_acc_x_test)
body_acc_y_test_stats = stat_area_features(body_acc_y_test)
body_acc_z_test_stats = stat_area_features(body_acc_z_test)

In [22]:
body_acc_x_test_stats.shape, body_acc_y_test_stats.shape, body_acc_z_test_stats.shape

((2947, 10), (2947, 10), (2947, 10))

In [23]:
total_acc_x_test_stats = stat_area_features(total_acc_x_test)
total_acc_y_test_stats = stat_area_features(total_acc_y_test)
total_acc_z_test_stats = stat_area_features(total_acc_z_test)

In [24]:
total_acc_x_test_stats.shape, total_acc_y_test_stats.shape, total_acc_z_test_stats.shape

((2947, 10), (2947, 10), (2947, 10))

In [25]:
body_gyro_x_test_stats = stat_area_features(body_gyro_x_test)
body_gyro_y_test_stats = stat_area_features(body_gyro_y_test)
body_gyro_z_test_stats = stat_area_features(body_gyro_z_test)

In [26]:
body_gyro_x_test_stats.shape, body_gyro_y_test_stats.shape, body_gyro_z_test_stats.shape

((2947, 10), (2947, 10), (2947, 10))

In [53]:
# def frequency_domain_features(x, Te=1.0):
    
#     # figuring out scipy fft, fftfreq
#     # also figuring out jerk signal derivative at end of previous notebook
#     # Te bit just ain't working - not clear why
    
#     # as DFT coefficients and their corresponding frequencies are symetrical arrays with respect to
#     # the middle of the array, need to control for whether samples in x are odd or even to then split arrays
#     if x.shape[1]%2 == 0:
#         N = int(x.shape[1]/2)
#     else: 
#         N = int(x.shape[1]/2) - 1
#     xf = np.repeat_
    

In [54]:
def make_feature_vector(x, y, z, Te=1.0):
    
    # raw signals: stats and area features
    features_xt = stat_area_features(x, Te=Te)

SyntaxError: invalid syntax (<ipython-input-54-575be965ada5>, line 1)

In [28]:
# creating X_train and X_test 

In [29]:
X_train = np.concatenate((body_acc_x_train_stats, body_acc_y_train_stats, body_acc_z_train_stats, 
                          total_acc_x_train_stats, total_acc_y_train_stats, total_acc_z_train_stats, 
                          body_gyro_x_train_stats, body_gyro_y_train_stats, body_gyro_z_train_stats), axis=1)

In [30]:
X_test = np.concatenate((body_acc_x_test_stats, body_acc_y_test_stats, body_acc_z_test_stats, 
                          total_acc_x_test_stats, total_acc_y_test_stats, total_acc_z_test_stats, 
                          body_gyro_x_test_stats, body_gyro_y_test_stats, body_gyro_z_test_stats), axis=1)

In [31]:
X_train.shape, X_test.shape

((7352, 90), (2947, 90))

In [32]:
# scaling data makes big differnce! to all! 

In [33]:
from sklearn.preprocessing import StandardScaler

In [34]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [35]:
#can one do a simple log reg first?
from sklearn.linear_model import LogisticRegression

In [36]:
logreg = LogisticRegression()

In [37]:
logreg.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [38]:
logreg.score(X_train, y_train)

0.9530739934711643

In [39]:
y_pred_logreg = logreg.predict(X_test)

In [40]:
print(classification_report(y_test, y_pred_logreg))

              precision    recall  f1-score   support

         1.0       0.82      0.76      0.79       496
         2.0       0.82      0.76      0.79       471
         3.0       0.76      0.93      0.84       420
         4.0       0.88      0.90      0.89       491
         5.0       0.90      0.89      0.90       532
         6.0       1.00      0.95      0.97       537

    accuracy                           0.87      2947
   macro avg       0.86      0.87      0.86      2947
weighted avg       0.87      0.87      0.87      2947



In [41]:
from sklearn import svm

svm = svm.SVC()

In [42]:
svm.fit(X_train, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [43]:
svm.score(X_train, y_train)

0.9581066376496191

In [44]:
y_pred_svm = svm.predict(X_test)

In [45]:
print(classification_report(y_test, y_pred_svm))

              precision    recall  f1-score   support

         1.0       0.85      0.83      0.84       496
         2.0       0.85      0.86      0.86       471
         3.0       0.91      0.93      0.92       420
         4.0       0.81      0.82      0.82       491
         5.0       0.85      0.82      0.83       532
         6.0       0.97      1.00      0.98       537

    accuracy                           0.88      2947
   macro avg       0.87      0.88      0.88      2947
weighted avg       0.88      0.88      0.88      2947



In [46]:
from sklearn.ensemble import RandomForestClassifier

In [47]:
rfc = RandomForestClassifier()

In [48]:
rfc.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [49]:
rfc.score(X_train, y_train)

1.0

In [50]:
y_pred_rfc = rfc.predict(X_test)

In [51]:
print(classification_report(y_test, y_pred_rfc))

              precision    recall  f1-score   support

         1.0       0.72      0.79      0.75       496
         2.0       0.80      0.73      0.76       471
         3.0       0.90      0.88      0.89       420
         4.0       0.81      0.92      0.86       491
         5.0       0.92      0.80      0.86       532
         6.0       1.00      1.00      1.00       537

    accuracy                           0.86      2947
   macro avg       0.86      0.85      0.85      2947
weighted avg       0.86      0.86      0.86      2947



In [52]:
# cool this got better - code still messy and can be improved - how? use functions? 
# add more features? perhaps 