# UCI Human Action Recognition

### Import

In [1]:
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import sklearn
from sklearn import metrics

In [3]:
# Import different classifiers
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier

## Functions

### read dataset

In [4]:
def read_data(file):
    data = pd.read_csv(file)
    
    # suffle data
    data = sklearn.utils.shuffle(data)
    
    X_data = data.drop(['subject', 'Activity', 'ActivityName'], axis=1)
    y_data = data.ActivityName
    
    return np.array(X_data), np.array(y_data)

### Classifiers:

In [5]:
def train_model(train_x, train_y, model_name='NB', validation=None):
    """
    Possible model names: ['NB', 'SVM', 'XGB', 'MLP', 'ADA', 'BAG', 'RF']
    default = 'NB'
    
    validation: (val_x, val_y) tupple for validation accuracy score.
    
    return: trained model
    """
    model = None
    if model_name == 'SVM':
        model = svm.SVC(gamma='scale', probability=True)
    elif model_name == 'XGB':
        model = XGBClassifier(n_estimators=200, max_depth=5, n_jobs=2)
#         model = XGBClassifier()
    elif model_name == 'MLP':
        model = MLPClassifier(hidden_layer_sizes=(100,100,100), max_iter=800, alpha=0.0001,
                     solver='sgd', verbose=10, tol=0.000000001)
    elif model_name == 'ADA':
        model = AdaBoostClassifier(n_estimators=50)
    elif model_name == 'BAG':
        model = BaggingClassifier(n_jobs=2, n_estimators=50)
    elif model_name == 'RF':
        model = RandomForestClassifier(n_estimators=200, max_depth=10)
    elif model_name == 'KNN':
        model = KNeighborsClassifier(n_neighbors=5, weights='distance', algorithm='auto', leaf_size=30, p=2, metric='minkowski', metric_params=None, n_jobs=None)
    else:
        model = GaussianNB()
    
    model.fit(train_x, train_y)
    
    if validation is not None:
        y_hat = model.predict(validation[0])
        acc = metrics.accuracy_score(validation[1], y_hat)
        print(f"Validation Accuracy in '{model_name}' = {acc}")
        cm = metrics.confusion_matrix(validation[1], y_hat)
        print(cm)
        recall = cm[0][0] / (cm[0][0] + cm[0][1])
        precision = cm[0][0] / (cm[0][0] + cm[1][0])
        f1 = 2*(precision*recall)/(precision+recall)
        print(f"Recall in '{model_name}' = {recall}")
        print(f"Precision in '{model_name}' = {precision}")
        print(f"F1 Score in '{model_name}' = {f1}")
               
    return model

## Main Cells

### Load data:

In [6]:
train.to_csv('C:/Users/dixit/Desktop/uci-har dataset/UCI HAR Dataset/data/train.csv', index=False)
test.to_csv('C:/Users/dixit/Desktop/uci-har dataset/UCI HAR Dataset/data/test.csv', index=False)

NameError: name 'train' is not defined

In [7]:
print("Train  : ", train_X.shape, train_y.shape)
print("Test   : ", test_X.shape, test_y.shape)

Train  :  (7352, 561) (7352,)
Test   :  (2947, 561) (2947,)


In [10]:
train_y

array(['WALKING', 'LAYING', 'WALKING_UPSTAIRS', ..., 'WALKING_UPSTAIRS',
       'LAYING', 'WALKING_UPSTAIRS'], dtype=object)

### Classification:

In [10]:
model1 = train_model(train_X, train_y, model_name='RF', validation=(test_X, test_y))

Validation Accuracy in 'RF' = 0.9239904988123515
[[537   0   0   0   0   0]
 [  0 432  59   0   0   0]
 [  0  46 486   0   0   0]
 [  0   0   0 486  10   0]
 [  0   0   0  25 354  41]
 [  0   0   0  37   6 428]]
Recall in 'RF' = 1.0
Precision in 'RF' = 1.0
F1 Score in 'RF' = 1.0


In [11]:
model2 = train_model(train_X, train_y, model_name='BAG', validation=(test_X, test_y))

Validation Accuracy in 'BAG' = 0.8978622327790974
[[537   0   0   0   0   0]
 [  0 399  92   0   0   0]
 [  0  54 478   0   0   0]
 [  0   0   0 477  14   5]
 [  0   0   0  11 364  45]
 [  0   0   0  69  11 391]]
Recall in 'BAG' = 1.0
Precision in 'BAG' = 1.0
F1 Score in 'BAG' = 1.0


In [12]:
model3 = train_model(train_X, train_y, model_name='ADA', validation=(test_X, test_y))

Validation Accuracy in 'ADA' = 0.5310485239226331
[[537   0   0   0   0   0]
 [  0   0 491   0   0   0]
 [  0   0 532   0   0   0]
 [  0   0   0 496   0   0]
 [  0   0   0 420   0   0]
 [  0   0   0 471   0   0]]
Recall in 'ADA' = 1.0
Precision in 'ADA' = 1.0
F1 Score in 'ADA' = 1.0


In [13]:
model4 = train_model(train_X, train_y, model_name='NB', validation=(test_X, test_y))

Validation Accuracy in 'NB' = 0.7702748557855447
[[323 211   0   0   0   3]
 [  5 368 111   0   0   7]
 [  8  54 455   0   0  15]
 [  0   0   0 416  42  38]
 [  0   0   0  80 257  83]
 [  0   0   0   9  11 451]]
Recall in 'NB' = 0.6048689138576779
Precision in 'NB' = 0.9847560975609756
F1 Score in 'NB' = 0.7494199535962879


In [14]:
model5 = train_model(train_X, train_y, model_name='SVM', validation=(test_X, test_y))

Validation Accuracy in 'SVM' = 0.9504580929759077
[[537   0   0   0   0   0]
 [  0 438  51   0   0   2]
 [  0  29 503   0   0   0]
 [  0   0   0 488   3   5]
 [  0   0   0  10 384  26]
 [  0   0   0  20   0 451]]
Recall in 'SVM' = 1.0
Precision in 'SVM' = 1.0
F1 Score in 'SVM' = 1.0


In [15]:
model6 = train_model(train_X, train_y, model_name='XGB', validation=(test_X, test_y))

Validation Accuracy in 'XGB' = 0.9426535459789617
[[537   0   0   0   0   0]
 [  0 426  63   0   0   2]
 [  0  29 503   0   0   0]
 [  0   0   0 489   3   4]
 [  0   0   0   8 383  29]
 [  0   0   0  25   6 440]]
Recall in 'XGB' = 1.0
Precision in 'XGB' = 1.0
F1 Score in 'XGB' = 1.0


In [16]:
model7 = train_model(train_X, train_y, model_name='KNN', validation=(test_X, test_y))

Validation Accuracy in 'KNN' = 0.9002375296912114
[[534   2   1   0   0   0]
 [  0 388 100   0   0   3]
 [  0  37 495   0   0   0]
 [  0   0   0 484  10   2]
 [  0   0   0  44 331  45]
 [  0   0   0  38  12 421]]
Recall in 'KNN' = 0.996268656716418
Precision in 'KNN' = 1.0
F1 Score in 'KNN' = 0.9981308411214954


In [17]:
model8 = train_model(train_X, train_y, model_name='MLP', validation=(test_X, test_y))

Iteration 1, loss = 1.69583624
Iteration 2, loss = 1.39671824
Iteration 3, loss = 1.11764323
Iteration 4, loss = 0.90482845
Iteration 5, loss = 0.76359330
Iteration 6, loss = 0.66750973
Iteration 7, loss = 0.59702098
Iteration 8, loss = 0.54119112
Iteration 9, loss = 0.49391876
Iteration 10, loss = 0.45298413
Iteration 11, loss = 0.41766312
Iteration 12, loss = 0.38774017
Iteration 13, loss = 0.36121983
Iteration 14, loss = 0.33863531
Iteration 15, loss = 0.31845377
Iteration 16, loss = 0.30080831
Iteration 17, loss = 0.28578848
Iteration 18, loss = 0.27197180
Iteration 19, loss = 0.25981109
Iteration 20, loss = 0.24845548
Iteration 21, loss = 0.23768007
Iteration 22, loss = 0.22811392
Iteration 23, loss = 0.21994205
Iteration 24, loss = 0.21169990
Iteration 25, loss = 0.20279170
Iteration 26, loss = 0.19604339
Iteration 27, loss = 0.18974789
Iteration 28, loss = 0.18461731
Iteration 29, loss = 0.17698515
Iteration 30, loss = 0.17225720
Iteration 31, loss = 0.16519418
Iteration 32, los

Iteration 253, loss = 0.02407212
Iteration 254, loss = 0.02458025
Iteration 255, loss = 0.02677248
Iteration 256, loss = 0.02374630
Iteration 257, loss = 0.02385537
Iteration 258, loss = 0.02375220
Iteration 259, loss = 0.02515390
Iteration 260, loss = 0.02304719
Iteration 261, loss = 0.02293338
Iteration 262, loss = 0.02441084
Iteration 263, loss = 0.02421092
Iteration 264, loss = 0.02394041
Iteration 265, loss = 0.02407972
Iteration 266, loss = 0.02297920
Iteration 267, loss = 0.02396081
Iteration 268, loss = 0.02336860
Iteration 269, loss = 0.02357585
Iteration 270, loss = 0.02339379
Iteration 271, loss = 0.02238554
Iteration 272, loss = 0.02246567
Iteration 273, loss = 0.02367331
Iteration 274, loss = 0.02310994
Iteration 275, loss = 0.02194864
Iteration 276, loss = 0.02207082
Iteration 277, loss = 0.02313145
Iteration 278, loss = 0.02339173
Iteration 279, loss = 0.02301822
Iteration 280, loss = 0.02236818
Iteration 281, loss = 0.02303212
Iteration 282, loss = 0.02371027
Iteration 