In [140]:
import pandas as pd
import numpy as np
import random
from keras.models import Sequential
from keras.layers import LSTM, Conv1D, MaxPooling1D
from keras.layers.core import Dense, Dropout, Flatten, Dropout
from keras.utils import to_categorical
from keras.optimizers import Adam
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Getting generated train and test features

In [2]:
train = pd.read_csv('UCI_HAR_dataset/csv_files/train.csv')
test = pd.read_csv('UCI_HAR_dataset/csv_files/test.csv')
print(train.shape, test.shape)

(7352, 564) (2947, 564)


In [3]:
train.head()

Unnamed: 0,tBodyAccmeanX,tBodyAccmeanY,tBodyAccmeanZ,tBodyAccstdX,tBodyAccstdY,tBodyAccstdZ,tBodyAccmadX,tBodyAccmadY,tBodyAccmadZ,tBodyAccmaxX,...,angletBodyAccMeangravity,angletBodyAccJerkMeangravityMean,angletBodyGyroMeangravityMean,angletBodyGyroJerkMeangravityMean,angleXgravityMean,angleYgravityMean,angleZgravityMean,subject,Activity,ActivityName
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627,1,5,STANDING
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317,1,5,STANDING
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118,1,5,STANDING
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663,1,5,STANDING
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892,1,5,STANDING


In [25]:
# get X_train and y_train from csv files
X_train = train.drop(['subject', 'Activity', 'ActivityName'], axis=1)
y_train = train.Activity

# get X_test and y_test from test csv file
X_test = test.drop(['subject', 'Activity', 'ActivityName'], axis=1)
y_test = test.Activity

In [26]:
print('X_train and y_train : ({},{})'.format(X_train.shape, y_train.shape))
print('X_test  and y_test  : ({},{})'.format(X_test.shape, y_test.shape))

X_train and y_train : ((7352, 561),(7352,))
X_test  and y_test  : ((2947, 561),(2947,))


In [27]:
X_train.head()

Unnamed: 0,tBodyAccmeanX,tBodyAccmeanY,tBodyAccmeanZ,tBodyAccstdX,tBodyAccstdY,tBodyAccstdZ,tBodyAccmadX,tBodyAccmadY,tBodyAccmadZ,tBodyAccmaxX,...,fBodyBodyGyroJerkMagmeanFreq,fBodyBodyGyroJerkMagskewness,fBodyBodyGyroJerkMagkurtosis,angletBodyAccMeangravity,angletBodyAccJerkMeangravityMean,angletBodyGyroMeangravityMean,angletBodyGyroJerkMeangravityMean,angleXgravityMean,angleYgravityMean,angleZgravityMean
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.074323,-0.298676,-0.710304,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,0.158075,-0.595051,-0.861499,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,0.414503,-0.390748,-0.760104,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,0.404573,-0.11729,-0.482845,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,0.087753,-0.351471,-0.699205,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892


# Training Second Stage Classifiers

## Preparing data 

### Preparing train data for staitic HAR Model

In [42]:
activity_4_index = np.where(y_train == 4)[0]
activity_5_index = np.where(y_train == 5)[0]
activity_6_index = np.where(y_train == 6)[0]

In [44]:
static_activity_index = np.concatenate([activity_4_index, activity_5_index, activity_6_index])

In [53]:
# Shuffling static data index
rand_var = random.random()
random.shuffle(static_activity_index, lambda: rand_var)

In [54]:
#getting X_train associated with static activites
# static_activity_index = list(static_activity_index)
X_train_static = X_train.loc[static_activity_index]
y_train_static = y_train.loc[static_activity_index]

In [83]:
#converting y_train from (4,5,6) to (0,1,2)
y_train_static = y_train_static - 4

### Preparing test data for staitic HAR Model

In [57]:
activity_4_index = np.where(y_test == 4)[0]
activity_5_index = np.where(y_test == 5)[0]
activity_6_index = np.where(y_test == 6)[0]

In [58]:
static_activity_index = np.concatenate([activity_4_index, activity_5_index, activity_6_index])

In [59]:
# Shuffling static data index
rand_var = random.random()
random.shuffle(static_activity_index, lambda: rand_var)

In [60]:
#getting X_train associated with static activites
X_test_static = X_test.loc[static_activity_index]
y_test_static = y_test.loc[static_activity_index]

In [84]:
#converting y_test from (4,5,6) to (0,1,2)
y_test_static = y_test_static - 4

### Preparing train data for dynamic HAR Model

In [75]:
activity_1_index = np.where(y_train == 1)[0]
activity_2_index = np.where(y_train == 2)[0]
activity_3_index = np.where(y_train == 3)[0]

In [76]:
dynamic_activity_index = np.concatenate([activity_1_index, activity_2_index, activity_3_index])

In [77]:
# Shuffling static data index
rand_var = random.random()
random.shuffle(dynamic_activity_index, lambda: rand_var)

In [78]:
#getting X_train associated with static activites
X_train_dynamic = X_train.loc[dynamic_activity_index]
y_train_dynamic = y_train.loc[dynamic_activity_index]

In [85]:
#converting y_train from (1,2,3) to (0,1,2)
y_train_dynamic = y_train_dynamic - 1

### Preparing test data for dynamic HAR Model

In [67]:
activity_1_index = np.where(y_test == 1)[0]
activity_2_index = np.where(y_test == 2)[0]
activity_3_index = np.where(y_test == 3)[0]

In [68]:
dynamic_activity_index = np.concatenate([activity_1_index, activity_2_index, activity_3_index])

In [69]:
# Shuffling static data index
rand_var = random.random()
random.shuffle(dynamic_activity_index, lambda: rand_var)

In [74]:
#getting X_train associated with static activites
X_test_dynamic = X_test.loc[dynamic_activity_index]
y_test_dynamic = y_test.loc[dynamic_activity_index]

In [86]:
#converting y_test from (1,2,3) to (0,1,2)
y_test_dynamic = y_test_dynamic - 1

In [81]:
print(f'Shape of data for dynamic HAR: X_train --> {X_train_dynamic.shape}, y_train --> {y_train_dynamic.shape}, X_test --> {X_test_dynamic.shape}, y_test --> {y_test_dynamic.shape}')

Shape of data for dynamic HAR: X_train --> (3285, 561), y_train --> (3285,), X_test --> (1387, 561), y_test --> (1387,)


In [82]:
print(f'Shape of data for static HAR: X_train --> {X_train_static.shape}, y_train --> {y_train_static.shape}, X_test --> {X_test_static.shape}, y_test --> {y_test_static.shape}')

Shape of data for static HAR: X_train --> (4067, 561), y_train --> (4067,), X_test --> (1560, 561), y_test --> (1560,)


# Training Model for static HAR data

In [155]:
no_class = 3
y_train_static_ohe = to_categorical(y_train_static, no_class)
y_test_static_ohe = to_categorical(y_test_static, no_class)

In [156]:
model = Sequential()
model.add(Conv1D(30, 3, input_shape=(X_train_static.shape[1], 1), activation='relu'))
model.add(Conv1D(50, 3, activation='relu'))
model.add(Conv1D(100, 3, activation='relu'))
model.add(Flatten())
model.add(Dense(3, activation='softmax'))
model.add(Dropout(0.50))

adam = Adam(lr=0.0001)
model.compile(loss='mean_squared_error', optimizer=adam, metrics=['accuracy'])

In [157]:
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_15 (Conv1D)           (None, 559, 30)           120       
_________________________________________________________________
conv1d_16 (Conv1D)           (None, 557, 50)           4550      
_________________________________________________________________
conv1d_17 (Conv1D)           (None, 555, 100)          15100     
_________________________________________________________________
flatten_5 (Flatten)          (None, 55500)             0         
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 166503    
_________________________________________________________________
dropout_5 (Dropout)          (None, 3)                 0         
Total params: 186,273
Trainable params: 186,273
Non-trainable params: 0
________________________________________________

In [158]:
model.fit(np.expand_dims(X_train_static, axis=2), y_train_static_ohe,
          batch_size=32, epochs=100, verbose=2, validation_split=0.2)

Train on 3253 samples, validate on 814 samples
Epoch 1/100
 - 12s - loss: 0.3119 - acc: 0.4402 - val_loss: 0.1304 - val_acc: 0.9521
Epoch 2/100
 - 11s - loss: 0.2589 - acc: 0.5192 - val_loss: 0.1008 - val_acc: 0.9201
Epoch 3/100
 - 11s - loss: 0.2491 - acc: 0.5278 - val_loss: 0.0914 - val_acc: 0.9423
Epoch 4/100
 - 11s - loss: 0.2397 - acc: 0.5472 - val_loss: 0.0736 - val_acc: 0.9595
Epoch 5/100
 - 10s - loss: 0.2434 - acc: 0.5232 - val_loss: 0.0685 - val_acc: 0.9619
Epoch 6/100
 - 10s - loss: 0.2340 - acc: 0.5487 - val_loss: 0.0715 - val_acc: 0.9533
Epoch 7/100
 - 11s - loss: 0.2356 - acc: 0.5417 - val_loss: 0.0790 - val_acc: 0.9582
Epoch 8/100
 - 11s - loss: 0.2339 - acc: 0.5457 - val_loss: 0.0675 - val_acc: 0.9693
Epoch 9/100
 - 11s - loss: 0.2365 - acc: 0.5413 - val_loss: 0.0729 - val_acc: 0.9619
Epoch 10/100
 - 11s - loss: 0.2332 - acc: 0.5481 - val_loss: 0.0704 - val_acc: 0.9545
Epoch 11/100
 - 11s - loss: 0.2350 - acc: 0.5453 - val_loss: 0.0711 - val_acc: 0.9595
Epoch 12/100
 - 

Epoch 96/100
 - 10s - loss: 0.2254 - acc: 0.5478 - val_loss: 0.0707 - val_acc: 0.9558
Epoch 97/100
 - 10s - loss: 0.2254 - acc: 0.5546 - val_loss: 0.0748 - val_acc: 0.9570
Epoch 98/100
 - 11s - loss: 0.2297 - acc: 0.5417 - val_loss: 0.0709 - val_acc: 0.9570
Epoch 99/100
 - 10s - loss: 0.2219 - acc: 0.5712 - val_loss: 0.0736 - val_acc: 0.9619
Epoch 100/100
 - 10s - loss: 0.2250 - acc: 0.5546 - val_loss: 0.0767 - val_acc: 0.9570


<keras.callbacks.History at 0x2034ab64160>

In [159]:
score = model.evaluate(np.expand_dims(X_test_static, axis=2), y_test_static_ohe)



In [160]:
score

[0.07686070127364916, 0.958974358974359]

In [161]:
#saving static model
model.save("trained_models/static_hra_model.h5")

# Training Model for Dynamic HAR data

In [162]:
no_class = 3
y_train_dynamic_ohe = to_categorical(y_train_dynamic, no_class)
y_test_dynamic_ohe = to_categorical(y_test_dynamic, no_class)

In [163]:
model = Sequential()
model.add(Conv1D(100, 3, input_shape=(X_train_dynamic.shape[1], 1), activation='relu'))
model.add(MaxPooling1D(3))
model.add(Flatten())
model.add(Dense(3, activation='softmax'))
model.add(Dropout(0.5))

adam = Adam(lr=0.0004)
model.compile(loss='mean_squared_error', optimizer=adam, metrics=['accuracy'])

In [164]:
model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_18 (Conv1D)           (None, 559, 100)          400       
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 186, 100)          0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 18600)             0         
_________________________________________________________________
dense_6 (Dense)              (None, 3)                 55803     
_________________________________________________________________
dropout_6 (Dropout)          (None, 3)                 0         
Total params: 56,203
Trainable params: 56,203
Non-trainable params: 0
_________________________________________________________________


In [165]:
model.fit(
    np.expand_dims(X_train_dynamic, axis=2), 
    y_train_dynamic_ohe,
    batch_size=32, 
    epochs=50, 
    verbose=2, 
    validation_split=0.2
)

Train on 2628 samples, validate on 657 samples
Epoch 1/50
 - 3s - loss: 0.2695 - acc: 0.5156 - val_loss: 0.1578 - val_acc: 0.7610
Epoch 2/50
 - 3s - loss: 0.2515 - acc: 0.5342 - val_loss: 0.0997 - val_acc: 0.9559
Epoch 3/50
 - 3s - loss: 0.2376 - acc: 0.5537 - val_loss: 0.1129 - val_acc: 0.9269
Epoch 4/50
 - 3s - loss: 0.2386 - acc: 0.5495 - val_loss: 0.0843 - val_acc: 0.9574
Epoch 5/50
 - 3s - loss: 0.2366 - acc: 0.5544 - val_loss: 0.0922 - val_acc: 0.9665
Epoch 6/50
 - 3s - loss: 0.2364 - acc: 0.5460 - val_loss: 0.0879 - val_acc: 0.9787
Epoch 7/50
 - 3s - loss: 0.2295 - acc: 0.5681 - val_loss: 0.0749 - val_acc: 0.9924
Epoch 8/50
 - 3s - loss: 0.2321 - acc: 0.5563 - val_loss: 0.0793 - val_acc: 0.9878
Epoch 9/50
 - 3s - loss: 0.2288 - acc: 0.5677 - val_loss: 0.0800 - val_acc: 0.9802
Epoch 10/50
 - 3s - loss: 0.2306 - acc: 0.5559 - val_loss: 0.1056 - val_acc: 0.9072
Epoch 11/50
 - 3s - loss: 0.2289 - acc: 0.5681 - val_loss: 0.0805 - val_acc: 0.9848
Epoch 12/50
 - 3s - loss: 0.2280 - acc

<keras.callbacks.History at 0x2034b344fd0>

In [166]:
score = model.evaluate(np.expand_dims(X_test_dynamic, axis=2), y_test_dynamic_ohe)



In [167]:
score

[0.08834828513190311, 0.9689978370583994]

In [169]:
#saving dynamic model
model.save("trained_models/dynamic_hra_model.h5")

# Training first stage classifier

- it is a binary classifier which predict static(laying, sitting, standing) or dynamic(walking, up stairs, down stairs) activity
- 0: dynamic, 1: static

## Preparing data

In [131]:
def return_y(arr):
    y_to_return = []
    for y in arr:
        if y < 4:
            y_to_return.append(0)
        else:
            y_to_return.append(1)
    return y_to_return

In [132]:
binary_y_train = return_y(y_train)
binary_y_test = return_y(y_test)

In [None]:
max_depth = [1, 5, 10, 50, 100, 500];
min_samples_split = [5, 10, 100, 500];

In [137]:
from sklearn.tree import DecisionTreeClassifier
parameters = {
    'max_depth': [2, 3, 5, 7, 10, 50, 100],
    'min_samples_split': [5, 10, 100, 500]     
}
dt = DecisionTreeClassifier(class_weight="balanced")
dt_grid = GridSearchCV(dt,param_grid=parameters, n_jobs=-1)
dt_grid.fit(X_train, binary_y_train)
# dt_grid_results = perform_model(dt_grid, X_train, y_train, X_test, y_test, class_labels=labels)
# print_grid_search_attributes(dt_grid_results['model'])



GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=DecisionTreeClassifier(class_weight='balanced',
                                              criterion='gini', max_depth=None,
                                              max_features=None,
                                              max_leaf_nodes=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              presort=False, random_state=None,
                                              splitter='best'),
             iid='warn', n_jobs=-1,
             param_grid={'max_depth': [2, 3, 5, 7, 10, 50, 100],
                         'min_samples_split': [5, 10, 100, 500

In [138]:
dt_grid.best_params_

{'max_depth': 7, 'min_samples_split': 10}

In [139]:
dt = DecisionTreeClassifier(max_depth=7, min_samples_split=10, class_weight="balanced")
dt.fit(X_train, binary_y_train)

DecisionTreeClassifier(class_weight='balanced', criterion='gini', max_depth=7,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')

In [141]:
y_pred = dt.predict(X_test)
accuracy = accuracy_score(y_true=binary_y_test, y_pred=y_pred)

## saving first stage classifier

In [151]:
import pickle

filename = 'trained_models/first_stage_model.sav'
pickle.dump(dt, open(filename, 'wb'))