# Smartphone motion detection
# Accuracy ~ 98%
# Using simple Sequential NN

#### This simple neural network was trained on the public dataset made by UCI ML:

https://www.kaggle.com/uciml/human-activity-recognition-with-smartphones 

#### Brief description from the data provider:

"The Human Activity Recognition database was built from the recordings of 30 study participants performing activities of daily living (ADL) while carrying a waist-mounted smartphone with embedded inertial sensors. The objective is to classify activities into one of the six activities performed.

The experiments have been carried out with a group of **30 volunteers within an age bracket of 19-48 years. Each person performed six activities (WALKING, WALKINGUPSTAIRS, WALKINGDOWNSTAIRS, SITTING, STANDING, LAYING) wearing a smartphone (Samsung Galaxy S II) on the waist.** Using its embedded accelerometer and gyroscope, we captured 3-axial linear acceleration and 3-axial angular velocity at a constant rate of 50Hz. The experiments have been video-recorded to label the data manually. The obtained dataset has been randomly partitioned into two sets, where 70% of the volunteers was selected for generating the training data and 30% the test data."

Neural network architecture:

 - 70% of data (train set) is split into train and test with 0.2 ratio
 - 30% of data (test set) is loaded as **validation set**
 - X sample has shape [1, 562], Standard-scaled
 - y is the 563rd column, one-hot encoded
 - A baseline Keras model is built and compiled with 1 hidden layer of 6 neurons, then:
     - Wrapped in scikit-learn KerasClassifier wrapper
     - Cross-validated with a Randomized Search CV grid (15 min run on a regular laptop)
 - Resulting model is saved in repository

In [None]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np

## Import and preprocess data

In [None]:
TRAIN_CSV_PATH = "../input/human-activity-recognition-with-smartphones/train.csv"

# Read the training CSV into a DataFrame; the trailing expression shows its shape
df = pd.read_csv(TRAIN_CSV_PATH)
df.shape

In [None]:
# Preview the first ten rows
df.head(n=10)

In [None]:
# Shuffle original data to get rid of possible ordering patterns

from sklearn.utils import shuffle

# reset_index(drop=True) renumbers the rows and discards the old index in one
# step, instead of first materialising it as an 'index' column and then
# dropping that column again.
df = shuffle(df).reset_index(drop=True)

In [None]:
# Separate the feature columns from the target column
# NOTE(review): the slice takes the first 562 columns; if the CSV layout is
# 561 sensor features + `subject` + `Activity`, the subject id ends up among
# the features - confirm this is intended.

feature_count = 562
X = df.iloc[:, :feature_count]   # first 562 columns
y = df.iloc[:, [-1]]             # last column only, still a DataFrame

In [None]:
# Standardise the features

from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Fit first, then transform; the fitted scaler stays bound to `scaler`
scaler = StandardScaler().fit(X)
X = scaler.transform(X)
X.shape

In [None]:
# One-hot encode the target labels

# Fit first, then transform; the fitted encoder stays bound to `ohe`
ohe = OneHotEncoder().fit(y)
y_encoded = ohe.transform(y)
y_encoded.shape

## Split to train / test

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows as the test split; the remaining 80% is used for training

split_parts = train_test_split(X, y_encoded, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts
print(
    "Train data shape: ", X_train.shape, y_train.shape,
    "\nTest data shape: ", X_test.shape, y_test.shape,
)

In [None]:
# Check one hot encoded labels:
# inspect the label categories stored on the fitted one-hot encoder

ohe.categories_

## Validation set

In [None]:
# Load validation set
# The held-out test.csv partition serves as the validation set. It is bound
# only to df2, so the training DataFrame `df` is left untouched.

df2 = pd.read_csv("../input/human-activity-recognition-with-smartphones/test.csv")
df2.shape

In [None]:
# Create X_val, y_val; shuffle, scale, encode

df2 = shuffle(df2)
X_val = df2.iloc[:, 0:562]
y_val = df2.iloc[:, -1:]

# Re-use the scaler and encoder that were already fitted earlier in the
# notebook. Calling fit_transform here would re-estimate the scaling
# statistics and label categories on the validation rows, so the model would
# be evaluated on inputs scaled differently from the ones it was trained on.
X_val = scaler.transform(X_val)
y_val_encoded = ohe.transform(y_val)

## Keras Model

### Callbacks

In [None]:
# Early-stopping callback configured with a patience of 15

EARLY_STOPPING_PATIENCE = 15
early_stopping_callback = keras.callbacks.EarlyStopping(patience=EARLY_STOPPING_PATIENCE)

In [None]:
# TensorBoard logging: every run gets its own timestamped directory

import os
import time

root_logdir = os.path.join(os.curdir, "my_logs")


def get_run_logdir():
    """Return a path under ``root_logdir`` named after the current local time."""
    timestamp = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(root_logdir, timestamp)


run_logdir = get_run_logdir()

# TensorBoard callback writing to this run's log directory
tensorboard_cb = keras.callbacks.TensorBoard(log_dir=run_logdir)


### Training and tuning

#### Randomized search (RandomizedSearchCV) using the scikit-learn KerasClassifier wrapper

In [None]:
from keras.wrappers import scikit_learn
from sklearn.model_selection import RandomizedSearchCV

# Function

def build_model(n_layers=1, n_neurons=6, activation='relu', optimizer='Nadam',
                input_dim=562, n_classes=6):
    """Build and compile a fully connected softmax classifier.

    Args:
        n_layers: Number of hidden Dense layers.
        n_neurons: Number of units in each hidden layer.
        activation: Activation used by the hidden layers.
        optimizer: Optimizer handed to ``compile``.
        input_dim: Length of one input sample. Defaults to 562, the value
            that used to be hard-coded here.
        n_classes: Number of units in the softmax output layer. Defaults
            to 6, the value that used to be hard-coded here.

    Returns:
        The compiled ``keras.models.Sequential`` model.
    """
    model = keras.models.Sequential()
    # always specify "input_shape" on the first layer
    model.add(keras.layers.Flatten(input_shape=[input_dim]))
    for _ in range(n_layers):
        model.add(keras.layers.Dense(n_neurons, kernel_initializer='he_normal',
                                     activation=activation))

    model.add(keras.layers.Dense(n_classes, kernel_initializer='he_normal',
                                 activation="softmax"))

    # Metrics are given as a list, the form documented for compile();
    # a bare string is not accepted by every Keras version.
    model.compile(loss="categorical_crossentropy", metrics=["accuracy"],
                  optimizer=optimizer)

    return model

In [None]:
# Search space for the hyper-parameter search; the candidate values were
# pre-selected based on previous cross validations

params = dict(
    n_layers=[1, 2, 3],
    n_neurons=[17, 19],
)

In [None]:
# Expose the model-building function as a scikit-learn style classifier

keras_clf = keras.wrappers.scikit_learn.KerasClassifier(build_fn=build_model)

In [None]:
# Set up the randomized search over `params` around the wrapped classifier

rnd_clf = RandomizedSearchCV(estimator=keras_clf, param_distributions=params)

In [None]:
# Run the cross-validated search - long run

# The encoded labels are converted with .toarray() before being handed to fit
y_train_dense = y_train.toarray()
holdout = (X_test, y_test.toarray())
search_callbacks = [early_stopping_callback, tensorboard_cb]

rnd_clf.fit(
    X_train,
    y_train_dense,
    epochs=100,
    validation_data=holdout,
    callbacks=search_callbacks,
)

In [None]:
# Get best parameters: display the parameter combination selected by the search
rnd_clf.best_params_

In [None]:
# Check best score: display the score recorded for the selected combination
rnd_clf.best_score_

## Visualize training graphs

In [None]:
# Optional
# %load_ext tensorboard
# %tensorboard --logdir=./my_logs --port=6012

## Saving the model

In [None]:
# Build a fresh model from the hyper-parameters selected by the search

best = rnd_clf.best_params_
model_saved = build_model(
    n_layers=best['n_layers'],
    n_neurons=best['n_neurons'],
    activation='relu',
    optimizer='Nadam',
)

In [None]:
# Train model

# `callbacks` is documented as a list of callback instances, so the single
# TensorBoard callback is wrapped in a list rather than passed bare.
model_saved.fit(X_train, y_train.toarray(), epochs=400, callbacks=[tensorboard_cb])

In [None]:
# Intermediate evaluation on the held-out test split

model_saved.evaluate(x=X_test, y=y_test.toarray(), verbose=1)

In [None]:
# Save model: keep the architecture config in memory and write the model to disk

config = model_saved.get_config()
model_saved.save('Motion_detector.h5')

In [None]:
# Restore model: rebuild the architecture from the saved config and compile it

model_f = keras.models.Sequential.from_config(config)
# Metrics are given as a list, the form documented for compile();
# a bare string is not accepted by every Keras version.
model_f.compile(loss="categorical_crossentropy", metrics=["accuracy"], optimizer='Nadam')

In [None]:
# Load the saved weights into the rebuilt model, then print its summary
weights_path = 'Motion_detector.h5'
model_f.load_weights(weights_path)
model_f.summary()

In [None]:
# Verify that the restored model reproduces the trained model's predictions
preds_trained = model_saved.predict(X_test)
preds_restored = model_f.predict(X_test)
np.testing.assert_allclose(preds_trained, preds_restored)

In [None]:
# Restored model evaluation:
# EarlyStopping is not passed here: it only acts on training hooks, so it
# has no effect during evaluate(). The TensorBoard callback is kept.
model_f.evaluate(X_val, y_val_encoded.toarray(),
                 verbose=1,
                 callbacks=[tensorboard_cb])

In [None]:
# See how many predictions of "WALKING" in the first 1000 rows

prediction = model_f.predict(X_val)
# Decode only the rows that are counted, and flatten the decoded label column
# into a plain list of strings so list.count compares str with str instead of
# relying on the truthiness of one-element arrays.
res = ohe.inverse_transform(prediction[0:1000]).ravel().tolist()
res.count('WALKING')

In [None]:
# See how many true 'WALKING' labels are in the first 1000 rows

# Flatten the decoded label column into a plain list of strings so list.count
# compares str with str instead of relying on one-element array truthiness.
ohe.inverse_transform(y_val_encoded[0:1000]).ravel().tolist().count('WALKING')

### The model can be restored and provides about 98% accuracy, or it can be used for transfer learning