<a href="https://colab.research.google.com/github/khataei/PE-classification-DeepLearning/blob/master/Tunned-Talos-1-CNN-activity-classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Talos Tuner for CNN Activity Classifier

In this notebook, we use scikit-learn's `RandomizedSearchCV` to tune the hyperparameters of a CNN that classifies PE activity.

#### Load dependencies

In [None]:
import os  
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 

import tensorflow
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv1D, GlobalMaxPooling1D
from tensorflow.keras.layers import AveragePooling1D, LeakyReLU , MaxPool1D, GlobalAveragePooling1D
from tensorflow.keras.callbacks import ModelCheckpoint 
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

from sklearn.metrics import roc_auc_score, roc_curve 
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from tensorflow.python.client import device_lib

# List every compute device that TensorFlow reports on this machine.
local_devices = device_lib.list_local_devices()
print(local_devices)

import tensorflow as tf

# Count the physical GPUs visible to TensorFlow.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("# GPUs Available: ", len(gpu_devices))

# Last expression of the cell: displays the installed TensorFlow version.
tensorflow.__version__

#### Set hyperparameters

In [None]:
# output directory name:
output_dir = 'model_output/cnn2'

# Directory that holds the prepared input archive. The default is the original
# network-share location; set the PE_INPUT_DIR environment variable to run the
# notebook on another machine (e.g. Colab) without editing this cell.
# Keep a trailing path separator: the load cell may concatenate this value
# directly with input_file_name.
input_dir = os.environ.get(
    'PE_INPUT_DIR',
    'Z:/Research/dfuller/Walkabilly/studies/smarphone_accel/data/Ethica_Jaeger_Merged/pocket/',
)
input_file_name = 'pocket-NN-data.npz'

# from the data preparation section we have:
window_size_second = 3  # window length in seconds (per the variable name)
frequency = 30          # presumably the sampling rate in Hz - confirm against the data-prep notebook
# Product of the two values above. The identifier keeps its original
# spelling in case code outside this cell refers to it.
lenght_of_each_seq = window_size_second * frequency


In [None]:
# Hyperparameter search space: each key names a create_model() argument and
# each list holds the candidate values the randomized search samples from.
params = dict(
    # convolution blocks: number of filters (a.k.a. kernels) and kernel length
    n_conv_1=[256, 512, 768],
    k_conv_1=[3, 5],
    n_conv_2=[256, 512, 768],
    k_conv_2=[3, 5],
    n_conv_3=[256, 512, 768],
    k_conv_3=[3, 5],
    # pooling window sizes
    maxpooling_pool_size=[2, 4],
    avepooling_pool_size=[2, 4],
    # dense head: layer widths and dropout rates
    n_dense_1=[256, 512],
    dropout_1=[0.2, 0.4],
    n_dense_2=[256, 512],
    dropout_2=[0.2, 0.4],
    # activation shared by the convolution and hidden dense layers
    activation=['elu', 'relu'],
)

# training:
# n_tune_iter -> n_iter of the randomized search, cv -> number of CV folds
n_tune_iter, cv = 20, 4
# epochs / batch_size are handed to the Keras classifier wrapper
epochs, batch_size = 30, 256


#### Load data

##### For this notebook we use the acceleration data gathered from the pocket location. It was prepared in the DataPrep-Deep notebook.

In [None]:
# read the raw file and get the keys:
# os.path.join yields a valid path whether or not input_dir ends with a
# path separator (plain string concatenation does not).
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
# can execute arbitrary code - only load archives from a trusted source.
raw_data = np.load(file=os.path.join(input_dir, input_file_name), allow_pickle=True)
for k in raw_data.keys():
    print(k)

In [None]:
# Pull the three arrays out of the loaded archive, in a fixed key order.
accel_array, meta_array, labels_array = (
    raw_data[key] for key in ('acceleration_data', 'metadata', 'labels')
)

# Full shape of the acceleration array as a list; create_model() passes
# input_shape[1:] to the first layer.
input_shape = [*accel_array.shape]


#### Preprocess data

#### Convert the labels to integers
In the raw data, the labels are strings and there are 6 classes: 'Lying', 'Sitting', 'Self Pace walk', 'Running 3 METs',
       'Running 5 METs', 'Running 7 METs'. <br>




In [None]:
# One pass over the labels: class_list holds the sorted distinct labels and
# labels_array_int holds, for every sample, the index of its label in class_list.
class_list, labels_array_int = np.unique(labels_array, return_inverse=True)
# The number of classes follows directly from the distinct labels found above.
n_class = len(class_list)

In [None]:
# One-hot encode the integer class codes, with one column per class.
y = to_categorical(
    labels_array_int,
    num_classes=n_class,
)


### Splitting and shuffling the data

In [None]:
# Hold out 10% of the windows for validation; the fixed random_state makes
# the split repeatable between runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    accel_array,
    y,
    test_size=0.1,
    random_state=65,
)


#### Design neural network architecture

In [None]:
# Display the hyperparameter search space defined earlier in the notebook.
params

In [None]:
def create_model(n_conv_1=256, k_conv_1=3, n_conv_2=256, k_conv_2=3, n_conv_3=256, k_conv_3=3,
                 maxpooling_pool_size = 2, avepooling_pool_size = 2, n_dense_1=256, dropout_1=0.2,
                 n_dense_2=256, dropout_2=0.2, activation= 'elu'
                ):
    """Build and compile the 1D-CNN activity classifier.

    Architecture: Conv1D -> MaxPool1D -> Conv1D -> AveragePooling1D -> Conv1D
    -> GlobalAveragePooling1D -> Dense -> Dropout -> Dense -> Dropout
    -> softmax output with ``n_class`` units.

    Reads the notebook-level ``input_shape`` (its first entry is dropped to
    get the per-sample shape) and ``n_class``.

    Args:
        n_conv_1, n_conv_2, n_conv_3: number of filters of each Conv1D layer.
        k_conv_1, k_conv_2, k_conv_3: kernel length of each Conv1D layer.
        maxpooling_pool_size: pool size of the MaxPool1D layer.
        avepooling_pool_size: pool size of the AveragePooling1D layer.
        n_dense_1, n_dense_2: number of units of the two hidden Dense layers.
        dropout_1, dropout_2: dropout rate applied after each hidden Dense layer.
        activation: activation used by the Conv1D and hidden Dense layers.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        'nadam' optimizer, accuracy metric).

    Note:
        The model summary is printed on every call.
    """
    layer_stack = [
        Conv1D(n_conv_1, k_conv_1, activation=activation, input_shape=input_shape[1:]),
        MaxPool1D(pool_size=maxpooling_pool_size),
        Conv1D(n_conv_2, k_conv_2, activation=activation),
        AveragePooling1D(pool_size=avepooling_pool_size),
        Conv1D(n_conv_3, k_conv_3, activation=activation),
        # (alternative pooling head: GlobalMaxPooling1D())
        GlobalAveragePooling1D(),
        Dense(n_dense_1, activation=activation),
        Dropout(dropout_1),
        Dense(n_dense_2, activation=activation),
        Dropout(dropout_2),
        Dense(n_class, activation='softmax'),
    ]

    network = Sequential(layer_stack)
    network.summary()
    network.compile(
        loss='categorical_crossentropy',
        optimizer='nadam',
        metrics=['accuracy'],
    )
    return network

In [None]:
# Build one model with the default hyperparameters as a quick sanity check.
# create_model() already prints the summary, so the explicit call below
# prints it a second time.
model_default = create_model()
model_default.summary()

In [None]:
# Wrap the model factory so scikit-learn's search can use it as an estimator;
# epochs and batch_size come from the training settings defined earlier.
model = KerasClassifier(
    build_fn=create_model,
    epochs=epochs,
    batch_size=batch_size,
)

In [None]:
%%time
rscv = RandomizedSearchCV(model, param_distributions=params, cv=cv, n_iter=n_tune_iter)
rscv_results = rscv.fit(X_train,y_train)

In [None]:
# Report the best cross-validated score and the parameters that produced it.
summary_template = 'Best score is: {} using {}'
print(summary_template.format(rscv_results.best_score_, rscv_results.best_params_))

In [None]:
# Keep a handle on the estimator the search selected as best.
best_model = rscv_results.best_estimator_

### Test the best model based on the validation data

In [None]:
# Predict on the held-out validation split.
# NOTE(review): presumably returns integer class indices (the next cell
# one-hot encodes them) - confirm against the wrapper's predict() behaviour.
y_hat = best_model.predict(X_valid)

In [None]:
# One-hot encode the predictions. num_classes is passed explicitly so the
# result always has n_class columns like y_valid; without it the width is
# inferred from the largest predicted index and comes out too narrow when the
# highest class is never predicted.
y_hat = to_categorical(y_hat, num_classes=n_class)
# Show the first encoded prediction.
y_hat[0]

In [None]:
# Show the first validation label for comparison with the first prediction.
y_valid[0]

In [None]:
# Histogram of the prediction values with a vertical marker at 0.5.
hist_fig, hist_ax = plt.subplots()
hist_ax.hist(y_hat)
_ = hist_ax.axvline(x=0.5, color='orange')

In [None]:
from sklearn.metrics import accuracy_score as score

# Round the predictions to 0/1 and compare them row by row with the labels.
y_pred_classes = np.round(y_hat)
print(y_valid[0], y_hat[0], y_pred_classes[0])

# Accuracy on the validation split, as a percentage.
acc = 100 * score(y_valid, y_pred_classes)
acc

In [None]:
# ROC AUC needs continuous scores. y_hat holds hard one-hot predictions,
# which reduce every ROC curve to a single operating point, so score the
# predicted class probabilities instead.
y_proba = best_model.predict_proba(X_valid)
pct_auc = roc_auc_score(y_valid, y_proba)*100.0

In [None]:
# Display the AUC percentage with two decimals.
"{:0.2f}".format(pct_auc)

In [None]:
# Round the first six entries of every prediction row to 3 decimals.
# A comprehension is used so the loop variable stays local and does not
# overwrite the notebook-level `y` (the one-hot label matrix).
float_y_hat = [pred_row[0:6].round(3) for pred_row in y_hat]

In [None]:
# Pair every rounded prediction with its validation label, one pair per row.
paired_rows = list(zip(float_y_hat, y_valid))
ydf = pd.DataFrame(paired_rows, columns=['y_hat', 'y'])

In [None]:
# Preview the first ten prediction/label pairs.
ydf.head(10)

In [None]:
# Tabulate every sampled parameter combination with the mean and standard
# deviation of its cross-validated test score, best combination first.
cv_results = rscv_results.cv_results_
results_df = pd.DataFrame(cv_results['params']).assign(
    mean=cv_results['mean_test_score'],
    std=cv_results['std_test_score'],
)
results_df.sort_values(by='mean', ascending=False, ignore_index=False)