<a href="https://colab.research.google.com/github/jonkrohn/DLTFpT/blob/master/notebooks/dense_sentiment_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dense Activity Classifier

In this notebook, we build a dense neural net to classify PE activity.

#### Load dependencies

In [43]:
import tensorflow
from tensorflow.keras.datasets import imdb # new!  # delete later
from tensorflow.keras.preprocessing.sequence import pad_sequences #new!
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv1D, GlobalMaxPooling1D
from tensorflow.keras.layers import Embedding # new!
from tensorflow.keras.callbacks import ModelCheckpoint # new! 
import os # new! 
from sklearn.metrics import roc_auc_score, roc_curve # new!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt # new!

#### Set hyperparameters

In [None]:
# from sklearn.preprocessing import OneHotEncoder
# encoder = OneHotEncoder()
# encoder.fit(label_array)
# encoder.categories_

# label_array.shape

In [41]:
# --- paths ---
# Directory that receives the per-epoch weight checkpoints.
output_dir = 'model_output/dense'
# NOTE(review): absolute, machine-specific location; edit before running on
# another machine. The trailing slash matters because the file name is
# appended by plain string concatenation when the archive is loaded.
input_dir =  'Z:/Research/dfuller/Walkabilly/studies/smarphone_accel/data/Ethica_Jaeger_Merged/pocket/'
input_file_name = 'pocket-NN-data.npz'

# --- windowing (values carried over from the data preparation section) ---
window_size_second = 3
frequency = 30
# Samples per window (3 * 30 = 90). The misspelled name is kept on purpose so
# any existing reference to it keeps working.
lenght_of_each_seq = window_size_second * frequency

# --- convolutional layer architecture ---
n_conv = 256 # filters, a.k.a. kernels
k_conv = 3 # kernel length

# --- dense layer architecture ---
# This cell used to assign n_dense/dropout twice (256 / 0.2, then 64 / 0.5).
# Only the last assignment ever took effect, so the shadowed pair was removed
# and the effective values are stated once.
n_dense = 64
dropout = 0.5

# --- training ---
epochs = 4
batch_size = 128

# --- vector-space embedding settings ---
# Not used by the convolutional model; retained only because later cells in
# this notebook still reference n_dim and max_review_length.
n_dim = 64
n_unique_words = 5000 # as per Maas et al. (2011); may not be optimal
n_words_to_skip = 50 # ditto
max_review_length = 100
pad_type = trunc_type = 'pre'

#### Load data

##### For this notebook we use the acceleration data gathered from the pocket location. It was prepared in the DataPrep-Deep notebook

In [29]:
# Open the .npz archive and print the name of every array stored in it.
# NOTE(review): allow_pickle=True permits unpickling of object arrays, which
# can execute arbitrary code -- only load archives from a trusted source.
raw_data = np.load(
    file=input_dir + input_file_name,
    allow_pickle=True,
)
for array_name in raw_data.keys():
    print(array_name)

acceleration_data
metadata
labels


In [30]:
# Pull the three stored arrays out of the archive by name.
accel_array, meta_array, labels_Array = (
    raw_data[name] for name in ('acceleration_data', 'metadata', 'labels')
)


#### Preprocess data

In [56]:
# No sequence padding is applied here: accel_array is a single regular array,
# so every window already has the same length.

# Shape of the full data array as a list. The model cell slices off the
# leading (sample-count) axis to obtain the per-window input shape.
input_shape = [*accel_array.shape]
input_shape

[70392, 90, 3]

#### Design neural network architecture

In [33]:
# model = Sequential()
# # model.add(Embedding(n_unique_words, n_dim, input_length=max_review_length))
# model.add(Flatten())
# model.add(Dense(n_dense, activation='relu'))
# model.add(Dropout(dropout))
# # model.add(Dense(n_dense, activation='relu'))
# # model.add(Dropout(dropout))
# model.add(Dense(1, activation='sigmoid')) # mathematically equivalent to softmax with two classes

In [58]:
# 1-D convolutional classifier: conv -> global max pool -> dense -> dropout -> output.

model = Sequential()
# input_shape[1:] drops the leading sample-count axis, leaving the shape of one window.
model.add(Conv1D(n_conv, k_conv, activation='relu', input_shape=input_shape[1:]))
# model.add(Conv1D(n_conv, k_conv, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(n_dense, activation='relu'))
model.add(Dropout(dropout))
# A single-unit output must use sigmoid. Softmax normalises across the units of
# the layer, so with one unit it always emits exactly 1.0 and nothing can be learned.
# NOTE(review): this assumes binary labels, consistent with the
# binary_crossentropy loss passed to model.compile. For more than two classes
# use Dense(n_classes, activation='softmax') with a categorical loss instead.
model.add(Dense(1, activation='sigmoid'))

In [59]:
model.summary() # layer-by-layer output shapes and parameter counts

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_3 (Conv1D)            (None, 88, 256)           2560      
_________________________________________________________________
global_max_pooling1d_1 (Glob (None, 256)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 64)                16448     
_________________________________________________________________
dropout_3 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 65        
Total params: 19,073
Trainable params: 19,073
Non-trainable params: 0
_________________________________________________________________


# Continue from here:

Prepare the train/test data by shuffling, and then

<br>
change the loss function and ...

In [None]:
# ...flatten:
# NOTE(review): these values describe the embedding settings, not the
# convolutional model built above -- appears to be a leftover; TODO confirm.
max_review_length, n_dim, n_dim*max_review_length

In [None]:
# ...dense:
# NOTE(review): counts parameters for a dense layer fed by a flattened
# embedding, which the convolutional model above does not contain -- TODO confirm.
n_dense, n_dim*max_review_length*n_dense + n_dense # weights + biases

In [None]:
# ...and output:
# n_dense weights plus one bias for a single output unit.
n_dense + 1 

#### Configure model

In [None]:
# Binary cross-entropy loss with the Nadam optimizer; accuracy is reported
# alongside the loss during training.
model.compile(
    loss='binary_crossentropy',
    optimizer='nadam',
    metrics=['accuracy'],
)

In [None]:
# Checkpoint callback. The "{epoch:02d}" placeholder in the path is filled in
# with the epoch number, so each saved epoch gets its own weights file.
modelcheckpoint = ModelCheckpoint(
    filepath=output_dir + "/weights.{epoch:02d}.hdf5",
)

In [None]:
# Create the checkpoint directory (and any missing parents). exist_ok=True
# makes this safe to re-run and avoids the check-then-create race of a
# separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

#### Train!

In [None]:
# Train on the training split, evaluate on the validation split after every
# epoch, and let the checkpoint callback save weights along the way.
# NOTE(review): x_train / y_train / x_valid / y_valid are not assigned in any
# cell visible in this notebook -- the train/validation split still has to be
# created before this cell can run.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_valid, y_valid),
    callbacks=[modelcheckpoint],
)

#### Evaluate

In [None]:
# Restore the weights that the checkpoint callback saved for epoch 2.
model.load_weights(output_dir+"/weights.02.hdf5") # epoch numbers in the file names are NOT zero-indexed

In [None]:
# Predicted scores for the validation inputs.
# Sequential.predict_proba was deprecated and then removed from tf.keras;
# predict() returns the same output-layer activations and works on all versions.
y_hat = model.predict(x_valid)

In [None]:
# Number of predictions produced for the validation inputs.
len(y_hat)

In [None]:
# Model output for the first validation sample.
y_hat[0]

In [None]:
# Label of the first validation sample, for comparison with its prediction.
y_valid[0]

In [None]:
# Histogram of the predicted scores with a vertical marker at 0.5.
# The trailing semicolon keeps the Line2D repr out of the cell output.
plt.hist(y_hat)
plt.axvline(x=0.5, color='orange');

In [None]:
# ROC AUC on the validation split, expressed as a percentage.
pct_auc = 100.0 * roc_auc_score(y_valid, y_hat)

In [None]:
"{:0.2f}".format(pct_auc)

In [None]:
# Take the first element of each prediction row to get a flat list of scores.
float_y_hat = [prediction[0] for prediction in y_hat]

In [None]:
# Pair each predicted score with its validation label, one row per sample.
ydf = pd.DataFrame(list(zip(float_y_hat, y_valid)), columns=['y_hat', 'y'])

In [None]:
# First ten prediction/label pairs.
ydf.head(10)

In [None]:
# NOTE(review): index_word and all_x_valid are not assigned in any cell visible
# in this notebook; this looks like a leftover from a text-based workflow and
# will raise NameError on a fresh kernel -- TODO confirm and remove or replace.
' '.join(index_word[id] for id in all_x_valid[0])

In [None]:
# NOTE(review): index_word and all_x_valid are not assigned in any cell visible
# in this notebook; this looks like a leftover from a text-based workflow and
# will raise NameError on a fresh kernel -- TODO confirm and remove or replace.
' '.join(index_word[id] for id in all_x_valid[6]) 

In [None]:
# Samples labelled 0 that the model scored above 0.9 (first ten).
ydf.loc[(ydf['y'] == 0) & (ydf['y_hat'] > 0.9)].head(10)

In [None]:
# NOTE(review): index_word and all_x_valid are not assigned in any cell visible
# in this notebook; this looks like a leftover from a text-based workflow and
# will raise NameError on a fresh kernel -- TODO confirm and remove or replace.
' '.join(index_word[id] for id in all_x_valid[386]) 

In [None]:
# Samples labelled 1 that the model scored below 0.1 (first ten).
ydf.loc[(ydf['y'] == 1) & (ydf['y_hat'] < 0.1)].head(10)

In [None]:
# NOTE(review): index_word and all_x_valid are not assigned in any cell visible
# in this notebook; this looks like a leftover from a text-based workflow and
# will raise NameError on a fresh kernel -- TODO confirm and remove or replace.
' '.join(index_word[id] for id in all_x_valid[224]) 