# Initialization

In [None]:
from google.colab import drive

# Mount point inside the Colab VM; the %cd cell below navigates under it.
GDRIVE_MOUNT_POINT = '/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

In [None]:
# Switch the working directory to the project folder on the mounted drive.
# The relative paths used later (Dataset/..., checkpoint folder, saved model)
# are resolved against this directory; presumably the local `utils` package
# is imported from here as well -- TODO confirm.
%cd /gdrive/My Drive/phuc_code_file

# Libraries Import

In [None]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
import numpy as np 
import pandas as pd 
from google.colab import widgets
import matplotlib.pyplot as plt
from sklearn import preprocessing 
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score
from utils import file_helper, feature_extraction

# Data Pipeline (binary labels)

In [None]:
# Pre-split arrays for the binary-label experiment, loaded in the order
# train -> validation -> test (features first, then labels).
BINARY_SPLIT_DIR = 'Dataset/split_data/Binary Split'

train_X, train_y, val_X, val_y, test_X, test_y = (
    np.load(f'{BINARY_SPLIT_DIR}/{array_name}.npy')
    for array_name in ('X_train', 'y_train', 'X_val', 'y_val', 'X_test', 'y_test')
)

# Label encoding used by these files:
#   class        label_encoded
#   Human              1
#   Non_human          0

In [None]:
# Batching / shuffling configuration for the tf.data pipelines.
BATCH_SIZE = 64
# NOTE(review): a 100-element buffer only shuffles locally; if the saved
# arrays are ordered (e.g. by class) batches may be poorly mixed -- confirm
# how the split files were written.
SHUFFLE_BUFFER_SIZE = 100

# Train and validation pipelines are shuffled before batching.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_X, train_y))
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE)
)
val_dataset = (
    tf.data.Dataset.from_tensor_slices((val_X, val_y))
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE)
)
# The test pipeline keeps the on-disk order.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_X, test_y))
    .batch(BATCH_SIZE)
)


# Data Pipeline for One-Hot-Encoded (OHE) Labels

In [None]:
# Opt-in switch for the one-hot-encoded (OHE) experiment.
#
# This cell assigns the same variable names as the binary-label loading cell
# earlier in the notebook. Loading unconditionally meant that a top-to-bottom
# run always ended up with the 2-column OHE targets, while the model head
# (Dense(1, sigmoid)), the loss and the report labels further down are set up
# for the single-column binary labels. Keep this False for the binary
# experiment; set it to True (and switch the model head / report labels to
# their OHE variants) to run the OHE experiment.
USE_OHE_LABELS = False

# Label encoding used by the OHE files:
#   class        index   one-hot
#   Human          0     [1, 0]
#   Non_human      1     [0, 1]

if USE_OHE_LABELS:
    OHE_SPLIT_DIR = 'Dataset/split_data/Binary_Split_OHE'
    train_X = np.load(f'{OHE_SPLIT_DIR}/X_train.npy')
    train_y = np.load(f'{OHE_SPLIT_DIR}/y_train.npy')
    val_X = np.load(f'{OHE_SPLIT_DIR}/X_val.npy')
    val_y = np.load(f'{OHE_SPLIT_DIR}/y_val.npy')
    test_X = np.load(f'{OHE_SPLIT_DIR}/X_test.npy')
    test_y = np.load(f'{OHE_SPLIT_DIR}/y_test.npy')

In [None]:
# Rebuild the tf.data pipelines from whichever train/val/test arrays are
# currently bound to train_X / train_y / val_X / val_y / test_X / test_y.
BATCH_SIZE = 64
SHUFFLE_BUFFER_SIZE = 100

# Train and validation pipelines are shuffled before batching.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_X, train_y))
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE)
)
val_dataset = (
    tf.data.Dataset.from_tensor_slices((val_X, val_y))
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE)
)
# The test pipeline keeps the on-disk order.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_X, test_y))
    .batch(BATCH_SIZE)
)


# Build CNN Model

In [None]:
# Reset Keras' global state before (re)building the model in this cell.
tf.keras.backend.clear_session()

# Metrics tracked during fit/evaluate; the names are the keys that show up in
# the training history (e.g. "accuracy" / "val_accuracy").
METRICS = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='auc'),
]

keras_layers = tf.keras.layers


def _conv_block(filters, **conv_kwargs):
    """Return the ordered layers Conv1D(k=3, same) -> ReLU -> BatchNorm -> Dropout(0.2)."""
    return [
        keras_layers.Conv1D(filters, kernel_size=3, padding='same', **conv_kwargs),
        keras_layers.Activation('relu'),
        keras_layers.BatchNormalization(),
        keras_layers.Dropout(0.2),
    ]


# NOTE(review): input_shape=(1, 5704) gives Conv1D a sequence axis of length 1
# with 5704 channels, so the width-3 kernel only ever covers one real step
# (the rest is 'same' padding). Confirm this layout is intended rather than
# (5704, 1).
model = tf.keras.models.Sequential(
    _conv_block(32, input_shape=(1, 5704))      # 1st conv block
    + _conv_block(64)                           # 2nd conv block
    + [
        keras_layers.Flatten(),
        keras_layers.Dense(128, activation='relu'),
        keras_layers.Dropout(0.4),
        keras_layers.Dense(1, activation='sigmoid'),  # For binary
        # keras_layers.Dense(2, activation='softmax')  # For binary OHE
    ]
)




## Optimizer

In [None]:
from tensorflow.keras.optimizers import RMSprop,Adam

# Compile the model for binary classification: RMSprop (learning rate 1e-2),
# binary cross-entropy loss, and the METRICS list defined with the model.
# RMSprop is the only optimizer in effect; the SGD instance that used to be
# created here was never passed to compile() and used the deprecated `lr`
# keyword (the supported name is `learning_rate`), so it has been dropped.
model.compile(
    optimizer=RMSprop(learning_rate=1e-2),
    loss=tf.keras.losses.binary_crossentropy,
    metrics=METRICS,
)


In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

## Checkpoints and Training Callbacks

In [None]:
import os
from tensorflow.keras.callbacks import EarlyStopping

# Learning-rate schedule: multiply the LR by 0.1 when val_loss has not
# improved for 10 epochs, never going below 1e-9.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=10,
    verbose=1,
    min_lr=1e-9,
)

# Early stopping with a patience of 5 epochs.
# NOTE(review): this callback is created but is not in the callbacks list
# given to model.fit in the training cell -- confirm that is intentional.
early_stop = EarlyStopping(patience=5, verbose=1)

# Checkpoint files are named after the epoch number (cp-0010.ckpt, ...).
# NOTE(review): the folder is called "CNN_Binary_OHE" although the active
# model head is the binary (non-OHE) one -- confirm the intended folder.
checkpoint_path = "CNN_Binary_OHE/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Save weights only, once every 10 epochs.
# NOTE(review): `period` is documented as deprecated in favour of
# `save_freq`; revisit when upgrading TensorFlow/Keras.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=1,
    period=10,
)


# Train Model 

In [None]:
# Training run configuration.
EPOCHS = 100
training_callbacks = [reduce_lr, cp_callback]

# Fit on the training pipeline and validate on the validation pipeline after
# each epoch; the returned History object feeds the plots further down.
model_train = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=EPOCHS,
    callbacks=training_callbacks,
)

In [None]:
# Persist the trained model to a single HDF5 file in the working directory.
model.save("CNN_Binary.h5")

# Plot loss vs epoch and accuracy vs epoch charts

In [None]:
# Training vs. validation loss per epoch.
loss_fig, loss_ax = plt.subplots(figsize=(15, 4))
for history_key, series_label in (("loss", "Train"), ("val_loss", "Valid")):
    loss_ax.plot(model_train.epoch, model_train.history[history_key], label=series_label)
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("loss")
loss_ax.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
acc_fig, acc_ax = plt.subplots(figsize=(15, 4))
for history_key, series_label in (("accuracy", "Train"), ("val_accuracy", "Valid")):
    acc_ax.plot(model_train.epoch, model_train.history[history_key], label=series_label)
acc_ax.set_xlabel("Epoch")
acc_ax.set_ylabel("Accuracy")
acc_ax.legend()
plt.show()

# Test the model

In [None]:
EVAL_BATCH_SIZE = 64

# Raw model outputs on the test features; the reporting cells below turn
# these into hard class predictions.
test_eval = model.predict(test_X, batch_size=EVAL_BATCH_SIZE)

# Cell output: the loss followed by the compiled metrics
# (accuracy, precision, recall, auc).
model.evaluate(test_X, test_y, batch_size=EVAL_BATCH_SIZE, verbose=1)

# Recorded output from a previous run:
# 183/183 [==============================] - 1s 6ms/step - loss: 0.1065 - accuracy: 0.9687 - precision: 0.9577 - recall: 0.9658 - auc: 0.9920
# [0.10650496929883957,
#  0.9686751365661621,
#  0.9577171206474304,
#  0.965763509273529,
#  0.9919524788856506]

In [None]:
# Turn the model outputs into hard 0/1 predictions with an explicit cut-off.
# An output counts as the positive class only when it is strictly above 0.5,
# which is what the previous np.round(...) call produced for probabilities
# (round-half-to-even sends exactly 0.5 to 0). The result keeps the shape and
# dtype of `test_eval`, so the confusion-matrix cell can reuse `pred` as is.
DECISION_THRESHOLD = 0.5
pred = (test_eval > DECISION_THRESHOLD).astype(test_eval.dtype)

# Display names in label order (0 first, then 1).
label = ["NON_HUMAN", "HUMAN"]  # For binary
# label = ["HUMAN","NON_HUMAN"] #For binary OHE

# classification_report is imported in the notebook's import cell.
classification_metrics = classification_report(test_y, pred, target_names=label)
print(classification_metrics)

# Recorded output from a previous run:
#               precision    recall  f1-score   support

#    NON_HUMAN       0.98      0.97      0.97      6923
#        HUMAN       0.96      0.97      0.96      4761

#     accuracy                           0.97     11684
#    macro avg       0.97      0.97      0.97     11684
# weighted avg       0.97      0.97      0.97     11684

In [None]:
from utils.confusion_matrix_pretty_print import plot_confusion_matrix_from_data

# Display options handed positionally to the project's plotting helper.
# NOTE(review): the helper's signature is not visible here; the meaning of
# each option is presumed from its name -- verify against the helper.
columns = ["NON_HUMAN", "HUMAN"]
annot = True
cmap = 'Oranges'
fmt = '.2f'
lw = 0.5
cbar = False
show_null_values = 2
pred_val_axis = 'y'

# Font size and figure size: with more than 10 test samples use a smaller
# font on a larger canvas, otherwise fall back to the compact defaults.
has_many_samples = len(test_y) > 10
fz = 9 if has_many_samples else 12
figsize = [14, 14] if has_many_samples else [9, 9]

plot_confusion_matrix_from_data(
    test_y, pred, columns,
    annot, cmap, fmt, fz, lw, cbar, figsize,
    show_null_values, pred_val_axis,
)