# Artificial Neural Networks with the help of Tensor Flow

In [1]:
from IPython.core.display import display, HTML
display(HTML("""<a href="https://www.tensorflow.org/tutorials/keras/classification">Tensor Flow Documentation</a>"""))

In [None]:
# Tensorflow ANN
# https://www.tensorflow.org/tutorials/keras/classification

from keras import backend as K

# Defing F1 function as new documentation doesn't have function to predict F1-Matric.
# Calculate Recall
def f1(y_true, y_pred):
    def recall(y_true, y_pred):
        """Recall metric.

        Only computes a batch-wise average of recall.

        Computes the recall, a metric for multi-label classification of
        how many relevant items are selected.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall
    
# Calculating Precision for F1-Score
    def precision(y_true, y_pred):
        """Precision metric.

        Only computes a batch-wise average of precision.

        Computes the precision, a metric for multi-label classification of
        how many selected items are relevant.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision
    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    
# Calculating F1 by putting recall and precision in the formulae
# Epsilon is an arbitrary smaller value used to avoid devide by 0 error.
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [None]:
# Loading data from test and train files from google drive
from joblib import load
X_tr=load("/content/drive/My Drive/Data set/X_tr.joblib")
X_test=load("/content/drive/My Drive/Data set/X_test.joblib")
y_tr=load("/content/drive/My Drive/Data set/y_tr.joblib")
y_test=load("/content/drive/My Drive/Data set/y_test.joblib")

In [None]:
print(X_tr.shape,y_tr.shape,X_test.shape,y_test.shape)

(125973, 122) (125973,) (22544, 122) (22544,)


In [None]:
!pip install optuna



In [None]:
from keras.utils import np_utils

# converting class labels to one hot encoded features
y_tr=np_utils.to_categorical(y_tr)
y_test=np_utils.to_categorical(y_test)
print(y_tr.shape,y_test.shape)

# Getting the input and output from the data set.
input_dimensions=X_tr.shape[1]
output_dimension=y_tr.shape[1]

(125973, 5) (22544, 5)


In [6]:
from IPython.core.display import display, HTML
display(HTML("""<a href="https://optuna.org/#code_examples">Optuna</a>"""))

In [None]:
# Optuna citation - https://optuna.org/#code_examples
import keras
import tensorflow as tf
from tensorflow.keras.layers import Activation, Dropout, Dense, BatchNormalization
import optuna

# Defining an objective function which needs to be maximized.
def objective(trial):

    # Hard coded values (not changing with respect to code)
    batch_size=128
    epochs=15
    batch_normalization_after_layer=2

    # define search space; number of hidden layers, dropout rate, activation function.
    num_hidden_layers=trial.suggest_int("number_of_hidden_layers",1,10)
    drop_out_rate=trial.suggest_uniform("dr_rate",0,1)
    activation_list=["tanh","sigmoid","swish","relu"]
    activation_type=trial.suggest_categorical("activation_type",activation_list)
    print(num_hidden_layers,drop_out_rate,activation_type)

    # Initializing the model
    model=tf.keras.Sequential()

      # Here, 1 is the hidden layer. So, we are starting from the 1st hidden layer
      # Because 0th layer is the input of dimensions i.e. number of columns
    for layer in range(1,num_hidden_layers+1):
        # We are going to add number of hidden units.
        num_hidden_units=trial.suggest_int("num_hidden_units_{}".format(layer),2,96)
        if(layer==1):
          model.add(Dense(num_hidden_units,activation=activation_type,input_shape=(input_dimensions,)))
        else:
          model.add(Dense(num_hidden_units,activation=activation_type))
        # Adding dropout for randomly discarding neurons
        model.add(Dropout(drop_out_rate))
        # Batch normalization after 3rd layer
        if(layer%batch_normalization_after_layer+1==0):
          model.add(BatchNormalization())

    # For out put we use SOFTMAX
    # Softmax by default which provides probabilities
    # Softmax function inbuilt in tensorflow
    model.add(Dense(output_dimension,activation="softmax"))
    
    # Compilation of the model
    # Choosing optimiser "adam" whcih is better than "Gradient Descent".
    model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=[f1,"accuracy"])
    
    # Fitting the model
    # Returning from F1-Score
    model.fit(X_tr,y_tr,batch_size=batch_size,epochs=epochs,validation_data=(X_test,y_test))
    results=model.evaluate(X_test,y_test)
    print("="*20)
    print("this is my loss:{}".format(results[0]))
    print("this is my accuracy:{}".format(results[1]))
    print("="*20)

    return results[2]

# Creating a study object and optimizing the objective function.
study = optuna.create_study(direction='maximize',study_name="NN_tuning",storage="sqlite:///nn_net.db",load_if_exists=True)
# n_trials means number of trials
study.optimize(objective, n_trials=100)

[I 2020-08-03 01:26:25,715] Using an existing study with name 'NN_tuning' instead of creating a new one.


9 0.032642182568550754 tanh
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
this is my loss:1.5316128730773926
this is my accuracy:0.7338550090789795


[I 2020-08-03 01:28:28,735] Trial 203 finished with value: 0.7334989309310913 and parameters: {'number_of_hidden_layers': 9, 'dr_rate': 0.032642182568550754, 'activation_type': 'tanh', 'num_hidden_units_1': 20, 'num_hidden_units_2': 15, 'num_hidden_units_3': 62, 'num_hidden_units_4': 67, 'num_hidden_units_5': 12, 'num_hidden_units_6': 9, 'num_hidden_units_7': 50, 'num_hidden_units_8': 78, 'num_hidden_units_9': 82}. Best is trial 41 with value: 0.7686746120452881.


8 0.1646254382716193 tanh
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
this is my loss:1.2745413780212402
this is my accuracy:0.7388160824775696


[I 2020-08-03 01:30:27,215] Trial 204 finished with value: 0.7385113835334778 and parameters: {'number_of_hidden_layers': 8, 'dr_rate': 0.1646254382716193, 'activation_type': 'tanh', 'num_hidden_units_1': 27, 'num_hidden_units_2': 11, 'num_hidden_units_3': 59, 'num_hidden_units_4': 64, 'num_hidden_units_5': 42, 'num_hidden_units_6': 11, 'num_hidden_units_7': 77, 'num_hidden_units_8': 75}. Best is trial 41 with value: 0.7686746120452881.


8 0.0942059599658536 tanh
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
this is my loss:1.4838354587554932
this is my accuracy:0.737483561038971


[I 2020-08-03 01:32:24,600] Trial 205 finished with value: 0.7300834059715271 and parameters: {'number_of_hidden_layers': 8, 'dr_rate': 0.0942059599658536, 'activation_type': 'tanh', 'num_hidden_units_1': 24, 'num_hidden_units_2': 16, 'num_hidden_units_3': 53, 'num_hidden_units_4': 71, 'num_hidden_units_5': 30, 'num_hidden_units_6': 16, 'num_hidden_units_7': 67, 'num_hidden_units_8': 93}. Best is trial 41 with value: 0.7686746120452881.


5 0.0660467509193451 tanh
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
this is my loss:1.5514171123504639
this is my accuracy:0.7505310773849487


[I 2020-08-03 01:34:05,680] Trial 206 finished with value: 0.7503992319107056 and parameters: {'number_of_hidden_layers': 5, 'dr_rate': 0.0660467509193451, 'activation_type': 'tanh', 'num_hidden_units_1': 28, 'num_hidden_units_2': 8, 'num_hidden_units_3': 70, 'num_hidden_units_4': 69, 'num_hidden_units_5': 48}. Best is trial 41 with value: 0.7686746120452881.


6 0.018462746578715685 tanh
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
this is my loss:1.6061855554580688
this is my accuracy:0.7373029589653015


[I 2020-08-03 01:36:32,756] Trial 207 finished with value: 0.7372249960899353 and parameters: {'number_of_hidden_layers': 6, 'dr_rate': 0.018462746578715685, 'activation_type': 'tanh', 'num_hidden_units_1': 26, 'num_hidden_units_2': 24, 'num_hidden_units_3': 63, 'num_hidden_units_4': 59, 'num_hidden_units_5': 14, 'num_hidden_units_6': 23}. Best is trial 41 with value: 0.7686746120452881.


9 0.04269502517665 relu
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
this is my loss:1.2421414852142334
this is my accuracy:0.7518510818481445


[I 2020-08-03 01:38:52,778] Trial 208 finished with value: 0.7511089444160461 and parameters: {'number_of_hidden_layers': 9, 'dr_rate': 0.04269502517665, 'activation_type': 'relu', 'num_hidden_units_1': 23, 'num_hidden_units_2': 12, 'num_hidden_units_3': 47, 'num_hidden_units_4': 62, 'num_hidden_units_5': 33, 'num_hidden_units_6': 74, 'num_hidden_units_7': 57, 'num_hidden_units_8': 85, 'num_hidden_units_9': 60}. Best is trial 41 with value: 0.7686746120452881.


8 0.4711830549088746 tanh
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
this is my loss:1.2797602415084839
this is my accuracy:0.6617618799209595


[I 2020-08-03 01:40:46,390] Trial 209 finished with value: 0.6618612408638 and parameters: {'number_of_hidden_layers': 8, 'dr_rate': 0.4711830549088746, 'activation_type': 'tanh', 'num_hidden_units_1': 19, 'num_hidden_units_2': 4, 'num_hidden_units_3': 57, 'num_hidden_units_4': 65, 'num_hidden_units_5': 19, 'num_hidden_units_6': 58, 'num_hidden_units_7': 19, 'num_hidden_units_8': 57}. Best is trial 41 with value: 0.7686746120452881.


6 0.11241511685555448 tanh
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
this is my loss:1.4205650091171265
this is my accuracy:0.7600280046463013


[I 2020-08-03 01:42:30,561] Trial 210 finished with value: 0.7598917484283447 and parameters: {'number_of_hidden_layers': 6, 'dr_rate': 0.11241511685555448, 'activation_type': 'tanh', 'num_hidden_units_1': 30, 'num_hidden_units_2': 81, 'num_hidden_units_3': 50, 'num_hidden_units_4': 75, 'num_hidden_units_5': 16, 'num_hidden_units_6': 66}. Best is trial 41 with value: 0.7686746120452881.


6 0.1349009888933395 tanh
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
this is my loss:1.5772992372512817
this is my accuracy:0.7333487272262573


[I 2020-08-03 01:44:16,086] Trial 211 finished with value: 0.7334102392196655 and parameters: {'number_of_hidden_layers': 6, 'dr_rate': 0.1349009888933395, 'activation_type': 'tanh', 'num_hidden_units_1': 30, 'num_hidden_units_2': 81, 'num_hidden_units_3': 51, 'num_hidden_units_4': 74, 'num_hidden_units_5': 17, 'num_hidden_units_6': 65}. Best is trial 41 with value: 0.7686746120452881.


6 0.1136652994209555 tanh
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Buffered data was truncated after reaching the output size limit.

In [None]:
# Again loading the object from the nn_net file and updating it again.
study = optuna.create_study(direction='maximize',study_name="NN_tuning",storage="sqlite:///nn_net.db",load_if_exists=True)
import pandas as pd
print("best_params",study.best_params)
print("f1--",study.best_value)
print("best trial--",study.best_trial)

[I 2020-08-03 03:45:42,163] Using an existing study with name 'NN_tuning' instead of creating a new one.


best_params {'activation_type': 'tanh', 'dr_rate': 0.03587723263680438, 'num_hidden_units_1': 29, 'num_hidden_units_2': 49, 'num_hidden_units_3': 62, 'num_hidden_units_4': 77, 'num_hidden_units_5': 20, 'num_hidden_units_6': 70, 'number_of_hidden_layers': 6}
f1-- 0.7686746120452881
best trial-- FrozenTrial(number=41, value=0.7686746120452881, datetime_start=datetime.datetime(2020, 8, 2, 18, 35, 9, 922652), datetime_complete=datetime.datetime(2020, 8, 2, 18, 36, 15, 386685), params={'activation_type': 'tanh', 'dr_rate': 0.03587723263680438, 'num_hidden_units_1': 29, 'num_hidden_units_2': 49, 'num_hidden_units_3': 62, 'num_hidden_units_4': 77, 'num_hidden_units_5': 20, 'num_hidden_units_6': 70, 'number_of_hidden_layers': 6}, distributions={'activation_type': CategoricalDistribution(choices=('tanh', 'sigmoid', 'swish', 'relu')), 'dr_rate': UniformDistribution(high=1, low=0), 'num_hidden_units_1': IntUniformDistribution(high=96, low=2, step=1), 'num_hidden_units_2': IntUniformDistribution(h

In [None]:
# Recreating the ANN to check is everything alright
from tensorflow.keras.layers import Activation, Dropout, Dense, BatchNormalization
import tensorflow as tf

# Best parameters we obtained from optuna
# Now, running again to make sure is everything fine
drop_out_rate=0.0358
batch_size=128
epochs=20

print(type(f1))
callback = tf.keras.callbacks.EarlyStopping(monitor="accuracy")
# Here, 29, 49, 62, 77 are number of hidden neurons
model=tf.keras.Sequential()
model.add(Dense(29,activation="tanh",input_shape=(input_dimensions,)))
model.add(Dropout(drop_out_rate))
model.add(Dense(49,activation="tanh"))
model.add(Dropout(drop_out_rate))
model.add(Dense(62,activation="tanh"))
model.add(Dropout(drop_out_rate))
model.add(BatchNormalization())
model.add(Dense(77,activation="tanh"))
model.add(Dropout(drop_out_rate))
model.add(Dense(20,activation="tanh"))
model.add(Dropout(drop_out_rate))
model.add(Dense(70,activation="tanh"))
model.add(Dropout(drop_out_rate))
model.add(BatchNormalization())
# For generating output we select 5 nodes as we have 5 classes.
model.add(Dense(5,activation="softmax"))

model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=[f1,"accuracy"])

model.fit(X_tr,y_tr,batch_size=batch_size,epochs=epochs,validation_data=(X_test,y_test),callbacks=[callback])
results=model.evaluate(X_test,y_test)

print("="*20)
print("this is my loss:{}".format(results[0]))
print("this is my accuracy:{}".format(results[1]))
print("this is my f1:{}".format(results[2]))
print("="*20)


<class 'function'>
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20

In [None]:
# Saving the model in the google drive
model.save('my_model.h5')