<a href="https://colab.research.google.com/github/shuchimishra/Tensorflow_projects/blob/main/Tensorflow_Code/NLP/Sarcasm_w_GRU_LSTM_Conv1D_%26_KerasTuner.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Training a Sarcasm Detection Model using a Convolution Layer**

You will be doing the same steps here as the previous lab but will be using a convolution layer instead. As usual, try tweaking the parameters and observe how it affects the results.


In [None]:
# Install the keras-tuner library; comment this line out if it is already installed
!pip install keras-tuner -q

In [None]:
import tensorflow as tf
from tensorflow import keras
from keras.optimizers import RMSprop, SGD, Adam
import keras_tuner
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import urllib

# **Helper Functions**

In [None]:

%matplotlib inline
%config InlineBackend.fugure_format = 'retina'
def plot_loss_acc(history):
  """Plot the accuracy and loss curves of a finished training run.

  The training/validation accuracy is drawn on the current figure; a new
  figure is then opened for the training/validation loss.

  Args:
    history: object whose `history` attribute maps 'accuracy',
      'val_accuracy', 'loss' and 'val_loss' to one value per epoch.
  """
  # Read all four per-epoch series first, so a missing key fails before
  # anything is drawn.
  train_acc = history.history['accuracy']
  valid_acc = history.history['val_accuracy']
  train_loss = history.history['loss']
  valid_loss = history.history['val_loss']

  # One x value per recorded epoch
  epoch_axis = range(len(train_acc))

  # (training series, validation series, training label, validation label,
  #  figure title, y-axis label) for each of the two figures.
  panels = (
      (train_acc, valid_acc, 'Training accuracy', 'Validation accuracy',
       'Training and validation accuracy', "Accuracy"),
      (train_loss, valid_loss, 'Training loss', 'Validation loss',
       'Training and validation loss', "Loss"),
  )

  for position, (train, valid, train_label, valid_label, title, y_label) in enumerate(panels):
    # The first panel uses the current figure; later panels get their own.
    if position > 0:
      plt.figure()
    plt.plot(epoch_axis, train, label=train_label)
    plt.plot(epoch_axis, valid, label=valid_label)
    plt.title(title)
    plt.grid()
    plt.legend()
    plt.xlabel("Epochs")
    plt.ylabel(y_label)

# **Download the Dataset**

In [None]:
# # Download the dataset
# !wget https://storage.googleapis.com/tensorflow-1-public/course3/sarcasm.json

In [None]:
# Download the dataset
# `import urllib` on its own does not load the `urllib.request` submodule, so
# import it explicitly instead of relying on another package having done so.
import urllib.request

url = 'https://storage.googleapis.com/download.tensorflow.org/data/sarcasm.json'
urllib.request.urlretrieve(url, 'sarcasm.json')

# **Parsing Sentences and Labels**

In [None]:
import json

# Load the JSON file. The encoding is given explicitly so the result does not
# depend on the platform's default text encoding.
with open("./sarcasm.json", 'r', encoding="utf-8") as f:
    datastore = json.load(f)

# Collect the headline text and the sarcasm label of every record, in file order
sentences = [item['headline'] for item in datastore]
labels = [item['is_sarcastic'] for item in datastore]

# **Parameters**

In [None]:
# --- Tokenizer / padding settings ---
vocab_size = 1000                   # Tokenizer `num_words` and Embedding `input_dim`
oov_tok = "<OOV>"                   # Tokenizer `oov_token`
max_length = 120                    # pad_sequences `maxlen` and Embedding `input_length`
trunc_type = padding_type = 'post'  # pad_sequences `truncating` / `padding`

# --- Model settings ---
embedding_dim = 16                  # Embedding `output_dim`

# --- Dataset split ---
training_size = 20000               # index at which sentences/labels are cut into train/test

# **Split the Dataset**

In [None]:
# split_size = 0.8
# training_size = round(len(labels) * split_size)

# Everything before index `training_size` is used for training and the rest is
# held out for testing. Sentences and labels are cut at the same index so the
# two lists stay aligned.
training_sentences, testing_sentences = sentences[:training_size], sentences[training_size:]
training_labels, testing_labels = labels[:training_size], labels[training_size:]

# **Tokenize Sentences**

In [None]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# vocab_size = 10000
# max_length = 120
# trunc_type='post'
# padding_type='post'
# oov_tok = "<OOV>"


# Create the tokenizer with the configured vocabulary cap and OOV token
tokenizer = Tokenizer(oov_token=oov_tok, num_words=vocab_size)

# Fit on the training sentences only, then keep the resulting word index
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Both splits are padded/truncated with the same settings
pad_options = dict(maxlen=max_length, padding=padding_type, truncating=trunc_type)

# Training split: text -> integer sequences -> fixed-length array
training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(training_sequences, **pad_options)

# Testing split: same steps, using the tokenizer fitted above
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, **pad_options)

# Labels are converted from Python lists to numpy arrays
training_labels, testing_labels = np.array(training_labels), np.array(testing_labels)

# **Keras Hyperparameter tuning**

In [None]:
# Reset Keras' global state before the tuning model is defined
tf.keras.backend.clear_session()

In [None]:
#Build the model

def build_model(hp):
  """Build and compile one candidate classifier for Keras Tuner.

  The model always starts with an Embedding layer and ends with a tunable
  ReLU Dense layer followed by a single sigmoid unit. In between, the
  `model_type` hyperparameter selects one of four bodies:

    * "DNN":  Flatten, optional Dropout, optional Dense
    * "CNN":  Conv1D + GlobalAveragePooling1D, optional Dropout
    * "GRU":  two (optionally three) stacked bidirectional GRU layers
    * "LSTM": two (optionally three) stacked bidirectional LSTM layers

  The optimizer type, its learning rate and (for SGD only) its momentum are
  tuned as well.

  Args:
    hp: keras_tuner.HyperParameters object used to register and sample the
      search space.

  Returns:
    The compiled tf.keras.Sequential model. Its summary is printed as a
    side effect.
  """
  model_tune = tf.keras.Sequential()
  model_tune.add(tf.keras.layers.Embedding(input_dim=vocab_size,
                                          #  output_dim=hp.Choice("output_dim", values=[8,16,32,64]),
                                           output_dim=embedding_dim,
                                           input_length=max_length))

  model_type = hp.Choice("model_type", ["DNN", "CNN", "GRU", "LSTM"])

  # Each body registers its hyperparameters inside a conditional scope so they
  # are tied to the matching `model_type` value.
  with hp.conditional_scope("model_type", ["DNN"]):
    if model_type == "DNN":
      model_tune.add(tf.keras.layers.Flatten())

      # Tune whether to use dropout.
      # NOTE(review): max_value=1.0 lets the tuner sample a dropout rate of
      # 1.0, which would drop every activation - consider capping below 1.
      if hp.Boolean("dropout-1"):
        model_tune.add(tf.keras.layers.Dropout(rate=hp.Float('rate-1', min_value=0, max_value=1.0, step=0.1, sampling='linear')))

      # Tune whether to use dense layer.
      if hp.Boolean("dense"):
        model_tune.add(tf.keras.layers.Dense(units=hp.Choice("unit-7", values=[16,32,64,128,256,512,1024]),activation="relu"))

  with hp.conditional_scope("model_type", ["CNN"]):

    if model_type == "CNN":
      model_tune.add(tf.keras.layers.Conv1D(filters=hp.Choice("filters", values=[16,32,64,128,256]),
                     kernel_size=hp.Int('kernel_size', min_value=1, max_value=7, step=1, sampling='linear'),activation='relu'))
      model_tune.add(tf.keras.layers.GlobalAveragePooling1D())

      # Tune whether to use dropout (same NOTE about rate 1.0 as above).
      if hp.Boolean("dropout-2"):
        model_tune.add(tf.keras.layers.Dropout(rate=hp.Float('rate-2', min_value=0, max_value=1.0, step=0.1, sampling='linear')))

  with hp.conditional_scope("model_type", ["GRU"]):

    if model_type == "GRU":

      # Tune whether to use additional GRU.
      if hp.Boolean("gru"):
        model_tune.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(units=hp.Choice("unit-1", values=[16,32,64,128,256]), return_sequences=True)))

      # Every recurrent layer except the last returns the full sequence so the
      # next recurrent layer receives one vector per time step.
      model_tune.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(units=hp.Choice("unit-2", values=[16,32,64,128,256]), return_sequences=True)))
      model_tune.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(units=hp.Choice("unit-3", values=[16,32,64,128,256]))))

  with hp.conditional_scope("model_type", ["LSTM"]):

    if model_type == "LSTM":

      # Tune whether to use additional LSTM.
      if hp.Boolean("lstm"):
        model_tune.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=hp.Choice("unit-4", values=[16,32,64,128,256]), return_sequences=True)))

      model_tune.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=hp.Choice("unit-5", values=[16,32,64,128,256]), return_sequences=True)))
      model_tune.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=hp.Choice("unit-6", values=[16,32,64,128,256]))))

  # Dense layers
  # model_tune.add(tf.keras.layers.Dense(units=hp.Choice("unit", values=[16,32,64,128,256,512,1024]),activation="relu"))
  model_tune.add(tf.keras.layers.Dense(units=hp.Choice("unit-8", values=[16,32,64,128,256,512,1024]),activation="relu"))
  model_tune.add(tf.keras.layers.Dense(1, activation='sigmoid'))

  # Select optimizer
  optimizer_name = hp.Choice('optimizer', values=['adam', 'RMSprop', 'SGD'])

  # All three optimizers share the same learning-rate search range.
  learning_rate = hp.Float('lrate', min_value=1e-6, max_value=1e-1, sampling='LOG')

  # Build a real optimizer object so the sampled values are actually used;
  # passing only the optimizer's name to compile() would silently fall back to
  # that optimizer's default learning rate and momentum.
  if optimizer_name == 'adam':
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
  elif optimizer_name == 'RMSprop':
    optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)

  # Momentum is only meaningful for SGD, so it is registered as conditional on
  # that optimizer choice.
  with hp.conditional_scope('optimizer', ['SGD']):
    if optimizer_name == 'SGD':
      momentum = hp.Float('momentum', min_value=0, max_value=1.0, step=0.1, sampling='linear')
      optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=momentum)

  #compile the model
  model_tune.compile(optimizer=optimizer,
                loss=tf.keras.losses.BinaryCrossentropy(),
                metrics=['accuracy'])

  model_tune.summary()

  return model_tune

In [None]:
# Smoke-test the hypermodel by building it once with a fresh, empty
# HyperParameters container (the model summary is printed by build_model).
build_model(keras_tuner.HyperParameters())

In [None]:
# Configure the search: Bayesian optimization over `build_model`, with trials
# ranked by validation accuracy. Results are written under
# ./Model-Tuner/KerasTuning.
tuner = keras_tuner.BayesianOptimization( # could also be Hyperband or RandomSearch
    hypermodel=build_model,
    objective="val_accuracy",
    max_trials=10,
    executions_per_trial=1,
    overwrite=True, # overwrite previous results in the same directory instead of resuming the previous search
    directory="./Model-Tuner",
    project_name="KerasTuning",
)

In [None]:
#Print summary of search space
tuner.search_space_summary()

## **Callbacks**

In [None]:
# Clean the session; recommended when the model is executed several times.
keras.backend.clear_session()

# Save the best model seen so far, judged by val_accuracy.
MCP = keras.callbacks.ModelCheckpoint(filepath='bestmodel.h5',monitor='val_accuracy',
                                          mode='auto',save_best_only=True,save_weights_only=False,verbose=1)

# Multiply the learning rate by 0.1 after 2 epochs without improvement.
# This patience must be smaller than EarlyStopping's: with equal patience both
# callbacks fire on the same epoch, so training stops before the reduced
# learning rate is ever used.
RLP = keras.callbacks.ReduceLROnPlateau(monitor="val_accuracy",factor=0.1,patience=2,
                                            verbose=1,mode="auto",min_lr=0.000000001)

# Stop once val_accuracy has not improved by at least 1e-4 for 5 epochs.
ES = keras.callbacks.EarlyStopping(monitor='val_accuracy',mode='auto', min_delta=1e-4,patience=5,verbose=1)

In [None]:
tf.keras.backend.clear_session()

# Upper bound on the epochs of each trial (EarlyStopping may end one sooner)
num_epochs = 30

# Run the hyperparameter search. `tuner.search` returns None, so there is no
# History object to keep; results are read back from `tuner` afterwards.
tuner.search(training_padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels),
             callbacks=[ES, MCP, RLP, keras.callbacks.TensorBoard("./tb_logs")])

# **View logs on Tensorboard**

In [None]:
#Code to see the results in Tensorboard

%load_ext tensorboard
# %reload_ext tensorboard

%tensorboard --logdir ./tb_logs

# **Query the results**

In [None]:
from keras_tuner.engine.hyperparameters import HyperParameters

# Get the top 2 models.
# `get_best_models` returns a plain list of models (best first). A list has no
# `expect_partial()` method, so chaining it here raised AttributeError.
models = tuner.get_best_models(num_models=2)
best_model = models[0]
best_model.summary()


In [None]:
# Fetch up to 10 of the best trials from the oracle
best_trials = tuner.oracle.get_best_trials(num_trials=10)

# Print each trial's id followed by its summary
for trial in best_trials:
    print("**********Trail id: ", trial.trial_id)
    trial.summary()
    print('\n')

In [None]:
# After hyperparameter tuning, retrieve the best hyperparameters
# (first entry of the list returned by the tuner).
best_hp = tuner.get_best_hyperparameters()[0]
# Show the selected values as the cell output
best_hp.values

# **Save the model**

In [None]:
from pprint import pprint
# Pretty-print the configuration of the selected hyperparameter set
pprint(best_hp.get_config(), compact=True)

In [None]:
# Save the model twice under ./best: once to a .h5 file and once to a path
# without a file extension.
best_model.save('./best/best_model.h5')
best_model.save('./best/best_model_new_version')

# Override the best model
To load the first K best models, use the tuner's `get_best_models` method as shown below.

In [None]:
# This will load the 10 best hyper-tuned models with the weights
# corresponding to their best checkpoint (at the end of the best epoch of best trial).
# `get_best_models` returns a plain list, which has no `expect_partial()`
# method, so the list is used directly.
best_model_count = 10
bo_tuner_best_models = tuner.get_best_models(num_models=best_model_count)

Then you can access a specific best model as below:

In [None]:
# Index into the best-models list; 1 selects its second entry
best_model_id = 1
override_model = bo_tuner_best_models[best_model_id]
override_model.summary()

This method is for querying the models trained during the search. For best performance, it is recommended to retrain your Model on the full dataset using the best hyperparameters found during search, which can be obtained using tuner.get_best_hyperparameters().

In my view, the second-best model from the search is the one to use.

In [None]:
# Fetch up to 10 of the best hyperparameter sets and pick the second one
# (index 1), then build a model from it.
tuner_best_hyperparameters = tuner.get_best_hyperparameters(num_trials=10)
best_hp = tuner_best_hyperparameters[1]
model_override = tuner.hypermodel.build(best_hp)
# Show the selected values as the cell output
best_hp.values

# **Visualize the model**

In [None]:
# !pip install graphviz
from tensorflow.keras.utils import plot_model
# Write a diagram of the model (with layer names and shapes) to model_plot.png
plot_model(model_override, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

# **Retrain the model(optional)**

In [None]:
# Rebuild the model from the selected hyperparameters (`best_hp`).
# model = build_model(best_hps)
model_override = tuner.hypermodel.build(best_hp)

num_epochs = 30
# Fit on the training split, validating on the held-out test split.
history = model_override.fit(training_padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels),
                       callbacks=[ES, MCP, RLP], verbose=2)

In [None]:
# Plot training results
plot_loss_acc(history)

In [None]:
# Evaluate the retrained model on the held-out test split
model_override.evaluate(testing_padded, testing_labels)

Trial 09 summary

Hyperparameters:

model_type: GRU

unit-8: 128

optimizer: RMSprop

lrate: 0.013022460723757403

gru: False

unit-2: 64

unit-3: 64

unit-1: 256

momentum: 0.30000000000000004

Score: 0.8394693732261658

In [None]:
# Hand-written version of the GRU architecture from the trial summary above:
# an embedding, two stacked bidirectional GRUs (64 units each), a 128-unit
# ReLU dense layer and a single sigmoid output.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=vocab_size,
                              output_dim=embedding_dim,
                              input_length=max_length),
    # Optional extra layer, left disabled:
    # tf.keras.layers.Bidirectional(tf.keras.layers.GRU(256, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(64, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(64)),
    tf.keras.layers.Dense(128,activation="relu"),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.summary()