In [None]:
!pip install wget --quiet

In [None]:
!pip install --upgrade tensorflow==2.12.0 tensorflow_hub==0.13.0 --quiet

In [None]:
# Download helper_functions.py so that `unzip_data` can be imported from it below
!wget https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/helper_functions.py --quiet

In [None]:
# Download the zipped dataset archive; it is handed to `unzip_data` below
!wget "https://storage.googleapis.com/ztm_tf_course/nlp_getting_started.zip" --quiet

In [5]:
# Import the unzip helper from the downloaded helper_functions module
from helper_functions import unzip_data

# Unpack the downloaded archive. Presumably this extracts into the working
# directory, which is where train.csv / test.csv are read from later —
# TODO confirm against helper_functions.unzip_data.
unzip_data("nlp_getting_started.zip")

In [6]:
# Importing TensorFlow and Keras libraries
import tensorflow as tf
from keras import Sequential
from keras.layers import Input, GlobalAveragePooling1D, Dense, LSTM, Conv1D, GlobalMaxPool1D
from keras.layers import TextVectorization, Embedding
from keras.optimizers import Adam
import tensorflow_hub as hub

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import pandas as pd

In [7]:
class PreprocessData:
    """
    Loads the train/test CSV files and produces a train/validation split.

    The training frame is shuffled once at construction time; calling
    `get_train_val_data` then splits the shuffled `text` and `target` columns.
    With the default arguments the class reads 'train.csv' / 'test.csv' from
    the working directory and uses seed 42 and a 10% validation share.
    """

    def __init__(self,
                 train_path='train.csv',
                 test_path='test.csv',
                 max_vocab_length=10000,
                 max_output_sequence_length=15,
                 random_state=42):
        """
        Read both CSV files and shuffle the training rows.

        :param train_path: path of the training CSV; it needs `text` and `target`
                           columns for `get_train_val_data` to work;
        :param test_path: path of the test CSV;
        :param max_vocab_length: value stored on the instance as `max_vocab_length`;
        :param max_output_sequence_length: value stored on the instance as
                                           `max_output_sequence_length`;
        :param random_state: seed used for the shuffle and for the train/validation split.
        """
        self.random_state = random_state

        self.train_df = pd.read_csv(train_path)
        self.test_df = pd.read_csv(test_path)

        # frac=1 keeps every row and only changes the order; the seed makes the order repeatable
        self.train_df_shuffled = self.train_df.sample(frac=1, random_state=random_state)

        # Filled in by get_train_val_data()
        self.train_sentences = None
        self.train_labels = None
        self.val_sentences = None
        self.val_labels = None

        self.max_vocab_length = max_vocab_length
        self.max_output_sequence_length = max_output_sequence_length

    def get_train_val_data(self, test_size=0.1):
        """
        Split the shuffled training data into train and validation parts.

        The result is also stored on the instance (`train_sentences`,
        `train_labels`, `val_sentences`, `val_labels`).

        :param test_size: share of the rows held out for validation;
        :return: tuple (train_sentences, train_labels, val_sentences, val_labels).
        """
        # train_test_split returns (X_train, X_val, y_train, y_val) — note that this
        # differs from the order in which this method returns the four arrays.
        (self.train_sentences,
         self.val_sentences,
         self.train_labels,
         self.val_labels) = train_test_split(self.train_df_shuffled['text'].to_numpy(),
                                             self.train_df_shuffled['target'].to_numpy(),
                                             test_size=test_size,
                                             random_state=self.random_state)
        return self.train_sentences, self.train_labels, self.val_sentences, self.val_labels


In [8]:
    # Build the dataset wrapper, then unpack its train/validation split
    preprocess_data = PreprocessData()

    (train_sentences,
     train_labels,
     val_sentences,
     val_labels) = preprocess_data.get_train_val_data()

    # Pull the vocabulary-size and sequence-length settings off the dataset wrapper
    max_vocab_length, max_output_length = (preprocess_data.max_vocab_length,
                                           preprocess_data.max_output_sequence_length)


In [9]:
def calculate_results(y_true, y_pred):
    """
    Summarises classification performance as accuracy, precision, recall and f1-score.

    Accuracy is multiplied by 100; precision, recall and f1-score are the
    "weighted"-average values exactly as returned by
    `precision_recall_fscore_support`.

    :param y_true: true labels in the form of a 1D array;
    :param y_pred: predicted labels in the form of a 1D array;
    :return: a dictionary with the keys "accuracy", "precision", "recall" and "f1-score".
    """
    # Accuracy is computed first and scaled by 100
    accuracy_pct = accuracy_score(y_true, y_pred) * 100

    # The fourth returned value is not used here
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true,
        y_pred,
        average="weighted",
    )

    return {
        "accuracy": accuracy_pct,
        "precision": precision,
        "recall": recall,
        "f1-score": f1,
    }

## Exercise 3 – Fine-tuning the Universal Sentence Encoder (USE)

In [10]:
# Seed Python, NumPy and TensorFlow before any weights are created so that layer
# initialisation and batch shuffling are repeatable on "Restart & Run All"
# (same seed value as the one used for the data split)
tf.keras.utils.set_random_seed(42)

# Creating tensorflow hub model encoding layer
sentence_encoder_layer = hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder/4",
                                        input_shape=[],      # each example is a single (scalar) string
                                        dtype=tf.string,
                                        trainable=True,      # the encoder weights are updated during training
                                        name="USE")

# Building Sequential of model: encoder -> hidden dense layer -> single sigmoid output
ex_model_use = Sequential([
    sentence_encoder_layer,
    Dense(64, activation="relu"),
    Dense(1, activation="sigmoid")
], name="model_use")

# Compile the model
ex_model_use.compile(loss="binary_crossentropy",
                      optimizer=Adam(),
                      metrics=["accuracy"])

In [11]:
# Fit for 5 epochs, evaluating on the validation split after each epoch
ex_model_use_history = ex_model_use.fit(
    x=train_sentences,
    y=train_labels,
    epochs=5,
    validation_data=(val_sentences, val_labels),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [12]:
print("|=== Model USE Results ===|")

# Model outputs for the validation sentences (show the first 10)
ex_model_use_pred_probs = ex_model_use.predict(val_sentences)
print(ex_model_use_pred_probs[:10])

# Round the probabilities to 0./1. labels and squeeze out size-1 dimensions (show the first 20)
ex_model_use_preds = tf.squeeze(
    tf.round(ex_model_use_pred_probs)
)
print(ex_model_use_preds[:20])

# Score the predicted labels against the true validation labels
ex_model_use_results = calculate_results(y_true=val_labels,
                                         y_pred=ex_model_use_preds)
print(ex_model_use_results)

|=== Model USE Results ===|
[[0.22073375]
 [0.98080504]
 [0.9995185 ]
 [0.02332857]
 [0.9643864 ]
 [0.99902713]
 [0.9993274 ]
 [0.9995679 ]
 [0.99925286]
 [0.01513849]]
tf.Tensor([0. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 0. 1. 0. 0. 1. 0. 0. 0.], shape=(20,), dtype=float32)
{'accuracy': 79.52755905511812, 'precision': 0.7953774831544114, 'recall': 0.7952755905511811, 'f1-score': 0.7953210416227474}
