In [None]:
# Mount Google Drive at /content/drive; the paths used in later cells live under it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras import layers, Model
from sklearn.preprocessing import OneHotEncoder
import pickle
import numpy as np
from typing import Tuple

In [None]:
# Single place for every tunable value: architecture (dropout, label count,
# encoder handle, whether the encoder is fine-tuned) and training
# (learning rate, batch size, epochs).
hyperparameters = {
    "DROPOUT": 0.1,
    "LABEL_COUNT": 3,
    "LEARNING_RATE": 0.00005,
    "BATCH_SIZE": 128,
    "USE_URL": "https://tfhub.dev/google/universal-sentence-encoder/4",
    "TRAINABLE": True,
    "EPOCHS": 5,
}

In [None]:
# Path of the pickled label encoder on the mounted Drive.
onehotencoder_path = "/content/drive/MyDrive/SAIL Exam Datasets/label-encoder.pickle"

# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open(onehotencoder_path, 'rb') as handle:
    label_encoder = pickle.load(handle)

In [None]:
import pandas as pd

In [None]:
# Load the training and validation splits; both files are read as semicolon-delimited.
train_df = pd.read_csv("/content/drive/MyDrive/SAIL Exam Datasets/tweets_reviews/train.csv", sep=';')
val_df = pd.read_csv("/content/drive/MyDrive/SAIL Exam Datasets/tweets_reviews/val.csv", sep=';')

In [None]:
class USEClassifier(Model):
    """Text classifier: a TF-Hub sentence encoder followed by a dense head.

    Forward pass: hub layer -> dropout -> Dense(1024, relu) -> dropout ->
    Dense(512, relu) -> Dense(LABEL_COUNT, softmax).

    Keyword Args:
        USE_URL: TF-Hub handle given to ``hub.KerasLayer``.
        TRAINABLE: Passed as ``trainable`` to the hub layer.
        DROPOUT: Rate shared by both dropout layers.
        LABEL_COUNT: Number of units in the softmax output layer.

    Any additional keyword arguments are accepted and ignored, so a larger
    hyperparameter dict can be splatted into the constructor.

    Raises:
        KeyError: If one of the four keys above is missing.
    """

    def __init__(self, **kwargs):
        super().__init__()
        # Keep the plain constructor values so get_config() can return
        # something serializable that from_config() can rebuild from.
        self.use_url = kwargs["USE_URL"]
        self.use_trainable = kwargs['TRAINABLE']
        self.dropout_rate = kwargs['DROPOUT']
        self.label_count = kwargs['LABEL_COUNT']

        self.use_layer = hub.KerasLayer(self.use_url, trainable=self.use_trainable)
        self.dropout_1 = layers.Dropout(self.dropout_rate)
        self.dense_layer_1 = layers.Dense(1024, activation='relu')
        self.dropout_2 = layers.Dropout(self.dropout_rate)
        self.dense_layer_2 = layers.Dense(512, activation='relu')
        self.output_layer = layers.Dense(self.label_count, activation='softmax')

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: Batch fed to the hub layer.
            training: Forwarded to both dropout layers. Defaults to ``None``
                so the model can also be called directly without the flag.

        Returns:
            Output of the softmax layer (last dimension has ``LABEL_COUNT`` units).
        """
        output = self.use_layer(inputs)
        output = self.dropout_1(output, training=training)
        output = self.dense_layer_1(output)
        output = self.dropout_2(output, training=training)
        output = self.dense_layer_2(output)
        final_output = self.output_layer(output)

        return final_output

    def get_config(self):
        """Return the constructor arguments needed to rebuild this model.

        The config holds only plain values (not layer objects) so it can be
        serialized and passed back to ``from_config``.
        """
        return {
            "USE_URL": self.use_url,
            "TRAINABLE": self.use_trainable,
            "DROPOUT": self.dropout_rate,
            "LABEL_COUNT": self.label_count,
        }

    @classmethod
    def from_config(cls, config):
        """Rebuild a model from the dict produced by ``get_config``."""
        return cls(**config)




In [None]:
# Build the classifier; the whole hyperparameter dict is passed as keyword arguments.
model = USEClassifier(**hyperparameters)

In [None]:
# Adam optimizer with the configured learning rate and amsgrad=True.
opt = tf.keras.optimizers.Adam(learning_rate=hyperparameters["LEARNING_RATE"], amsgrad=True)

In [None]:
# categorical_crossentropy expects one-hot style targets — presumably what
# label_encoder produces; confirm against the pickled encoder.
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [None]:
def prepare_dataset(df: pd.DataFrame, encoder=None) -> Tuple[np.ndarray, np.ndarray]:
    """Turn a labelled frame into model inputs and encoded targets.

    Args:
        df: Frame with a ``text`` column (inputs) and an
            ``expected_sentiment`` column (labels).
        encoder: Object with a ``transform`` method that receives the labels
            as a single-column 2-D array. When omitted, the notebook-level
            ``label_encoder`` is used, so existing calls keep working.

    Returns:
        ``(X, Y)`` where ``X`` is an array of the texts and ``Y`` is the
        dense array of encoded labels.

    Raises:
        KeyError: If either required column is missing from ``df``.
    """
    if encoder is None:
        # Fall back to the encoder loaded earlier in the notebook.
        encoder = label_encoder

    X = np.array(list(df['text']))
    label_column = np.array(list(df['expected_sentiment'])).reshape(-1, 1)
    encoded = encoder.transform(label_column)

    # Encoders may return a sparse matrix (which has .toarray()) or an
    # already-dense array, depending on how they were configured.
    if hasattr(encoded, "toarray"):
        Y = encoded.toarray()
    else:
        Y = np.asarray(encoded)
    return X, Y

In [None]:
# Encode both splits. The validation split is kept as the (X, Y) tuple
# returned by prepare_dataset and handed to fit() unchanged.
X_train, Y_train = prepare_dataset(train_df)
val_data = prepare_dataset(val_df)

# Train with the epoch count and batch size from the hyperparameter dict.
model.fit(
    x=X_train,
    y=Y_train,
    epochs=hyperparameters["EPOCHS"],
    validation_data=val_data,
    batch_size=hyperparameters['BATCH_SIZE'],
)

Epoch 1/5


In [None]:
# The save path embeds the epoch count, learning rate and batch size so runs
# with different settings do not overwrite each other.
model_filename = f"/content/drive/MyDrive/SAIL Exam Datasets/finetuned_use_model_{hyperparameters['EPOCHS']}_epochs_{hyperparameters['LEARNING_RATE']}_lr_{hyperparameters['BATCH_SIZE']}_batch_trainable_use_tweets_reviews_dataset"
# save_format='tf' requests the TensorFlow SavedModel format.
model.save(model_filename, save_format='tf')

In [None]:
# NOTE(review): the train/val CSVs are read with sep=';' but this file uses the
# default separator — confirm that the test file really is comma-delimited.
test_df = pd.read_csv("/content/drive/MyDrive/SAIL Exam Datasets/sentiment_test_cases.csv")

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Collect the test texts into a NumPy array to feed to the model.
test_X = np.array(list(test_df['text']))

In [None]:
# Use predict() rather than calling the model directly: it runs in inference
# mode, processes the test set in batches instead of one forward pass over
# every row, and returns a NumPy array instead of an EagerTensor.
test_Y_prediction_raw = model.predict(test_X, batch_size=hyperparameters["BATCH_SIZE"])

In [None]:
# Map the model outputs back to label values with the encoder.
# NOTE(review): the inputs here are class probabilities, not exact one-hot
# rows — presumably inverse_transform picks the highest-scoring class; confirm
# against the encoder type stored in the pickle.
test_Y_pred = label_encoder.inverse_transform(test_Y_prediction_raw)

In [None]:
# Per-class report: expected labels (reshaped to a single column) versus the decoded predictions.
print(classification_report(np.array(list(test_df['expected_sentiment'])).reshape(-1,1), test_Y_pred))