<a href="https://colab.research.google.com/github/meti-94/CodingQuestions/blob/main/Predictive%20Analysis/Text%20Classification/BiLSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install kaggle -q 
!pip install tensorflow-addons -q 

In [2]:
from google.colab import drive
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [3]:
!mkdir ~/.kaggle
!cp /content/gdrive/MyDrive/kaggle.json ~/.kaggle
!chmod 600 ~/.kaggle/kaggle.json

In [4]:
!kaggle datasets download -d williamscott701/memotion-dataset-7k

Downloading memotion-dataset-7k.zip to /content
100% 695M/695M [00:33<00:00, 23.4MB/s]
100% 695M/695M [00:33<00:00, 21.9MB/s]


In [5]:
!unzip -qq /content/memotion-dataset-7k.zip

In [1]:
import re
import string
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Activation, Dropout
from tensorflow.keras.layers import Conv1D, Embedding
from tensorflow.keras.optimizers import Adam, RMSprop

import tensorflow_addons as tfa

from matplotlib import pyplot as plt

In [2]:
df = pd.read_csv('/content/memotion_dataset_7k/labels.csv')
df.drop(df.columns[df.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True)
df = df.drop(columns = ['text_ocr'])
df.head()

Unnamed: 0,image_name,text_corrected,humour,sarcasm,offensive,motivational,overall_sentiment
0,image_1.jpg,LOOK THERE MY FRIEND LIGHTYEAR NOW ALL SOHALIK...,hilarious,general,not_offensive,not_motivational,very_positive
1,image_2.jpeg,The best of #10 YearChallenge! Completed in le...,not_funny,general,not_offensive,motivational,very_positive
2,image_3.JPG,Sam Thorne @Strippin ( Follow Follow Saw every...,very_funny,not_sarcastic,not_offensive,not_motivational,positive
3,image_4.png,10 Year Challenge - Sweet Dee Edition,very_funny,twisted_meaning,very_offensive,motivational,positive
4,image_5.png,10 YEAR CHALLENGE WITH NO FILTER 47 Hilarious ...,hilarious,very_twisted,very_offensive,not_motivational,neutral


In [3]:
df = df.replace({'humour': {'not_funny': 0, 'funny': 1, 'very_funny': 2, 'hilarious':3},
            'sarcasm': {'not_sarcastic': 0, 'general': 1, 'twisted_meaning': 2, 'very_twisted': 3},
            'offensive': {'not_offensive': 0, 'slight': 1, 'very_offensive': 2, 'hateful_offensive': 3},
            'motivational': {'not_motivational': 0, 'motivational': 1},
            'overall_sentiment': {'very_negative': 0, 'negative': 1, 'neutral': 2, 'positive': 3, 'very_positive': 4}})

In [4]:
cleaned = df.copy()
cleaned.dropna(inplace=True, how='any')
cleaned.isnull().any()

image_name           False
text_corrected       False
humour               False
sarcasm              False
offensive            False
motivational         False
overall_sentiment    False
dtype: bool

In [5]:
def standardization(data):
    data = data.apply(lambda x: x.lower())
    data = data.apply(lambda x: re.sub(r'\d+', '', x))
    data = data.apply(lambda x: re.sub(r'.com', '', x, flags=re.MULTILINE))
    data = data.apply(lambda x: x.translate(str.maketrans('', '', string.punctuation)))
    return data

cleaned['text_corrected'] = standardization(cleaned.text_corrected)

In [6]:
Y = cleaned.iloc[:,2:]
X = cleaned.iloc[:, 1]
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state=42)

In [7]:
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
#####################################
vocab_size = 50000
sequence_length = 100
#####################################
vectorize_layer = TextVectorization(
    max_tokens=vocab_size,
    output_mode='int',
    output_sequence_length=sequence_length)

text_ds = np.asarray(X_train)
vectorize_layer.adapt(tf.convert_to_tensor(text_ds))

In [8]:
#####################################
embedding_dim=16
#####################################

text_input = tf.keras.Input(shape=(1,), dtype=tf.string, name='text')
text_layers = vectorize_layer(text_input)
text_layers = tf.keras.layers.Embedding(vocab_size, embedding_dim, name="embedding", mask_zero=True)(text_layers)
text_layers = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64))(text_layers)
text_layers = tf.keras.layers.Dense(64, activation='relu')(text_layers)

prediction_layer_1 = tf.keras.layers.Dense(4, activation='softmax', name = 'sarcasm')
prediction_layer_2 = tf.keras.layers.Dense(4, activation='softmax', name = 'humuor')
prediction_layer_3 = tf.keras.layers.Dense(4, activation='softmax', name = 'offensive')
prediction_layer_4 = tf.keras.layers.Dense(1, activation='sigmoid', name = 'motivational')
prediction_layer_5 = tf.keras.layers.Dense(5, activation='softmax', name = 'overall')

output_1 = prediction_layer_1(text_layers)
output_2 = prediction_layer_2(text_layers)
output_3 = prediction_layer_3(text_layers)
output_4 = prediction_layer_4(text_layers)
output_5 = prediction_layer_5(text_layers)

model = tf.keras.Model(inputs = [text_input] , outputs = [output_1, output_2, output_3, output_4, output_5])

In [9]:
losses = {
      "humuor": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
      "sarcasm": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
      "offensive": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
      "motivational": tf.keras.losses.BinaryCrossentropy(from_logits=True),
      "overall": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
}

lossWeights = {
      "humuor": 1.0, 
      "sarcasm": 1.0, 
      "offensive": 1.0, 
      "motivational": 1.0,
      "overall": 1.0
}

metric = {
    "humuor": ['acc',tfa.metrics.F1Score(num_classes=4, average="micro", threshold = 0.9)],
    "sarcasm": ['acc',tfa.metrics.F1Score(num_classes=4, average="micro", threshold = 0.9)],
    "offensive": ['acc',tfa.metrics.F1Score(num_classes=4, average="micro", threshold = 0.9)],
    "motivational": ['acc',tfa.metrics.F1Score(num_classes=1, average="micro", threshold = 0.9)],
    "overall": ['acc',tfa.metrics.F1Score(num_classes=5, average="micro", threshold = 0.9)]
}


In [10]:
learning_rates = [0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001]

for i in learning_rates:
    print('\n', '*'*10, f' Learning Rate is set to {i}')
    model.compile(optimizer=tf.keras.optimizers.Adam(i),
              loss = losses,
              loss_weights= lossWeights,
              metrics=['accuracy'])
    
    history = model.fit(x = {"text": X_train},
                    y = {"sarcasm": y_train.sarcasm, 
                         "humuor": y_train.humour, 
                         "offensive": y_train.offensive, 
                         "motivational": y_train.motivational, 
                         "overall": y_train.overall_sentiment},
                    batch_size=256,
                    epochs=10,
                    verbose=2
                   )


 **********  Learning Rate is set to 0.1
Epoch 1/10


  return dispatch_target(*args, **kwargs)
  return dispatch_target(*args, **kwargs)


22/22 - 11s - loss: 6.0825 - sarcasm_loss: 1.2628 - humuor_loss: 1.4035 - offensive_loss: 1.2977 - motivational_loss: 0.6812 - overall_loss: 1.4373 - sarcasm_accuracy: 0.4761 - humuor_accuracy: 0.3197 - offensive_accuracy: 0.3659 - motivational_accuracy: 0.6191 - overall_accuracy: 0.4070 - 11s/epoch - 495ms/step
Epoch 2/10
22/22 - 0s - loss: 5.5621 - sarcasm_loss: 1.1713 - humuor_loss: 1.2992 - offensive_loss: 1.1687 - motivational_loss: 0.6451 - overall_loss: 1.2777 - sarcasm_accuracy: 0.5001 - humuor_accuracy: 0.3403 - offensive_accuracy: 0.3927 - motivational_accuracy: 0.6416 - overall_accuracy: 0.4471 - 433ms/epoch - 20ms/step
Epoch 3/10
22/22 - 0s - loss: 5.3970 - sarcasm_loss: 1.1365 - humuor_loss: 1.2790 - offensive_loss: 1.1184 - motivational_loss: 0.6254 - overall_loss: 1.2376 - sarcasm_accuracy: 0.5003 - humuor_accuracy: 0.3435 - offensive_accuracy: 0.4228 - motivational_accuracy: 0.6418 - overall_accuracy: 0.4477 - 422ms/epoch - 19ms/step
Epoch 4/10
22/22 - 0s - loss: 5.2657

In [11]:
evaluate = model.evaluate(x = {"text": X_test},
                    y = {"sarcasm": y_test.sarcasm, 
                         "humuor": y_test.humour, 
                         "offensive": y_test.offensive, 
                         "motivational": y_test.motivational, 
                         "overall": y_test.overall_sentiment},
                    batch_size=256,
                    verbose=2
                   )

6/6 - 3s - loss: 8.2470 - sarcasm_loss: 1.9325 - humuor_loss: 1.3384 - offensive_loss: 2.7735 - motivational_loss: 0.8078 - overall_loss: 1.3949 - sarcasm_accuracy: 0.3798 - humuor_accuracy: 0.3441 - offensive_accuracy: 0.2761 - motivational_accuracy: 0.5100 - overall_accuracy: 0.4385 - 3s/epoch - 451ms/step
