In [185]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Embedding, Bidirectional,  Layer, LSTM, Dense, Dropout, Input, Lambda, LayerNormalization, GlobalAveragePooling1D, MultiHeadAttention
from gensim.models import KeyedVectors, Word2Vec
from transformers import TFBertModel, BertTokenizer
import tensorflow as tf
import pandas as pd
import numpy as np
EMBEDDING_DIM = 300
import tensorflow_hub as hub
import tensorflow_text

df_train = pd.read_csv("C19_train.csv", encoding_errors="ignore")
df_test = pd.read_csv("C19_test.csv", encoding_errors="ignore")


df_train["Sentiment"] = df_train["Sentiment"].apply(
    lambda x: "Positive" if x in ["Positive", "Extremely Positive"] else "Negative"
)
df_test["Sentiment"] = df_test["Sentiment"].apply(
    lambda x: "Positive" if x in ["Positive", "Extremely Positive"] else "Negative"
)
EMBEDDING_DIM = 100
MAX_LEN = 100

df_train["Sentiment"] = df_train["Sentiment"].str.capitalize()
df_test["Sentiment"] = df_test["Sentiment"].str.capitalize()

label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(df_train["Sentiment"])
y_test = label_encoder.transform(df_test["Sentiment"])
num_classes = len(label_encoder.classes_)

In [None]:
def word_2_vec(with_grad: bool):
    print("Loading Word2Vec model...")
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(df_train["OriginalTweet"])

    X_train_seq = tokenizer.texts_to_sequences(df_train["OriginalTweet"])
    X_test_seq = tokenizer.texts_to_sequences(df_test["OriginalTweet"])

    X_train_pad = pad_sequences(X_train_seq, maxlen=MAX_LEN)
    X_test_pad = pad_sequences(X_test_seq, maxlen=MAX_LEN)

    y_train_cat = to_categorical(y_train, num_classes)
    y_test_cat = to_categorical(y_test, num_classes)

    word_index = tokenizer.word_index

    w2v_model = KeyedVectors.load_word2vec_format(
        "GoogleNews-vectors-negative300.bin", binary=True
    )
    sentences = df_train["OriginalTweet"].apply(lambda x: x.split()).tolist()
    w2v_model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)
    EMBEDDING_DIM = 100
    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))

    for word, i in word_index.items():
        if word in w2v_model.wv:
            embedding_matrix[i] = w2v_model.wv[word]
        model = Sequential()
    model.add(
        Embedding(
            input_dim=len(word_index) + 1,
            output_dim=EMBEDDING_DIM,
            weights=[embedding_matrix],
            input_length=MAX_LEN,
            trainable=with_grad
        )
    )
    model.add(Bidirectional(LSTM(64, return_sequences=True)))
    model.add(Bidirectional(LSTM(64)))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation="softmax"))

    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    model.summary()

    # Train
    history = model.fit(
        X_train_pad, y_train_cat,
        validation_split=0.1,
        epochs=5,
        batch_size=32,
        shuffle=True
    )


    loss, acc = model.evaluate(X_test_pad, y_test_cat)
    print(f"Test Accuracy: {acc:.2f}")


In [43]:
def glove(with_grad: bool):
    print("Loading GloVe...")
    GLOVE_PATH = "glove.6B.100d.txt"  
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(df_train["OriginalTweet"])

    X_train_seq = tokenizer.texts_to_sequences(df_train["OriginalTweet"])
    X_test_seq = tokenizer.texts_to_sequences(df_test["OriginalTweet"])

    X_train_pad = pad_sequences(X_train_seq, maxlen=MAX_LEN)
    X_test_pad = pad_sequences(X_test_seq, maxlen=MAX_LEN)

    y_train_cat = to_categorical(y_train, num_classes)
    y_test_cat = to_categorical(y_test, num_classes)

    word_index = tokenizer.word_index
    embeddings_index = {}
    with open(GLOVE_PATH, encoding="utf8") as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))

    for word, i in word_index.items():
        vector = embeddings_index.get(word)
        if vector is not None:
            embedding_matrix[i] = vector



    model = Sequential()
    model.add(
        Embedding(
            input_dim=len(word_index) + 1,
            output_dim=EMBEDDING_DIM,
            weights=[embedding_matrix],
            input_length=MAX_LEN,
            trainable=False
        )
    )
    model.add(Bidirectional(LSTM(64, return_sequences=True)))
    model.add(Bidirectional(LSTM(64)))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation="softmax"))

    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    model.summary()
    history = model.fit(
        X_train_pad, y_train_cat,
        validation_split=0.1,
        epochs=5,
        batch_size=32,
        shuffle=True
    )
    loss, acc = model.evaluate(X_test_pad, y_test_cat)
    print(f"Test Accuracy: {acc:.2f}")


In [32]:
class SelfAttention(tf.keras.layers.Layer):
    def __init__(self, num_heads, key_dim):
        super(SelfAttention, self).__init__()
        self.attn = MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)

    def call(self, inputs):
        return self.attn(inputs, inputs)

def glove_bilstm_attention():
    print("Loading GloVe...")
    GLOVE_PATH = "glove.6B.100d.txt"  
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(df_train["OriginalTweet"])

    X_train_seq = tokenizer.texts_to_sequences(df_train["OriginalTweet"])
    X_test_seq = tokenizer.texts_to_sequences(df_test["OriginalTweet"])

    X_train_pad = pad_sequences(X_train_seq, maxlen=MAX_LEN)
    X_test_pad = pad_sequences(X_test_seq, maxlen=MAX_LEN)

    y_train_cat = to_categorical(y_train, num_classes)
    y_test_cat = to_categorical(y_test, num_classes)

    word_index = tokenizer.word_index
    embeddings_index = {}
    with open(GLOVE_PATH, encoding="utf8") as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))

    for word, i in word_index.items():
        vector = embeddings_index.get(word)
        if vector is not None:
            embedding_matrix[i] = vector


    model = Sequential()
    model.add(
        Embedding(
            input_dim=len(word_index) + 1,
            output_dim=EMBEDDING_DIM,
            weights=[embedding_matrix],
            input_length=MAX_LEN,
            trainable=False
        )
    )
    model.add(Bidirectional(LSTM(64, return_sequences=True)))
    model.add(SelfAttention(num_heads=3, key_dim=64)) 
    model.add(LayerNormalization())
    model.add(GlobalAveragePooling1D())
    model.add(Dropout(0.5))
    model.add(Dense(64, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation="softmax"))

    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    model.summary()
    history = model.fit(
        X_train_pad, y_train_cat,
        validation_split=0.1,
        epochs=5,
        batch_size=32,
        shuffle=True

    )
    loss, acc = model.evaluate(X_test_pad, y_test_cat)
    print(f"Test Accuracy: {acc:.2f}")


In [191]:
def bert(with_grad=False): #TODO: GPU
    label_encoder = LabelEncoder()
    df_train["label"] = label_encoder.fit_transform(df_train["Sentiment"])
    df_test["label"] = label_encoder.transform(df_test["Sentiment"])
    num_classes = len(label_encoder.classes_)
    tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
    
    def tokenize_texts(texts, max_len=128):
        return tokenizer(
            list(texts),
            truncation=True,
            padding='max_length',
            max_length=max_len,
            return_tensors='tf'
        )
    
    train_encodings = tokenize_texts(df_train["OriginalTweet"])
    test_encodings = tokenize_texts(df_test["OriginalTweet"])
    
    y_train = tf.keras.utils.to_categorical(df_train["label"], num_classes)
    y_test = tf.keras.utils.to_categorical(df_test["label"], num_classes)
    
    bert_model = TFBertModel.from_pretrained("bert-base-uncased")
    input_ids = Input(shape=(128,), dtype=tf.int32, name="input_ids")
    attention_mask = Input(shape=(128,), dtype=tf.int32, name="attention_mask")


    bert_output = tf.keras.layers.Lambda(
        lambda inputs: bert_model(
            input_ids=inputs[0],
            attention_mask=inputs[1],
            training=with_grad
        ).last_hidden_state,
        output_shape=(128, 768)  # (sequence_length, hidden_size)
        )([input_ids, attention_mask])

    

    cls_token = bert_output[:, 0, :]
    
    x = Dropout(0.5)(cls_token)
    x = Dense(64, activation="relu")(x)
    x = Dropout(0.5)(x)
    output = Dense(num_classes, activation="softmax")(x)

    model = Model(inputs=[input_ids, attention_mask], outputs=output)
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    
    model.fit(
        {
            "input_ids": train_encodings["input_ids"],
            "attention_mask": train_encodings["attention_mask"]
        },
        y_train,
        validation_data=(
            {
                "input_ids": test_encodings["input_ids"],
                "attention_mask": test_encodings["attention_mask"]
            },
            y_test
        ),
        epochs=3,
        batch_size=32
    )
    model.summary()
    return model


In [None]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"GPUs detected: {len(gpus)}")
else:
    print("No GPUs detected.")


In [33]:
print("GLOVE WITH BILSTM and MULTIHEAD ATTENTION")
glove_bilstm_attention()

GLOVE WITH BILSTM and MULTIHEAD ATTENTION
Loading GloVe...


Epoch 1/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 41ms/step - accuracy: 0.6489 - loss: 0.6313 - val_accuracy: 0.7660 - val_loss: 0.4801
Epoch 2/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 42ms/step - accuracy: 0.7881 - loss: 0.4699 - val_accuracy: 0.8324 - val_loss: 0.3844
Epoch 3/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 42ms/step - accuracy: 0.8324 - loss: 0.3955 - val_accuracy: 0.8586 - val_loss: 0.3336
Epoch 4/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 42ms/step - accuracy: 0.8669 - loss: 0.3287 - val_accuracy: 0.8690 - val_loss: 0.3224
Epoch 5/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 42ms/step - accuracy: 0.8851 - loss: 0.2862 - val_accuracy: 0.8710 - val_loss: 0.3153
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.8621 - loss: 0.3397
Test Accuracy: 0.86


In [34]:
print("W2V NO GRAD + 2 BiLSTM")
word_2_vec(with_grad = False)

W2V NO GRAD + 2 BiLSTM
Loading Word2Vec model...


Epoch 1/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 24ms/step - accuracy: 0.6164 - loss: 0.6515 - val_accuracy: 0.7070 - val_loss: 0.5854
Epoch 2/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 23ms/step - accuracy: 0.7106 - loss: 0.5695 - val_accuracy: 0.7281 - val_loss: 0.5439
Epoch 3/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 24ms/step - accuracy: 0.7400 - loss: 0.5355 - val_accuracy: 0.6849 - val_loss: 0.5772
Epoch 4/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 25ms/step - accuracy: 0.7517 - loss: 0.5173 - val_accuracy: 0.7410 - val_loss: 0.5175
Epoch 5/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 24ms/step - accuracy: 0.7649 - loss: 0.4973 - val_accuracy: 0.7553 - val_loss: 0.5025
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.7528 - loss: 0.5080
Test Accuracy: 0.76


In [35]:
print("W2V WITH GRAD + 2 BiLSTM")
word_2_vec(with_grad = True)


W2V WITH GRAD + 2 BiLSTM
Loading Word2Vec model...


Epoch 1/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 31ms/step - accuracy: 0.6469 - loss: 0.6158 - val_accuracy: 0.8316 - val_loss: 0.3989
Epoch 2/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 31ms/step - accuracy: 0.9265 - loss: 0.2011 - val_accuracy: 0.6951 - val_loss: 0.9595
Epoch 3/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 31ms/step - accuracy: 0.9851 - loss: 0.0468 - val_accuracy: 0.8049 - val_loss: 0.6106
Epoch 4/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 31ms/step - accuracy: 0.9946 - loss: 0.0178 - val_accuracy: 0.8253 - val_loss: 0.7327
Epoch 5/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 31ms/step - accuracy: 0.9980 - loss: 0.0063 - val_accuracy: 0.7680 - val_loss: 1.3074
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.7626 - loss: 1.3541
Test Accuracy: 0.77


In [36]:
print("GLOVE WITH NO GRAD + 2 BiLSTM")
glove(with_grad = False)

GLOVE WITH NO GRAD + 2 BiLSTM
Loading GloVe...


Epoch 1/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 24ms/step - accuracy: 0.6570 - loss: 0.6199 - val_accuracy: 0.7405 - val_loss: 0.5182
Epoch 2/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 24ms/step - accuracy: 0.7643 - loss: 0.4978 - val_accuracy: 0.7898 - val_loss: 0.4595
Epoch 3/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 23ms/step - accuracy: 0.8045 - loss: 0.4375 - val_accuracy: 0.8054 - val_loss: 0.4367
Epoch 4/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 23ms/step - accuracy: 0.8193 - loss: 0.4110 - val_accuracy: 0.8265 - val_loss: 0.3963
Epoch 5/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 23ms/step - accuracy: 0.8392 - loss: 0.3765 - val_accuracy: 0.8319 - val_loss: 0.3823
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.8306 - loss: 0.3923
Test Accuracy: 0.83


In [37]:
print("GLOVE WITH GRAD + 2 BiLSTM")
glove(with_grad = True)

GLOVE WITH GRAD + 2 BiLSTM
Loading GloVe...


Epoch 1/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 31ms/step - accuracy: 0.6826 - loss: 0.5769 - val_accuracy: 0.8652 - val_loss: 0.3244
Epoch 2/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 30ms/step - accuracy: 0.9225 - loss: 0.2143 - val_accuracy: 0.8652 - val_loss: 0.3433
Epoch 3/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 30ms/step - accuracy: 0.9792 - loss: 0.0618 - val_accuracy: 0.8139 - val_loss: 0.7755
Epoch 4/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 31ms/step - accuracy: 0.9917 - loss: 0.0264 - val_accuracy: 0.8785 - val_loss: 0.5203
Epoch 5/5
[1m1158/1158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 31ms/step - accuracy: 0.9959 - loss: 0.0143 - val_accuracy: 0.8688 - val_loss: 0.6093
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.8621 - loss: 0.6162
Test Accuracy: 0.87


In [193]:
print("BERT WITH GRAD + 2 BiLSTM")
model, label_encoder = bert(True)


BERT WITH GRAD + 2 BiLSTM


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

Epoch 1/3
[1m  27/1287[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m23:52[0m 1s/step - accuracy: 0.4933 - loss: 0.8687

KeyboardInterrupt: 

In [192]:
print("BERT NO GRAD + 2 BiLSTM")
model, label_encoder = bert(False)


BERT NO GRAD + 2 BiLSTM


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

Epoch 1/3
[1m  29/1287[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m22:33[0m 1s/step - accuracy: 0.4719 - loss: 0.8356

KeyboardInterrupt: 