# RNN model with GloVe embeddings

### Imports

In [3]:
import sys
import numpy as np
import pickle
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.layers import Bidirectional, Embedding, LSTM, Dropout, Dense, Input

sys.path.append('../')
from utils import *


## Load the training input and dictionaries

In [4]:
# Sequence length used for the model's Input layer further down.
MAX_TWEET_LENGTH = 30

# Training inputs and labels, as returned by the project's utils module.
X, y = load_training_input()

# Dictionaries from utils; presumably word -> vector, word -> integer index and
# integer index -> word — TODO confirm against utils.load_dictionary.
word_to_vector, words_to_index, index_to_words = load_dictionary()


## Split data

In [5]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=41,
)


### Create an embedding layer with GloVe vectors

In [6]:
# One row per vocabulary entry plus one extra row. Rows that no word maps to
# stay all-zero (presumably index 0 is reserved for padding — TODO confirm
# against utils).
vocab_size = 1 + len(words_to_index)

# Embedding width, read off the vector stored for the first vocabulary word.
first_word = list(words_to_index)[0]
emb_dim = word_to_vector[first_word].shape[0]

# Copy each word's pre-trained vector into the row given by its index.
emb_matrix = np.zeros(shape=(vocab_size, emb_dim))
for word, index in words_to_index.items():
    emb_matrix[index] = word_to_vector[word]

# Frozen embedding layer: build it first so its weight variable exists, then
# overwrite that variable with the pre-trained matrix.
embedding_layer = Embedding(
    input_dim=vocab_size,
    output_dim=emb_dim,
    trainable=False,
)
embedding_layer.build((None,))
embedding_layer.set_weights([emb_matrix])


2022-02-27 19:14:36.073025: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-02-27 19:14:36.073214: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-02-27 19:14:36.074500: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


## Tweets Classifier Model

In [21]:
def tweetsModel():
    """Build the (uncompiled) tweet classifier.

    Architecture: the shared, non-trainable ``embedding_layer`` -> three
    Bidirectional LSTM(64) layers that return full sequences, each followed by
    Dropout(0.1) -> a unidirectional LSTM(64) that returns only its last
    output -> Dropout(0.1) -> Dense(32, relu) -> Dense(1, sigmoid).

    Returns:
        tf.keras.Model taking a batch of sequences of length
        ``MAX_TWEET_LENGTH`` and producing one sigmoid output per sequence.
    """
    # `shape` is given as a tuple: a bare int is not a valid shape in newer
    # Keras releases. The values fed to an Embedding layer are integer
    # indices, so the input is declared as int32 rather than float32.
    inputs = Input(shape=(MAX_TWEET_LENGTH,), dtype='int32')
    hidden = embedding_layer(inputs)

    # Three identical recurrent stages; return_sequences=True keeps the time
    # axis so the next recurrent layer still receives a sequence.
    for _ in range(3):
        hidden = Bidirectional(LSTM(64, return_sequences=True))(hidden)
        hidden = Dropout(0.1)(hidden)

    # Final recurrent layer collapses the sequence to a single vector.
    hidden = LSTM(64)(hidden)
    hidden = Dropout(0.1)(hidden)

    hidden = Dense(32, activation='relu')(hidden)
    outputs = Dense(1, activation='sigmoid')(hidden)

    return tf.keras.Model(inputs=inputs, outputs=outputs)


In [22]:
# Build a fresh, uncompiled instance of the classifier defined by tweetsModel().
model = tweetsModel()


In [23]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()


Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 30)]              0         
_________________________________________________________________
embedding (Embedding)        (None, 30, 50)            20000100  
_________________________________________________________________
bidirectional_3 (Bidirection (None, 30, 128)           58880     
_________________________________________________________________
dropout_5 (Dropout)          (None, 30, 128)           0         
_________________________________________________________________
bidirectional_4 (Bidirection (None, 30, 128)           98816     
_________________________________________________________________
dropout_6 (Dropout)          (None, 30, 128)           0         
_________________________________________________________________
bidirectional_5 (Bidirection (None, 30, 128)           9881

### Compile

In [26]:
# The model's last layer already applies a sigmoid, so the loss receives
# probabilities rather than logits. from_logits must therefore be False;
# with True the loss would apply a second sigmoid to the outputs.
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              optimizer=tf.keras.optimizers.RMSprop(),
              metrics=['accuracy'])
              

## Train

In [27]:
class myCallbacks(tf.keras.callbacks.Callback):
    """Stop training once the epoch's training accuracy exceeds 0.6."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the accuracy reported for the finished epoch.

        Args:
            epoch: Index of the epoch that just ended (unused).
            logs: Dict of metric results for the epoch; may be None.
        """
        logs = logs or {}
        # The metric is logged under the name it was compiled with
        # ('accuracy' for metrics=['accuracy']); older Keras versions used the
        # short key 'acc', so try both instead of comparing None with a float.
        accuracy = logs.get('accuracy', logs.get('acc'))
        if accuracy is not None and accuracy > 0.6:
            print('60% training accuacy reached!')
            self.model.stop_training = True

callbacks = myCallbacks()

In [28]:
# Train for 6 epochs in mini-batches of 64 and keep the returned History
# object. Note that the `callbacks` instance created above is not passed to
# fit(), so no early stopping is applied here.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=6,
    batch_size=64,
)


Epoch 1/6

KeyboardInterrupt: 