# RNN model with GloVe embeddings

### Imports

In [1]:
import sys
import numpy as np
import pickle
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense, Input

sys.path.append('../')
from utils import *


## Load the training input and dictionaries

In [2]:
# Number of token positions per tweet; used as the width of the model's input layer.
MAX_TWEET_LENGTH = 50

# Training samples and their labels, as returned by the project helper.
# NOTE(review): presumably X is already padded/truncated to MAX_TWEET_LENGTH — confirm in utils.
X, y = load_training_input()

# Lookup tables: word -> vector, word -> integer index, and the reverse index -> word map.
word_to_vector, words_to_index, index_to_words = load_dictionary()


In [3]:
# Cast the labels to 64-bit floats (what the builtin `float` maps to) before training.
y = y.astype(np.float64)

## Split data

In [4]:
# Hold out 20% of the samples; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=41,
)

### Create an embedding layer with GloVe vectors

In [5]:
# The table gets one more row than there are words, so one row stays all-zero.
# NOTE(review): assumes word indices run 1..len(words_to_index) with 0 free for padding — confirm in utils.
vocab_size = len(words_to_index) + 1

# Read the vector width from a single entry without building a list of every key.
emb_dim = word_to_vector[next(iter(words_to_index))].shape[0]

# Row `index` holds the pretrained vector of the word mapped to `index`.
# float32 is Keras' default weight dtype, so nothing is lost and the array is
# half the size of NumPy's default float64.
emb_matrix = np.zeros((vocab_size, emb_dim), dtype=np.float32)
for word, index in words_to_index.items():
    emb_matrix[index, :] = word_to_vector[word]

# Frozen (non-trainable) lookup layer. It has to be built before set_weights
# can be called, because the weight variable does not exist until then.
embedding_layer = Embedding(vocab_size, emb_dim, trainable=False)
embedding_layer.build((None,))
embedding_layer.set_weights([emb_matrix])


2022-02-17 23:13:59.076413: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-02-17 23:13:59.076637: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-02-17 23:13:59.078030: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


## Tweets Classifier Model

In [6]:
def tweetsModel(lstm_units=128, dropout_rate=0.5):
    """Build the stacked-LSTM tweet classifier on top of the shared embedding layer.

    Architecture: token indices -> `embedding_layer` -> three LSTM layers, each
    followed by dropout -> one dense output unit.

    The output unit has no activation, i.e. the model emits a raw logit. This
    matches the `BinaryCrossentropy(from_logits=True)` loss used when the model
    is compiled. (A softmax over a single unit always evaluates to 1.0, which
    would make the prediction constant and the model untrainable.) Apply
    `tf.sigmoid` to the output to obtain a probability.

    Args:
        lstm_units: Number of units in every LSTM layer.
        dropout_rate: Drop fraction of the Dropout layer after each LSTM.

    Returns:
        An uncompiled `tf.keras.Model` taking a batch of `MAX_TWEET_LENGTH`
        integer token indices and returning one logit per sample.
    """
    # Token indices are integers; the shape is passed as a tuple, as Keras documents it.
    inputs = Input(shape=(MAX_TWEET_LENGTH,), dtype='int32')
    hidden = embedding_layer(inputs)

    # The first two LSTMs return the full sequence so the next LSTM can consume
    # it; the last one returns only its final state.
    for return_sequences in (True, True, False):
        hidden = LSTM(lstm_units, return_sequences=return_sequences)(hidden)
        hidden = Dropout(dropout_rate)(hidden)

    # Linear output: a single logit for binary classification.
    logits = Dense(1)(hidden)

    return tf.keras.Model(inputs=inputs, outputs=logits)


In [7]:
# Instantiate the classifier; it still has to be compiled before training.
model = tweetsModel()

In [8]:
# Print every layer with its output shape and parameter count.
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 50)]              0         
_________________________________________________________________
embedding (Embedding)        (None, 50, 100)           40000200  
_________________________________________________________________
lstm (LSTM)                  (None, 50, 128)           117248    
_________________________________________________________________
dropout (Dropout)            (None, 50, 128)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 50, 128)           131584    
_________________________________________________________________
dropout_1 (Dropout)          (None, 50, 128)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 128)               131584

### Compile

In [9]:
# The loss is configured with from_logits=True, so it treats the model output as
# a raw score rather than a probability. The plain 'accuracy' string would
# threshold that score at 0.5; a logit of 0 corresponds to a probability of 0.5,
# so accuracy is thresholded at 0.0 instead. The metric keeps the name
# 'accuracy' so the keys in the training log/history stay the same.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)],
)

## Train

In [10]:
# Train on the training split only; `history` keeps the per-epoch loss and metric values.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=50,
    batch_size=64,
)

2022-02-17 23:14:17.366143: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2022-02-17 23:14:17.383793: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2799925000 Hz


Epoch 1/50
Epoch 2/50
Epoch 3/50

KeyboardInterrupt: 