# RNN model with GloVe embeddings

### Imports

In [63]:
import sys
import numpy as np
import pickle
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense, Input

sys.path.append('../')
from utils import *


## Load the training input and dictionaries

In [64]:
# Number of word positions per tweet; used as the model's input length.
MAX_TWEET_LENGTH = 50

# Both loaders come from the star-import of `utils`.
# presumably X holds index-encoded tweets and y the binary labels — TODO confirm in utils
X, y = load_training_input()

# Three lookup tables: word -> embedding vector, word -> integer index,
# and integer index -> word.
(word_to_vector,
 words_to_index,
 index_to_words) = load_dictionary()


## Split data

In [65]:
# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=41,
)


### Create an embedding layer with GloVe vectors

In [68]:
# One row more than the vocabulary has words
# (presumably index 0 is reserved for padding and word indices start at 1 —
# TODO confirm in utils). Rows never assigned below stay all-zero.
vocab_size = 1 + len(words_to_index)

# Read the embedding dimensionality off the vector of the first vocabulary
# word, without materialising the full key list.
first_word = next(iter(words_to_index))
emb_dim = word_to_vector[first_word].shape[0]

# Row `index` of the matrix holds the pre-trained vector of the word mapped
# to that index.
emb_matrix = np.zeros(shape=(vocab_size, emb_dim))
for word, index in words_to_index.items():
    emb_matrix[index] = word_to_vector[word]

# Frozen embedding layer: it is built first so its weight tensor exists,
# then the pre-trained matrix is copied in.
embedding_layer = Embedding(
    input_dim=vocab_size,
    output_dim=emb_dim,
    trainable=False,
)
embedding_layer.build((None,))
embedding_layer.set_weights([emb_matrix])


## Tweets Classifier Model

In [71]:
def tweetsModel():
    """Build the two-layer LSTM binary tweet classifier.

    The network takes tweets encoded as ``MAX_TWEET_LENGTH`` word indices,
    maps them through the module-level ``embedding_layer`` and stacks two
    256-unit LSTM layers, each followed by 50% dropout.

    Returns:
        tf.keras.Model: an uncompiled model whose output has shape
        ``(batch, 1)`` and holds one raw logit per tweet. Pair it with a
        loss configured with ``from_logits=True`` and apply a sigmoid to
        the output to obtain a probability.
    """
    # Word indices are integers, and the shape is passed as a tuple, which
    # is the form the Keras ``Input`` API documents.
    inputs = Input(shape=(MAX_TWEET_LENGTH,), dtype='int32')
    embeddings = embedding_layer(inputs)

    # The first LSTM emits its output at every time step so that the second
    # LSTM receives a sequence.
    hidden = LSTM(256, return_sequences=True)(embeddings)
    hidden = Dropout(0.5)(hidden)

    # The second LSTM keeps only its last output as the tweet representation.
    hidden = LSTM(256, return_sequences=False)(hidden)
    hidden = Dropout(0.5)(hidden)

    # A single linear unit producing a logit. Softmax must not be used here:
    # normalising over one unit always yields exactly 1.0, so the model
    # would output a constant and receive no useful gradient.
    logits = Dense(1)(hidden)

    model = tf.keras.Model(inputs=inputs, outputs=logits)
    return model


In [73]:
# Instantiate the classifier; tweetsModel() returns it uncompiled.
model = tweetsModel()


In [75]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()


Model: "model_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_13 (InputLayer)        [(None, 50)]              0         
_________________________________________________________________
embedding_9 (Embedding)      (None, 50, 50)            20000100  
_________________________________________________________________
lstm_18 (LSTM)               (None, 50, 256)           314368    
_________________________________________________________________
dropout_14 (Dropout)         (None, 50, 256)           0         
_________________________________________________________________
lstm_19 (LSTM)               (None, 256)               525312    
_________________________________________________________________
dropout_15 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 257 

### Compile

In [76]:
# from_logits=True makes the loss apply the sigmoid itself, so it expects the
# model's final layer to emit raw, unbounded scores rather than probabilities.
bce_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)

# NOTE(review): 0.05 is well above Adam's default learning rate of 0.001 —
# confirm this value is intentional.
adam = tf.keras.optimizers.Adam(learning_rate=0.05)

# NOTE(review): the 'accuracy' shortcut presumably thresholds predictions at
# 0.5, which assumes probabilities; with logit outputs the reported accuracy
# may be skewed — confirm.
model.compile(optimizer=adam, loss=bce_loss, metrics=['accuracy'])
              

## Train

In [None]:
# Train on the training split only; `history` keeps the per-epoch loss and
# metric values returned by fit().
history = model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=4,
)
