# <span style="color:#0b486b"> RNNs for sequence modeling and neural embedding</span>
***


## <span style="color:#0b486b">Set random seeds</span>

In [105]:
import numpy as np
import tensorflow as tf

tf.random.set_seed(6789)
np.random.seed(6789)

## <span style="color:#0b486b">Part 1: Download and preprocess the data</span>



The dataset we use for this assignment is a question classification dataset for which the train set consists of $5,500$ questions belonging to 6 coarse question categories including:
- abbreviation (ABBR), 
- entity (ENTY), 
- description (DESC), 
- human (HUM), 
- location (LOC) and 
- numeric (NUM).


Preprocessing data is an inital and important step in any machine learning or deep learning projects. The following *DataManager* class helps you to download data and preprocess data for the later steps of a deep learning project. 

In [106]:
import os
import zipfile
import collections
from six.moves import range
from six.moves.urllib.request import urlretrieve
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
plt.style.use('ggplot')

class DataManager:
    def __init__(self, verbose=True, maxlen= 50, random_state=6789):
        self.verbose = verbose
        self.max_sentence_len = 0
        self.str_questions = list()
        self.str_labels = list()
        self.numeral_labels = list()
        self.maxlen = maxlen
        self.numeral_data = list()
        self.random_state = random_state
        self.random = np.random.RandomState(random_state)
        
    @staticmethod
    def maybe_download(dir_name, file_name, url, verbose= True):
        if not os.path.exists(dir_name):
            os.mkdir(dir_name)
        if not os.path.exists(os.path.join(dir_name, file_name)):
            urlretrieve(url + file_name, os.path.join(dir_name, file_name))
        if verbose:
            print("Downloaded successfully {}".format(file_name))
    
    def read_data(self, dir_name, file_names):
        for file_name in file_names:
            file_path= os.path.join(dir_name, file_name)
            self.str_questions= list(); self.str_labels= list()
            with open(file_path, "r", encoding="latin-1") as f:
                for row in f:
                    row_str= row.split(":")
                    label, question= row_str[0], row_str[1]
                    question= question.lower()
                    self.str_labels.append(label)
                    self.str_questions.append(question[0:-1])
                    if self.max_sentence_len < len(self.str_questions[-1]):
                        self.max_sentence_len= len(self.str_questions[-1])
         
        # turns labels into numbers
        le= preprocessing.LabelEncoder()
        le.fit(self.str_labels)
        self.numeral_labels = np.array(le.transform(self.str_labels))
        self.str_classes= le.classes_
        self.num_classes= len(self.str_classes)
        if self.verbose:
            print("\nSample questions... \n")
            print(self.str_questions[0:5])
            print("Labels {}\n\n".format(self.str_classes))
    
    def manipulate_data(self):
        tokenizer = tf.keras.preprocessing.text.Tokenizer()
        tokenizer.fit_on_texts(self.str_questions)
        self.numeral_data = tokenizer.texts_to_sequences(self.str_questions)
        self.numeral_data = tf.keras.preprocessing.sequence.pad_sequences(self.numeral_data, padding='post', truncating= 'post', maxlen= self.maxlen)
        self.word2idx = tokenizer.word_index
        self.word2idx = {k:v for k,v in self.word2idx.items()}
        self.idx2word = {v:k for k,v in self.word2idx.items()}
        self.vocab_size = len(self.word2idx)
    
    def train_valid_split(self, train_ratio=0.9):
        idxs = np.random.permutation(np.arange(len(self.str_questions)))
        train_size = int(train_ratio*len(idxs)) +1
        self.train_str_questions, self.valid_str_questions = self.str_questions[0:train_size], self.str_questions[train_size:]
        self.train_numeral_data, self.valid_numeral_data = self.numeral_data[0:train_size], self.numeral_data[train_size:]
        self.train_numeral_labels, self.valid_numeral_labels = self.numeral_labels[0:train_size], self.numeral_labels[train_size:]
        self.tf_train_set = tf.data.Dataset.from_tensor_slices((self.train_numeral_data, self.train_numeral_labels))
        self.tf_valid_set = tf.data.Dataset.from_tensor_slices((self.valid_numeral_data, self.valid_numeral_labels))

In [107]:
print('Loading data...')

dm = DataManager(maxlen=100)
dm.read_data("Data/", ["train_set.label"])   # read data

Loading data...

Sample questions... 

['manner how did serfdom develop in and then leave russia ?', 'cremat what films featured the character popeye doyle ?', "manner how can i find a list of celebrities ' real names ?", 'animal what fowl grabs the spotlight after the chinese year of the monkey ?', 'exp what is the full form of .com ?']
Labels ['ABBR' 'DESC' 'ENTY' 'HUM' 'LOC' 'NUM']




In [108]:
dm.manipulate_data()
dm.train_valid_split(train_ratio=0.6)

You now have a data manager, named *dm* containing the training and validiation sets in both text and numeric forms. Your task is to play around and read this code to figure out the meanings of some important attributes that will be used in the next parts.

self.tf_train_set will contain all of the train data set as tensors, it will make sure that all of the objects have the same dimesion. It will also combine the vectors of the labels and questions.


In [None]:
for valid_numeral_data, valid_numeral_labels in dm.tf_train_set:
      print(valid_numeral_data, valid_numeral_labels)


<img src="images/5tensors.png">

self.tf_tvalid_set will contain all of the validation data set as tensors, it will make sure that all of the objects have the same dimesion. It will also combine the vectors of the labels and questions.



In [None]:
for valid_numeral_data, valid_numeral_labels in dm.tf_valid_set:
       print(valid_numeral_data, valid_numeral_labels)


<img src="images/5tensors2.png">

## <span style="color:#0b486b">Building the RNN </span>



 One-directional RNNs for sequence modeling and neural embedding </span> 

We'll test different RNN attributes in order to obtain the most accurate model.



**In this part, we'll construct an RNN to learn from the dataset of interest. Building a basic RNN with the following requirements:**
- Attribute `data_manager (self.data_manager)`: specifies the data manager used to store data for the model.
- Attribute `cell_type (self.cell_type)`: can receive three values including `basic_rnn`, `gru`, and `lstm` which specifies the memory cells formed a hidden layer.
- `state_sizes (self.state_sizes)` indicates the list of the hidden sizes from the second hidden layers of memory cells. For example, $embed\_size =128$ and $state\_sizes = [64, 64]$ means that you have three hidden layers in your network with hidden sizes of $128, 64$ and $64$ respectively.



In [95]:
class UniRNN:
    def __init__(self, cell_type= 'gru', embed_size= 128, state_sizes= [128, 64], data_manager= None):
        self.cell_type = cell_type
        self.state_sizes = state_sizes
        self.embed_size = embed_size
        self.data_manager = data_manager
        self.vocab_size = self.data_manager.vocab_size +1 
        
    #return the correspoding memory cell
    @staticmethod
    def get_layer(cell_type= 'gru', state_size= 128, return_sequences= False, activation = 'tanh'):
        if cell_type=='gru':
            return  tf.keras.layers.GRU(state_size, return_sequences=return_sequences,activation=activation)
        elif cell_type== 'lstm':
            return tf.keras.layers.LSTM(state_size, return_sequences=return_sequences,activation=activation)
        else:
            return tf.keras.layers.SimpleRNN(state_size, return_sequences=return_sequences,activation=activation)
    
    def build(self):
        x = tf.keras.layers.Input(shape=[None])
        h = tf.keras.layers.Embedding(self.vocab_size, self.embed_size,mask_zero=True, trainable= True)(x)
        num_layers = len(self.state_sizes)
        for i in range(num_layers):
            h = self.get_layer(self.cell_type, self.state_sizes[i], return_sequences=True)(h)
        h = self.get_layer(self.cell_type, self.state_sizes[i], return_sequences=False)(h)
        h = tf.keras.layers.Dense(dm.num_classes, activation='softmax')(h)
        self.model = tf.keras.Model(inputs=x, outputs=h)
   
    def compile_model(self, *args, **kwargs):
        self.model.compile(*args, **kwargs)
    
    def fit(self, *args, **kwargs):
        return self.model.fit(*args, **kwargs)
    
    def evaluate(self, *args, **kwargs):
        self.model.evaluate(*args, **kwargs)       



**Run with basic RNN ('basic_rnn') cell with $embed\_size= 128, state\_sizes= [128, 128], data\_manager= dm$.**



In [96]:
uni_rnn = UniRNN(cell_type="basic_rnn",embed_size=128,state_sizes=[128,128],data_manager=dm)
uni_rnn.build()
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
uni_rnn.compile_model(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
uni_rnn.fit(dm.tf_train_set.batch(64), epochs=20, validation_data = dm.tf_valid_set.batch(64))

Train for 52 steps, validate for 35 steps
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1d062ee7188>


**GRU ('gru') cell with $embed\_size= 128, state\_sizes= [128, 128], data\_manager= dm$.**



In [98]:
uni_rnn = UniRNN(cell_type="gru",embed_size=128,state_sizes=[128,128],data_manager=dm)
uni_rnn.build()
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
uni_rnn.compile_model(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
uni_rnn.fit(dm.tf_train_set.batch(64), epochs=20, validation_data = dm.tf_valid_set.batch(64))

Train for 52 steps, validate for 35 steps
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1d08a906288>


**LSTM ('lstm') cell with $embed\_size= 128, state\_sizes= [128, 128], data\_manager= dm$.**



In [99]:
uni_rnn = UniRNN(cell_type="lstm",embed_size=128,state_sizes=[128,128],data_manager=dm)
uni_rnn.build()
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
uni_rnn.compile_model(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
uni_rnn.fit(dm.tf_train_set.batch(64), epochs=20, validation_data = dm.tf_valid_set.batch(64))

Train for 52 steps, validate for 35 steps
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1d0a2dd7a48>

From training our model we got excelent results, the lowest accuracy is well above 95%.The accuracy gotten from the model is with the GRU with 96.93%. It is roughly 0.5% more accurate than our simple Rnn which give us great results but having such high accuracy any percetage increase is a huge gain.

As GRU vs LSTM, there is no saying on which one is better as it is adviced to try both to see which one yields more accurate restults. However, GRU is a clear winner on this one. Lets see how they perfom on the next model.


### Bi-directional RNNs for sequence modeling and neural embedding </span> ###





**Building a Bi=directional RNN*.**



In [112]:
class BiRNN:
    def __init__(self, cell_type= 'gru', embed_size= 128, state_sizes= [128, 64], data_manager= None):
        self.cell_type = cell_type
        self.state_sizes = state_sizes
        self.embed_size = embed_size
        self.data_manager = data_manager
        self.vocab_size = self.data_manager.vocab_size +1
        
    @staticmethod
    def get_layer(cell_type= 'gru', state_size= 128, return_sequences= False, activation = 'tanh'):
        if cell_type=='gru':
            return  tf.keras.layers.Bidirectional(tf.keras.layers.GRU(state_size, return_sequences=return_sequences,activation=activation))
        elif cell_type== 'lstm':
            return  tf.keras.layers.Bidirectional((tf.keras.layers.LSTM(state_size, return_sequences=return_sequences,activation=activation)))
        else:
            return tf.keras.layers.Bidirectional(state_size, return_sequences=return_sequences,activation=activation)
    
    def build(self):
        x = tf.keras.layers.Input(shape=[None])
        h = tf.keras.layers.Embedding(self.vocab_size, self.embed_size,mask_zero=True, trainable= True)(x)
        num_layers = len(self.state_sizes)
        for i in range(num_layers):
            h = self.get_layer(self.cell_type, self.state_sizes[i], return_sequences=True)(h)
        h = self.get_layer(self.cell_type, self.state_sizes[i], return_sequences=False)(h)
        h = tf.keras.layers.Dense(dm.num_classes, activation='softmax')(h)
        self.model = tf.keras.Model(inputs=x, outputs=h)
        
    
    def compile_model(self, *args, **kwargs):
        self.model.compile(*args, **kwargs)
    
    def fit(self, *args, **kwargs):
        return self.model.fit(*args, **kwargs)
    
    def evaluate(self, *args, **kwargs):
        self.model.evaluate(*args, **kwargs)       



**BiRNN for basic RNN ('basic_rnn') cell with $embed\_size= 128, state\_sizes= [128, 128], data\_manager= dm$.**



In [None]:
bi_rnn =  BiRNN(cell_type="basic_rnn",embed_size=128,state_sizes=[128,128],data_manager=dm)
bi_rnn.build()
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
bi_rnn.compile_model(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
bi_rnn.fit(dm.tf_train_set.batch(64), epochs=20, validation_data = dm.tf_valid_set.batch(64))

<img src="Images/1.BiRNN-Simple.png">


**Running BiRNN for GRU ('gru') cell with $embed\_size= 128, state\_sizes= [128, 128], data\_manager= dm$.**



In [113]:
bi_rnn = BiRNN(cell_type="gru",embed_size=128,state_sizes=[128,128],data_manager=dm)
bi_rnn.build()
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
bi_rnn.compile_model(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
bi_rnn.fit(dm.tf_train_set.batch(64), epochs=20, validation_data = dm.tf_valid_set.batch(64))

Train for 52 steps, validate for 35 steps
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1d0dfb58b08>


**BiRNN for LSTM ('lstm') cell with $embed\_size= 128, state\_sizes= [128, 128], data\_manager= dm$.**



In [105]:
bi_rnn = BiRNN(cell_type="lstm",embed_size=128,state_sizes=[128,128],data_manager=dm)
bi_rnn.build()
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
bi_rnn.compile_model(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
bi_rnn.fit(dm.tf_train_set.batch(64), epochs=20, validation_data = dm.tf_valid_set.batch(64))

Train for 52 steps, validate for 35 steps
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1d0c0c34248>

Same case as before GRU is the clear winner. In this case it got 97.75%, it has been the highest accuracy we've had so far.

It's important to notice that bidirectional RNN consumes alot more resources than unidirectional, this is because the information can go forwards or backwards. As it can be show, each epoch in unidirectional takes around 2 mins while bi directional takes around 6 mins. The pictures that appear to have a faster epoch is because google colab was used to speed up the process. At the end, using Bidirectional RNN gave us a 1% improvement, it's not much but when we have high accuracies close to 100%, 1% shows a huge improvement!

### Testing RNNs with various types, cells, and fine-tuning embedding matrix for sequence modeling and neural embedding </span> ###





**We'll combine the RNN's made before to determine which model gets the highest accuracy.**

**Below are the descriptions of the attributes of the class *RNN*:**
- `run_mode (self.run_mode)` has three values (scratch, init-only, and init-fine-tune).
  - `scratch` means training the embedding matrix from scratch.
  - `init-only` means only initialzing the embedding matrix with a pretrained Word2Vect but not further doing fine-tuning that matrix.
  - `init-fine-tune` means both initialzing the embedding matrix with a pretrained Word2Vect and further doing fine-tuning that matrix.
- `network_type (self.network_type)` has two values (uni-directional and bi-directional) which correspond to either Uni-directional RNN or Bi-directional RNN.
- `cell_type (self.cell_type)` has three values (simple-rnn, gru, and lstm) which specify the memory cell used in the network.
- `embed_model (self.embed_model)` specifes the pretrained Word2Vect model used.
-  `embed_size (self.embed_size)` specifes the embedding size. Note that when run_mode is either init-only' or 'init-fine-tune', this embedding size is extracted from embed_model for dimension compatability.
- `state_sizes (self.state_sizes)` indicates the list of the hidden sizes from the second hidden layers of memory cells. For example, $embed\_size =128$ and $state\_sizes = [64, 64]$ means that you have three hidden layers in your network with hidden sizes of $128, 64$ and $64$ respectively.





In [109]:
import gensim.downloader as api
class RNN:
    def __init__(self, run_mode = 'scratch', cell_type= 'gru', network_type = 'uni-directional', embed_model= 'glove-wiki-gigaword-100', 
                 embed_size= 128, state_sizes = [64, 64], data_manager = None):
        self.run_mode = run_mode
        self.data_manager = data_manager
        self.cell_type = cell_type
        self.network_type = network_type
        self.state_sizes = state_sizes
        self.embed_model = embed_model
        self.embed_size = embed_size
        if self.run_mode != 'scratch':
            self.embed_size = int(self.embed_model.split("-")[-1])
        self.data_manager = data_manager
        self.vocab_size = dm.vocab_size +1
        self.word2idx = dm.word2idx
        self.word2vect = None
        self.embed_matrix = np.zeros(shape= [self.vocab_size, self.embed_size])
    
    def build_embedding_matrix(self):
        if os.path.exists("E.npy"):  
            self.embed_matrix = np.load("E.npy")          
        else: 
            self.word2vect = api.load(self.embed_model)   
            for word, idx in self.word2idx.items():
                try:
                    self.embed_matrix[idx] = self.word2vect.word_vec(word)    
                except KeyError: 
                    pass
            np.save("E.npy", self.embed_matrix)
    
    @staticmethod
    def get_layer(cell_type= 'gru', network_type= 'uni-directional', state_size= 128, return_sequences= False, activation = 'tanh'):
        if network_type== "uni-directional":
            if cell_type=='gru':
                return  tf.keras.layers.GRU(state_size,return_sequences=return_sequences,activation=activation)
            elif cell_type== 'lstm':
                return tf.keras.layers.LSTM(state_size, return_sequences=return_sequences,activation=activation)
            else:
                return tf.keras.layers.SimpleRNN(state_size, return_sequences=return_sequences,activation=activation)
        if network_type== "bi-directional":
            if cell_type=='gru':
                return  tf.keras.layers.Bidirectional(tf.keras.layers.GRU(state_size, return_sequences=return_sequences,activation=activation))
            elif cell_type== 'lstm':
                return  tf.keras.layers.Bidirectional((tf.keras.layers.LSTM(state_size, return_sequences=return_sequences,activation=activation)))
            else:
                return tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(state_size, return_sequences=return_sequences,activation=activation))
        
    
    def build(self):
        inputs = tf.keras.layers.Input(shape=[None])
        if self.run_mode == "scratch":
            self.embedding_layer = tf.keras.layers.Embedding(self.vocab_size, self.embed_size, mask_zero= True, trainable= True)
        else: #fine-tuned
            self.build_embedding_matrix()
            self.embedding_layer = tf.keras.layers.Embedding(self.vocab_size, self.embed_size,mask_zero= True, weights= [self.embed_matrix], trainable= True)
        num_layers = len(self.state_sizes)
        h=tf.keras.layers.Embedding(self.vocab_size, self.embed_size,mask_zero=True, trainable= True)(inputs)
        h = self.embedding_layer(inputs)
        for i in range(num_layers):
            h = self.get_layer(self.cell_type,self.network_type, self.state_sizes[i], return_sequences=True)(h)
        h = self.get_layer(self.cell_type,self.network_type, self.state_sizes[i], return_sequences=False)(h)
        #h = tf.keras.layers.GRU(256, return_sequences=True)(h)
        #h = tf.keras.layers.GRU(128)(h)
        h = tf.keras.layers.Dense(dm.num_classes, activation='softmax')(h)
        self.model = tf.keras.Model(inputs=inputs, outputs=h)
        
    def compile_model(self, *args, **kwargs):
        self.model.compile(*args, **kwargs)
    
    def fit(self, *args, **kwargs):
        return self.model.fit(*args, **kwargs)
    
    def evaluate(self, *args, **kwargs):
        self.model.evaluate(*args, **kwargs)       




**Now we'll compare the 3 models**



In [49]:
rnn2 = RNN(run_mode="scratch", network_type="uni-directional",cell_type="gru",data_manager=dm,embed_size=128,state_sizes=[128,128])
rnn2.build()
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
rnn2.compile_model(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
rnn2.fit(dm.tf_train_set.batch(64), epochs=20, validation_data = dm.tf_valid_set.batch(64))

Train for 52 steps, validate for 35 steps
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1332cca7508>

In [54]:
rnn2 = RNN(run_mode="init-fine-tune", network_type="uni-directional",cell_type="gru",data_manager=dm,embed_size=128,state_sizes=[128,128])
rnn2.build()
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
rnn2.compile_model(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
rnn2.fit(dm.tf_train_set.batch(64), epochs=20, validation_data = dm.tf_valid_set.batch(64))

Train for 52 steps, validate for 35 steps
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x133899e22c8>

In [59]:
rnn2.evaluate(dm.tf_valid_set.batch(64))





**WWe'll run the RNN model changing the 5 parameters**



This are the codes of the models used, it was run in google colab to speed up the process, images are below:

In [None]:
rnn2 = RNN(run_mode="init-fine-tune", network_type="uni-directional",cell_type="lstm",data_manager=dm,embed_size=128,state_sizes=[128,128])
rnn2.build()
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
rnn2.compile_model(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
rnn2.fit(dm.tf_train_set.batch(64), epochs=20, validation_data = dm.tf_valid_set.batch(64))

In [None]:
rnn3 = RNN(run_mode="init-fine-tune", network_type="bi-directional",cell_type="gru",data_manager=dm,embed_size=128,state_sizes=[128,128])
rnn3.build()
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
rnn3.compile_model(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
rnn3.fit(dm.tf_train_set.batch(64), epochs=20, validation_data = dm.tf_valid_set.batch(64))

In [None]:
rnn4 = RNN(run_mode="init-fine-tune", network_type="bi-directional",cell_type="lstm",data_manager=dm,embed_size=128,state_sizes=[128,128])
rnn4.build()
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
rnn4.compile_model(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
rnn4.fit(dm.tf_train_set.batch(64), epochs=20, validation_data = dm.tf_valid_set.batch(64))

In [None]:
rnn5 = RNN(run_mode="init-fine-tune", network_type="bi-directional",cell_type="SimpleRNN",data_manager=dm,embed_size=128,state_sizes=[128,128])
rnn5.build()
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
rnn5.compile_model(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
rnn5.fit(dm.tf_train_set.batch(64), epochs=20, validation_data = dm.tf_valid_set.batch(64))

In [None]:
rnn6 = RNN(run_mode="init-fine-tune", network_type="uni-directional",cell_type="SimpleRNN",data_manager=dm,embed_size=128,state_sizes=[128,128])
rnn6.build()
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
rnn6.compile_model(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
rnn6.fit(dm.tf_train_set.batch(64), epochs=20, validation_data = dm.tf_valid_set.batch(64))

#Report your results here

Model 1 (run_mode ='init-fine-tune',...): accuracy =
<img src="Images/RNN1.png">
<img src="Images/RNN2.png">
<img src="Images/RNN3.png">
<img src="Images/RNN4.png">
<img src="Images/RNN5.png">

In [None]:
rnn3 = RNN(run_mode="init-fine-tune", network_type="bi-directional",cell_type="gru",data_manager=dm,embed_size=128,state_sizes=[128,128])
rnn3.build()
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
rnn3.compile_model(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
rnn3.fit(dm.tf_train_set.batch(64), epochs=20, validation_data = dm.tf_valid_set.batch(64))

The best model obtaines was a bidirection RNN with GRU and a learning rate of 0.001. This yielded an accuracy of 98.74%