In [None]:
import nltk
import keras
import gensim
import seaborn as sns
import pandas as pd
import numpy as np

In [None]:
# Download the tagged corpora and the universal-tagset mapping used in this notebook.
# NOTE(review): 'punkt' is fetched as well, but no cell visible here uses it - confirm it is still needed.
NLTK_PACKAGES = ['brown', 'treebank', 'conll2000', 'punkt', 'universal_tagset']

# nltk.download() signals failure by returning False rather than raising, so collect
# the failures and stop here instead of hitting a missing-resource error in a later cell.
failed_packages = [package for package in NLTK_PACKAGES if not nltk.download(package)]
if failed_packages:
    raise RuntimeError("Could not download NLTK packages: {}".format(failed_packages))

[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.
[nltk_data] Downloading package treebank to /root/nltk_data...
[nltk_data]   Unzipping corpora/treebank.zip.
[nltk_data] Downloading package conll2000 to /root/nltk_data...
[nltk_data]   Unzipping corpora/conll2000.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package universal_tagset to /root/nltk_data...
[nltk_data]   Unzipping taggers/universal_tagset.zip.


True

## Data preprocessing

In [None]:
from nltk.corpus import brown, treebank, conll2000

In [None]:
# Read the tagged sentences of each corpus, requesting the 'universal' tagset for all three
# so that their tags are directly comparable.
treebank_corpus, brown_corpus, conll_corpus = (
    corpus_reader.tagged_sents(tagset='universal')
    for corpus_reader in (treebank, brown, conll2000)
)

In [None]:
# Pool the three corpora into a single sequence of tagged sentences.
# NOTE(review): these look like NLTK corpus views rather than plain lists, so `+`
# presumably builds a lazy concatenation - confirm before relying on list methods.
tagged_sentences = treebank_corpus + brown_corpus + conll_corpus
# Display the first sentence: a list of (word, tag) pairs.
tagged_sentences[0]

[('Pierre', 'NOUN'),
 ('Vinken', 'NOUN'),
 (',', '.'),
 ('61', 'NUM'),
 ('years', 'NOUN'),
 ('old', 'ADJ'),
 (',', '.'),
 ('will', 'VERB'),
 ('join', 'VERB'),
 ('the', 'DET'),
 ('board', 'NOUN'),
 ('as', 'ADP'),
 ('a', 'DET'),
 ('nonexecutive', 'ADJ'),
 ('director', 'NOUN'),
 ('Nov.', 'NOUN'),
 ('29', 'NUM'),
 ('.', '.')]

In [None]:
# Split every tagged sentence into two parallel lists:
#   X[i] holds the words of sentence i, y[i] holds the matching tags.

X, y = [], []

for tagged_sentence in tagged_sentences:
  X.append([pair[0] for pair in tagged_sentence])
  y.append([pair[1] for pair in tagged_sentence])

# One row per sentence, for easy inspection.
data = pd.DataFrame({'Sentence': X, 'Tags': y})

In [None]:
# Preview the first rows: each row pairs a sentence's token list with its tag list.
data.head()

Unnamed: 0,Sentence,Tags
0,"[Pierre, Vinken, ,, 61, years, old, ,, will, j...","[NOUN, NOUN, ., NUM, NOUN, ADJ, ., VERB, VERB,..."
1,"[Mr., Vinken, is, chairman, of, Elsevier, N.V....","[NOUN, NOUN, VERB, NOUN, ADP, NOUN, NOUN, ., D..."
2,"[Rudolph, Agnew, ,, 55, years, old, and, forme...","[NOUN, NOUN, ., NUM, NOUN, ADJ, CONJ, ADJ, NOU..."
3,"[A, form, of, asbestos, once, used, *, *, to, ...","[DET, NOUN, ADP, NOUN, ADV, VERB, X, X, PRT, V..."
4,"[The, asbestos, fiber, ,, crocidolite, ,, is, ...","[DET, NOUN, NOUN, ., NOUN, ., VERB, ADV, ADJ, ..."


In [None]:
# Sequence tagging is a many-to-many problem: every word carries exactly one tag,
# so a sentence and its tag sequence must be equally long. Spot-check the first pair.
first_sentence, first_tags = X[0], y[0]
print("Length of first input sequence  : {}".format(len(first_sentence)))
print("Length of first output sequence : {}".format(len(first_tags)))

Length of first input sequence  : 18
Length of first output sequence : 18


## Vectorise X and Y

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer

In [None]:
# encode X

# Build the word -> integer id vocabulary from all sentences, then replace every word by its id.
# NOTE(review): Tokenizer() is used with its defaults (which presumably include lowercasing) and
# without an out-of-vocabulary token - confirm this is what the word2vec lookup below expects.
word_tokenizer = Tokenizer()
word_tokenizer.fit_on_texts(X)
X_encoded = word_tokenizer.texts_to_sequences(X)

In [None]:
# encode Y

# Same procedure for the tags: learn a tag -> integer id mapping, then encode every tag sequence.
tag_tokenizer = Tokenizer()
tag_tokenizer.fit_on_texts(y)
Y_encoded = tag_tokenizer.texts_to_sequences(y)

In [None]:
# Show the first sentence before and after integer encoding.

sample_views = (
    ("** Raw data point **", X[0], y[0]),
    ("** Encoded data point **", X_encoded[0], Y_encoded[0]),
)

for position, (heading, words, tags) in enumerate(sample_views):
    if position:
        print()  # blank line between the two sections
    print(heading, "\n", "-"*75, "\n")
    print('X: ', words, '\n')
    print('Y: ', tags, '\n')

** Raw data point ** 
 --------------------------------------------------------------------------- 

X:  ['Pierre', 'Vinken', ',', '61', 'years', 'old', ',', 'will', 'join', 'the', 'board', 'as', 'a', 'nonexecutive', 'director', 'Nov.', '29', '.'] 

Y:  ['NOUN', 'NOUN', '.', 'NUM', 'NOUN', 'ADJ', '.', 'VERB', 'VERB', 'DET', 'NOUN', 'ADP', 'DET', 'ADJ', 'NOUN', 'NOUN', 'NUM', '.'] 


** Encoded data point ** 
 --------------------------------------------------------------------------- 

X:  [6423, 24231, 2, 7652, 102, 170, 2, 47, 1898, 1, 269, 17, 7, 13230, 619, 1711, 2761, 3] 

Y:  [1, 1, 3, 11, 1, 6, 3, 2, 2, 5, 1, 4, 5, 6, 1, 1, 11, 3] 



In [None]:
# Encoding must not change sentence lengths: mark with 1 every sentence whose
# encoded word sequence and encoded tag sequence differ in length, 0 otherwise.

different_length = [
    int(len(word_ids) != len(tag_ids))
    for word_ids, tag_ids in zip(X_encoded, Y_encoded)
]
print("{} sentences have disparate input-output lengths.".format(sum(different_length)))

0 sentences have disparate input-output lengths.


## Pad sequences

The next step after encoding the data is to define the sequence lengths. As of now, the sentences present in the data are of various lengths. We need to either pad short sentences or truncate long sentences to a fixed length. This fixed length, however, is a hyperparameter.

In [None]:
# Length of every encoded sentence; the maximum guides the choice of a fixed sequence length.

seq_len = list(map(len, X_encoded))
print('Max sequence length:', max(seq_len))

Max sequence length: 271


In [None]:
# Bring every sequence to exactly MAX_SEQ_LENGTH ids with Keras' pad_sequences():
#   - shorter sentences get zeroes added on the left   (padding="pre")
#   - longer sentences lose their right-hand tokens    (truncating="post")
# Words and tags are padded with identical settings so they stay aligned position by position.

from tensorflow.keras.preprocessing.sequence import pad_sequences

MAX_SEQ_LENGTH = 100  # sequences greater than 100 in length will be truncated

X_padded, Y_padded = (
    pad_sequences(encoded_sequences, maxlen=MAX_SEQ_LENGTH, padding="pre", truncating="post")
    for encoded_sequences in (X_encoded, Y_encoded)
)

In [None]:
# print the first sequence
# Both arrays should show the same number of leading zeroes (the left padding).
print(X_padded[0], "\n"*3)
print(Y_padded[0])

[    0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0  6423 24231
     2  7652   102   170     2    47  1898     1   269    17     7 13230
   619  1711  2761     3] 



[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  1  1  3 11  1  6  3  2  2  5  1  4  5  6
  1  1 11  3]


In [None]:
# From here on X and Y refer to the padded integer arrays
# (X no longer holds the raw token lists built earlier).
X = X_padded
Y = Y_padded

## Word embeddings

Currently, each word and each tag is encoded as an integer.

We'll use a more sophisticated technique to represent the input words (X) using what's known as word embeddings.

However, to represent each tag in Y, we'll simply use a one-hot encoding scheme, since there are only 12 tags in the dataset (13 classes once the padding index 0 is included) and the LSTM will have no problems in learning its own representation of these tags.

To use word embeddings, you can go for either of the following models:

word2vec model: https://code.google.com/archive/p/word2vec/

GloVe model : https://nlp.stanford.edu/projects/glove/

We're using the word2vec model for no particular reason. Both of these are very efficient in representing words. You can try both and see which one works better.

The shape of the word-embedding matrix is: (VOCABULARY_SIZE, EMBEDDING_DIMENSION)

In [None]:
# import word embeddings

# Mount Google Drive so the pretrained embedding file can be read from it.
# This cell only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Location of the pretrained GoogleNews word2vec file on the mounted Drive.
# NOTE(review): hard-coded, user-specific path - adjust before running in another environment.
path = '/content/drive/MyDrive/Colab Notebooks/GoogleNews-vectors-negative300.bin.gz'

In [None]:
from gensim.models import KeyedVectors

# load word2vec using the following function present in the gensim library
# binary=True: the file is in the binary word2vec format rather than the text format.
word2vec = KeyedVectors.load_word2vec_format(path, binary=True)

In [None]:
# word2vec effectiveness
# Sanity check with the classic analogy King - Man + Woman; 'Queen' is expected near the top.
word2vec.most_similar(positive = ["King", "Woman"], negative = ["Man"])

[('Queen', 0.4929387867450714),
 ('Tupou_V.', 0.45174285769462585),
 ('Oprah_BFF_Gayle', 0.4422132968902588),
 ('Jackson', 0.440250426530838),
 ('NECN_Alison', 0.4331282675266266),
 ('Whitfield', 0.42834725975990295),
 ('Ida_Vandross', 0.42084527015686035),
 ('prosecutor_Dan_Satterberg', 0.420758992433548),
 ('martin_Luther_King', 0.42059651017189026),
 ('Coretta_King', 0.4202733635902405)]

In [None]:
# assign word vectors from word2vec model

EMBEDDING_SIZE  = 300  # each word in word2vec model is represented using a 300 dimensional vector
VOCABULARY_SIZE = len(word_tokenizer.word_index) + 1  # +1 because row 0 is kept for the padding id

# create an empty embedding matrix; rows stay all-zero for the padding id
# and for every corpus word that has no pretrained vector
embedding_weights = np.zeros((VOCABULARY_SIZE, EMBEDDING_SIZE))

# create a word to index dictionary mapping
word2id = word_tokenizer.word_index

# copy vectors from word2vec model to the words present in corpus,
# counting the misses instead of silently ignoring them
# NOTE(review): if the tokenizer lowercases words while this word2vec vocabulary is
# case-sensitive, capitalised entries (e.g. proper nouns) will be missed - check the count below.
missing_words = 0
for word, index in word2id.items():
    try:
        embedding_weights[index, :] = word2vec[word]
    except KeyError:
        missing_words += 1

print("{} of {} corpus words have no word2vec vector (rows left as zeros)".format(missing_words, len(word2id)))

In [None]:
# check embedding dimension
# Expected: (VOCABULARY_SIZE, EMBEDDING_SIZE), i.e. one row per word id.
print("Embeddings shape: {}".format(embedding_weights.shape))

Embeddings shape: (59449, 300)


In [None]:
# let's look at an embedding of a word
# Look up the word's integer id, then read the matching row of the embedding matrix.
embedding_weights[word_tokenizer.word_index['joy']]

array([ 0.4453125 , -0.20019531,  0.20019531, -0.03149414,  0.078125  ,
       -0.390625  ,  0.13671875, -0.13867188,  0.05395508,  0.10546875,
       -0.05029297, -0.23730469,  0.19921875,  0.12597656, -0.12695312,
        0.34179688,  0.06347656,  0.26757812, -0.07324219, -0.29101562,
        0.10498047,  0.11914062,  0.23730469,  0.00640869,  0.12451172,
       -0.00939941, -0.02770996,  0.03076172,  0.07421875, -0.22851562,
       -0.08056641, -0.05273438,  0.16894531,  0.19824219, -0.15625   ,
       -0.08740234,  0.10742188, -0.07177734,  0.05200195,  0.25976562,
        0.171875  , -0.13574219,  0.06738281,  0.00531006,  0.15527344,
       -0.03515625,  0.08789062,  0.3359375 , -0.12890625,  0.17578125,
       -0.08642578,  0.32421875, -0.09033203,  0.35351562,  0.24316406,
       -0.07470703, -0.06640625, -0.17578125,  0.06689453, -0.03833008,
        0.0100708 , -0.21484375, -0.03686523,  0.04394531,  0.02209473,
        0.00219727, -0.22460938,  0.03015137, -0.21582031,  0.16

## Use one-hot encoding for output sequences (Y)

In [None]:
from tensorflow.keras.utils import to_categorical

# use Keras' to_categorical function to one-hot encode Y
# Encode from Y_padded (not from Y itself) so that re-running this cell never
# one-hot encodes an array that is already one-hot.
# num_classes comes from the tag vocabulary (+1 for the padding id 0) rather than
# being inferred from the largest id that happens to occur in the data.
Y = to_categorical(Y_padded, num_classes=len(tag_tokenizer.word_index) + 1)

print(Y.shape)

(72202, 100, 13)


## Split data into training, validation and testing sets

In [None]:
import sklearn
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 15% of all sentences as the test set, then 15% of the remainder as the validation set.
# The intermediate split gets its own names and both splits live in one cell, so that
# re-running it always starts from X and Y and never re-splits an already reduced training set.
X_train_full, X_test, Y_train_full, Y_test = train_test_split(X, Y, test_size=0.15, random_state=4)
X_train, X_validation, Y_train, Y_validation = train_test_split(X_train_full, Y_train_full, test_size=0.15, random_state=4)

# The intermediate arrays are no longer needed; drop the names to free memory.
del X_train_full, Y_train_full

In [None]:
# Report the input and output shapes of each data split.
data_splits = (
    ("TRAINING DATA", X_train, Y_train),
    ("VALIDATION DATA", X_validation, Y_validation),
    ("TESTING DATA", X_test, Y_test),
)

for position, (title, inputs, outputs) in enumerate(data_splits):
    if position:
        print("-"*50)  # divider between consecutive splits
    print(title)
    print('Shape of input sequences: {}'.format(inputs.shape))
    print('Shape of output sequences: {}'.format(outputs.shape))

TRAINING DATA
Shape of input sequences: (52165, 100)
Shape of output sequences: (52165, 100, 13)
--------------------------------------------------
VALIDATION DATA
Shape of input sequences: (9206, 100)
Shape of output sequences: (9206, 100, 13)
--------------------------------------------------
TESTING DATA
Shape of input sequences: (10831, 100)
Shape of output sequences: (10831, 100, 13)


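Before using an RNN, we must make sure the dimensions of the data are what an RNN expects. In general, an RNN expects the shapes below. Here X is passed as (#samples, #timesteps) integer word ids, and the Embedding layer adds the #features axis.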

Shape of X: (#samples, #timesteps, #features)

Shape of Y: (#samples, #timesteps, #features)

## Vanilla RNN

### Arbitrarily initialised, untrainable embeddings

First let's try running a vanilla RNN. For this RNN we won't use the pre-trained word embeddings. We'll use randomly initialised embeddings. Moreover, we won't update the embedding weights.

In [None]:
# number of output classes = size of the last (one-hot) axis of Y;
# this counts the tags plus class 0, which the padded positions map to
NUM_CLASSES = Y.shape[2]

In [None]:
from tensorflow.keras.layers import SimpleRNN, TimeDistributed, Dense, Embedding
from tensorflow.keras.models import Sequential

# Baseline tagger: randomly initialised, frozen embeddings -> SimpleRNN -> per-token softmax.

rnn_model = Sequential()

# embedding layer - maps each integer word id to a dense vector
rnn_model.add(Embedding(
    input_dim=VOCABULARY_SIZE,    # vocabulary size - number of unique words in data (+1 for padding id 0)
    output_dim=EMBEDDING_SIZE,    # length of vector with which each word is represented
    input_length=MAX_SEQ_LENGTH,  # length of each padded input sequence
    mask_zero=True,               # id 0 is padding: skip it in the recurrence and leave it out of loss/accuracy
    trainable=False,              # False - don't update the (random) embeddings
))

# recurrent layer with 64 units; return_sequences=True emits an output at every timestep
# (False would return only the output at the end of the sequence)
rnn_model.add(SimpleRNN(64, return_sequences=True))

# apply the same softmax classifier to the output of every timestep
rnn_model.add(TimeDistributed(Dense(NUM_CLASSES, activation='softmax')))

In [None]:
# Print the per-layer output shapes and parameter counts (trainable vs. non-trainable).
rnn_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 300)          17834700  
                                                                 
 simple_rnn (SimpleRNN)      (None, 100, 64)           23360     
                                                                 
 time_distributed (TimeDist  (None, 100, 13)           845       
 ributed)                                                        
                                                                 
Total params: 17858905 (68.13 MB)
Trainable params: 24205 (94.55 KB)
Non-trainable params: 17834700 (68.03 MB)
_________________________________________________________________


In [None]:
# Categorical cross-entropy pairs with the one-hot encoded tags; accuracy is tracked during training.
rnn_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train for 10 epochs in mini-batches of 128 sentences, scoring the validation split after each epoch.
rnn_training = rnn_model.fit(
    x=X_train,
    y=Y_train,
    batch_size=128,
    epochs=10,
    validation_data=(X_validation, Y_validation),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Arbitrarily initialised, trainable embeddings

In [None]:
# SimpleRNN tagger with randomly initialised embeddings that are learned during training.
# Note: this rebinds `rnn_model`, replacing any model previously stored under that name.

rnn_model = Sequential()

# embedding layer - maps each integer word id to a dense vector
rnn_model.add(Embedding(
    input_dim=VOCABULARY_SIZE,    # vocabulary size - number of unique words in data (+1 for padding id 0)
    output_dim=EMBEDDING_SIZE,    # length of vector with which each word is represented
    input_length=MAX_SEQ_LENGTH,  # length of each padded input sequence
    mask_zero=True,               # id 0 is padding: skip it in the recurrence and leave it out of loss/accuracy
    trainable=True,               # True - update the embeddings during training
))

# recurrent layer with 64 units; return_sequences=True emits an output at every timestep
# (False would return only the output at the end of the sequence)
rnn_model.add(SimpleRNN(64, return_sequences=True))

# apply the same softmax classifier to the output of every timestep
rnn_model.add(TimeDistributed(Dense(NUM_CLASSES, activation='softmax')))

In [None]:
# Print the per-layer output shapes and parameter counts; all parameters should now be trainable.
rnn_model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 100, 300)          17834700  
                                                                 
 simple_rnn_2 (SimpleRNN)    (None, 100, 64)           23360     
                                                                 
 time_distributed_2 (TimeDi  (None, 100, 13)           845       
 stributed)                                                      
                                                                 
Total params: 17858905 (68.13 MB)
Trainable params: 17858905 (68.13 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Categorical cross-entropy pairs with the one-hot encoded tags; accuracy is tracked during training.
rnn_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train for 3 epochs in mini-batches of 128 sentences, scoring the validation split after each epoch.
rnn_training = rnn_model.fit(
    x=X_train,
    y=Y_train,
    batch_size=128,
    epochs=3,
    validation_data=(X_validation, Y_validation),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


### Trainable word2vec embeddings

In [None]:
# SimpleRNN tagger whose embeddings start from the word2vec matrix and are fine-tuned during training.
# Note: this rebinds `rnn_model`, replacing any model previously stored under that name.

rnn_model = Sequential()

# embedding layer - maps each integer word id to a dense vector
rnn_model.add(Embedding(
    input_dim=VOCABULARY_SIZE,    # vocabulary size - number of unique words in data (+1 for padding id 0)
    output_dim=EMBEDDING_SIZE,    # length of vector with which each word is represented
    input_length=MAX_SEQ_LENGTH,  # length of each padded input sequence
    weights=[embedding_weights],  # initialise with the pretrained word embedding matrix
    mask_zero=True,               # id 0 is padding: skip it in the recurrence and leave it out of loss/accuracy
    trainable=True,               # True - fine-tune the embeddings during training
))

# recurrent layer with 64 units; return_sequences=True emits an output at every timestep
# (False would return only the output at the end of the sequence)
rnn_model.add(SimpleRNN(64, return_sequences=True))

# apply the same softmax classifier to the output of every timestep
rnn_model.add(TimeDistributed(Dense(NUM_CLASSES, activation='softmax')))

In [None]:
# Print the per-layer output shapes and parameter counts.
rnn_model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 100, 300)          17834700  
                                                                 
 simple_rnn_3 (SimpleRNN)    (None, 100, 64)           23360     
                                                                 
 time_distributed_3 (TimeDi  (None, 100, 13)           845       
 stributed)                                                      
                                                                 
Total params: 17858905 (68.13 MB)
Trainable params: 17858905 (68.13 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Categorical cross-entropy pairs with the one-hot encoded tags; accuracy is tracked during training.
rnn_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train for 5 epochs in mini-batches of 128 sentences, scoring the validation split after each epoch.
rnn_training = rnn_model.fit(
    x=X_train,
    y=Y_train,
    batch_size=128,
    epochs=5,
    validation_data=(X_validation, Y_validation),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## LSTM model

In [None]:
from tensorflow.keras.layers import LSTM

# LSTM tagger: word2vec-initialised, fine-tuned embeddings -> LSTM -> per-token softmax.

lstm_model = Sequential()

# embedding layer - maps each integer word id to a dense vector
lstm_model.add(Embedding(
    input_dim=VOCABULARY_SIZE,    # vocabulary size - number of unique words in data (+1 for padding id 0)
    output_dim=EMBEDDING_SIZE,    # length of vector with which each word is represented
    input_length=MAX_SEQ_LENGTH,  # length of each padded input sequence
    weights=[embedding_weights],  # initialise with the pretrained word embedding matrix
    mask_zero=True,               # id 0 is padding: skip it in the recurrence and leave it out of loss/accuracy
    trainable=True,               # True - fine-tune the embeddings during training
))

# LSTM layer with 64 units; return_sequences=True emits an output at every timestep
# (False would return only the output at the end of the sequence)
# NOTE(review): with left ('pre') padding a masked LSTM may not use the fused cuDNN kernel;
# consider 'post' padding if training speed matters - verify on the target runtime.
lstm_model.add(LSTM(64, return_sequences=True))

# apply the same softmax classifier to the output of every timestep
lstm_model.add(TimeDistributed(Dense(NUM_CLASSES, activation='softmax')))

In [None]:
# Print the per-layer output shapes and parameter counts.
lstm_model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 100, 300)          17834700  
                                                                 
 lstm (LSTM)                 (None, 100, 64)           93440     
                                                                 
 time_distributed_4 (TimeDi  (None, 100, 13)           845       
 stributed)                                                      
                                                                 
Total params: 17928985 (68.39 MB)
Trainable params: 17928985 (68.39 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Categorical cross-entropy pairs with the one-hot encoded tags; accuracy is tracked during training.
lstm_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train for 5 epochs in mini-batches of 128 sentences, scoring the validation split after each epoch.
# The returned History is kept (mirroring `rnn_training`) so the per-epoch metrics stay
# available, and so the cell does not end by echoing the History object's repr.
lstm_training = lstm_model.fit(X_train, Y_train, batch_size=128, epochs=5, validation_data=(X_validation, Y_validation))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7d44dc1ae680>

## GRU model

In [None]:
from tensorflow.keras.layers import GRU

# GRU tagger: word2vec-initialised, fine-tuned embeddings -> GRU -> per-token softmax.

gru_model = Sequential()

# embedding layer - maps each integer word id to a dense vector
gru_model.add(Embedding(
    input_dim=VOCABULARY_SIZE,    # vocabulary size - number of unique words in data (+1 for padding id 0)
    output_dim=EMBEDDING_SIZE,    # length of vector with which each word is represented
    input_length=MAX_SEQ_LENGTH,  # length of each padded input sequence
    weights=[embedding_weights],  # initialise with the pretrained word embedding matrix
    mask_zero=True,               # id 0 is padding: skip it in the recurrence and leave it out of loss/accuracy
    trainable=True,               # True - fine-tune the embeddings during training
))

# GRU layer with 64 units; return_sequences=True emits an output at every timestep
# (False would return only the output at the end of the sequence)
# NOTE(review): with left ('pre') padding a masked GRU may not use the fused cuDNN kernel;
# consider 'post' padding if training speed matters - verify on the target runtime.
gru_model.add(GRU(64, return_sequences=True))

# apply the same softmax classifier to the output of every timestep
gru_model.add(TimeDistributed(Dense(NUM_CLASSES, activation='softmax')))

In [None]:
# Print the per-layer output shapes and parameter counts.
gru_model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_7 (Embedding)     (None, 100, 300)          17834700  
                                                                 
 gru_1 (GRU)                 (None, 100, 64)           70272     
                                                                 
 time_distributed_7 (TimeDi  (None, 100, 13)           845       
 stributed)                                                      
                                                                 
Total params: 17905817 (68.31 MB)
Trainable params: 17905817 (68.31 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Categorical cross-entropy pairs with the one-hot encoded tags; accuracy is tracked during training.
gru_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train for 5 epochs in mini-batches of 128 sentences, scoring the validation split after each epoch.
# The returned History is kept (mirroring `rnn_training`) so the per-epoch metrics stay
# available, and so the cell does not end by echoing the History object's repr.
gru_training = gru_model.fit(X_train, Y_train, batch_size=128, epochs=5, validation_data=(X_validation, Y_validation))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7d4740761b70>

## Bidirectional model

In [None]:
from tensorflow.keras.layers import Bidirectional

# Bidirectional LSTM tagger: word2vec-initialised, fine-tuned embeddings -> BiLSTM -> per-token softmax.

bidirectional_model = Sequential()

# embedding layer - maps each integer word id to a dense vector
bidirectional_model.add(Embedding(
    input_dim=VOCABULARY_SIZE,    # vocabulary size - number of unique words in data (+1 for padding id 0)
    output_dim=EMBEDDING_SIZE,    # length of vector with which each word is represented
    input_length=MAX_SEQ_LENGTH,  # length of each padded input sequence
    weights=[embedding_weights],  # initialise with the pretrained word embedding matrix
    mask_zero=True,               # id 0 is padding: skip it in the recurrence and leave it out of loss/accuracy
    trainable=True,               # True - fine-tune the embeddings during training
))

# one 64-unit LSTM per reading direction, wrapped in Bidirectional;
# return_sequences=True emits an output at every timestep
# (False would return only the output at the end of the sequence)
# NOTE(review): with left ('pre') padding a masked LSTM may not use the fused cuDNN kernel;
# consider 'post' padding if training speed matters - verify on the target runtime.
bidirectional_model.add(Bidirectional(LSTM(64, return_sequences=True)))

# apply the same softmax classifier to the output of every timestep
bidirectional_model.add(TimeDistributed(Dense(NUM_CLASSES, activation='softmax')))

In [None]:
# Print the per-layer output shapes and parameter counts.
bidirectional_model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_8 (Embedding)     (None, 100, 300)          17834700  
                                                                 
 bidirectional (Bidirection  (None, 100, 128)          186880    
 al)                                                             
                                                                 
 time_distributed_8 (TimeDi  (None, 100, 13)           1677      
 stributed)                                                      
                                                                 
Total params: 18023257 (68.75 MB)
Trainable params: 18023257 (68.75 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Categorical cross-entropy pairs with the one-hot encoded tags; accuracy is tracked during training.
bidirectional_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train for 5 epochs in mini-batches of 128 sentences, scoring the validation split after each epoch.
# The returned History is kept (mirroring `rnn_training`) so the per-epoch metrics stay
# available, and so the cell does not end by echoing the History object's repr.
bidirectional_training = bidirectional_model.fit(X_train, Y_train, batch_size=128, epochs=5, validation_data=(X_validation, Y_validation))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7d44fc3f25c0>

## Model evaluation

In [None]:
# Score the SimpleRNN on the held-out test set.
# `rnn_model` is rebound in each SimpleRNN section, so this evaluates the last one built
# (trainable word2vec embeddings); the two earlier variants are no longer available.
# NOTE(review): if padding positions are not masked they count towards this accuracy,
# which makes it look higher than the accuracy on real tokens - interpret with care.
loss, accuracy = rnn_model.evaluate(X_test, Y_test, verbose = 1)
print("Loss: {0},\nAccuracy: {1}".format(loss, accuracy))

Loss: 0.025305528193712234,
Accuracy: 0.9910275936126709


In [None]:
# Score the LSTM model on the held-out test set (overwrites `loss` and `accuracy`).
loss, accuracy = lstm_model.evaluate(X_test, Y_test, verbose = 1)
print("Loss: {0},\nAccuracy: {1}".format(loss, accuracy))

Loss: 0.02409767173230648,
Accuracy: 0.991347074508667


In [None]:
# Score the GRU model on the held-out test set (overwrites `loss` and `accuracy`).
loss, accuracy = gru_model.evaluate(X_test, Y_test, verbose = 1)
print("Loss: {0},\nAccuracy: {1}".format(loss, accuracy))

Loss: 0.023546695709228516,
Accuracy: 0.991503119468689


In [None]:
# Score the bidirectional LSTM on the held-out test set (overwrites `loss` and `accuracy`).
loss, accuracy = bidirectional_model.evaluate(X_test, Y_test, verbose = 1)
print("Loss: {0},\nAccuracy: {1}".format(loss, accuracy))

Loss: 0.018170785158872604,
Accuracy: 0.993961751461029
