# LSTM on Kaggle's Quora Question Pairs

In [0]:
!pip install numpy
!pip install pandas
!pip install spacy
!pip install nltk
!pip install torch
!pip install spacy



First, lets import all the necessary packages

In [0]:
import numpy as np 
import pandas as pd 
import os
import spacy
import string
import re
import numpy as np

from nltk.corpus import stopwords

import spacy
from spacy.symbols import ORTH
from collections import Counter

from sklearn.model_selection import train_test_split

import torch
import torch.cuda
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


### Global variables

In [0]:
from pathlib import Path
PATH = Path("/content/drive/My Drive/Colab Notebooks/DL/deep-learning-with-pytorch/data")

In [0]:
# File paths
TRAIN_CSV = PATH/'train.csv'
TEST_CSV = PATH/'test.csv'
EMBEDDING_FILE = PATH/'GoogleNews-vectors-negative300.bin.gz'

## Create embedding matrix

In [0]:
# Load training and test set
train_df = pd.read_csv(TRAIN_CSV)
test_df = pd.read_csv(TEST_CSV)

In [0]:
import nltk
nltk.download('stopwords')
stops = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [0]:
def text_to_word_list(text):
    ''' Pre process and convert texts to a list of words '''
    text = str(text)
    text = text.lower()

    # Clean the text
    text = re.sub(r"[^A-Za-z0-9^,!.\/'+-=]", " ", text)
    text = re.sub(r"what's", "what is ", text)
    text = re.sub(r"\'s", " ", text)
    text = re.sub(r"\'ve", " have ", text)
    text = re.sub(r"can't", "cannot ", text)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"i'm", "i am ", text)
    text = re.sub(r"\'re", " are ", text)
    text = re.sub(r"\'d", " would ", text)
    text = re.sub(r"\'ll", " will ", text)
    text = re.sub(r",", " ", text)
    text = re.sub(r"\.", " ", text)
    text = re.sub(r"!", " ! ", text)
    text = re.sub(r"\/", " ", text)
    text = re.sub(r"\^", " ^ ", text)
    text = re.sub(r"\+", " + ", text)
    text = re.sub(r"\-", " - ", text)
    text = re.sub(r"\=", " = ", text)
    text = re.sub(r"'", " ", text)
    text = re.sub(r"(\d+)(k)", r"\g<1>000", text)
    text = re.sub(r":", " : ", text)
    text = re.sub(r" e g ", " eg ", text)
    text = re.sub(r" b g ", " bg ", text)
    text = re.sub(r" u s ", " american ", text)
    text = re.sub(r"\0s", "0", text)
    text = re.sub(r" 9 11 ", "911", text)
    text = re.sub(r"e - mail", "email", text)
    text = re.sub(r"j k", "jk", text)
    text = re.sub(r"\s{2,}", " ", text)

    text = text.split()

    return text

In [0]:
# Prepare embedding
vocabulary = dict()
inverse_vocabulary = ['<unk>']  # '<unk>' will never be used, it is only a placeholder for the [0, 0, ....0] embedding


In [0]:
word2vec = KeyedVectors.load_word2vec_format(EMBEDDING_FILE, binary=True)

  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


In [0]:
questions_cols = ['question1', 'question2']

# Iterate over the questions only of both training and test datasets
for dataset in [train_df, test_df]:
    for index, row in dataset.iterrows():

        # Iterate through the text of both questions of the row
        for question in questions_cols:

            q2n = []  # q2n -> question numbers representation
            for word in text_to_word_list(row[question]):

                # Check for unwanted words
                if word in stops and word not in word2vec.vocab:
                    continue

                if word not in vocabulary:
                    vocabulary[word] = len(inverse_vocabulary)
                    q2n.append(len(inverse_vocabulary))
                    inverse_vocabulary.append(word)
                else:
                    q2n.append(vocabulary[word])

            # Replace questions as word to question as number representation
            dataset.at[index, question] = q2n

In [0]:
embedding_dim = 300
embeddings = 1 * np.random.randn(len(vocabulary) + 1, embedding_dim)  # This will be the embedding matrix
embeddings[0] = 0  # So that the padding will be ignored

In [0]:
len(vocabulary)

121319

In [0]:
# Build the embedding matrix
for word, index in vocabulary.items():
    if word in word2vec.vocab:
        embeddings[index] = word2vec.word_vec(word)

#del word2vec

In [0]:
len(embeddings), embeddings.size

(121320, 36396000)

In [0]:
embeddings[0].size

300

In [0]:
#saving embeddings to drive to save time
np.savetxt(PATH/"embeddings.csv", embeddings, delimiter=",")

In [0]:
#load embeddings from drive
from numpy import genfromtxt
embeddings = genfromtxt(PATH/'embeddings.csv', delimiter=',')

In [0]:
# Save the embeddings to file
train_df.to_pickle(PATH/"train_df.pkl")
test_df.to_pickle(PATH/"test_df.pkl")

In [0]:
train_df

Unnamed: 0,id,qid1,qid2,question1,question2,is_duplicate
0,0,1,2,"[1, 2, 3, 4, 5, 4, 6, 7, 8, 9, 10, 8, 11]","[1, 2, 3, 4, 5, 4, 6, 7, 8, 9, 10]",0
1,1,3,4,"[1, 2, 3, 12, 13, 14, 15, 16, 15, 17, 18]","[1, 19, 20, 21, 3, 22, 23, 24, 3, 13, 14, 15, ...",0
2,2,5,6,"[26, 27, 16, 28, 3, 29, 30, 31, 32, 33, 34, 35]","[26, 27, 31, 29, 36, 37, 5, 38, 39, 40]",0
3,3,7,8,"[41, 42, 16, 43, 44, 45, 26, 27, 16, 46, 47]","[48, 3, 49, 50, 51, 52, 53, 54, 51, 2, 55, 5, ...",0
4,4,9,10,"[56, 57, 58, 8, 59, 60, 61, 62, 63, 64, 65, 66]","[56, 67, 19, 68, 8, 62, 59]",0
...,...,...,...,...,...,...
404285,404285,433578,379845,"[26, 184, 3632, 115, 307, 8, 3, 24591, 522, 52...","[26, 184, 3632, 115, 307, 8, 12037, 522, 523, ...",0
404286,404286,18840,155606,"[97, 99, 2441, 307, 2, 598, 180, 1822]","[2, 47, 467, 77, 307, 2, 598, 180, 1822]",1
404287,404287,537928,537929,"[1, 2, 57, 11017]","[1, 2, 83, 11017]",0
404288,404288,537930,537931,"[1, 2, 3, 21210, 12592, 534, 2769, 33, 3114, 8...","[16, 42, 1086, 2877, 2854, 2622, 1220, 16, 173...",0


In [0]:
train_df = pd.read_pickle(PATH/"train_df.pkl")
test_df = pd.read_pickle(PATH/"test_df.pkl")

In [0]:
test_df.shape

(3563475, 3)

In [0]:
# Split test data to smaller chunks to run the model to predict output
def splitDataFrameIntoSmaller(df, chunkSize = 10000): 
    listOfDf = list()
    numberChunks = len(df) // chunkSize + 1
    for i in range(numberChunks):
        listOfDf.append(df[i*chunkSize:(i+1)*chunkSize])
    return listOfDf

In [0]:
chunks = splitDataFrameIntoSmaller(test_df)

In [0]:
C = len(chunks)
C

357

In [0]:
chunks[0].shape, chunks[7].shape

((10000, 3), (10000, 3))

In [0]:
# Save test data chunks to file
def saveChunks2file(chunks):
  for i,chunk in enumerate(chunks):
    filename = "output"+str(i)+".pkl"
    chunk.to_pickle(PATH/filename)

In [0]:
saveChunks2file(chunks)

## Prepare training and validation data

In [0]:
# spacy_tok takes a while. Run it just once
def encode_sentence(q1, q2 , N=50, padding_start=False):
    #x = spacy_tok(path.read_text())
    x1 = np.zeros(N, dtype=np.int32)
    x2 = np.zeros(N, dtype=np.int32)
    #enc1 = np.array([vocab2index.get(w, vocab2index["UNK"]) for w in x])
    l1 = min(N, len(q1))
    l2 = min(N, len(q2))
    if padding_start:
        x1[:l1] = q1[:l1]
        x2[:l2] = q2[:l2]
    else:
        x1[N-l1:] = q1[:l1]
        x2[N-l2:] = q2[:l2]
    return x1, x2

In [0]:
X = np.array(train_df[['question1', 'question2']])
y = np.array(train_df['is_duplicate'])

In [0]:
X_test = np.array(test_df[['question1', 'question2']])
X_test[:5]

array([[list([26, 76, 3, 1663, 1237, 5472, 745, 149, 175, 4949, 1237]),
        list([41, 330, 3863, 331, 1580, 12991, 212, 1580, 6271, 1393, 1663, 1237, 745])],
       [list([84, 16, 401, 942, 11093, 225, 833, 54, 26, 214, 19, 47, 534]),
        list([26, 214, 534, 76, 942, 11093, 1864])],
       [list([1, 1220, 2, 3, 195, 250, 1096, 251, 91, 817, 3, 147]),
        list([1, 99, 1096, 251, 817])],
       [list([56, 818, 212, 31956]), list([1, 388, 11099])],
       [list([26, 86002, 1221, 1204]),
        list([26, 290, 27, 16, 1221, 1204])]], dtype=object)

In [0]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

X_train[:5], y_train[:5]

(array([[list([1, 115, 116, 3, 777, 2800]),
         list([1, 115, 116, 98, 777, 2800, 175, 3295, 834])],
        [list([2, 83, 1848, 1849, 1018]),
         list([115, 1848, 1849, 1824, 3984])],
        [list([1, 2, 3, 278, 525, 2670, 446, 2315, 8, 135, 598]),
         list([1, 115, 3, 278, 525, 697, 99, 401, 2315, 8, 135, 598, 50, 330, 99, 133, 606])],
        [list([56, 1507, 3329, 108, 1188, 4285, 1734, 401, 95, 1755, 3296, 31732, 7046, 8, 2949]),
         list([56, 1507, 3329, 108, 2599, 4285, 1734, 401, 95, 1755, 3296, 31732, 7046, 8, 2949])],
        [list([1, 84, 16, 97, 36, 162, 7478, 8, 372]),
         list([1, 2, 4528, 4017, 8675])]], dtype=object),
 array([0, 0, 1, 1, 0]))

## Dataset

In [0]:
class QuoraDataset(Dataset):
    def __init__(self, X, y):
        self.x = [encode_sentence(q1, q2) for q1, q2 in X]
        self.y = y
    
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        x1 = self.x[idx][0]
        x2 = self.x[idx][1]
        return x1, x2, self.y[idx]


In [0]:
train_ds = QuoraDataset(X_train, y_train)
valid_ds = QuoraDataset(X_val, y_val)

In [0]:
X_test.shape[0]

3563475

In [0]:
sub_df.shape

(3563474, 3)

In [0]:
y_train

array([0, 0, 1, ..., 1, 0, 1])

In [0]:
train_ds[0]

(array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           1,  115,  116,    3,  777, 2800], dtype=int32),
 array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    1,  115,  116,
          98,  777, 2800,  175, 3295,  834], dtype=int32),
 0)

In [0]:
trn_dl = DataLoader(train_ds, batch_size=1, shuffle=True)
x1,x2, y= next(iter(trn_dl))

x1,x2, y

(tensor([[    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     1,     2,
             47,   139,   715,     8, 13373,    11,  7580,   536,   731,  8894]],
        dtype=torch.int32),
 tensor([[    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     1,     2,    47,   139,
           1026,     8, 13373,    11,  7580,   536,   731,  8894,   638,   197]],
        dtype=torch.int32),
 tensor([1]))

In [0]:
x1.shape,x2.shape, y.shape

(torch.Size([1, 50]), torch.Size([1, 50]), torch.Size([1]))

## LSTM Model

In [0]:
def save_model(m, p): torch.save(m.state_dict(), p)
    
def load_model(m, p): m.load_state_dict(torch.load(p))

In [0]:
class LSTMV0Model(torch.nn.Module) :
    def __init__(self, vocab_size, embedding_dim, hidden_dim, pretrained_weights=None) :
        super(LSTMV0Model,self).__init__()
        self.hidden_dim = hidden_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        if pretrained_weights is not None:
            self.embeddings.weight.data.copy_(torch.from_numpy(pretrained_weights))
            self.embeddings.weight.requires_grad = False ## freeze embeddings

        self.q1_lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.q2_lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        
    def forward(self, x1, x2):
        x1 = self.embeddings(x1)
        x2 = self.embeddings(x2)
        out_pack, (ht1, ct) = self.q1_lstm(x1)
        out_pack, (ht2, ct) = self.q1_lstm(x2)
        
        q1 = ht1[-1]
        q2 = ht2[-1]
        
        dist = torch.norm(q1-q2, dim=1)
        dist = torch.exp(-dist)
        return dist

In [0]:
model_test = LSTMV0Model(len(embeddings), 300, 100, pretrained_weights=embeddings)
y_pred = model_test(x1.long(), x2.long())
print(y_pred, y, y.unsqueeze(1).shape[0])
loss = F.binary_cross_entropy(y_pred.unsqueeze(1), y.float().unsqueeze(1))
print(loss, loss.item())

tensor([0.5726], grad_fn=<ExpBackward>) tensor([1]) 1
tensor(0.5576, grad_fn=<BinaryCrossEntropyBackward>) 0.5576372146606445


In [0]:
def update_optimizer(optimizer, lr):
    for i, param_group in enumerate(optimizer.param_groups):
        param_group["lr"] = lr


In [0]:
def train_epocs(model, optimizer, train_dl, valid_dl, model_name, epochs=10):
    for i in range(epochs):
        model.train()
        sum_loss = 0.0
        total = 0
        best_val_acc = 0.0
        for x1, x2, y in train_dl:
            x1 = x1.long().cuda()
            x2 = x2.long().cuda()
            y = y.float().cuda()
            y_pred = model(x1, x2)
            optimizer.zero_grad()
            loss = F.binary_cross_entropy(y_pred.unsqueeze(1), y.unsqueeze(1))
            loss.backward()
            optimizer.step()
            sum_loss += loss.item()*y.shape[0]
            total += y.shape[0]
        val_loss, val_acc = val_metrics(model, valid_dl)
        print("train loss %.3f val loss %.3f and val accuracy %.3f" % (sum_loss/total, val_loss, val_acc))
        if best_val_acc < val_acc:
            best_val_acc = val_acc
            path = "{0}/quora_models/model_{2}_acc_{1:.0f}.pth".format(PATH, 100*val_acc, model_name) 
            save_model(model, path)
            print(path)

In [0]:
def val_metrics(model, valid_dl):
    model.eval()
    correct = 0
    total = 0
    sum_loss = 0.0
    for x1, x2, y in valid_dl:
        x1 = x1.long().cuda()
        x2 = x2.long().cuda()
        y = y.float().cuda()
        y_hat = model(x1, x2)
        loss = F.binary_cross_entropy(y_hat.unsqueeze(1), y.unsqueeze(1))
        y_pred = y_hat > 0.5
        correct += (y_pred.unsqueeze(1).float() == y.unsqueeze(1)).float().sum()
        total += y.shape[0]
        sum_loss += loss.item()*y.shape[0]
    return sum_loss/total, correct/total

## Training model

In [0]:
batch_size = 128
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=batch_size)

In [0]:
model = LSTMV0Model(len(embeddings), 300, 100, pretrained_weights=embeddings).cuda()

parameters = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.Adam(parameters, lr=0.001)



In [0]:
train_epocs(model, optimizer, train_dl, valid_dl, 'LSTM', epochs=15)

train loss 0.536 val loss 0.514 and val accuracy 0.776
/content/drive/My Drive/Colab Notebooks/DL/deep-learning-with-pytorch/data/quora_models/model_LSTM_acc_78.pth
train loss 0.483 val loss 0.488 and val accuracy 0.801
/content/drive/My Drive/Colab Notebooks/DL/deep-learning-with-pytorch/data/quora_models/model_LSTM_acc_80.pth
train loss 0.461 val loss 0.474 and val accuracy 0.809
/content/drive/My Drive/Colab Notebooks/DL/deep-learning-with-pytorch/data/quora_models/model_LSTM_acc_81.pth
train loss 0.445 val loss 0.465 and val accuracy 0.811
/content/drive/My Drive/Colab Notebooks/DL/deep-learning-with-pytorch/data/quora_models/model_LSTM_acc_81.pth
train loss 0.433 val loss 0.459 and val accuracy 0.817
/content/drive/My Drive/Colab Notebooks/DL/deep-learning-with-pytorch/data/quora_models/model_LSTM_acc_82.pth
train loss 0.423 val loss 0.455 and val accuracy 0.819
/content/drive/My Drive/Colab Notebooks/DL/deep-learning-with-pytorch/data/quora_models/model_LSTM_acc_82.pth
train loss

## Testing model

In [0]:
# Predict test data using best model
def test_val(model, test_dl):
  model.eval()
  y_t = []
  for x1, x2, y in test_dl:
    x1 = x1.long().cuda()
    x2 = x2.long().cuda()
    y_pred = model(x1, x2)
    y_t.append(y_pred.tolist())
  flat_list = [item for sublist in y_t for item in sublist]
  return flat_list

In [0]:
best_model = LSTMV0Model(len(embeddings), 300, 100, pretrained_weights=embeddings).cuda()
load_model(best_model, '/content/drive/My Drive/Colab Notebooks/DL/deep-learning-with-pytorch/data/quora_models/model_LSTM_acc_83.pth')


In [0]:
# Predict test data and write to file
def predTest(best_model, n):
  for i in range(n):
    filename = "output"+str(i)+".pkl"
    test_df = pd.read_pickle(PATH/filename)
    X_test = np.array(test_df[['question1', 'question2']])
    y_test = np.zeros(X_test.shape[0])
    test_ds = QuoraDataset(X_test, y_test)
    test_dl = DataLoader(test_ds, batch_size=100)
    res = test_val(best_model, test_dl)
    res = np.asarray(res)
    res = pd.DataFrame(res)
    pred_filename = "submission_"+str(i)+".csv"
    res.to_csv(PATH/pred_filename)

In [0]:
predTest(best_model, 357)

In [0]:
for i in range(357):
  pred_filename = "submission_"+str(i)+".csv"
  sub_df = pd.read_csv(PATH/pred_filename)
  sub_df.to_csv(PATH/'pred10.csv', mode='a',header=False)

In [0]:
sub_df = pd.read_csv(PATH/'pred10.csv', header=None)

In [441]:
sub_df.shape

(3563475, 3)

In [0]:
sub_df.shape

(3563475, 3)

In [442]:
sub_df[0].duplicated().any()

True

In [0]:
del sub_df[0]

In [0]:
sub_df[1] = test_df["test_id"]

In [0]:
sub_df.rename(columns = {1:'test_id', 2:'is_duplicate' }, inplace = True)

In [0]:
sub_df_C = sub_df.head(2345796)

In [447]:
sub_df_C.shape

(2345796, 2)

In [0]:
sub_df_C.to_csv(PATH/"submission.csv")
sub_df_C = pd.read_csv(PATH/"submission.csv") 

In [0]:
#sub_df_C

In [0]:
sub_df_C['test_id'] = sub_df_C['Unnamed: 0']
del sub_df_C['Unnamed: 0']

In [451]:
sub_df_C['test_id'].duplicated().any()


False

In [455]:
sub_df_C.head(5)

Unnamed: 0,test_id,is_duplicate
0,0,0.013765
1,1,0.116521
2,2,0.556008
3,3,0.038203
4,4,0.587574


In [0]:
sub_df_C.to_csv(PATH/"submission.csv", index = False)

## GRU Model

In [0]:
class GRUV0Model(torch.nn.Module) :
    def __init__(self, vocab_size, embedding_dim, hidden_dim, glove_weights=None) :
        super(GRUV0Model,self).__init__()
        self.hidden_dim = hidden_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        if glove_weights is not None:
            self.embeddings.weight.data.copy_(torch.from_numpy(glove_weights))
            self.embeddings.weight.requires_grad = False ## freeze embeddings

        self.q1_gru = nn.GRU(embedding_dim, hidden_dim, batch_first=True)
        self.q2_gru = nn.GRU(embedding_dim, hidden_dim, batch_first=True)
        
    def forward(self, x1, x2):
        x1 = self.embeddings(x1)
        x2 = self.embeddings(x2)
        out_pack, ht1 = self.q1_gru(x1)
        out_pack, ht2 = self.q2_gru(x2)
        
        q1 = ht1[-1]
        q2 = ht2[-1]
        
        dist = torch.norm(q1-q2, dim=1)
        dist = torch.exp(-dist)
        return dist

In [0]:
batch_size = 128
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=batch_size)

In [0]:
model = GRUV0Model(len(embeddings), 300, 100, glove_weights=embeddings).cuda()

parameters = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.Adam(parameters, lr=0.001)

In [459]:
train_epocs(model, optimizer, train_dl, valid_dl, 'GRU', epochs=15)

train loss 0.544 val loss 0.505 and val accuracy 0.759
/content/drive/My Drive/Colab Notebooks/DL/deep-learning-with-pytorch/data/quora_models/model_GRU_acc_76.pth
train loss 0.490 val loss 0.484 and val accuracy 0.772
/content/drive/My Drive/Colab Notebooks/DL/deep-learning-with-pytorch/data/quora_models/model_GRU_acc_77.pth
train loss 0.468 val loss 0.473 and val accuracy 0.778
/content/drive/My Drive/Colab Notebooks/DL/deep-learning-with-pytorch/data/quora_models/model_GRU_acc_78.pth
train loss 0.454 val loss 0.464 and val accuracy 0.784
/content/drive/My Drive/Colab Notebooks/DL/deep-learning-with-pytorch/data/quora_models/model_GRU_acc_78.pth
train loss 0.444 val loss 0.456 and val accuracy 0.790
/content/drive/My Drive/Colab Notebooks/DL/deep-learning-with-pytorch/data/quora_models/model_GRU_acc_79.pth
train loss 0.436 val loss 0.457 and val accuracy 0.788
/content/drive/My Drive/Colab Notebooks/DL/deep-learning-with-pytorch/data/quora_models/model_GRU_acc_79.pth
train loss 0.429

## Conclusion

Best validation accuracy is 0.83 using LSTM baseline model compared to GRU baseline which is 0.80