# Train RNN_RNN

In [1]:
import os

import pandas as pd

import torch
import torch.nn as nn
from torch.nn.utils import clip_grad_norm_

from tqdm import tqdm

from utils.GloveMgr import GloveMgr
from utils.Dataset import Dataset
from utils.DataLoader import DataLoader
from utils.preprocess_df import preprocess_df
from utils.accuracy_nb_sent_per_doc import accuracy_nb_sent_per_doc_fn
from utils.accuracy_prop_sent_per_doc import accuracy_prop_sent_per_doc_fn

#from models.RNN_RNN import RNN_RNN

from time import time

import gc

In [2]:
vocab_size = 150000
batch_size = 32
epochs = 10
learning_rate = 1e-3
model_name = "RNN_RNN"
average_proportion_of_sentences_per_document = 0.2670278281534701
average_number_of_sentences_per_document = 6.061850780738518

In [3]:
# Check if a GPU is available
if torch.cuda.is_available():
    # Display the number of available GPUs
    print(f"Number of available GPUs: {torch.cuda.device_count()}")
    # Display the name of each GPU
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
else:
    print("No GPU available.")

Number of available GPUs: 1
GPU 0: NVIDIA GeForce RTX 3060


In [4]:
if torch.cuda.is_available():  
  dev = "cuda:0" 
else:  
  dev = "cpu" 

device = torch.device(dev)
device

device(type='cuda', index=0)

In [5]:
glovemgr = GloveMgr("./data/glove.6B/glove.6B.100d.txt", vocab_size=vocab_size)

In [6]:
train_dataset = Dataset(preprocess_df(pd.read_json("./data/train.json"), glovemgr=glovemgr, is_sep_n=True, remove_stop_word=True, stemming=False, trunc_sent=50, padding_sent=50, trunc_doc=100))
train_iter = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=False)

In [7]:
val_dataset = Dataset(preprocess_df(pd.read_json("./data/val.json"), glovemgr=glovemgr, is_sep_n=True, remove_stop_word=True, stemming=False, trunc_sent=50, padding_sent=50, trunc_doc=100))
val_iter = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

In [8]:
from models.BasicModel import BasicModel
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class RNN_RNN(BasicModel):
    def __init__(self, device, vocab_size, word_embed = None):
        super(RNN_RNN, self).__init__(device)

        self.device = device

        self.word_embedding = nn.Embedding(vocab_size+2, 100, padding_idx=0)
        # Load word embedding if specified
        if word_embed is not None:
            self.word_embedding = torch.nn.Embedding.from_pretrained(torch.from_numpy(word_embed).float())

        # 100 : word2vec embedding size
        self.word_GRU = nn.GRU(input_size = 100, hidden_size = 200, batch_first = True, bidirectional = True)
        self.sent_GRU = nn.GRU(input_size = 2*200, hidden_size=200, batch_first = True, bidirectional = True)

        # 10: relative position range size, with segment size = 10
        self.rel_pos_emb = nn.Embedding(11, 100)
        self.abs_pos_emb = nn.Embedding(100, 100)

        self.Wdoc = nn.Linear(2*200,2*200,bias=True)

        self.Wcontent = nn.Linear(2*200,1,bias=False)
        self.Wsalience = nn.Bilinear(2*200,2*200,1,bias=False)
        self.Wnovelty = nn.Bilinear(2*200,2*200,1,bias=False)
        self.Wabs_pos = nn.Linear(100,1,bias=False)
        self.Wrel_pos = nn.Linear(100,1,bias=False)
        self.bias = nn.Parameter(torch.empty(1).uniform_(-0.1, 0.1))

    def avg_pool1d(self,x,seq_lens):
        out = []
        for index,t in enumerate(x):
            if seq_lens[index] == 0:
                t = t[:1]
            else:
                t = t[:seq_lens[index],:]
            t = torch.t(t).unsqueeze(0)
            out.append(F.avg_pool1d(t,t.size(2)))
        
        out = torch.cat(out).squeeze(2)
        return out

    def forward(self, arr_x, doc_lens):
        probs = []

        sent_lens = torch.sum(torch.sign(arr_x),dim=1).data
        arr_x = self.word_embedding(arr_x)
        arr_x = self.word_GRU(arr_x)[0]
        arr_x = self.avg_pool1d(arr_x, sent_lens)

        arr_x = self.pad_doc(arr_x, doc_lens)

        arr_x = self.sent_GRU(arr_x)[0]
        docs = self.avg_pool1d(arr_x, doc_lens)

        # for each document, compute probabilities
        for idx, doc_len in enumerate(doc_lens):
            sents = arr_x[idx,:doc_len,:]
            d = torch.tanh(self.Wdoc(docs[idx])).unsqueeze(0)
            s = torch.zeros(1,2*200)
            s = s.to(self.device)
            #prob_doc = []
            for position, h in enumerate(sents):
                h = h.view(1, -1) # resize
                # Compute position embedding
                abs_pos = torch.tensor([[position]], dtype=torch.long)
                abs_pos = abs_pos.to(self.device)
                abs_pos = self.abs_pos_emb(abs_pos).squeeze(0)

                # Compute relative position embedding
                rel_pos = int(round(position / 10))
                rel_pos = torch.tensor([[rel_pos]], dtype=torch.long)
                rel_pos = rel_pos.to(self.device)
                rel_pos = self.rel_pos_emb(rel_pos).squeeze(0)

                # Compute proba
                content = self.Wcontent(h)
                salience = self.Wsalience(h, d)
                novelty = -1 * self.Wnovelty(h,torch.tanh(s))
                ap = self.Wabs_pos(abs_pos)
                rp = self.Wrel_pos(rel_pos)
                prob = torch.sigmoid(content+salience+novelty+ap+rp+self.bias)

                #prob_doc.append(prob)
                probs.append(prob)

                s = s + torch.mm(prob,h)

            #probs.append(torch.tensor(prob_doc))

        probs = torch.cat(probs).squeeze()
        #probs = probs.to(self.device)
        return probs

In [9]:
model = RNN_RNN(device=device, vocab_size=vocab_size, word_embed=glovemgr.getEmbeddings())

In [10]:
model.to(device)

RNN_RNN(
  (word_embedding): Embedding(150002, 100)
  (word_GRU): GRU(100, 200, batch_first=True, bidirectional=True)
  (sent_GRU): GRU(400, 200, batch_first=True, bidirectional=True)
  (rel_pos_emb): Embedding(11, 100)
  (abs_pos_emb): Embedding(100, 100)
  (Wdoc): Linear(in_features=400, out_features=400, bias=True)
  (Wcontent): Linear(in_features=400, out_features=1, bias=False)
  (Wsalience): Bilinear(in1_features=400, in2_features=400, out_features=1, bias=False)
  (Wnovelty): Bilinear(in1_features=400, in2_features=400, out_features=1, bias=False)
  (Wabs_pos): Linear(in_features=100, out_features=1, bias=False)
  (Wrel_pos): Linear(in_features=100, out_features=1, bias=False)
)

In [11]:
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)

mae_fn = nn.L1Loss()

In [12]:
if not os.path.exists("./checkpoints"):
    os.makedirs("./checkpoints")

In [13]:
t1 = time()
for epoch in range(1, epochs+1):
    # train 1 epoch
    model.train()
    nb_batch_train = 0
    total_train_loss = 0
    total_train_mae = 0
    total_train_acc = 0
    with tqdm(train_iter, unit="batch", total=len(train_iter)) as tepoch:
        for batch in tepoch:
            tepoch.set_description(f"Epoch {epoch}")
            features = []
            doc_lens = []

            for j in range(batch_size):
                doc_lens.append(len(batch[j]["doc"]))
                features = features + batch[j]["doc"]
            
            features = torch.tensor(features, dtype=torch.long).to(device)

            targets = [torch.tensor(batch[j]["labels"], dtype=torch.float) for j in range(batch_size)]
            targets = torch.cat(targets)
            targets = targets.to(device)
            
            probs = model(features, doc_lens)
            
            loss = loss_fn(probs, targets)
            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            nb_batch_train += 1
            total_train_loss += loss.item()
            total_train_mae += mae_fn(probs, targets).item()
            total_train_acc += accuracy_prop_sent_per_doc_fn(probs=probs.cpu().detach().numpy(), targets=targets.cpu().detach().numpy(), doc_lens=doc_lens)
            tepoch.set_postfix(loss=total_train_loss/nb_batch_train, mae=total_train_mae/nb_batch_train, accuracy=total_train_acc/nb_batch_train)
    # Save model
    model.save("./checkpoints/RNN_RNN-" + str(epoch) + ".pt")
    # Show train and val score
    model.eval()
    nb_batch_val = 0
    total_val_loss = 0
    total_val_mae = 0
    total_val_acc = 0
    for i,batch in enumerate(val_iter):
        features = []
        doc_lens = []

        for j in range(batch_size):
            doc_lens.append(len(batch[j]["doc"]))
            features = features + batch[j]["doc"]
            
        features = torch.tensor(features, dtype=torch.long).to(device)

        targets = [torch.tensor(batch[j]["labels"], dtype=torch.float) for j in range(batch_size)]
        targets = torch.cat(targets)
        targets = targets.to(device)
        
        probs = model(features, doc_lens)
        loss = loss_fn(probs, targets)
        nb_batch_val += 1
        total_val_loss += loss.item()
        total_val_mae += mae_fn(probs, targets).item()
        total_val_acc += accuracy_prop_sent_per_doc_fn(probs=probs.cpu().detach().numpy(), targets=targets.cpu().detach().numpy(), doc_lens=doc_lens)
    print("Epoch {} : train loss = {:.3f}, val loss = {:.3f}, train mae = {:.3f}, val mae = {:.3f}, train accuracy = {:.3f}, val accuracy = {:.3f}".format(epoch, total_train_loss / nb_batch_train, total_val_loss / nb_batch_val, total_train_mae / nb_batch_train, total_val_mae / nb_batch_val, total_train_acc / nb_batch_train, total_val_acc / nb_batch_val))

t2 = time()
print("Training duration =", t2-t1)

Epoch 1: 100%|██████████| 6062/6062 [1:46:08<00:00,  1.05s/batch, accuracy=0.813, loss=0.366, mae=0.221]  


Epoch 1 : train loss = 0.366, val loss = 0.354, train mae = 0.221, val mae = 0.216, train accuracy = 0.813, val accuracy = 0.820


Epoch 2: 100%|██████████| 6062/6062 [1:46:51<00:00,  1.06s/batch, accuracy=0.816, loss=0.359, mae=0.217]  


Epoch 2 : train loss = 0.359, val loss = 0.352, train mae = 0.217, val mae = 0.213, train accuracy = 0.816, val accuracy = 0.820


Epoch 3: 100%|██████████| 6062/6062 [1:46:40<00:00,  1.06s/batch, accuracy=0.818, loss=0.355, mae=0.215]  


Epoch 3 : train loss = 0.355, val loss = 0.352, train mae = 0.215, val mae = 0.212, train accuracy = 0.818, val accuracy = 0.820


Epoch 4: 100%|██████████| 6062/6062 [1:47:36<00:00,  1.07s/batch, accuracy=0.82, loss=0.351, mae=0.213]  


Epoch 4 : train loss = 0.351, val loss = 0.353, train mae = 0.213, val mae = 0.211, train accuracy = 0.820, val accuracy = 0.819


Epoch 5: 100%|██████████| 6062/6062 [1:47:38<00:00,  1.07s/batch, accuracy=0.823, loss=0.346, mae=0.21]   


Epoch 5 : train loss = 0.346, val loss = 0.358, train mae = 0.210, val mae = 0.209, train accuracy = 0.823, val accuracy = 0.818


Epoch 6: 100%|██████████| 6062/6062 [1:47:01<00:00,  1.06s/batch, accuracy=0.826, loss=0.341, mae=0.207]  


Epoch 6 : train loss = 0.341, val loss = 0.364, train mae = 0.207, val mae = 0.207, train accuracy = 0.826, val accuracy = 0.815


Epoch 7: 100%|██████████| 6062/6062 [1:48:47<00:00,  1.08s/batch, accuracy=0.83, loss=0.335, mae=0.204]   


Epoch 7 : train loss = 0.335, val loss = 0.368, train mae = 0.204, val mae = 0.204, train accuracy = 0.830, val accuracy = 0.814


Epoch 8: 100%|██████████| 6062/6062 [1:44:20<00:00,  1.03s/batch, accuracy=0.833, loss=0.329, mae=0.2]    


Epoch 8 : train loss = 0.329, val loss = 0.375, train mae = 0.200, val mae = 0.200, train accuracy = 0.833, val accuracy = 0.813


Epoch 9: 100%|██████████| 6062/6062 [1:44:08<00:00,  1.03s/batch, accuracy=0.836, loss=0.325, mae=0.198]  


Epoch 9 : train loss = 0.325, val loss = 0.378, train mae = 0.198, val mae = 0.202, train accuracy = 0.836, val accuracy = 0.811


Epoch 10: 100%|██████████| 6062/6062 [1:43:58<00:00,  1.03s/batch, accuracy=0.838, loss=0.32, mae=0.195]   


Epoch 10 : train loss = 0.320, val loss = 0.377, train mae = 0.195, val mae = 0.205, train accuracy = 0.838, val accuracy = 0.811


Epoch 11: 100%|██████████| 6062/6062 [1:44:09<00:00,  1.03s/batch, accuracy=0.841, loss=0.316, mae=0.192] 


Epoch 11 : train loss = 0.316, val loss = 0.384, train mae = 0.192, val mae = 0.203, train accuracy = 0.841, val accuracy = 0.809


Epoch 12: 100%|██████████| 6062/6062 [1:44:04<00:00,  1.03s/batch, accuracy=0.843, loss=0.312, mae=0.19]  


Epoch 12 : train loss = 0.312, val loss = 0.378, train mae = 0.190, val mae = 0.208, train accuracy = 0.843, val accuracy = 0.811


Epoch 13: 100%|██████████| 6062/6062 [1:45:07<00:00,  1.04s/batch, accuracy=0.845, loss=0.309, mae=0.188] 


Epoch 13 : train loss = 0.309, val loss = 0.389, train mae = 0.188, val mae = 0.209, train accuracy = 0.845, val accuracy = 0.807


Epoch 14:  69%|██████▉   | 4205/6062 [1:13:41<32:32,  1.05s/batch, accuracy=0.846, loss=0.306, mae=0.186]  


KeyboardInterrupt: 

In [None]:
tmp = train_iter.__getitem__(2463)[0]["doc"]
for e in tmp:
  if (e[0] == 0):
    print("vide")

IndexError: list index out of range

In [None]:
len(tmp)

25