## Definindo a RNN

### Arquitetura:

Palavra -> Embedding -> GRU -> Dense

No Pytorch existe a possibilidade do desenvolvimento de RNNs com tamanhos diferentes (sem a necessiade do 0 padding) isso econimiza tempo de processamento.

As etapas para o uso dessa função são as seguintes:
    1. Pad de todoas as sequências do dataset;
    2. Unpad dessas sequências utilizando o pack_padded_sequence;
    3. Input na GRU;
    4. Refaz o pad para entrar na camada de previsão utilizando o pad_packed_sequence;

In [43]:
import torch.nn as nn
import torch
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.nn.functional as F
from data_preparator import *
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

class DAC(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_size, n_classes):
        super(DAC, self).__init__()
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        
        #Inicializacao da rede
        self.embedding = nn.Embedding(self.vocab_size+1, self.embedding_dim)
        self.gru = nn.GRU(embedding_dim, hidden_size)#, bidirectional=True)
        self.fc = nn.Linear(hidden_size, n_classes)

    def forward(self, seq, lengths, gpu = False):
        print("Sequence shape: ", seq.shape)
        print('Lengths',lengths)
        bs = seq.size(1)
        print("Batch size: ", bs)
        self.hidden = self._init_hidden(bs, gpu)
        
        embeds = self.embedding(seq)
        embeds = pack_padded_sequence(embeds, lengths) #faz o unpad da sequencia
        
        gru_out, self.hidden = self.gru(embeds, self.hidden) #retorna o hidden_state de todos os timesteps
        
        gru_out, lenghts = pad_packed_sequence(gru_out) # faz o pad da sequencia para o tamanho maximo do batch
        
        print('GRU output(all timesteps)', gru_out.shape)
        print(gru_out)
        
        #Como é um problema de classificacao, vou usar a ultima camada hidden
        output = self.fc(self.hidden[-1])
        
        return F.log_softmax(output, dim=-1)
    
    def _init_hidden(self, batch_size, gpu):
        if gpu: return Variable(torch.zeros((1,batch_size,self.hidden_size)).cuda())
        else: return Variable(torch.zeros((1,batch_size,self.hidden_size)))
        return self.create_variable(hidden)

    def create_variable(self, tensor):
        # Do cuda() before wrapping with variable
        if torch.cuda.is_available():
            return Variable(tensor.cuda())
        else:
            return Variable(tensor)

In [44]:
padded, labels, vocab_size, x_lengs = prepare_dataset('./conversas_mexidas.csv')

In [46]:
hidden_size = 30

In [86]:
m = DAC(vocab_size, embedding_dim=20, hidden_size=5, n_classes=7)

print(m)

DAC(
  (embedding): Embedding(2492, 20)
  (gru): GRU(20, 5)
  (fc): Linear(in_features=5, out_features=7, bias=True)
)


In [87]:
class DialogueDataset(Dataset):
    def __init__(self, encoded_dialogues, labels, x_lengs):
        self.len = encoded_dialogues.shape[0]
        self.x_data = encoded_dialogues
        self.y_data = torch.tensor(labels) #one-hot encoding
        self.x_lengs = x_lengs
        
    def __getitem__(self, index):
        xb = self.x_data[index]
        yb = self.y_data[index]
        lens = self.x_lengs[index]
        return xb, yb, lens
    
    def __len__(self):
        return self.len

In [88]:
def sort_batch(x, y, lenghts):
    lengths,indx = lenghts.sort(dim = 0, descending = True)
    x = x[indx]
    y = y[indx]
    
    return x.transpose(0,1), y, lengths

In [89]:
dataset = DialogueDataset(padded, labels, x_lengs)
train_loader = DataLoader(dataset= dataset,
                         batch_size = 2,
                         shuffle=False)

In [90]:
it = iter(train_loader)

In [91]:
x, y, l = next(it)

In [92]:
xs, ys, ls = sort_batch(x, y, l)

In [93]:
outp = m(xs, ls)

Sequence shape:  torch.Size([90, 2])
Lengths tensor([19, 13])
Batch size:  2
GRU output(all timesteps) torch.Size([19, 2, 5])
tensor([[[ 0.4509,  0.0404,  0.0292, -0.2293, -0.5114],
         [ 0.5787, -0.0287, -0.6487,  0.0546,  0.0541]],

        [[ 0.3351,  0.4517,  0.0061, -0.3870, -0.7261],
         [ 0.4655, -0.0259,  0.0458, -0.0820, -0.1191]],

        [[ 0.2292,  0.7219,  0.1385, -0.0720, -0.6182],
         [ 0.2406,  0.5915,  0.2133, -0.2711, -0.3446]],

        [[ 0.3389,  0.0819,  0.3405, -0.2735, -0.4635],
         [ 0.1298,  0.4775, -0.2844,  0.1993, -0.2503]],

        [[ 0.0436,  0.0637, -0.2007,  0.0418, -0.7701],
         [ 0.0092, -0.1366,  0.1250,  0.1425, -0.2221]],

        [[ 0.3242, -0.2187,  0.1645, -0.4265, -0.3611],
         [-0.1392, -0.4530, -0.8354,  0.0431,  0.0124]],

        [[-0.1454, -0.4246, -0.6825, -0.3004, -0.3033],
         [-0.0116, -0.4997, -0.6354, -0.3558,  0.3546]],

        [[-0.3668, -0.7073, -0.6144, -0.2329,  0.0695],
         [-0.1740, -

In [76]:
#SÃO AS PROBABILIDADES DE CADA CLASSE 
outp

tensor([[0.],
        [0.]], grad_fn=<LogSoftmaxBackward>)

In [77]:
torch.max(outp, dim = 1)

(tensor([0., 0.], grad_fn=<MaxBackward0>), tensor([0, 0]))

In [78]:
top_n, top_i = outp.topk(1) #PEGA AS CLASSES COM MAIOR PROBABILIDADE

In [79]:
top_i

tensor([[0],
        [0]])

In [80]:
opt = optim.Adam(m.parameters(), 1e-2)
loss_fn =  F.nll_loss
model = DAC(vocab_size, embedding_dim=20, hidden_size=30, n_classes=7)


for epoch in range(30):
    y_true_train = list()
    y_pred_train = list()
    total_loss_train = 0
    
    for x, y, lengths in iter(train_loader):
        x, y , lenghts = sort_batch(x,y,lengths)
        
        opt.zero_grad()
        pred = model(x, lenghts)
        loss = loss_fn(pred, y)
        loss.backward()
        opt.step()
        

Sequence shape:  torch.Size([90, 2])
Lengths tensor([19, 13])
Batch size:  2
GRU output(all timesteps) torch.Size([19, 2, 30])
tensor([[[ 0.0308, -0.4410, -0.3945,  ...,  0.0897,  0.2613,  0.1475],
         [ 0.0017,  0.2206,  0.2220,  ...,  0.2697,  0.2443, -0.2768]],

        [[ 0.2025, -0.5089, -0.0540,  ...,  0.3787,  0.3920, -0.2531],
         [-0.4570, -0.0696,  0.2631,  ...,  0.2554,  0.5169,  0.3255]],

        [[ 0.3371, -0.3685,  0.3557,  ..., -0.1337,  0.0140,  0.1656],
         [-0.3873, -0.1054,  0.1323,  ...,  0.4335,  0.3956, -0.0414]],

        ...,

        [[-0.0275,  0.3332, -0.1860,  ..., -0.0986, -0.3066,  0.2082],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.0376,  0.2151,  0.3490,  ..., -0.0691,  0.2346,  0.4970],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.4271,  0.2970,  0.0215,  ..., -0.0913, -0.0317,  0.5241],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000

RuntimeError: multi-target not supported at /pytorch/aten/src/THNN/generic/ClassNLLCriterion.c:21

In [27]:
pred

tensor([[-1.7884, -1.6513, -1.9986, -2.0209, -2.0312, -2.1240, -2.1024],
        [-1.8666, -2.0058, -2.1559, -1.9408, -1.8920, -2.0552, -1.7569]],
       grad_fn=<LogSoftmaxBackward>)

tensor([[0, 1, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0]])