In [79]:
import torch.nn as nn
import torch
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.nn.functional as F
from data_preparator import *
from torch.utils.data import Dataset, DataLoader

class DAC(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_size, n_classes):
        super(DAC, self).__init__()
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        
        #Inicializacao da rede
        self.embedding = nn.Embedding(self.vocab_size+1, self.embedding_dim)
        self.gru = nn.GRU(embedding_dim, hidden_size)#, bidirectional=True)
        self.fc = nn.Linear(hidden_size, n_classes)

    def forward(self, seq, lengths, gpu = False):
        print("Sequence shape: ", seq.shape)
        print('Lengths',lengths)
        bs = seq.size(1)
        print("Batch size: ", bs)
        self.hidden = self._init_hidden(bs, gpu)
        
        embeds = self.embedding(seq)
        embeds = pack_padded_sequence(embeds, lengths) #faz o unpad da sequencia
        
        gru_out, self.hidden = self.gru(embeds, self.hidden) #retorna o hidden_state de todos os timesteps
        
        gru_out, lenghts = pad_packed_sequence(gru_out) # faz o pad da sequencia para o tamanho maximo do batch
        
        print('GRU output(all timesteps)', gru_out.shape)
        print(gru_out)
        
        #Como é um problema de classificacao, vou usar a ultima camada hidden
        output = self.fc(self.hidden[-1])
        
        return F.log_softmax(output, dim=-1)
    
    def _init_hidden(self, batch_size, gpu):
        if gpu: return Variable(torch.zeros((1,batch_size,self.hidden_size)).cuda())
        else: return Variable(torch.zeros((1,batch_size,self.hidden_size)))
        return self.create_variable(hidden)

    def create_variable(self, tensor):
        # Do cuda() before wrapping with variable
        if torch.cuda.is_available():
            return Variable(tensor.cuda())
        else:
            return Variable(tensor)

In [18]:
padded, labels, vocab_size, x_lengs = prepare_dataset('./conversas_mexidas.csv')

In [19]:
hidden_size = 30

In [80]:
m = DAC(vocab_size, embedding_dim=20, hidden_size=30, n_classes=7)

print(m)

DAC(
  (embedding): Embedding(2492, 20)
  (gru): GRU(20, 30)
  (fc): Linear(in_features=30, out_features=7, bias=True)
)


In [60]:
class DialogueDataset(Dataset):
    def __init__(self, encoded_dialogues, labels, x_lengs):
        self.len = encoded_dialogues.shape[0]
        self.x_data = encoded_dialogues
        self.y_data = torch.tensor(labels) #one-hot encoding
        self.x_lengs = x_lengs
        
    def __getitem__(self, index):
        xb = self.x_data[index]
        yb = self.y_data[index]
        lens = self.x_lengs[index]
        return xb, yb, lens
    
    def __len__(self):
        return self.len

In [73]:
def sort_batch(x, y, lenghts):
    lengths,indx = lenghts.sort(dim = 0, descending = True)
    x = x[indx]
    y = y[indx]
    
    return x.transpose(0,1), y, lengths

In [74]:
dataset = DialogueDataset(padded, labels, x_lengs)
train_loader = DataLoader(dataset= dataset,
                         batch_size = 2,
                         shuffle=False)

In [75]:
it = iter(train_loader)

In [76]:
x, y, l = next(it)

In [77]:
xs, ys, ls = sort_batch(x, y, l)

In [81]:
outp = m(xs, ls)

Sequence shape:  torch.Size([90, 2])
Lengths tensor([19, 13])
Batch size:  2
GRU output(all timesteps) torch.Size([19, 2, 30])
tensor([[[-0.0174, -0.1684, -0.0289,  ..., -0.2198, -0.2431, -0.3154],
         [-0.1500,  0.2103,  0.1374,  ...,  0.3475,  0.1614,  0.3104]],

        [[-0.2729,  0.3935,  0.0519,  ..., -0.1029,  0.1529,  0.2242],
         [-0.4077, -0.0337, -0.1808,  ..., -0.0506,  0.2561, -0.0474]],

        [[-0.0710,  0.1958,  0.4545,  ..., -0.0450, -0.2108,  0.2041],
         [-0.4253,  0.0620,  0.2413,  ..., -0.2340, -0.1148, -0.2630]],

        ...,

        [[-0.0684,  0.4713,  0.2873,  ...,  0.1249,  0.1252,  0.3841],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.0907,  0.2221,  0.2798,  ..., -0.1970, -0.1563,  0.1902],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[-0.3063,  0.1299,  0.1441,  ..., -0.0278, -0.0119,  0.2414],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000

In [82]:
#SÃO AS PROBABILIDADES DE CADA CLASSE 
outp

tensor([[-2.0948, -1.8105, -1.9382, -1.7673, -2.2825, -1.9622, -1.8579],
        [-1.8108, -1.8765, -1.7892, -1.7943, -2.2329, -2.0093, -2.2191]],
       grad_fn=<LogSoftmaxBackward>)

In [86]:
torch.max(outp, dim = 1)

(tensor([-1.7673, -1.7892], grad_fn=<MaxBackward0>), tensor([3, 2]))