In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import sys
sys.path.append('../')
from constant import *

In [74]:
class EncoderSentence(nn.Module):
    """Bidirectional LSTM sentence encoder on top of a word-embedding table.

    Word indices are embedded, packed according to their true lengths, run
    through a single-layer bidirectional ``nn.LSTM`` and unpacked again. The
    forward and backward halves of the LSTM output are either summed
    (``output_type='sum'``) or left concatenated (``output_type='concat'``).

    Args:
        word_size: Number of rows in the embedding table (vocabulary size).
        word_dim: Embedding dimension, which is also the LSTM input size.
        hidden_size: Hidden size of each LSTM direction.
        pretrained_word_embeds: Optional ``(word_size, word_dim)`` matrix
            (NumPy array or tensor) copied into the embedding table when the
            module-level flag ``PRE_TRAINED_EMBEDDING`` or
            ``WORD2VEC_EMBEDDING`` is truthy.
        output_type: ``'sum'`` or ``'concat'``; any other value makes
            ``forward`` raise ``NotImplementedError``.

    Note:
        ``PRE_TRAINED_EMBEDDING``, ``WORD2VEC_EMBEDDING`` and ``NON_TRAINABLE``
        are read as globals; presumably they are provided by
        ``from constant import *`` -- confirm against ``constant.py``.
    """

    def __init__(self, word_size, word_dim, hidden_size, pretrained_word_embeds=None, output_type='sum'):
        super().__init__()

        self.output_type = output_type
        self.word_size = word_size
        self.word_dim = word_dim
        self.hidden_size = hidden_size
        self.pretrained_word_embeds = pretrained_word_embeds
        # Index 0 is the padding token; nn.Embedding keeps its gradient at zero.
        self.embedding = nn.Embedding(self.word_size, self.word_dim, padding_idx=0)
        self.lstm = nn.LSTM(self.word_dim, self.hidden_size, batch_first=True, bidirectional=True)
        self._init_weights()

    def forward(self, x, input_lengths):
        """Encode a padded, batch-first batch of word-index sequences.

        Args:
            x: Integer index tensor laid out batch-first, ``(batch, seq_len)``.
            input_lengths: True length of every sequence in ``x``.
                ``pack_padded_sequence`` is called with its default
                arguments, so in PyTorch versions that have
                ``enforce_sorted`` the lengths must be in decreasing order.

        Returns:
            ``(outputs, hidden_cell)`` where ``outputs`` has last dimension
            ``hidden_size`` for ``'sum'`` and ``2 * hidden_size`` for
            ``'concat'`` (padded positions are zero), and ``hidden_cell`` is
            the ``(h_n, c_n)`` tuple returned by ``nn.LSTM``.

        Raises:
            NotImplementedError: If ``output_type`` is neither ``'sum'`` nor
                ``'concat'``.
        """
        # Reject an unsupported mode before spending time on the LSTM pass.
        if self.output_type not in ('sum', 'concat'):
            raise NotImplementedError("unsupported output_type: %r" % (self.output_type,))

        # pack_padded_sequence needs the lengths on the CPU even when the
        # batch itself lives on an accelerator.
        if torch.is_tensor(input_lengths):
            input_lengths = input_lengths.cpu()

        embedded = self.embedding(x)
        packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths, batch_first=True)
        packed_outputs, hidden_cell = self.lstm(packed)
        outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(packed_outputs, batch_first=True)

        if self.output_type == 'sum':
            # Add the forward-direction half to the backward-direction half.
            outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]
        # 'concat': the LSTM output already is [forward ; backward] along the
        # last dimension, so re-slicing and concatenating would be a no-op.
        return outputs, hidden_cell

    def _init_weights(self):
        """Initialise the embedding table from pretrained vectors or Xavier-uniform.

        Raises:
            TypeError: If a pretrained flag is set but no embedding matrix
                was supplied.
        """
        if PRE_TRAINED_EMBEDDING or WORD2VEC_EMBEDDING:
            if self.pretrained_word_embeds is None:
                raise TypeError(
                    "pretrained_word_embeds must be an array or tensor when "
                    "PRE_TRAINED_EMBEDDING or WORD2VEC_EMBEDDING is set"
                )
            # as_tensor accepts NumPy arrays as well as tensors.
            pretrained = torch.as_tensor(self.pretrained_word_embeds)
            self.embedding.weight.data.copy_(pretrained)
            self.embedding.weight.requires_grad = not NON_TRAINABLE
        else:
            nn.init.xavier_uniform_(self.embedding.weight.data)
            # Xavier init re-randomises every row, including the padding row
            # that nn.Embedding had zeroed; restore it so padding stays zero.
            padding_idx = self.embedding.padding_idx
            if padding_idx is not None:
                self.embedding.weight.data[padding_idx].zero_()

## Test: run the encoder on one training batch

In [75]:
from load_data_exp import *

In [76]:
# Grab only the first batch from the loader: the loop binds `i` to that batch
# and exits immediately. If the loader yields nothing, `i` is left unbound
# (or stale from an earlier run of this cell).
for i in train_dataloader:
    break

In [77]:
# Element 3 of the batch: the per-sequence lengths that are passed to the
# encoder as `input_lengths` in the call below.
i[3]

tensor([63, 45, 45, 45, 45, 45, 39, 38, 35, 35, 35, 35, 35, 35, 34, 34, 33, 33,
        30, 30, 30, 25, 25, 25, 23, 22, 20, 17, 17, 14, 11,  9])

In [78]:
# Build the encoder under test; the vocabulary gets one extra row on top of
# the entries in `word_mapping`.
enc = EncoderSentence(
    word_size=len(word_mapping) + 1,
    word_dim=WORD_DIM,
    hidden_size=128,
    pretrained_word_embeds=pretrained_word_embeds,
    output_type='sum',
)

In [79]:
# Run the encoder on the sampled batch: `e` receives the per-position outputs
# and `f` the LSTM state returned alongside them.
e, f = enc(x=i[1], input_lengths=i[3])

In [64]:
# Inspect the second value returned by `enc` (the LSTM's hidden/cell state).
# NOTE(review): the saved output below is a 1-D integer tensor and carries an
# older execution count (64 < 79), so it appears to be stale output from an
# earlier binding of `f` -- re-run the notebook top to bottom to refresh it.
f

tensor([54, 43, 41, 41, 41, 38, 37, 37, 37, 36, 36, 36, 36, 33, 30, 26, 26, 25,
        23, 23, 21, 21, 21, 18, 18, 17, 17, 16, 16, 16, 15, 15])

In [73]:
# Inspect the per-position encoder outputs returned by `enc`.
# NOTE(review): execution count 73 predates the class cell (In [74]), so this
# output may come from an older class definition; the all-zero rows are
# presumably padded positions -- confirm after a fresh Restart & Run All.
e

tensor([[[-0.0087, -0.1990,  0.0752,  ...,  0.0978,  0.0327,  0.0199],
         [-0.0377, -0.0681,  0.0348,  ...,  0.0531, -0.0119, -0.0419],
         [-0.0304,  0.0118,  0.0505,  ...,  0.0566, -0.0231, -0.0367],
         ...,
         [-0.0244,  0.0674,  0.0451,  ...,  0.0480, -0.0072, -0.0264],
         [-0.0271,  0.0571,  0.0224,  ...,  0.0472,  0.0121, -0.0277],
         [-0.0430,  0.0597,  0.0034,  ...,  0.0159,  0.0042, -0.0077]],

        [[-0.0087, -0.1990,  0.0752,  ...,  0.0862,  0.0297,  0.0068],
         [-0.0346, -0.0705,  0.0686,  ...,  0.0284, -0.0110, -0.0756],
         [-0.0250,  0.0049,  0.0641,  ..., -0.0045, -0.0136, -0.0758],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[-0.0087, -0.1990,  0.0752,  ...,  0.1023,  0.0325,  0.0080],
         [-0.0346, -0.0705,  0.0686,  ...,  0