In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from pytorch_pretrained_bert import BertModel, BertForMaskedLM

In [3]:
class Net(nn.Module):
    '''
    Sentence classifier: a frozen BERT encoder followed by a 1-D CNN head.

    Shape walk-through (B = batch, L = sentence_len, 768 = BERT-base hidden size):

        BERT last encoder layer          : [B, L, 768]
        .unsqueeze(1)  (add channel axis): [B, 1, L, 768]
        conv1, kernel (3, 768), no pad   : [B, 16, L - 2, 1]
        .squeeze(3)    (drop size-1 axis): [B, 16, L - 2]
        self.cnn       (4 x MaxPool1d(2)): [B, 128, (L - 2) // 16]
        flatten                          : [B, 128 * 8]
        classifier                       : [B, num_classes]

    The classifier input size is fixed at 128 * 8, so the sentence length must
    satisfy 130 <= L <= 145 (e.g. 138 -> 136 -> 68 -> 34 -> 17 -> 8).

    Args:
        num_classes: number of output logits.
    '''

    # Sequence length that must remain after the four pooling stages; the
    # first classifier layer is sized for exactly this many positions.
    _POOLED_LEN = 8

    def __init__(self, num_classes):
        super(Net, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')

        # The kernel spans the whole embedding width, so this is a 2-D
        # convolution over the [L, 768] plane that collapses the embedding
        # axis to size 1. nn.Conv1d is not defined for 4-D inputs; nn.Conv2d
        # yields the same weight shape, [16, 1, 3, 768].
        self.conv1 = nn.Conv2d(1, 16, kernel_size=(3, 768))

        # Layer order is kept exactly as before so that state_dict keys
        # (cnn.1.weight, cnn.3.weight, ...) stay compatible.
        self.cnn = nn.Sequential(
            nn.LeakyReLU(),
            nn.Conv1d(16, 16, kernel_size=3, padding=1),    # [B, 16, L-2]
            nn.LeakyReLU(),
            nn.BatchNorm1d(16),
            nn.MaxPool1d(2),                                # length // 2

            nn.Conv1d(16, 32, kernel_size=3, padding=1),
            nn.LeakyReLU(),
            nn.Conv1d(32, 32, kernel_size=3, padding=1),
            nn.LeakyReLU(),
            nn.BatchNorm1d(32),
            nn.MaxPool1d(2),                                # length // 4

            nn.Conv1d(32, 64, kernel_size=3, padding=1),
            nn.LeakyReLU(),
            nn.Conv1d(64, 64, kernel_size=3, padding=1),
            nn.LeakyReLU(),
            nn.BatchNorm1d(64),
            nn.MaxPool1d(2),                                # length // 8

            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.LeakyReLU(),
            nn.Conv1d(128, 128, kernel_size=3, padding=1),
            nn.LeakyReLU(),
            nn.BatchNorm1d(128),
            nn.MaxPool1d(2),                                # length // 16
        )

        # NOTE(review): there is no activation between the two Linear layers,
        # so they compose into a single affine map. Left as-is to keep outputs
        # and state_dict keys unchanged.
        self.classifier = nn.Sequential(
            nn.Linear(128 * self._POOLED_LEN, 160),  # channels * pooled length
            nn.Linear(160, num_classes),
        )
        # NOTE(review): not used in forward(); kept so the attribute still exists.
        self.relu = nn.LeakyReLU()

    def forward(self, inputs, masks):
        '''
        Args:
            inputs: token ids, [B, L].
            masks: attention mask, [B, L] (1 = real token, 0 = padding).

        Returns:
            Logits of shape [B, num_classes].

        Raises:
            ValueError: if L does not reduce to the pooled length the
                classifier was sized for.
        '''
        # BERT is a frozen feature extractor: no gradients flow into it.
        with torch.no_grad():
            # `masks` must be passed by keyword: the second positional
            # parameter of BertModel.forward is token_type_ids, not
            # attention_mask.
            encoded_layers = self.bert(inputs, attention_mask=masks)[0]

        x = encoded_layers[-1].unsqueeze(1)  # last layer -> [B, 1, L, 768]
        x = self.conv1(x)                    # [B, 16, L-2, 1]
        x = x.squeeze(3)                     # [B, 16, L-2]
        x = self.cnn(x)                      # [B, 128, (L-2) // 16]

        if x.size(2) != self._POOLED_LEN:
            # Reshaping with view(-1, 128 * 8) would silently fold surplus
            # positions into the batch axis; fail loudly instead.
            raise ValueError(
                'sentence length {} reduces to {} positions after pooling, '
                'but the classifier expects {}'.format(
                    inputs.size(1), x.size(2), self._POOLED_LEN))

        x = x.flatten(1)                     # [B, 128 * 8]
        return self.classifier(x)

In [4]:
class GRU(nn.Module):
    '''
    Frozen BERT encoder followed by a single-layer, unidirectional GRU.

    Shapes (B = batch, L = sentence_len):

        BERT last encoder layer : [B, L, 768]
        GRU input               : [L, B, 768]  (nn.GRU defaults to batch_first=False)
        GRU output              : [L, B, 64]
        GRU final hidden (h_n)  : [n_layers * directions, B, 64] = [1, B, 64]
    '''

    def __init__(self):
        # Must name this class: super(Net, self) raises TypeError because
        # GRU is not a subclass of Net.
        super(GRU, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.rnn = nn.GRU(768, 64, 1)
        # NOTE(review): fc2 is not applied in forward(); 708 is presumably the
        # number of target classes -- confirm before wiring it in.
        self.fc2 = nn.Linear(64, 708)

    def forward(self, inputs, masks):
        '''
        Args:
            inputs: token ids, [B, L].
            masks: attention mask, [B, L] (1 = real token, 0 = padding).

        Returns:
            The raw nn.GRU result, i.e. the tuple (output, h_n) with
            output [L, B, 64] and h_n [1, B, 64].
        '''
        # BERT is a frozen feature extractor: no gradients flow into it.
        with torch.no_grad():
            # `masks` must be passed by keyword: the second positional
            # parameter of BertModel.forward is token_type_ids, not
            # attention_mask.
            encoded_layers = self.bert(inputs, attention_mask=masks)[0]

        # Use a local instead of overwriting an entry of BERT's returned list.
        # [B, L, 768] -> [L, B, 768], the sequence-first layout nn.GRU expects.
        sequence_first = encoded_layers[-1].transpose(0, 1).contiguous()

        self.rnn.flatten_parameters()
        hidden = self.rnn(sequence_first)
        return hidden