In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import numpy as np
import pandas as pd
import warnings; warnings.filterwarnings('ignore')
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence

Using TensorFlow backend.


# 데이터 Import

In [2]:
from data_loader import MRLoader
batch_size = 64

loader = MRLoader(batch_size)
train_loader, test_loader = loader.get_dataset()
tokenizer = loader.tokenizer

# Glove Embedding

In [3]:
from data_loader import Glove
glove = Glove(300)
vocab_size, embedding_dim = glove.vocab_size, glove.embedding_dim
embedding_matrix = glove.get_embedding(tokenizer)

# 모델

In [4]:
class LSTM(nn.Module):

    def __init__(self, vocab_size, embedding_dim, hidden_dim, batch_size, output_dim=1, num_layers=2):
        super(LSTM, self).__init__()
        self.vocab_size = vocab_size
        self.input_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(self.vocab_size, self.input_dim)

        # Define the LSTM layer
        self.lstm = nn.LSTM(
            self.input_dim, 
            self.hidden_dim, 
            self.num_layers, 
            bidirectional=True,
            batch_first=True, 
            dropout=0.5
        )
        # Define the output layer
        self.linear = nn.Linear(self.hidden_dim * 2, output_dim)

    def init_hidden(self):
        # This is what we'll initialise our hidden state as
        return (torch.zeros(self.num_layers * 2, self.batch_size, self.hidden_dim),
                torch.zeros(self.num_layers * 2, self.batch_size, self.hidden_dim))

    def set_embedding_weights(self, embedding_matrix):
        self.embedding.weight = nn.Parameter(embedding_matrix)
    
    def forward(self, inp):
        # Forward pass through LSTM layer
        # shape of lstm_out: [input_size, batch_size, hidden_dim]
        # shape of self.hidden: (a, b), where a and b both 
        # have shape (num_layers, batch_size, hidden_dim).
        X_embedded = self.embedding(inp.long())
        
        sa, sb, sc = X_embedded.shape
        ha, hb = self.hidden
        if sa < self.batch_size:
            X_embedded = torch.cat([X_embedded, torch.zeros(self.batch_size-sa, sb, sc)])

        lstm_out, self.hidden = self.lstm(X_embedded, (ha, hb))

        # Only take the output from the final timetep
        # Can pass on the entirety of lstm_out to the next layer if it is a seq2seq prediction
        y_pred = self.linear(torch.cat([self.hidden[0][0], self.hidden[0][1]], axis=1))

        y_pred = F.sigmoid(y_pred)
        return y_pred.view(-1)

In [5]:
model = LSTM(
        vocab_size=vocab_size,
        embedding_dim=embedding_dim, 
        hidden_dim=168, 
        batch_size=batch_size, 
        output_dim=1, 
        num_layers=1
)
model.set_embedding_weights(embedding_matrix)

In [6]:
learning_rate = 0.05
num_epochs = 20

In [7]:
loss_fn = torch.nn.MSELoss()
optimiser = torch.optim.Adagrad(model.parameters(), lr=learning_rate)

hist = np.zeros(num_epochs*len(train_loader))
j = 0
for e in range(num_epochs):
    for i, (x, y) in enumerate(train_loader):
        # Clear stored gradient
        model.zero_grad()

        # Initialise hidden state
        # Don't do this if you want your LSTM to be stateful
        model.hidden = model.init_hidden()
        
        # Forward pass
        y_pred = model(x.long())
        if len(x) < model.batch_size:
            y_pred = y_pred[:len(x)]
        
        loss = loss_fn(y_pred.view(-1, 1), y.view(-1, 1))
        hist[j] = loss.item()

        # Zero out gradient, else they will accumulate between epochs
        optimiser.zero_grad()

        # Backward pass
        loss.backward()

        # Update parameters
        optimiser.step()
        j += 1

In [8]:
full_num = 0
correct_num = 0
with torch.no_grad():
    model.eval()
    losses = []
    for i, (x, y) in enumerate(test_loader):
        # Forward pass
        y_pred = model(x.long())
        if len(y) < model.batch_size:
            y_pred = y_pred[:len(y)]
            
        full_num += len(y)
        correct_num += (y_pred.reshape(-1, 1).round() == y).sum().item()

In [9]:
correct_num / full_num

0.782