In [4]:
import os
import sys
import datetime
import argparse
import torch
from torchtext import data, datasets
from torch import autograd
from torch.autograd import Variable
from torch import nn
import torch.nn.functional as F

In [40]:
# get hyper parameters
batch_size = 32
lr = 0.0001
epochs = 40
cuda = torch.cuda.is_available()

In [41]:

class BasicLSTM(nn.Module):
    """
        Basic LSTM
    """
    def __init__(self, n_layers, hidden_dim, n_vocab, embed_dim, n_classes, dropout=0.2):
        super(BasicLSTM, self).__init__()
        print("Building Basic LSTM model...")
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        self.embed = nn.Embedding(n_vocab, embed_dim)
        self.dropout = nn.Dropout(dropout)
        self.lstm = nn.LSTM(embed_dim, self.hidden_dim,
                            num_layers=self.n_layers,
                            dropout=dropout,
                            batch_first=True)
        self.out = nn.Linear(self.hidden_dim, n_classes)

    def forward(self, x):
        h_0 = self._init_state(b_size=batch_size)
        x = self.embed(x)  #  [b, i] -> [b, i, e]
        x, _ = self.lstm(x, h_0)  # [i, b, h]
        h_t = x[:,-1,:]
        self.dropout(h_t)
        logit = self.out(h_t)  # [b, h] -> [b, o]
        print(logit.size())
        return logit

    def _init_state(self, b_size=1):
        weight = next(self.parameters()).data
        return (
            Variable(weight.new(self.n_layers, b_size, self.hidden_dim).zero_()),
            Variable(weight.new(self.n_layers, b_size, self.hidden_dim).zero_())
        )

In [42]:
def train(model, optimizer, train_iter, cuda=True):
    model.train()
    for b, batch in enumerate(train_iter):
        x, y = batch.text, batch.label
        print("input size: ", x.size())
        print("output size: ", y.size())
        y.data.sub_(1)  # index align
        if cuda:
            x, y = x.cuda(), y.cuda()
        optimizer.zero_grad()
        logit = model(x)
        loss = F.cross_entropy(logit, y)
        loss.backward()
        optimizer.step()
        if b % 100 == 0:
            corrects = (torch.max(logit, 1)[1].view(y.size()).data == y.data).sum()
            accuracy = 100.0 * corrects / batch.batch_size
            sys.stdout.write(
                '\rBatch[%d] - loss: %.6f  acc: %.4f (%d/%d)' % (
                b, loss.data[0], accuracy, corrects, batch.batch_size))

In [43]:
def evaluate(model, val_iter, cuda=True):
    """evaluate model"""
    model.eval()
    corrects, avg_loss = 0, 0
    for batch in val_iter:
        x, y = batch.text, batch.label
        y.data.sub_(1)  # index align
        if cuda:
            x, y = x.cuda(), y.cuda()
        logit = model(x)
        loss = F.cross_entropy(logit, y, size_average=False)
        avg_loss += loss.data[0]
        corrects += (torch.max(logit, 1)
                     [1].view(y.size()).data == y.data).sum()
    size = len(val_iter.dataset)
    avg_loss = avg_loss / size
    accuracy = 100.0 * corrects / size
    return avg_loss, accuracy

# IMDB 데이터셋 가져오기

In [44]:
# load data
print("\nLoading data...")
TEXT = data.Field(lower=True)
LABEL = data.Field(sequential=False)
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
TEXT.build_vocab(train_data, min_freq=5)
LABEL.build_vocab(train_data)
train_iter, test_iter = data.BucketIterator.splits(
        (train_data, test_data), batch_size=batch_size,
        shuffle=True, repeat=False)
vocab_size = len(TEXT.vocab)
n_classes = len(LABEL.vocab) - 1
print("[TRAIN]: %d \t [TEST]: %d \t [VOCAB] %d \t [CLASSES] %d"
      % (len(train_iter),len(test_iter), vocab_size, n_classes))


Loading data...
[TRAIN]: 782 	 [TEST]: 782 	 [VOCAB] 46159 	 [CLASSES] 2


In [45]:
model = BasicLSTM(1, 256, vocab_size, 128, n_classes, 0.5)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
if cuda:
    model = model.cuda()

best_val_loss = None
for e in range(1, epochs+1):
    train(model, optimizer, train_iter, cuda)
    val_loss, val_accuracy = evaluate(model, test_iter, cuda)
    print("\n[Epoch: %d] val_loss:%5.2f | acc:%5.2f" % (e, val_loss, val_accuracy))

    # Save the model if the validation loss is the best we've seen so far.
    if not best_val_loss or val_loss < best_val_loss:
        print("[!] saving model")
        if not os.path.isdir("snapshot"):
            os.makedirs("snapshot")
        torch.save(model.state_dict(), './snapshot/%d_convcnn.pt' % (e))
        best_val_loss = val_loss

Building Basic LSTM model...
input size:  torch.Size([101, 32])
output size:  torch.Size([32])


  "num_layers={}".format(dropout, num_layers))


RuntimeError: Expected hidden[0] size (1, 101, 256), got (1, 32, 256)