In [1]:
import os
import sys
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchtext import data, datasets


In [2]:
# get hyper parameters
BATCH_SIZE = 64
lr = 0.001
EPOCHS = 40
torch.manual_seed(42)
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda" if USE_CUDA else "cpu")

In [3]:
# class BasicRNN(nn.Module):
#     """
#         Basic RNN
#     """
#     def __init__(self, n_layers, hidden_dim, n_vocab, embed_dim, n_classes, dropout_p=0.2):
#         super(BasicRNN, self).__init__()
#         print("Building Basic RNN model...")
#         self.n_layers = n_layers
#         self.hidden_dim = hidden_dim

#         self.embed = nn.Embedding(n_vocab, embed_dim)
#         self.dropout = nn.Dropout(dropout_p)
#         self.rnn = nn.RNN(embed_dim, hidden_dim, n_layers,
#                           dropout=dropout_p, batch_first=True)
#         self.out = nn.Linear(self.hidden_dim, n_classes)

#     def forward(self, x):
#         embedded = self.embed(x)  #  [b, i] -> [b, i, e]
#         _, hidden = self.rnn(embedded)
#         self.dropout(hidden)
#         hidden = hidden.squeeze()
#         logit = self.out(hidden)  # [b, h] -> [b, o]
#         return logit

class BasicLSTM(nn.Module):
    def __init__(self, n_layers, hidden_dim, n_vocab, embed_dim, n_classes, dropout_p=0.2):
        super(BasicLSTM, self).__init__()
        print("Building Basic LSTM model...")
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        self.embed = nn.Embedding(n_vocab, embed_dim)
        self.dropout = nn.Dropout(dropout_p)
        self.lstm = nn.LSTM(embed_dim, self.hidden_dim,
                            num_layers=self.n_layers,
                            dropout=dropout_p,
                            batch_first=True)
        self.out = nn.Linear(self.hidden_dim, n_classes)

    def forward(self, x):
        x = self.embed(x)  #  [b, i] -> [b, i, e]
        h_0 = self._init_state(batch_size=x.size(0))
        x, _ = self.lstm(x, h_0)  # [i, b, h]
        h_t = x[:,-1,:]
        self.dropout(h_t)
        logit = self.out(h_t)  # [b, h] -> [b, o]
        return logit
    
    def _init_state(self, batch_size=1):
        weight = next(self.parameters()).data
        return (
            weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
            weight.new(self.n_layers, batch_size, self.hidden_dim).zero_()
        )

In [4]:
def train(model, optimizer, train_iter):
    model.train()
    for b, batch in enumerate(train_iter):
        x, y = batch.text.to(DEVICE), batch.label.to(DEVICE)
        y.data.sub_(1)  # index align
        optimizer.zero_grad()
        logit = model(x)
        loss = F.cross_entropy(logit, y)
        loss.backward()
        optimizer.step()
#         if b % 100 == 0:
#             corrects = (logit.max(1)[1].view(y.size()).data == y.data).sum()
#             accuracy = 100.0 * corrects / batch.batch_size
#             sys.stdout.write(
#                 '\rBatch[%d] - loss: %.6f  acc: %.2f' %
#                 (b, loss.item(), accuracy))

In [5]:
def evaluate(model, val_iter):
    """evaluate model"""
    model.eval()
    corrects, avg_loss = 0, 0
    for batch in val_iter:
        x, y = batch.text.to(DEVICE), batch.label.to(DEVICE)
        y.data.sub_(1)  # index align
        logit = model(x)
        loss = F.cross_entropy(logit, y, size_average=False)
        avg_loss += loss.item()
        corrects += (logit.max(1)[1].view(y.size()).data == y.data).sum()
    size = len(val_iter.dataset)
    avg_loss = avg_loss / size
    accuracy = 100.0 * corrects / size
    return avg_loss, accuracy

# IMDB 데이터셋 가져오기

In [6]:
# load data
print("\nLoading data...")
TEXT = data.Field(sequential=True, batch_first=True, lower=True)
LABEL = data.Field(sequential=False, batch_first=True)
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
TEXT.build_vocab(train_data, min_freq=5)
LABEL.build_vocab(train_data)

train_iter, test_iter = data.BucketIterator.splits(
        (train_data, test_data), batch_size=BATCH_SIZE,
        shuffle=True, repeat=False)

vocab_size = len(TEXT.vocab)
n_classes = len(LABEL.vocab) - 1


Loading data...


In [7]:
print("[TRAIN]: %d \t [TEST]: %d \t [VOCAB] %d \t [CLASSES] %d"
      % (len(train_iter),len(test_iter), vocab_size, n_classes))

[TRAIN]: 391 	 [TEST]: 391 	 [VOCAB] 46159 	 [CLASSES] 2


In [8]:
model = BasicLSTM(1, 256, vocab_size, 128, n_classes, 0.5).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

print(model)

Building Basic LSTM model...


  "num_layers={}".format(dropout, num_layers))


BasicLSTM(
  (embed): Embedding(46159, 128)
  (dropout): Dropout(p=0.5)
  (lstm): LSTM(128, 256, batch_first=True, dropout=0.5)
  (out): Linear(in_features=256, out_features=2, bias=True)
)


In [9]:
best_val_loss = None
for e in range(1, EPOCHS+1):
    train(model, optimizer, train_iter)
    val_loss, val_accuracy = evaluate(model, test_iter)

    print("\n[Epoch: %d] val_loss:%5.2f | acc:%5.2f" % (e, val_loss, val_accuracy))
    
    # Save the model if the validation loss is the best we've seen so far.
#     if not best_val_loss or val_loss < best_val_loss:
#         if not os.path.isdir("snapshot"):
#             os.makedirs("snapshot")
#         torch.save(model.state_dict(), './snapshot/convcnn.pt')
#         best_val_loss = val_loss




[Epoch: 1] val_loss: 0.63 | acc:65.00

[Epoch: 2] val_loss: 0.49 | acc:77.00

[Epoch: 3] val_loss: 0.34 | acc:85.00

[Epoch: 4] val_loss: 0.32 | acc:87.00

[Epoch: 5] val_loss: 0.39 | acc:87.00

[Epoch: 6] val_loss: 0.42 | acc:86.00

[Epoch: 7] val_loss: 0.51 | acc:86.00

[Epoch: 8] val_loss: 0.56 | acc:86.00

[Epoch: 9] val_loss: 0.69 | acc:86.00

[Epoch: 10] val_loss: 0.75 | acc:85.00

[Epoch: 11] val_loss: 0.68 | acc:86.00

[Epoch: 12] val_loss: 0.69 | acc:86.00

[Epoch: 13] val_loss: 0.73 | acc:86.00

[Epoch: 14] val_loss: 0.80 | acc:86.00

[Epoch: 15] val_loss: 0.81 | acc:86.00

[Epoch: 16] val_loss: 0.89 | acc:86.00

[Epoch: 17] val_loss: 0.94 | acc:86.00

[Epoch: 18] val_loss: 0.95 | acc:86.00

[Epoch: 19] val_loss: 0.70 | acc:86.00

[Epoch: 20] val_loss: 0.74 | acc:86.00

[Epoch: 21] val_loss: 0.90 | acc:85.00

[Epoch: 22] val_loss: 0.78 | acc:86.00

[Epoch: 23] val_loss: 0.87 | acc:86.00

[Epoch: 24] val_loss: 0.89 | acc:86.00

[Epoch: 25] val_loss: 0.93 | acc:86.00

[Epoch: 