[reference](https://github.com/bentrevett/pytorch-sentiment-analysis)

In [0]:
# Importing Libraries
import os
import time
import torch
import torchtext
from torchtext import data
from torchtext import datasets
from torchtext.vocab import GloVe
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
import numpy as np
import pandas as pd
import numpy as np

In [3]:
# Mount Google Drive at /content/drive so the project folder used below is
# reachable from this runtime. `google.colab` is the Colab helper package, so
# this cell is only expected to work when the notebook runs on Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Project folder on the mounted Drive. It is passed to load_dataset() and used
# as the directory for the saved .pt weight files further down.
path = '/content/drive/My Drive/lt-module'
# Make it the working directory: relative paths used later (e.g. the
# 'tut1-model.pt' checkpoint) resolve here, and presumably this is also what
# lets the local DataLoader / Model modules be imported -- TODO confirm.
os.chdir(path)

In [0]:
# Custom Libraries
import DataLoader
import Model

## Define functions - to load dataset & model

In [0]:
# load dataset
def load_dataset(dataset, architecture, batch_size, device, path):
    """
    Build the data wrapper for the requested dataset / architecture pair.

    Arguments
    ---------
    dataset : "imdb" or "agnews"
    architecture : only consulted for "imdb", where "cnn" selects
        DataLoader.IMDB_CNN and "lstm" selects DataLoader.IMDB_LSTM
    batch_size, device, path : forwarded unchanged to the DataLoader class

    Returns
    -------
    The object created by the selected DataLoader class

    Raises
    ------
    ValueError : if the dataset is unknown, or if the architecture has no
        IMDB loader
    """
    if dataset == "imdb":
        if architecture == "cnn":
            return DataLoader.IMDB_CNN(batch_size, device, path)
        if architecture == "lstm":
            return DataLoader.IMDB_LSTM(batch_size, device, path)
        # Previously this case fell through to `return data` with `data`
        # never assigned, surfacing as an unrelated UnboundLocalError.
        raise ValueError(architecture + " is not supported for " + dataset)

    if dataset == "agnews":
        return DataLoader.AGNEWS(batch_size, device, path)

    raise ValueError(dataset + " is not supported")

# load model and set hyperparameters
def load_model(architecture, data_choice, batch_size):
    """
    Instantiate an untrained model for the given architecture and dataset.

    NOTE: the vocabulary is read from the notebook-level global `dataset`
    (the object returned by load_dataset), so that variable must exist before
    a "cnn" or "lstm" model is requested.

    Arguments
    ---------
    architecture : "cnn", "lstm", or "bert" ("bert" is a temporary
        placeholder that returns None)
    data_choice : "imdb" (one output unit) or "agnews" (four output units)
    batch_size : not used by this function; kept so existing calls still work

    Returns
    -------
    The constructed model with the <unk> and <pad> embedding rows set to
    zero, or None for "bert"

    Raises
    ------
    ValueError : if the architecture or the dataset choice is not supported
    """
    # temporary
    if architecture == "bert":
        return None

    if architecture not in ("cnn", "lstm"):
        raise ValueError(architecture + " is not supported")

    # Output units per dataset: binary-class for IMDB, multi-class (4) for
    # AG News. Validated up front: an unknown choice used to leave `model`
    # unbound on the CNN path and silently built a 1-output LSTM.
    output_dims = {"imdb": 1, "agnews": 4}
    if data_choice not in output_dims:
        raise ValueError(data_choice + " is not supported")
    output_dim = output_dims[data_choice]

    # hyperparameters shared by both architectures
    vocab = dataset.TEXT.vocab
    vocab_size = len(vocab)
    embedding_dim = 100
    dropout = 0.5
    pad_idx = vocab.stoi[dataset.TEXT.pad_token]
    unk_idx = vocab.stoi[dataset.TEXT.unk_token]

    if architecture == "cnn":
        n_filters = 100
        filter_sizes = [3, 4, 5]
        cnn_class = Model.binaryCNN if data_choice == "imdb" else Model.multiCNN
        model = cnn_class(vocab_size, embedding_dim, n_filters, filter_sizes,
                          output_dim, dropout, pad_idx)
    else:
        hidden_dim = 256
        n_layers = 2
        bidirectional = True
        model = Model.LSTM(vocab_size, embedding_dim, hidden_dim, output_dim,
                           n_layers, bidirectional, dropout, pad_idx)

    # Zero the embedding rows of the unknown and padding tokens.
    model.embedding.weight.data[unk_idx] = torch.zeros(embedding_dim)
    model.embedding.weight.data[pad_idx] = torch.zeros(embedding_dim)

    return model

## Choose dataset & model

In [20]:
# Running on Colab, so for now these are plain variables instead of CLI args
data_choice = "imdb" # dataset to use
arch_choice = "cnn" # model architecture to use
batch_size = 64 # batch size - 32 or 64
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using {device} device.')

Using cuda device.


In [0]:
# load dataset
# The result is stored in the global `dataset`, which load_model() reads
# directly and whose train_iter / valid_iter / test_iter the loops below use.
dataset = load_dataset(data_choice, arch_choice, batch_size, device, path)

#### CNN

In [0]:
# load model and set hyperparameters
# (uses arch_choice / data_choice from the configuration cell; load_model
# also reads the global `dataset`, so the dataset cell must have run first)
model = load_model(arch_choice, data_choice, batch_size)

In [32]:
def count_parameters(model):
    """Return the total number of elements over all parameters of `model`
    that have requires_grad set."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Report the trainable-parameter count of the current `model`
# (the ',' format spec adds thousands separators).
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 2,620,801 trainable parameters


#### LSTM

In [0]:
# load model and set hyperparameters
# NOTE(review): `dataset` is not reloaded here. If it was created while
# arch_choice was "cnn" it came from DataLoader.IMDB_CNN rather than
# DataLoader.IMDB_LSTM -- confirm the dataset cell is re-run after this switch.

arch_choice = "lstm" # model architecture to use
model = load_model(arch_choice, data_choice, batch_size)

In [11]:
def count_parameters(model):
    """Sum the element counts of every gradient-requiring parameter in `model`."""
    trainable_sizes = [
        weights.numel()
        for weights in model.parameters()
        if weights.requires_grad
    ]
    return sum(trainable_sizes)

# Report the trainable-parameter count of the current `model`
# (the ',' format spec adds thousands separators).
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 4,810,857 trainable parameters


## Weight initialization

In [0]:
# weight initialization
def initialize_xavier_normal(m):
    """
    Function to initialize a layer by picking weights from a xavier normal distribution.
    Intended to be passed to `model.apply(...)`.

    - Conv2d: xavier-normal weights; the bias, when the layer has one, is zeroed.
    - GRU / LSTM / RNN: xavier-normal input-to-hidden weights, orthogonal
      hidden-to-hidden weights, zeroed biases.
    - Any other layer type is left untouched.

    Arguments
    ---------
    m : The layer of the neural network
    Returns
    -------
    None
    """
    # Exact type match on purpose: subclasses are not touched.
    layer_type = type(m)

    if layer_type is torch.nn.Conv2d:
        torch.nn.init.xavier_normal_(m.weight)
        # A Conv2d created with bias=False has `bias` set to None; the old
        # unconditional `m.bias.data.fill_(0)` crashed on such layers.
        if m.bias is not None:
            torch.nn.init.zeros_(m.bias)

    elif layer_type in (torch.nn.GRU, torch.nn.LSTM, torch.nn.RNN):
        # Parameter names look like weight_ih_l0, weight_hh_l0_reverse,
        # bias_ih_l1, ... so a substring match covers every layer/direction.
        for name, param in m.named_parameters():
            if 'weight_ih' in name:
                torch.nn.init.xavier_normal_(param)
            elif 'weight_hh' in name:
                torch.nn.init.orthogonal_(param)
            elif 'bias' in name:
                torch.nn.init.zeros_(param)

#### CNN

In [34]:
# Run the initializer over the model's submodules; the model returned by
# apply() is echoed as the cell output.
model.apply(initialize_xavier_normal)

binaryCNN(
  (embedding): Embedding(25002, 100, padding_idx=1)
  (convs): ModuleList(
    (0): Conv2d(1, 100, kernel_size=(3, 100), stride=(1, 1))
    (1): Conv2d(1, 100, kernel_size=(4, 100), stride=(1, 1))
    (2): Conv2d(1, 100, kernel_size=(5, 100), stride=(1, 1))
  )
  (fc): Linear(in_features=300, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [35]:
# Dump the convolution layers' parameters to eyeball the re-initialized values.
module = model.convs
print(list(module.named_parameters()))

[('0.weight', Parameter containing:
tensor([[[[-6.8422e-03,  5.1668e-03,  1.9520e-02,  ...,  3.1158e-03,
            2.8506e-03,  6.7613e-03],
          [ 4.9316e-03,  1.1549e-02,  2.7162e-03,  ...,  5.3963e-03,
            3.5725e-03,  1.9182e-03],
          [ 6.2704e-03, -8.4117e-03,  7.7732e-03,  ...,  7.1641e-03,
            2.5619e-03, -6.2230e-03]]],


        [[[ 1.7756e-04,  1.2011e-04, -6.5797e-03,  ..., -1.3224e-03,
            8.2515e-03,  2.8694e-03],
          [ 1.5003e-03, -9.3861e-04, -1.4909e-02,  ..., -6.4327e-04,
            7.1917e-03,  3.5094e-03],
          [ 3.0033e-03, -1.7152e-02,  7.2248e-03,  ...,  4.6923e-04,
           -9.0005e-03,  5.3794e-03]]],


        [[[-1.3992e-02,  5.2189e-03,  1.1135e-02,  ..., -5.7683e-03,
           -2.3867e-03,  6.5969e-04],
          [-1.3771e-02, -6.0960e-03, -1.2732e-02,  ...,  7.4691e-03,
           -4.2921e-03,  2.0553e-03],
          [-6.1572e-03, -1.6178e-02, -3.7117e-03,  ...,  3.0598e-03,
            2.8792e-03, -9.9900

In [36]:
# The model is fully defined; inspect its state.

# Initialize the optimizer
optimizer = optim.Adam(model.parameters())

# Print the model's state_dict: one line per entry, name and tensor size
print("Model's state_dict:")
for entry_name, entry_tensor in model.state_dict().items():
    print(entry_name, "\t", entry_tensor.size())

# Print the optimizer's state_dict: one line per top-level key
print("Optimizer's state_dict:")
for section, contents in optimizer.state_dict().items():
    print(section, "\t", contents)

Model's state_dict:
embedding.weight 	 torch.Size([25002, 100])
convs.0.weight 	 torch.Size([100, 1, 3, 100])
convs.0.bias 	 torch.Size([100])
convs.1.weight 	 torch.Size([100, 1, 4, 100])
convs.1.bias 	 torch.Size([100])
convs.2.weight 	 torch.Size([100, 1, 5, 100])
convs.2.bias 	 torch.Size([100])
fc.weight 	 torch.Size([1, 300])
fc.bias 	 torch.Size([1])
Optimizer's state_dict:
state 	 {}
param_groups 	 [{'lr': 0.001, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'params': [139633953249176, 139633953249248, 139633953249392, 139633953249464, 139633953249536, 139633953249608, 139633953249680, 139633953249752, 139633953250256]}]


In [0]:
# Store the current weights as imdb-cnn.pt in the project folder. In notebook
# order this comes right after initialization and before any training loop,
# so it is presumably a snapshot of the initial weights -- TODO confirm.
torch.save(model.state_dict(), path+"/imdb-cnn.pt")

#### LSTM

In [26]:
# Run the initializer over the model's submodules; the model returned by
# apply() is echoed as the cell output.
model.apply(initialize_xavier_normal)

LSTM(
  (embedding): Embedding(25002, 100, padding_idx=1)
  (rnn): LSTM(100, 256, num_layers=2, dropout=0.5, bidirectional=True)
  (fc): Linear(in_features=512, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [45]:
# Dump the recurrent layer's parameters to eyeball the re-initialized values.
module = model.rnn
print(list(module.named_parameters()))

[('weight_ih_l0', Parameter containing:
tensor([[-0.0277,  0.0149, -0.1019,  ..., -0.0222, -0.0036,  0.0349],
        [ 0.0113, -0.0220, -0.0514,  ..., -0.0155, -0.0718, -0.0285],
        [ 0.0093, -0.0295, -0.0480,  ..., -0.0430, -0.0107,  0.0107],
        ...,
        [ 0.0081,  0.0120,  0.0099,  ..., -0.0577, -0.0074, -0.0334],
        [ 0.0886, -0.0231, -0.0092,  ..., -0.0009,  0.0762,  0.0129],
        [ 0.0135,  0.0006,  0.0677,  ..., -0.0524,  0.0104, -0.0286]],
       requires_grad=True)), ('weight_hh_l0', Parameter containing:
tensor([[ 2.7410e-02, -2.5747e-02,  6.6797e-02,  ...,  2.2362e-02,
          2.3589e-02, -2.3324e-02],
        [-1.1868e-02,  5.1676e-02,  7.7230e-03,  ...,  3.0328e-04,
          1.5866e-02, -3.5923e-02],
        [ 1.0890e-01,  9.7345e-03,  5.6579e-02,  ..., -1.8172e-02,
         -8.2391e-05, -1.2168e-03],
        ...,
        [-2.6911e-02,  3.2036e-02,  9.6031e-03,  ...,  8.0586e-03,
          1.6242e-02, -1.9189e-02],
        [-4.3686e-02, -2.2019e-02

In [18]:
# The model is fully defined; inspect its state.

# Initialize the optimizer
optimizer = optim.Adam(model.parameters())

# Print the model's state_dict (entry name, then the size of its tensor)
model_state = model.state_dict()
print("Model's state_dict:")
for key in model_state:
    print(key, "\t", model_state[key].size())

# Print the optimizer's state_dict (key, then its raw value)
optimizer_state = optimizer.state_dict()
print("Optimizer's state_dict:")
for key in optimizer_state:
    print(key, "\t", optimizer_state[key])

Model's state_dict:
embedding.weight 	 torch.Size([25002, 100])
rnn.weight_ih_l0 	 torch.Size([1024, 100])
rnn.weight_hh_l0 	 torch.Size([1024, 256])
rnn.bias_ih_l0 	 torch.Size([1024])
rnn.bias_hh_l0 	 torch.Size([1024])
rnn.weight_ih_l0_reverse 	 torch.Size([1024, 100])
rnn.weight_hh_l0_reverse 	 torch.Size([1024, 256])
rnn.bias_ih_l0_reverse 	 torch.Size([1024])
rnn.bias_hh_l0_reverse 	 torch.Size([1024])
rnn.weight_ih_l1 	 torch.Size([1024, 512])
rnn.weight_hh_l1 	 torch.Size([1024, 256])
rnn.bias_ih_l1 	 torch.Size([1024])
rnn.bias_hh_l1 	 torch.Size([1024])
rnn.weight_ih_l1_reverse 	 torch.Size([1024, 512])
rnn.weight_hh_l1_reverse 	 torch.Size([1024, 256])
rnn.bias_ih_l1_reverse 	 torch.Size([1024])
rnn.bias_hh_l1_reverse 	 torch.Size([1024])
fc.weight 	 torch.Size([1, 512])
fc.bias 	 torch.Size([1])
Optimizer's state_dict:
state 	 {}
param_groups 	 [{'lr': 0.001, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'params': [139633820904184, 13963382090425

In [0]:
# Store the current weights as imdb-lstm.pt in the project folder. In notebook
# order this comes right after initialization and before any training loop,
# so it is presumably a snapshot of the initial weights -- TODO confirm.
torch.save(model.state_dict(), path+"/imdb-lstm.pt")

#### Both (shared by CNN and LSTM)

In [0]:
# Loss applied to the raw model outputs (binary_accuracy applies the sigmoid
# itself, so predictions are treated as logits throughout).
# NOTE(review): this loss is fixed regardless of data_choice; load_model builds
# a 4-output model for "agnews", which this setup does not look suited to --
# confirm before running anything other than "imdb".
criterion = torch.nn.BCEWithLogitsLoss()

# Move both the model and the loss module to the selected device.
model = model.to(device)
criterion = criterion.to(device)

In [0]:
def binary_accuracy(preds, y):
    """
    Fraction of correct predictions in a batch (8 right out of 10 gives 0.8, not 8).

    `preds` holds raw scores: they go through a sigmoid and are rounded to
    0/1 before being compared element-wise with `y`.
    """
    predicted_labels = torch.sigmoid(preds).round()
    # cast the boolean matches to float so the division below is a float one
    hits = predicted_labels.eq(y).float()
    return hits.sum() / len(hits)

In [0]:
import time

def epoch_time(start_time, end_time):
    """Split the span between two timestamps (in seconds) into a pair of
    integers: (whole minutes, remaining whole seconds)."""
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - whole_minutes * 60)
    return whole_minutes, leftover_seconds

### For CNN

In [0]:
def train(model, iterator, optimizer, criterion):
    """
    Run one training epoch (CNN variant: the model is called with the text
    tensor only).

    Each batch must expose `.text`, whose first element is fed to the model
    (presumably a (tokens, lengths) pair -- TODO confirm), and `.label`.

    Returns
    -------
    (mean loss per batch, mean accuracy per batch)
    """
    model.train()

    loss_total = 0
    acc_total = 0

    for batch in iterator:
        tokens, labels = batch.text[0], batch.label

        optimizer.zero_grad()

        # squeeze(1) drops the size-1 output dimension so the scores line up with the labels
        logits = model(tokens).squeeze(1)
        batch_loss = criterion(logits, labels)
        batch_acc = binary_accuracy(logits, labels)

        batch_loss.backward()
        optimizer.step()

        loss_total += batch_loss.item()
        acc_total += batch_acc.item()

    n_batches = len(iterator)
    return loss_total / n_batches, acc_total / n_batches

def evaluate(model, iterator, criterion):
    """
    Evaluate the model over `iterator` without updating weights (CNN variant:
    the model is called with the text tensor only).

    Each batch must expose `.text`, whose first element is fed to the model
    (presumably a (tokens, lengths) pair -- TODO confirm), and `.label`.

    Returns
    -------
    (mean loss per batch, mean accuracy per batch)
    """
    epoch_loss = 0
    epoch_acc = 0

    model.eval()

    # No gradients are needed for evaluation.
    with torch.no_grad():

        for batch in iterator:
            # Read the batch fields once and reuse them; the earlier version
            # bound `text` / `target` and then ignored both.
            text = batch.text[0]
            target = batch.label

            # squeeze(1) drops the size-1 output dimension so the scores line up with the labels
            predictions = model(text).squeeze(1)

            loss = criterion(predictions, target)
            acc = binary_accuracy(predictions, target)

            epoch_loss += loss.item()
            epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [42]:
# Train for a fixed number of epochs, checkpointing on validation loss.
N_EPOCHS = 10

# Lowest validation loss seen so far; starts at +inf so the first epoch always saves.
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    # One pass over the training iterator, then a pass over the validation iterator.
    train_loss, train_acc = train(model, dataset.train_iter, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, dataset.valid_iter, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    # Keep only the weights of the best epoch (relative path: written to the
    # current working directory).
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut1-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Epoch: 01 | Epoch Time: 0m 33s
	Train Loss: 0.550 | Train Acc: 71.01%
	 Val. Loss: 0.422 |  Val. Acc: 80.87%
Epoch: 02 | Epoch Time: 0m 33s
	Train Loss: 0.421 | Train Acc: 80.46%
	 Val. Loss: 0.353 |  Val. Acc: 84.37%
Epoch: 03 | Epoch Time: 0m 32s
	Train Loss: 0.350 | Train Acc: 84.60%
	 Val. Loss: 0.299 |  Val. Acc: 87.39%
Epoch: 04 | Epoch Time: 0m 32s
	Train Loss: 0.285 | Train Acc: 88.21%
	 Val. Loss: 0.282 |  Val. Acc: 87.84%
Epoch: 05 | Epoch Time: 0m 32s
	Train Loss: 0.238 | Train Acc: 90.24%
	 Val. Loss: 0.265 |  Val. Acc: 89.05%
Epoch: 06 | Epoch Time: 0m 33s
	Train Loss: 0.196 | Train Acc: 92.15%
	 Val. Loss: 0.270 |  Val. Acc: 89.15%
Epoch: 07 | Epoch Time: 0m 32s
	Train Loss: 0.148 | Train Acc: 94.32%
	 Val. Loss: 0.276 |  Val. Acc: 89.05%
Epoch: 08 | Epoch Time: 0m 32s
	Train Loss: 0.117 | Train Acc: 95.78%
	 Val. Loss: 0.298 |  Val. Acc: 88.98%
Epoch: 09 | Epoch Time: 0m 32s
	Train Loss: 0.083 | Train Acc: 96.99%
	 Val. Loss: 0.317 |  Val. Acc: 89.18%
Epoch: 10 | Epoch T

In [44]:
# Restore the checkpoint written by the training loop (the epoch with the
# lowest validation loss) before scoring on the test iterator.
model.load_state_dict(torch.load('tut1-model.pt'))

test_loss, test_acc = evaluate(model, dataset.test_iter, criterion)

print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

Test Loss: 0.273 | Test Acc: 88.88%


### For LSTM

In [0]:
def train(model, iterator, optimizer, criterion):
    """
    Run one training epoch (LSTM variant: the model receives both elements
    of `batch.text`).

    Each batch must expose `.text`, unpacked here as (text, text_lengths),
    and `.label`.

    Returns
    -------
    (mean loss per batch, mean accuracy per batch)
    """
    model.train()

    loss_total, acc_total = 0, 0

    for batch in iterator:
        tokens, lengths = batch.text
        labels = batch.label

        optimizer.zero_grad()

        # squeeze(1) drops the size-1 output dimension so the scores line up with the labels
        logits = model(tokens, lengths).squeeze(1)
        batch_loss = criterion(logits, labels)
        batch_acc = binary_accuracy(logits, labels)

        batch_loss.backward()
        optimizer.step()

        loss_total += batch_loss.item()
        acc_total += batch_acc.item()

    n_batches = len(iterator)
    return loss_total / n_batches, acc_total / n_batches
  
def evaluate(model, iterator, criterion):
    """
    Evaluate the model over `iterator` without updating weights (LSTM
    variant: the model receives both elements of `batch.text`).

    Each batch must expose `.text`, unpacked here as (text, text_lengths),
    and `.label`.

    Returns
    -------
    (mean loss per batch, mean accuracy per batch)
    """
    model.eval()

    loss_total, acc_total = 0, 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch in iterator:
            tokens, lengths = batch.text
            labels = batch.label

            # squeeze(1) drops the size-1 output dimension so the scores line up with the labels
            logits = model(tokens, lengths).squeeze(1)
            batch_loss = criterion(logits, labels)
            batch_acc = binary_accuracy(logits, labels)

            loss_total += batch_loss.item()
            acc_total += batch_acc.item()

    n_batches = len(iterator)
    return loss_total / n_batches, acc_total / n_batches

In [50]:
# Train for a fixed number of epochs, checkpointing on validation loss.
N_EPOCHS = 10

# Lowest validation loss seen so far; starts at +inf so the first epoch always saves.
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    # One pass over the training iterator, then a pass over the validation iterator.
    train_loss, train_acc = train(model, dataset.train_iter, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, dataset.valid_iter, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    # Keep only the weights of the best epoch (relative path: written to the
    # current working directory).
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut2-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Epoch: 01 | Epoch Time: 2m 15s
	Train Loss: 0.677 | Train Acc: 57.14%
	 Val. Loss: 0.651 |  Val. Acc: 62.69%
Epoch: 02 | Epoch Time: 2m 15s
	Train Loss: 0.665 | Train Acc: 59.42%
	 Val. Loss: 0.604 |  Val. Acc: 69.76%
Epoch: 03 | Epoch Time: 2m 14s
	Train Loss: 0.599 | Train Acc: 67.92%
	 Val. Loss: 0.537 |  Val. Acc: 74.31%
Epoch: 04 | Epoch Time: 2m 16s
	Train Loss: 0.538 | Train Acc: 73.48%
	 Val. Loss: 0.577 |  Val. Acc: 70.87%
Epoch: 05 | Epoch Time: 2m 13s
	Train Loss: 0.393 | Train Acc: 82.42%
	 Val. Loss: 0.341 |  Val. Acc: 85.48%
Epoch: 06 | Epoch Time: 2m 14s
	Train Loss: 0.323 | Train Acc: 86.35%
	 Val. Loss: 0.313 |  Val. Acc: 87.14%
Epoch: 07 | Epoch Time: 2m 14s
	Train Loss: 0.284 | Train Acc: 88.27%
	 Val. Loss: 0.276 |  Val. Acc: 88.85%
Epoch: 08 | Epoch Time: 2m 13s
	Train Loss: 0.255 | Train Acc: 89.82%
	 Val. Loss: 0.284 |  Val. Acc: 89.07%
Epoch: 09 | Epoch Time: 2m 14s
	Train Loss: 0.232 | Train Acc: 90.63%
	 Val. Loss: 0.273 |  Val. Acc: 89.74%
Epoch: 10 | Epoch T

In [51]:
# Restore the checkpoint written by the training loop (the epoch with the
# lowest validation loss) before scoring on the test iterator.
model.load_state_dict(torch.load('tut2-model.pt'))

test_loss, test_acc = evaluate(model, dataset.test_iter, criterion)

print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

Test Loss: 0.245 | Test Acc: 90.54%


### Loading saved weights into a model

In [0]:
# Load weights from test.pt in the project folder into the current `model`.
# NOTE(review): no cell shown in this notebook writes test.pt -- confirm where
# the file comes from and that it matches the architecture of `model`.
model.load_state_dict(torch.load(path+"/test.pt"))
# Switch to evaluation mode before using the model for inference.
model.eval()