In [21]:
!pip install torch==1.13.0 # torchtext.legacy 라이브러리를 불러오기 위해 torch 다시 install
!pip install -U torchtext==0.11.0 #torchtext install

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch==1.13.0
  Downloading torch-1.13.0-cp38-cp38-manylinux1_x86_64.whl (890.2 MB)
[K     |█████▉                          | 161.1 MB 1.3 MB/s eta 0:09:01
[31mERROR: Operation cancelled by user[0m
[?25hLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [31]:
import os
import torch
import torch.nn as nn
from torchtext.legacy import data, datasets 
from google.colab import drive

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

# parameters
batch_size = 64
learning_rate = 0.0001


In [59]:
TEXT = data.Field(sequential=True, batch_first=True, lower=True)
LABEL = data.Field(sequential=False, batch_first=True)
trainset, validset, testset = datasets.SST.splits(TEXT, LABEL)

TEXT.build_vocab(trainset, min_freq=5)
LABEL.build_vocab(trainset)

#이미 valid가 나뉘어져 있어서 따로 나누지 않음
train_iter, val_iter, test_iter = data.BucketIterator.splits(
        (trainset, validset, testset), batch_size=batch_size,
        shuffle=True, repeat=False)

vocab_size = len(TEXT.vocab)
n_classes = 3 # Positive, Negative Class가 두 개인데 SST는 3개

print("[TrainSet]: %d [ValSet]: %d [TestSet]: %d [Vocab]: %d [Classes] %d"
      % (len(trainset),len(validset), len(testset), vocab_size, n_classes))

[TrainSet]: 8544 [ValSet]: 1101 [TestSet]: 2210 [Vocab]: 3428 [Classes] 3


In [60]:
class BasicGRU(nn.Module):
    def __init__(self, n_layers, hidden_dim, n_vocab, embed_dim, n_classes, dropout_p=0.2):
        super(BasicGRU, self).__init__()
        self.n_layers = n_layers # 일반적으로는 2

        self.embed = nn.Embedding(n_vocab, embed_dim)

        self.hidden_dim = hidden_dim
        self.dropout = nn.Dropout(dropout_p)

        self.gru = nn.GRU(embed_dim, self.hidden_dim,
                          num_layers=self.n_layers,
                          batch_first=True)
        
        self.out = nn.Linear(self.hidden_dim, n_classes)

    def forward(self, x):
        x = self.embed(x)
        x, _ = self.gru(x)

        h_t = x[:,-1,:]

        self.dropout(h_t)

        out = self.out(h_t)
        return out

In [68]:
criterion = torch.nn.CrossEntropyLoss().to(device)    # Softmax
batchs_size = [32,64,128] # 변경할 batch
learning_rate = [0.0005, 0.0001, 0.00005] # 변경할 learning rate
total_best = -1 # 전체 중에 가장 정확도가 높은
for ba in batchs_size:
    train_iter.batchs_= ba
    for lr in learning_rate:
        model = BasicGRU(1, 256, vocab_size, 128, n_classes, 0.5).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        count = 0
        best = -1 # 현재 돌리는 batch, learning rate 중에 가장 정확도가 높은
        total_epoch = 0
        epoch = 0

        while count != 3: # count가 3번 그대로일 때 
            epoch = 0
            avg_cost = 0
            for epoch in range(2): #2번씩 돌아가고 validset을 이용해 accuracy를 측정
              total_epoch+=1
              model.train()
              for batch in train_iter:
                  X, Y = batch.text.to(device), batch.label.to(device)
                  Y.data.sub_(1)
                  optimizer.zero_grad()
                  hypothesis = model(X)
                  cost = criterion(hypothesis, Y)
                  cost.backward()
                  optimizer.step()
                  avg_cost += cost / len(train_iter)
                
            with torch.no_grad():
              corrects = 0
              model.eval()
              for batch in val_iter:
                x,y = batch.text.to(device), batch.label.to(device)
                y.data.sub_(1)
                hypothesis = model(x)
                corrects += (hypothesis.max(1)[1].view(y.size()).data == y.data).sum()

            eval_acc = corrects/len(val_iter.dataset)*100.0

            if best < eval_acc: # 현재 batch, learning rate에서 가장 높은 모델 정확도 기억
              best = eval_acc
            else:
              count += 1 # 정확도가 높지 않으면 count 증가 (count가 3이되면 학습 종료 후 다음 batch, learning_rate로 넘어감)
            print(f'[Epoch: {total_epoch}] train_cost = {avg_cost:>.5} eval_acc = {eval_acc:>.5} best = {best:>.5}')

            if total_best<eval_acc:
              total_best = eval_acc
              torch.save(model.state_dict(), '/model_s1.pt') # 전체중에 가장 높은 모델 model_s1에 저장              
              

        print(f'{count}번동안 accuracy 증가가 없어 {total_epoch}epcoh 학습 후 종료합니다.\n')


[Epoch: 2] train_cost = 2.1021 eval_acc = 45.141 best = 45.141
[Epoch: 4] train_cost = 1.8561 eval_acc = 55.767 best = 55.767
[Epoch: 6] train_cost = 1.5231 eval_acc = 59.128 best = 59.128
[Epoch: 8] train_cost = 1.2074 eval_acc = 57.766 best = 59.128
[Epoch: 10] train_cost = 0.89937 eval_acc = 57.493 best = 59.128
[Epoch: 12] train_cost = 0.62754 eval_acc = 53.951 best = 59.128
3번동안 accuracy 증가가 없어 12epcoh 학습 후 종료합니다.

[Epoch: 2] train_cost = 2.103 eval_acc = 39.782 best = 39.782
[Epoch: 4] train_cost = 2.099 eval_acc = 41.417 best = 41.417
[Epoch: 6] train_cost = 2.0953 eval_acc = 39.782 best = 41.417
[Epoch: 8] train_cost = 2.0843 eval_acc = 42.507 best = 42.507
[Epoch: 10] train_cost = 1.9953 eval_acc = 52.861 best = 52.861
[Epoch: 12] train_cost = 1.8415 eval_acc = 53.86 best = 53.86
[Epoch: 14] train_cost = 1.7186 eval_acc = 53.678 best = 53.86
[Epoch: 16] train_cost = 1.6065 eval_acc = 56.403 best = 56.403
[Epoch: 18] train_cost = 1.4804 eval_acc = 56.131 best = 56.403
3번동안 accu

In [70]:
model = BasicGRU(1, 256, vocab_size, 128, n_classes, 0.5).to(device)
model.load_state_dict(torch.load('/model_s1.pt')) # 가장 정확도가 높았던 모델 불러오기

corrects = 0
model.eval()
for batch in test_iter:
    x,y = batch.text.to(device), batch.label.to(device)
    y.data.sub_(1)
    hypothesis = model(x)
    corrects += (hypothesis.max(1)[1].view(y.size()).data == y.data).sum()

test_acc = corrects/len(test_iter.dataset)*100.0
print(f'test_acc = {test_acc:>.9}')

test_acc = 61.1764717
