In [1]:
!pip install transformers
!pip install sentencepiece



In [2]:
import time

import torch
import pandas as pd
import torch.nn as nn
from tqdm import tqdm, tqdm_notebook
import numpy as np
from torch.utils.data import Dataset, DataLoader

from transformers import AutoModel, AutoTokenizer
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup

In [3]:
# Use the first GPU when CUDA is available; otherwise fall back to the CPU
# so that tensors moved with `.to(device)` still work on a machine without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [4]:
# Read the three pre-split CSV files (train / validation / test), in that order.
train_data, valid_data, test_data = [
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/drive/MyDrive/자연어처리음성인식/조상연/BERT 감정분류/train_data.csv",
        "/content/drive/MyDrive/자연어처리음성인식/조상연/BERT 감정분류/valid_data.csv",
        "/content/drive/MyDrive/자연어처리음성인식/조상연/BERT 감정분류/test_data.csv",
    )
]

In [5]:
# Distinct label values of the training set, in order of first appearance.
# The position of a label in this list is used as its integer class id.
classes = pd.unique(train_data['label']).tolist()
classes

['기쁨', '불안', '슬픔', '분노']

In [6]:
# Encode the string labels of the training set as integer class ids
# (the position of each label in `classes`).
#
# The whole column is assigned in one vectorized step. The previous per-row
# chained assignment (`train_data.iloc[i][0] = idx`) wrote to a temporary row
# object, which is not guaranteed to update the DataFrame, and relied on
# positional access to a label-indexed row.
# NOTE(review): the original addressed column 0 by position; this assumes
# column 0 is the 'label' column that the other cells read by name.
label_to_idx = {label: idx for idx, label in enumerate(classes)}
unknown_labels = set(train_data['label'].unique()) - set(label_to_idx)
if unknown_labels:
    # Same exception type the original `classes.index(...)` lookup raised.
    raise ValueError("labels not present in classes: {}".format(unknown_labels))
train_data['label'] = train_data['label'].map(label_to_idx).astype('int64')

In [7]:
# Encode the string labels of the validation and test sets with the same
# label -> class-id mapping as the training set (position in `classes`).
#
# Each column is assigned in one vectorized step. The previous per-row chained
# assignment (`frame.iloc[i][0] = idx`) wrote to a temporary row object, which
# is not guaranteed to update the DataFrame.
# NOTE(review): the original addressed column 0 by position; this assumes
# column 0 is the 'label' column that the other cells read by name.
label_to_idx = {label: idx for idx, label in enumerate(classes)}
for split_frame in (valid_data, test_data):
    unknown_labels = set(split_frame['label'].unique()) - set(label_to_idx)
    if unknown_labels:
        # Same exception type the original `classes.index(...)` lookup raised.
        raise ValueError("labels not present in classes: {}".format(unknown_labels))
    split_frame['label'] = split_frame['label'].map(label_to_idx).astype('int64')

In [8]:
class BERTDataset(Dataset):
  """Map-style dataset that tokenizes every sentence once, at construction time.

  Each item is the tuple ``(attention_mask, token_ids, label)``; the mask and
  the ids are int32 arrays of length ``max_len``.
  """

  def __init__(self, data, max_len):
    """Tokenize all rows of ``data``.

    Args:
      data: DataFrame whose second column (position 1) holds the sentence and
        whose ``'label'`` column holds an integer class id.
      max_len: fixed length every sequence is padded or truncated to.
    """
    super(BERTDataset, self).__init__()
    self.data = data
    self.max_len = max_len
    self.tokenizer = AutoTokenizer.from_pretrained("klue/bert-base", use_fast = True)

    # Read each column once instead of materializing a row Series per sample;
    # this also avoids positional `row[1]` access on a label-indexed Series.
    self.inputs = [self.convert_token([text]) for text in data.iloc[:, 1]]
    self.label = [np.int32(label) for label in data['label']]

  def convert_token(self, data):
    """Encode ``data[0]`` into a fixed-length ``[attention_mask, token_ids]`` pair.

    Args:
      data: sequence whose first element is the sentence to encode.

    Returns:
      A two-element list of int32 arrays, each of length ``self.max_len``:
      the attention mask (1 for real tokens, 0 for padding) and the token ids.
    """
    # Truncate so that a long sentence cannot exceed max_len; without this the
    # padding length goes negative and samples end up with different lengths,
    # which breaks batching.
    token = self.tokenizer.encode(data[0], truncation=True, max_length=self.max_len)
    pad_len = self.max_len - len(token)
    attention_mask = [1] * len(token) + [0] * pad_len
    token = token + self.tokenizer.convert_tokens_to_ids(["[PAD]"] * pad_len)
    return [np.array(attention_mask, dtype=np.int32), np.array(token, dtype=np.int32)]

  def __getitem__(self, idx):
    """Return ``(attention_mask, token_ids, label)`` for sample ``idx``."""
    attention_mask, token_ids = self.inputs[idx]
    return attention_mask, token_ids, self.label[idx]

  def __len__(self):
    """Return the number of samples."""
    return len(self.label)

In [9]:
# Tokenize the training split up front; every sequence is padded to 128 ids.
train_dataset = BERTDataset(data=train_data, max_len=128)

In [10]:
# Tokenize the validation and test splits with the same sequence length (128).
valid_dataset = BERTDataset(data=valid_data, max_len=128)
test_dataset = BERTDataset(data=test_data, max_len=128)

In [11]:
class EmotionClassifier(nn.Module):
  """klue/bert-base encoder followed by a single linear classification layer."""

  def __init__(self, num_classes = 4):
    """Load the pretrained encoder and create the classification head.

    Args:
      num_classes: number of output classes (size of the logits vector).
    """
    super(EmotionClassifier, self).__init__()
    self.bert = AutoModel.from_pretrained("klue/bert-base")
    # Make every encoder parameter trainable. Assigning `requires_grad = True`
    # on a module only sets a plain attribute and leaves parameters untouched;
    # `requires_grad_` is the call that actually updates them.
    self.bert.requires_grad_(True)
    # Size the head from the encoder config rather than hard-coding 768.
    self.classifier = nn.Linear(self.bert.config.hidden_size, num_classes)

  def forward(self, input_ids, attention_mask):
    """Return the class logits computed from the encoder's pooled output.

    Args:
      input_ids: token-id tensor passed to the encoder.
      attention_mask: mask tensor passed to the encoder.

    Returns:
      Output of the linear head applied to ``pooler_output``.
    """
    pooled = self.bert(input_ids=input_ids, attention_mask=attention_mask).pooler_output
    return self.classifier(pooled)

In [12]:
# Build the classifier and move it to the same `device` the batches are sent to,
# instead of calling `.cuda()` unconditionally.
model = EmotionClassifier().to(device)

Some weights of the model checkpoint at klue/bert-base were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [13]:
# Parameters whose name contains one of these substrings get no weight decay.
no_decay = ['bias', 'LayerNorm.weight']

# Two parameter groups, in this order:
#   1. names matching none of `no_decay` -> weight_decay 0.01
#   2. names matching any of `no_decay`  -> weight_decay 0.0
optimizer_grouped_parameters = [
    {
        'params': [
            param
            for name, param in model.named_parameters()
            if any(key in name for key in no_decay) == is_exempt
        ],
        'weight_decay': decay,
    }
    for is_exempt, decay in [(False, 0.01), (True, 0.0)]
]

In [14]:
# --- Data / batching ---
max_len = 128          # same value as the 128 used when the datasets were built
batch_size = 16        # samples per DataLoader batch

# --- Optimization ---
num_epochs = 20        # passes over the training set
learning_rate = 5e-5   # same value the optimizer cell passes as `lr`
warmup_ratio = 0.1     # fraction of all training steps used for warmup
max_grad_norm = 1      # gradient-norm clipping threshold

# --- Logging ---
log_interval = 256     # print training progress every this many batches

In [15]:
# Optimizer over the two weight-decay groups. The step size comes from the
# `learning_rate` hyperparameter (5e-5) rather than a duplicated literal.
# NOTE(review): recent transformers releases deprecate `transformers.AdamW`
# in favour of `torch.optim.AdamW` (different default eps) - confirm against the
# installed version before switching.
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
# Cross-entropy between the model's logits and the integer class ids.
loss_fn = nn.CrossEntropyLoss()



In [16]:
# Training batches are reshuffled every epoch; without `shuffle=True` the model
# sees identical batches in file order each epoch.
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=2
)
# Evaluation loaders keep the dataset order.
valid_dataloader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, num_workers=2)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, num_workers=2)

In [17]:
# Total number of optimizer steps: one per training batch, for every epoch.
t_total = num_epochs * len(train_dataloader)
# Number of those steps spent in learning-rate warmup (truncated to an int).
warmup_step = int(warmup_ratio * t_total)

In [19]:
# Cosine learning-rate schedule with warmup, spanning all `t_total` training steps.
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_step,
    num_training_steps=t_total,
)

In [20]:
def calc_accuracy(X,Y):
    """Return the fraction of rows of ``X`` whose arg-max along dim 1 equals ``Y``.

    Args:
        X: 2-D tensor of per-class scores, one row per sample.
        Y: 1-D tensor of target class indices.

    Returns:
        The accuracy of the batch as a NumPy floating-point value.
    """
    _, predicted = torch.max(X, 1)
    n_correct = (predicted == Y).sum().data.cpu().numpy()
    n_samples = predicted.size()[0]
    return n_correct / n_samples

In [21]:
# Fine-tune for `num_epochs` epochs. After every epoch the model is evaluated on
# the validation loader, and the weights are saved whenever the validation
# accuracy improves on the best value seen so far.
best_acc = 0
for e in range(num_epochs):
    # Running sums of per-batch accuracy; divided by the batch count when printed.
    train_acc = 0.0
    test_acc = 0.0

    model.train()
    for batch_id, (attention_mask, token_ids, label) in enumerate(tqdm_notebook(train_dataloader)):
        optimizer.zero_grad()
        token_ids = token_ids.long().to(device)
        attention_mask = attention_mask.long().to(device)
        label = label.long().to(device)
        out = model(token_ids, attention_mask)
        loss = loss_fn(out, label)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()  # Update learning rate schedule (once per batch)
        train_acc += calc_accuracy(out, label)
        if batch_id % log_interval == 0:
            print("EPOCH {} [{}/{}]  >>>  loss : {:.6f}\t  train_acc : {:.3f}".format(
                e+1, batch_id+1, len(train_dataloader), loss.item(), train_acc / (batch_id+1)))
    # The loss reported here is the loss of the last batch of the epoch.
    print("EPOCH {}  >>>  loss : {:.6f}\t  train_acc : {:.3f}".format(e+1, loss.item(), train_acc / (batch_id+1)))

    model.eval()
    # Evaluation needs no gradients; skipping autograd avoids building a graph
    # for every validation batch.
    with torch.no_grad():
        for batch_id, (attention_mask, token_ids, label) in enumerate(tqdm_notebook(valid_dataloader)):
            token_ids = token_ids.long().to(device)
            attention_mask = attention_mask.long().to(device)
            label = label.long().to(device)
            out = model(token_ids, attention_mask)
            test_acc += calc_accuracy(out, label)
    print("EPOCH {}  >>>  test_acc : {:.3f}".format(e+1, test_acc / (batch_id+1)))
    # `test_acc` is a sum over the same number of batches every epoch, so
    # comparing the sums ranks epochs the same way as comparing the means.
    if best_acc < test_acc:
        torch.save(model.state_dict(), '/content/drive/MyDrive/자연어처리음성인식/조상연/BERT 감정분류/KoBert_base_klue.pt')
        best_acc = test_acc

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 1 [1/2158]  >>>  loss : 1.451906	  train_acc : 0.188
EPOCH 1 [257/2158]  >>>  loss : 1.036505	  train_acc : 0.356
EPOCH 1 [513/2158]  >>>  loss : 0.652018	  train_acc : 0.498
EPOCH 1 [769/2158]  >>>  loss : 0.829219	  train_acc : 0.572
EPOCH 1 [1025/2158]  >>>  loss : 0.460903	  train_acc : 0.614
EPOCH 1 [1281/2158]  >>>  loss : 0.704930	  train_acc : 0.642
EPOCH 1 [1537/2158]  >>>  loss : 0.681814	  train_acc : 0.658
EPOCH 1 [1793/2158]  >>>  loss : 0.727212	  train_acc : 0.672
EPOCH 1 [2049/2158]  >>>  loss : 0.958371	  train_acc : 0.682
EPOCH 1  >>>  loss : 0.511582	  train_acc : 0.686


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 1  >>>  test_acc : 0.750


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 2 [1/2158]  >>>  loss : 0.872348	  train_acc : 0.688
EPOCH 2 [257/2158]  >>>  loss : 0.380510	  train_acc : 0.757
EPOCH 2 [513/2158]  >>>  loss : 0.506586	  train_acc : 0.758
EPOCH 2 [769/2158]  >>>  loss : 0.799836	  train_acc : 0.757
EPOCH 2 [1025/2158]  >>>  loss : 0.423356	  train_acc : 0.758
EPOCH 2 [1281/2158]  >>>  loss : 0.594834	  train_acc : 0.763
EPOCH 2 [1537/2158]  >>>  loss : 0.604230	  train_acc : 0.764
EPOCH 2 [1793/2158]  >>>  loss : 0.839139	  train_acc : 0.767
EPOCH 2 [2049/2158]  >>>  loss : 0.907878	  train_acc : 0.770
EPOCH 2  >>>  loss : 0.358375	  train_acc : 0.771


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 2  >>>  test_acc : 0.751


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 3 [1/2158]  >>>  loss : 0.830129	  train_acc : 0.688
EPOCH 3 [257/2158]  >>>  loss : 0.359625	  train_acc : 0.785
EPOCH 3 [513/2158]  >>>  loss : 0.406660	  train_acc : 0.789
EPOCH 3 [769/2158]  >>>  loss : 0.536424	  train_acc : 0.794
EPOCH 3 [1025/2158]  >>>  loss : 0.286575	  train_acc : 0.799
EPOCH 3 [1281/2158]  >>>  loss : 0.363134	  train_acc : 0.806
EPOCH 3 [1537/2158]  >>>  loss : 0.476122	  train_acc : 0.808
EPOCH 3 [1793/2158]  >>>  loss : 0.816877	  train_acc : 0.812
EPOCH 3 [2049/2158]  >>>  loss : 0.770718	  train_acc : 0.815
EPOCH 3  >>>  loss : 0.323722	  train_acc : 0.817


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 3  >>>  test_acc : 0.750


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 4 [1/2158]  >>>  loss : 0.888380	  train_acc : 0.562
EPOCH 4 [257/2158]  >>>  loss : 0.145563	  train_acc : 0.848
EPOCH 4 [513/2158]  >>>  loss : 0.467620	  train_acc : 0.847
EPOCH 4 [769/2158]  >>>  loss : 0.475877	  train_acc : 0.851
EPOCH 4 [1025/2158]  >>>  loss : 0.091233	  train_acc : 0.856
EPOCH 4 [1281/2158]  >>>  loss : 0.123135	  train_acc : 0.860
EPOCH 4 [1537/2158]  >>>  loss : 0.391042	  train_acc : 0.861
EPOCH 4 [1793/2158]  >>>  loss : 0.306924	  train_acc : 0.864
EPOCH 4 [2049/2158]  >>>  loss : 0.690204	  train_acc : 0.868
EPOCH 4  >>>  loss : 0.260597	  train_acc : 0.869


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 4  >>>  test_acc : 0.743


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 5 [1/2158]  >>>  loss : 0.412575	  train_acc : 0.812
EPOCH 5 [257/2158]  >>>  loss : 0.061360	  train_acc : 0.895
EPOCH 5 [513/2158]  >>>  loss : 0.139253	  train_acc : 0.890
EPOCH 5 [769/2158]  >>>  loss : 0.789099	  train_acc : 0.892
EPOCH 5 [1025/2158]  >>>  loss : 0.098815	  train_acc : 0.894
EPOCH 5 [1281/2158]  >>>  loss : 0.064133	  train_acc : 0.897
EPOCH 5 [1537/2158]  >>>  loss : 0.132918	  train_acc : 0.899
EPOCH 5 [1793/2158]  >>>  loss : 0.304352	  train_acc : 0.901
EPOCH 5 [2049/2158]  >>>  loss : 0.312360	  train_acc : 0.902
EPOCH 5  >>>  loss : 0.053696	  train_acc : 0.902


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 5  >>>  test_acc : 0.754


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 6 [1/2158]  >>>  loss : 0.628569	  train_acc : 0.812
EPOCH 6 [257/2158]  >>>  loss : 0.148361	  train_acc : 0.915
EPOCH 6 [513/2158]  >>>  loss : 0.179460	  train_acc : 0.915
EPOCH 6 [769/2158]  >>>  loss : 0.081670	  train_acc : 0.917
EPOCH 6 [1025/2158]  >>>  loss : 0.014859	  train_acc : 0.920
EPOCH 6 [1281/2158]  >>>  loss : 0.098539	  train_acc : 0.922
EPOCH 6 [1537/2158]  >>>  loss : 0.045086	  train_acc : 0.922
EPOCH 6 [1793/2158]  >>>  loss : 0.066190	  train_acc : 0.923
EPOCH 6 [2049/2158]  >>>  loss : 0.352644	  train_acc : 0.925
EPOCH 6  >>>  loss : 0.068816	  train_acc : 0.926


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 6  >>>  test_acc : 0.749


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 7 [1/2158]  >>>  loss : 0.364903	  train_acc : 0.812
EPOCH 7 [257/2158]  >>>  loss : 0.015357	  train_acc : 0.931
EPOCH 7 [513/2158]  >>>  loss : 0.106058	  train_acc : 0.930
EPOCH 7 [769/2158]  >>>  loss : 0.041659	  train_acc : 0.933
EPOCH 7 [1025/2158]  >>>  loss : 0.376177	  train_acc : 0.935
EPOCH 7 [1281/2158]  >>>  loss : 0.005122	  train_acc : 0.937
EPOCH 7 [1537/2158]  >>>  loss : 0.213112	  train_acc : 0.937
EPOCH 7 [1793/2158]  >>>  loss : 0.004725	  train_acc : 0.939
EPOCH 7 [2049/2158]  >>>  loss : 0.004683	  train_acc : 0.940
EPOCH 7  >>>  loss : 0.001450	  train_acc : 0.941


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 7  >>>  test_acc : 0.753


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 8 [1/2158]  >>>  loss : 0.330791	  train_acc : 0.875
EPOCH 8 [257/2158]  >>>  loss : 0.080889	  train_acc : 0.948
EPOCH 8 [513/2158]  >>>  loss : 0.005084	  train_acc : 0.949
EPOCH 8 [769/2158]  >>>  loss : 0.055425	  train_acc : 0.951
EPOCH 8 [1025/2158]  >>>  loss : 0.002558	  train_acc : 0.952
EPOCH 8 [1281/2158]  >>>  loss : 0.011245	  train_acc : 0.951
EPOCH 8 [1537/2158]  >>>  loss : 0.082400	  train_acc : 0.951
EPOCH 8 [1793/2158]  >>>  loss : 0.654130	  train_acc : 0.951
EPOCH 8 [2049/2158]  >>>  loss : 0.165757	  train_acc : 0.952
EPOCH 8  >>>  loss : 0.001865	  train_acc : 0.952


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 8  >>>  test_acc : 0.743


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 9 [1/2158]  >>>  loss : 0.124257	  train_acc : 0.938
EPOCH 9 [257/2158]  >>>  loss : 0.006042	  train_acc : 0.956
EPOCH 9 [513/2158]  >>>  loss : 0.330407	  train_acc : 0.958
EPOCH 9 [769/2158]  >>>  loss : 0.305583	  train_acc : 0.959
EPOCH 9 [1025/2158]  >>>  loss : 0.002559	  train_acc : 0.959
EPOCH 9 [1281/2158]  >>>  loss : 0.221905	  train_acc : 0.960
EPOCH 9 [1537/2158]  >>>  loss : 0.006003	  train_acc : 0.960
EPOCH 9 [1793/2158]  >>>  loss : 0.232333	  train_acc : 0.960
EPOCH 9 [2049/2158]  >>>  loss : 0.013936	  train_acc : 0.961
EPOCH 9  >>>  loss : 0.000764	  train_acc : 0.962


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 9  >>>  test_acc : 0.747


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 10 [1/2158]  >>>  loss : 0.395165	  train_acc : 0.938
EPOCH 10 [257/2158]  >>>  loss : 0.003731	  train_acc : 0.965
EPOCH 10 [513/2158]  >>>  loss : 0.056986	  train_acc : 0.966
EPOCH 10 [769/2158]  >>>  loss : 0.001128	  train_acc : 0.968
EPOCH 10 [1025/2158]  >>>  loss : 0.255641	  train_acc : 0.969
EPOCH 10 [1281/2158]  >>>  loss : 0.006993	  train_acc : 0.968
EPOCH 10 [1537/2158]  >>>  loss : 0.001171	  train_acc : 0.969
EPOCH 10 [1793/2158]  >>>  loss : 0.019122	  train_acc : 0.970
EPOCH 10 [2049/2158]  >>>  loss : 0.001759	  train_acc : 0.971
EPOCH 10  >>>  loss : 0.000473	  train_acc : 0.971


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 10  >>>  test_acc : 0.750


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 11 [1/2158]  >>>  loss : 0.002303	  train_acc : 1.000
EPOCH 11 [257/2158]  >>>  loss : 0.006516	  train_acc : 0.970
EPOCH 11 [513/2158]  >>>  loss : 0.142807	  train_acc : 0.970
EPOCH 11 [769/2158]  >>>  loss : 0.007812	  train_acc : 0.971
EPOCH 11 [1025/2158]  >>>  loss : 0.000900	  train_acc : 0.973
EPOCH 11 [1281/2158]  >>>  loss : 0.001157	  train_acc : 0.974
EPOCH 11 [1537/2158]  >>>  loss : 0.171279	  train_acc : 0.975
EPOCH 11 [1793/2158]  >>>  loss : 0.503950	  train_acc : 0.976
EPOCH 11 [2049/2158]  >>>  loss : 0.005041	  train_acc : 0.976
EPOCH 11  >>>  loss : 0.000664	  train_acc : 0.976


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 11  >>>  test_acc : 0.744


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 12 [1/2158]  >>>  loss : 0.003434	  train_acc : 1.000
EPOCH 12 [257/2158]  >>>  loss : 0.000513	  train_acc : 0.982
EPOCH 12 [513/2158]  >>>  loss : 0.000721	  train_acc : 0.980
EPOCH 12 [769/2158]  >>>  loss : 0.001105	  train_acc : 0.980
EPOCH 12 [1025/2158]  >>>  loss : 0.000878	  train_acc : 0.980
EPOCH 12 [1281/2158]  >>>  loss : 0.004585	  train_acc : 0.981
EPOCH 12 [1537/2158]  >>>  loss : 0.000550	  train_acc : 0.981
EPOCH 12 [1793/2158]  >>>  loss : 0.001102	  train_acc : 0.981
EPOCH 12 [2049/2158]  >>>  loss : 0.000476	  train_acc : 0.982
EPOCH 12  >>>  loss : 0.000280	  train_acc : 0.982


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 12  >>>  test_acc : 0.751


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 13 [1/2158]  >>>  loss : 0.000374	  train_acc : 1.000
EPOCH 13 [257/2158]  >>>  loss : 0.007857	  train_acc : 0.984
EPOCH 13 [513/2158]  >>>  loss : 0.001580	  train_acc : 0.983
EPOCH 13 [769/2158]  >>>  loss : 0.000400	  train_acc : 0.985
EPOCH 13 [1025/2158]  >>>  loss : 0.000583	  train_acc : 0.986
EPOCH 13 [1281/2158]  >>>  loss : 0.001418	  train_acc : 0.986
EPOCH 13 [1537/2158]  >>>  loss : 0.000330	  train_acc : 0.986
EPOCH 13 [1793/2158]  >>>  loss : 0.000159	  train_acc : 0.987
EPOCH 13 [2049/2158]  >>>  loss : 0.000747	  train_acc : 0.987
EPOCH 13  >>>  loss : 0.000149	  train_acc : 0.987


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 13  >>>  test_acc : 0.752


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 14 [1/2158]  >>>  loss : 0.001497	  train_acc : 1.000
EPOCH 14 [257/2158]  >>>  loss : 0.000317	  train_acc : 0.988
EPOCH 14 [513/2158]  >>>  loss : 0.000398	  train_acc : 0.987
EPOCH 14 [769/2158]  >>>  loss : 0.000273	  train_acc : 0.989
EPOCH 14 [1025/2158]  >>>  loss : 0.000253	  train_acc : 0.989
EPOCH 14 [1281/2158]  >>>  loss : 0.000217	  train_acc : 0.989
EPOCH 14 [1537/2158]  >>>  loss : 0.001484	  train_acc : 0.989
EPOCH 14 [1793/2158]  >>>  loss : 0.000151	  train_acc : 0.990
EPOCH 14 [2049/2158]  >>>  loss : 0.000267	  train_acc : 0.990
EPOCH 14  >>>  loss : 0.000116	  train_acc : 0.990


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 14  >>>  test_acc : 0.755


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 15 [1/2158]  >>>  loss : 0.000181	  train_acc : 1.000
EPOCH 15 [257/2158]  >>>  loss : 0.000112	  train_acc : 0.990
EPOCH 15 [513/2158]  >>>  loss : 0.000323	  train_acc : 0.990
EPOCH 15 [769/2158]  >>>  loss : 0.000183	  train_acc : 0.990
EPOCH 15 [1025/2158]  >>>  loss : 0.001274	  train_acc : 0.990
EPOCH 15 [1281/2158]  >>>  loss : 0.000110	  train_acc : 0.990
EPOCH 15 [1537/2158]  >>>  loss : 0.000245	  train_acc : 0.991
EPOCH 15 [1793/2158]  >>>  loss : 0.000117	  train_acc : 0.991
EPOCH 15 [2049/2158]  >>>  loss : 0.000162	  train_acc : 0.991
EPOCH 15  >>>  loss : 0.000056	  train_acc : 0.991


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 15  >>>  test_acc : 0.753


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 16 [1/2158]  >>>  loss : 0.000160	  train_acc : 1.000
EPOCH 16 [257/2158]  >>>  loss : 0.000069	  train_acc : 0.992
EPOCH 16 [513/2158]  >>>  loss : 0.000120	  train_acc : 0.991
EPOCH 16 [769/2158]  >>>  loss : 0.000112	  train_acc : 0.992
EPOCH 16 [1025/2158]  >>>  loss : 0.000124	  train_acc : 0.992
EPOCH 16 [1281/2158]  >>>  loss : 0.000070	  train_acc : 0.992
EPOCH 16 [1537/2158]  >>>  loss : 0.000131	  train_acc : 0.992
EPOCH 16 [1793/2158]  >>>  loss : 0.000285	  train_acc : 0.993
EPOCH 16 [2049/2158]  >>>  loss : 0.000101	  train_acc : 0.992
EPOCH 16  >>>  loss : 0.000043	  train_acc : 0.992


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 16  >>>  test_acc : 0.751


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 17 [1/2158]  >>>  loss : 0.000077	  train_acc : 1.000
EPOCH 17 [257/2158]  >>>  loss : 0.000065	  train_acc : 0.992
EPOCH 17 [513/2158]  >>>  loss : 0.000100	  train_acc : 0.992
EPOCH 17 [769/2158]  >>>  loss : 0.000140	  train_acc : 0.993
EPOCH 17 [1025/2158]  >>>  loss : 0.000180	  train_acc : 0.993
EPOCH 17 [1281/2158]  >>>  loss : 0.000050	  train_acc : 0.994
EPOCH 17 [1537/2158]  >>>  loss : 0.000115	  train_acc : 0.994
EPOCH 17 [1793/2158]  >>>  loss : 0.000069	  train_acc : 0.994
EPOCH 17 [2049/2158]  >>>  loss : 0.000138	  train_acc : 0.994
EPOCH 17  >>>  loss : 0.000044	  train_acc : 0.994


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 17  >>>  test_acc : 0.750


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 18 [1/2158]  >>>  loss : 0.000089	  train_acc : 1.000
EPOCH 18 [257/2158]  >>>  loss : 0.000057	  train_acc : 0.996
EPOCH 18 [513/2158]  >>>  loss : 0.000062	  train_acc : 0.994
EPOCH 18 [769/2158]  >>>  loss : 0.000070	  train_acc : 0.994
EPOCH 18 [1025/2158]  >>>  loss : 0.154562	  train_acc : 0.994
EPOCH 18 [1281/2158]  >>>  loss : 0.000047	  train_acc : 0.994
EPOCH 18 [1537/2158]  >>>  loss : 0.000173	  train_acc : 0.995
EPOCH 18 [1793/2158]  >>>  loss : 0.000165	  train_acc : 0.995
EPOCH 18 [2049/2158]  >>>  loss : 0.001238	  train_acc : 0.994
EPOCH 18  >>>  loss : 0.000041	  train_acc : 0.995


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 18  >>>  test_acc : 0.750


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 19 [1/2158]  >>>  loss : 0.000090	  train_acc : 1.000
EPOCH 19 [257/2158]  >>>  loss : 0.000050	  train_acc : 0.996
EPOCH 19 [513/2158]  >>>  loss : 0.000052	  train_acc : 0.995
EPOCH 19 [769/2158]  >>>  loss : 0.000057	  train_acc : 0.995
EPOCH 19 [1025/2158]  >>>  loss : 0.000062	  train_acc : 0.995
EPOCH 19 [1281/2158]  >>>  loss : 0.000138	  train_acc : 0.996
EPOCH 19 [1537/2158]  >>>  loss : 0.000213	  train_acc : 0.996
EPOCH 19 [1793/2158]  >>>  loss : 0.000077	  train_acc : 0.996
EPOCH 19 [2049/2158]  >>>  loss : 0.000058	  train_acc : 0.996
EPOCH 19  >>>  loss : 0.000041	  train_acc : 0.996


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 19  >>>  test_acc : 0.751


  0%|          | 0/2158 [00:00<?, ?it/s]

EPOCH 20 [1/2158]  >>>  loss : 0.000068	  train_acc : 1.000
EPOCH 20 [257/2158]  >>>  loss : 0.000043	  train_acc : 0.996
EPOCH 20 [513/2158]  >>>  loss : 0.000059	  train_acc : 0.995
EPOCH 20 [769/2158]  >>>  loss : 0.000048	  train_acc : 0.996
EPOCH 20 [1025/2158]  >>>  loss : 0.000050	  train_acc : 0.996
EPOCH 20 [1281/2158]  >>>  loss : 0.000045	  train_acc : 0.996
EPOCH 20 [1537/2158]  >>>  loss : 0.000118	  train_acc : 0.996
EPOCH 20 [1793/2158]  >>>  loss : 0.596808	  train_acc : 0.996
EPOCH 20 [2049/2158]  >>>  loss : 0.000068	  train_acc : 0.996
EPOCH 20  >>>  loss : 0.000041	  train_acc : 0.996


  0%|          | 0/263 [00:00<?, ?it/s]

EPOCH 20  >>>  test_acc : 0.751


In [22]:
# Reload the checkpoint saved by the training loop and measure accuracy
# (and wall-clock time) on the test set.
model = EmotionClassifier().to(device)
# `map_location` lets the checkpoint load onto whichever device is in use.
model.load_state_dict(torch.load('/content/drive/MyDrive/자연어처리음성인식/조상연/BERT 감정분류/KoBert_base_klue.pt', map_location=device))
model.eval()
st = time.time()
test_acc = 0  # number of correctly classified test samples
# Evaluation needs no gradients; skip autograd bookkeeping.
with torch.no_grad():
    for batch_id, (attention_mask, token_ids, label) in enumerate(tqdm_notebook(test_dataloader)):
        token_ids = token_ids.long().to(device)
        attention_mask = attention_mask.long().to(device)
        label = label.long().to(device)
        out = model(token_ids, attention_mask)
        max_vals, max_indices = torch.max(out, 1)
        test_acc += (max_indices == label).sum().item()
# Previously `ed = time` bound the module itself, so no end time was recorded.
ed = time.time()
print("TEST  >>>  test_acc : {:.3f}".format(test_acc / len(test_dataloader.dataset)))
print("TEST  >>>  elapsed : {:.1f}s".format(ed - st))

Some weights of the model checkpoint at klue/bert-base were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/7 [00:00<?, ?it/s]

TEST  >>>  test_acc : 0.790
