# Final Project: 2021년 국립국어원 인공지능 언어능력 평가

- [2021년 국립국어원 인공지능 언어능력 평가](https://corpus.korean.go.kr/task/taskList.do?taskId=1&clCd=END_TASK&subMenuId=sub01)는 9월 1일에 시작하여 11월 1일에 마감된 [네 가지 과제](https://corpus.korean.go.kr/task/taskDownload.do?taskId=1&clCd=END_TASK&subMenuId=sub02)에 대한 언어능력 평가 대회
- 여기서 제시된 과제를 그대로 수행하여 그 결과를 [최종 선정된 결과들](https://corpus.korean.go.kr/task/taskLeaderBoard.do?taskId=4&clCd=END_TASK&subMenuId=sub04)과 비교할 수 있도록 수행
- 아직 테스트 셋의 정답이 공식적으로 공개되지 않아, 네 가지 과제의 자료 중 evaluation dataset을 가지고 성능을 비교할 계획
- 기말 발표전까지 정답셋이 공개될 경우 이 정답셋을 가지고 성능 검증
- Transformers 기반 방법론, 신경망 등 각자 생각한 방법대로 구현 가능
- 현재 대회기간이 종료되어 자료가 다운로드 가능하지 않으니 첨부된 자료 참조
- 개인적으로 하거나 최대 두명까지 그룹 허용. 
- 이 노트북 파일의 이름을 변경하여 작업하고 제출. 제출 시 파일명은 FinalProject_[DS 또는 CL]_학과_이름.ipynb
- 마감 12월 6일(월) 23:59분까지.
- 12월 7일, 9일 기말 발표 presentation 예정

## 리더보드

- 최종발표전까지 각조는 각 태스크별 실행성능을 **시도된 여러 방법의 결과들을 지속적으로**  [리더보드](https://docs.google.com/spreadsheets/d/1-uenfp5GolpY2Gf0TsFbODvj585IIiFKp9fvYxcfgkY/edit#gid=0)에 해당 팀명(구성원 이름 포함)을 입력하여 공개하여야 함. 
- 최종 마감일에 이 순위와 실제 제출한 프로그램의 수행 결과를 비교하여 성능을 확인

# Import

In [None]:
import torch
from kobert.pytorch_kobert import get_pytorch_kobert_model
from gluonnlp.data import SentencepieceTokenizer
from kobert.utils import get_tokenizer

import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, RandomSampler
import gluonnlp as nlp
import numpy as np
from tqdm import tqdm, notebook

import pandas as pd
import numpy as np

#transformers
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup, get_linear_schedule_with_warmup
from transformers import ElectraModel, ElectraTokenizer
from transformers import BertTokenizer, BertModel
from transformers import AutoTokenizer,AutoModel, RobertaPreTrainedModel, AutoConfig
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# dataset

In [2]:
# make dataset
class BERTDataset(Dataset):
    """Sentence pairs with marked target spans, pre-tokenized for the classifier.

    Every row of the TSV file becomes one sequence of the form
    ``s1 (target wrapped in [SS1]/[ES1]) [SEP] s2 (target wrapped in [SS2]/[ES2])``.
    For each row the dataset stores the token ids, the attention mask and two
    0/1 masks that cover the marked span (markers included) in each sentence.

    Args:
        data_path: Path to a tab-separated file with the columns ``SENTENCE1``,
            ``SENTENCE2``, ``ANSWER``, ``start_s1``, ``end_s1``, ``start_s2``
            and ``end_s2``.
        tokenizer: Callable that is invoked like a Hugging Face tokenizer and
            returns a mapping with ``input_ids`` and ``attention_mask``
            tensors of shape ``[1, seq_len]``.
        max_len: Length every sequence is padded to.
        marker_id_start: Smallest token id that is treated as a span marker.
            The default (32000) matches a 32000-entry base vocabulary with the
            four marker tokens appended after it.

    Raises:
        ValueError: If fewer than four marker tokens are found in an encoded
            row (for instance when the marker tokens were not added to the
            tokenizer).
    """

    def __init__(self, data_path, tokenizer, max_len, marker_id_start=32000):
        dataset = pd.read_csv(data_path, delimiter='\t')

        # Labels: False -> 0, anything else (True, or NaN when the answer is
        # missing) -> 1. The comparison is elementwise on purpose, and the
        # column itself is left untouched so no ints are written into a
        # bool/object column.
        answer_is_false = (dataset['ANSWER'] == False).tolist()  # noqa: E712
        self.labels = [torch.tensor(np.int32(0 if flag else 1)) for flag in answer_is_false]

        self.input_ids = []
        self.attention_mask = []
        self.entity_1_mask = []
        self.entity_2_mask = []

        columns = ('SENTENCE1', 'SENTENCE2', 'start_s1', 'end_s1', 'start_s2', 'end_s2')
        rows = zip(*(dataset[name] for name in columns))
        for row_number, (s1, s2, s_s1, e_s1, s_s2, e_s2) in enumerate(rows):
            sentence = (
                s1[:s_s1] + ' [SS1] ' + s1[s_s1:e_s1] + ' [ES1] ' + s1[e_s1:]
                + ' [SEP] '
                + s2[:s_s2] + ' [SS2] ' + s2[s_s2:e_s2] + ' [ES2] ' + s2[e_s2:]
            )
            # NOTE(review): no truncation is requested, so a row longer than
            # max_len presumably comes back longer than max_len -- confirm
            # against the tokenizer in use.
            encoded = tokenizer(
                sentence,
                max_length=max_len,
                padding='max_length',
                return_token_type_ids=False,
                return_attention_mask=True,
                return_tensors='pt',
            )
            # Look the tensors up by key instead of relying on the order in
            # which the tokenizer happens to emit them.
            input_ids = encoded['input_ids']
            attention_mask = encoded['attention_mask']

            marker_positions = [
                position
                for position, token_id in enumerate(input_ids[0].tolist())
                if token_id >= marker_id_start
            ]
            if len(marker_positions) < 4:
                raise ValueError(
                    'row {} of {}: expected 4 span markers, found {}'.format(
                        row_number, data_path, len(marker_positions)
                    )
                )

            seq_len = input_ids.shape[1]
            self.input_ids.append(input_ids)
            self.attention_mask.append(attention_mask)
            self.entity_1_mask.append(
                self._span_mask(seq_len, marker_positions[0], marker_positions[1])
            )
            self.entity_2_mask.append(
                self._span_mask(seq_len, marker_positions[2], marker_positions[3])
            )

    @staticmethod
    def _span_mask(seq_len, first, last):
        """Return a ``[1, seq_len]`` int64 mask with ones on ``first..last`` inclusive."""
        mask = torch.zeros((1, seq_len), dtype=torch.long)
        mask[0, first:last + 1] = 1
        return mask

    def __getitem__(self, i):
        return (
            self.input_ids[i],
            self.attention_mask[i],
            self.entity_1_mask[i],
            self.entity_2_mask[i],
            self.labels[i],
        )

    def __len__(self):
        return len(self.labels)

In [3]:
# Load the pretrained encoder and its tokenizer.
# The second GPU is still preferred when the host has one; otherwise fall back
# to the first GPU or to the CPU so the notebook does not fail on other hosts.
if torch.cuda.is_available():
    device = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")
# Alternative backbone that was tried earlier (KoELECTRA-Base-v3):
# bertmodel = BertModel.from_pretrained("monologg/koelectra-base-v3-discriminator")
# tokenizer = BertTokenizer.from_pretrained("monologg/koelectra-base-v3-discriminator")

config = AutoConfig.from_pretrained(
            "klue/roberta-large",
            num_labels= 2
        )
bertmodel = AutoModel.from_pretrained("klue/roberta-large", config=config)
tokenizer = AutoTokenizer.from_pretrained("klue/roberta-large", return_token_type_ids=False)

# Register the four span-marker tokens and grow the embedding matrix so the
# encoder has a row for each of them.
special_tokens_dict = {'additional_special_tokens': ['[SS1]','[ES1]','[SS2]','[ES2]']}
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
bertmodel.resize_token_embeddings(len(tokenizer))


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it f

Embedding(32004, 1024)

In [4]:
# Build the datasets and their data loaders.
# Parameters
max_len = 280
batch_size = 2

data_train = BERTDataset('./data/task2/NIKL_SKT_WiC_Train.tsv', tokenizer, max_len)
data_dev = BERTDataset('./data/task2/NIKL_SKT_WiC_Dev.tsv', tokenizer, max_len)
data_test = BERTDataset('./data/task2/NIKL_SKT_WiC_Test.tsv', tokenizer, max_len)

# Every split, including dev and test, is drawn in random order here.
train_sampler, dev_sampler, test_sampler = (
    RandomSampler(split) for split in (data_train, data_dev, data_test)
)

# Options shared by all three loaders.
loader_options = dict(batch_size=batch_size, num_workers=4)
train_dataloader = DataLoader(data_train, sampler=train_sampler, **loader_options)
dev_dataloader = DataLoader(data_dev, sampler=dev_sampler, **loader_options)
test_dataloader = DataLoader(data_test, sampler=test_sampler, **loader_options)


# model

In [5]:
# FCLayer
class FCLayer(nn.Module):
    """Fully connected head: dropout, then an optional tanh, then a linear map.

    Args:
        input_dim: Size of the last dimension of the input.
        output_dim: Size of the last dimension of the output.
        dropout_rate: Dropout probability applied to the input.
        use_activation: When true, tanh is applied between the dropout and
            the linear projection.
    """

    def __init__(self, input_dim, output_dim, dropout_rate=0.0, use_activation=True):
        super().__init__()
        self.use_activation = use_activation
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(input_dim, output_dim)
        self.tanh = nn.Tanh()

    def forward(self, x):
        dropped = self.dropout(x)
        # The non-linearity sits in front of the projection, not behind it.
        projected_input = self.tanh(dropped) if self.use_activation else dropped
        return self.linear(projected_input)

# Bert Model
class BERTClassifier(nn.Module):
    """Sentence-pair classifier built on the encoder output and two span averages.

    The encoder's pooled output and the averaged hidden states of the two
    marked spans each pass through an ``FCLayer``; the three vectors are
    concatenated and projected to ``num_classes`` logits.

    Args:
        bert: Encoder module. It is called with ``input_ids`` and
            ``attention_mask`` keywords; element 0 of its output is used as
            the per-token hidden states and element 1 as the pooled vector.
        hidden_size: Width of the encoder outputs.
        num_classes: Number of output logits.
        dr_rate: Dropout probability used inside every ``FCLayer``.
        params: Unused; kept for interface compatibility.
        share_entity_layer: When true (default, the historical behaviour) both
            spans go through ``entity_1_layer`` and ``entity_2_layer`` receives
            no gradient. Set to false to give the second span its own layer.
            Checkpoints trained with sharing must keep it enabled.
    """

    def __init__(self,
                 bert,
                 hidden_size = 1024,
                 num_classes=2,
                 dr_rate=0.1,
                 params=None,
                 share_entity_layer=True):
        super().__init__()
        self.bert = bert
        self.sen_layer = FCLayer(hidden_size, hidden_size, dr_rate)
        self.entity_1_layer = FCLayer(hidden_size, hidden_size, dr_rate)
        self.entity_2_layer = FCLayer(hidden_size, hidden_size, dr_rate)
        self.classifier = FCLayer(hidden_size*3, num_classes, dr_rate, use_activation=False)
        self.dr_rate = dr_rate
        self.share_entity_layer = share_entity_layer
        if dr_rate:
            # Not used by forward(); kept so the module layout is unchanged.
            self.dropout = nn.Dropout(p=dr_rate)

    @staticmethod
    def entity_average(hidden_output, e_mask):
        """Average the hidden states selected by a 0/1 mask.

        Args:
            hidden_output: Tensor of shape ``[batch, seq_len, dim]``.
            e_mask: Tensor of shape ``[batch, seq_len]``; non-zero marks a
                position that belongs to the span.

        Returns:
            Tensor of shape ``[batch, dim]``. A row whose mask is all zeros
            yields a zero vector instead of NaN.
        """
        e_mask_unsqueeze = e_mask.unsqueeze(1)  # [batch, 1, seq_len]
        # Clamp so that an empty span divides by 1 rather than by 0.
        span_lengths = (e_mask != 0).sum(dim=1).unsqueeze(1).clamp(min=1)  # [batch, 1]

        # [batch, 1, seq_len] x [batch, seq_len, dim] -> [batch, 1, dim] -> [batch, dim]
        sum_vector = torch.bmm(e_mask_unsqueeze.float(), hidden_output).squeeze(1)
        return sum_vector.float() / span_lengths.float()  # broadcast over dim

    def forward(self, input_ids, attention_mask, entity_1_mask, entity_2_mask):
        # Drop the singleton dimension 1 that each input carries.
        input_ids = input_ids.squeeze(dim=1)
        attention_mask = attention_mask.squeeze(dim=1)
        entity_1_mask = entity_1_mask.squeeze(dim=1)
        entity_2_mask = entity_2_mask.squeeze(dim=1)

        output = self.bert(input_ids = input_ids, attention_mask = attention_mask)
        entity_1_h = self.entity_average(output[0], entity_1_mask)
        entity_2_h = self.entity_average(output[0], entity_2_mask)

        # Models unpickled from older checkpoints have no such attribute and
        # must keep sharing the first entity layer.
        if getattr(self, 'share_entity_layer', True):
            entity_2_layer = self.entity_1_layer
        else:
            entity_2_layer = self.entity_2_layer

        sentence_h = self.sen_layer(output[1])
        entity_1_h = self.entity_1_layer(entity_1_h)
        entity_2_h = entity_2_layer(entity_2_h)

        out = torch.cat((sentence_h, entity_1_h, entity_2_h), 1)
        return self.classifier(out)


# Training

In [None]:
# Hyperparameters
num_epochs = 10
learning_rate = 1e-5
dr_rate = 0.4
log_interval = 200
warmup_ratio = 0.1
max_grad_norm = 1
weight_decay = 1e-2
warmup_steps = 64
gradient_accumulation_steps = 2
# The scheduler advances once per optimizer update, i.e. once per
# accumulation window.
updates_per_epoch = len(train_dataloader) // gradient_accumulation_steps
t_total = updates_per_epoch * num_epochs

# Wrap the encoder in the classifier and move it to the target device.
model = BERTClassifier(bertmodel,  dr_rate=dr_rate).to(device)

# Optimizer and schedule. (A cosine schedule whose warmup length came from
# warmup_ratio was tried earlier; the linear schedule below replaced it.)
# Biases and LayerNorm weights are exempt from weight decay.
no_decay = ["bias", "LayerNorm.weight"]
decayed_params = []
undecayed_params = []
for param_name, param in model.named_parameters():
    is_exempt = any(nd in param_name for nd in no_decay)
    (undecayed_params if is_exempt else decayed_params).append(param)

optimizer_grouped_parameters = [
    {"params": decayed_params, "weight_decay": weight_decay},
    {"params": undecayed_params, "weight_decay": 0.0},
]
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate, eps=1e-8)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=t_total,
)

loss_fn = nn.CrossEntropyLoss()
sig = nn.Sigmoid()

#정확도 측정을 위한 함수 정의
def compute_metrics(preds, labels):
    """Return the accuracy of ``preds`` against ``labels``.

    Args:
        preds: Predicted class ids (array or any sequence).
        labels: Gold class ids, the same length as ``preds``.

    Returns:
        A dict with the single key ``"acc"``: the fraction of positions where
        the two inputs are equal.

    Raises:
        ValueError: If the two inputs differ in length.
    """
    # Accept plain sequences as well as arrays; a list comparison would give
    # a single bool with no .mean().
    preds = np.asarray(preds)
    labels = np.asarray(labels)
    # An explicit raise survives `python -O`, unlike an assert.
    if len(preds) != len(labels):
        raise ValueError(
            "preds and labels differ in length: {} != {}".format(len(preds), len(labels))
        )
    acc = (preds == labels).mean()
    return {"acc": acc,}

In [7]:
# Training: each epoch runs one pass over the training set and then one pass
# over the dev set.
for e in range(num_epochs):
    train_logits = []
    train_labels = []
    model.train()
    # Gradients are cleared once here and after every optimizer update, NOT
    # per batch: clearing them per batch would throw away every micro-batch
    # of an accumulation window except the last one.
    model.zero_grad()
    for batch_id, (input_ids, attention_mask, entity_1_mask, entity_2_mask, label) in enumerate(notebook.tqdm(train_dataloader)):
        # load input to device
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        entity_1_mask = entity_1_mask.to(device)
        entity_2_mask = entity_2_mask.to(device)
        label = label.to(device)

        # model output
        out = model(input_ids, attention_mask, entity_1_mask, entity_2_mask)
        loss = loss_fn(out.view(-1, 2), label.long().view(-1))
        # Scale so the accumulated gradient is the mean over the window; the
        # unscaled loss is what gets logged below.
        (loss / gradient_accumulation_steps).backward()
        if (batch_id + 1) % gradient_accumulation_steps == 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            scheduler.step()  # Update learning rate schedule
            model.zero_grad()

        # Keep per-batch results and join them once after the epoch.
        train_logits.append(out.detach().cpu().numpy())
        train_labels.append(label.detach().cpu().numpy())
        if batch_id % log_interval == 0:
            print("epoch {} batch id {} loss {}".format(e+1, batch_id+1, loss.data.cpu().numpy()))
    preds = np.argmax(np.concatenate(train_logits, axis=0), axis=1)
    out_label_ids = np.concatenate(train_labels, axis=0)
    result = compute_metrics(preds, out_label_ids)
    print("epoch {} train acc {}".format(e+1, result))
    # NOTE(review): the file name embeds the repr of the train-accuracy dict
    # and the checkpoint is written before validation -- confirm this is the
    # intended naming.
    torch.save(model, 'model_1_{}_{}.pt'.format(e+1, result))

    dev_logits = []
    dev_labels = []
    model.eval()
    # No autograd graph is needed for validation.
    with torch.no_grad():
        for batch_id, (input_ids, attention_mask, entity_1_mask, entity_2_mask, label) in enumerate(notebook.tqdm(dev_dataloader)):
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            entity_1_mask = entity_1_mask.to(device)
            entity_2_mask = entity_2_mask.to(device)
            label = label.long().to(device)

            # model output
            out = model(input_ids, attention_mask, entity_1_mask, entity_2_mask)

            dev_logits.append(out.detach().cpu().numpy())
            dev_labels.append(label.detach().cpu().numpy())
    preds = np.argmax(np.concatenate(dev_logits, axis=0), axis=1)
    out_label_ids = np.concatenate(dev_labels, axis=0)
    result = compute_metrics(preds, out_label_ids)
    print("epoch {} validation acc {}".format(e+1, result))

  0%|          | 0/3874 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 0.790594756603241
epoch 1 batch id 201 loss 0.7467426061630249
epoch 1 batch id 401 loss 1.3428559303283691
epoch 1 batch id 601 loss 0.6542983651161194
epoch 1 batch id 801 loss 0.9069172143936157
epoch 1 batch id 1001 loss 1.4783623218536377
epoch 1 batch id 1201 loss 0.06883548200130463
epoch 1 batch id 1401 loss 0.5499914288520813
epoch 1 batch id 1601 loss 1.4313926696777344
epoch 1 batch id 1801 loss 1.196056842803955
epoch 1 batch id 2001 loss 2.0780880451202393
epoch 1 batch id 2201 loss 0.005443713627755642
epoch 1 batch id 2401 loss 1.7908670902252197
epoch 1 batch id 2601 loss 0.006554496008902788
epoch 1 batch id 2801 loss 0.004062008578330278
epoch 1 batch id 3001 loss 2.0045833587646484
epoch 1 batch id 3201 loss 0.0041614375077188015
epoch 1 batch id 3401 loss 0.06080068647861481
epoch 1 batch id 3601 loss 0.002382589504122734
epoch 1 batch id 3801 loss 0.0033015816006809473
epoch 1 train acc {'acc': 0.7210893133711925}


  0%|          | 0/583 [00:00<?, ?it/s]

epoch 1 validation acc {'acc': 0.8404802744425386}


  0%|          | 0/3874 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 0.00527550745755434
epoch 2 batch id 201 loss 0.0036405175924301147
epoch 2 batch id 401 loss 0.001819691970013082
epoch 2 batch id 601 loss 0.006443818099796772
epoch 2 batch id 801 loss 0.0024621961638331413
epoch 2 batch id 1001 loss 0.002642020583152771
epoch 2 batch id 1201 loss 0.005969279911369085
epoch 2 batch id 1401 loss 0.011766495183110237
epoch 2 batch id 1601 loss 0.0028272320050746202
epoch 2 batch id 1801 loss 0.0035058900248259306
epoch 2 batch id 2001 loss 0.06369456648826599
epoch 2 batch id 2201 loss 0.0006226729601621628
epoch 2 batch id 2401 loss 2.837902069091797
epoch 2 batch id 2601 loss 0.00880520511418581
epoch 2 batch id 2801 loss 0.003517232835292816
epoch 2 batch id 3001 loss 0.008019773289561272
epoch 2 batch id 3201 loss 0.018939699977636337
epoch 2 batch id 3401 loss 0.012873920612037182
epoch 2 batch id 3601 loss 0.0026731425896286964
epoch 2 batch id 3801 loss 0.0007746668998152018
epoch 2 train acc {'acc': 0.8776458440887971}


  0%|          | 0/583 [00:00<?, ?it/s]

epoch 2 validation acc {'acc': 0.8370497427101201}


  0%|          | 0/3874 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 0.0005473165074363351
epoch 3 batch id 201 loss 0.00044859369518235326
epoch 3 batch id 401 loss 3.7474963665008545
epoch 3 batch id 601 loss 0.00046206306433305144
epoch 3 batch id 801 loss 0.000991150038316846
epoch 3 batch id 1001 loss 0.0028611482121050358
epoch 3 batch id 1201 loss 0.003729625139385462
epoch 3 batch id 1401 loss 0.000587645685300231
epoch 3 batch id 1601 loss 0.0007251350907608867
epoch 3 batch id 1801 loss 0.0010655898367986083
epoch 3 batch id 2001 loss 0.0026257354766130447
epoch 3 batch id 2201 loss 0.00253859581425786
epoch 3 batch id 2401 loss 0.001768719870597124
epoch 3 batch id 2601 loss 2.9481699466705322
epoch 3 batch id 2801 loss 0.0017502878326922655
epoch 3 batch id 3001 loss 0.0007944323588162661
epoch 3 batch id 3201 loss 0.0003815598611254245
epoch 3 batch id 3401 loss 3.6257262229919434
epoch 3 batch id 3601 loss 0.0021949734073132277
epoch 3 batch id 3801 loss 0.0008049942553043365
epoch 3 train acc {'acc': 0.924109447599

  0%|          | 0/583 [00:00<?, ?it/s]

epoch 3 validation acc {'acc': 0.8739279588336192}


  0%|          | 0/3874 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 0.0014271220425143838
epoch 4 batch id 201 loss 0.0007870995323173702
epoch 4 batch id 401 loss 0.0006624786765314639
epoch 4 batch id 601 loss 3.163292646408081
epoch 4 batch id 801 loss 0.0005943990545347333
epoch 4 batch id 1001 loss 0.0007640764233656228
epoch 4 batch id 1201 loss 0.001174368429929018
epoch 4 batch id 1401 loss 0.015505165793001652
epoch 4 batch id 1601 loss 0.0013265145244076848
epoch 4 batch id 1801 loss 0.0012510617962107062
epoch 4 batch id 2001 loss 0.0030161028262227774
epoch 4 batch id 2201 loss 0.0004979298100806773
epoch 4 batch id 2401 loss 0.01264047808945179
epoch 4 batch id 2601 loss 0.0016506321262568235
epoch 4 batch id 2801 loss 0.000547493458725512
epoch 4 batch id 3001 loss 0.0004561647365335375
epoch 4 batch id 3201 loss 0.0020360038615763187
epoch 4 batch id 3401 loss 0.000336756173055619
epoch 4 batch id 3601 loss 0.00023683741164859384
epoch 4 batch id 3801 loss 0.00022957049077376723
epoch 4 train acc {'acc': 0.9504388

  0%|          | 0/583 [00:00<?, ?it/s]

epoch 4 validation acc {'acc': 0.8876500857632933}


  0%|          | 0/3874 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 0.01683790236711502
epoch 5 batch id 201 loss 0.0002049551549134776
epoch 5 batch id 401 loss 4.1446123123168945
epoch 5 batch id 601 loss 0.0005619748844765127
epoch 5 batch id 801 loss 0.0006912937969900668
epoch 5 batch id 1001 loss 0.00044431054266169667
epoch 5 batch id 1201 loss 0.000928266323171556
epoch 5 batch id 1401 loss 0.000205674470635131
epoch 5 batch id 1601 loss 0.00034805660834535956
epoch 5 batch id 1801 loss 0.0006297264480963349
epoch 5 batch id 2001 loss 0.0005475491052493453
epoch 5 batch id 2201 loss 0.0006058131693862379
epoch 5 batch id 2401 loss 0.0009276518831029534
epoch 5 batch id 2601 loss 0.00020412176672834903
epoch 5 batch id 2801 loss 0.00028772425139322877
epoch 5 batch id 3001 loss 0.0016889076214283705
epoch 5 batch id 3201 loss 0.0002923620049841702
epoch 5 batch id 3401 loss 0.00034968130057677627
epoch 5 batch id 3601 loss 0.0003647725097835064
epoch 5 batch id 3801 loss 0.00019929301925003529
epoch 5 train acc {'acc': 0.

  0%|          | 0/583 [00:00<?, ?it/s]

epoch 5 validation acc {'acc': 0.902229845626072}


  0%|          | 0/3874 [00:00<?, ?it/s]

epoch 6 batch id 1 loss 0.0003923597978428006
epoch 6 batch id 201 loss 0.0004294646787457168
epoch 6 batch id 401 loss 0.00033953547244891524
epoch 6 batch id 601 loss 0.00020238629076629877
epoch 6 batch id 801 loss 0.0005185451591387391
epoch 6 batch id 1001 loss 0.00011538515536813065
epoch 6 batch id 1201 loss 8.171319495886564e-05
epoch 6 batch id 1401 loss 0.0002086538588628173
epoch 6 batch id 1601 loss 9.190299897454679e-05
epoch 6 batch id 1801 loss 0.0003420373541302979
epoch 6 batch id 2001 loss 0.00012265566329006106
epoch 6 batch id 2201 loss 0.0005048395833000541
epoch 6 batch id 2401 loss 0.0003611051070038229
epoch 6 batch id 2601 loss 0.0007300837314687669
epoch 6 batch id 2801 loss 3.683489194372669e-05
epoch 6 batch id 3001 loss 0.0007906716782599688
epoch 6 batch id 3201 loss 0.0006224270327948034
epoch 6 batch id 3401 loss 0.0002892651245929301
epoch 6 batch id 3601 loss 0.0002751533465925604
epoch 6 batch id 3801 loss 0.0002878435770981014
epoch 6 train acc {'acc

  0%|          | 0/583 [00:00<?, ?it/s]

epoch 6 validation acc {'acc': 0.8970840480274442}


  0%|          | 0/3874 [00:00<?, ?it/s]

epoch 7 batch id 1 loss 0.0001326664787484333
epoch 7 batch id 201 loss 0.00013541101361624897
epoch 7 batch id 401 loss 0.0007587043801322579
epoch 7 batch id 601 loss 0.00023868808057159185
epoch 7 batch id 801 loss 0.00015031162183731794
epoch 7 batch id 1001 loss 0.00022276813979260623
epoch 7 batch id 1201 loss 0.0001829663524404168
epoch 7 batch id 1401 loss 0.00028347159968689084
epoch 7 batch id 1601 loss 0.00016300519928336143
epoch 7 batch id 1801 loss 0.0004114249022677541
epoch 7 batch id 2001 loss 7.360887684626505e-05
epoch 7 batch id 2201 loss 6.764876161469147e-05
epoch 7 batch id 2401 loss 0.0002599376894067973
epoch 7 batch id 2601 loss 0.00012748473091050982
epoch 7 batch id 2801 loss 7.045007077977061e-05
epoch 7 batch id 3001 loss 0.00011705466022249311
epoch 7 batch id 3201 loss 0.00049797841347754
epoch 7 batch id 3401 loss 2.6778483390808105
epoch 7 batch id 3601 loss 0.00017986400052905083
epoch 7 batch id 3801 loss 0.00034320293343625963
epoch 7 train acc {'ac

  0%|          | 0/583 [00:00<?, ?it/s]

epoch 7 validation acc {'acc': 0.9159519725557461}


  0%|          | 0/3874 [00:00<?, ?it/s]

epoch 8 batch id 1 loss 0.00013111824227962643
epoch 8 batch id 201 loss 9.798243991099298e-05
epoch 8 batch id 401 loss 4.482168878894299e-05
epoch 8 batch id 601 loss 0.00020835487521253526
epoch 8 batch id 801 loss 5.948278703726828e-05
epoch 8 batch id 1001 loss 3.540433681337163e-05
epoch 8 batch id 1201 loss 0.0002613885444588959
epoch 8 batch id 1401 loss 0.00015495916886720806
epoch 8 batch id 1601 loss 3.367601311765611e-05
epoch 8 batch id 1801 loss 2.4318376745213754e-05
epoch 8 batch id 2001 loss 0.00013863072672393173
epoch 8 batch id 2201 loss 7.688695040997118e-05
epoch 8 batch id 2401 loss 4.291442019166425e-05
epoch 8 batch id 2601 loss 6.40722646494396e-05
epoch 8 batch id 2801 loss 3.9636190194869414e-05
epoch 8 batch id 3001 loss 9.470416989643127e-05
epoch 8 batch id 3201 loss 6.043647226761095e-05
epoch 8 batch id 3401 loss 8.814987813821062e-05
epoch 8 batch id 3601 loss 5.995970423100516e-05
epoch 8 batch id 3801 loss 0.00010203722195001319
epoch 8 train acc {'a

  0%|          | 0/583 [00:00<?, ?it/s]

epoch 8 validation acc {'acc': 0.9065180102915952}


  0%|          | 0/3874 [00:00<?, ?it/s]

epoch 9 batch id 1 loss 4.291411823942326e-05
epoch 9 batch id 201 loss 5.906618753215298e-05
epoch 9 batch id 401 loss 2.8252092306502163e-05
epoch 9 batch id 601 loss 0.00013290331116877496
epoch 9 batch id 801 loss 1.7464008124079555e-05
epoch 9 batch id 1001 loss 3.296063368907198e-05
epoch 9 batch id 1201 loss 3.069591912208125e-05
epoch 9 batch id 1401 loss 3.290121821919456e-05
epoch 9 batch id 1601 loss 0.00010966559057123959
epoch 9 batch id 1801 loss 5.858963413629681e-05
epoch 9 batch id 2001 loss 4.1483770473860204e-05
epoch 9 batch id 2201 loss 7.557579374406487e-05
epoch 9 batch id 2401 loss 2.5093217118410394e-05
epoch 9 batch id 2601 loss 3.92783222196158e-05
epoch 9 batch id 2801 loss 1.609310129424557e-05
epoch 9 batch id 3001 loss 0.0001037039837683551
epoch 9 batch id 3201 loss 0.00013404031051322818
epoch 9 batch id 3401 loss 5.3582709369948134e-05
epoch 9 batch id 3601 loss 1.1444026313256472e-05
epoch 9 batch id 3801 loss 0.0001267710467800498
epoch 9 train acc {

  0%|          | 0/583 [00:00<?, ?it/s]

epoch 9 validation acc {'acc': 0.8996569468267581}


  0%|          | 0/3874 [00:00<?, ?it/s]

epoch 10 batch id 1 loss 1.5497076674364507e-05
epoch 10 batch id 201 loss 0.00014500756515190005
epoch 10 batch id 401 loss 0.0001320105220656842
epoch 10 batch id 601 loss 1.4006974197400268e-05
epoch 10 batch id 801 loss 7.140310481190681e-05
epoch 10 batch id 1001 loss 7.086715777404606e-05
epoch 10 batch id 1201 loss 5.715917359339073e-05
epoch 10 batch id 1401 loss 8.77934944583103e-05
epoch 10 batch id 1601 loss 1.8596445443108678e-05
epoch 10 batch id 1801 loss 2.9027040000073612e-05
epoch 10 batch id 2001 loss 4.535799234872684e-05
epoch 10 batch id 2201 loss 1.9788545614574105e-05
epoch 10 batch id 2401 loss 3.647694393293932e-05
epoch 10 batch id 2601 loss 6.669516733381897e-05
epoch 10 batch id 2801 loss 6.031803786754608e-05
epoch 10 batch id 3001 loss 7.694640953559428e-05
epoch 10 batch id 3201 loss 1.2338080523477402e-05


# Test
### Test Data로 성능을 확인할 때는 다음 셀의 반복문에서 dev dataloader를 test dataloader로 수정해야 합니다 :)

In [7]:
# Evaluation setup: the checkpoints to ensemble and ordered data loaders.
models_to_ensemble = [
    'task2_best_model(1).pt',
    'task2_best_model(2).pt',
    'task2_best_model(3).pt',
    'task2_best_model(4).pt',
    'task2_best_model(5).pt',
]
# Filled with one prediction array per checkpoint name.
ensemble_preds = dict()

# Rebuilt without a sampler so every checkpoint sees the rows in the same
# order, which the ensemble relies on.
eval_loader_options = dict(batch_size=batch_size, num_workers=4)
dev_dataloader = DataLoader(data_dev, **eval_loader_options)
test_dataloader = DataLoader(data_test, **eval_loader_options)

def compute_metrics(preds, labels):
    """Return the accuracy of ``preds`` against ``labels``.

    Args:
        preds: Predicted class ids (array or any sequence).
        labels: Gold class ids, the same length as ``preds``.

    Returns:
        A dict with the single key ``"acc"``: the fraction of positions where
        the two inputs are equal.

    Raises:
        ValueError: If the two inputs differ in length.
    """
    # Accept plain sequences as well as arrays; a list comparison would give
    # a single bool with no .mean().
    preds = np.asarray(preds)
    labels = np.asarray(labels)
    # An explicit raise survives `python -O`, unlike an assert.
    if len(preds) != len(labels):
        raise ValueError(
            "preds and labels differ in length: {} != {}".format(len(preds), len(labels))
        )
    acc = (preds == labels).mean()
    return {"acc": acc,}

# Score every checkpoint on the dev set and keep its predictions for the ensemble.
for model_name in models_to_ensemble:
    batch_logits = []
    batch_labels = []
    # The checkpoint is a whole pickled model, so no fresh BERTClassifier has
    # to be built first. map_location lets a checkpoint saved on another
    # device load onto the current one.
    # NOTE: torch.load unpickles arbitrary objects -- only load trusted files.
    model = torch.load('./model/'+model_name, map_location=device).to(device)
    model.eval()
    # Inference only: skip building autograd graphs.
    with torch.no_grad():
        for batch_id, (input_ids, attention_mask, entity_1_mask, entity_2_mask, label) in enumerate(notebook.tqdm(dev_dataloader)):
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            entity_1_mask = entity_1_mask.to(device)
            entity_2_mask = entity_2_mask.to(device)
            label = label.long().to(device)

            # model output
            out = model(input_ids, attention_mask, entity_1_mask, entity_2_mask)

            # Keep per-batch results and join them once after the loop.
            batch_logits.append(out.detach().cpu().numpy())
            batch_labels.append(label.detach().cpu().numpy())
    preds = np.argmax(np.concatenate(batch_logits, axis=0), axis=1)
    out_label_ids = np.concatenate(batch_labels, axis=0)
    ensemble_preds[model_name] = preds
    result = compute_metrics(preds, out_label_ids)
    print("model {} validation acc {}".format(model_name, result))

  0%|          | 0/583 [00:00<?, ?it/s]

model task2_best_model(1).pt validation acc {'acc': 0.9159519725557461}


  0%|          | 0/583 [00:00<?, ?it/s]

model task2_best_model(2).pt validation acc {'acc': 0.9108061749571184}


  0%|          | 0/583 [00:00<?, ?it/s]

model task2_best_model(3).pt validation acc {'acc': 0.9116638078902229}


  0%|          | 0/583 [00:00<?, ?it/s]

model task2_best_model(4).pt validation acc {'acc': 0.9159519725557461}


  0%|          | 0/583 [00:00<?, ?it/s]

model task2_best_model(5).pt validation acc {'acc': 0.9039451114922813}


In [8]:
# Accuracy of the weighted-vote ensemble.
# The weights were found by trying different values by hand.
weight = [2, 2, 1, 3, 1]

# Accumulate weight * prediction over the checkpoints, in insertion order.
for n, model_votes in enumerate(ensemble_preds.values()):
    scaled_votes = weight[n] * model_votes
    preds = scaled_votes if n == 0 else preds + scaled_votes

# After the loop `n` is the index of the last checkpoint, so only the weights
# that were actually used enter the normalisation.
used_weight_total = sum(weight[:n+1])
pred = np.around(preds / used_weight_total)
result = compute_metrics(pred, out_label_ids)
print("model ensemble validation acc {}".format(result))


model ensemble validation acc {'acc': 0.9322469982847341}
