In [28]:
import random
import logging
from IPython.display import display, HTML

import numpy as np
import pandas as pd
import datasets
from datasets import load_dataset, load_metric, ClassLabel, Sequence
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

from sklearn.model_selection import train_test_split

import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import warnings
warnings.filterwarnings(action='ignore')

In [14]:
model_checkpoint = "klue/bert-base"
batch_size = 32
task = "nli"
MODEL_P = "models/klue-bert-base-augmented.pth"
RANDOM_SEED = 17

In [None]:
def seed_everything(seed: int = 17):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    torch.backends.cudnn.benchmark = True  # type: ignore
    
seed_everything(RANDOM_SEED)

In [15]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

In [16]:
dataset = pd.read_csv("data/train_data.csv",index_col=False)
test = pd.read_csv("data/test_data.csv",index_col=False)

In [17]:
for i in range(len(dataset['title'])):
    if "..." in dataset['title'][i]:
        dataset['title'][i] = dataset['title'][i].replace("..."," 스포츠")
        
for i in range(len(test['title'])):
    if "..." in test['title'][i]:
        test['title'][i] = test['title'][i].replace("..."," 스포츠")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset['title'][i] = dataset['title'][i].replace("..."," 스포츠")
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['title'][i] = test['title'][i].replace("..."," 스포츠")


In [18]:
for i in dataset['title']:
    if "스포츠" in i:
        print(i)

NBA 카지노업체와 스폰서 계약…美프로스포츠 사상 처음
셰이크 살만 AFC 회장 평양 방문…최휘 국가체육지도위원장 스포츠
데얀·김치우 OUT 조영욱 IN…서울 과감한 재건 성과 스포츠
아시안게임 만리장성 맞서는 농구 단일팀 이문규 감독 스포츠
한화 4천20일 만의 PS vs 넥센 4년 만의 PO 스포츠
아시안게임 경기장 잔디 점검한 김학범 중동팀에 유리한 스포츠
아가메즈·비예나 등 V리그 외국인 선수 대거 입국…KOVO 스포츠
1보 류현진 한국인 첫 MLB 올스타전 선발로 1이닝 스포츠
아시안게임 여자축구 전가을 황금세대 책임감으로 새 역 스포츠
프로농구 KBL 음주 운전 kt 박철호에 36경기 출전  스포츠
먼저 2승 최태웅 감독 우승 기회 왔다. 철저히 준비 스포츠
홈런왕 루스 유니폼 67억원에 낙찰…역대 스포츠경매 최고가
4쿼터 3점포 4방 OK저축은행 2연패 탈출…신한은행 스포츠
패럴림픽 김정숙 여사 아이스하키 관람…장애인스포츠 많이 알려지길
WNBA 박지수 LA 스파크스 상대로 4분 39초 출전… 스포츠
NBA 경기 도중 난투극 론도 3경기 폴 2경기 출전 정 스포츠
류현진 박찬호 이래 18년 만의 한국인 MLB 개막전 승 스포츠
아시안게임 동점골 이민아 선수들 속상해해…태극 낭자 스포츠
관리의 힘 SK 김광현 5이닝 무실점 컨디션 괜찮아 스포츠
MLB 애틀랜타 9회 2사 후 극적인 뒤집기…18년 만의 스포츠
프로농구 SK 챔프전 사상 최초로 2패 후 3연승 1승 스포츠
베이브 류스 류현진 고교 시절 홈런 1개…통산 타율  스포츠
머리에 사구 NC 손시헌 의식 있어…2∼3일 입원 예정 스포츠
연장 2골 레스터 FA컵 32강 재경기서 더비카운티에 스포츠
한국 남자배구 U21 세계선수권서 중국에 03 패배… 스포츠
아시안게임 라건아 효과 누린 남자농구 몽골 잡고  스포츠
신인 최다홈런 친 알론소 NL신인상 수상…만장일치는 실패 스포츠
아시안게임 이란전 끝낸 김학범호 회복훈련으로 우즈베크 스포츠
세인트루이스 33번 김광현 명문 구단 입단 영광·SK 스포츠


In [19]:
dataset_train, dataset_val = train_test_split(dataset,test_size = 0.2,random_state = RANDOM_SEED)

In [20]:
class BERTDataset(Dataset):
    def __init__(self, dataset, sent_key, label_key, bert_tokenizer):
        
        self.sentences = [ bert_tokenizer(i,truncation=True,return_token_type_ids=False) for i in dataset[sent_key] ]
        
        if not label_key == None:
            self.mode = "train"
        else:
            self.mode = "test"
            
        if self.mode == "train":
            self.labels = [np.int64(i) for i in dataset[label_key]]
        else:
            self.labels = [np.int64(0) for i in dataset[sent_key]]

    def __getitem__(self, i):
        if self.mode == "train":
            self.sentences[i]["label"] = self.labels[i]
            return self.sentences[i]
#             return ( self.sentences[i] , self.labels[i] )
        else:
            return self.sentences[i]

    def __len__(self):
        return (len(self.labels))


In [21]:
data_train = BERTDataset(dataset_train, "title", "topic_idx", tokenizer)
data_val = BERTDataset(dataset_val, "title", "topic_idx", tokenizer)
data_test = BERTDataset(test, "title", None, tokenizer)

In [22]:
num_labels = 7
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

Some weights of the model checkpoint at klue/bert-base were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized

In [23]:
metric = load_metric("glue", "qnli")

In [24]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)

In [25]:
metric_name = "accuracy"

args = TrainingArguments(
    "test-nli",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
)

In [26]:
def model_init():
    return AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

In [27]:
trainer = Trainer(
    model_init=model_init,
    args=args,
    train_dataset=data_train,
    eval_dataset=data_val,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache\huggingface\transformers\fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.7cee10e8ea7ffa278f8be4b141000263f2b18795e5ef5e025352b2af6851f8fb
Model config BertConfig {
  "architectures": [
    "BertForPretraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_

In [29]:
best_run = trainer.hyperparameter_search(n_trials=10, direction="maximize")

[32m[I 2021-08-02 20:57:11,635][0m A new study created in memory with name: no-name-d8c37bed-8fb2-4042-b345-b43a57476330[0m
Trial:
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache\huggingface\transformers\fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.7cee10e8ea7ffa278f8be4b141000263f2b18795e5ef5e025352b2af6851f8fb
Model config BertConfig {
  "architectures": [
    "BertForPretraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4786,0.343172,0.885555
2,0.2817,0.329987,0.887197
3,0.2303,0.330917,0.889169
4,0.1947,0.342439,0.890373


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-0\checkpoint-571
Configuration saved in test-nli\run-0\checkpoint-571\config.json
Model weights saved in test-nli\run-0\checkpoint-571\pytorch_model.bin
tokenizer config file saved in test-nli\run-0\checkpoint-571\tokenizer_config.json
Special tokens file saved in test-nli\run-0\checkpoint-571\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-0\checkpoint-1142
Configuration saved in test-nli\run-0\checkpoint-1142\config.json
Model weights saved in test-nli\run-0\checkpoint-1142\pytorch_model.bin
tokenizer config file saved in test-nli\run-0\checkpoint-1142\tokenizer_config.json
Special tokens file saved in test-nli\run-0\checkpoint-1142\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-0\checkpoint-1713
Config

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4866,0.536607,0.878874
2,0.4315,0.560367,0.883145
3,0.2642,0.696654,0.878765
4,0.1514,0.780365,0.879641
5,0.0737,0.916854,0.875589


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-1\checkpoint-9131
Configuration saved in test-nli\run-1\checkpoint-9131\config.json
Model weights saved in test-nli\run-1\checkpoint-9131\pytorch_model.bin
tokenizer config file saved in test-nli\run-1\checkpoint-9131\tokenizer_config.json
Special tokens file saved in test-nli\run-1\checkpoint-9131\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-1\checkpoint-18262
Configuration saved in test-nli\run-1\checkpoint-18262\config.json
Model weights saved in test-nli\run-1\checkpoint-18262\pytorch_model.bin
tokenizer config file saved in test-nli\run-1\checkpoint-18262\tokenizer_config.json
Special tokens file saved in test-nli\run-1\checkpoint-18262\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-1\checkpoint-2

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3742,0.346169,0.889826
2,0.28,0.345256,0.888183
3,0.2608,0.35834,0.889059
4,0.2249,0.383639,0.890702
5,0.1837,0.392091,0.889716


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-2\checkpoint-2283
Configuration saved in test-nli\run-2\checkpoint-2283\config.json
Model weights saved in test-nli\run-2\checkpoint-2283\pytorch_model.bin
tokenizer config file saved in test-nli\run-2\checkpoint-2283\tokenizer_config.json
Special tokens file saved in test-nli\run-2\checkpoint-2283\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-2\checkpoint-4566
Configuration saved in test-nli\run-2\checkpoint-4566\config.json
Model weights saved in test-nli\run-2\checkpoint-4566\pytorch_model.bin
tokenizer config file saved in test-nli\run-2\checkpoint-4566\tokenizer_config.json
Special tokens file saved in test-nli\run-2\checkpoint-4566\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-2\checkpoint-6849
C

Epoch,Training Loss,Validation Loss,Accuracy
1,0.366,0.348841,0.878765
2,0.2454,0.358874,0.881612
3,0.1441,0.450891,0.876684
4,0.0763,0.588864,0.876903
5,0.0326,0.689786,0.875041


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-3\checkpoint-1142
Configuration saved in test-nli\run-3\checkpoint-1142\config.json
Model weights saved in test-nli\run-3\checkpoint-1142\pytorch_model.bin
tokenizer config file saved in test-nli\run-3\checkpoint-1142\tokenizer_config.json
Special tokens file saved in test-nli\run-3\checkpoint-1142\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-3\checkpoint-2284
Configuration saved in test-nli\run-3\checkpoint-2284\config.json
Model weights saved in test-nli\run-3\checkpoint-2284\pytorch_model.bin
tokenizer config file saved in test-nli\run-3\checkpoint-2284\tokenizer_config.json
Special tokens file saved in test-nli\run-3\checkpoint-2284\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-3\checkpoint-3426
C

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3498,0.33885,0.887526
2,0.2842,0.334596,0.887636
3,0.2343,0.330192,0.892345


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-4\checkpoint-1142
Configuration saved in test-nli\run-4\checkpoint-1142\config.json
Model weights saved in test-nli\run-4\checkpoint-1142\pytorch_model.bin
tokenizer config file saved in test-nli\run-4\checkpoint-1142\tokenizer_config.json
Special tokens file saved in test-nli\run-4\checkpoint-1142\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-4\checkpoint-2284
Configuration saved in test-nli\run-4\checkpoint-2284\config.json
Model weights saved in test-nli\run-4\checkpoint-2284\pytorch_model.bin
tokenizer config file saved in test-nli\run-4\checkpoint-2284\tokenizer_config.json
Special tokens file saved in test-nli\run-4\checkpoint-2284\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-4\checkpoint-3426
C

Epoch,Training Loss,Validation Loss,Accuracy
1,0.489,0.343016,0.887416


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-5\checkpoint-571
Configuration saved in test-nli\run-5\checkpoint-571\config.json
Model weights saved in test-nli\run-5\checkpoint-571\pytorch_model.bin
tokenizer config file saved in test-nli\run-5\checkpoint-571\tokenizer_config.json
Special tokens file saved in test-nli\run-5\checkpoint-571\special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from test-nli\run-5\checkpoint-571 (score: 0.8874164932647026).
[32m[I 2021-08-02 22:21:50,091][0m Trial 5 finished with value: 0.8874164932647026 and parameters: {'learning_rate': 1.4501420971229825e-05, 'num_train_epochs': 1, 'seed': 28, 'per_device_train_batch_size': 64}. Best is trial 4 with value: 0.8923447596101194.[0m
Trial:
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache\hu

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3907,0.386583,0.887855
2,0.3354,0.414767,0.888512
3,0.248,0.467206,0.88884


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-6\checkpoint-4566
Configuration saved in test-nli\run-6\checkpoint-4566\config.json
Model weights saved in test-nli\run-6\checkpoint-4566\pytorch_model.bin
tokenizer config file saved in test-nli\run-6\checkpoint-4566\tokenizer_config.json
Special tokens file saved in test-nli\run-6\checkpoint-4566\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-6\checkpoint-9132
Configuration saved in test-nli\run-6\checkpoint-9132\config.json
Model weights saved in test-nli\run-6\checkpoint-9132\pytorch_model.bin
tokenizer config file saved in test-nli\run-6\checkpoint-9132\tokenizer_config.json
Special tokens file saved in test-nli\run-6\checkpoint-9132\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
[32m[I 2021-08-02 22:36:09,074][0m Trial 6 pruned. [0m
T

Epoch,Training Loss,Validation Loss,Accuracy
1,0.5113,0.500885,0.88424


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
[32m[I 2021-08-02 22:45:19,586][0m Trial 7 pruned. [0m
Trial:
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache\huggingface\transformers\fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.7cee10e8ea7ffa278f8be4b141000263f2b18795e5ef5e025352b2af6851f8fb
Model config BertConfig {
  "architectures": [
    "BertForPretraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6":

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4238,0.338109,0.883036


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
[32m[I 2021-08-02 22:47:00,544][0m Trial 8 pruned. [0m
Trial:
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache\huggingface\transformers\fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.7cee10e8ea7ffa278f8be4b141000263f2b18795e5ef5e025352b2af6851f8fb
Model config BertConfig {
  "architectures": [
    "BertForPretraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6":

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3627,0.350852,0.884788


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
[32m[I 2021-08-02 22:49:50,361][0m Trial 9 pruned. [0m


In [30]:
best_run

BestRun(run_id='4', objective=0.8923447596101194, hyperparameters={'learning_rate': 1.155244705586811e-05, 'num_train_epochs': 3, 'seed': 26, 'per_device_train_batch_size': 32})

In [31]:
for n, v in best_run.hyperparameters.items():
    setattr(trainer.args, n, v)

trainer.train()

loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache\huggingface\transformers\fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.7cee10e8ea7ffa278f8be4b141000263f2b18795e5ef5e025352b2af6851f8fb
Model config BertConfig {
  "architectures": [
    "BertForPretraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3498,0.33885,0.887526
2,0.2842,0.334596,0.887636
3,0.2343,0.330192,0.892345


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\checkpoint-1142
Configuration saved in test-nli\checkpoint-1142\config.json
Model weights saved in test-nli\checkpoint-1142\pytorch_model.bin
tokenizer config file saved in test-nli\checkpoint-1142\tokenizer_config.json
Special tokens file saved in test-nli\checkpoint-1142\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\checkpoint-2284
Configuration saved in test-nli\checkpoint-2284\config.json
Model weights saved in test-nli\checkpoint-2284\pytorch_model.bin
tokenizer config file saved in test-nli\checkpoint-2284\tokenizer_config.json
Special tokens file saved in test-nli\checkpoint-2284\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\checkpoint-3426
Configuration saved in test-nli\checkpoint-3426\config.json
Model w

TrainOutput(global_step=3426, training_loss=0.3176889391719286, metrics={'train_runtime': 367.9209, 'train_samples_per_second': 297.806, 'train_steps_per_second': 9.312, 'total_flos': 1644324533546112.0, 'train_loss': 0.3176889391719286, 'epoch': 3.0})

In [32]:
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32


{'eval_loss': 0.3301921486854553,
 'eval_accuracy': 0.8923447596101194,
 'eval_runtime': 7.7326,
 'eval_samples_per_second': 1180.839,
 'eval_steps_per_second': 36.986,
 'epoch': 3.0}

In [33]:
pred = trainer.predict(data_test)
pred = pred[0]
pred = np.argmax(pred,1)
submission = pd.read_csv('data/sample_submission.csv')
submission['topic_idx'] = pred
submission.to_csv("results/klue-bert-hyperparameter-tuning-with-preprocessing-0803.csv",index=False)

***** Running Prediction *****
  Num examples = 9131
  Batch size = 32
