In [1]:
import random
import logging
from IPython.display import display, HTML
import os

import numpy as np
import pandas as pd
import datasets
from datasets import load_dataset, load_metric, ClassLabel, Sequence
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

from sklearn.model_selection import train_test_split

import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [2]:
def seed_everything(seed: int = 17):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    torch.backends.cudnn.benchmark = True  # type: ignore
    
seed_everything(17)

In [3]:
model_checkpoint = "klue/bert-base"
batch_size = 32
task = "nli"
RANDOM_SEED = 17

In [4]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

In [5]:
dataset = pd.read_csv("data/train_data.csv",index_col=False)
dataset_augmented = pd.read_csv("data/train_data_m2m_translation.csv",index_col=False)
test = pd.read_csv("data/test_data.csv",index_col=False)

In [6]:
dataset_augmented["topic_idx"] = dataset["topic_idx"]

In [7]:
dataset_augmented

Unnamed: 0.1,Unnamed: 0,title,topic_idx
0,0,"인도, 핀란드 비행 결승전...비행 승객 이혼",4
1,1,실리콘 밸리는 Google 15th를 통과 할 것입니다.,4
2,2,이란 외부 긴장 결의안 : 미국은 경제 전쟁을 중단 할 것입니다,4
3,3,NYT 클린턴 Co-한국 비즈니스 특수 관계 조명...공과 회사 완료,4
4,4,Shijingping 트럼프는 빠른 무역 협상을 희망합니다.,4
...,...,...,...
45649,45649,KB 금융 US IB 스테펠과 파트너십... 고급 국가 시장 공격,1
45650,45650,뉴 코로나에 있는 서울 고등학교의 첫 번째 에디션.,2
45651,45651,2020 월드컵 영웅,1
45652,45652,국립 박물관에 대한 답변입니다.,2


In [8]:
dataset_train, dataset_val = train_test_split(dataset,test_size = 0.1,random_state = RANDOM_SEED)

In [9]:
dataset_train["index"]

36615    36615
16758    16758
30712    30712
1407      1407
36067    36067
         ...  
25631    25631
42297    42297
33174    33174
34959    34959
10863    10863
Name: index, Length: 41088, dtype: int64

In [10]:
train_dataset_augmented_title = dataset_augmented["title"][dataset_train["index"]]
train_dataset_augmented_topic_idx = dataset_augmented["topic_idx"][dataset_train["index"]]
train_dataset_augmented = pd.DataFrame({'title' : train_dataset_augmented_title.tolist(), "topic_idx" : train_dataset_augmented_topic_idx.tolist()})

In [11]:
dataset_train = pd.concat([dataset_train,train_dataset_augmented])

In [12]:
dataset_train.head()

Unnamed: 0,index,title,topic_idx
36615,36615.0,이란 외무 트럼프 볼턴에 들볶여 알렉산더도 못한 일 하려해,4
16758,16758.0,영상 한국 부도위험지표 12년 만에 최저…북미회담 덕분,6
30712,30712.0,도이치모터스 도이치파이낸셜 주식 160억원에 추가취득,1
1407,1407.0,서울 출신 학자의 외침 사대문 안만 서울이 아니다,3
36067,36067.0,내일날씨 전국 대체로 맑고 더워…낮 최고 22∼31도,3


In [13]:
class BERTDataset(Dataset):
    def __init__(self, dataset, sent_key, label_key, bert_tokenizer):
        
        self.sentences = [ bert_tokenizer(i,truncation=True,return_token_type_ids=False) for i in dataset[sent_key] ]
        
        if not label_key == None:
            self.mode = "train"
        else:
            self.mode = "test"
            
        if self.mode == "train":
            self.labels = [np.int64(i) for i in dataset[label_key]]
        else:
            self.labels = [np.int64(0) for i in dataset[sent_key]]

    def __getitem__(self, i):
        if self.mode == "train":
            self.sentences[i]["label"] = self.labels[i]
            return self.sentences[i]
#             return ( self.sentences[i] , self.labels[i] )
        else:
            return self.sentences[i]

    def __len__(self):
        return (len(self.labels))


In [14]:
data_train = BERTDataset(dataset_train, "title", "topic_idx", tokenizer)
data_val = BERTDataset(dataset_val, "title", "topic_idx", tokenizer)
data_test = BERTDataset(test, "title", None, tokenizer)

In [15]:
num_labels = 7
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

Some weights of the model checkpoint at klue/bert-base were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized

In [16]:
metric = load_metric("glue", "qnli")

In [17]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)

In [18]:
metric_name = "accuracy"

args = TrainingArguments(
    "test-nli",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
)

In [19]:
def model_init():
    return AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

In [20]:
trainer = Trainer(
    model_init=model_init,
    args=args,
    train_dataset=data_train,
    eval_dataset=data_val,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache\huggingface\transformers\fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.7cee10e8ea7ffa278f8be4b141000263f2b18795e5ef5e025352b2af6851f8fb
Model config BertConfig {
  "architectures": [
    "BertForPretraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_

In [36]:
trainer.train()

***** Running training *****
  Num examples = 82176
  Num Epochs = 5
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 12840


Epoch,Training Loss,Validation Loss,Accuracy
1,0.5555,0.329351,0.887648
2,0.449,0.336401,0.886115
3,0.3142,0.370158,0.890714
4,0.2203,0.442731,0.884363
5,0.1596,0.472244,0.885458


***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\checkpoint-2568
Configuration saved in test-nli\checkpoint-2568\config.json
Model weights saved in test-nli\checkpoint-2568\pytorch_model.bin
tokenizer config file saved in test-nli\checkpoint-2568\tokenizer_config.json
Special tokens file saved in test-nli\checkpoint-2568\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\checkpoint-5136
Configuration saved in test-nli\checkpoint-5136\config.json
Model weights saved in test-nli\checkpoint-5136\pytorch_model.bin
tokenizer config file saved in test-nli\checkpoint-5136\tokenizer_config.json
Special tokens file saved in test-nli\checkpoint-5136\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\checkpoint-7704
Configuration saved in test-nli\checkpoint-7704\config.json
Model w

TrainOutput(global_step=12840, training_loss=0.3552257332846383, metrics={'train_runtime': 1450.9699, 'train_samples_per_second': 283.176, 'train_steps_per_second': 8.849, 'total_flos': 6944296553352384.0, 'train_loss': 0.3552257332846383, 'epoch': 5.0})

In [21]:
best_run = trainer.hyperparameter_search(n_trials=10, direction="maximize")

[32m[I 2021-08-05 09:49:18,124][0m A new study created in memory with name: no-name-e1c45500-25f2-4e23-9b46-de32321a38b7[0m
Trial:
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache\huggingface\transformers\fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.7cee10e8ea7ffa278f8be4b141000263f2b18795e5ef5e025352b2af6851f8fb
Model config BertConfig {
  "architectures": [
    "BertForPretraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6

Epoch,Training Loss,Validation Loss,Accuracy
1,0.6661,0.371161,0.878449
2,0.5831,0.346737,0.886991
3,0.5433,0.3335,0.888086
4,0.5264,0.330123,0.890714


***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\run-0\checkpoint-1284
Configuration saved in test-nli\run-0\checkpoint-1284\config.json
Model weights saved in test-nli\run-0\checkpoint-1284\pytorch_model.bin
tokenizer config file saved in test-nli\run-0\checkpoint-1284\tokenizer_config.json
Special tokens file saved in test-nli\run-0\checkpoint-1284\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\run-0\checkpoint-2568
Configuration saved in test-nli\run-0\checkpoint-2568\config.json
Model weights saved in test-nli\run-0\checkpoint-2568\pytorch_model.bin
tokenizer config file saved in test-nli\run-0\checkpoint-2568\tokenizer_config.json
Special tokens file saved in test-nli\run-0\checkpoint-2568\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\run-0\checkpoint-3852
C

Epoch,Training Loss,Validation Loss,Accuracy
1,0.7944,0.45607,0.865747


***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\run-1\checkpoint-1284
Configuration saved in test-nli\run-1\checkpoint-1284\config.json
Model weights saved in test-nli\run-1\checkpoint-1284\pytorch_model.bin
tokenizer config file saved in test-nli\run-1\checkpoint-1284\tokenizer_config.json
Special tokens file saved in test-nli\run-1\checkpoint-1284\special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from test-nli\run-1\checkpoint-1284 (score: 0.8657468243539203).
[32m[I 2021-08-05 10:10:00,081][0m Trial 1 finished with value: 0.8657468243539203 and parameters: {'learning_rate': 2.2922190027601913e-06, 'num_train_epochs': 1, 'seed': 20, 'per_device_train_batch_size': 64}. Best is trial 0 with value: 0.8907139728427508.[0m
Trial:
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.ca

Epoch,Training Loss,Validation Loss,Accuracy
1,0.8304,0.586544,0.852825
2,0.5988,0.475445,0.875602


***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\run-2\checkpoint-10272
Configuration saved in test-nli\run-2\checkpoint-10272\config.json
Model weights saved in test-nli\run-2\checkpoint-10272\pytorch_model.bin
tokenizer config file saved in test-nli\run-2\checkpoint-10272\tokenizer_config.json
Special tokens file saved in test-nli\run-2\checkpoint-10272\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\run-2\checkpoint-20544
Configuration saved in test-nli\run-2\checkpoint-20544\config.json
Model weights saved in test-nli\run-2\checkpoint-20544\pytorch_model.bin
tokenizer config file saved in test-nli\run-2\checkpoint-20544\tokenizer_config.json
Special tokens file saved in test-nli\run-2\checkpoint-20544\special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from test-nli\r

Epoch,Training Loss,Validation Loss,Accuracy
1,0.5739,0.318675,0.894875
2,0.4612,0.33157,0.893123
3,0.3995,0.341281,0.892685


***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\run-3\checkpoint-5136
Configuration saved in test-nli\run-3\checkpoint-5136\config.json
Model weights saved in test-nli\run-3\checkpoint-5136\pytorch_model.bin
tokenizer config file saved in test-nli\run-3\checkpoint-5136\tokenizer_config.json
Special tokens file saved in test-nli\run-3\checkpoint-5136\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\run-3\checkpoint-10272
Configuration saved in test-nli\run-3\checkpoint-10272\config.json
Model weights saved in test-nli\run-3\checkpoint-10272\pytorch_model.bin
tokenizer config file saved in test-nli\run-3\checkpoint-10272\tokenizer_config.json
Special tokens file saved in test-nli\run-3\checkpoint-10272\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\run-3\checkpoint-1

Epoch,Training Loss,Validation Loss,Accuracy
1,0.5255,0.314682,0.892904


***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\run-4\checkpoint-5136
Configuration saved in test-nli\run-4\checkpoint-5136\config.json
Model weights saved in test-nli\run-4\checkpoint-5136\pytorch_model.bin
tokenizer config file saved in test-nli\run-4\checkpoint-5136\tokenizer_config.json
Special tokens file saved in test-nli\run-4\checkpoint-5136\special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from test-nli\run-4\checkpoint-5136 (score: 0.892904073587385).
[32m[I 2021-08-05 10:57:38,140][0m Trial 4 finished with value: 0.892904073587385 and parameters: {'learning_rate': 4.9011864512716516e-05, 'num_train_epochs': 1, 'seed': 14, 'per_device_train_batch_size': 16}. Best is trial 4 with value: 0.892904073587385.[0m
Trial:
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache

Epoch,Training Loss,Validation Loss,Accuracy
1,0.6263,0.365534,0.88042


***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\run-5\checkpoint-2568
Configuration saved in test-nli\run-5\checkpoint-2568\config.json
Model weights saved in test-nli\run-5\checkpoint-2568\pytorch_model.bin
tokenizer config file saved in test-nli\run-5\checkpoint-2568\tokenizer_config.json
Special tokens file saved in test-nli\run-5\checkpoint-2568\special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from test-nli\run-5\checkpoint-2568 (score: 0.8804204993429697).
[32m[I 2021-08-05 11:02:32,046][0m Trial 5 finished with value: 0.8804204993429697 and parameters: {'learning_rate': 3.9696788105676335e-06, 'num_train_epochs': 1, 'seed': 7, 'per_device_train_batch_size': 32}. Best is trial 4 with value: 0.892904073587385.[0m
Trial:
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cach

Epoch,Training Loss,Validation Loss,Accuracy
1,0.9168,0.475621,0.861367


***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
[32m[I 2021-08-05 11:06:37,523][0m Trial 6 pruned. [0m
Trial:
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache\huggingface\transformers\fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.7cee10e8ea7ffa278f8be4b141000263f2b18795e5ef5e025352b2af6851f8fb
Model config BertConfig {
  "architectures": [
    "BertForPretraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6":

Epoch,Training Loss,Validation Loss,Accuracy
1,0.5918,0.33293,0.890714


***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\run-7\checkpoint-1284
Configuration saved in test-nli\run-7\checkpoint-1284\config.json
Model weights saved in test-nli\run-7\checkpoint-1284\pytorch_model.bin
tokenizer config file saved in test-nli\run-7\checkpoint-1284\tokenizer_config.json
Special tokens file saved in test-nli\run-7\checkpoint-1284\special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from test-nli\run-7\checkpoint-1284 (score: 0.8907139728427508).
[32m[I 2021-08-05 11:10:49,393][0m Trial 7 finished with value: 0.8907139728427508 and parameters: {'learning_rate': 1.4620848258215489e-05, 'num_train_epochs': 1, 'seed': 29, 'per_device_train_batch_size': 64}. Best is trial 4 with value: 0.892904073587385.[0m
Trial:
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cac

Epoch,Training Loss,Validation Loss,Accuracy
1,0.7023,0.413856,0.888962
2,0.674,0.445024,0.892904
3,0.5141,0.476178,0.89159
4,0.5257,0.493993,0.892466


***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\run-8\checkpoint-20544
Configuration saved in test-nli\run-8\checkpoint-20544\config.json
Model weights saved in test-nli\run-8\checkpoint-20544\pytorch_model.bin
tokenizer config file saved in test-nli\run-8\checkpoint-20544\tokenizer_config.json
Special tokens file saved in test-nli\run-8\checkpoint-20544\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\run-8\checkpoint-41088
Configuration saved in test-nli\run-8\checkpoint-41088\config.json
Model weights saved in test-nli\run-8\checkpoint-41088\pytorch_model.bin
tokenizer config file saved in test-nli\run-8\checkpoint-41088\tokenizer_config.json
Special tokens file saved in test-nli\run-8\checkpoint-41088\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\run-8\checkpo

Epoch,Training Loss,Validation Loss,Accuracy
1,0.5618,0.306527,0.897503


***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\run-9\checkpoint-1284
Configuration saved in test-nli\run-9\checkpoint-1284\config.json
Model weights saved in test-nli\run-9\checkpoint-1284\pytorch_model.bin
tokenizer config file saved in test-nli\run-9\checkpoint-1284\tokenizer_config.json
Special tokens file saved in test-nli\run-9\checkpoint-1284\special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from test-nli\run-9\checkpoint-1284 (score: 0.897503285151117).
[32m[I 2021-08-05 12:39:35,172][0m Trial 9 finished with value: 0.897503285151117 and parameters: {'learning_rate': 5.308571720520917e-05, 'num_train_epochs': 1, 'seed': 18, 'per_device_train_batch_size': 64}. Best is trial 9 with value: 0.897503285151117.[0m


In [22]:
best_run

BestRun(run_id='9', objective=0.897503285151117, hyperparameters={'learning_rate': 5.308571720520917e-05, 'num_train_epochs': 1, 'seed': 18, 'per_device_train_batch_size': 64})

In [23]:
for n, v in best_run.hyperparameters.items():
    setattr(trainer.args, n, v)

trainer.train()

loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache\huggingface\transformers\fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.7cee10e8ea7ffa278f8be4b141000263f2b18795e5ef5e025352b2af6851f8fb
Model config BertConfig {
  "architectures": [
    "BertForPretraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_

Epoch,Training Loss,Validation Loss,Accuracy
1,0.5618,0.306527,0.897503


***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32
Saving model checkpoint to test-nli\checkpoint-1284
Configuration saved in test-nli\checkpoint-1284\config.json
Model weights saved in test-nli\checkpoint-1284\pytorch_model.bin
tokenizer config file saved in test-nli\checkpoint-1284\tokenizer_config.json
Special tokens file saved in test-nli\checkpoint-1284\special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from test-nli\checkpoint-1284 (score: 0.897503285151117).


TrainOutput(global_step=1284, training_loss=0.6057070437992844, metrics={'train_runtime': 249.0942, 'train_samples_per_second': 329.899, 'train_steps_per_second': 5.155, 'total_flos': 1513616259250176.0, 'train_loss': 0.6057070437992844, 'epoch': 1.0})

In [24]:
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 4566
  Batch size = 32


{'eval_loss': 0.30652689933776855,
 'eval_accuracy': 0.897503285151117,
 'eval_runtime': 3.8515,
 'eval_samples_per_second': 1185.513,
 'eval_steps_per_second': 37.128,
 'epoch': 1.0}

In [25]:
pred = trainer.predict(data_test)
pred = pred[0]
pred = np.argmax(pred,1)
submission = pd.read_csv('data/sample_submission.csv')
submission['topic_idx'] = pred
submission.to_csv("results/klue-bert-hyperparameter-tuning-0804-validation-augmented-dataset-0.1.csv",index=False)

***** Running Prediction *****
  Num examples = 9131
  Batch size = 32
