In [1]:
import random
import logging
from IPython.display import display, HTML

import numpy as np
import pandas as pd
import datasets
from datasets import load_dataset, load_metric, ClassLabel, Sequence
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

from sklearn.model_selection import train_test_split

import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [2]:
model_checkpoint = "klue/bert-base"
batch_size = 32
task = "nli"
MODEL_P = "models/klue-bert-base-augmented.pth"
RANDOM_SEED = 17

In [3]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

In [4]:
dataset = pd.read_csv("data/train_data.csv",index_col=False)
test = pd.read_csv("data/test_data.csv",index_col=False)

In [5]:
for i in range(len(dataset['title'])):
    if "..." in dataset['title'][i]:
        dataset['title'][i] = dataset['title'][i].replace("."," ")
        
for i in range(len(test['title'])):
    if "..." in test['title'][i]:
        test['title'][i] = test['title'][i].replace("."," ")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset['title'][i] = dataset['title'][i].replace("."," ")
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['title'][i] = test['title'][i].replace("."," ")


In [6]:
dataset_train, dataset_val = train_test_split(dataset,test_size = 0.2,random_state = RANDOM_SEED)

In [7]:
class BERTDataset(Dataset):
    def __init__(self, dataset, sent_key, label_key, bert_tokenizer):
        
        self.sentences = [ bert_tokenizer(i,truncation=True,return_token_type_ids=False) for i in dataset[sent_key] ]
        
        if not label_key == None:
            self.mode = "train"
        else:
            self.mode = "test"
            
        if self.mode == "train":
            self.labels = [np.int64(i) for i in dataset[label_key]]
        else:
            self.labels = [np.int64(0) for i in dataset[sent_key]]

    def __getitem__(self, i):
        if self.mode == "train":
            self.sentences[i]["label"] = self.labels[i]
            return self.sentences[i]
#             return ( self.sentences[i] , self.labels[i] )
        else:
            return self.sentences[i]

    def __len__(self):
        return (len(self.labels))


In [8]:
data_train = BERTDataset(dataset_train, "title", "topic_idx", tokenizer)
data_val = BERTDataset(dataset_val, "title", "topic_idx", tokenizer)
data_test = BERTDataset(test, "title", None, tokenizer)

In [9]:
num_labels = 7
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

Some weights of the model checkpoint at klue/bert-base were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized

In [10]:
metric = load_metric("glue", "qnli")

In [11]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)

In [16]:
metric_name = "accuracy"

args = TrainingArguments(
    "test-nli",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=1,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [13]:
def model_init():
    return AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

In [17]:
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=data_train,
    eval_dataset=data_val,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

In [18]:
trainer.train()

***** Running training *****
  Num examples = 36523
  Num Epochs = 1
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1142


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3489,0.321428,0.890812


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\checkpoint-1142
Configuration saved in test-nli\checkpoint-1142\config.json
Model weights saved in test-nli\checkpoint-1142\pytorch_model.bin
tokenizer config file saved in test-nli\checkpoint-1142\tokenizer_config.json
Special tokens file saved in test-nli\checkpoint-1142\special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from test-nli\checkpoint-1142 (score: 0.8908115211915453).


TrainOutput(global_step=1142, training_loss=0.364667798925824, metrics={'train_runtime': 124.4446, 'train_samples_per_second': 293.488, 'train_steps_per_second': 9.177, 'total_flos': 525534504177084.0, 'train_loss': 0.364667798925824, 'epoch': 1.0})

In [19]:
pred = trainer.predict(data_test)
pred = pred[0]
print(pred)
pred = np.argmax(pred,1)
submission = pd.read_csv('data/sample_submission.csv')
submission['topic_idx'] = pred
submission.to_csv("results/klue-bert-base-test1.csv",index=False)

***** Running Prediction *****
  Num examples = 9131
  Batch size = 32


[[ 2.3010418   0.60454476  2.5831442  ... -1.5192208  -1.9917663
  -2.4488974 ]
 [-1.0358155  -1.4699255   0.8764909  ... -0.9663871  -1.260734
  -1.3360157 ]
 [ 1.9182631   0.56999457  2.9712694  ... -1.461546   -2.4731731
  -0.41492572]
 ...
 [-1.0690801  -1.0014663   3.0400763  ... -0.6253231  -2.2352774
  -1.2795537 ]
 [-0.36679175  0.02533752  3.2850606  ... -0.08727325 -2.4016697
  -0.30308434]
 [ 1.0085557  -0.3574027   2.715603   ... -2.3293097  -1.9081482
   2.2117605 ]]


In [22]:
pred = trainer.predict(data_test)
pred1 = pred[0]
pred2 = pred[0]

pred3 = pred1 + pred2
print(pred3)
len(pred3)

***** Running Prediction *****
  Num examples = 9131
  Batch size = 32


[[ 4.6020837   1.2090895   5.1662884  ... -3.0384417  -3.9835327
  -4.8977947 ]
 [-2.071631   -2.939851    1.7529818  ... -1.9327742  -2.521468
  -2.6720314 ]
 [ 3.8365262   1.1399891   5.9425387  ... -2.923092   -4.9463463
  -0.82985145]
 ...
 [-2.1381602  -2.0029325   6.0801525  ... -1.2506462  -4.470555
  -2.5591073 ]
 [-0.7335835   0.05067504  6.5701213  ... -0.17454651 -4.8033395
  -0.6061687 ]
 [ 2.0171113  -0.7148054   5.431206   ... -4.6586194  -3.8162963
   4.423521  ]]


9131

In [21]:
trainer = Trainer(
    model_init=model_init,
    args=args,
    train_dataset=data_train,
    eval_dataset=data_val,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache\huggingface\transformers\fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.7cee10e8ea7ffa278f8be4b141000263f2b18795e5ef5e025352b2af6851f8fb
Model config BertConfig {
  "architectures": [
    "BertForPretraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_

In [22]:
best_run = trainer.hyperparameter_search(n_trials=10, direction="minimize")

[32m[I 2021-08-03 14:30:28,242][0m A new study created in memory with name: no-name-62994dba-8d01-4d96-b32b-f2d84fc4ab12[0m
Trial:
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache\huggingface\transformers\fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.7cee10e8ea7ffa278f8be4b141000263f2b18795e5ef5e025352b2af6851f8fb
Model config BertConfig {
  "architectures": [
    "BertForPretraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3862,0.360997,0.875808
2,0.2548,0.385101,0.877669
3,0.1452,0.467461,0.876246
4,0.0739,0.647516,0.870879
5,0.0306,0.761045,0.870332


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-0\checkpoint-1142
Configuration saved in test-nli\run-0\checkpoint-1142\config.json
Model weights saved in test-nli\run-0\checkpoint-1142\pytorch_model.bin
tokenizer config file saved in test-nli\run-0\checkpoint-1142\tokenizer_config.json
Special tokens file saved in test-nli\run-0\checkpoint-1142\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-0\checkpoint-2284
Configuration saved in test-nli\run-0\checkpoint-2284\config.json
Model weights saved in test-nli\run-0\checkpoint-2284\pytorch_model.bin
tokenizer config file saved in test-nli\run-0\checkpoint-2284\tokenizer_config.json
Special tokens file saved in test-nli\run-0\checkpoint-2284\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-0\checkpoint-3426
C

Epoch,Training Loss,Validation Loss,Accuracy
1,0.353,0.345544,0.880188
2,0.2788,0.343618,0.889059
3,0.211,0.381464,0.887307
4,0.1717,0.442594,0.88665
5,0.1276,0.476842,0.885007


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-1\checkpoint-2283
Configuration saved in test-nli\run-1\checkpoint-2283\config.json
Model weights saved in test-nli\run-1\checkpoint-2283\pytorch_model.bin
tokenizer config file saved in test-nli\run-1\checkpoint-2283\tokenizer_config.json
Special tokens file saved in test-nli\run-1\checkpoint-2283\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-1\checkpoint-4566
Configuration saved in test-nli\run-1\checkpoint-4566\config.json
Model weights saved in test-nli\run-1\checkpoint-4566\pytorch_model.bin
tokenizer config file saved in test-nli\run-1\checkpoint-4566\tokenizer_config.json
Special tokens file saved in test-nli\run-1\checkpoint-4566\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-1\checkpoint-6849
C

Epoch,Training Loss,Validation Loss,Accuracy
1,0.5096,0.531906,0.880626
2,0.3606,0.559147,0.884898
3,0.241,0.651644,0.885007
4,0.1281,0.769746,0.880517


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-2\checkpoint-9131
Configuration saved in test-nli\run-2\checkpoint-9131\config.json
Model weights saved in test-nli\run-2\checkpoint-9131\pytorch_model.bin
tokenizer config file saved in test-nli\run-2\checkpoint-9131\tokenizer_config.json
Special tokens file saved in test-nli\run-2\checkpoint-9131\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-2\checkpoint-18262
Configuration saved in test-nli\run-2\checkpoint-18262\config.json
Model weights saved in test-nli\run-2\checkpoint-18262\pytorch_model.bin
tokenizer config file saved in test-nli\run-2\checkpoint-18262\tokenizer_config.json
Special tokens file saved in test-nli\run-2\checkpoint-18262\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-2\checkpoint-2

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3975,0.386779,0.885993


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-3\checkpoint-4566
Configuration saved in test-nli\run-3\checkpoint-4566\config.json
Model weights saved in test-nli\run-3\checkpoint-4566\pytorch_model.bin
tokenizer config file saved in test-nli\run-3\checkpoint-4566\tokenizer_config.json
Special tokens file saved in test-nli\run-3\checkpoint-4566\special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from test-nli\run-3\checkpoint-4566 (score: 0.38677868247032166).
[32m[I 2021-08-03 15:37:57,267][0m Trial 3 finished with value: 0.8859927718760268 and parameters: {'learning_rate': 5.334074599219582e-06, 'num_train_epochs': 1, 'seed': 21, 'per_device_train_batch_size': 8}. Best is trial 0 with value: 0.8703318366005914.[0m
Trial:
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cac

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3766,0.36282,0.88424


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-4\checkpoint-1142
Configuration saved in test-nli\run-4\checkpoint-1142\config.json
Model weights saved in test-nli\run-4\checkpoint-1142\pytorch_model.bin
tokenizer config file saved in test-nli\run-4\checkpoint-1142\tokenizer_config.json
Special tokens file saved in test-nli\run-4\checkpoint-1142\special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from test-nli\run-4\checkpoint-1142 (score: 0.3628200590610504).
[32m[I 2021-08-03 15:40:02,449][0m Trial 4 finished with value: 0.8842404993976564 and parameters: {'learning_rate': 6.582448154520249e-06, 'num_train_epochs': 1, 'seed': 30, 'per_device_train_batch_size': 32}. Best is trial 0 with value: 0.8703318366005914.[0m
Trial:
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cac

Epoch,Training Loss,Validation Loss,Accuracy
1,0.345,0.342851,0.887855


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
[32m[I 2021-08-03 15:42:55,292][0m Trial 5 pruned. [0m
Trial:
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache\huggingface\transformers\fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.7cee10e8ea7ffa278f8be4b141000263f2b18795e5ef5e025352b2af6851f8fb
Model config BertConfig {
  "architectures": [
    "BertForPretraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6":

Epoch,Training Loss,Validation Loss,Accuracy
1,0.6191,0.586828,0.869675
2,0.416,0.656866,0.869018
3,0.2496,0.690416,0.878217


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-6\checkpoint-9131
Configuration saved in test-nli\run-6\checkpoint-9131\config.json
Model weights saved in test-nli\run-6\checkpoint-9131\pytorch_model.bin
tokenizer config file saved in test-nli\run-6\checkpoint-9131\tokenizer_config.json
Special tokens file saved in test-nli\run-6\checkpoint-9131\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-6\checkpoint-18262
Configuration saved in test-nli\run-6\checkpoint-18262\config.json
Model weights saved in test-nli\run-6\checkpoint-18262\pytorch_model.bin
tokenizer config file saved in test-nli\run-6\checkpoint-18262\tokenizer_config.json
Special tokens file saved in test-nli\run-6\checkpoint-18262\special_tokens_map.json
***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-6\checkpoint-2

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4132,0.328778,0.887307


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
[32m[I 2021-08-03 16:13:05,347][0m Trial 7 pruned. [0m
Trial:
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache\huggingface\transformers\fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.7cee10e8ea7ffa278f8be4b141000263f2b18795e5ef5e025352b2af6851f8fb
Model config BertConfig {
  "architectures": [
    "BertForPretraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6":

Epoch,Training Loss,Validation Loss,Accuracy
1,0.411,0.377559,0.889388


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
[32m[I 2021-08-03 16:18:00,829][0m Trial 8 pruned. [0m
Trial:
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache\huggingface\transformers\fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.7cee10e8ea7ffa278f8be4b141000263f2b18795e5ef5e025352b2af6851f8fb
Model config BertConfig {
  "architectures": [
    "BertForPretraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6":

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3815,0.380395,0.878108


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\run-9\checkpoint-2283
Configuration saved in test-nli\run-9\checkpoint-2283\config.json
Model weights saved in test-nli\run-9\checkpoint-2283\pytorch_model.bin
tokenizer config file saved in test-nli\run-9\checkpoint-2283\tokenizer_config.json
Special tokens file saved in test-nli\run-9\checkpoint-2283\special_tokens_map.json


KeyboardInterrupt: 

In [18]:
best_run

BestRun(run_id='1', objective=0.892782827729712, hyperparameters={'learning_rate': 3.811972118872508e-05, 'num_train_epochs': 1, 'seed': 3, 'per_device_train_batch_size': 32})

In [19]:
for n, v in best_run.hyperparameters.items():
    setattr(trainer.args, n, v)

trainer.train()

loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at C:\Users\or7l0/.cache\huggingface\transformers\fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.7cee10e8ea7ffa278f8be4b141000263f2b18795e5ef5e025352b2af6851f8fb
Model config BertConfig {
  "architectures": [
    "BertForPretraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3297,0.315947,0.892783


***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32
Saving model checkpoint to test-nli\checkpoint-1142
Configuration saved in test-nli\checkpoint-1142\config.json
Model weights saved in test-nli\checkpoint-1142\pytorch_model.bin
tokenizer config file saved in test-nli\checkpoint-1142\tokenizer_config.json
Special tokens file saved in test-nli\checkpoint-1142\special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from test-nli\checkpoint-1142 (score: 0.892782827729712).


TrainOutput(global_step=1142, training_loss=0.39004160518612835, metrics={'train_runtime': 122.486, 'train_samples_per_second': 298.181, 'train_steps_per_second': 9.324, 'total_flos': 531234008317578.0, 'train_loss': 0.39004160518612835, 'epoch': 1.0})

In [20]:
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 9131
  Batch size = 32


{'eval_loss': 0.3159468472003937,
 'eval_accuracy': 0.892782827729712,
 'eval_runtime': 7.6659,
 'eval_samples_per_second': 1191.114,
 'eval_steps_per_second': 37.308,
 'epoch': 1.0}

In [21]:
pred = trainer.predict(data_test)
pred = pred[0]
pred = np.argmax(pred,1)
submission = pd.read_csv('data/sample_submission.csv')
submission['topic_idx'] = pred
submission.to_csv("results/klue-bert-hyperparameter-tuning-0729.csv",index=False)

***** Running Prediction *****
  Num examples = 9131
  Batch size = 32
