In [35]:
import os
import math
import random
import pandas as pd
import regex as re
from typing import Optional, Sequence

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split,KFold
from sklearn.metrics import accuracy_score, precision_recall_fscore_support,\
                             roc_auc_score,f1_score

from tqdm import tqdm
import torch
import torch.nn as nn
from torch import Tensor
from torch.optim.lr_scheduler import _LRScheduler
from torch.utils.data import DataLoader, Dataset, random_split
from torch.nn import functional as F

from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification,\
                        AutoModelForSequenceClassification,AutoTokenizer, EarlyStoppingCallback,\
                        AutoModel,AutoConfig,T5TokenizerFast,ElectraTokenizerFast,ElectraModel,\
                        T5ForConditionalGeneration

import gc
# Disable parallelism inside the Hugging Face fast tokenizers. The variable
# name must be spelled exactly "TOKENIZERS_PARALLELISM" or it is ignored.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [22]:
# Folder holding the CSV files, relative to the notebook.
data_dir = "./dataset/"
train_path, test_path = (os.path.join(data_dir, name) for name in ("train.csv", "test.csv"))

# The training frame loses its 'ID' column on load; the test frame is read as-is.
train = pd.read_csv(train_path).drop(columns=['ID'])
test = pd.read_csv(test_path)

In [28]:
# Render the current `train` frame as the cell output.
train

Unnamed: 0,문장,유형,극성,시제,확실성,label
0,누군가는 누군가는 살아남고 무대를 떠났다,사실형,긍정,과거,확실,사실형-긍정-과거-확실
1,현재 약 200여 명의 한국생애설계사들이 교육컨설턴트 상담사 등으로 활동하고 있으며...,사실형,긍정,현재,확실,사실형-긍정-현재-확실
2,한 활성화돼 말했다 업계의 어려움을 이해해주어 공항 면세점 임대료 감면을 연장해 주...,사실형,긍정,과거,확실,사실형-긍정-과거-확실
3,캐릭터 나열 순서 설정 기능을 통해 각 이용자가 보유한 캐릭터 중 원하는 캐릭터를 ...,사실형,긍정,과거,확실,사실형-긍정-과거-확실
4,정도로 관람객이 거의 없을 주말이지만 한적했다,사실형,긍정,과거,확실,사실형-긍정-과거-확실
...,...,...,...,...,...,...
64692,생명을 다투는 위기에 정쟁은 무의미하다,추론형,긍정,현재,확실,추론형-긍정-현재-확실
64693,조이시티는 코로나19 관련 상황이 안정화될 부서별로 각 때까지 탄력적으로 재택근무를...,사실형,긍정,현재,확실,사실형-긍정-현재-확실
64694,이 가운데 심사를 거쳐 사회적기업으로 인증된 19곳이다 곳은,사실형,긍정,현재,확실,사실형-긍정-현재-확실
64695,코로나19 사태가 언제까지 오리무중이다 지속될지,추론형,긍정,현재,확실,추론형-긍정-현재-확실


In [13]:
# Distinct values of each of the four target columns, shown as a 4-tuple.
tuple(train[column].unique() for column in ("유형", "극성", "시제", "확실성"))

(array(['사실형', '추론형', '예측형', '대화형'], dtype=object),
 array(['긍정', '부정', '미정'], dtype=object),
 array(['현재', '과거', '미래'], dtype=object),
 array(['확실', '불확실'], dtype=object))

In [23]:
def _clean_sentence(text):
    """Strip unsupported characters from a sentence and collapse space runs.

    Args:
        text: Raw sentence string.

    Returns:
        The sentence with every character other than space, ASCII letters,
        digits and Hangul syllables removed, and each run of spaces reduced
        to a single space.
    """
    text = re.sub("[^ A-Za-z0-9가-힣]", "", text)
    # " +" matches a run of spaces. The previous pattern "[ +]" was a character
    # class (space OR plus), so it replaced each space with itself and never
    # collapsed the doubled spaces left behind by removed punctuation.
    return re.sub(" +", " ", text)


# Apply the same cleaning to both splits so train and test text match.
train['문장'] = train['문장'].apply(_clean_sentence)
test['문장'] = test['문장'].apply(_clean_sentence)

In [24]:
# Render the cleaned sentence column as the cell output.
train['문장']

0                       075포인트 금리 인상은 1994년 이후 28년 만에 처음이다
1        이어 앞으로 전문가들과 함께 4주 단위로 상황을 재평가할 예정이라며 그 이전이라도 ...
2        정부가 고유가 대응을 위해 7월부터 연말까지 유류세 인하 폭을 30에서 37까지 확대한다
3        서울시는 올해 3월 즉시 견인 유예시간 60분을 제공하겠다고 밝혔지만 하루 만에 차...
4                  익사한 자는 사다리에 태워 거꾸로 놓고 소금으로 코를 막아 가득 채운다
                               ...                        
16536    신동덤은 신비한 동물사전과 해리 포터 시리즈를 잇는 마법 어드벤처물로 전편에 이어 ...
16537    수족냉증은 어릴 때부터 심했으며 관절은 어디 한 곳이 아니고 목 어깨 팔꿈치 등 허...
16538    김금희 소설가는 계약서 조정이 그리 어려운가 작가를 격려한다면서 그런 문구 하나 고...
16539    1만명이 넘는 방문자수를 기록한 이번 전시회는 총 77개 작품을 넥슨 사옥을 그대로...
16540                                           목민심서의 내용이다
Name: 문장, Length: 16541, dtype: object

In [25]:
# https://github.com/catSirup/KorEDA/blob/master/eda.py
def swap_word(new_words):
    """Swap two randomly chosen, distinct positions of ``new_words`` in place.

    Args:
        new_words: List of words; it is modified in place.

    Returns:
        The same list object. It is returned unchanged when it has fewer than
        two items, or when no distinct second index is drawn within the retry
        budget below.
    """
    if len(new_words) < 2:
        # Nothing to swap. random.randint(0, -1) would raise ValueError on an
        # empty list, and a single word can never get a distinct partner.
        return new_words

    random_idx_1 = random.randint(0, len(new_words)-1)
    random_idx_2 = random_idx_1
    counter = 0

    # Redraw the second index until it differs from the first; give up after
    # more than three redraws and leave the list as it is.
    while random_idx_2 == random_idx_1:
        random_idx_2 = random.randint(0, len(new_words)-1)
        counter += 1
        if counter > 3:
            return new_words

    new_words[random_idx_1], new_words[random_idx_2] = new_words[random_idx_2], new_words[random_idx_1]
    return new_words

def random_swap(words, n):
    """Return a shuffled copy of ``words`` produced by ``n`` calls to ``swap_word``.

    The caller's list is not modified; all swaps act on a copy.
    """
    shuffled = words.copy()
    for _pass in range(n):
        shuffled = swap_word(shuffled)
    return shuffled

def text_aug(sentence, alpha_rs = 0.1, num_aug=3):
    """Create word-order-shuffled variants of a sentence (random-swap augmentation).

    Args:
        sentence: Space-separated input text.
        alpha_rs: Fraction of the word count used as the number of swap passes
            per variant (at least one pass is always made).
        num_aug: When >= 1, the number of variants returned (truncated to an
            integer). When < 1, a single variant is generated and kept with
            probability ``num_aug``, so the result may be empty.

    Returns:
        List of augmented sentences, in random order.
    """
    words = [word for word in sentence.split(' ') if word != ""]
    num_words = len(words)

    n_rs = max(1, int(alpha_rs*num_words))
    # Always generate at least one candidate: the fractional branch below
    # divides by the candidate count, and range() needs an integer.
    num_new_per_technique = max(1, int(num_aug))

    augmented_sentences = []
    for _ in range(num_new_per_technique):
        # A sentence with no words has nothing to swap; emit it as an empty
        # string rather than calling into the swap helpers.
        a_words = random_swap(words, n_rs) if num_words else []
        augmented_sentences.append(" ".join(a_words))

    random.shuffle(augmented_sentences)

    if num_aug >= 1:
        return augmented_sentences[:num_new_per_technique]

    keep_prob = num_aug / len(augmented_sentences)
    return [s for s in augmented_sentences if random.uniform(0, 1) < keep_prob]

# One list of augmented variants per training sentence (default text_aug settings).
aug = train['문장'].apply(text_aug)

In [26]:
# Three copies of the training frame: copy k carries the k-th augmented
# variant of every sentence, with all other columns unchanged.
tmp1, tmp2, tmp3 = (
    train.assign(**{'문장': [variants[k] for variants in aug]})
    for k in range(3)
)

In [27]:
# Stack the original and augmented frames, drop exact duplicate rows (keeping
# the first occurrence), then shuffle all rows and renumber the index.
stacked = pd.concat([train, tmp1, tmp2, tmp3])
deduplicated = stacked.drop_duplicates(keep='first')
train = deduplicated.sample(frac=1).reset_index(drop=True)
train

Unnamed: 0,문장,유형,극성,시제,확실성,label
0,누군가는 누군가는 살아남고 무대를 떠났다,사실형,긍정,과거,확실,사실형-긍정-과거-확실
1,현재 약 200여 명의 한국생애설계사들이 교육컨설턴트 상담사 등으로 활동하고 있으며...,사실형,긍정,현재,확실,사실형-긍정-현재-확실
2,한 활성화돼 말했다 업계의 어려움을 이해해주어 공항 면세점 임대료 감면을 연장해 주...,사실형,긍정,과거,확실,사실형-긍정-과거-확실
3,캐릭터 나열 순서 설정 기능을 통해 각 이용자가 보유한 캐릭터 중 원하는 캐릭터를 ...,사실형,긍정,과거,확실,사실형-긍정-과거-확실
4,정도로 관람객이 거의 없을 주말이지만 한적했다,사실형,긍정,과거,확실,사실형-긍정-과거-확실
...,...,...,...,...,...,...
64692,생명을 다투는 위기에 정쟁은 무의미하다,추론형,긍정,현재,확실,추론형-긍정-현재-확실
64693,조이시티는 코로나19 관련 상황이 안정화될 부서별로 각 때까지 탄력적으로 재택근무를...,사실형,긍정,현재,확실,사실형-긍정-현재-확실
64694,이 가운데 심사를 거쳐 사회적기업으로 인증된 19곳이다 곳은,사실형,긍정,현재,확실,사실형-긍정-현재-확실
64695,코로나19 사태가 언제까지 오리무중이다 지속될지,추론형,긍정,현재,확실,추론형-긍정-현재-확실


In [41]:
device = torch.device("cuda")
checkpoint = "kykim/electra-kor-base"

# Choose the tokenizer class for the selected checkpoint. Every branch must
# bind the same name `tokenizer`, and the ids must match the hub ids exactly
# (hyphens, as in `checkpoint` above and in CustomModel).
if checkpoint == 'paust/pko-t5-large':
    tokenizer = T5TokenizerFast.from_pretrained(checkpoint)
elif checkpoint == 'kykim/electra-kor-base':
    tokenizer = ElectraTokenizerFast.from_pretrained(checkpoint)
else:
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Longest training sentence measured in characters; later passed to the
# tokenizer as `max_length`.
length = train['문장'].str.len().max()

In [55]:
class CustomDataset(torch.utils.data.Dataset):
    """Dataset over tokenizer encodings with optional four-part labels.

    ``encodings`` maps field names to per-example sequences and must contain
    "input_ids". ``labels``, when given and non-empty, maps "type",
    "polarity", "tense" and "certainty" to per-example values.
    """

    # Order in which the label parts are packed into item["labels"].
    _LABEL_KEYS = ("type", "polarity", "tense", "certainty")

    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The dataset size is the number of tokenized examples.
        return len(self.encodings["input_ids"])

    def __getitem__(self, idx):
        """Return one example as a dict of tensors, plus a "labels" tuple if labelled."""
        item = {}
        for key, val in self.encodings.items():
            item[key] = torch.tensor(val[idx])

        if not self.labels:
            return item

        item["labels"] = tuple(
            torch.tensor(self.labels[part][idx]) for part in self._LABEL_KEYS
        )
        return item

In [53]:
class FocalLoss(nn.Module):
    """ Focal Loss, as described in https://arxiv.org/abs/1708.02002.
    It is essentially an enhancement to cross entropy loss and is
    useful for classification tasks when there is a large class imbalance.
    x is expected to contain raw, unnormalized scores for each class.
    y is expected to contain class labels.
    Shape:
        - x: (batch_size, C) or (batch_size, C, d1, d2, ..., dK), K > 0.
        - y: (batch_size,) or (batch_size, d1, d2, ..., dK), K > 0.
    """

    def __init__(self,
                 alpha: Optional[Tensor] = None,
                 gamma: float = 0.,
                 reduction: str = 'mean',
                 ignore_index: int = -100):
        """Constructor.
        Args:
            alpha (Tensor, optional): Weights for each class. Defaults to None.
            gamma (float, optional): A constant, as described in the paper.
                Defaults to 0.
            reduction (str, optional): 'mean', 'sum' or 'none'.
                Defaults to 'mean'.
            ignore_index (int, optional): class label to ignore.
                Defaults to -100.
        Raises:
            ValueError: if ``reduction`` is not one of the accepted strings.
        """
        if reduction not in ('mean', 'sum', 'none'):
            raise ValueError(
                'Reduction must be one of: "mean", "sum", "none".')

        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.ignore_index = ignore_index
        self.reduction = reduction

        # Per-sample weighted NLL; the reduction is applied in forward() after
        # the focal term has been multiplied in.
        self.nll_loss = nn.NLLLoss(
            weight=alpha, reduction='none', ignore_index=ignore_index)

    def __repr__(self):
        arg_keys = ['alpha', 'gamma', 'ignore_index', 'reduction']
        arg_vals = [self.__dict__[k] for k in arg_keys]
        arg_strs = [f'{k}={v!r}' for k, v in zip(arg_keys, arg_vals)]
        arg_str = ', '.join(arg_strs)
        return f'{type(self).__name__}({arg_str})'

    def forward(self, x: Tensor, y: Tensor) -> Tensor:
        """Compute the focal loss of logits ``x`` against class labels ``y``.

        Returns:
            A scalar for 'mean'/'sum' reduction, or one value per unignored
            label for 'none'. When every label equals ``ignore_index`` the
            result is a scalar zero.
        """
        if x.ndim > 2:
            # (N, C, d1, d2, ..., dK) --> (N * d1 * ... * dK, C)
            c = x.shape[1]
            x = x.permute(0, *range(2, x.ndim), 1).reshape(-1, c)
            # (N, d1, d2, ..., dK) --> (N * d1 * ... * dK,)
            y = y.view(-1)

        unignored_mask = y != self.ignore_index
        y = y[unignored_mask]
        x = x[unignored_mask]
        if len(y) == 0:
            # Sum of the empty selection: a zero that shares x's dtype and
            # device and stays attached to the autograd graph, so it can be
            # added to other losses and back-propagated (a fresh
            # torch.tensor(0.) is a detached CPU float32 constant).
            return x.sum()

        # compute weighted cross entropy term: -alpha * log(pt)
        # (alpha is already part of self.nll_loss)
        log_p = F.log_softmax(x, dim=-1)
        ce = self.nll_loss(log_p, y)

        # get true class column from each row
        all_rows = torch.arange(len(x), device=x.device)
        log_pt = log_p[all_rows, y]

        # compute focal term: (1 - pt)^gamma
        pt = log_pt.exp()
        focal_term = (1 - pt)**self.gamma

        # the full loss: -alpha * ((1 - pt)^gamma) * log(pt)
        loss = focal_term * ce

        if self.reduction == 'mean':
            loss = loss.mean()
        elif self.reduction == 'sum':
            loss = loss.sum()

        return loss

def focal_loss(alpha: Optional[Sequence] = None,
               gamma: float = 0.,
               reduction: str = 'mean',
               ignore_index: int = -100,
               device='cpu',
               dtype=torch.float32) -> FocalLoss:
    """Build a FocalLoss, converting the class weights to a tensor first.

    Args:
        alpha (Sequence, optional): Per-class weights. Anything that is not
            already a Tensor is wrapped with ``torch.tensor``. Defaults to None.
        gamma (float, optional): Focusing constant from the paper. Defaults to 0.
        reduction (str, optional): 'mean', 'sum' or 'none'. Defaults to 'mean'.
        ignore_index (int, optional): Class label to ignore. Defaults to -100.
        device (str, optional): Device the weights are moved to. Defaults to 'cpu'.
        dtype (torch.dtype, optional): dtype the weights are cast to.
            Defaults to torch.float32.
    Returns:
        A FocalLoss object
    """
    weights = alpha
    if weights is not None:
        weights = weights if isinstance(weights, Tensor) else torch.tensor(weights)
        weights = weights.to(device=device, dtype=dtype)

    return FocalLoss(alpha=weights,
                     gamma=gamma,
                     reduction=reduction,
                     ignore_index=ignore_index)
        
def compute_metrics(pred):
    """Average the weighted F1 score over the classification heads.

    Args:
        pred: Evaluation output exposing ``label_ids`` and ``predictions``.
            ``label_ids`` is indexed as ``[:, i]`` and ``predictions`` as
            ``[i]`` for head ``i``.

    Returns:
        Dict with the single key 'f1-sum'. Despite the name, the value is the
        mean of the per-head weighted F1 scores; the key is kept so logged
        metric names stay the same.
    """
    labels = pred.label_ids
    preds = pred.predictions
    # One weighted F1 per head; the head count comes from the predictions
    # rather than being hard-coded.
    f1 = [
        f1_score(y_true=labels[:, i], y_pred=preds[i], average='weighted')
        for i in range(len(preds))
    ]
    return {
        'f1-sum': sum(f1) / len(f1)
    }


In [34]:
# Load the checkpoint's configuration, then overwrite its recorded name/path
# with the placeholder "kr.kim" (it shows up as "_name_or_path" in the printout).
config = AutoConfig.from_pretrained(checkpoint)
config.name_or_path="kr.kim"
print(config)

ElectraConfig {
  "_name_or_path": "kr.kim",
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 768,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "transformers_version": "4.25.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 42000
}



In [48]:
# One LabelEncoder per target column, each fitted on the full training frame
# so every fold shares the same class-to-integer mapping.
유형 = LabelEncoder().fit(train['유형'])
극성 = LabelEncoder().fit(train['극성'])
시제 = LabelEncoder().fit(train['시제'])
확실성 = LabelEncoder().fit(train['확실성'])

def encoding(X_train, X_val):
    """Integer-encode the four target columns of both frames.

    The target columns of ``X_train`` and ``X_val`` are overwritten in place
    with the output of the module-level fitted encoders.

    Returns:
        ``(train_labels, val_labels)``: two dicts keyed by 'type', 'polarity',
        'tense' and 'certainty', each holding the encoded column's ``.values``.
    """
    # (frame column, fitted encoder, key used in the returned dicts)
    targets = (
        ("유형", 유형, 'type'),
        ("극성", 극성, 'polarity'),
        ("시제", 시제, 'tense'),
        ("확실성", 확실성, 'certainty'),
    )

    for column, encoder, _key in targets:
        X_train[column] = encoder.transform(X_train[column])
        X_val[column] = encoder.transform(X_val[column])

    train_labels = {key: X_train[column].values for column, _enc, key in targets}
    val_labels = {key: X_val[column].values for column, _enc, key in targets}

    return train_labels, val_labels

In [21]:
def recent_file(path):
    """Return ``"{path}/{name}"`` for the entry of ``path`` with the largest ``os.path.getctime``.

    Raises IndexError when the directory has no entries.
    """
    # Pair every directory entry with its ctime, then order newest first.
    stamped = [
        (entry, os.path.getctime(f"{path}/{entry}"))
        for entry in os.listdir(f"{path}")
    ]
    stamped.sort(key=lambda pair: pair[1], reverse=True)

    newest_name, _newest_time = stamped[0]
    return f"{path}/{newest_name}"

In [22]:
# Empty accumulator; nothing in the visible cells appends to it.
test_results = []

In [57]:
class CustomModel(nn.Module):
    """Pretrained encoder with four linear heads: type (4), polarity (3), tense (3), certainty (2).

    The encoder class is chosen from the module-level ``checkpoint`` and is
    loaded with the module-level ``config``.
    """

    def __init__(self):
        super(CustomModel, self).__init__()
        if checkpoint == 'monologg/kobigbird-bert-case':
            config.attention_type = "original_full"
            self.base_model = AutoModel.from_pretrained(checkpoint, config=config)
        elif checkpoint == 'kykim/electra-kor-base':
            self.base_model = ElectraModel.from_pretrained(checkpoint, config=config)
        elif checkpoint == 'paust/pko-t5-large':
            self.base_model = T5ForConditionalGeneration.from_pretrained(checkpoint, config=config)
        else:
            self.base_model = AutoModel.from_pretrained(checkpoint, config=config)

        # Feature width fed to the heads: the output size of the last encoder
        # layer when the model exposes `encoder.layer`, otherwise 768. Only the
        # lookup failures are caught, so unrelated errors are no longer
        # swallowed by a bare `except`.
        try:
            self.out = self.base_model.encoder.layer[-1].output.dense.out_features
        except (AttributeError, IndexError):
            self.out = 768

        self.type_classifier = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=self.out, out_features=4)
        )
        self.polarity_classifier = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=self.out, out_features=3)
        )
        self.tense_classifier = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=self.out, out_features=2 + 1)
        )
        self.certainty_classifier = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=self.out, out_features=2)
        )

    def forward(self, input_ids, attention_mask, labels=None, token_type_ids=None):
        """Return ``(type, polarity, tense, certainty)`` logits for a batch.

        ``labels`` and ``token_type_ids`` are accepted but not used here.
        """
        if checkpoint == "paust/pko-t5-large":
            x = self.base_model(input_ids=input_ids, attention_mask=attention_mask, decoder_input_ids=input_ids)[0]
        else:
            x = self.base_model(input_ids=input_ids, attention_mask=attention_mask)[0]

        # Features at sequence position 0, computed once and shared by all
        # heads (each head still applies its own dropout).
        cls_state = x[:,0,:].view(-1,self.out)

        type_output = self.type_classifier(cls_state)
        polarity_output = self.polarity_classifier(cls_state)
        tense_output = self.tense_classifier(cls_state)
        certainty_output = self.certainty_classifier(cls_state)
        return type_output, polarity_output, tense_output, certainty_output


In [61]:
# Training hyperparameters consumed by the K-fold training cell.
lr = 1e-4  # passed as learning_rate to TrainingArguments
stop = 3  # early_stopping_patience for EarlyStoppingCallback
epoch = 1000  # num_train_epochs
batch = 16  # per-device train and eval batch size
seed = 42  # used for both KFold and TrainingArguments

In [24]:
# Run Python garbage collection, then release PyTorch's cached CUDA memory.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Tokenize the test sentences and wrap them in an unlabelled dataset.
test_tokenized = tokenizer(test["문장"].tolist(),padding=True, truncation=True, max_length=length, return_tensors="pt")
# The statement previously stopped at `Cust`, which raises NameError.
# CustomDataset accepts encodings alone (labels default to None).
test_dataset = CustomDataset(test_tokenized)

In [59]:

class CustomTrainer(Trainer):
    """Trainer whose loss is the sum of one focal loss per classification head."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Sum the focal losses of the four heads.

        Args:
            model: Model returning ``(type, polarity, tense, certainty)`` logits.
            inputs: Batch dict; "labels" is removed from it and indexed as
                ``labels[:, i]`` for head ``i``.
            return_outputs: When true, also return the per-head argmax predictions.
            num_items_in_batch: Accepted and ignored, so the override can also
                be called by Trainer versions that pass this argument.

        Returns:
            ``loss``, or ``(loss, outputs)`` where ``outputs`` is ``None``
            followed by the four argmax prediction tensors.
        """
        labels = inputs.pop("labels").to(torch.int64)
        type_logit, polarity_logit, tense_logit, certainty_logit = model(**inputs)

        # FocalLoss built without class weights holds no tensors, so a single
        # instance serves every head and needs no device transfer.
        criterion = FocalLoss()
        loss = criterion(type_logit, labels[:, 0]) + \
                criterion(polarity_logit, labels[:, 1]) + \
                criterion(tense_logit, labels[:, 2]) + \
                criterion(certainty_logit, labels[:, 3])

        # Leading None is a placeholder in the first slot of the outputs tuple.
        outputs = None,\
                torch.argmax(type_logit, dim=1),\
                torch.argmax(polarity_logit, dim=1),\
                torch.argmax(tense_logit, dim=1),\
                torch.argmax(certainty_logit, dim=1)

        return (loss, outputs) if return_outputs else loss

In [62]:
print(f'hidden_layers : {config.num_hidden_layers}')
# config.num_hidden_layers = 10
print(f'now_hidden_layers : {config.num_hidden_layers}')

# 5-fold cross-validation: a fresh model is trained on each fold.
# NOTE(review): `train` here already contains the augmented copies stacked
# earlier in the notebook, so word-shuffled variants of one sentence can fall
# into both the training and the validation fold. Confirm the validation
# scores are meant to be read that way.
kf = KFold(n_splits=5, random_state=seed, shuffle=True)
for i, (train_index, test_index) in enumerate(kf.split(train)):
    print(f'Round {i}')
    # `test_index` is this fold's validation rows. Take explicit copies because
    # encoding() overwrites the label columns of both frames in place.
    X_train, X_val = train.loc[train_index, :].copy(), train.loc[test_index, :].copy()
    train_labels, val_labels = encoding(X_train, X_val)
    token_train = tokenizer(X_train.문장.tolist(), padding=True, truncation=True, max_length=length)
    token_val = tokenizer(X_val.문장.tolist(), padding=True, truncation=True, max_length=length)
    train_dataset, val_dataset = CustomDataset(token_train, train_labels), CustomDataset(token_val, val_labels)
    model = CustomModel()
    model.to(device)
    args = TrainingArguments(run_name = f'fold_{i}',                                # run name
                             output_dir= f"fold_{i}",                               # where checkpoints are written
                             evaluation_strategy="steps",                           # evaluate on a step schedule
                             eval_steps=100,                                        # evaluate every 100 steps
                             save_steps=100,                                        # save a checkpoint every 100 steps
                             save_total_limit = 2,                                  # number of checkpoints to keep
                             logging_steps=100,                                     # log the training loss every 100 steps
                             per_device_train_batch_size=batch,                     # training batch size per device
                             per_device_eval_batch_size=batch,                      # evaluation batch size per device
                             gradient_accumulation_steps=16,                        # accumulate gradients over 16 batches
                             num_train_epochs=epoch,                                # upper bound on training epochs
                             learning_rate=lr,                                      # learning rate
                             seed=seed,                                             # seed
                             load_best_model_at_end=True,                           # reload the best checkpoint when training ends
                             fp16=True,
                             do_train=True,
                             do_eval=True,
                             # metric_for_best_model
                             # greater_is_better = True,
    )
    trainer = CustomTrainer(model=model,
                            args=args,                                                        # training arguments
                            train_dataset=train_dataset,                                      # training data
                            eval_dataset=val_dataset,                                         # validation data
                            compute_metrics=compute_metrics,                                  # evaluation metric
                            callbacks=[EarlyStoppingCallback(early_stopping_patience=stop)],) # early stopping
    trainer.train()
    # Drop this fold's model and trainer before the next fold starts.
    del model
    del trainer
    gc.collect() # Python-side garbage collection
    torch.cuda.empty_cache() # release cached GPU memory

hidden_layers : 12
now_hidden_layers : 12
Round 0


loading weights file pytorch_model.bin from cache at C:\Users\kyj09/.cache\huggingface\hub\models--kykim--electra-kor-base\snapshots\8599418d72f5dcb21ae3972ba2405f88c819b195\pytorch_model.bin
Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of ElectraModel were initialized from

  0%|          | 0/202000 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 1.3559, 'learning_rate': 9.995148514851486e-05, 'epoch': 0.49}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-100
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.858784019947052, 'eval_f1-sum': 0.9218885124817623, 'eval_runtime': 54.984, 'eval_samples_per_second': 235.341, 'eval_steps_per_second': 14.713, 'epoch': 0.49}


***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.7944, 'learning_rate': 9.99019801980198e-05, 'epoch': 0.99}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-200
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.6629049777984619, 'eval_f1-sum': 0.9387740821106906, 'eval_runtime': 54.7857, 'eval_samples_per_second': 236.193, 'eval_steps_per_second': 14.767, 'epoch': 0.99}


***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.5496, 'learning_rate': 9.985247524752476e-05, 'epoch': 1.48}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-300
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.5150865912437439, 'eval_f1-sum': 0.956678700559104, 'eval_runtime': 54.9087, 'eval_samples_per_second': 235.664, 'eval_steps_per_second': 14.734, 'epoch': 1.48}


Deleting older checkpoint [fold_0\checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.4493, 'learning_rate': 9.980346534653467e-05, 'epoch': 1.98}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-400
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.3610726594924927, 'eval_f1-sum': 0.9689525806355737, 'eval_runtime': 55.0585, 'eval_samples_per_second': 235.023, 'eval_steps_per_second': 14.693, 'epoch': 1.98}


Deleting older checkpoint [fold_0\checkpoint-200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.2656, 'learning_rate': 9.975396039603961e-05, 'epoch': 2.47}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.26368364691734314, 'eval_f1-sum': 0.9799914058641107, 'eval_runtime': 55.0269, 'eval_samples_per_second': 235.158, 'eval_steps_per_second': 14.702, 'epoch': 2.47}


Deleting older checkpoint [fold_0\checkpoint-300] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.211, 'learning_rate': 9.970445544554457e-05, 'epoch': 2.97}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-600
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.19969169795513153, 'eval_f1-sum': 0.9854046894301614, 'eval_runtime': 55.1722, 'eval_samples_per_second': 234.539, 'eval_steps_per_second': 14.663, 'epoch': 2.97}


Deleting older checkpoint [fold_0\checkpoint-400] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.142, 'learning_rate': 9.965495049504952e-05, 'epoch': 3.46}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-700
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.15942446887493134, 'eval_f1-sum': 0.9892919359399652, 'eval_runtime': 55.0297, 'eval_samples_per_second': 235.146, 'eval_steps_per_second': 14.701, 'epoch': 3.46}


Deleting older checkpoint [fold_0\checkpoint-500] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.1253, 'learning_rate': 9.960544554455446e-05, 'epoch': 3.96}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-800
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.14259210228919983, 'eval_f1-sum': 0.9900406565420652, 'eval_runtime': 55.0331, 'eval_samples_per_second': 235.131, 'eval_steps_per_second': 14.7, 'epoch': 3.96}


Deleting older checkpoint [fold_0\checkpoint-600] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0888, 'learning_rate': 9.955594059405942e-05, 'epoch': 4.46}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-900
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.11629699170589447, 'eval_f1-sum': 0.9924473966322801, 'eval_runtime': 55.0709, 'eval_samples_per_second': 234.97, 'eval_steps_per_second': 14.69, 'epoch': 4.46}


Deleting older checkpoint [fold_0\checkpoint-700] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0787, 'learning_rate': 9.950643564356436e-05, 'epoch': 4.95}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.10234030336141586, 'eval_f1-sum': 0.9936460187474765, 'eval_runtime': 55.0339, 'eval_samples_per_second': 235.128, 'eval_steps_per_second': 14.7, 'epoch': 4.95}


Deleting older checkpoint [fold_0\checkpoint-800] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0716, 'learning_rate': 9.945693069306932e-05, 'epoch': 5.45}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-1100
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.11696507781744003, 'eval_f1-sum': 0.9923428861125986, 'eval_runtime': 55.0309, 'eval_samples_per_second': 235.141, 'eval_steps_per_second': 14.701, 'epoch': 5.45}


Deleting older checkpoint [fold_0\checkpoint-900] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0602, 'learning_rate': 9.940742574257427e-05, 'epoch': 5.94}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-1200
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.10813390463590622, 'eval_f1-sum': 0.9930136163981995, 'eval_runtime': 55.0856, 'eval_samples_per_second': 234.907, 'eval_steps_per_second': 14.686, 'epoch': 5.94}


Deleting older checkpoint [fold_0\checkpoint-1100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0483, 'learning_rate': 9.935792079207921e-05, 'epoch': 6.44}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-1300
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.09795583039522171, 'eval_f1-sum': 0.9939947592137364, 'eval_runtime': 55.1848, 'eval_samples_per_second': 234.485, 'eval_steps_per_second': 14.66, 'epoch': 6.44}


Deleting older checkpoint [fold_0\checkpoint-1000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0522, 'learning_rate': 9.930841584158417e-05, 'epoch': 6.93}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-1400
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.06633509695529938, 'eval_f1-sum': 0.9962331696069653, 'eval_runtime': 55.0221, 'eval_samples_per_second': 235.178, 'eval_steps_per_second': 14.703, 'epoch': 6.93}


Deleting older checkpoint [fold_0\checkpoint-1200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0455, 'learning_rate': 9.925891089108912e-05, 'epoch': 7.43}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.08093179017305374, 'eval_f1-sum': 0.9951484081398407, 'eval_runtime': 55.064, 'eval_samples_per_second': 234.999, 'eval_steps_per_second': 14.692, 'epoch': 7.43}


Deleting older checkpoint [fold_0\checkpoint-1300] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0455, 'learning_rate': 9.920940594059406e-05, 'epoch': 7.92}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-1600
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.0814175009727478, 'eval_f1-sum': 0.9952888656700581, 'eval_runtime': 55.0307, 'eval_samples_per_second': 235.142, 'eval_steps_per_second': 14.701, 'epoch': 7.92}


Deleting older checkpoint [fold_0\checkpoint-1500] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0429, 'learning_rate': 9.915990099009902e-05, 'epoch': 8.42}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_0\checkpoint-1700
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.08053115010261536, 'eval_f1-sum': 0.9954975067676588, 'eval_runtime': 55.1048, 'eval_samples_per_second': 234.825, 'eval_steps_per_second': 14.681, 'epoch': 8.42}


Deleting older checkpoint [fold_0\checkpoint-1600] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from fold_0\checkpoint-1400 (score: 0.06633509695529938).


{'train_runtime': 6299.0261, 'train_samples_per_second': 8216.667, 'train_steps_per_second': 32.068, 'train_loss': 0.2603979595969705, 'epoch': 8.42}
Round 1


loading weights file pytorch_model.bin from cache at C:\Users\kyj09/.cache\huggingface\hub\models--kykim--electra-kor-base\snapshots\8599418d72f5dcb21ae3972ba2405f88c819b195\pytorch_model.bin
Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of ElectraModel were initialized from

  0%|          | 0/202000 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 1.4502, 'learning_rate': 9.995247524752476e-05, 'epoch': 0.49}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-100
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.874043345451355, 'eval_f1-sum': 0.9198834972790705, 'eval_runtime': 45.3054, 'eval_samples_per_second': 285.617, 'eval_steps_per_second': 17.857, 'epoch': 0.49}


***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.8112, 'learning_rate': 9.990346534653466e-05, 'epoch': 0.99}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-200
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.7000640034675598, 'eval_f1-sum': 0.9353608776508942, 'eval_runtime': 45.1816, 'eval_samples_per_second': 286.4, 'eval_steps_per_second': 17.906, 'epoch': 0.99}


***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.5797, 'learning_rate': 9.985396039603961e-05, 'epoch': 1.48}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-300
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.5210573077201843, 'eval_f1-sum': 0.95372913303703, 'eval_runtime': 45.2765, 'eval_samples_per_second': 285.8, 'eval_steps_per_second': 17.868, 'epoch': 1.48}


Deleting older checkpoint [fold_1\checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.4521, 'learning_rate': 9.980445544554457e-05, 'epoch': 1.98}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-400
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.33668917417526245, 'eval_f1-sum': 0.972255748695996, 'eval_runtime': 45.1781, 'eval_samples_per_second': 286.422, 'eval_steps_per_second': 17.907, 'epoch': 1.98}


Deleting older checkpoint [fold_1\checkpoint-200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.2605, 'learning_rate': 9.975495049504951e-05, 'epoch': 2.47}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.2536270022392273, 'eval_f1-sum': 0.9805850331164396, 'eval_runtime': 45.1931, 'eval_samples_per_second': 286.327, 'eval_steps_per_second': 17.901, 'epoch': 2.47}


Deleting older checkpoint [fold_1\checkpoint-300] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.2085, 'learning_rate': 9.970544554455447e-05, 'epoch': 2.97}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-600
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.18162892758846283, 'eval_f1-sum': 0.9870275645978556, 'eval_runtime': 45.155, 'eval_samples_per_second': 286.568, 'eval_steps_per_second': 17.916, 'epoch': 2.97}


Deleting older checkpoint [fold_1\checkpoint-400] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.1341, 'learning_rate': 9.965594059405942e-05, 'epoch': 3.46}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-700
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.14383730292320251, 'eval_f1-sum': 0.9893884071860086, 'eval_runtime': 45.1891, 'eval_samples_per_second': 286.352, 'eval_steps_per_second': 17.903, 'epoch': 3.46}


Deleting older checkpoint [fold_1\checkpoint-500] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.1125, 'learning_rate': 9.960643564356436e-05, 'epoch': 3.96}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-800
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.1136552169919014, 'eval_f1-sum': 0.9917952343021681, 'eval_runtime': 45.1864, 'eval_samples_per_second': 286.369, 'eval_steps_per_second': 17.904, 'epoch': 3.96}


Deleting older checkpoint [fold_1\checkpoint-600] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0813, 'learning_rate': 9.955693069306932e-05, 'epoch': 4.46}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-900
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.112478107213974, 'eval_f1-sum': 0.9924574733641937, 'eval_runtime': 45.1155, 'eval_samples_per_second': 286.819, 'eval_steps_per_second': 17.932, 'epoch': 4.46}


Deleting older checkpoint [fold_1\checkpoint-700] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0798, 'learning_rate': 9.950742574257427e-05, 'epoch': 4.95}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.0806511789560318, 'eval_f1-sum': 0.9948046683672169, 'eval_runtime': 45.1465, 'eval_samples_per_second': 286.623, 'eval_steps_per_second': 17.919, 'epoch': 4.95}


Deleting older checkpoint [fold_1\checkpoint-800] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0612, 'learning_rate': 9.945792079207921e-05, 'epoch': 5.45}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-1100
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.07395507395267487, 'eval_f1-sum': 0.9951790642707224, 'eval_runtime': 45.0259, 'eval_samples_per_second': 287.39, 'eval_steps_per_second': 17.967, 'epoch': 5.45}


Deleting older checkpoint [fold_1\checkpoint-900] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0545, 'learning_rate': 9.940841584158417e-05, 'epoch': 5.94}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-1200
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.07821055501699448, 'eval_f1-sum': 0.9950675880952772, 'eval_runtime': 45.1669, 'eval_samples_per_second': 286.493, 'eval_steps_per_second': 17.911, 'epoch': 5.94}


Deleting older checkpoint [fold_1\checkpoint-1000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0441, 'learning_rate': 9.935891089108911e-05, 'epoch': 6.44}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-1300
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.06529417634010315, 'eval_f1-sum': 0.9959964702054454, 'eval_runtime': 45.1576, 'eval_samples_per_second': 286.552, 'eval_steps_per_second': 17.915, 'epoch': 6.44}


Deleting older checkpoint [fold_1\checkpoint-1100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0455, 'learning_rate': 9.930940594059407e-05, 'epoch': 6.93}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-1400
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.06213758513331413, 'eval_f1-sum': 0.9959326683821402, 'eval_runtime': 45.2217, 'eval_samples_per_second': 286.146, 'eval_steps_per_second': 17.89, 'epoch': 6.93}


Deleting older checkpoint [fold_1\checkpoint-1200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0368, 'learning_rate': 9.925990099009902e-05, 'epoch': 7.43}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.05005645379424095, 'eval_f1-sum': 0.9967151919787364, 'eval_runtime': 45.2095, 'eval_samples_per_second': 286.223, 'eval_steps_per_second': 17.894, 'epoch': 7.43}


Deleting older checkpoint [fold_1\checkpoint-1300] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0362, 'learning_rate': 9.921039603960396e-05, 'epoch': 7.92}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-1600
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.045572422444820404, 'eval_f1-sum': 0.9971250968170952, 'eval_runtime': 44.6165, 'eval_samples_per_second': 290.027, 'eval_steps_per_second': 18.132, 'epoch': 7.92}


Deleting older checkpoint [fold_1\checkpoint-1400] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0411, 'learning_rate': 9.916089108910892e-05, 'epoch': 8.42}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-1700
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.05418479070067406, 'eval_f1-sum': 0.9966416738814305, 'eval_runtime': 44.6121, 'eval_samples_per_second': 290.056, 'eval_steps_per_second': 18.134, 'epoch': 8.42}


Deleting older checkpoint [fold_1\checkpoint-1500] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.034, 'learning_rate': 9.911138613861388e-05, 'epoch': 8.91}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-1800
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.06221068277955055, 'eval_f1-sum': 0.9954866032658906, 'eval_runtime': 44.6606, 'eval_samples_per_second': 289.741, 'eval_steps_per_second': 18.114, 'epoch': 8.91}


Deleting older checkpoint [fold_1\checkpoint-1700] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12940
  Batch size = 16


{'loss': 0.0318, 'learning_rate': 9.906188118811882e-05, 'epoch': 9.41}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_1\checkpoint-1900
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.0550999715924263, 'eval_f1-sum': 0.9962331253153185, 'eval_runtime': 44.6692, 'eval_samples_per_second': 289.685, 'eval_steps_per_second': 18.111, 'epoch': 9.41}


Deleting older checkpoint [fold_1\checkpoint-1800] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from fold_1\checkpoint-1600 (score: 0.045572422444820404).


{'train_runtime': 6826.0131, 'train_samples_per_second': 7582.318, 'train_steps_per_second': 29.593, 'train_loss': 0.2397543666237279, 'epoch': 9.41}
Round 2


loading weights file pytorch_model.bin from cache at C:\Users\kyj09/.cache\huggingface\hub\models--kykim--electra-kor-base\snapshots\8599418d72f5dcb21ae3972ba2405f88c819b195\pytorch_model.bin
Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of ElectraModel were initialized from

  0%|          | 0/202000 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 1.3062, 'learning_rate': 9.995099009900992e-05, 'epoch': 0.49}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-100
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.8655519485473633, 'eval_f1-sum': 0.9172180603341777, 'eval_runtime': 54.402, 'eval_samples_per_second': 237.841, 'eval_steps_per_second': 14.871, 'epoch': 0.49}


***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.755, 'learning_rate': 9.990148514851486e-05, 'epoch': 0.99}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-200
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.6428796648979187, 'eval_f1-sum': 0.9440351152079496, 'eval_runtime': 54.4256, 'eval_samples_per_second': 237.737, 'eval_steps_per_second': 14.864, 'epoch': 0.99}


***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.5268, 'learning_rate': 9.985247524752476e-05, 'epoch': 1.48}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-300
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.4829269349575043, 'eval_f1-sum': 0.9584137546222965, 'eval_runtime': 54.4416, 'eval_samples_per_second': 237.668, 'eval_steps_per_second': 14.86, 'epoch': 1.48}


Deleting older checkpoint [fold_2\checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.3927, 'learning_rate': 9.980297029702971e-05, 'epoch': 1.98}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-400
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.32709458470344543, 'eval_f1-sum': 0.9743798680733649, 'eval_runtime': 54.4783, 'eval_samples_per_second': 237.507, 'eval_steps_per_second': 14.85, 'epoch': 1.98}


Deleting older checkpoint [fold_2\checkpoint-200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.2321, 'learning_rate': 9.975346534653465e-05, 'epoch': 2.47}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.25484323501586914, 'eval_f1-sum': 0.9807159197623884, 'eval_runtime': 54.611, 'eval_samples_per_second': 236.93, 'eval_steps_per_second': 14.814, 'epoch': 2.47}


Deleting older checkpoint [fold_2\checkpoint-300] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.1995, 'learning_rate': 9.970396039603961e-05, 'epoch': 2.97}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-600
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.1834181547164917, 'eval_f1-sum': 0.9855926903557563, 'eval_runtime': 54.4925, 'eval_samples_per_second': 237.446, 'eval_steps_per_second': 14.846, 'epoch': 2.97}


Deleting older checkpoint [fold_2\checkpoint-400] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.1277, 'learning_rate': 9.965445544554456e-05, 'epoch': 3.46}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-700
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.16390928626060486, 'eval_f1-sum': 0.9884156716475401, 'eval_runtime': 54.7575, 'eval_samples_per_second': 236.296, 'eval_steps_per_second': 14.774, 'epoch': 3.46}


Deleting older checkpoint [fold_2\checkpoint-500] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.108, 'learning_rate': 9.96049504950495e-05, 'epoch': 3.96}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-800
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.14229179918766022, 'eval_f1-sum': 0.9896749370016259, 'eval_runtime': 54.4752, 'eval_samples_per_second': 237.521, 'eval_steps_per_second': 14.851, 'epoch': 3.96}


Deleting older checkpoint [fold_2\checkpoint-600] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0785, 'learning_rate': 9.955544554455446e-05, 'epoch': 4.46}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-900
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.11362999677658081, 'eval_f1-sum': 0.9922496561045782, 'eval_runtime': 54.5957, 'eval_samples_per_second': 236.997, 'eval_steps_per_second': 14.818, 'epoch': 4.46}


Deleting older checkpoint [fold_2\checkpoint-700] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0756, 'learning_rate': 9.950594059405942e-05, 'epoch': 4.95}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.09128563106060028, 'eval_f1-sum': 0.9940119036764306, 'eval_runtime': 54.4959, 'eval_samples_per_second': 237.431, 'eval_steps_per_second': 14.845, 'epoch': 4.95}


Deleting older checkpoint [fold_2\checkpoint-800] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0604, 'learning_rate': 9.945643564356436e-05, 'epoch': 5.45}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-1100
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.08712072670459747, 'eval_f1-sum': 0.9941604577522534, 'eval_runtime': 54.5545, 'eval_samples_per_second': 237.176, 'eval_steps_per_second': 14.829, 'epoch': 5.45}


Deleting older checkpoint [fold_2\checkpoint-900] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0536, 'learning_rate': 9.940693069306931e-05, 'epoch': 5.94}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-1200
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.092976413667202, 'eval_f1-sum': 0.9938721085426397, 'eval_runtime': 54.533, 'eval_samples_per_second': 237.269, 'eval_steps_per_second': 14.835, 'epoch': 5.94}


Deleting older checkpoint [fold_2\checkpoint-1000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0526, 'learning_rate': 9.935742574257426e-05, 'epoch': 6.44}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-1300
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.08130865544080734, 'eval_f1-sum': 0.9950141070465592, 'eval_runtime': 54.4651, 'eval_samples_per_second': 237.565, 'eval_steps_per_second': 14.854, 'epoch': 6.44}


Deleting older checkpoint [fold_2\checkpoint-1100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0425, 'learning_rate': 9.930792079207921e-05, 'epoch': 6.93}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-1400
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.07907763868570328, 'eval_f1-sum': 0.9950601217029448, 'eval_runtime': 54.4746, 'eval_samples_per_second': 237.524, 'eval_steps_per_second': 14.851, 'epoch': 6.93}


Deleting older checkpoint [fold_2\checkpoint-1200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0383, 'learning_rate': 9.925841584158417e-05, 'epoch': 7.43}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.08902303874492645, 'eval_f1-sum': 0.9943879122440558, 'eval_runtime': 54.7067, 'eval_samples_per_second': 236.516, 'eval_steps_per_second': 14.788, 'epoch': 7.43}


Deleting older checkpoint [fold_2\checkpoint-1300] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0442, 'learning_rate': 9.920891089108911e-05, 'epoch': 7.92}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-1600
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.06647900491952896, 'eval_f1-sum': 0.9956057557247857, 'eval_runtime': 54.454, 'eval_samples_per_second': 237.613, 'eval_steps_per_second': 14.857, 'epoch': 7.92}


Deleting older checkpoint [fold_2\checkpoint-1400] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0328, 'learning_rate': 9.915940594059406e-05, 'epoch': 8.42}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-1700
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.05870518833398819, 'eval_f1-sum': 0.9964395297048784, 'eval_runtime': 54.4991, 'eval_samples_per_second': 237.417, 'eval_steps_per_second': 14.844, 'epoch': 8.42}


Deleting older checkpoint [fold_2\checkpoint-1500] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0438, 'learning_rate': 9.910990099009902e-05, 'epoch': 8.91}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-1800
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.07686098664999008, 'eval_f1-sum': 0.9952706560365169, 'eval_runtime': 54.3772, 'eval_samples_per_second': 237.949, 'eval_steps_per_second': 14.878, 'epoch': 8.91}


Deleting older checkpoint [fold_2\checkpoint-1600] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0333, 'learning_rate': 9.906039603960396e-05, 'epoch': 9.41}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-1900
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.05526718869805336, 'eval_f1-sum': 0.9970395380345984, 'eval_runtime': 54.3877, 'eval_samples_per_second': 237.903, 'eval_steps_per_second': 14.875, 'epoch': 9.41}


Deleting older checkpoint [fold_2\checkpoint-1700] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0293, 'learning_rate': 9.901089108910892e-05, 'epoch': 9.9}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.06203579530119896, 'eval_f1-sum': 0.9968496696530452, 'eval_runtime': 54.3871, 'eval_samples_per_second': 237.906, 'eval_steps_per_second': 14.875, 'epoch': 9.9}


Deleting older checkpoint [fold_2\checkpoint-1800] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0304, 'learning_rate': 9.896138613861387e-05, 'epoch': 10.4}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-2100
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.05569898709654808, 'eval_f1-sum': 0.9967541074684829, 'eval_runtime': 54.676, 'eval_samples_per_second': 236.649, 'eval_steps_per_second': 14.796, 'epoch': 10.4}


Deleting older checkpoint [fold_2\checkpoint-2000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0281, 'learning_rate': 9.891188118811881e-05, 'epoch': 10.89}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_2\checkpoint-2200
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.07455954700708389, 'eval_f1-sum': 0.9958983545072672, 'eval_runtime': 54.3813, 'eval_samples_per_second': 237.931, 'eval_steps_per_second': 14.876, 'epoch': 10.89}


Deleting older checkpoint [fold_2\checkpoint-2100] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from fold_2\checkpoint-1900 (score: 0.05526718869805336).


{'train_runtime': 8104.9076, 'train_samples_per_second': 6386.007, 'train_steps_per_second': 24.923, 'train_loss': 0.19507073684172196, 'epoch': 10.89}
Round 3


loading weights file pytorch_model.bin from cache at C:\Users\kyj09/.cache\huggingface\hub\models--kykim--electra-kor-base\snapshots\8599418d72f5dcb21ae3972ba2405f88c819b195\pytorch_model.bin
Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of ElectraModel were initialized from

  0%|          | 0/202000 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 1.2806, 'learning_rate': 9.995148514851486e-05, 'epoch': 0.49}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-100
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.8242678642272949, 'eval_f1-sum': 0.9226496412959626, 'eval_runtime': 54.5729, 'eval_samples_per_second': 237.096, 'eval_steps_per_second': 14.824, 'epoch': 0.49}


***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.7525, 'learning_rate': 9.99029702970297e-05, 'epoch': 0.99}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-200
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.6305246949195862, 'eval_f1-sum': 0.9451804084302122, 'eval_runtime': 54.5061, 'eval_samples_per_second': 237.386, 'eval_steps_per_second': 14.842, 'epoch': 0.99}


***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.5315, 'learning_rate': 9.985445544554456e-05, 'epoch': 1.48}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-300
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.45931705832481384, 'eval_f1-sum': 0.9586984777526081, 'eval_runtime': 54.5399, 'eval_samples_per_second': 237.239, 'eval_steps_per_second': 14.833, 'epoch': 1.48}


Deleting older checkpoint [fold_3\checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.3934, 'learning_rate': 9.980495049504951e-05, 'epoch': 1.98}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-400
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.3134855031967163, 'eval_f1-sum': 0.9751092425022483, 'eval_runtime': 54.4078, 'eval_samples_per_second': 237.815, 'eval_steps_per_second': 14.869, 'epoch': 1.98}


Deleting older checkpoint [fold_3\checkpoint-200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.2342, 'learning_rate': 9.975544554455447e-05, 'epoch': 2.47}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.23439043760299683, 'eval_f1-sum': 0.9818815477474448, 'eval_runtime': 54.4646, 'eval_samples_per_second': 237.567, 'eval_steps_per_second': 14.854, 'epoch': 2.47}


Deleting older checkpoint [fold_3\checkpoint-300] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.187, 'learning_rate': 9.970594059405941e-05, 'epoch': 2.97}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-600
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.1732349544763565, 'eval_f1-sum': 0.9874728818876681, 'eval_runtime': 54.4905, 'eval_samples_per_second': 237.454, 'eval_steps_per_second': 14.847, 'epoch': 2.97}


Deleting older checkpoint [fold_3\checkpoint-400] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.1175, 'learning_rate': 9.965643564356436e-05, 'epoch': 3.46}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-700
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.13429246842861176, 'eval_f1-sum': 0.9904532982346703, 'eval_runtime': 54.5023, 'eval_samples_per_second': 237.403, 'eval_steps_per_second': 14.843, 'epoch': 3.46}


Deleting older checkpoint [fold_3\checkpoint-500] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.1003, 'learning_rate': 9.96069306930693e-05, 'epoch': 3.96}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-800
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.09849458187818527, 'eval_f1-sum': 0.9931737996795919, 'eval_runtime': 54.544, 'eval_samples_per_second': 237.221, 'eval_steps_per_second': 14.832, 'epoch': 3.96}


Deleting older checkpoint [fold_3\checkpoint-600] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0694, 'learning_rate': 9.955742574257426e-05, 'epoch': 4.46}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-900
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.09419902414083481, 'eval_f1-sum': 0.9933717046973285, 'eval_runtime': 54.4603, 'eval_samples_per_second': 237.586, 'eval_steps_per_second': 14.855, 'epoch': 4.46}


Deleting older checkpoint [fold_3\checkpoint-700] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0653, 'learning_rate': 9.950792079207922e-05, 'epoch': 4.95}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.0647071972489357, 'eval_f1-sum': 0.9962151144786762, 'eval_runtime': 54.5221, 'eval_samples_per_second': 237.316, 'eval_steps_per_second': 14.838, 'epoch': 4.95}


Deleting older checkpoint [fold_3\checkpoint-800] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0531, 'learning_rate': 9.945841584158416e-05, 'epoch': 5.45}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-1100
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.0718943327665329, 'eval_f1-sum': 0.995174444960799, 'eval_runtime': 54.4473, 'eval_samples_per_second': 237.642, 'eval_steps_per_second': 14.858, 'epoch': 5.45}


Deleting older checkpoint [fold_3\checkpoint-900] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0489, 'learning_rate': 9.940891089108911e-05, 'epoch': 5.94}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-1200
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.06962080299854279, 'eval_f1-sum': 0.9955475860802032, 'eval_runtime': 54.4649, 'eval_samples_per_second': 237.566, 'eval_steps_per_second': 14.854, 'epoch': 5.94}


Deleting older checkpoint [fold_3\checkpoint-1100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0423, 'learning_rate': 9.935940594059407e-05, 'epoch': 6.44}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-1300
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.06020750477910042, 'eval_f1-sum': 0.9959978753138928, 'eval_runtime': 54.6229, 'eval_samples_per_second': 236.879, 'eval_steps_per_second': 14.811, 'epoch': 6.44}


Deleting older checkpoint [fold_3\checkpoint-1000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.04, 'learning_rate': 9.930990099009901e-05, 'epoch': 6.93}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-1400
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.04612154886126518, 'eval_f1-sum': 0.9969865380444505, 'eval_runtime': 54.5749, 'eval_samples_per_second': 237.087, 'eval_steps_per_second': 14.824, 'epoch': 6.93}


Deleting older checkpoint [fold_3\checkpoint-1200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0384, 'learning_rate': 9.926039603960397e-05, 'epoch': 7.43}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.05158518999814987, 'eval_f1-sum': 0.9968400834983737, 'eval_runtime': 54.5569, 'eval_samples_per_second': 237.165, 'eval_steps_per_second': 14.829, 'epoch': 7.43}


Deleting older checkpoint [fold_3\checkpoint-1300] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0347, 'learning_rate': 9.921089108910892e-05, 'epoch': 7.92}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-1600
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.05643206462264061, 'eval_f1-sum': 0.9961192338587872, 'eval_runtime': 54.4669, 'eval_samples_per_second': 237.557, 'eval_steps_per_second': 14.853, 'epoch': 7.92}


Deleting older checkpoint [fold_3\checkpoint-1500] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0339, 'learning_rate': 9.916138613861386e-05, 'epoch': 8.42}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_3\checkpoint-1700
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.04817748814821243, 'eval_f1-sum': 0.996915191992684, 'eval_runtime': 54.562, 'eval_samples_per_second': 237.143, 'eval_steps_per_second': 14.827, 'epoch': 8.42}


Deleting older checkpoint [fold_3\checkpoint-1600] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from fold_3\checkpoint-1400 (score: 0.04612154886126518).


{'train_runtime': 6268.6088, 'train_samples_per_second': 8256.696, 'train_steps_per_second': 32.224, 'train_loss': 0.23665453251670387, 'epoch': 8.42}
Round 4


loading weights file pytorch_model.bin from cache at C:\Users\kyj09/.cache\huggingface\hub\models--kykim--electra-kor-base\snapshots\8599418d72f5dcb21ae3972ba2405f88c819b195\pytorch_model.bin
Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of ElectraModel were initialized from

  0%|          | 0/202000 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 1.2749, 'learning_rate': 9.995049504950496e-05, 'epoch': 0.49}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-100
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.8323339223861694, 'eval_f1-sum': 0.9243668278918692, 'eval_runtime': 34.108, 'eval_samples_per_second': 379.354, 'eval_steps_per_second': 23.719, 'epoch': 0.49}


***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.7552, 'learning_rate': 9.99019801980198e-05, 'epoch': 0.99}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-200
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.6200096607208252, 'eval_f1-sum': 0.9449501456746748, 'eval_runtime': 34.1351, 'eval_samples_per_second': 379.053, 'eval_steps_per_second': 23.7, 'epoch': 0.99}


***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.5094, 'learning_rate': 9.985247524752476e-05, 'epoch': 1.48}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-300
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.4496142864227295, 'eval_f1-sum': 0.9623707584181618, 'eval_runtime': 34.1499, 'eval_samples_per_second': 378.889, 'eval_steps_per_second': 23.69, 'epoch': 1.48}


Deleting older checkpoint [fold_4\checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.3987, 'learning_rate': 9.980346534653467e-05, 'epoch': 1.98}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-400
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.30026912689208984, 'eval_f1-sum': 0.9753497835521967, 'eval_runtime': 34.138, 'eval_samples_per_second': 379.02, 'eval_steps_per_second': 23.698, 'epoch': 1.98}


Deleting older checkpoint [fold_4\checkpoint-200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.2344, 'learning_rate': 9.975396039603961e-05, 'epoch': 2.47}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.23363636434078217, 'eval_f1-sum': 0.982327987951718, 'eval_runtime': 34.1295, 'eval_samples_per_second': 379.115, 'eval_steps_per_second': 23.704, 'epoch': 2.47}


Deleting older checkpoint [fold_4\checkpoint-300] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.1882, 'learning_rate': 9.970445544554457e-05, 'epoch': 2.97}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-600
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.19320175051689148, 'eval_f1-sum': 0.9853285175484413, 'eval_runtime': 34.1771, 'eval_samples_per_second': 378.586, 'eval_steps_per_second': 23.671, 'epoch': 2.97}


Deleting older checkpoint [fold_4\checkpoint-400] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.124, 'learning_rate': 9.965495049504952e-05, 'epoch': 3.46}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-700
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.1474001109600067, 'eval_f1-sum': 0.9888802731305046, 'eval_runtime': 34.1513, 'eval_samples_per_second': 378.873, 'eval_steps_per_second': 23.689, 'epoch': 3.46}


Deleting older checkpoint [fold_4\checkpoint-500] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.1057, 'learning_rate': 9.960544554455446e-05, 'epoch': 3.96}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-800
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.10493812710046768, 'eval_f1-sum': 0.992389566925562, 'eval_runtime': 34.1573, 'eval_samples_per_second': 378.806, 'eval_steps_per_second': 23.685, 'epoch': 3.96}


Deleting older checkpoint [fold_4\checkpoint-600] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0832, 'learning_rate': 9.955643564356436e-05, 'epoch': 4.46}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-900
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.11433342099189758, 'eval_f1-sum': 0.9923668711041639, 'eval_runtime': 34.2068, 'eval_samples_per_second': 378.258, 'eval_steps_per_second': 23.65, 'epoch': 4.46}


Deleting older checkpoint [fold_4\checkpoint-700] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0735, 'learning_rate': 9.950693069306932e-05, 'epoch': 4.95}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.1060372143983841, 'eval_f1-sum': 0.9920876023014448, 'eval_runtime': 34.1908, 'eval_samples_per_second': 378.435, 'eval_steps_per_second': 23.661, 'epoch': 4.95}


Deleting older checkpoint [fold_4\checkpoint-900] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0592, 'learning_rate': 9.945742574257426e-05, 'epoch': 5.45}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-1100
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.09086247533559799, 'eval_f1-sum': 0.9940046854990765, 'eval_runtime': 34.1978, 'eval_samples_per_second': 378.357, 'eval_steps_per_second': 23.656, 'epoch': 5.45}


Deleting older checkpoint [fold_4\checkpoint-800] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.061, 'learning_rate': 9.940792079207921e-05, 'epoch': 5.94}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-1200
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.06282828748226166, 'eval_f1-sum': 0.9959438596723111, 'eval_runtime': 34.1584, 'eval_samples_per_second': 378.794, 'eval_steps_per_second': 23.684, 'epoch': 5.94}


Deleting older checkpoint [fold_4\checkpoint-1000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0486, 'learning_rate': 9.935841584158417e-05, 'epoch': 6.44}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-1300
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.05675218254327774, 'eval_f1-sum': 0.9964196185036156, 'eval_runtime': 34.1838, 'eval_samples_per_second': 378.512, 'eval_steps_per_second': 23.666, 'epoch': 6.44}


Deleting older checkpoint [fold_4\checkpoint-1100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0496, 'learning_rate': 9.930891089108911e-05, 'epoch': 6.93}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-1400
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.07224131375551224, 'eval_f1-sum': 0.9951514719545118, 'eval_runtime': 34.1675, 'eval_samples_per_second': 378.693, 'eval_steps_per_second': 23.677, 'epoch': 6.93}


Deleting older checkpoint [fold_4\checkpoint-1200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0414, 'learning_rate': 9.925940594059407e-05, 'epoch': 7.43}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.06022560968995094, 'eval_f1-sum': 0.9961956823948144, 'eval_runtime': 34.1747, 'eval_samples_per_second': 378.613, 'eval_steps_per_second': 23.672, 'epoch': 7.43}


Deleting older checkpoint [fold_4\checkpoint-1400] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0433, 'learning_rate': 9.920990099009901e-05, 'epoch': 7.92}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-1600
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.052273549139499664, 'eval_f1-sum': 0.9965303850199491, 'eval_runtime': 34.1349, 'eval_samples_per_second': 379.055, 'eval_steps_per_second': 23.7, 'epoch': 7.92}


Deleting older checkpoint [fold_4\checkpoint-1300] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0386, 'learning_rate': 9.916039603960396e-05, 'epoch': 8.42}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-1700
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.047780465334653854, 'eval_f1-sum': 0.9966826761463308, 'eval_runtime': 34.1892, 'eval_samples_per_second': 378.453, 'eval_steps_per_second': 23.662, 'epoch': 8.42}


Deleting older checkpoint [fold_4\checkpoint-1500] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0365, 'learning_rate': 9.911089108910892e-05, 'epoch': 8.91}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-1800
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.0599055215716362, 'eval_f1-sum': 0.9957534439779048, 'eval_runtime': 34.2467, 'eval_samples_per_second': 377.817, 'eval_steps_per_second': 23.623, 'epoch': 8.91}


Deleting older checkpoint [fold_4\checkpoint-1600] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0323, 'learning_rate': 9.906138613861386e-05, 'epoch': 9.41}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-1900
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.0461689792573452, 'eval_f1-sum': 0.9970631663502055, 'eval_runtime': 34.2045, 'eval_samples_per_second': 378.283, 'eval_steps_per_second': 23.652, 'epoch': 9.41}


Deleting older checkpoint [fold_4\checkpoint-1700] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12939
  Batch size = 16


{'loss': 0.0383, 'learning_rate': 9.901188118811882e-05, 'epoch': 9.9}


  0%|          | 0/809 [00:00<?, ?it/s]

Saving model checkpoint to fold_4\checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


{'eval_loss': 0.057122115045785904, 'eval_f1-sum': 0.9962343526400518, 'eval_runtime': 34.1405, 'eval_samples_per_second': 378.992, 'eval_steps_per_second': 23.696, 'epoch': 9.9}


Deleting older checkpoint [fold_4\checkpoint-1800] due to args.save_total_limit


KeyboardInterrupt: 

In [63]:
def recent_file(path):
    """Return the path of the newest entry (file or directory) inside ``path``.

    "Newest" is decided by ``os.path.getctime``. Note that this is the creation
    time on Windows but the last metadata-change time on most Unix systems.

    Args:
        path: Directory whose entries are inspected.

    Returns:
        The string ``f"{path}/{name}"`` for the entry with the largest ctime.
        When several entries share that ctime, the one listed first by
        ``os.listdir`` is returned.

    Raises:
        FileNotFoundError: If ``path`` contains no entries (or does not exist,
            in which case ``os.listdir`` raises it).
    """
    entry_names = os.listdir(f"{path}")
    if not entry_names:
        # Fail with a message that names the directory instead of an IndexError.
        raise FileNotFoundError(f"no entries found in directory: {path}")
    # max() returns the first maximal element, which matches taking element 0
    # of a stable descending sort.
    newest_name = max(entry_names, key=lambda name: os.path.getctime(f"{path}/{name}"))
    return f"{path}/{newest_name}"

In [65]:
import json  # only needed in this cell, to read the Trainer's saved state


def best_checkpoint_dir(fold_dir):
    """Pick the checkpoint directory whose weights should be loaded for a fold.

    The newest checkpoint is not necessarily the best one: the training log
    reports e.g. "Loading best model from fold_3\\checkpoint-1400" while
    checkpoint-1700 is the last one written. The Hugging Face Trainer is
    expected to record the best checkpoint under ``best_model_checkpoint`` in
    the ``trainer_state.json`` it writes into every checkpoint directory, so
    that value is read from the newest checkpoint.

    Args:
        fold_dir: Output directory of one fold (e.g. ``"fold_0"``).

    Returns:
        Path of the best checkpoint directory if it can be resolved and holds a
        ``pytorch_model.bin``; otherwise the newest entry of ``fold_dir`` as
        returned by ``recent_file`` (the previous behaviour).
    """
    latest_dir = recent_file(fold_dir)
    state_path = os.path.join(latest_dir, "trainer_state.json")
    if os.path.isfile(state_path):
        with open(state_path, encoding="utf-8") as state_file:
            best_recorded = json.load(state_file).get("best_model_checkpoint")
        if best_recorded:
            # Keep only the directory name so the lookup survives a different
            # path separator or a relocated working directory.
            best_name = best_recorded.replace("\\", "/").rstrip("/").rsplit("/", 1)[-1]
            best_dir = os.path.join(fold_dir, best_name)
            if os.path.isfile(os.path.join(best_dir, "pytorch_model.bin")):
                return best_dir
    return latest_dir


gc.collect()  # release unreferenced Python objects
torch.cuda.empty_cache()  # release cached GPU memory

# Tokenize the whole test set once; the same dataset is reused for every fold.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
tokenized = tokenizer(test.문장.tolist(), padding=True, truncation=True, max_length=length, return_tensors="pt")
test_dataset = CustomDataset(tokenized, None)  # no labels at inference time
test_args = TrainingArguments(
    output_dir='./',
    do_train=False,
    do_predict=True,
    per_device_eval_batch_size=512,
    dataloader_drop_last=False,
)

# Count consecutive fold_0, fold_1, ... directories.
n_folds = 0
while os.path.isdir(f'fold_{n_folds}'):
    n_folds += 1
if n_folds == 0:
    # Without this the decoding step would later fail on an empty result list.
    raise FileNotFoundError("no fold_0 directory found; train at least one fold before predicting")

# One prediction output per fold, in fold order.
test_results = []
for i in range(n_folds):
    print(f'Round {i}')
    model = CustomModel().to(device)
    weights_path = os.path.join(best_checkpoint_dir(f'fold_{i}'), 'pytorch_model.bin')
    # map_location keeps loading working when the weights were saved on a
    # different device than the one used now.
    model.load_state_dict(torch.load(weights_path, map_location=device))
    trainer = CustomTrainer(
        model=model,
        args=test_args,
        compute_metrics=compute_metrics,
    )
    test_results.append(trainer.predict(test_dataset))
    # Drop the model and trainer before the next fold to bound memory use.
    del model
    del trainer
    gc.collect()  # release unreferenced Python objects
    torch.cuda.empty_cache()  # release cached GPU memory

loading configuration file config.json from cache at C:\Users\kyj09/.cache\huggingface\hub\models--kykim--electra-kor-base\snapshots\8599418d72f5dcb21ae3972ba2405f88c819b195\config.json
Model config ElectraConfig {
  "_name_or_path": "kykim/electra-kor-base",
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 768,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "transformers_version": "4.25.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 42000
}

loading file vocab.txt from 

Round 0


Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of ElectraModel were initialized from the model checkpoint at kykim/electra-kor-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use ElectraModel for predictions without furthe

  0%|          | 0/14 [00:00<?, ?it/s]

loading weights file pytorch_model.bin from cache at C:\Users\kyj09/.cache\huggingface\hub\models--kykim--electra-kor-base\snapshots\8599418d72f5dcb21ae3972ba2405f88c819b195\pytorch_model.bin


Round 1


Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of ElectraModel were initialized from the model checkpoint at kykim/electra-kor-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use ElectraModel for predictions without furthe

  0%|          | 0/14 [00:00<?, ?it/s]

loading weights file pytorch_model.bin from cache at C:\Users\kyj09/.cache\huggingface\hub\models--kykim--electra-kor-base\snapshots\8599418d72f5dcb21ae3972ba2405f88c819b195\pytorch_model.bin


Round 2


Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of ElectraModel were initialized from the model checkpoint at kykim/electra-kor-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use ElectraModel for predictions without furthe

  0%|          | 0/14 [00:00<?, ?it/s]

loading weights file pytorch_model.bin from cache at C:\Users\kyj09/.cache\huggingface\hub\models--kykim--electra-kor-base\snapshots\8599418d72f5dcb21ae3972ba2405f88c819b195\pytorch_model.bin


Round 3


Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of ElectraModel were initialized from the model checkpoint at kykim/electra-kor-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use ElectraModel for predictions without furthe

  0%|          | 0/14 [00:00<?, ?it/s]

loading weights file pytorch_model.bin from cache at C:\Users\kyj09/.cache\huggingface\hub\models--kykim--electra-kor-base\snapshots\8599418d72f5dcb21ae3972ba2405f88c819b195\pytorch_model.bin


Round 4


Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of ElectraModel were initialized from the model checkpoint at kykim/electra-kor-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use ElectraModel for predictions without furthe

  0%|          | 0/14 [00:00<?, ?it/s]

In [66]:
import numpy as np

def decode_head(results, head_idx, label_encoder):
    """Average one output head over all folds and map it back to label strings.

    Args:
        results: Per-fold prediction outputs; each must expose
            ``.predictions[head_idx]``.
        head_idx: Position of the head inside ``predictions``.
        label_encoder: Object with ``inverse_transform`` (presumably the fitted
            ``LabelEncoder`` for this head — verify against the training cell).

    Returns:
        Array with one decoded label per test row.

    Raises:
        ValueError: If ``results`` is empty.
    """
    if not results:
        raise ValueError("test_results is empty; run the inference cell first")
    # Same ensemble rule as before: element-wise sum over folds / fold count.
    averaged = sum(fold_result.predictions[head_idx] for fold_result in results) / len(results)
    # assumes each head's output is (n_samples, n_classes) — TODO confirm
    class_ids = np.argmax(averaged, axis=1)
    # A single vectorised call replaces one inverse_transform call per row.
    return label_encoder.inverse_transform(class_ids)


# The order of the heads follows the index used into `predictions`.
for head_idx, (column_name, label_encoder) in enumerate(
    [('유형', 유형), ('극성', 극성), ('시제', 시제), ('확실성', 확실성)]
):
    test[column_name] = decode_head(test_results, head_idx, label_encoder)

In [67]:
# Join the four predicted attributes into a single '-'-separated label,
# in the order 유형, 극성, 시제, 확실성.
combined_label = test['유형']
for part_column in ('극성', '시제', '확실성'):
    combined_label = combined_label + '-' + test[part_column]
test['label'] = combined_label
test

Unnamed: 0,ID,문장,유형,극성,시제,확실성,label
0,TEST_0000,장욱진의 가족은 허물 없는 가족애를 처음 공개되는 정약용의 정효자전과 정부인전은 강...,사실형,긍정,현재,확실,사실형-긍정-현재-확실
1,TEST_0001,조지 W 부시 버락 오바마 전 대통령도 전쟁 위험 때문에 버린 카드다,사실형,긍정,현재,확실,사실형-긍정-현재-확실
2,TEST_0002,지난해 1분기 128억원이었던 영업이익이 올해 1분기 505억원으로 급증했다,사실형,긍정,과거,확실,사실형-긍정-과거-확실
3,TEST_0003,수상 작가와 맺으려던 계약서 내용 가운데 일부가 독소 조항으로 해석돼 수정을 요청받...,사실형,긍정,현재,확실,사실형-긍정-현재-확실
4,TEST_0004,결국 최근 KDB산업은행은 대규모 손실 위기에 닥친 에어부산에 140억원 금융지원을...,사실형,긍정,과거,확실,사실형-긍정-과거-확실
...,...,...,...,...,...,...,...
7085,TEST_7085,2020 세계국가편람 모바일 앱은 세계 216개국의 국가개황과 주요 경제지표 사회개...,사실형,긍정,현재,확실,사실형-긍정-현재-확실
7086,TEST_7086,탈세계화 징후들이 반갑지 않은 이유다,추론형,긍정,현재,확실,추론형-긍정-현재-확실
7087,TEST_7087,틱톡은 6월 인터넷 안전의 달을 맞아 올바른 개인정보 보호 관리 방법 앱 내 유용한...,사실형,긍정,미래,확실,사실형-긍정-미래-확실
7088,TEST_7088,만약 3개월 간 채굴자들의 투표를 거쳐 23 이상의 해시파워가 채굴세 도입에 찬성한...,추론형,긍정,미래,불확실,추론형-긍정-미래-불확실


In [69]:
# Fill the sample submission with the predicted labels (aligned on the index).
sub = pd.read_csv(os.path.join(data_dir, 'sample_submission.csv'))
sub['label'] = test['label']
if len(sub) != len(test) or sub['label'].isna().any():
    # Index-aligned assignment leaves NaN where rows do not line up; refuse to
    # write such a file instead of producing a silently broken submission.
    raise ValueError(
        f"submission rows ({len(sub)}) and predictions ({len(test)}) do not line up"
    )

# Use the first unused numbered file name so earlier submissions are kept.
submission_idx = 0
while os.path.exists(f'제출{submission_idx}.csv'):
    submission_idx += 1
sub.to_csv(f'제출{submission_idx}.csv', index=False, mode='w')