In [1]:
# MLM 방식을 이용한 Further pre-traning 방식 구현 예제
# 참고 소스 : https://towardsdatascience.com/masked-language-modelling-with-bert-7d49793e5d2c 참조 바람
import torch
import os

from tqdm.notebook import tqdm
from transformers import BertTokenizer, BertConfig, BertForMaskedLM, BertTokenizerFast
from transformers import AdamW, get_linear_schedule_with_warmup
from os import sys
#sys.path.append('..')
from myutils import GPU_info, seed_everything, mlogging

logfilepath:bwdataset_2022-04-15.log
logfilepath:qnadataset_2022-04-15.log


In [2]:
#======================================================================================================
# *WandB 툴을 이용하여, HyperPrameter sweeps 하는 예제임(http://wandb.ai 에 회원가입 해야 함)
#  
# == HyperPrameter sweeps 과정 ==
#
# 1. 로그인
# import wandb
# wandb.login()  # 로그인 => wandb.ai 사이트에 로그인 후, API Key 입력해야 함
#
# 2. sweep_config 설정
# sweep_config ={ 
#       'method': 'random', #grid, random
#       'metric':{
#           'name': 'val_accuracy', #loss
#           'goal': 'maximize'      # loss일때는 minimize로 설정해야함
#       },
#      'parameters':{
#           'learning_rate':{
#               'distribution': 'uniform',
#               'min' : 0,
#               'max': 7e-5
#           },
#           'batch_size':{
#               'values' : [8, 16, 32]
#           },
#           'epochs':{
#               'values' : [2, 3, 4]
#           }
#       }
#}
#
#sweep_id = wandb.sweep(sweep_config, project="bert-wb-test")
#
# 3. wandb 초기화 
# - wandb.init(config=config)
#
# 4. wandb 로그설정
# - wandb.log({"val_accuracy": total_test_correct / total_test_len})
#
# 5. wand 실행
# - wandb.agent(sweep_id, train, count=3)
#
# 6. wand 종료
# -wandb.finish()
#======================================================================================================
# 1. 로그인
#!pip install wandb -qqq
import wandb
wandb.login()  # 로그인 => wandb.ai 사이트에 로그인 후, API Key 입력해야 함

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkobongsoo[0m (use `wandb login --relogin` to force relogin)


True

In [3]:
# 2. sweep_config 설정

sweep_config ={ 
      'method': 'random', #grid, random
      'metric':{
          'name': 'val_accuracy', #loss
          'goal': 'maximize'      # loss일때는 minimize로 설정해야함
      },
     'parameters':{
          'learning_rate':{
              'distribution': 'uniform',
              'min' : 0,
              'max': 7e-5
          },
          'batch_size':{
              'values' : [8, 16, 32]
          },
          'epochs':{
              'values' : [3, 4, 5]
          }
      }
}

sweep_id = wandb.sweep(sweep_config, project="bert-wb-mlm-test")

Create sweep with ID: 4fsqluk6
Sweep URL: https://wandb.ai/kobongsoo/bert-wb-mlm-test/sweeps/4fsqluk6


In [4]:
# 훈련시킬 말뭉치(사전 만들때 동일한 말뭉치 이용)
input_corpus = "../korpora/kowiki_20190620/wiki_20190620_small.txt"

# test 말뭉치 
eval_corpus = "../korpora/kowiki_20190620/wiki_eval_test.txt"

# 기존 사전훈련된 모델
model_path = "../model/bert/bmc-fpt-wiki_20190620_mecab_false_0311-nouns-0327/"

# 기존 사전 + 추가된 사전 파일
#vocab_path="tokenizer/wiki_20190620_false_0311_speical/bmc_add_wiki_20190620_false_0311.txt"
vocab_path="../model/bert/bmc-fpt-wiki_20190620_mecab_false_0311-nouns-0327/"

# 출력
OUTPATH = '../model/bert/bmc-fpt-wiki_20190620_mecab_false_0311-nouns-0414/'

#batch_size = 32
token_max_len = 128

device = GPU_info()
print(device)

#seed 설정
seed_everything(222)

#logging 설정
logger =  mlogging(loggername="wb-bert-fpt-mlm", logfilename="../log/wb-bert-fpt-mlm")

True
device: cuda:0
cuda index: 0
gpu 개수: 1
graphic name: NVIDIA A30
cuda:0
logfilepath:../log/wb-bert-fpt-mlm_2022-04-15.log


In [5]:
# tokeinzier 생성
# tokenizer 생성
# => BertTokenizer, BertTokenizerFast 둘중 사용하면됨

#tokenizer = BertTokenizer(vocab_file=vocab_path, max_len=token_max_len, do_lower_case=False)
tokenizer = BertTokenizer.from_pretrained(vocab_path, max_len=token_max_len, do_lower_case=False)
# tokenizer = BertTokenizerFast(vocab_file=vocab_file, max_len=token_max_len, do_lower_case=False)


# speical 토큰 계수 + vocab 계수 - 이미 vocab에 포함된 speical 토큰 계수(5)
vocab_size = len(tokenizer.all_special_tokens) + tokenizer.vocab_size - 5 + 1
#vocab_size = len(tokenizer.all_special_tokens) + tokenizer.vocab_size - 5
print('special_token_size: {}, tokenizer.vocab_size: {}'.format(len(tokenizer.all_special_tokens), tokenizer.vocab_size))
print('vocab_size: {}'.format(vocab_size))
print('tokenizer_len: {}'.format(len(tokenizer)))


special_token_size: 27, tokenizer.vocab_size: 167537
vocab_size: 167560
tokenizer_len: 167550


In [6]:
def load_model():
    # 모델 로딩 further pre-training 
    #config = BertConfig.from_pretrained(model_path)
    #model = BertForMaskedLM.from_pretrained(model_path, from_tf=bool(".ckpt" in model_path), config=config) 
    model = BertForMaskedLM.from_pretrained(model_path)    

    #################################################################################
    # 모델 embedding 사이즈를 tokenizer 크기 만큼 재 설정함.
    # 재설정하지 않으면, 다음과 같은 에러 발생함
    # CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)` CUDA 에러가 발생함
    #  indexSelectLargeIndex: block: [306,0,0], thread: [0,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
    #
    #     해당 오류는 기존 Embedding(8002, 768, padding_idx=1) 처럼 입력 vocab 사이즈가 8002인데,
    #     0~8001 사이를 초과하는 word idx 값이 들어가면 에러 발생함.
    #################################################################################
    model.resize_token_embeddings(len(tokenizer))

    model.to(device)
    
    return model

In [7]:
def build_dataset(batch_size):
    from torch.utils.data import DataLoader, RandomSampler
    import sys
    sys.path.append('..')
    from myutils import MLMDataset
  
    # 각 스페셜 tokenid를 구함
    CLStokenid = tokenizer.convert_tokens_to_ids('[CLS]')
    SEPtokenid = tokenizer.convert_tokens_to_ids('[SEP]')
    UNKtokenid = tokenizer.convert_tokens_to_ids('[UNK]')
    PADtokenid = tokenizer.convert_tokens_to_ids('[PAD]')
    MASKtokenid = tokenizer.convert_tokens_to_ids('[MASK]')
    print('CLSid:{}, SEPid:{}, UNKid:{}, PADid:{}, MASKid:{}'.format(CLStokenid, SEPtokenid, UNKtokenid, PADtokenid, MASKtokenid))

    #===============================================================================
    # 학습 dataloader 생성
    train_dataset = MLMDataset(corpus_path = input_corpus,
                               tokenizer = tokenizer, 
                               CLStokeinid = CLStokenid ,   # [CLS] 토큰 id
                               SEPtokenid = SEPtokenid ,    # [SEP] 토큰 id
                               UNKtokenid = UNKtokenid ,    # [UNK] 토큰 id
                               PADtokenid = PADtokenid,    # [PAD] 토큰 id
                               Masktokenid = MASKtokenid,   # [MASK] 토큰 id
                               max_sequence_len=token_max_len,  # max_sequence_len)
                               mlm_probability=0.15,
                               overwrite_cache=False
                              )


    # 학습 dataloader 생성
    # => tenosor로 만듬
    train_loader = DataLoader(train_dataset, 
                              batch_size=batch_size, 
                              #shuffle=True, # dataset을 섞음
                              sampler=RandomSampler(train_dataset, replacement=False), #dataset을 랜덤하게 샘플링함
                              num_workers=3
                             )
    #===============================================================================

    #===============================================================================
    # eval dataloader 생성
    eval_dataset = MLMDataset(corpus_path = eval_corpus,
                               tokenizer = tokenizer, 
                               CLStokeinid = CLStokenid ,   # [CLS] 토큰 id
                               SEPtokenid = SEPtokenid ,    # [SEP] 토큰 id
                               UNKtokenid = UNKtokenid ,    # [UNK] 토큰 id
                               PADtokenid = PADtokenid,    # [PAD] 토큰 id
                               Masktokenid = MASKtokenid,   # [MASK] 토큰 id
                               max_sequence_len=token_max_len,  # max_sequence_len)
                               mlm_probability=0.15,
                               overwrite_cache=False
                              )


    # eval dataloader 생성
    # => tenosor로 만듬
    eval_loader = DataLoader(eval_dataset, 
                              batch_size=batch_size, 
                              #shuffle=True, # dataset을 섞음
                              sampler=RandomSampler(eval_dataset, replacement=False), #dataset을 랜덤하게 샘플링함
                              num_workers=3
                             )
     #===============================================================================

    #print(train_dataset[0])
    
    return train_loader, eval_loader

In [8]:
def Train_epoch(config,
                model,
                train_loader,
                eval_loader):
    
    ##################################################
    epochs = config.epochs            # epochs
    learning_rate = config.learning_rate  # 학습률
    #p_itr = 15000           # 손실률 보여줄 step 수
    #save_steps = 50000     # 50000 step마다 모델 저장
    ##################################################

    # optimizer 적용
    optimizer = AdamW(model.parameters(), 
                     lr=learning_rate, 
                     eps=1e-8) # 0으로 나누는 것을 방지하기 위한 epsilon 값(10^-6 ~ 10^-8 사이 이값 입력합)

    # 총 훈련과정에서 반복할 스탭
    total_steps = len(train_loader)*epochs
    warmup_steps = total_steps * 0.1 #10% of train data for warm-up
    
    # 손실률 보여줄 step 수
    p_itr = int(len(train_loader)*0.1)  
    
    # step마다 모델 저장
    save_steps = int(total_steps * 0.2)
    
    # 스캐줄러 생성
    scheduler = get_linear_schedule_with_warmup(optimizer, 
                                                num_warmup_steps=warmup_steps, 
                                                num_training_steps=total_steps)

    itr = 1
    total_loss = 0
    total_len = 0
    total_correct = 0
    list_training_loss = []
    list_acc_loss = []
    list_validation_acc_loss = []

    model.zero_grad()# 그래디언트 초기화
    for epoch in tqdm(range(epochs)):

        # model.train() # 훈련모드로 변환
        for data in tqdm(train_loader):
            model.train() # 훈련모드로 변환
            #optimizer.zero_grad()
            model.zero_grad()# 그래디언트 초기화

            # 입력 값 설정
            input_ids = data['input_ids'].to(device)
            attention_mask = data['attention_mask'].to(device)
            token_type_ids = data['token_type_ids'].to(device)       
            labels = data['labels'].to(device)
            #print('Labels:{}'.format(labels))

            # 모델 실행
            outputs = model(input_ids=input_ids, 
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids,
                            labels=labels)

            # 출력값 loss,logits를 outputs에서 얻어옴
            loss = outputs.loss
            logits = outputs.logits
            #print('Loss:{}, logits:{}'.format(loss, logits))

            # optimizer 과 scheduler 업데이트 시킴
            loss.backward()   # backward 구함
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)   # 그래디언트 클리핑 (gradient vanishing이나 gradient exploding 방지하기 위한 기법)
            optimizer.step()  # 가중치 파라미터 업데이트(optimizer 이동)
            scheduler.step()  # 학습률 감소

            # ***further pretrain 에는 손실률 계산을 넣지 않음
            # 정확도 계산하는 부분은 no_grade 시켜서, 계산량을 줄임.

            # => torch.no_grad()는 gradient을 계산하는 autograd engine를 비활성화 하여 
            # 필요한 메모리를 줄이고, 연산속도를 증가시키는 역활을 함
            with torch.no_grad():
                
                # 손실(loss) 계산 
                total_loss += loss.item()
                
                # 정확도(Accurarcy) 계산
                pred = torch.argmax(logits, dim=2)
                correct = pred.eq(labels)
                total_correct += correct.sum().item()
                total_len += len(labels) * token_max_len   
    
                # 주기마다 test(validataion) 데이터로 평가하여 손실류 계산함.
                if itr % p_itr == 0:
                    logger.info('[Epoch {}/{}] Iteration {} -> Train Loss: {:.4f}, Train Acc: {:.4f}'.format(epoch+1, epochs, itr, total_loss/p_itr, total_correct/total_len))

                    list_training_loss.append(total_loss/p_itr)
                    
                    # wandb 로그 기록
                    wandb.log({"Loss": total_loss/p_itr,
                              "train_accuracy": total_correct/total_len
                              })
                        
                    total_loss = 0
                    total_len = 0
                    total_correct = 0
                    
                    ####################################################################
                    # 주기마다 eval(validataion) 데이터로 평가하여 손실류 계산함.
                    # 평가 시작
                    model.eval()

                    total_test_correct = 0
                    total_test_len = 0

                    #for data in tqdm(eval_loader):
                    for data in eval_loader:
                        # 입력 값 설정
                        input_ids = data['input_ids'].to(device)
                        attention_mask = data['attention_mask'].to(device)
                        token_type_ids = data['token_type_ids'].to(device)       
                        labels = data['labels'].to(device)

                        # 손실률 계산하는 부분은 no_grade 시켜서, 계산량을 줄임.
                        # => torch.no_grad()는 gradient을 계산하는 autograd engine를 비활성화 하여 
                        # 필요한 메모리를 줄이고, 연산속도를 증가시키는 역활을 함
                        with torch.no_grad():
                            # 모델 실행
                            outputs = model(input_ids=input_ids, 
                                            attention_mask=attention_mask,
                                            token_type_ids=token_type_ids,
                                            labels=labels)

                            # 출력값 loss,logits를 outputs에서 얻어옴
                            #loss = outputs.loss
                            logits = outputs.logits

                            # 정확도(Accurarcy) 계산
                            pred = torch.argmax(logits, dim=2)
                            correct = pred.eq(labels)
                            total_test_correct += correct.sum().item()
                            total_test_len += len(labels) * token_max_len 

                    list_validation_acc_loss.append(total_test_correct/total_test_len)
                    logger.info("[Epoch {}/{}] Validatation Accuracy:{}".format(epoch+1, epochs, total_test_correct / total_test_len))

                    # wandb 로그 기록
                    wandb.log({"val_accuracy": total_test_correct / total_test_len})

                    ####################################################################

                '''
                if itr % save_steps == 0:
                    #전체모델 저장
                    TMP_OUT_PATH = OUTPATH + str(itr)
                    os.makedirs(TMP_OUT_PATH, exist_ok=True)
                    # save_pretrained 로 저장하면 config.json, pytorch_model.bin 2개의 파일이 생성됨
                    model.save_pretrained(TMP_OUT_PATH)
                    #torch.save(model, TMP_OUT_PATH + 'pytorch_model.bin') 

                    # tokeinizer 파일 저장(vocab)
                    VOCAB_PATH = TMP_OUT_PATH
                    #os.makedirs(VOCAB_PATH)
                    tokenizer.save_pretrained(VOCAB_PATH)

                    logger.info('Iteration {} -> save model:{}'.format(itr, TMP_OUT_PATH))
                '''
            itr+=1


In [9]:
# 훈련
def train(config=None):
    # wandb 초기화
    with wandb.init(config=config):
        config = wandb.config
        
        # 데이터 로더 생성
        train_loader, eval_loader = build_dataset(config.batch_size)
        
        # 모델 로딩
        tmodel = load_model()
        
        # 훈련 시작 
        Train_epoch(config, tmodel, train_loader, eval_loader)

In [12]:
# 훈련을 시작 (총 3번(epoch이 아님))
wandb.agent(sweep_id, train, count=5)

[34m[1mwandb[0m: Agent Starting Run: hxjl1sxx with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 3.880686219829353e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


2022-04-15 10:36:12,821 - bwpdataset - INFO - ==>[Start] cached file read: ../korpora/kowiki_20190620/cached_lm_BertTokenizer_128_wiki_20190620_small.txt


CLSid:101, SEPid:102, UNKid:100, PADid:0, MASKid:103
*corpus:../korpora/kowiki_20190620/wiki_20190620_small.txt
*max_sequence_len:128
*mlm_probability:0.15
*CLStokenid:101, SEPtokenid:102, UNKtokenid:100, PADtokeinid:0, Masktokeid:103


2022-04-15 10:36:13,071 - bwpdataset - INFO - <==[End] Loading features from cached file ../korpora/kowiki_20190620/cached_lm_BertTokenizer_128_wiki_20190620_small.txt [took 0.248 s]


*corpus:../korpora/kowiki_20190620/wiki_eval_test.txt
*max_sequence_len:128
*mlm_probability:0.15
*CLStokenid:101, SEPtokenid:102, UNKtokenid:100, PADtokeinid:0, Masktokeid:103
*total_line: 114


  0%|          | 0/114 [00:00<?, ?it/s]

  0%|          | 0/114 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 10:36:25,711 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Iteration 30 -> Train Loss: 0.2275, Train Acc: 0.9723
2022-04-15 10:36:26,498 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Validatation Accuracy:0.9710526315789474
2022-04-15 10:36:34,720 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Iteration 60 -> Train Loss: 0.1617, Train Acc: 0.9771
2022-04-15 10:36:35,510 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Validatation Accuracy:0.9736842105263158
2022-04-15 10:36:43,759 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Iteration 90 -> Train Loss: 0.1390, Train Acc: 0.9789
2022-04-15 10:36:44,563 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Validatation Accuracy:0.9759046052631579
2022-04-15 10:36:52,807 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Iteration 120 -> Train Loss: 0.1222, Train Acc: 0.9811
2022-04-15 10:36:53,664 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Validatation Accuracy:0.9768092105263158
2022-04-15 10:37:01,849 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Iteration 150 -> Train Loss: 0.1290, Train Acc: 0.97

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 10:37:56,361 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Iteration 330 -> Train Loss: 0.0940, Train Acc: 0.9837
2022-04-15 10:37:57,141 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Validatation Accuracy:0.9783717105263158
2022-04-15 10:38:05,431 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Iteration 360 -> Train Loss: 0.0838, Train Acc: 0.9855
2022-04-15 10:38:06,215 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Validatation Accuracy:0.9782072368421053
2022-04-15 10:38:14,533 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Iteration 390 -> Train Loss: 0.0826, Train Acc: 0.9854
2022-04-15 10:38:15,305 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Validatation Accuracy:0.9780427631578947
2022-04-15 10:38:23,555 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Iteration 420 -> Train Loss: 0.0803, Train Acc: 0.9860
2022-04-15 10:38:24,352 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Validatation Accuracy:0.9782894736842105
2022-04-15 10:38:32,640 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Iteration 450 -> Train Loss: 0.0792, Train Acc: 0

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 10:39:27,347 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Iteration 630 -> Train Loss: 0.0602, Train Acc: 0.9888
2022-04-15 10:39:28,145 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Validatation Accuracy:0.9789473684210527
2022-04-15 10:39:36,417 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Iteration 660 -> Train Loss: 0.0570, Train Acc: 0.9892
2022-04-15 10:39:37,211 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Validatation Accuracy:0.9789473684210527
2022-04-15 10:39:45,518 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Iteration 690 -> Train Loss: 0.0580, Train Acc: 0.9891
2022-04-15 10:39:46,315 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Validatation Accuracy:0.9787828947368421
2022-04-15 10:39:54,594 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Iteration 720 -> Train Loss: 0.0533, Train Acc: 0.9898
2022-04-15 10:39:55,402 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Validatation Accuracy:0.9786184210526315
2022-04-15 10:40:03,629 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Iteration 750 -> Train Loss: 0.0563, Train Acc: 0

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 10:40:58,972 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Iteration 930 -> Train Loss: 0.0543, Train Acc: 0.9898
2022-04-15 10:40:59,754 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Validatation Accuracy:0.9785361842105263
2022-04-15 10:41:08,087 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Iteration 960 -> Train Loss: 0.0406, Train Acc: 0.9921
2022-04-15 10:41:08,916 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Validatation Accuracy:0.9787828947368421
2022-04-15 10:41:17,258 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Iteration 990 -> Train Loss: 0.0441, Train Acc: 0.9917
2022-04-15 10:41:18,057 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Validatation Accuracy:0.9785361842105263
2022-04-15 10:41:26,309 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Iteration 1020 -> Train Loss: 0.0433, Train Acc: 0.9917
2022-04-15 10:41:27,155 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Validatation Accuracy:0.9784539473684211
2022-04-15 10:41:35,464 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Iteration 1050 -> Train Loss: 0.0429, Train Acc:

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 10:42:30,699 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Iteration 1230 -> Train Loss: 0.0397, Train Acc: 0.9923
2022-04-15 10:42:31,487 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Validatation Accuracy:0.9786184210526315
2022-04-15 10:42:39,745 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Iteration 1260 -> Train Loss: 0.0318, Train Acc: 0.9939
2022-04-15 10:42:40,552 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Validatation Accuracy:0.9784539473684211
2022-04-15 10:42:48,898 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Iteration 1290 -> Train Loss: 0.0347, Train Acc: 0.9933
2022-04-15 10:42:49,706 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Validatation Accuracy:0.9784539473684211
2022-04-15 10:42:58,035 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Iteration 1320 -> Train Loss: 0.0335, Train Acc: 0.9936
2022-04-15 10:42:58,845 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Validatation Accuracy:0.9784539473684211
2022-04-15 10:43:07,170 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Iteration 1350 -> Train Loss: 0.0360, Train A




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Loss,█▆▅▄▄▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▇▇▆▆▆▇▇▇▇▇▇▇▇▇████████
val_accuracy,▁▃▅▆▇▆▇▇▇▇▇████████████████▇██▇█████████

0,1
Loss,0.03596
train_accuracy,0.99325
val_accuracy,0.97862


[34m[1mwandb[0m: Agent Starting Run: 15j4ezei with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 4.7135827822144786e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


2022-04-15 10:44:20,698 - bwpdataset - INFO - ==>[Start] cached file read: ../korpora/kowiki_20190620/cached_lm_BertTokenizer_128_wiki_20190620_small.txt


CLSid:101, SEPid:102, UNKid:100, PADid:0, MASKid:103
*corpus:../korpora/kowiki_20190620/wiki_20190620_small.txt
*max_sequence_len:128
*mlm_probability:0.15
*CLStokenid:101, SEPtokenid:102, UNKtokenid:100, PADtokeinid:0, Masktokeid:103


2022-04-15 10:44:20,921 - bwpdataset - INFO - <==[End] Loading features from cached file ../korpora/kowiki_20190620/cached_lm_BertTokenizer_128_wiki_20190620_small.txt [took 0.222 s]


*corpus:../korpora/kowiki_20190620/wiki_eval_test.txt
*max_sequence_len:128
*mlm_probability:0.15
*CLStokenid:101, SEPtokenid:102, UNKtokenid:100, PADtokeinid:0, Masktokeid:103
*total_line: 114


  0%|          | 0/114 [00:00<?, ?it/s]

  0%|          | 0/114 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1228 [00:00<?, ?it/s]

2022-04-15 10:44:38,676 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Iteration 122 -> Train Loss: 0.1864, Train Acc: 0.9750
2022-04-15 10:44:39,507 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Validatation Accuracy:0.9761513157894737
2022-04-15 10:44:53,282 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Iteration 244 -> Train Loss: 0.1306, Train Acc: 0.9797
2022-04-15 10:44:54,146 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Validatation Accuracy:0.9780427631578947
2022-04-15 10:45:07,891 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Iteration 366 -> Train Loss: 0.1272, Train Acc: 0.9801
2022-04-15 10:45:08,708 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Validatation Accuracy:0.978125
2022-04-15 10:45:22,458 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Iteration 488 -> Train Loss: 0.1195, Train Acc: 0.9811
2022-04-15 10:45:23,318 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Validatation Accuracy:0.9787006578947368
2022-04-15 10:45:36,721 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Iteration 610 -> Train Loss: 0.1187, Train Acc: 0.9810
2022

  0%|          | 0/1228 [00:00<?, ?it/s]

2022-04-15 10:47:02,829 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Iteration 1342 -> Train Loss: 0.0649, Train Acc: 0.9880
2022-04-15 10:47:03,663 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Validatation Accuracy:0.9790296052631579
2022-04-15 10:47:16,938 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Iteration 1464 -> Train Loss: 0.0658, Train Acc: 0.9877
2022-04-15 10:47:17,772 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Validatation Accuracy:0.9788651315789474
2022-04-15 10:47:31,374 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Iteration 1586 -> Train Loss: 0.0609, Train Acc: 0.9883
2022-04-15 10:47:32,212 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Validatation Accuracy:0.9789473684210527
2022-04-15 10:47:45,908 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Iteration 1708 -> Train Loss: 0.0620, Train Acc: 0.9885
2022-04-15 10:47:46,760 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Validatation Accuracy:0.9792763157894737
2022-04-15 10:48:00,444 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Iteration 1830 -> Train Loss: 0.0652, Train A

  0%|          | 0/1228 [00:00<?, ?it/s]

2022-04-15 10:49:28,298 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Iteration 2562 -> Train Loss: 0.0377, Train Acc: 0.9928
2022-04-15 10:49:29,138 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Validatation Accuracy:0.9789473684210527
2022-04-15 10:49:42,827 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Iteration 2684 -> Train Loss: 0.0355, Train Acc: 0.9933
2022-04-15 10:49:43,671 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Validatation Accuracy:0.9786184210526315
2022-04-15 10:49:57,427 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Iteration 2806 -> Train Loss: 0.0326, Train Acc: 0.9934
2022-04-15 10:49:58,274 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Validatation Accuracy:0.9790296052631579
2022-04-15 10:50:12,059 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Iteration 2928 -> Train Loss: 0.0351, Train Acc: 0.9932
2022-04-15 10:50:12,929 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Validatation Accuracy:0.9787006578947368
2022-04-15 10:50:26,263 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Iteration 3050 -> Train Loss: 0.0319, Train A




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Loss,█▅▅▅▅▅▄▅▅▅▃▃▂▂▃▂▂▃▂▃▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▃▃▃▃▃▄▄▄▄▆▆▆▆▆▆▆▆▆▆██████████
val_accuracy,▁▅▅▆▇▆▅▇▇▅▇▇▇▇▇▆▆█▆▇▇▆▇▆▇▆▇▆▆▆

0,1
Loss,0.03294
train_accuracy,0.99347
val_accuracy,0.9787


[34m[1mwandb[0m: Agent Starting Run: wlmg18qn with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 3.9656329308605424e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


2022-04-15 10:51:56,590 - bwpdataset - INFO - ==>[Start] cached file read: ../korpora/kowiki_20190620/cached_lm_BertTokenizer_128_wiki_20190620_small.txt


CLSid:101, SEPid:102, UNKid:100, PADid:0, MASKid:103
*corpus:../korpora/kowiki_20190620/wiki_20190620_small.txt
*max_sequence_len:128
*mlm_probability:0.15
*CLStokenid:101, SEPtokenid:102, UNKtokenid:100, PADtokeinid:0, Masktokeid:103


2022-04-15 10:51:57,105 - bwpdataset - INFO - <==[End] Loading features from cached file ../korpora/kowiki_20190620/cached_lm_BertTokenizer_128_wiki_20190620_small.txt [took 0.514 s]


*corpus:../korpora/kowiki_20190620/wiki_eval_test.txt
*max_sequence_len:128
*mlm_probability:0.15
*CLStokenid:101, SEPtokenid:102, UNKtokenid:100, PADtokeinid:0, Masktokeid:103
*total_line: 114


  0%|          | 0/114 [00:00<?, ?it/s]

  0%|          | 0/114 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 10:52:09,519 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Iteration 30 -> Train Loss: 0.2244, Train Acc: 0.9730
2022-04-15 10:52:10,405 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Validatation Accuracy:0.9714638157894737
2022-04-15 10:52:18,742 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Iteration 60 -> Train Loss: 0.1630, Train Acc: 0.9769
2022-04-15 10:52:19,555 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Validatation Accuracy:0.9752467105263158
2022-04-15 10:52:27,901 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Iteration 90 -> Train Loss: 0.1377, Train Acc: 0.9797
2022-04-15 10:52:28,682 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Validatation Accuracy:0.9771381578947368
2022-04-15 10:52:36,964 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Iteration 120 -> Train Loss: 0.1283, Train Acc: 0.9801
2022-04-15 10:52:37,802 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Validatation Accuracy:0.9777138157894737
2022-04-15 10:52:45,988 - wb-bert-fpt-mlm - INFO - [Epoch 1/5] Iteration 150 -> Train Loss: 0.1178, Train Acc: 0.98

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 10:53:40,931 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Iteration 330 -> Train Loss: 0.0831, Train Acc: 0.9857
2022-04-15 10:53:41,752 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Validatation Accuracy:0.9792763157894737
2022-04-15 10:53:50,048 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Iteration 360 -> Train Loss: 0.0834, Train Acc: 0.9855
2022-04-15 10:53:50,820 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Validatation Accuracy:0.9792763157894737
2022-04-15 10:53:59,134 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Iteration 390 -> Train Loss: 0.0794, Train Acc: 0.9858
2022-04-15 10:53:59,930 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Validatation Accuracy:0.9793585526315789
2022-04-15 10:54:08,237 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Iteration 420 -> Train Loss: 0.0788, Train Acc: 0.9860
2022-04-15 10:54:09,073 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Validatation Accuracy:0.9792763157894737
2022-04-15 10:54:17,280 - wb-bert-fpt-mlm - INFO - [Epoch 2/5] Iteration 450 -> Train Loss: 0.0785, Train Acc: 0

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 10:55:12,573 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Iteration 630 -> Train Loss: 0.0698, Train Acc: 0.9870
2022-04-15 10:55:13,336 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Validatation Accuracy:0.9795230263157895
2022-04-15 10:55:21,662 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Iteration 660 -> Train Loss: 0.0531, Train Acc: 0.9899
2022-04-15 10:55:22,472 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Validatation Accuracy:0.9796052631578948
2022-04-15 10:55:30,793 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Iteration 690 -> Train Loss: 0.0528, Train Acc: 0.9900
2022-04-15 10:55:31,599 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Validatation Accuracy:0.9796052631578948
2022-04-15 10:55:39,905 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Iteration 720 -> Train Loss: 0.0545, Train Acc: 0.9897
2022-04-15 10:55:40,736 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Validatation Accuracy:0.9789473684210527
2022-04-15 10:55:49,062 - wb-bert-fpt-mlm - INFO - [Epoch 3/5] Iteration 750 -> Train Loss: 0.0558, Train Acc: 0

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 10:56:44,196 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Iteration 930 -> Train Loss: 0.0494, Train Acc: 0.9907
2022-04-15 10:56:44,951 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Validatation Accuracy:0.9793585526315789
2022-04-15 10:56:53,291 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Iteration 960 -> Train Loss: 0.0412, Train Acc: 0.9922
2022-04-15 10:56:54,082 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Validatation Accuracy:0.9795230263157895
2022-04-15 10:57:02,363 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Iteration 990 -> Train Loss: 0.0402, Train Acc: 0.9922
2022-04-15 10:57:03,149 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Validatation Accuracy:0.9792763157894737
2022-04-15 10:57:11,419 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Iteration 1020 -> Train Loss: 0.0405, Train Acc: 0.9921
2022-04-15 10:57:12,192 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Validatation Accuracy:0.9794407894736842
2022-04-15 10:57:20,495 - wb-bert-fpt-mlm - INFO - [Epoch 4/5] Iteration 1050 -> Train Loss: 0.0405, Train Acc:

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 10:58:15,473 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Iteration 1230 -> Train Loss: 0.0420, Train Acc: 0.9916
2022-04-15 10:58:16,227 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Validatation Accuracy:0.9793585526315789
2022-04-15 10:58:24,543 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Iteration 1260 -> Train Loss: 0.0337, Train Acc: 0.9937
2022-04-15 10:58:25,332 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Validatation Accuracy:0.9795230263157895
2022-04-15 10:58:33,567 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Iteration 1290 -> Train Loss: 0.0329, Train Acc: 0.9940
2022-04-15 10:58:34,338 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Validatation Accuracy:0.9795230263157895
2022-04-15 10:58:42,708 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Iteration 1320 -> Train Loss: 0.0320, Train Acc: 0.9940
2022-04-15 10:58:43,502 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Validatation Accuracy:0.9793585526315789
2022-04-15 10:58:51,802 - wb-bert-fpt-mlm - INFO - [Epoch 5/5] Iteration 1350 -> Train Loss: 0.0345, Train A




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Loss,█▆▅▅▄▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▂▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▇▇▆▇▆▆▆▇▇▇▇▇▇▇▇████████
val_accuracy,▁▄▆▆▇█▇▇████▇▇▇███▇████████▇████████████

0,1
Loss,0.03112
train_accuracy,0.994
val_accuracy,0.97969


[34m[1mwandb[0m: Agent Starting Run: cryuteb9 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 3.103372875261932e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


2022-04-15 11:00:04,055 - bwpdataset - INFO - ==>[Start] cached file read: ../korpora/kowiki_20190620/cached_lm_BertTokenizer_128_wiki_20190620_small.txt


CLSid:101, SEPid:102, UNKid:100, PADid:0, MASKid:103
*corpus:../korpora/kowiki_20190620/wiki_20190620_small.txt
*max_sequence_len:128
*mlm_probability:0.15
*CLStokenid:101, SEPtokenid:102, UNKtokenid:100, PADtokeinid:0, Masktokeid:103


2022-04-15 11:00:04,576 - bwpdataset - INFO - <==[End] Loading features from cached file ../korpora/kowiki_20190620/cached_lm_BertTokenizer_128_wiki_20190620_small.txt [took 0.520 s]


*corpus:../korpora/kowiki_20190620/wiki_eval_test.txt
*max_sequence_len:128
*mlm_probability:0.15
*CLStokenid:101, SEPtokenid:102, UNKtokenid:100, PADtokeinid:0, Masktokeid:103
*total_line: 114


  0%|          | 0/114 [00:00<?, ?it/s]

  0%|          | 0/114 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 11:00:16,812 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Iteration 30 -> Train Loss: 0.2218, Train Acc: 0.9732
2022-04-15 11:00:17,626 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Validatation Accuracy:0.9722039473684211
2022-04-15 11:00:25,920 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Iteration 60 -> Train Loss: 0.1566, Train Acc: 0.9774
2022-04-15 11:00:26,711 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Validatation Accuracy:0.9752467105263158
2022-04-15 11:00:35,016 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Iteration 90 -> Train Loss: 0.1395, Train Acc: 0.9785
2022-04-15 11:00:35,848 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Validatation Accuracy:0.9768092105263158
2022-04-15 11:00:44,176 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Iteration 120 -> Train Loss: 0.1342, Train Acc: 0.9792
2022-04-15 11:00:45,029 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Validatation Accuracy:0.9770559210526316
2022-04-15 11:00:53,328 - wb-bert-fpt-mlm - INFO - [Epoch 1/3] Iteration 150 -> Train Loss: 0.1259, Train Acc: 0.98

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 11:01:48,337 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Iteration 330 -> Train Loss: 0.0905, Train Acc: 0.9842
2022-04-15 11:01:49,119 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Validatation Accuracy:0.9787828947368421
2022-04-15 11:01:57,312 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Iteration 360 -> Train Loss: 0.0842, Train Acc: 0.9853
2022-04-15 11:01:58,072 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Validatation Accuracy:0.9787828947368421
2022-04-15 11:02:06,357 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Iteration 390 -> Train Loss: 0.0865, Train Acc: 0.9849
2022-04-15 11:02:07,131 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Validatation Accuracy:0.9785361842105263
2022-04-15 11:02:15,410 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Iteration 420 -> Train Loss: 0.0813, Train Acc: 0.9857
2022-04-15 11:02:16,178 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Validatation Accuracy:0.9784539473684211
2022-04-15 11:02:24,492 - wb-bert-fpt-mlm - INFO - [Epoch 2/3] Iteration 450 -> Train Loss: 0.0835, Train Acc: 0

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 11:03:19,387 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Iteration 630 -> Train Loss: 0.0777, Train Acc: 0.9864
2022-04-15 11:03:20,173 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Validatation Accuracy:0.9782894736842105
2022-04-15 11:03:28,112 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Iteration 660 -> Train Loss: 0.0683, Train Acc: 0.9875
2022-04-15 11:03:28,919 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Validatation Accuracy:0.9788651315789474
2022-04-15 11:03:37,243 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Iteration 690 -> Train Loss: 0.0659, Train Acc: 0.9880
2022-04-15 11:03:38,011 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Validatation Accuracy:0.9787828947368421
2022-04-15 11:03:46,312 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Iteration 720 -> Train Loss: 0.0660, Train Acc: 0.9878
2022-04-15 11:03:47,108 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Validatation Accuracy:0.9787006578947368
2022-04-15 11:03:55,421 - wb-bert-fpt-mlm - INFO - [Epoch 3/3] Iteration 750 -> Train Loss: 0.0665, Train Acc: 0




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Loss,█▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
train_accuracy,▁▃▃▄▄▅▅▅▅▅▆▇▆▇▇▇▆▇▇▇▇█████████
val_accuracy,▁▄▆▆▆▇█▇▇█████████▇█▇███▇█████

0,1
Loss,0.0692
train_accuracy,0.98758
val_accuracy,0.9787


[34m[1mwandb[0m: Agent Starting Run: n0ywhxy7 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 4
[34m[1mwandb[0m: 	learning_rate: 2.8709963639692357e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


2022-04-15 11:05:02,856 - bwpdataset - INFO - ==>[Start] cached file read: ../korpora/kowiki_20190620/cached_lm_BertTokenizer_128_wiki_20190620_small.txt


CLSid:101, SEPid:102, UNKid:100, PADid:0, MASKid:103
*corpus:../korpora/kowiki_20190620/wiki_20190620_small.txt
*max_sequence_len:128
*mlm_probability:0.15
*CLStokenid:101, SEPtokenid:102, UNKtokenid:100, PADtokeinid:0, Masktokeid:103


2022-04-15 11:05:03,069 - bwpdataset - INFO - <==[End] Loading features from cached file ../korpora/kowiki_20190620/cached_lm_BertTokenizer_128_wiki_20190620_small.txt [took 0.212 s]


*corpus:../korpora/kowiki_20190620/wiki_eval_test.txt
*max_sequence_len:128
*mlm_probability:0.15
*CLStokenid:101, SEPtokenid:102, UNKtokenid:100, PADtokeinid:0, Masktokeid:103
*total_line: 114


  0%|          | 0/114 [00:00<?, ?it/s]

  0%|          | 0/114 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 11:05:15,623 - wb-bert-fpt-mlm - INFO - [Epoch 1/4] Iteration 30 -> Train Loss: 0.2201, Train Acc: 0.9734
2022-04-15 11:05:16,435 - wb-bert-fpt-mlm - INFO - [Epoch 1/4] Validatation Accuracy:0.9700657894736842
2022-04-15 11:05:24,757 - wb-bert-fpt-mlm - INFO - [Epoch 1/4] Iteration 60 -> Train Loss: 0.1652, Train Acc: 0.9770
2022-04-15 11:05:25,665 - wb-bert-fpt-mlm - INFO - [Epoch 1/4] Validatation Accuracy:0.9736842105263158
2022-04-15 11:05:33,983 - wb-bert-fpt-mlm - INFO - [Epoch 1/4] Iteration 90 -> Train Loss: 0.1417, Train Acc: 0.9790
2022-04-15 11:05:34,763 - wb-bert-fpt-mlm - INFO - [Epoch 1/4] Validatation Accuracy:0.9751644736842106
2022-04-15 11:05:42,931 - wb-bert-fpt-mlm - INFO - [Epoch 1/4] Iteration 120 -> Train Loss: 0.1318, Train Acc: 0.9796
2022-04-15 11:05:43,689 - wb-bert-fpt-mlm - INFO - [Epoch 1/4] Validatation Accuracy:0.9754934210526316
2022-04-15 11:05:51,975 - wb-bert-fpt-mlm - INFO - [Epoch 1/4] Iteration 150 -> Train Loss: 0.1221, Train Acc: 0.98

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 11:06:46,907 - wb-bert-fpt-mlm - INFO - [Epoch 2/4] Iteration 330 -> Train Loss: 0.0981, Train Acc: 0.9831
2022-04-15 11:06:47,685 - wb-bert-fpt-mlm - INFO - [Epoch 2/4] Validatation Accuracy:0.9768092105263158
2022-04-15 11:06:55,996 - wb-bert-fpt-mlm - INFO - [Epoch 2/4] Iteration 360 -> Train Loss: 0.0860, Train Acc: 0.9851
2022-04-15 11:06:56,771 - wb-bert-fpt-mlm - INFO - [Epoch 2/4] Validatation Accuracy:0.9764802631578947
2022-04-15 11:07:05,066 - wb-bert-fpt-mlm - INFO - [Epoch 2/4] Iteration 390 -> Train Loss: 0.0908, Train Acc: 0.9849
2022-04-15 11:07:05,827 - wb-bert-fpt-mlm - INFO - [Epoch 2/4] Validatation Accuracy:0.9767269736842106
2022-04-15 11:07:14,143 - wb-bert-fpt-mlm - INFO - [Epoch 2/4] Iteration 420 -> Train Loss: 0.0853, Train Acc: 0.9851
2022-04-15 11:07:14,930 - wb-bert-fpt-mlm - INFO - [Epoch 2/4] Validatation Accuracy:0.9766447368421053
2022-04-15 11:07:23,119 - wb-bert-fpt-mlm - INFO - [Epoch 2/4] Iteration 450 -> Train Loss: 0.0862, Train Acc: 0

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 11:08:17,871 - wb-bert-fpt-mlm - INFO - [Epoch 3/4] Iteration 630 -> Train Loss: 0.0722, Train Acc: 0.9867
2022-04-15 11:08:18,652 - wb-bert-fpt-mlm - INFO - [Epoch 3/4] Validatation Accuracy:0.9772203947368421
2022-04-15 11:08:26,803 - wb-bert-fpt-mlm - INFO - [Epoch 3/4] Iteration 660 -> Train Loss: 0.0676, Train Acc: 0.9877
2022-04-15 11:08:27,577 - wb-bert-fpt-mlm - INFO - [Epoch 3/4] Validatation Accuracy:0.9773848684210527
2022-04-15 11:08:35,914 - wb-bert-fpt-mlm - INFO - [Epoch 3/4] Iteration 690 -> Train Loss: 0.0640, Train Acc: 0.9885
2022-04-15 11:08:36,740 - wb-bert-fpt-mlm - INFO - [Epoch 3/4] Validatation Accuracy:0.9771381578947368
2022-04-15 11:08:45,022 - wb-bert-fpt-mlm - INFO - [Epoch 3/4] Iteration 720 -> Train Loss: 0.0679, Train Acc: 0.9875
2022-04-15 11:08:45,791 - wb-bert-fpt-mlm - INFO - [Epoch 3/4] Validatation Accuracy:0.9770559210526316
2022-04-15 11:08:54,095 - wb-bert-fpt-mlm - INFO - [Epoch 3/4] Iteration 750 -> Train Loss: 0.0654, Train Acc: 0

  0%|          | 0/307 [00:00<?, ?it/s]

2022-04-15 11:09:48,892 - wb-bert-fpt-mlm - INFO - [Epoch 4/4] Iteration 930 -> Train Loss: 0.0633, Train Acc: 0.9882
2022-04-15 11:09:49,686 - wb-bert-fpt-mlm - INFO - [Epoch 4/4] Validatation Accuracy:0.9774671052631579
2022-04-15 11:09:57,990 - wb-bert-fpt-mlm - INFO - [Epoch 4/4] Iteration 960 -> Train Loss: 0.0567, Train Acc: 0.9893
2022-04-15 11:09:58,729 - wb-bert-fpt-mlm - INFO - [Epoch 4/4] Validatation Accuracy:0.9775493421052631
2022-04-15 11:10:07,011 - wb-bert-fpt-mlm - INFO - [Epoch 4/4] Iteration 990 -> Train Loss: 0.0521, Train Acc: 0.9904
2022-04-15 11:10:07,774 - wb-bert-fpt-mlm - INFO - [Epoch 4/4] Validatation Accuracy:0.9772203947368421
2022-04-15 11:10:16,028 - wb-bert-fpt-mlm - INFO - [Epoch 4/4] Iteration 1020 -> Train Loss: 0.0563, Train Acc: 0.9894
2022-04-15 11:10:16,781 - wb-bert-fpt-mlm - INFO - [Epoch 4/4] Validatation Accuracy:0.9774671052631579
2022-04-15 11:10:25,101 - wb-bert-fpt-mlm - INFO - [Epoch 4/4] Iteration 1050 -> Train Loss: 0.0565, Train Acc:




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Loss,█▆▅▄▄▄▄▃▃▃▃▂▃▂▂▂▂▂▂▂▂▂▁▂▂▁▂▂▂▂▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▂▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇███████▇█
val_accuracy,▁▄▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██▇▇▇▇██████████████

0,1
Loss,0.05523
train_accuracy,0.98931
val_accuracy,0.97747


In [11]:
'''
### 전체모델 저장
os.makedirs(OUTPATH, exist_ok=True)
#torch.save(model, OUTPATH + 'pytorch_model.bin') 
# save_pretrained 로 저장하면 config.json, pytorch_model.bin 2개의 파일이 생성됨
model.save_pretrained(OUTPATH)

# tokeinizer 파일 저장(vocab)
VOCAB_PATH = OUTPATH
tokenizer.save_pretrained(VOCAB_PATH)
'''

"\n### 전체모델 저장\nos.makedirs(OUTPATH, exist_ok=True)\n#torch.save(model, OUTPATH + 'pytorch_model.bin') \n# save_pretrained 로 저장하면 config.json, pytorch_model.bin 2개의 파일이 생성됨\nmodel.save_pretrained(OUTPATH)\n\n# tokeinizer 파일 저장(vocab)\nVOCAB_PATH = OUTPATH\ntokenizer.save_pretrained(VOCAB_PATH)\n"