In [1]:
# NLI(Natural Language Interference:자연어 추론) 훈련 예제
#
# => input_ids : [CLS]senetence1(전제)[SEP]sentence2(가설)
# => attention_mask : 1111111111(전체,가설)0000000(그외)
# => token_type_ids : 0000000(전제)1111111(가설)00000000(그외)
# => laels : 참(수반:entailment), 거짓(모순:contradiction), 모름(중립:neutral)

import numpy as np
import pandas as pd
import torch
import os
import torch.nn.functional as F

from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, DistilBertConfig, DistilBertModel, AdamW, get_linear_schedule_with_warmup

import sys
sys.path.append("../../")
from myutils import seed_everything, GPU_info, mlogging
from tqdm.notebook import tqdm

logger = mlogging(loggername="distilbertfttrain", logfilename="distilbertftmultitrain")
device = GPU_info()
seed_everything(111)

logfilepath:bwdataset_2022-03-30.log
logfilepath:qnadataset_2022-03-30.log
logfilepath:distilbertftmultitrain_2022-03-30.log
True
device: cuda:0
cuda index: 0
gpu 개수: 1
graphic name: NVIDIA A30


In [2]:
#############################################################################################
# 변수들 설정
# - model_path : from_pretrained() 로 호출하는 경우에는 모델파일이 있는 폴더 경로나 
#          huggingface에 등록된 모델명(예:'bert-base-multilingual-cased')
#          torch.load(model)로 로딩하는 경우에는 모델 파일 풀 경로
#
# - vocab_path : from_pretrained() 호출하는 경우에는 모델파일이 있는 폴더 경로나
#          huggingface에 등록된 모델명(예:'bert-base-multilingual-cased')   
#          BertTokenizer() 로 호출하는 경우에는 vocab.txt 파일 풀 경로,
#
# - OUTPATH : 출력 모델, vocab 저장할 폴더 경로
#############################################################################################

model_path = '../../model/distilbert/distilbert-base-multilingual-cased'
vocab_path = '../../model/distilbert/distilbert-base-multilingual-cased'
OUTPATH = '../../model/distilbert/distilbert-base-multilingual-cased-nli-0330'

# tokeniaer 및 model 설정
#tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

# strip_accents=False : True로 하면, 가자 => ㄱ ㅏ ㅈ ㅏ 식으로 토큰화 되어 버림(*따라서 한국어에서는 반드시 False)
# do_lower_case=False : # 소문자 입력 사용 안함(한국어에서는 반드시 False)
tokenizer = DistilBertTokenizer.from_pretrained(vocab_path, strip_accents=False, do_lower_case=False) 
                        
# NLI 모델에서 레벨은 3개지(참,거짓,모름) 이므로, num_labels=3을 입력함
model = DistilBertForSequenceClassification.from_pretrained(model_path, num_labels=3)
#model = BertForSequenceClassification.from_pretrained('bert-base-multilingual-cased', num_labels=6)

# 레벨을 멀티로 선택해야 하는 경우
#model = BertForSequenceClassification.from_pretrained(model_path, problem_type="multi_label_classification",num_labels=6)
                   
#기존 모델 파일을 로딩하는 경우    
#model = torch.load(model_path) 

model.to(device)

Some weights of the model checkpoint at ../../model/distilbert/distilbert-base-multilingual-cased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at ../../model/distilbert/distilbert-base-multilingual-cased and are newly i

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(119547, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
      

In [3]:
model.num_parameters()

135326979

In [4]:
# 학습 data loader 생성
sys.path.append('..')
from myutils import ClassificationDataset, KlueNLICorpus, data_collator
from torch.utils.data import DataLoader, RandomSampler

#############################################################################
# 변수 설정
#############################################################################
max_seq_len = 128   # 글자 최대 토큰 길이 해당 토큰 길이 이상은 잘린다.
batch_size = 16        # 배치 사이즈(64면 GUP Memory 오류 나므로, 32 이하로 설정할것=>max_seq_length 를 줄이면, 64도 가능함)

# 훈련할 csv 파일
file_fpath = '../../korpora/klue-nli/klue-nli-v1.1_train.json'
#file_fpath = 'Korpora/nsmc/ratings_train.txt'
cache = True   # 캐쉬파일 생성할거면 True로 (True이면 loding할때 캐쉬파일있어도 이용안함)
#############################################################################

# corpus 파일 설정
corpus = KlueNLICorpus()

# 학습 dataset 생성
dataset = ClassificationDataset(file_fpath=file_fpath,max_seq_length=max_seq_len, tokenizer=tokenizer, corpus=corpus, overwrite_cache=cache)


# 학습 dataloader 생성
train_loader = DataLoader(dataset, 
                          batch_size=batch_size, 
                          #shuffle=True, # dataset을 섞음
                          sampler=RandomSampler(dataset, replacement=False), #dataset을 랜덤하게 샘플링함
                          collate_fn=data_collator, # dataset을 tensor로 변환(예시 {'input_ids':tensor[0,1,2,3,1,], 'token_type_id:tensor[0,0,0,0,0], 'attention_mask:tensor[1,1,1,1,1], 'labels':tensor[5]}
                          num_workers=4)

# 평가 dataset 생성
file_fpath = '../../korpora/klue-nli/klue-nli-v1.1_dev.json'
dataset = ClassificationDataset(file_fpath=file_fpath, max_seq_length=max_seq_len, tokenizer=tokenizer, corpus=corpus, overwrite_cache=cache)

# 평가 dataloader 생성
eval_loader = DataLoader(dataset, 
                          batch_size=batch_size, 
                          #shuffle=True, # dataset을 섞음
                          sampler=RandomSampler(dataset, replacement=False), #dataset을 랜덤하게 샘플링함
                          collate_fn=data_collator, # dataset을 tensor로 변환(예시 {'input_ids':tensor[0,1,2,3,1,], 'token_type_id:tensor[0,0,0,0,0], 'attention_mask:tensor[1,1,1,1,1], 'labels':tensor[5]}
                          num_workers=4)

print('train_loader_len: {}, eval_loader_len: {}'.format(len(train_loader), len(eval_loader)))

2022-03-30 14:27:00,247 - bwpdataset - INFO - Creating features from dataset file at ../../korpora/klue-nli/klue-nli-v1.1_train.json
2022-03-30 14:27:00,248 - bwpdataset - INFO - loading data... LOOKING AT ../../korpora/klue-nli/klue-nli-v1.1_train.json
2022-03-30 14:27:00,484 - bwpdataset - INFO - tokenize sentences, it could take a lot of time...
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
2022-03-30 14:27:07,297 - bwpdataset - INFO - tokenize sentences [took 6.812 s]


  0%|          | 0/24998 [00:00<?, ?it/s]

2022-03-30 14:27:07,489 - bwpdataset - INFO - *** Example ***
2022-03-30 14:27:07,490 - bwpdataset - INFO - sentence A, B: 힛걸 진심 최고다 그 어떤 히어로보다 멋지다 + 힛걸 진심 최고로 멋지다.
2022-03-30 14:27:07,491 - bwpdataset - INFO - tokens: [CLS] [UNK] 진 ##심 최고 ##다 그 어떤 히 ##어로 ##보다 멋 ##지 ##다 [SEP] [UNK] 진 ##심 최고 ##로 멋 ##지 ##다 . [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]
2022-03-30 14:27:07,491 - bwpdataset - INFO - label: entailment
2022

  0%|          | 0/3000 [00:00<?, ?it/s]

2022-03-30 14:27:09,628 - bwpdataset - INFO - *** Example ***
2022-03-30 14:27:09,629 - bwpdataset - INFO - sentence A, B: 흡연자분들은 발코니가 있는 방이면 발코니에서 흡연이 가능합니다. + 어떤 방에서도 흡연은 금지됩니다.
2022-03-30 14:27:09,630 - bwpdataset - INFO - tokens: [CLS] 흡 ##연 ##자 ##분 ##들은 발 ##코 ##니 ##가 있는 방 ##이 ##면 발 ##코 ##니 ##에서 흡 ##연 ##이 가 ##능 ##합 ##니다 . [SEP] 어떤 방 ##에서 ##도 흡 ##연 ##은 [UNK] . [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]
2022-03-30 14:27:09,630 - bwpdataset - INFO - label: contradiction
2022-03-30 14:2

train_loader_len: 1563, eval_loader_len: 188


In [5]:
# tokenier 테스트
print(len(tokenizer))
print(tokenizer.encode("눈에 보이는 반전이었지만 영화의 흡인력은 사라지지 않았다", "정말 재미있다"))
print(tokenizer.convert_ids_to_tokens(131027))
print(tokenizer.convert_tokens_to_ids('정말'))

119547
[101, 9034, 10530, 9356, 31728, 9321, 16617, 10739, 69708, 42428, 10459, 10020, 12030, 28143, 10892, 9405, 17342, 12508, 12508, 49137, 102, 9670, 89523, 9659, 22458, 76820, 102]
[UNK]
100


In [6]:
import time

logger.info(f"=== model: {model_path} ===")
logger.info(f"num_parameters: {model.num_parameters()}")

# 학습 시작

##################################################
# 변수 설정
##################################################
epochs = 10            # epochs
learning_rate = 2e-5  # 학습률
p_itr = 200           # 손실률 보여줄 step 수
##################################################

# optimizer 적용
optimizer = AdamW(model.parameters(), 
                 lr=learning_rate, 
                 eps=1e-8) # 0으로 나누는 것을 방지하기 위한 epsilon 값(10^-6 ~ 10^-8 사이 이값 입력합)

# 총 훈련과정에서 반복할 스탭
total_steps = len(train_loader)*epochs

num_warmup_steps = total_steps * 0.1

# 스캐줄러 생성
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps=num_warmup_steps, 
                                            num_training_steps=total_steps)

itr = 1
total_loss = 0
total_len = 0
total_correct = 0
list_training_loss = []
list_acc_loss = []
list_validation_acc_loss = []

model.zero_grad()# 그래디언트 초기화
for epoch in tqdm(range(epochs)):

    model.train() # 훈련모드로 변환
    for data in tqdm(train_loader):
    
        #optimizer.zero_grad()
        model.zero_grad()# 그래디언트 초기화
        
        # 입력 값 설정
        input_ids = data['input_ids'].to(device)
        attention_mask = data['attention_mask'].to(device)
        labels = data['labels'].to(device)
        #print('Labels:{}'.format(labels))
        
        # 모델 실행
        outputs = model(input_ids=input_ids, 
                        attention_mask=attention_mask,
                        labels=labels)
        
        # 출력값 loss,logits를 outputs에서 얻어옴
        loss = outputs.loss
        logits = outputs.logits
        #print('Loss:{}, logits:{}'.format(loss, logits))
        
        # optimizer 과 scheduler 업데이트 시킴
        loss.backward()   # backward 구함
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)   # 그래디언트 클리핑 (gradient vanishing이나 gradient exploding 방지하기 위한 기법)
        optimizer.step()  # 가중치 파라미터 업데이트(optimizer 이동)
        scheduler.step()  # 학습률 감소
        
        # 정확도와 손실률 계산하는 부분은 no_grade 시켜서, 계산량을 줄임.
        # => torch.no_grad()는 gradient을 계산하는 autograd engine를 비활성화 하여 
        # 필요한 메모리를 줄이고, 연산속도를 증가시키는 역활을 함
        with torch.no_grad():
            # 정확도와 총 손실률 계산
            pred = torch.argmax(F.softmax(logits), dim=1)
            correct = pred.eq(labels)
            total_correct += correct.sum().item()
            total_len += len(labels)    
            total_loss += loss.item()
            #print('pred:{}, correct:{}'.format(pred, correct))

            # 주기마다 test(validataion) 데이터로 평가하여 손실류 계산함.
            if itr % p_itr == 0:

                logger.info('[Epoch {}/{}] Iteration {} -> Train Loss: {:.4f}, Train Accuracy: {:.3f}'.format(epoch+1, epochs, itr, total_loss/p_itr, total_correct/total_len))

                list_training_loss.append(total_loss/p_itr)
                list_acc_loss.append(total_correct/total_len)

                total_loss = 0
                total_len = 0
                total_correct = 0

        itr+=1
        
        #if itr > 5:
        #    break
   
    ####################################################################
    # 1epochs 마다 실제 test(validattion)데이터로 평가 해봄
    # 평가 시작
    
    start = time.time()
    logger.info(f'---------------------------------------------------------')

    model.eval()
    
    total_test_correct = 0
    total_test_len = 0
    
    for data in tqdm(eval_loader):
        # 입력 값 설정
        input_ids = data['input_ids'].to(device)
        attention_mask = data['attention_mask'].to(device)
        labels = data['labels'].to(device)
 
        # 손실률 계산하는 부분은 no_grade 시켜서, 계산량을 줄임.
        # => torch.no_grad()는 gradient을 계산하는 autograd engine를 비활성화 하여 
        # 필요한 메모리를 줄이고, 연산속도를 증가시키는 역활을 함
        with torch.no_grad():
            # 모델 실행
            outputs = model(input_ids=input_ids, 
                            attention_mask=attention_mask,
                            labels=labels)
    
            # 출력값 loss,logits를 outputs에서 얻어옴
            #loss = outputs.loss
            logits = outputs.logits
    
            # 총 손실류 구함
            pred = torch.argmax(F.softmax(logits), dim=1)
            correct = pred.eq(labels)
            total_test_correct += correct.sum().item()
            total_test_len += len(labels)
    
    list_validation_acc_loss.append(total_test_correct/total_test_len)
    logger.info("[Epoch {}/{}] Validatation Accuracy:{}".format(epoch+1, epochs, total_test_correct / total_test_len))
    logger.info(f'---------------------------------------------------------')
    logger.info(f'=== 처리시간: {time.time() - start:.3f} 초 ===')
    logger.info(f'-END-\n')
    ####################################################################
    

2022-03-30 14:27:09,794 - distilbertfttrain - INFO - === model: ../../model/distilbert/distilbert-base-multilingual-cased ===
2022-03-30 14:27:09,796 - distilbertfttrain - INFO - num_parameters: 135326979


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1563 [00:00<?, ?it/s]

  pred = torch.argmax(F.softmax(logits), dim=1)
2022-03-30 14:27:20,787 - distilbertfttrain - INFO - [Epoch 1/10] Iteration 200 -> Train Loss: 1.1029, Train Accuracy: 0.332
2022-03-30 14:27:30,462 - distilbertfttrain - INFO - [Epoch 1/10] Iteration 400 -> Train Loss: 1.0981, Train Accuracy: 0.347
2022-03-30 14:27:40,146 - distilbertfttrain - INFO - [Epoch 1/10] Iteration 600 -> Train Loss: 1.0570, Train Accuracy: 0.443
2022-03-30 14:27:49,700 - distilbertfttrain - INFO - [Epoch 1/10] Iteration 800 -> Train Loss: 0.8936, Train Accuracy: 0.598
2022-03-30 14:27:59,264 - distilbertfttrain - INFO - [Epoch 1/10] Iteration 1000 -> Train Loss: 0.8474, Train Accuracy: 0.632
2022-03-30 14:28:08,754 - distilbertfttrain - INFO - [Epoch 1/10] Iteration 1200 -> Train Loss: 0.8418, Train Accuracy: 0.638
2022-03-30 14:28:18,284 - distilbertfttrain - INFO - [Epoch 1/10] Iteration 1400 -> Train Loss: 0.8151, Train Accuracy: 0.648
2022-03-30 14:28:26,155 - distilbertfttrain - INFO - ---------------------

  0%|          | 0/188 [00:00<?, ?it/s]

  pred = torch.argmax(F.softmax(logits), dim=1)
2022-03-30 14:28:28,630 - distilbertfttrain - INFO - [Epoch 1/10] Validatation Accuracy:0.6023333333333334
2022-03-30 14:28:28,631 - distilbertfttrain - INFO - ---------------------------------------------------------
2022-03-30 14:28:28,632 - distilbertfttrain - INFO - === 처리시간: 2.477 초 ===
2022-03-30 14:28:28,633 - distilbertfttrain - INFO - -END-



  0%|          | 0/1563 [00:00<?, ?it/s]

2022-03-30 14:28:30,749 - distilbertfttrain - INFO - [Epoch 2/10] Iteration 1600 -> Train Loss: 0.8022, Train Accuracy: 0.661
2022-03-30 14:28:40,738 - distilbertfttrain - INFO - [Epoch 2/10] Iteration 1800 -> Train Loss: 0.7643, Train Accuracy: 0.677
2022-03-30 14:28:50,140 - distilbertfttrain - INFO - [Epoch 2/10] Iteration 2000 -> Train Loss: 0.7520, Train Accuracy: 0.695
2022-03-30 14:28:59,617 - distilbertfttrain - INFO - [Epoch 2/10] Iteration 2200 -> Train Loss: 0.7564, Train Accuracy: 0.678
2022-03-30 14:29:08,928 - distilbertfttrain - INFO - [Epoch 2/10] Iteration 2400 -> Train Loss: 0.7276, Train Accuracy: 0.700
2022-03-30 14:29:18,245 - distilbertfttrain - INFO - [Epoch 2/10] Iteration 2600 -> Train Loss: 0.7247, Train Accuracy: 0.689
2022-03-30 14:29:27,475 - distilbertfttrain - INFO - [Epoch 2/10] Iteration 2800 -> Train Loss: 0.7268, Train Accuracy: 0.694
2022-03-30 14:29:36,688 - distilbertfttrain - INFO - [Epoch 2/10] Iteration 3000 -> Train Loss: 0.6930, Train Accuracy

  0%|          | 0/188 [00:00<?, ?it/s]

2022-03-30 14:29:45,125 - distilbertfttrain - INFO - [Epoch 2/10] Validatation Accuracy:0.6353333333333333
2022-03-30 14:29:45,126 - distilbertfttrain - INFO - ---------------------------------------------------------
2022-03-30 14:29:45,128 - distilbertfttrain - INFO - === 처리시간: 2.429 초 ===
2022-03-30 14:29:45,128 - distilbertfttrain - INFO - -END-



  0%|          | 0/1563 [00:00<?, ?it/s]

2022-03-30 14:29:49,109 - distilbertfttrain - INFO - [Epoch 3/10] Iteration 3200 -> Train Loss: 0.6660, Train Accuracy: 0.726
2022-03-30 14:29:58,387 - distilbertfttrain - INFO - [Epoch 3/10] Iteration 3400 -> Train Loss: 0.5939, Train Accuracy: 0.757
2022-03-30 14:30:07,652 - distilbertfttrain - INFO - [Epoch 3/10] Iteration 3600 -> Train Loss: 0.5799, Train Accuracy: 0.766
2022-03-30 14:30:19,469 - distilbertfttrain - INFO - [Epoch 3/10] Iteration 3800 -> Train Loss: 0.5616, Train Accuracy: 0.783
2022-03-30 14:30:31,455 - distilbertfttrain - INFO - [Epoch 3/10] Iteration 4000 -> Train Loss: 0.6302, Train Accuracy: 0.748
2022-03-30 14:30:41,707 - distilbertfttrain - INFO - [Epoch 3/10] Iteration 4200 -> Train Loss: 0.5910, Train Accuracy: 0.761
2022-03-30 14:30:51,037 - distilbertfttrain - INFO - [Epoch 3/10] Iteration 4400 -> Train Loss: 0.5823, Train Accuracy: 0.759
2022-03-30 14:31:00,426 - distilbertfttrain - INFO - [Epoch 3/10] Iteration 4600 -> Train Loss: 0.6088, Train Accuracy

  0%|          | 0/188 [00:00<?, ?it/s]

2022-03-30 14:31:06,954 - distilbertfttrain - INFO - [Epoch 3/10] Validatation Accuracy:0.6513333333333333
2022-03-30 14:31:06,956 - distilbertfttrain - INFO - ---------------------------------------------------------
2022-03-30 14:31:06,957 - distilbertfttrain - INFO - === 처리시간: 2.400 초 ===
2022-03-30 14:31:06,959 - distilbertfttrain - INFO - -END-



  0%|          | 0/1563 [00:00<?, ?it/s]

2022-03-30 14:31:12,586 - distilbertfttrain - INFO - [Epoch 4/10] Iteration 4800 -> Train Loss: 0.5206, Train Accuracy: 0.798
2022-03-30 14:31:22,455 - distilbertfttrain - INFO - [Epoch 4/10] Iteration 5000 -> Train Loss: 0.4728, Train Accuracy: 0.813
2022-03-30 14:31:31,732 - distilbertfttrain - INFO - [Epoch 4/10] Iteration 5200 -> Train Loss: 0.4768, Train Accuracy: 0.822
2022-03-30 14:31:41,130 - distilbertfttrain - INFO - [Epoch 4/10] Iteration 5400 -> Train Loss: 0.4718, Train Accuracy: 0.818
2022-03-30 14:31:50,195 - distilbertfttrain - INFO - [Epoch 4/10] Iteration 5600 -> Train Loss: 0.4672, Train Accuracy: 0.816
2022-03-30 14:31:59,269 - distilbertfttrain - INFO - [Epoch 4/10] Iteration 5800 -> Train Loss: 0.4608, Train Accuracy: 0.828
2022-03-30 14:32:08,217 - distilbertfttrain - INFO - [Epoch 4/10] Iteration 6000 -> Train Loss: 0.4845, Train Accuracy: 0.812
2022-03-30 14:32:17,202 - distilbertfttrain - INFO - [Epoch 4/10] Iteration 6200 -> Train Loss: 0.4883, Train Accuracy

  0%|          | 0/188 [00:00<?, ?it/s]

2022-03-30 14:32:22,189 - distilbertfttrain - INFO - [Epoch 4/10] Validatation Accuracy:0.665
2022-03-30 14:32:22,191 - distilbertfttrain - INFO - ---------------------------------------------------------
2022-03-30 14:32:22,193 - distilbertfttrain - INFO - === 처리시간: 2.529 초 ===
2022-03-30 14:32:22,194 - distilbertfttrain - INFO - -END-



  0%|          | 0/1563 [00:00<?, ?it/s]

2022-03-30 14:32:29,610 - distilbertfttrain - INFO - [Epoch 5/10] Iteration 6400 -> Train Loss: 0.3981, Train Accuracy: 0.851
2022-03-30 14:32:38,798 - distilbertfttrain - INFO - [Epoch 5/10] Iteration 6600 -> Train Loss: 0.3409, Train Accuracy: 0.875
2022-03-30 14:32:47,739 - distilbertfttrain - INFO - [Epoch 5/10] Iteration 6800 -> Train Loss: 0.3646, Train Accuracy: 0.865
2022-03-30 14:32:56,657 - distilbertfttrain - INFO - [Epoch 5/10] Iteration 7000 -> Train Loss: 0.3843, Train Accuracy: 0.856
2022-03-30 14:33:05,592 - distilbertfttrain - INFO - [Epoch 5/10] Iteration 7200 -> Train Loss: 0.3821, Train Accuracy: 0.853
2022-03-30 14:33:15,664 - distilbertfttrain - INFO - [Epoch 5/10] Iteration 7400 -> Train Loss: 0.3833, Train Accuracy: 0.854
2022-03-30 14:33:26,446 - distilbertfttrain - INFO - [Epoch 5/10] Iteration 7600 -> Train Loss: 0.3789, Train Accuracy: 0.854
2022-03-30 14:33:37,763 - distilbertfttrain - INFO - [Epoch 5/10] Iteration 7800 -> Train Loss: 0.3663, Train Accuracy

  0%|          | 0/188 [00:00<?, ?it/s]

2022-03-30 14:33:41,040 - distilbertfttrain - INFO - [Epoch 5/10] Validatation Accuracy:0.6563333333333333
2022-03-30 14:33:41,041 - distilbertfttrain - INFO - ---------------------------------------------------------
2022-03-30 14:33:41,042 - distilbertfttrain - INFO - === 처리시간: 2.453 초 ===
2022-03-30 14:33:41,043 - distilbertfttrain - INFO - -END-



  0%|          | 0/1563 [00:00<?, ?it/s]

2022-03-30 14:33:51,944 - distilbertfttrain - INFO - [Epoch 6/10] Iteration 8000 -> Train Loss: 0.2791, Train Accuracy: 0.892
2022-03-30 14:34:03,101 - distilbertfttrain - INFO - [Epoch 6/10] Iteration 8200 -> Train Loss: 0.2972, Train Accuracy: 0.893
2022-03-30 14:34:12,065 - distilbertfttrain - INFO - [Epoch 6/10] Iteration 8400 -> Train Loss: 0.2825, Train Accuracy: 0.897
2022-03-30 14:34:21,489 - distilbertfttrain - INFO - [Epoch 6/10] Iteration 8600 -> Train Loss: 0.3078, Train Accuracy: 0.891
2022-03-30 14:34:30,942 - distilbertfttrain - INFO - [Epoch 6/10] Iteration 8800 -> Train Loss: 0.3038, Train Accuracy: 0.892
2022-03-30 14:34:40,467 - distilbertfttrain - INFO - [Epoch 6/10] Iteration 9000 -> Train Loss: 0.2908, Train Accuracy: 0.890
2022-03-30 14:34:49,891 - distilbertfttrain - INFO - [Epoch 6/10] Iteration 9200 -> Train Loss: 0.2853, Train Accuracy: 0.898
2022-03-30 14:34:58,313 - distilbertfttrain - INFO - ---------------------------------------------------------


  0%|          | 0/188 [00:00<?, ?it/s]

2022-03-30 14:35:00,802 - distilbertfttrain - INFO - [Epoch 6/10] Validatation Accuracy:0.655
2022-03-30 14:35:00,804 - distilbertfttrain - INFO - ---------------------------------------------------------
2022-03-30 14:35:00,805 - distilbertfttrain - INFO - === 처리시간: 2.492 초 ===
2022-03-30 14:35:00,806 - distilbertfttrain - INFO - -END-



  0%|          | 0/1563 [00:00<?, ?it/s]

2022-03-30 14:35:02,366 - distilbertfttrain - INFO - [Epoch 7/10] Iteration 9400 -> Train Loss: 0.2887, Train Accuracy: 0.899
2022-03-30 14:35:11,547 - distilbertfttrain - INFO - [Epoch 7/10] Iteration 9600 -> Train Loss: 0.2347, Train Accuracy: 0.924
2022-03-30 14:35:20,821 - distilbertfttrain - INFO - [Epoch 7/10] Iteration 9800 -> Train Loss: 0.2235, Train Accuracy: 0.926
2022-03-30 14:35:30,502 - distilbertfttrain - INFO - [Epoch 7/10] Iteration 10000 -> Train Loss: 0.2141, Train Accuracy: 0.918
2022-03-30 14:35:39,785 - distilbertfttrain - INFO - [Epoch 7/10] Iteration 10200 -> Train Loss: 0.2162, Train Accuracy: 0.925
2022-03-30 14:35:49,010 - distilbertfttrain - INFO - [Epoch 7/10] Iteration 10400 -> Train Loss: 0.2325, Train Accuracy: 0.923
2022-03-30 14:35:58,163 - distilbertfttrain - INFO - [Epoch 7/10] Iteration 10600 -> Train Loss: 0.2382, Train Accuracy: 0.921
2022-03-30 14:36:07,168 - distilbertfttrain - INFO - [Epoch 7/10] Iteration 10800 -> Train Loss: 0.2190, Train Acc

  0%|          | 0/188 [00:00<?, ?it/s]

2022-03-30 14:36:16,316 - distilbertfttrain - INFO - [Epoch 7/10] Validatation Accuracy:0.6566666666666666
2022-03-30 14:36:16,318 - distilbertfttrain - INFO - ---------------------------------------------------------
2022-03-30 14:36:16,319 - distilbertfttrain - INFO - === 처리시간: 2.433 초 ===
2022-03-30 14:36:16,320 - distilbertfttrain - INFO - -END-



  0%|          | 0/1563 [00:00<?, ?it/s]

2022-03-30 14:36:19,730 - distilbertfttrain - INFO - [Epoch 8/10] Iteration 11000 -> Train Loss: 0.2093, Train Accuracy: 0.930
2022-03-30 14:36:29,572 - distilbertfttrain - INFO - [Epoch 8/10] Iteration 11200 -> Train Loss: 0.1818, Train Accuracy: 0.943
2022-03-30 14:36:39,080 - distilbertfttrain - INFO - [Epoch 8/10] Iteration 11400 -> Train Loss: 0.1823, Train Accuracy: 0.941
2022-03-30 14:36:48,622 - distilbertfttrain - INFO - [Epoch 8/10] Iteration 11600 -> Train Loss: 0.1992, Train Accuracy: 0.933
2022-03-30 14:36:57,937 - distilbertfttrain - INFO - [Epoch 8/10] Iteration 11800 -> Train Loss: 0.1895, Train Accuracy: 0.938
2022-03-30 14:37:07,791 - distilbertfttrain - INFO - [Epoch 8/10] Iteration 12000 -> Train Loss: 0.1957, Train Accuracy: 0.936
2022-03-30 14:37:17,725 - distilbertfttrain - INFO - [Epoch 8/10] Iteration 12200 -> Train Loss: 0.1960, Train Accuracy: 0.941
2022-03-30 14:37:27,879 - distilbertfttrain - INFO - [Epoch 8/10] Iteration 12400 -> Train Loss: 0.1950, Train 

  0%|          | 0/188 [00:00<?, ?it/s]

2022-03-30 14:37:35,845 - distilbertfttrain - INFO - [Epoch 8/10] Validatation Accuracy:0.649
2022-03-30 14:37:35,847 - distilbertfttrain - INFO - ---------------------------------------------------------
2022-03-30 14:37:35,848 - distilbertfttrain - INFO - === 처리시간: 2.463 초 ===
2022-03-30 14:37:35,849 - distilbertfttrain - INFO - -END-



  0%|          | 0/1563 [00:00<?, ?it/s]

2022-03-30 14:37:40,893 - distilbertfttrain - INFO - [Epoch 9/10] Iteration 12600 -> Train Loss: 0.1673, Train Accuracy: 0.951
2022-03-30 14:37:50,391 - distilbertfttrain - INFO - [Epoch 9/10] Iteration 12800 -> Train Loss: 0.1683, Train Accuracy: 0.950
2022-03-30 14:37:59,670 - distilbertfttrain - INFO - [Epoch 9/10] Iteration 13000 -> Train Loss: 0.1548, Train Accuracy: 0.954
2022-03-30 14:38:09,064 - distilbertfttrain - INFO - [Epoch 9/10] Iteration 13200 -> Train Loss: 0.1648, Train Accuracy: 0.949
2022-03-30 14:38:18,345 - distilbertfttrain - INFO - [Epoch 9/10] Iteration 13400 -> Train Loss: 0.1464, Train Accuracy: 0.954
2022-03-30 14:38:27,640 - distilbertfttrain - INFO - [Epoch 9/10] Iteration 13600 -> Train Loss: 0.1666, Train Accuracy: 0.952
2022-03-30 14:38:37,063 - distilbertfttrain - INFO - [Epoch 9/10] Iteration 13800 -> Train Loss: 0.1317, Train Accuracy: 0.959
2022-03-30 14:38:46,709 - distilbertfttrain - INFO - [Epoch 9/10] Iteration 14000 -> Train Loss: 0.1417, Train 

  0%|          | 0/188 [00:00<?, ?it/s]

2022-03-30 14:38:52,756 - distilbertfttrain - INFO - [Epoch 9/10] Validatation Accuracy:0.653
2022-03-30 14:38:52,758 - distilbertfttrain - INFO - ---------------------------------------------------------
2022-03-30 14:38:52,760 - distilbertfttrain - INFO - === 처리시간: 2.568 초 ===
2022-03-30 14:38:52,761 - distilbertfttrain - INFO - -END-



  0%|          | 0/1563 [00:00<?, ?it/s]

2022-03-30 14:38:59,673 - distilbertfttrain - INFO - [Epoch 10/10] Iteration 14200 -> Train Loss: 0.1397, Train Accuracy: 0.959
2022-03-30 14:39:09,337 - distilbertfttrain - INFO - [Epoch 10/10] Iteration 14400 -> Train Loss: 0.1273, Train Accuracy: 0.963
2022-03-30 14:39:19,028 - distilbertfttrain - INFO - [Epoch 10/10] Iteration 14600 -> Train Loss: 0.1147, Train Accuracy: 0.965
2022-03-30 14:39:28,871 - distilbertfttrain - INFO - [Epoch 10/10] Iteration 14800 -> Train Loss: 0.1185, Train Accuracy: 0.964
2022-03-30 14:39:39,017 - distilbertfttrain - INFO - [Epoch 10/10] Iteration 15000 -> Train Loss: 0.1303, Train Accuracy: 0.966
2022-03-30 14:39:48,886 - distilbertfttrain - INFO - [Epoch 10/10] Iteration 15200 -> Train Loss: 0.1182, Train Accuracy: 0.968
2022-03-30 14:39:58,453 - distilbertfttrain - INFO - [Epoch 10/10] Iteration 15400 -> Train Loss: 0.1150, Train Accuracy: 0.966
2022-03-30 14:40:08,081 - distilbertfttrain - INFO - [Epoch 10/10] Iteration 15600 -> Train Loss: 0.1263

  0%|          | 0/188 [00:00<?, ?it/s]

2022-03-30 14:40:12,188 - distilbertfttrain - INFO - [Epoch 10/10] Validatation Accuracy:0.6496666666666666
2022-03-30 14:40:12,190 - distilbertfttrain - INFO - ---------------------------------------------------------
2022-03-30 14:40:12,191 - distilbertfttrain - INFO - === 처리시간: 2.505 초 ===
2022-03-30 14:40:12,191 - distilbertfttrain - INFO - -END-



In [None]:
# 그래프로 loss 표기
#!pip install matplotlib
import matplotlib.pyplot as plt

plt.plot(list_training_loss, label='Train Loss')
plt.plot(list_acc_loss, label='Train Accuracy')
plt.legend()
plt.show()

In [None]:
# train loss와 Validatiaon acc 출력
plt.plot(list_training_loss, label='Train Loss')
plt.plot(list_validation_acc_loss, label='Validatiaon Accuracy')
plt.legend()
plt.show()

In [7]:
### 전체모델 저장
#OUTPATH = '../model/distilbert/distilbert-model-0317-distillation-best-nli'

os.makedirs(OUTPATH, exist_ok=True)
#torch.save(model, OUTPATH + 'pytorch_model.bin') 
model.save_pretrained(OUTPATH)  # save_pretrained 로 저장하면 config.json, pytorch_model.bin 2개의 파일이 생성됨

# tokeinizer 파일 저장
VOCAB_PATH = OUTPATH
os.makedirs(VOCAB_PATH, exist_ok=True)
tokenizer.save_pretrained(VOCAB_PATH)

('../../model/distilbert/distilbert-base-multilingual-cased-nli-0330/tokenizer_config.json',
 '../../model/distilbert/distilbert-base-multilingual-cased-nli-0330/special_tokens_map.json',
 '../../model/distilbert/distilbert-base-multilingual-cased-nli-0330/vocab.txt',
 '../../model/distilbert/distilbert-base-multilingual-cased-nli-0330/added_tokens.json')