## 시작

In [1]:
# ------ LIBRARY -------#
import numpy as np
import os
import pickle
import sys
import pandas as pd
import re
import cv2
# torch
import torch
import torch.cuda.amp as amp
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torch.utils.data.sampler import *

import torch.nn as nn
import torch.nn.functional as F

from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau, MultiStepLR, OneCycleLR
#

import math
import torch
from torch.optim.optimizer import Optimizer, required
import torch_optimizer as optim
from collections import defaultdict
import itertools as it

import tqdm
import random
#import time
import matplotlib.pyplot as plt
from timeit import default_timer as timer
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import torch
import transformers

# transformer
from transformers import XLMPreTrainedModel, XLMRobertaModel, XLMRobertaConfig, XLMRobertaTokenizer
from transformers import XLMRobertaForSequenceClassification, BertForSequenceClassification
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import BertForSequenceClassification, DistilBertForSequenceClassification, XLNetForSequenceClassification,\
XLMRobertaForSequenceClassification, XLMForSequenceClassification, RobertaForSequenceClassification
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup

In [2]:
# class args
class args:
    """Static configuration namespace; attributes are read directly as ``args.<name>``."""

    # ---- Runtime ---- #
    debug = False            # when True, run_train() trains on a 1000-row sample
    amp = True               # enables torch.cuda.amp autocast / GradScaler paths
    gpu = '1'                # exported as CUDA_VISIBLE_DEVICES
    num_workers = 8          # not referenced by the visible loaders (they hard-code 8)
    seed = 2021              # passed to set_seeds()

    # ---- Optimisation ---- #
    epochs = 10
    batch_size = 64
    start_lr = 1e-5          # previously tried: 1e-3, 5e-5
    min_lr = 1e-6            # not referenced in the visible code
    weight_decay = 1e-6      # not referenced in the visible code
    scheduler = None         # e.g. 'get_linear_schedule_with_warmup'; not read by run_train()

    # ---- Cross-validation ---- #
    n_fold = 5
    fold = 3                 # one of [0, 1, 2, 3, 4]; used in the checkpoint directory name

    # ---- Model / paths ---- #
    exp_name = 'experiment_name_folder'
    dir_ = './saved_models/'
    # Pretrained checkpoint name handed to from_pretrained();
    # candidates: 'klue/roberta-base', 'klue/roberta-small', 'klue/roberta-large'
    pt = 'klue/roberta-large'
    max_len = 33             # tokenizer max_length; also part of the cache file name


# Base directory for data files.
data_dir = './'

# Export the GPU selection and the tokenizer-parallelism flag before CUDA is queried.
os.environ.update({
    "CUDA_VISIBLE_DEVICES": args.gpu,
    "TOKENIZERS_PARALLELISM": "false",
})

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

##----------------
def set_seeds(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and put cuDNN in deterministic mode.

    Args:
        seed: integer seed applied to every generator (default 42).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Apply the same seed to each random number generator in turn.
    for seed_fn in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        seed_fn(seed)

    # Deterministic kernels on, autotuner off: benchmark=True would be faster
    # but is not deterministic.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Apply the configured seed (args.seed) to all RNGs for this session.
set_seeds(seed=args.seed)    


In [3]:
# - util - #
def get_learning_rate(optimizer):
    """Return the learning rate of the optimizer's single parameter group.

    Args:
        optimizer: object exposing ``param_groups``, a list of dicts with an ``'lr'`` key.

    Returns:
        The ``'lr'`` value of the only parameter group.

    Raises:
        AssertionError: if the optimizer does not have exactly one parameter group.
    """
    rates = [group['lr'] for group in optimizer.param_groups]

    assert len(rates) == 1  # we support only one param_group
    (rate,) = rates

    return rate

# data processing

# (json path, class label) pairs; the label is the integer written to the 'label' column.
_TRAIN_FILES = (
    ('./data/1.Training/라벨링데이터/TL1/안전건설/안전건설_93747.json', 0),
    ('./data/1.Training/라벨링데이터/TL1/교통/교통_85465.json', 1),
    ('./data/1.Training/라벨링데이터/TL1/건축허가/건축허가_57256.json', 2),
    ('./data/1.Training/라벨링데이터/TL1/환경미화/환경미화_38129.json', 3),
)
_VALID_FILES = (
    ('./data/2.Validation/라벨링데이터/VL1/안전건설/안전건설_11719.json', 0),
    ('./data/2.Validation/라벨링데이터/VL1/교통/교통_10683.json', 1),
    ('./data/2.Validation/라벨링데이터/VL1/건축허가/건축허가_7157.json', 2),
    ('./data/2.Validation/라벨링데이터/VL1/환경미화/환경미화_4766.json', 3),
)


def _read_labeled_json(path, label):
    """Read one category file into a frame with 'contents' (Q_refined text) and 'label'."""
    df = pd.DataFrame()
    df['contents'] = pd.read_json(path)['documents'].apply(lambda doc: doc['Q_refined'])
    df['label'] = label
    return df


def load_data(n_splits=5, random_state=42):
    """Load the training and validation JSON files and assign stratified folds.

    Each file listed in ``_TRAIN_FILES`` / ``_VALID_FILES`` is read with
    ``pd.read_json``; the ``Q_refined`` field of every entry under ``documents``
    becomes the ``contents`` column and the paired integer becomes ``label``.

    Args:
        n_splits: number of stratified folds assigned to the training frame
            (default 5, the value originally hard-coded).
        random_state: seed for the shuffled ``StratifiedKFold`` (default 42).

    Returns:
        (train_df, test_df):
            train_df has columns ``contents``, ``label``, ``fold`` (index of the
            fold in which the row is held out) and ``id`` (0..len-1 row position).
            test_df has columns ``contents`` and ``label``.
    """
    # StratifiedKFold is not part of the notebook's top-level imports.
    from sklearn.model_selection import StratifiedKFold

    train_df = pd.concat(
        [_read_labeled_json(path, label) for path, label in _TRAIN_FILES]
    ).reset_index(drop=True)
    test_df = pd.concat(
        [_read_labeled_json(path, label) for path, label in _VALID_FILES]
    ).reset_index(drop=True)

    skf = StratifiedKFold(n_splits=n_splits, random_state=random_state, shuffle=True)
    train_df['fold'] = -1
    # Only the held-out indices of each split are needed to tag the rows.
    for n_fold, (_, v_idx) in enumerate(skf.split(train_df, train_df['label'])):
        train_df.loc[v_idx, 'fold'] = n_fold
    train_df['id'] = list(range(len(train_df)))

    return train_df, test_df



# 전처리

In [6]:
def bert_tokenizer(sent, MAX_LEN, tokenizer):
    """Encode one sentence to fixed-length ids and an attention mask.

    Args:
        sent: text to encode.
        MAX_LEN: target length; the encoding is truncated and padded to it.
        tokenizer: object providing ``encode_plus`` (e.g. a Hugging Face tokenizer).

    Returns:
        (input_id, attention_mask, token_type_id) where the first two are the
        ``'input_ids'`` and ``'attention_mask'`` entries returned by the
        tokenizer and ``token_type_id`` is always the constant 0 (segment ids
        are not taken from the tokenizer).
    """
    encoded_dict = tokenizer.encode_plus(
        text=sent,
        add_special_tokens=True,
        max_length=MAX_LEN,
        # `pad_to_max_length=True` is deprecated; this is its documented replacement.
        padding='max_length',
        return_attention_mask=True,
        truncation=True,
    )

    input_id = encoded_dict['input_ids']
    attention_mask = encoded_dict['attention_mask']
    # token_type_ids are intentionally not read from the encoding.
    token_type_id = 0

    return input_id, attention_mask, token_type_id

def preprocessing(train, type='train'):
    """Tokenize a labelled frame and cache the encoded arrays as a pickle.

    Args:
        train: DataFrame with ``contents`` (text) and ``label`` (int) columns.
        type: tag used in the output file name (``'train'`` or ``'test'``).
            The name shadows the builtin but is kept because callers pass it
            by keyword.

    Side effects:
        Writes ``./data/{args.pt}/{type}_data_{args.max_len}.pickle`` containing
        a dict with keys ``input_ids``, ``attention_mask``, ``token_type_ids``
        and ``targets``.

    Rows whose tokenization raises are skipped (best effort). Skipping shifts
    the position of every later row, so the number of skipped rows is reported
    at the end.
    """
    pt = args.pt
    MAX_LEN = args.max_len

    tokenizer = AutoTokenizer.from_pretrained(pt)

    input_ids = []
    attention_masks = []
    token_type_ids = []
    train_data_labels = []
    n_skipped = 0

    rows = zip(train['contents'], train['label'])
    # `total` lets tqdm show a real progress bar instead of a bare counter.
    for row_idx, (train_sent, train_label) in enumerate(tqdm.tqdm(rows, total=len(train))):
        try:
            input_id, attention_mask, _ = bert_tokenizer(train_sent, MAX_LEN=MAX_LEN, tokenizer=tokenizer)
        except Exception as e:
            n_skipped += 1
            print(f'row {row_idx} skipped: {e}')
            continue

        input_ids.append(input_id)
        attention_masks.append(attention_mask)
        token_type_ids.append(0)  # segment ids are always stored as 0
        train_data_labels.append(train_label)

    if n_skipped:
        print(f'WARNING: {n_skipped} row(s) skipped; cached arrays are no longer '
              f'position-aligned with the input frame')

    # save
    train_data = {
        'input_ids': np.array(input_ids, dtype=int),
        'attention_mask': np.array(attention_masks, dtype=int),
        'token_type_ids': np.array(token_type_ids, dtype=int),
        'targets': np.asarray(train_data_labels, dtype=np.int32),
    }

    os.makedirs(f'./data/{pt}/', exist_ok=True)
    with open(f'./data/{pt}/{type}_data_{MAX_LEN}.pickle', 'wb') as f:
        pickle.dump(train_data, f, pickle.HIGHEST_PROTOCOL)


In [7]:
# ['monologg/kobert','klue/roberta-base','klue/roberta-small','klue/roberta-large','xlm-roberta-large', 
#            'bert-base-multilingual-uncased', 'klue/roberta-large']


# train_df, test_df = load_data()
# preprocessing(train_df)
# preprocessing(test_df, type='test')

        


Downloading:   0%|          | 0.00/375 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/243k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/734k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/173 [00:00<?, ?B/s]

274597it [00:29, 9403.33it/s] 
34325it [00:03, 9864.71it/s] 


# models

In [4]:
# ------------------------
#  dataset
# ------------------------
class KobertDataSet(Dataset):
    """Dataset over a dict of pre-tokenized arrays.

    ``data`` must provide ``input_ids``, ``attention_mask`` and
    ``token_type_ids``; ``targets`` is read only when ``test`` is False.
    """

    def __init__(self, data, test=False):
        self.data = data
        self.test = test

    def __len__(self):
        # One sample per row of the input-id array.
        return self.data['input_ids'].shape[0]

    def __getitem__(self, idx):
        """Return the idx-th sample as a dict of ``torch.long`` tensors."""
        def as_long(key):
            return torch.tensor(self.data[key][idx], dtype=torch.long)

        sample = {
            'ids': as_long('input_ids'),
            'mask': as_long('attention_mask'),
            'token_type_ids': as_long('token_type_ids'),
        }

        # Labels are attached only outside test mode.
        if not self.test:
            sample['targets'] = as_long('targets')

        return sample

# training

In [5]:
# ------------------------
#  validation / prediction / training loops
# ------------------------

def do_valid(net, valid_loader):
    """Evaluate ``net`` on a labelled loader.

    Args:
        net: model called as ``net(ids, mask)``; element ``[0]`` of its result
            is used as the logits.
        valid_loader: iterable of batches with ``'ids'``, ``'mask'`` and
            ``'targets'`` tensors.

    Returns:
        (val_mean_loss, validation_score, validation_acc, logit):
        mean cross-entropy per batch, macro F1, accuracy, and the per-sample
        logits as a list of lists.
    """
    val_loss = 0
    target_lst = []
    pred_lst = []
    logit = []
    loss_fn = nn.CrossEntropyLoss()

    net.eval()
    for data in tqdm.tqdm(valid_loader):
        ids = data['ids'].to(device)
        mask = data['mask'].to(device)
        target = data['targets'].to(device)

        # autocast(enabled=False) is a no-op, so both precision modes share one
        # path and the logits are always taken from element [0].
        with torch.no_grad(), amp.autocast(enabled=args.amp):
            output = net(ids, mask)[0]
            loss = loss_fn(output, target)

        val_loss += loss
        target_lst.extend(target.detach().cpu().numpy())
        pred_lst.extend(output.argmax(dim=1).tolist())
        logit.extend(output.tolist())

    # Metrics are computed once over all batches, not once per batch.
    val_mean_loss = val_loss / len(valid_loader)
    validation_score = f1_score(y_true=target_lst, y_pred=pred_lst, average='macro')
    validation_acc = accuracy_score(y_true=target_lst, y_pred=pred_lst)

    return val_mean_loss, validation_score, validation_acc, logit

def do_predict(net, valid_loader):
    """Run inference over an unlabelled loader.

    Args:
        net: model called as ``net(ids, mask)``; element ``[0]`` of its result
            is used as the logits.
        valid_loader: iterable of batches with ``'ids'`` and ``'mask'`` tensors.

    Returns:
        (pred_lst, logit): argmax class index per sample and the per-sample
        logits as a list of lists.
    """
    pred_lst = []
    logit = []

    net.eval()
    for data in tqdm.tqdm(valid_loader):
        ids = data['ids'].to(device)
        mask = data['mask'].to(device)

        # One code path for both precision modes; autocast(enabled=False) is a no-op.
        with torch.no_grad(), amp.autocast(enabled=args.amp):
            output = net(ids, mask)[0]

        pred_lst.extend(output.argmax(dim=1).tolist())
        logit.extend(output.tolist())

    return pred_lst, logit

def _train_one_epoch(net, trainloader, optimizer, scheduler, scaler, loss_fn):
    """Run one optimisation pass over ``trainloader``; return (mean_loss, macro_f1, accuracy)."""
    net.train()  # do_valid() switches the model to eval mode after every epoch
    running_loss = 0.0
    target_lst = []
    pred_lst = []

    for data in tqdm.tqdm(trainloader):
        ids = data['ids'].to(device)
        mask = data['mask'].to(device)
        target = data['targets'].to(device)

        optimizer.zero_grad()

        # autocast(enabled=False) is a no-op, so both precision modes share the
        # same forward pass and always read the logits from element [0].
        with amp.autocast(enabled=args.amp):
            output = net(ids, mask)[0]
            loss = loss_fn(output, target)

        if args.amp:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        # .item() keeps the running total a plain float instead of a tensor
        # that still carries grad_fn.
        running_loss += loss.item()

        # for calculate f1 score
        target_lst.extend(target.detach().cpu().numpy())
        pred_lst.extend(output.argmax(dim=1).tolist())

        # The linear schedule is stepped once per batch.
        if scheduler is not None:
            scheduler.step()

    mean_loss = running_loss / len(trainloader)
    score = f1_score(y_true=target_lst, y_pred=pred_lst, average='macro')
    acc = accuracy_score(y_true=target_lst, y_pred=pred_lst)
    return mean_loss, score, acc


def run_train(folds=3):
    """Fine-tune ``args.pt`` with fold ``folds`` held out of the training data.

    Args:
        folds: index (0-4) of the fold excluded from training.

    Side effects:
        Prints per-epoch metrics and saves ``net.state_dict()`` every time the
        evaluation accuracy improves.

    Notes:
        * Evaluation and checkpoint selection use the cached *test* pickle, not
          the held-out fold; the held-out fold rows are simply left unused.
        * NOTE(review): the output directory is built from ``args.fold`` rather
          than ``folds``, so every fold writes into the same ``fold{args.fold}``
          directory (file names are still prefixed with ``folds``). Confirm
          whether this is intended before changing it.
    """
    out_dir = args.dir_+ f'/fold{args.fold}/{args.exp_name}/'
    os.makedirs(out_dir, exist_ok=True)

    # load dataset (only the fold assignment of the training frame is needed)
    train, _ = load_data()
    # NOTE: pickle.load can execute arbitrary code; these paths are the caches
    # written by preprocessing() and must not point at untrusted files.
    with open(f'./data/{args.pt}/train_data_{args.max_len}.pickle', 'rb') as f:
        train_data = pickle.load(f)
    with open(f'./data/{args.pt}/test_data_{args.max_len}.pickle', 'rb') as f:
        test_data = pickle.load(f)

    # split fold
    for n_fold in range(5):
        if n_fold != folds:
            print(f'{n_fold} fold pass'+'\n')
            continue

        if args.debug:
            train = train.sample(1000).copy()

        trn_idx = train[train['fold']!=n_fold]['id'].values

        keys = ('input_ids', 'attention_mask', 'token_type_ids', 'targets')
        train_dict = {key: train_data[key][trn_idx] for key in keys}
        val_dict = {key: test_data[key] for key in keys}

        ## dataset ------------------------------------
        train_dataset = KobertDataSet(data = train_dict)
        valid_dataset = KobertDataSet(data = val_dict)
        trainloader = DataLoader(dataset=train_dataset, batch_size=args.batch_size,
                                 num_workers=args.num_workers, shuffle=True, pin_memory=True)
        validloader = DataLoader(dataset=valid_dataset, batch_size=args.batch_size,
                                 num_workers=args.num_workers, shuffle=False, pin_memory=True)

        ## net ----------------------------------------
        scaler = amp.GradScaler()
        net = AutoModelForSequenceClassification.from_pretrained(args.pt, num_labels = 4)
        net.to(device)
        if len(args.gpu)>1:
            net = nn.DataParallel(net)

        # ------------------------
        # loss
        # ------------------------
        loss_fn = nn.CrossEntropyLoss()

        # ------------------------
        #  Optimizer
        # ------------------------
        optimizer = optim.Lookahead(optim.RAdam(filter(lambda p: p.requires_grad,net.parameters()), lr=args.start_lr), alpha=0.5, k=5)

        scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps = 0, num_training_steps = len(trainloader)*args.epochs)

        # Initialised up front so the summary below cannot hit an unbound name
        # when no epoch improves on the starting score.
        best_score = 0
        best_epoch = 0
        best_loss = float('inf')

        for epoch in range(1, args.epochs+1):
            lr = get_learning_rate(optimizer)
            print(f'-------------------')
            print(f'{epoch}epoch start')
            print(f'-------------------'+'\n')
            print(f'learning rate : {lr : .6f}')

            train_loss, train_score, train_acc = _train_one_epoch(
                net, trainloader, optimizer, scheduler, scaler, loss_fn)

            # validation
            valid_loss, valid_score, valid_acc, _ = do_valid(net, validloader)

            if valid_acc > best_score:
                best_score = valid_acc
                best_epoch = epoch
                best_loss = valid_loss

                torch.save(net.state_dict(), out_dir + f'/{folds}f_{epoch}e_{best_score:.4f}_s.pth')
                print('best model saved'+'\n')

            print(f'train loss : {train_loss:.4f}, train f1 score : {train_score : .4f}, train acc : {train_acc : .4f}'+'\n')
            print(f'test loss : {valid_loss:.4f}, test f1 score : {valid_score : .4f}, test acc : {valid_acc : .4f}'+'\n')

        print(f'best test loss : {best_loss : .4f}'+'\n')
        print(f'best epoch : {best_epoch }'+'\n')
        print(f'best accuracy : {best_score : .4f}'+'\n')
        
def run_predict(model_path):
    """Load a saved checkpoint and predict on the cached test set.

    Args:
        model_path: path to a ``state_dict`` file as saved by ``run_train``.

    Returns:
        (preds, logits): list of predicted class indices and a NumPy array of
        the per-sample logits.
    """
    ## dataset ------------------------------------
    # NOTE: pickle.load can execute arbitrary code; only load caches produced
    # by preprocessing().
    with open(f'./data/{args.pt}/test_data_{args.max_len}.pickle', 'rb') as f:
        test_dict = pickle.load(f)

    print('test load')
    test_dataset = KobertDataSet(data = test_dict, test=True)
    testloader = DataLoader(dataset=test_dataset, batch_size=args.batch_size,
                            num_workers=args.num_workers, shuffle=False, pin_memory=True)
    print('set testloader')

    ## net ----------------------------------------
    net = AutoModelForSequenceClassification.from_pretrained(args.pt, num_labels = 4)
    net.to(device)

    # Same wrapping rule as run_train so the state_dict key names match.
    if len(args.gpu)>1:
        net = nn.DataParallel(net)

    # map_location lets a checkpoint saved on GPU be loaded on whatever
    # device is active now (including CPU-only hosts).
    state_dict = torch.load(model_path, map_location=device)
    net.load_state_dict(state_dict, strict=True)
    print('load saved models')

    # ------------------------
    # prediction
    preds, logit = do_predict(net, testloader)

    print('complete predict')

    return preds, np.array(logit)
     

In [6]:
# ['monologg/kobert','klue/roberta-base','klue/roberta-small','klue/roberta-large','xlm-roberta-large', 
#            'bert-base-multilingual-uncased', 'klue/roberta-large']

"""5fold 전용"""
# 5-fold driver: train one model per fold index, 0 through 4.
if __name__ == '__main__':
    for f in range(5):
        run_train(folds=f)

Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.out_proj.weight', 'cl

-------------------
1epoch start
-------------------

learning rate :  0.000010


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:49<00:00,  6.48it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:33<00:00, 16.27it/s]


best model saved

train loss : 0.4512, train f1 score :  0.8389, train acc :  0.8339

test loss : 0.3549, test f1 score :  0.8794, test acc :  0.8730

-------------------
2epoch start
-------------------

learning rate :  0.000009


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:53<00:00,  6.44it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.36it/s]


best model saved

train loss : 0.3196, train f1 score :  0.8946, train acc :  0.8881

test loss : 0.2979, test f1 score :  0.9060, test acc :  0.9000

-------------------
3epoch start
-------------------

learning rate :  0.000008


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:51<00:00,  6.46it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:33<00:00, 16.13it/s]


best model saved

train loss : 0.2433, train f1 score :  0.9223, train acc :  0.9164

test loss : 0.2495, test f1 score :  0.9224, test acc :  0.9163

-------------------
4epoch start
-------------------

learning rate :  0.000007


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:46<00:00,  6.52it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.47it/s]


best model saved

train loss : 0.1834, train f1 score :  0.9428, train acc :  0.9379

test loss : 0.2025, test f1 score :  0.9389, test acc :  0.9337

-------------------
5epoch start
-------------------

learning rate :  0.000006


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:46<00:00,  6.52it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.45it/s]


best model saved

train loss : 0.1396, train f1 score :  0.9570, train acc :  0.9530

test loss : 0.2028, test f1 score :  0.9402, test acc :  0.9347

-------------------
6epoch start
-------------------

learning rate :  0.000005


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:46<00:00,  6.52it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.45it/s]


best model saved

train loss : 0.1097, train f1 score :  0.9668, train acc :  0.9636

test loss : 0.1801, test f1 score :  0.9490, test acc :  0.9462

-------------------
7epoch start
-------------------

learning rate :  0.000004


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:46<00:00,  6.51it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.38it/s]


best model saved

train loss : 0.0866, train f1 score :  0.9739, train acc :  0.9714

test loss : 0.1676, test f1 score :  0.9553, test acc :  0.9520

-------------------
8epoch start
-------------------

learning rate :  0.000003


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:46<00:00,  6.52it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.57it/s]


best model saved

train loss : 0.0701, train f1 score :  0.9786, train acc :  0.9764

test loss : 0.1667, test f1 score :  0.9561, test acc :  0.9532

-------------------
9epoch start
-------------------

learning rate :  0.000002


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:46<00:00,  6.52it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.65it/s]


best model saved

train loss : 0.0588, train f1 score :  0.9822, train acc :  0.9803

test loss : 0.1601, test f1 score :  0.9594, test acc :  0.9567

-------------------
10epoch start
-------------------

learning rate :  0.000001


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:46<00:00,  6.52it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.58it/s]


best model saved

train loss : 0.0503, train f1 score :  0.9848, train acc :  0.9832

test loss : 0.1597, test f1 score :  0.9603, test acc :  0.9578

best test loss :  0.1597

best epoch : 10

best accuracy :  0.9578

1 fold pass

2 fold pass

3 fold pass

4 fold pass

0 fold pass



Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.out_proj.weight', 'cl

-------------------
1epoch start
-------------------

learning rate :  0.000010


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:48<00:00,  6.49it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.35it/s]


best model saved

train loss : 0.4529, train f1 score :  0.8396, train acc :  0.8327

test loss : 0.3603, test f1 score :  0.8791, test acc :  0.8733

-------------------
2epoch start
-------------------

learning rate :  0.000009


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:48<00:00,  6.50it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.36it/s]


best model saved

train loss : 0.3215, train f1 score :  0.8942, train acc :  0.8877

test loss : 0.2927, test f1 score :  0.9048, test acc :  0.8987

-------------------
3epoch start
-------------------

learning rate :  0.000008


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:48<00:00,  6.50it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.42it/s]


best model saved

train loss : 0.2473, train f1 score :  0.9206, train acc :  0.9146

test loss : 0.2428, test f1 score :  0.9247, test acc :  0.9189

-------------------
4epoch start
-------------------

learning rate :  0.000007


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:49<00:00,  6.48it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:33<00:00, 16.24it/s]


best model saved

train loss : 0.1875, train f1 score :  0.9417, train acc :  0.9364

test loss : 0.2106, test f1 score :  0.9364, test acc :  0.9314

-------------------
5epoch start
-------------------

learning rate :  0.000006


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:54<00:00,  6.42it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.45it/s]


best model saved

train loss : 0.1441, train f1 score :  0.9556, train acc :  0.9514

test loss : 0.1958, test f1 score :  0.9418, test acc :  0.9374

-------------------
6epoch start
-------------------

learning rate :  0.000005


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:53<00:00,  6.44it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.46it/s]


best model saved

train loss : 0.1128, train f1 score :  0.9652, train acc :  0.9617

test loss : 0.1715, test f1 score :  0.9508, test acc :  0.9477

-------------------
7epoch start
-------------------

learning rate :  0.000004


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:50<00:00,  6.47it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.58it/s]


best model saved

train loss : 0.0898, train f1 score :  0.9723, train acc :  0.9696

test loss : 0.1631, test f1 score :  0.9542, test acc :  0.9517

-------------------
8epoch start
-------------------

learning rate :  0.000003


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:53<00:00,  6.43it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.37it/s]


best model saved

train loss : 0.0714, train f1 score :  0.9780, train acc :  0.9758

test loss : 0.1596, test f1 score :  0.9567, test acc :  0.9540

-------------------
9epoch start
-------------------

learning rate :  0.000002


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:49<00:00,  6.49it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.62it/s]


best model saved

train loss : 0.0592, train f1 score :  0.9816, train acc :  0.9797

test loss : 0.1555, test f1 score :  0.9591, test acc :  0.9567

-------------------
10epoch start
-------------------

learning rate :  0.000001


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:52<00:00,  6.45it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:33<00:00, 16.27it/s]


best model saved

train loss : 0.0512, train f1 score :  0.9845, train acc :  0.9828

test loss : 0.1539, test f1 score :  0.9606, test acc :  0.9584

best test loss :  0.1539

best epoch : 10

best accuracy :  0.9584

2 fold pass

3 fold pass

4 fold pass

0 fold pass

1 fold pass



Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.out_proj.weight', 'cl

-------------------
1epoch start
-------------------

learning rate :  0.000010


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:52<00:00,  6.44it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:33<00:00, 16.11it/s]


best model saved

train loss : 0.4545, train f1 score :  0.8375, train acc :  0.8318

test loss : 0.3526, test f1 score :  0.8834, test acc :  0.8772

-------------------
2epoch start
-------------------

learning rate :  0.000009


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:52<00:00,  6.44it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:33<00:00, 16.23it/s]


best model saved

train loss : 0.3227, train f1 score :  0.8937, train acc :  0.8871

test loss : 0.2912, test f1 score :  0.9050, test acc :  0.8983

-------------------
3epoch start
-------------------

learning rate :  0.000008


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:53<00:00,  6.44it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.28it/s]


best model saved

train loss : 0.2473, train f1 score :  0.9202, train acc :  0.9142

test loss : 0.2395, test f1 score :  0.9237, test acc :  0.9180

-------------------
4epoch start
-------------------

learning rate :  0.000007


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:46<00:00,  6.52it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.56it/s]


best model saved

train loss : 0.1874, train f1 score :  0.9406, train acc :  0.9354

test loss : 0.2163, test f1 score :  0.9342, test acc :  0.9294

-------------------
5epoch start
-------------------

learning rate :  0.000006


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:46<00:00,  6.53it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.55it/s]


best model saved

train loss : 0.1413, train f1 score :  0.9559, train acc :  0.9518

test loss : 0.1794, test f1 score :  0.9475, test acc :  0.9434

-------------------
6epoch start
-------------------

learning rate :  0.000005


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:47<00:00,  6.51it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.62it/s]


best model saved

train loss : 0.1101, train f1 score :  0.9661, train acc :  0.9628

test loss : 0.1714, test f1 score :  0.9515, test acc :  0.9479

-------------------
7epoch start
-------------------

learning rate :  0.000004


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:52<00:00,  6.45it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.41it/s]


best model saved

train loss : 0.0871, train f1 score :  0.9734, train acc :  0.9707

test loss : 0.1646, test f1 score :  0.9565, test acc :  0.9537

-------------------
8epoch start
-------------------

learning rate :  0.000003


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:47<00:00,  6.51it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.47it/s]


best model saved

train loss : 0.0703, train f1 score :  0.9785, train acc :  0.9763

test loss : 0.1590, test f1 score :  0.9582, test acc :  0.9555

-------------------
9epoch start
-------------------

learning rate :  0.000002


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:46<00:00,  6.52it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.62it/s]


best model saved

train loss : 0.0580, train f1 score :  0.9823, train acc :  0.9804

test loss : 0.1567, test f1 score :  0.9590, test acc :  0.9569

-------------------
10epoch start
-------------------

learning rate :  0.000001


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:45<00:00,  6.53it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.46it/s]


best model saved

train loss : 0.0502, train f1 score :  0.9846, train acc :  0.9831

test loss : 0.1556, test f1 score :  0.9610, test acc :  0.9588

best test loss :  0.1556

best epoch : 10

best accuracy :  0.9588

3 fold pass

4 fold pass

0 fold pass

1 fold pass

2 fold pass



Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.out_proj.weight', 'cl

-------------------
1epoch start
-------------------

learning rate :  0.000010


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:52<00:00,  6.45it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:33<00:00, 16.06it/s]


best model saved

train loss : 0.4591, train f1 score :  0.8352, train acc :  0.8295

test loss : 0.3542, test f1 score :  0.8814, test acc :  0.8750

-------------------
2epoch start
-------------------

learning rate :  0.000009


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:53<00:00,  6.44it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:33<00:00, 16.21it/s]


best model saved

train loss : 0.3216, train f1 score :  0.8937, train acc :  0.8871

test loss : 0.2980, test f1 score :  0.9034, test acc :  0.8957

-------------------
3epoch start
-------------------

learning rate :  0.000008


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:52<00:00,  6.45it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:33<00:00, 16.21it/s]


best model saved

train loss : 0.2479, train f1 score :  0.9208, train acc :  0.9147

test loss : 0.2444, test f1 score :  0.9228, test acc :  0.9175

-------------------
4epoch start
-------------------

learning rate :  0.000007


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:51<00:00,  6.46it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:33<00:00, 16.17it/s]


best model saved

train loss : 0.1897, train f1 score :  0.9412, train acc :  0.9361

test loss : 0.2075, test f1 score :  0.9370, test acc :  0.9322

-------------------
5epoch start
-------------------

learning rate :  0.000006


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:52<00:00,  6.45it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.52it/s]


best model saved

train loss : 0.1462, train f1 score :  0.9548, train acc :  0.9505

test loss : 0.1873, test f1 score :  0.9441, test acc :  0.9400

-------------------
6epoch start
-------------------

learning rate :  0.000005


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:45<00:00,  6.53it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.44it/s]


best model saved

train loss : 0.1157, train f1 score :  0.9648, train acc :  0.9613

test loss : 0.1846, test f1 score :  0.9456, test acc :  0.9431

-------------------
7epoch start
-------------------

learning rate :  0.000004


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:51<00:00,  6.46it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:33<00:00, 16.25it/s]


best model saved

train loss : 0.0928, train f1 score :  0.9718, train acc :  0.9692

test loss : 0.1675, test f1 score :  0.9534, test acc :  0.9506

-------------------
8epoch start
-------------------

learning rate :  0.000003


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:51<00:00,  6.45it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.40it/s]


best model saved

train loss : 0.0753, train f1 score :  0.9775, train acc :  0.9753

test loss : 0.1579, test f1 score :  0.9568, test acc :  0.9539

-------------------
9epoch start
-------------------

learning rate :  0.000002


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:52<00:00,  6.44it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.57it/s]


best model saved

train loss : 0.0627, train f1 score :  0.9809, train acc :  0.9791

test loss : 0.1589, test f1 score :  0.9589, test acc :  0.9562

-------------------
10epoch start
-------------------

learning rate :  0.000001


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:44<00:00,  6.54it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.57it/s]


best model saved

train loss : 0.0539, train f1 score :  0.9836, train acc :  0.9822

test loss : 0.1573, test f1 score :  0.9597, test acc :  0.9574

best test loss :  0.1573

best epoch : 10

best accuracy :  0.9574

4 fold pass

0 fold pass

1 fold pass

2 fold pass

3 fold pass



Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.out_proj.weight', 'cl

-------------------
1epoch start
-------------------

learning rate :  0.000010


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:45<00:00,  6.53it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.35it/s]


best model saved

train loss : 0.4568, train f1 score :  0.8344, train acc :  0.8296

test loss : 0.3532, test f1 score :  0.8824, test acc :  0.8761

-------------------
2epoch start
-------------------

learning rate :  0.000009


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:45<00:00,  6.54it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.49it/s]


best model saved

train loss : 0.3203, train f1 score :  0.8943, train acc :  0.8876

test loss : 0.2974, test f1 score :  0.9044, test acc :  0.8974

-------------------
3epoch start
-------------------

learning rate :  0.000008


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:44<00:00,  6.54it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.40it/s]


best model saved

train loss : 0.2443, train f1 score :  0.9218, train acc :  0.9156

test loss : 0.2420, test f1 score :  0.9235, test acc :  0.9179

-------------------
4epoch start
-------------------

learning rate :  0.000007


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:44<00:00,  6.54it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.56it/s]


best model saved

train loss : 0.1841, train f1 score :  0.9417, train acc :  0.9368

test loss : 0.2083, test f1 score :  0.9389, test acc :  0.9344

-------------------
5epoch start
-------------------

learning rate :  0.000006


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:44<00:00,  6.54it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.42it/s]


best model saved

train loss : 0.1393, train f1 score :  0.9570, train acc :  0.9530

test loss : 0.1863, test f1 score :  0.9464, test acc :  0.9426

-------------------
6epoch start
-------------------

learning rate :  0.000005


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:45<00:00,  6.53it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.65it/s]


best model saved

train loss : 0.1077, train f1 score :  0.9671, train acc :  0.9639

test loss : 0.1724, test f1 score :  0.9524, test acc :  0.9496

-------------------
7epoch start
-------------------

learning rate :  0.000004


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:45<00:00,  6.54it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.49it/s]


best model saved

train loss : 0.0861, train f1 score :  0.9738, train acc :  0.9712

test loss : 0.1566, test f1 score :  0.9563, test acc :  0.9536

-------------------
8epoch start
-------------------

learning rate :  0.000003


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:45<00:00,  6.54it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.59it/s]


best model saved

train loss : 0.0698, train f1 score :  0.9786, train acc :  0.9766

test loss : 0.1565, test f1 score :  0.9580, test acc :  0.9559

-------------------
9epoch start
-------------------

learning rate :  0.000002


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:44<00:00,  6.54it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.68it/s]


best model saved

train loss : 0.0573, train f1 score :  0.9824, train acc :  0.9806

test loss : 0.1548, test f1 score :  0.9591, test acc :  0.9575

-------------------
10epoch start
-------------------

learning rate :  0.000001


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3433/3433 [08:44<00:00,  6.54it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:32<00:00, 16.54it/s]


best model saved

train loss : 0.0488, train f1 score :  0.9849, train acc :  0.9833

test loss : 0.1524, test f1 score :  0.9610, test acc :  0.9592

best test loss :  0.1524

best epoch : 10

best accuracy :  0.9592



# ensemble & test

In [9]:
# load_data() yields a pair; only its second element is needed in this
# section, so the first is discarded into `_`.
_, test_df = load_data()

In [13]:
"""
pretrain 다운로드:
https://drive.google.com/drive/folders/1cqwv4OQtjCQFfMQkaiO6WYmcHLngg-B5?usp=share_link
"""

def ensemble(checkpoint_paths=None, pt='klue/roberta-large'):
    """Average the logits produced by several saved fold checkpoints.

    Every checkpoint is passed to ``run_predict`` in order; the second
    element of each returned pair (the logits) contributes with equal
    weight ``1 / len(checkpoint_paths)`` to the result.

    Args:
        checkpoint_paths: Sequence of checkpoint file paths to ensemble.
            ``None`` (default) selects the five klue/roberta-large fold
            checkpoints under ./saved_models/fold3/experiment_name_folder/.
        pt: Backbone identifier written to ``args.pt`` before predicting.

    Returns:
        The equally weighted average of the per-checkpoint logits, in
        whatever array/tensor type ``run_predict`` returns.

    Raises:
        ValueError: If ``checkpoint_paths`` is given but empty.

    Side effects:
        Overwrites the global ``args.pt``.
    """
    # NOTE: an earlier klue/roberta-base ensemble is currently disabled. It
    # used args.max_len = 33 and the checkpoints 0f_10e_0.9323_s.pth,
    # 1f_10e_0.9321_s.pth, 2f_10e_0.9333_s.pth, 3f_10e_0.9325_s.pth and
    # 4f_10e_0.9321_s.pth from the same folder. To reproduce it, set
    # args.max_len = 33 and call ensemble(paths, pt='klue/roberta-base').
    if checkpoint_paths is None:
        checkpoint_paths = (
            "./saved_models/fold3/experiment_name_folder/0f_10e_0.9578_s.pth",
            "./saved_models/fold3/experiment_name_folder/1f_10e_0.9584_s.pth",
            "./saved_models/fold3/experiment_name_folder/2f_10e_0.9588_s.pth",
            "./saved_models/fold3/experiment_name_folder/3f_10e_0.9574_s.pth",
            "./saved_models/fold3/experiment_name_folder/4f_10e_0.9592_s.pth",
        )
    else:
        checkpoint_paths = list(checkpoint_paths)

    n_models = len(checkpoint_paths)
    if n_models == 0:
        raise ValueError("checkpoint_paths must contain at least one checkpoint")

    # run_predict is expected to read the backbone name from the global args.
    args.pt = pt

    # Start from the scalar 0 so the first `+=` rebinds the name to a fresh
    # array/tensor instead of mutating a logit object owned by run_predict.
    averaged_logit = 0
    for path in checkpoint_paths:
        _, logit = run_predict(path)
        averaged_logit += logit / n_models

    return averaged_logit


In [14]:
# Run the checkpoint ensemble and keep the combined logits it returns.
final_logit = ensemble()

test load
set testloader


Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifie

load saved models


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:08<00:00, 63.56it/s]


complete predict
test load
set testloader


Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifie

load saved models


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:08<00:00, 63.09it/s]


complete predict
test load
set testloader


Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifie

load saved models


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:08<00:00, 62.45it/s]


complete predict
test load
set testloader


Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifie

load saved models


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:08<00:00, 63.16it/s]


complete predict
test load
set testloader


Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifie

load saved models


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:08<00:00, 63.41it/s]


complete predict
test load
set testloader


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifi

load saved models


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:21<00:00, 24.47it/s]


complete predict
test load
set testloader


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifi

load saved models


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:21<00:00, 24.53it/s]


complete predict
test load
set testloader


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifi

load saved models


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:22<00:00, 24.36it/s]


complete predict
test load
set testloader


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifi

load saved models


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:21<00:00, 24.56it/s]


complete predict
test load
set testloader


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifi

load saved models


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 537/537 [00:21<00:00, 24.53it/s]

complete predict





In [15]:
# Store the index of the largest ensembled logit along axis 1 in a new
# 'preds' column.
test_df['preds'] = final_logit.argmax(1)

In [26]:
# Recompute the predictions here so this cell does not depend on the
# previous one having been executed: argmax along axis 1 of the logits.
test_df['preds'] = final_logit.argmax(1)
# accuracy_score expects (y_true, y_pred): ground-truth labels go first.
# Accuracy itself is symmetric, but keeping the documented order avoids a
# silent error if this metric is later swapped for an asymmetric one.
acc = accuracy_score(test_df['label'], test_df['preds'])
print(f'final acc : {acc:.4f}')

final acc : 0.9685


In [16]:
# Display test_df (now carrying the 'preds' column next to 'label') as the
# cell output.
test_df

Unnamed: 0,contents,label,preds
0,놀이기구 부식이 심해 제보합니다. 위치는 의창구 #@주소#이고 조치 좀 해주세요.,0,0
1,어린이 학원 앞의 도로가 패여 있습니다. 조속한 보수를 해주시기 바랍니다.,0,0
2,창원시에서 평탄하지 못한 인도로 인하여 보행자 안전사고의 위험이 있습니다. 현장점검...,0,0
3,차가 다니는 골목길에 주차금지 장애물이 합법인지 문의합니다.,0,0
4,아파트 뒤편 도로 보수 공사를 요청합니다. 자칫하다가는 큰 사고로 이어질 수 있으니...,0,0
...,...,...,...
34320,진해구 돌아다니고 있는 야생멧돼지를 발견하고 신고 문의 드립니다.,3,3
34321,의창구 #@주소#에 있는 뉴트리아들 포획 부탁드려요.,3,3
34322,"문의합니다. 마산합포구 #@주소#의 화장실이 개방형 #@주소#,1, 2층 남녀 화장...",3,3
34323,성산구의 생활폐기물매립장 연락처 문의합니다.,3,3
