# 데이콘 Basic 쇼핑몰 리뷰 평점 분류 경진대회

- StratifiedKFold로 학습 데이터를 8개 fold로 나누고, fold마다 PLM(KcELECTRA)을 3 Epoch씩 파인튜닝한 뒤 Inference를 합니다.

## Google Drive Mount

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Import

In [None]:
## print()할 때 컬러풀하게 해주는 라이브러리
!pip install colorama

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import re
import os
import gc
import time
import random
import string

import copy
from copy import deepcopy

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

# Utils
from tqdm import tqdm

# For colored terminal text
from colorama import Fore, Back, Style
b_ = Fore.BLUE
y_ = Fore.YELLOW
sr_ = Style.RESET_ALL

# Suppress warnings
import warnings
warnings.filterwarnings("ignore")

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

## Config

In [None]:
## Pytorch Import
import torch 
import torch.nn as nn

from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

In [None]:
# Hyper-parameters and runtime settings shared by the cells below.
config ={
    'model': "beomi/KcELECTRA-base",  # checkpoint name handed to the from_pretrained(...) calls
    'learning_rate':5e-5,  # learning rate given to the AdamW optimizers
    'seed': 2022,  # used by set_seed() and as StratifiedKFold's random_state
    'output_dim': 4,  # number of output units of the classification head
    'n_folds' : 8,  # number of StratifiedKFold splits / fold-loop iterations
    'n_epochs': 3,  # epochs trained per fold
    "train_batch_size": 64, # batch size of the training DataLoader
    "valid_batch_size": 128, # batch size of the validation DataLoader
    "max_length": 128,  # tokenizer truncation / padding length
    "scheduler": 'CosineAnnealingLR',  # NOTE(review): not read anywhere in the visible notebook
    "min_lr": 1e-6,  # NOTE(review): not read anywhere in the visible notebook
    "T_max": 500,  # NOTE(review): not read anywhere in the visible notebook
    "weight_decay": 1e-6,  # weight decay given to the AdamW optimizers
    "n_accumulate": 1,  # NOTE(review): not read anywhere in the visible notebook
    
    # Use the first CUDA device when one is available, otherwise fall back to the CPU.
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
}

## Colab에서 할당된 GPU 확인

In [None]:
!nvidia-smi

Fri Aug  5 05:13:05 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   42C    P0    26W / 250W |      2MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Data

In [None]:
# Folder on the mounted Google Drive that holds the competition CSV files.
data_path = '/content/drive/MyDrive/Colab Notebooks/쇼핑몰 리뷰 평점 분류 경진대회/data/'

# Load the labelled training set, the unlabelled test set and the sample submission file.
train = pd.read_csv(data_path + "train.csv")
test= pd.read_csv(data_path + "test.csv")
ss = pd.read_csv(data_path + "sample_submission.csv")

In [None]:
print("Train Shape: ", train.shape, "Test Shape: ", test.shape)
train.head()

Train Shape:  (25000, 3) Test Shape:  (25000, 2)


Unnamed: 0,id,reviews,target
0,0,조아요 처음구입 싸게햇어요,2
1,1,생각보다 잘 안돼요 매지 바른지 하루밖에 안됐는데ㅠㅠ 25천원가량 주고 사기 너무 ...,1
2,2,디자인은괜찮은데 상품이 금이가서 교환했는데 두번째받은상품도 까져있고 안쪽에 금이가져...,2
3,3,기전에 이 제품말고 이마트 트레이더스에서만 팔던 프리미엄 제품을 사용했었습니다. 샘...,2
4,4,튼튼하고 손목을 잘 받쳐주네요~,5


In [None]:
# Target Class
train.target.unique()

array([2, 1, 5, 4])

## Target Categorical Encoding 

In [None]:
from sklearn.preprocessing import LabelEncoder

# Map the raw rating labels to consecutive integer class ids (the output below shows 1->0, 2->1, 5->3).
# NOTE(review): `encoder` is presumably kept so predictions can be mapped back with inverse_transform — confirm.
encoder = LabelEncoder()
train['new_target'] = encoder.fit_transform(train['target'])

In [None]:
train.head()

Unnamed: 0,id,reviews,target,new_target
0,0,조아요 처음구입 싸게햇어요,2,1
1,1,생각보다 잘 안돼요 매지 바른지 하루밖에 안됐는데ㅠㅠ 25천원가량 주고 사기 너무 ...,1,0
2,2,디자인은괜찮은데 상품이 금이가서 교환했는데 두번째받은상품도 까져있고 안쪽에 금이가져...,2,1
3,3,기전에 이 제품말고 이마트 트레이더스에서만 팔던 프리미엄 제품을 사용했었습니다. 샘...,2,1
4,4,튼튼하고 손목을 잘 받쳐주네요~,5,3


## Column 정리 및 Reset

In [None]:
print(train.shape)
train.head()

(25000, 4)


Unnamed: 0,id,reviews,target,new_target
0,0,조아요 처음구입 싸게햇어요,2,1
1,1,생각보다 잘 안돼요 매지 바른지 하루밖에 안됐는데ㅠㅠ 25천원가량 주고 사기 너무 ...,1,0
2,2,디자인은괜찮은데 상품이 금이가서 교환했는데 두번째받은상품도 까져있고 안쪽에 금이가져...,2,1
3,3,기전에 이 제품말고 이마트 트레이더스에서만 팔던 프리미엄 제품을 사용했었습니다. 샘...,2,1
4,4,튼튼하고 손목을 잘 받쳐주네요~,5,3


In [None]:
print(test.shape)
test.head()

(25000, 2)


Unnamed: 0,id,reviews
0,0,채소가 약간 시들어 있어요
1,1,발톱 두껍고 단단한 분들 써도 소용없어요 이 테이프 물렁거리고 힘이없어서 들어 올리...
2,2,부들부들 좋네요 입어보고 시원하면 또 살게요
3,3,이런 1. 8 골드 주라니깐 파란개 오네 회사전화걸어도 받지도 않고 머하자는거임?
4,4,검수도 없이 보내구 불량 배송비 5000원 청구하네요 완전별로 별하나도 아까워요


In [None]:
# Drop the id column and the raw rating; only the encoded label is kept.
train.drop(['id', 'target'], axis = 1, inplace = True)
# Positional rename: the remaining columns are (reviews, new_target), so the encoded label becomes 'target'.
train.columns = ['reviews', 'target']
train.head()

Unnamed: 0,reviews,target
0,조아요 처음구입 싸게햇어요,1
1,생각보다 잘 안돼요 매지 바른지 하루밖에 안됐는데ㅠㅠ 25천원가량 주고 사기 너무 ...,0
2,디자인은괜찮은데 상품이 금이가서 교환했는데 두번째받은상품도 까져있고 안쪽에 금이가져...,1
3,기전에 이 제품말고 이마트 트레이더스에서만 팔던 프리미엄 제품을 사용했었습니다. 샘...,1
4,튼튼하고 손목을 잘 받쳐주네요~,3


In [None]:
# Drop the id column so that only the review text remains.
test.drop(['id',], axis = 1, inplace = True)
# Positional rename of the single remaining column.
test.columns = ['reviews']
test.head()

Unnamed: 0,reviews
0,채소가 약간 시들어 있어요
1,발톱 두껍고 단단한 분들 써도 소용없어요 이 테이프 물렁거리고 힘이없어서 들어 올리...
2,부들부들 좋네요 입어보고 시원하면 또 살게요
3,이런 1. 8 골드 주라니깐 파란개 오네 회사전화걸어도 받지도 않고 머하자는거임?
4,검수도 없이 보내구 불량 배송비 5000원 청구하네요 완전별로 별하나도 아까워요


## Model

In [None]:
## Transformer 설치
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
## Transformers Import
from transformers import AutoTokenizer, AutoModel, AdamW, AutoConfig

### MeanPooling for PLM (ELECTRA) Class

In [None]:
## MeanPooling for KoElectra PLM

class MeanPooling(nn.Module):
    """Average token embeddings along dim 1 while ignoring masked-out positions."""

    def __init__(self):
        super().__init__()

    def forward(self, last_hidden_state, attention_mask):
        """Return the mask-weighted mean of ``last_hidden_state`` over dim 1.

        ``attention_mask`` gets a trailing axis and is expanded to the size of
        ``last_hidden_state``; positions whose mask is 0 contribute nothing to
        the sum. The divisor is clamped to 1e-9 so a fully masked row yields
        zeros instead of a division by zero.
        """
        weights = attention_mask.unsqueeze(-1).expand_as(last_hidden_state).float()
        masked_total = (last_hidden_state * weights).sum(dim=1)
        token_counts = weights.sum(dim=1).clamp(min=1e-9)
        return masked_total / token_counts

In [None]:
## Model

class Model(nn.Module):
    """Pretrained encoder followed by masked mean pooling, dropout and a linear head.

    ``forward`` returns log-probabilities (LogSoftmax over the last axis), so
    the matching criterion is ``nn.NLLLoss``.
    """

    def __init__(self, model_name):
        """Build the network.

        Args:
            model_name: checkpoint identifier passed to ``from_pretrained``.
                It was previously ignored in favour of the global
                ``config['model']``; every call site in this notebook passes
                ``config['model']``, so behaviour there is unchanged.
        """
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)
        self.config = AutoConfig.from_pretrained(model_name)
        self.drop = nn.Dropout(p=0.25)
        self.pooler = MeanPooling()
        # Size the head from the encoder's own config instead of a hard-coded 768,
        # so a checkpoint with a different hidden width still works.
        self.fc = nn.Linear(self.config.hidden_size, config['output_dim'])
        self.logsoftmax = nn.LogSoftmax(dim=-1)

    def forward(self, ids, mask):
        """Return log-probabilities for the given token ids and attention mask."""
        out = self.model(input_ids=ids,
                         attention_mask=mask,
                         output_hidden_states=False)
        # Pool over tokens with the attention mask so padding does not dilute the mean.
        out = self.pooler(out.last_hidden_state, mask)
        out = self.drop(out)
        outputs = self.fc(out)
        outputs = self.logsoftmax(outputs)

        return outputs

In [None]:
# Model -> Cuda:0 if GPU is available
# Build one model instance and move it to the configured device.
# NOTE: the fold loop further down creates a fresh Model per fold, so this
# instance is only used for the inspection cells that follow.
model = Model(config['model'])
model = model.to(config['device'])

Downloading config.json:   0%|          | 0.00/504 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/475M [00:00<?, ?B/s]

Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
model

Model(
  (model): ElectraModel(
    (embeddings): ElectraEmbeddings(
      (word_embeddings): Embedding(50135, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): ElectraEncoder(
      (layer): ModuleList(
        (0): ElectraLayer(
          (attention): ElectraAttention(
            (self): ElectraSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): ElectraSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwi

In [None]:
# Check if GPU is available One More
torch.cuda.is_available()

True

## Set Seed

 > SEED 값 고정.

In [None]:
def set_seed(seed=42):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices)
    and switches cuDNN to deterministic mode so repeated runs are comparable.

    Args:
        seed: integer seed applied to all generators.
    """
    # `random` was previously left unseeded, so anything using it was not reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; silently ignored without CUDA.
    torch.cuda.manual_seed_all(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed.
    # NOTE(review): this only affects interpreters started after this point
    # (e.g. DataLoader workers); the running process keeps its hash seed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    
set_seed(config['seed'])

## AutoTokenizer

In [None]:
## PLM Tokenizer
# Load the tokenizer that matches the checkpoint and keep it in config so later cells can reach it.
config['tokenizer'] = AutoTokenizer.from_pretrained(config['model'])

Downloading tokenizer_config.json:   0%|          | 0.00/288 [00:00<?, ?B/s]

Downloading vocab.txt:   0%|          | 0.00/387k [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
## Sample 확인
print(config['tokenizer'].tokenize(train['reviews'][0]))
print(config['tokenizer'](train['reviews'][0]))

['조아', '##요', '처음', '##구', '##입', '싸게', '##햇', '##어요']
{'input_ids': [2, 20338, 4071, 8804, 4230, 4068, 13930, 4436, 8184, 3], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


## Dataset

In [None]:
class Dataset(Dataset):
    """Tokenised review dataset backed by a pandas DataFrame.

    NOTE: the class deliberately keeps the name ``Dataset`` (callers use it),
    which shadows the ``torch.utils.data.Dataset`` base class after this cell runs.

    Args:
        df: DataFrame with a ``reviews`` column and, optionally, a ``target``
            column of integer class ids.
        tokenizer: object exposing ``encode_plus`` that returns ``input_ids``
            and ``attention_mask``.
        max_length: length every sequence is truncated / padded to.
    """

    def __init__(self, df, tokenizer, max_length):
        self.df = df
        self.max_len = max_length
        self.tokenizer = tokenizer
        # Store plain lists so __getitem__(i) is positional. Indexing the
        # Series directly is label-based and breaks when df's index is not 0..n-1.
        self.reviews = df['reviews'].tolist()
        # The target column is optional so the same class can wrap unlabelled data.
        self.target = df['target'].tolist() if 'target' in df.columns else None

    def __len__(self):
        return len(self.reviews)

    def __getitem__(self, index):
        """Return a dict with ``ids``, ``mask`` and, when labels exist, ``target``."""
        inputs = self.tokenizer.encode_plus(
            self.reviews[index],
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length'
            )

        item = {
            'ids': torch.tensor(inputs['input_ids'], dtype=torch.long),
            'mask': torch.tensor(inputs['attention_mask'], dtype=torch.long),
        }
        if self.target is not None:
            item['target'] = torch.tensor(self.target[index], dtype=torch.long)
        return item

## StratifiedKFold - Classification에 유익한 Cross Validation 

In [None]:
from sklearn.model_selection import StratifiedKFold, KFold

# Stratified splitter; n_splits comes from config['n_folds'] (8 in this run).
skf = StratifiedKFold(n_splits=config['n_folds'], shuffle=True, random_state=config['seed'])

# Enumerate the splits and record, for every validation row, which fold it belongs to
# in a new "kfold" column.
# NOTE(review): skf.split yields positional indices while .loc is label-based; this
# assumes `train` still has its default 0..n-1 index — confirm.
for fold, ( _, val_) in enumerate(skf.split(X=train, y=train.target)):
    train.loc[val_ , "kfold"] = int(fold)

# Cast to int explicitly, just in case the column did not end up with an integer dtype.
train["kfold"] = train["kfold"].astype(int)
train.head()

Unnamed: 0,reviews,target,kfold
0,조아요 처음구입 싸게햇어요,1,0
1,생각보다 잘 안돼요 매지 바른지 하루밖에 안됐는데ㅠㅠ 25천원가량 주고 사기 너무 ...,0,1
2,디자인은괜찮은데 상품이 금이가서 교환했는데 두번째받은상품도 까져있고 안쪽에 금이가져...,1,0
3,기전에 이 제품말고 이마트 트레이더스에서만 팔던 프리미엄 제품을 사용했었습니다. 샘...,1,6
4,튼튼하고 손목을 잘 받쳐주네요~,3,4


In [None]:
# 다시 한 번 kfolds, Epoch 수 확인 
config['n_folds'], config['n_epochs']

(8, 3)

## Dataset -> DataLoader

In [None]:
def prepare_loader(fold):
    """Build the (train_loader, valid_loader) pair for one cross-validation fold.

    Rows of the global ``train`` frame whose ``kfold`` equals ``fold`` form the
    validation split; all other rows form the training split. Both splits get
    a fresh 0..n-1 index before being wrapped in ``Dataset``.
    """
    in_valid = train.kfold == fold
    train_df = train[~in_valid].reset_index(drop=True)
    valid_df = train[in_valid].reset_index(drop=True)

    def _as_dataset(frame):
        # Both splits share the tokenizer and maximum length from config.
        return Dataset(frame,
                       tokenizer=config['tokenizer'],
                       max_length=config['max_length'])

    train_ds = _as_dataset(train_df)
    valid_ds = _as_dataset(valid_df)

    # Settings common to both loaders; only batch size and shuffling differ.
    shared = dict(num_workers=2, pin_memory=True, drop_last=False)

    train_loader = DataLoader(train_ds,
                              batch_size=config['train_batch_size'],
                              shuffle=True,
                              **shared)
    valid_loader = DataLoader(valid_ds,
                              batch_size=config['valid_batch_size'],
                              shuffle=False,
                              **shared)

    return train_loader, valid_loader

## Loss Function -> NLLLoss
 - Model의 마지막 레이어가 self.logsoftmax = nn.LogSoftmax(dim = -1) 이므로, 모델 출력은 log-확률입니다.

   따라서 log-확률을 입력으로 받는 NLLLoss()를 Loss Function으로 함께 사용해야 합니다. (LogSoftmax + NLLLoss 조합은 수식상 CrossEntropyLoss와 동일합니다.)

 - NLLLoss도 nn.Module이므로 .to()로 GPU(Cuda:0)에 보낼 수 있습니다.

In [None]:
# Negative log-likelihood criterion; it expects log-probabilities, which Model.forward
# produces through its final LogSoftmax layer. Used by the train/valid epoch functions.
loss_fn = nn.NLLLoss().to(config['device'])

## Optimizer & Scheduler

In [None]:
# optimizer
# AdamW over the parameters of the model instance created above.
# NOTE: the fold loop further down rebinds `optimizer` to a new optimizer for every fold.
optimizer = torch.optim.AdamW(model.parameters(), 
                              lr=config['learning_rate'], #5e-5
                              weight_decay=config['weight_decay']
                              )

In [None]:
import torch.optim as optim

class CosineWarmupScheduler(optim.lr_scheduler._LRScheduler):
    """Cosine learning-rate decay with a linear warm-up, stepped once per call.

    The multiplier applied to each base learning rate is
    ``0.5 * (1 + cos(pi * step / max_iters))``, additionally scaled by
    ``step / warmup`` while ``step <= warmup``.

    Args:
        optimizer: wrapped optimizer.
        warmup: number of warm-up steps; ``0`` (or less) disables warm-up.
        max_iters: step at which the cosine reaches zero; must be positive.

    Raises:
        ValueError: if ``max_iters`` is not positive.
    """

    def __init__(self, optimizer, warmup, max_iters):
        if max_iters <= 0:
            raise ValueError(f"max_iters must be positive, got {max_iters}")
        self.warmup = warmup
        self.max_num_iters = max_iters
        # The base-class constructor performs an initial step, so the attributes
        # above must be set before calling it.
        super().__init__(optimizer)

    def get_lr(self):
        lr_factor = self.get_lr_factor(epoch=self.last_epoch)
        return [base_lr * lr_factor for base_lr in self.base_lrs]

    def get_lr_factor(self, epoch):
        """Return the learning-rate multiplier for step ``epoch``."""
        # Clamp so the rate stays at zero after max_iters instead of following
        # the cosine back up.
        step = min(epoch, self.max_num_iters)
        lr_factor = 0.5 * (1 + np.cos(np.pi * step / self.max_num_iters))
        # Guard against warmup == 0, which used to divide by zero on the first step.
        if self.warmup > 0 and epoch <= self.warmup:
            lr_factor *= epoch * 1.0 / self.warmup
        return float(lr_factor)
        

## Accuracy Metric Func

In [None]:
# Pytorch - Classification에서 Accuracy 보는 함수코드
# 아래 train_one_epoch 함수와 valid_one_epoch 함수에 적용할 예정입니다. 

def calc_accuracy(X, Y):
    """Return the accuracy, in percent, of the arg-max predictions in ``X`` against ``Y``.

    For every row of ``X`` the column with the largest score is taken as the
    predicted class id and compared with the matching entry of ``Y``.
    """
    # Column index of the largest score per row, i.e. the predicted (encoded) class.
    predicted = X.argmax(dim=1)

    # Count the matches on the host, then divide by the number of rows.
    n_correct = (predicted == Y).sum().detach().cpu().numpy()
    ratio = n_correct / predicted.size(0)

    # Scale the ratio to a percentage.
    return ratio * 100

## Train One Epoch Function

In [None]:
def train_one_epoch(model, 
                    dataloader, 
                    optimizer,
                    scheduler,
                    epoch,
                    device = config['device']):
    """Train ``model`` for one pass over ``dataloader`` and return the mean loss.

    Args:
        model: network called as ``model(ids, masks)``.
        dataloader: yields dicts with ``ids``, ``mask`` and ``target`` tensors.
        optimizer: optimizer stepped once per batch.
        scheduler: learning-rate scheduler stepped once per batch, or ``None``.
        epoch: epoch number, used only for the progress-bar display.
        device: device the batch tensors are moved to.

    Returns:
        The sample-weighted mean training loss of the epoch as a float.
    """
    y_true = []
    preds = []

    # Sum of (batch loss * batch size). It is kept separate from the displayed
    # average: dividing the accumulator in place on every step (as before)
    # destroyed the running sum and made the reported loss meaningless.
    loss_sum = 0.0
    train_loss = 0.0
    dataset_size = 0

    bar = tqdm(enumerate(dataloader), total = len(dataloader))

    model.train()
    for step, data in bar:
        ids = data['ids'].to(device, dtype = torch.long)
        masks = data['mask'].to(device, dtype = torch.long)
        targets = data['target'].to(device, dtype = torch.long)

        batch_size = ids.size(0)

        y_preds = model(ids, masks)
        loss = loss_fn(y_preds, targets)

        optimizer.zero_grad()
        loss.backward()
        
        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5)

        optimizer.step()

        if scheduler is not None:
            scheduler.step()

        # Running mean loss over all samples seen so far in this epoch.
        dataset_size += batch_size
        loss_sum += float(loss.item() * batch_size)
        train_loss = loss_sum / dataset_size

        # Detach so the stored predictions do not keep autograd state alive.
        preds.append(y_preds.detach())
        y_true.append(targets)

        bar.set_postfix(Epoch = epoch, 
                        Train_loss = train_loss,
                        LR = optimizer.param_groups[0]['lr']
                        )
        
    preds = torch.cat(preds, dim = 0)
    y_true = torch.cat(y_true, dim = 0)

    # Accuracy
    accuracy = calc_accuracy(preds, y_true)
    print()
    print("Train's Accuracy: %.2f percent" % accuracy)
    print()
    gc.collect()

    return train_loss

## Valid One Epoch Function

In [None]:
@torch.no_grad()
def valid_one_epoch(model, 
                    dataloader, 
                    epoch, 
                    device = config['device'],
                    optimizer = None):
    """Evaluate ``model`` on ``dataloader`` and return the mean loss.

    Args:
        model: network called as ``model(ids, masks)``.
        dataloader: yields dicts with ``ids``, ``mask`` and ``target`` tensors.
        epoch: epoch number, used only for the progress-bar display.
        device: device the batch tensors are moved to.
        optimizer: optional optimizer whose current learning rate is shown in
            the progress bar. The function used to read a global ``optimizer``
            for this; it is now an explicit, optional argument and the LR is
            simply omitted when it is not given.

    Returns:
        The sample-weighted mean validation loss as a float.
    """
    y_true = []
    preds = []
    
    # Sum of (batch loss * batch size), kept separate from the displayed average.
    # Dividing the accumulator in place on every step (as before) produced a
    # value dominated by the last batch, which then drove checkpoint selection.
    loss_sum = 0.0
    valid_loss = 0.0
    dataset_size = 0
    
    bar = tqdm(enumerate(dataloader), total = len(dataloader))

    # The decorator already disables gradient tracking; eval() additionally
    # switches layers such as dropout to inference behaviour.
    model.eval()
    for step, data in bar:
        ids = data['ids'].to(device, dtype = torch.long)
        masks = data['mask'].to(device, dtype = torch.long)
        targets = data['target'].to(device, dtype = torch.long)

        batch_size = ids.size(0)

        y_preds = model(ids, masks)
        loss = loss_fn(y_preds, targets)

        dataset_size += batch_size
        loss_sum += float(loss.item() * batch_size)
        valid_loss = loss_sum / dataset_size

        preds.append(y_preds)
        y_true.append(targets)

        postfix = {'Epoch': epoch, 'Valid_loss': valid_loss}
        if optimizer is not None:
            postfix['LR'] = optimizer.param_groups[0]['lr']
        bar.set_postfix(**postfix)
    
    preds = torch.cat(preds, dim = 0)
    y_true = torch.cat(y_true, dim = 0)

    accuracy = calc_accuracy(preds, y_true)
    print()
    print("Valid's Accuracy: : %.2f precent" % accuracy)
    print()
    gc.collect()

    return valid_loss

## Run Training Function

In [None]:
# Root output folder of this experiment; run_training saves checkpoints under base_path + "model/".
base_path = '/content/drive/MyDrive/Colab Notebooks/쇼핑몰 리뷰 평점 분류 경진대회/bs1_t18_koElectra_scheduler_Customized_base01_t10/'

In [None]:
def run_training(model, optimizer, scheduler, device, n_epochs, fold):
    """Train one fold for ``n_epochs`` and keep the weights with the lowest validation loss.

    NOTE: the data comes from the module-level ``train_loader`` and
    ``valid_loader`` variables, which the caller must have set for this fold.

    Args:
        model: network to train.
        optimizer: optimizer bound to ``model``'s parameters.
        scheduler: per-step learning-rate scheduler, or ``None``.
        device: device the batches are moved to.
        n_epochs: number of epochs to run.
        fold: fold number, used in the checkpoint file name and in log output.

    Returns:
        ``(model, train_history, valid_history)`` where ``model`` carries the
        best weights found and the histories hold one mean loss per epoch.
    """
    if torch.cuda.is_available():
        print("INFO: GPU - {}\n".format(torch.cuda.get_device_name()))
    
    start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())

    # None until some epoch improves on the initial (infinite) loss, e.g. when
    # n_epochs is 0 or every validation loss is NaN.
    lowest_epoch = None
    lowest_loss = np.inf
    train_history, valid_history = [],  []

    for epoch in range(1, n_epochs +1):
        gc.collect()

        # Honour the `device` argument rather than silently using config['device'].
        train_epoch_loss = train_one_epoch(model= model,
                                           dataloader = train_loader,
                                           optimizer = optimizer,
                                           scheduler = scheduler,
                                           device = device,
                                           epoch = epoch
                                           )
        
        valid_epoch_loss = valid_one_epoch(model,
                                           dataloader = valid_loader,
                                           device = device,
                                           epoch = epoch)
        
        train_history += [train_epoch_loss]
        valid_history += [valid_epoch_loss]

        if valid_epoch_loss <= lowest_loss:
            print(f"{b_}Validation Loss Improved({lowest_loss}) --> ({valid_epoch_loss})")
            lowest_loss = valid_epoch_loss
            lowest_epoch = epoch
            best_model_wts = copy.deepcopy(model.state_dict())
            PATH = base_path + f"model/Loss-Fold-{fold}.bin"
            # torch.save does not create missing folders, so make sure the target exists.
            save_dir = os.path.dirname(PATH)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
            torch.save(model.state_dict(), PATH)
            print(f"Model Saved{sr_}")
        
        print()

    end = time.time()
    # Split whole seconds with divmod so rounding can never print "60s".
    total_seconds = int(round(end - start))
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(hours, minutes, seconds))
    if lowest_epoch is None:
        # Formatting an infinite epoch with %d used to raise OverflowError here.
        print("Best Loss: no epoch improved the validation loss in %dth Fold" % fold)
    else:
        print("Best Loss: %.4e at %d th Epoch of %dth Fold" % (lowest_loss, lowest_epoch, fold))

    # load best model weights
    model.load_state_dict(best_model_wts)
    
    return model, train_history, valid_history

## Train Start

In [None]:
config['n_folds'], config['n_epochs']

(8, 3)

In [None]:
# Cross-validation driver: for every fold, build the loaders, create a fresh
# model / optimizer / scheduler, train for config['n_epochs'] epochs and free
# the per-fold objects before moving on.
for fold in range(0, config['n_folds']):
    print(f"{y_}==== Fold: {fold} ====={sr_}")

    # DataLoaders (run_training reads these module-level names).
    train_loader, valid_loader = prepare_loader(fold = fold)

    # Fresh model for this fold.
    model = Model(config['model'])
    model = model.to(config['device'])

    # Optimizer. torch.optim.AdamW replaces the deprecated transformers.AdamW,
    # matching the optimizer cell above; eps=1e-6 keeps the default of the
    # implementation it replaces.
    optimizer = torch.optim.AdamW(model.parameters(),
                                  lr = config['learning_rate'],
                                  weight_decay = config['weight_decay'],
                                  eps = 1e-6)
    
    # Scheduler, stepped once per batch.
    # NOTE(review): the logs below show 342 steps per epoch, i.e. about 1026 steps
    # per fold, so with max_iters=2000 the cosine decay never completes — confirm
    # this is intended.
    scheduler = CosineWarmupScheduler(optimizer=optimizer, warmup=100, max_iters=2000)

    # Train this fold.
    model, train_histories, valid_histories = run_training(model, 
                                                       optimizer,
                                                       scheduler,
                                                       device = config['device'],
                                                       n_epochs = config['n_epochs'],
                                                       fold = fold
                                                       )
    
    # Drop everything that references this fold's parameters. The optimizer and
    # scheduler are included because they would otherwise keep the old weights
    # and optimizer state alive while the next fold's model is being built.
    del model, optimizer, scheduler, train_histories, train_loader, valid_loader, valid_histories

    _ = gc.collect()
    if torch.cuda.is_available():
        # Return cached, now-unused GPU blocks to the driver.
        torch.cuda.empty_cache()
    
    print()

[33m==== Fold: 0 =====[0m


Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


INFO: GPU - Tesla P100-PCIE-16GB



100%|██████████| 342/342 [04:27<00:00,  1.28it/s, Epoch=1, LR=4.65e-5, Train_loss=0.00129]



Train's Accuracy: 63.91 percent



100%|██████████| 25/25 [00:11<00:00,  2.26it/s, Epoch=1, LR=4.65e-5, Valid_loss=0.0136]



Valid's Accuracy: : 69.22 precent

[34mValidation Loss Improved(inf) --> (0.01362597945165156)
Model Saved[0m



100%|██████████| 342/342 [04:26<00:00,  1.28it/s, Epoch=2, LR=3.69e-5, Train_loss=0.00164]



Train's Accuracy: 71.90 percent



100%|██████████| 25/25 [00:11<00:00,  2.27it/s, Epoch=2, LR=3.69e-5, Valid_loss=0.015]



Valid's Accuracy: : 70.43 precent




100%|██████████| 342/342 [04:33<00:00,  1.25it/s, Epoch=3, LR=2.4e-5, Train_loss=0.0013]



Train's Accuracy: 75.93 percent



100%|██████████| 25/25 [00:11<00:00,  2.27it/s, Epoch=3, LR=2.4e-5, Valid_loss=0.0144]



Valid's Accuracy: : 69.38 precent


Training complete in 0h 14m 4s
Best Loss: 1.3626e-02 at 1 th Epoch of 0th Fold

[33m==== Fold: 1 =====[0m


Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


INFO: GPU - Tesla P100-PCIE-16GB



100%|██████████| 342/342 [04:27<00:00,  1.28it/s, Epoch=1, LR=4.65e-5, Train_loss=0.00156]



Train's Accuracy: 63.35 percent



100%|██████████| 25/25 [00:11<00:00,  2.27it/s, Epoch=1, LR=4.65e-5, Valid_loss=0.013]



Valid's Accuracy: : 68.96 precent

[34mValidation Loss Improved(inf) --> (0.013001715480527446)
Model Saved[0m



100%|██████████| 342/342 [04:26<00:00,  1.28it/s, Epoch=2, LR=3.69e-5, Train_loss=0.00224]



Train's Accuracy: 72.05 percent



100%|██████████| 25/25 [00:11<00:00,  2.27it/s, Epoch=2, LR=3.69e-5, Valid_loss=0.0132]



Valid's Accuracy: : 69.41 precent




100%|██████████| 342/342 [04:26<00:00,  1.28it/s, Epoch=3, LR=2.4e-5, Train_loss=0.00111]



Train's Accuracy: 75.94 percent



100%|██████████| 25/25 [00:11<00:00,  2.27it/s, Epoch=3, LR=2.4e-5, Valid_loss=0.0148]



Valid's Accuracy: : 68.86 precent


Training complete in 0h 13m 58s
Best Loss: 1.3002e-02 at 1 th Epoch of 1th Fold

[33m==== Fold: 2 =====[0m


Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


INFO: GPU - Tesla P100-PCIE-16GB



100%|██████████| 342/342 [04:26<00:00,  1.28it/s, Epoch=1, LR=4.65e-5, Train_loss=0.00189]



Train's Accuracy: 63.52 percent



100%|██████████| 25/25 [00:11<00:00,  2.27it/s, Epoch=1, LR=4.65e-5, Valid_loss=0.0174]



Valid's Accuracy: : 69.44 precent

[34mValidation Loss Improved(inf) --> (0.017406230394374833)
Model Saved[0m



100%|██████████| 342/342 [04:26<00:00,  1.28it/s, Epoch=2, LR=3.69e-5, Train_loss=0.00194]



Train's Accuracy: 71.76 percent



100%|██████████| 25/25 [00:11<00:00,  2.27it/s, Epoch=2, LR=3.69e-5, Valid_loss=0.0152]



Valid's Accuracy: : 70.02 precent

[34mValidation Loss Improved(0.017406230394374833) --> (0.015161126282295862)
Model Saved[0m



100%|██████████| 342/342 [04:27<00:00,  1.28it/s, Epoch=3, LR=2.4e-5, Train_loss=0.0012]



Train's Accuracy: 75.64 percent



100%|██████████| 25/25 [00:11<00:00,  2.27it/s, Epoch=3, LR=2.4e-5, Valid_loss=0.0156]



Valid's Accuracy: : 69.82 precent


Training complete in 0h 13m 60s
Best Loss: 1.5161e-02 at 2 th Epoch of 2th Fold

[33m==== Fold: 3 =====[0m


Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


INFO: GPU - Tesla P100-PCIE-16GB



100%|██████████| 342/342 [04:26<00:00,  1.28it/s, Epoch=1, LR=4.65e-5, Train_loss=0.0016]



Train's Accuracy: 63.65 percent



100%|██████████| 25/25 [00:11<00:00,  2.27it/s, Epoch=1, LR=4.65e-5, Valid_loss=0.0128]



Valid's Accuracy: : 69.86 precent

[34mValidation Loss Improved(inf) --> (0.012845703662444724)
Model Saved[0m



100%|██████████| 342/342 [04:26<00:00,  1.28it/s, Epoch=2, LR=3.69e-5, Train_loss=0.00162]



Train's Accuracy: 71.99 percent



100%|██████████| 25/25 [00:11<00:00,  2.27it/s, Epoch=2, LR=3.69e-5, Valid_loss=0.0139]



Valid's Accuracy: : 69.60 precent




100%|██████████| 342/342 [04:26<00:00,  1.28it/s, Epoch=3, LR=2.4e-5, Train_loss=0.00133]



Train's Accuracy: 75.91 percent



100%|██████████| 25/25 [00:10<00:00,  2.27it/s, Epoch=3, LR=2.4e-5, Valid_loss=0.0155]



Valid's Accuracy: : 68.99 precent


Training complete in 0h 13m 57s
Best Loss: 1.2846e-02 at 1 th Epoch of 3th Fold

[33m==== Fold: 4 =====[0m


Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


INFO: GPU - Tesla P100-PCIE-16GB



100%|██████████| 342/342 [04:26<00:00,  1.28it/s, Epoch=1, LR=4.65e-5, Train_loss=0.0018]



Train's Accuracy: 64.32 percent



100%|██████████| 25/25 [00:11<00:00,  2.27it/s, Epoch=1, LR=4.65e-5, Valid_loss=0.0124]



Valid's Accuracy: : 69.98 precent

[34mValidation Loss Improved(inf) --> (0.012393325605566299)
Model Saved[0m



100%|██████████| 342/342 [04:26<00:00,  1.28it/s, Epoch=2, LR=3.69e-5, Train_loss=0.00114]



Train's Accuracy: 71.81 percent



100%|██████████| 25/25 [00:11<00:00,  2.27it/s, Epoch=2, LR=3.69e-5, Valid_loss=0.0121]



Valid's Accuracy: : 69.89 precent

[34mValidation Loss Improved(0.012393325605566299) --> (0.01211300861113645)
Model Saved[0m



100%|██████████| 342/342 [04:26<00:00,  1.28it/s, Epoch=3, LR=2.4e-5, Train_loss=0.00154]



Train's Accuracy: 75.87 percent



100%|██████████| 25/25 [00:10<00:00,  2.27it/s, Epoch=3, LR=2.4e-5, Valid_loss=0.0133]



Valid's Accuracy: : 69.70 precent


Training complete in 0h 13m 59s
Best Loss: 1.2113e-02 at 2 th Epoch of 4th Fold

[33m==== Fold: 5 =====[0m


Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


INFO: GPU - Tesla P100-PCIE-16GB



100%|██████████| 342/342 [04:26<00:00,  1.28it/s, Epoch=1, LR=4.65e-5, Train_loss=0.00156]



Train's Accuracy: 63.10 percent



100%|██████████| 25/25 [00:11<00:00,  2.27it/s, Epoch=1, LR=4.65e-5, Valid_loss=0.0143]



Valid's Accuracy: : 70.40 precent

[34mValidation Loss Improved(inf) --> (0.01433878566370322)
Model Saved[0m



100%|██████████| 342/342 [04:26<00:00,  1.28it/s, Epoch=2, LR=3.69e-5, Train_loss=0.00129]



Train's Accuracy: 71.52 percent



100%|██████████| 25/25 [00:11<00:00,  2.26it/s, Epoch=2, LR=3.69e-5, Valid_loss=0.014]



Valid's Accuracy: : 70.40 precent

[34mValidation Loss Improved(0.01433878566370322) --> (0.014047877709187728)
Model Saved[0m



100%|██████████| 342/342 [04:26<00:00,  1.28it/s, Epoch=3, LR=2.4e-5, Train_loss=0.00123]



Train's Accuracy: 75.67 percent



100%|██████████| 25/25 [00:11<00:00,  2.27it/s, Epoch=3, LR=2.4e-5, Valid_loss=0.015]



Valid's Accuracy: : 69.73 precent


Training complete in 0h 13m 59s
Best Loss: 1.4048e-02 at 2 th Epoch of 5th Fold

[33m==== Fold: 6 =====[0m


Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


INFO: GPU - Tesla P100-PCIE-16GB



100%|██████████| 342/342 [04:26<00:00,  1.28it/s, Epoch=1, LR=4.65e-5, Train_loss=0.0018]



Train's Accuracy: 63.36 percent



100%|██████████| 25/25 [00:11<00:00,  2.25it/s, Epoch=1, LR=4.65e-5, Valid_loss=0.0105]



Valid's Accuracy: : 70.21 precent

[34mValidation Loss Improved(inf) --> (0.010504859528835304)
Model Saved[0m



100%|██████████| 342/342 [04:27<00:00,  1.28it/s, Epoch=2, LR=3.69e-5, Train_loss=0.00179]



Train's Accuracy: 71.83 percent



100%|██████████| 25/25 [00:11<00:00,  2.27it/s, Epoch=2, LR=3.69e-5, Valid_loss=0.0108]



Valid's Accuracy: : 67.84 precent




100%|██████████| 342/342 [04:26<00:00,  1.28it/s, Epoch=3, LR=2.4e-5, Train_loss=0.00117]



Train's Accuracy: 76.17 percent



100%|██████████| 25/25 [00:11<00:00,  2.24it/s, Epoch=3, LR=2.4e-5, Valid_loss=0.0107]



Valid's Accuracy: : 69.70 precent


Training complete in 0h 13m 58s
Best Loss: 1.0505e-02 at 1 th Epoch of 6th Fold

[33m==== Fold: 7 =====[0m


Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


INFO: GPU - Tesla P100-PCIE-16GB



100%|██████████| 342/342 [04:28<00:00,  1.27it/s, Epoch=1, LR=4.65e-5, Train_loss=0.00167]



Train's Accuracy: 63.26 percent



100%|██████████| 25/25 [00:11<00:00,  2.20it/s, Epoch=1, LR=4.65e-5, Valid_loss=0.00979]



Valid's Accuracy: : 69.89 precent

[34mValidation Loss Improved(inf) --> (0.009788027383526007)
Model Saved[0m



100%|██████████| 342/342 [04:28<00:00,  1.28it/s, Epoch=2, LR=3.69e-5, Train_loss=0.00145]



Train's Accuracy: 71.89 percent



100%|██████████| 25/25 [00:11<00:00,  2.26it/s, Epoch=2, LR=3.69e-5, Valid_loss=0.00919]



Valid's Accuracy: : 70.37 precent

[34mValidation Loss Improved(0.009788027383526007) --> (0.00918922128646637)
Model Saved[0m



100%|██████████| 342/342 [04:29<00:00,  1.27it/s, Epoch=3, LR=2.4e-5, Train_loss=0.00147]



Train's Accuracy: 76.22 percent



100%|██████████| 25/25 [00:11<00:00,  2.26it/s, Epoch=3, LR=2.4e-5, Valid_loss=0.00878]



Valid's Accuracy: : 69.95 precent

[34mValidation Loss Improved(0.00918922128646637) --> (0.008780377173942921)
Model Saved[0m

Training complete in 0h 14m 8s
Best Loss: 8.7804e-03 at 3 th Epoch of 7th Fold



## Inference

### Dataset for Inference

In [None]:
class DatasetInfer(Dataset):
    """Label-free dataset that tokenizes the ``reviews`` column for inference.

    Args:
        df: DataFrame containing a ``reviews`` column of text.
        tokenizer: Object exposing ``encode_plus``; its result must provide
            ``input_ids`` and ``attention_mask``.
        max_length: Forwarded to the tokenizer as ``max_length``; sequences
            are truncated and padded with ``padding='max_length'``.
    """

    def __init__(self, df, tokenizer, max_length):
        self.df = df
        self.max_len = max_length
        self.tokenizer = tokenizer
        self.reviews = df['reviews']

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        """Return the ``ids`` and ``mask`` long tensors for the row at position ``index``."""
        # Positional lookup: a label-based ``self.reviews[index]`` breaks (KeyError
        # or wrong row) whenever the DataFrame index is not exactly 0..n-1.
        reviews = self.reviews.iloc[index]
        inputs = self.tokenizer.encode_plus(
            reviews,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length'
            )

        ids = inputs['input_ids']
        mask = inputs['attention_mask']

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
        }

### Dataset -> DataLoader

In [None]:
# Wrap the test DataFrame so each row is tokenized on access.
test_ds = DatasetInfer(
    test,
    config['tokenizer'],
    config['max_length'],
)

# shuffle=False and drop_last=False: every test row is visited once, in the
# same order as the rows of `test`.
test_loader = DataLoader(
    test_ds,
    batch_size=config['valid_batch_size'],
    shuffle=False,
    drop_last=False,
    num_workers=2,
    pin_memory=True,
)

### Test Function

In [None]:
@torch.no_grad()
def test_func(model, dataloader, device = config['device']):
    """Run `model` over every batch of `dataloader` and stack the outputs.

    Args:
        model: Callable invoked as ``model(ids, masks)``; put into eval mode here.
        dataloader: Iterable of batches, each a mapping with 'ids' and 'mask' tensors.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        The per-batch outputs concatenated along axis 0 as a NumPy array.
    """
    model.eval()

    batch_outputs = []
    # The decorator already disables autograd for the whole call, so no inner
    # no_grad context is needed.
    for batch in tqdm(dataloader, total = len(dataloader)):
        input_ids = batch['ids'].to(device, dtype = torch.long)
        attention_masks = batch['mask'].to(device, dtype = torch.long)

        outputs = model(input_ids, attention_masks)
        batch_outputs.append(outputs.detach().cpu().numpy())

    stacked = np.concatenate(batch_outputs, axis = 0)
    gc.collect()

    return stacked

### Inference 

In [None]:
# Google Drive directory for this experiment; the model checkpoints and the
# submission CSV are resolved relative to it.
base_path = '/content/drive/MyDrive/Colab Notebooks/쇼핑몰 리뷰 평점 분류 경진대회/bs1_t18_koElectra_scheduler_Customized_base01_t10/'

In [None]:
## Checkpoint paths, one per fold.
# Derived from config['n_folds'] so the list cannot drift out of sync with the
# number of folds configured for training.
model_paths = [
    f"{base_path}model/Loss-Fold-{fold}.bin"
    for fold in range(config['n_folds'])
]

#### Inference Function

In [None]:
def inference(model_paths, dataloader, device=config['device']):
    """Average the predictions of several saved checkpoints over `dataloader`.

    Each path in `model_paths` is loaded into a freshly built `Model`, run over
    `dataloader` with `test_func`, and the per-model outputs are averaged
    element-wise.

    Args:
        model_paths: Iterable of checkpoint paths holding model state dicts.
        dataloader: DataLoader forwarded to `test_func`.
        device: Device used for the model, for remapping checkpoint tensors,
            and for `test_func`.

    Returns:
        NumPy array: mean over models of the arrays returned by `test_func`.
    """
    # Load the per-fold checkpoints one at a time and predict on the test data.
    final_preds = []
    for i, path in enumerate(model_paths):
        model = Model(config['model'])
        # Honour the `device` argument rather than the global config value.
        model.to(device)
        # map_location lets checkpoints saved on a GPU load on a CPU-only runtime.
        model.load_state_dict(torch.load(path, map_location=device))

        print(f"Getting predictions for model {i+1}")
        preds = test_func(model, dataloader, device)
        final_preds.append(preds)

        # Release this fold's model before the next one is built, so two models
        # are never resident on the device at the same time.
        del model
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # Average across models.
    final_preds = np.array(final_preds)
    final_preds = np.mean(final_preds, axis=0)
    return final_preds

### Inference Argmax

In [None]:
# Run every checkpoint in model_paths over the test loader and average their outputs.
preds = inference(model_paths, test_loader, config['device'])

Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model 1


100%|██████████| 196/196 [01:26<00:00,  2.26it/s]
Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model 2


100%|██████████| 196/196 [01:26<00:00,  2.27it/s]
Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model 3


100%|██████████| 196/196 [01:26<00:00,  2.27it/s]
Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model 4


100%|██████████| 196/196 [01:26<00:00,  2.27it/s]
Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model 5


100%|██████████| 196/196 [01:26<00:00,  2.26it/s]
Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model 6


100%|██████████| 196/196 [01:26<00:00,  2.26it/s]
Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model 7


100%|██████████| 196/196 [01:26<00:00,  2.26it/s]
Some weights of the model checkpoint at beomi/KcELECTRA-base were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model 8


100%|██████████| 196/196 [01:26<00:00,  2.26it/s]


In [None]:
# Inspect the inference result: print its shape, then display the array.
print(preds.shape) 
preds

(25000, 4)


array([[-2.622688  , -0.20551223, -2.7978616 , -3.5172527 ],
       [-0.73255706, -0.7320617 , -5.548335  , -5.702675  ],
       [-5.247631  , -4.271263  , -1.9777515 , -0.1885916 ],
       ...,
       [-0.9005831 , -0.58119255, -5.6412463 , -5.997843  ],
       [-4.7661514 , -3.862812  , -2.39254   , -0.14957453],
       [-0.6861673 , -0.7859127 , -5.5554876 , -5.6161795 ]],
      dtype=float32)

In [None]:
# Take the argmax along axis 1 to get the predicted (encoded) target class per row.
new_preds = np.argmax(preds, axis = 1) 
new_preds.shape

(25000,)

In [None]:
# First, put the predicted target classes into the submission DataFrame.
ss['target'] = new_preds
ss.head()

Unnamed: 0,id,target
0,0,1
1,1,1
2,2,3
3,3,0
4,4,0


In [None]:
# Check the number of predictions per class with value_counts().
ss.target.value_counts()

3    12166
1     9656
0     2832
2      346
Name: target, dtype: int64

### Back to Original Value

In [None]:
## Map the encoded targets back to their original label values.
ss['target'] = encoder.inverse_transform(ss.target)

In [None]:
## Verify the shape and the first rows after decoding.
print(ss.shape)
ss.head()

(25000, 2)


Unnamed: 0,id,target
0,0,2
1,1,2
2,2,5
3,3,1
4,4,1


In [None]:
# Check the per-class counts once more, now with the original label values.
ss.target.value_counts() 

5    12166
2     9656
1     2832
4      346
Name: target, dtype: int64

## Submission CSV file Save

In [None]:
# Directory (under base_path) where the submission CSV is written.
submission_path = base_path + "submission_csv/"

In [None]:
# Save the submission CSV. Create the target directory first: to_csv does not
# create missing parent directories and would fail on a fresh Drive folder.
os.makedirs(submission_path, exist_ok=True)
ss.to_csv(submission_path + "bs1_t18_koElectra_scheduler_Customized_base01_t10_submission.csv", index=False)