# 0 설정, 설치 임포트

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
os.chdir('/content/drive/MyDrive/015GithubRepos/Dacon_sentence_classification')

In [3]:
pip install transformers

Collecting transformers
  Downloading transformers-4.16.2-py3-none-any.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 5.0 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 3.4 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 60.0 MB/s 
Collecting tokenizers!=0.11.3,>=0.10.1
  Downloading tokenizers-0.11.4-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.8 MB)
[K     |████████████████████████████████| 6.8 MB 51.9 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 53.2 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Foun

In [4]:
import pandas as pd 
import numpy as np 
import re
from tqdm import tqdm
import time

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset

from transformers import AutoTokenizer, ElectraForSequenceClassification, AdamW, AutoModel, AutoModelForSequenceClassification
from transformers.optimization import get_cosine_schedule_with_warmup, get_linear_schedule_with_warmup

from sklearn.model_selection import train_test_split

import warnings 
warnings.filterwarnings("ignore")

## 경로지정

In [5]:
# local = 'C:/Users/posick/Desktop/Dacon/open/'
# local2 = 'C:/Users/201/Desktop/Dacon/'
suv = 'data/'
# colab = '/content/drive/MyDrive/Dacon/'

# 1 하이퍼 파라미터 설정

### epoch

In [6]:
# epoch 5만 해도될듯
num_epochs = 10

### batch_size

In [7]:
batch_size = 32

### learning rate

In [8]:
lr = 0.00001

# 2 Data 처리

## 텍스트 전처리

In [9]:
# dev 데이터 더함

def load_data(path):
    train = pd.read_csv(path+'train_data.csv')
    test = pd.read_csv(path+'test_data.csv')
    train_dev = pd.read_csv(path+'train_dev.csv')
    sample_submission = pd.read_csv(path+'sample_submission.csv')

    train = pd.concat([train,train_dev], ignore_index=True)

    label_dict = {"entailment" : 0, "contradiction" : 1, "neutral" : 2}

    train['label'] = train['label'].map(label_dict)
    train['premise']=train['premise'].map(lambda x: re.sub('[-=+.,#/\?:^$.@*\"※~&%ㆍ!』\\‘|\(\)\[\]\<\>`\'…》]', '', x))
    train['hypothesis']=train['hypothesis'].map(lambda x: re.sub('[-=+.,#/\?:^$.@*\"※~&%ㆍ!』\\‘|\(\)\[\]\<\>`\'…》]', '', x))
    test['premise']=test['premise'].map(lambda x: re.sub('[-=+.,#/\?:^$.@*\"※~&%ㆍ!』\\‘|\(\)\[\]\<\>`\'…》]', '', x))
    test['hypothesis']=test['hypothesis'].map(lambda x: re.sub('[-=+.,#/\?:^$.@*\"※~&%ㆍ!』\\‘|\(\)\[\]\<\>`\'…》]', '', x))

    return train,test,sample_submission 

def text_clean(df):
    df["premise_"] = "[CLS]" + df["premise"] + "[SEP]"
    df["hypothesis_"] = df["hypothesis"] + "[SEP]"
    df["text_sum"] = df.premise_ + " " + df.hypothesis_
    df = df[['text_sum','label']]

    return df 

train, test, sample_submission = load_data(suv)
clean_train, clean_test = text_clean(train), text_clean(test)


## CustomDataset 클래스 선언

In [10]:
#%% data loader 

class CustomDataset(Dataset):
  
  def __init__(self, dataset, option, modelname):
    self.dataset = dataset 
    self.option = option
    self.tokenizer = AutoTokenizer.from_pretrained(modelname)
  
  def __len__(self):
    return len(self.dataset)
  
  def __getitem__(self, idx):
    row = self.dataset.iloc[idx, 0:2].values  # numpy array
    text = row[0]
    # y = row[1]

    inputs = self.tokenizer(
        text, 
        return_tensors='pt',
        truncation=True,
        max_length=70,
        pad_to_max_length=True,
        add_special_tokens=False
        )
    
    input_ids = inputs['input_ids'][0]
    attention_mask = inputs['attention_mask'][0]
    
    if self.option =='train':
        y = row[1]
        return input_ids, attention_mask, y

    return input_ids, attention_mask


## CrossValidation 인덱스 생성

In [11]:
#%% Cross validation 

n_splits = 5

from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits = n_splits, shuffle=True, random_state=42)

folds=[]
for trn_idx, val_idx in skf.split(clean_train['text_sum'], clean_train['label']):
    folds.append((trn_idx,val_idx))

# 학습

## device 설정

In [12]:
device = torch.device("cuda")
device

device(type='cuda')

## 학습

### modelname

In [13]:
modelname = 'klue/roberta-large'
# modelname = 'monologg/koelectra-base-v3-discriminator'
modelname

'klue/roberta-large'

In [28]:
best_models = []
model_num = 0
for fold in range(5):
    start_time = time.time()
    print(f'=============================={fold+1}fold start==============================')

    # CV용 model 새로 선언
    model = AutoModelForSequenceClassification.from_pretrained(modelname, num_labels=3)
    model = nn.DataParallel(model).to(device)
    
    optimizer = AdamW(model.parameters(), lr=lr)
    
    train_idx = folds[fold][0]
    valid_idx = folds[fold][1]

    train_data = clean_train.loc[train_idx]
    val_data = clean_train.loc[valid_idx]

    train_dataset = CustomDataset(train_data, 'train', modelname)
    valid_dataset = CustomDataset(val_data, 'train', modelname)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
    
    warmup_ratio = 0.1
    total_steps = len(train_loader) * num_epochs
    warmup_step = int(total_steps * warmup_ratio)
    scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=1, num_training_steps=total_steps)
    valid_loss_min = 0.4
    valid_acc_max = 0.8
    
    
    for epoch in range(num_epochs):  # epoch = 10
        print(f'epoch : {epoch}-----------------------------------------------------------------')
        batches = 0
        total_loss = 0.0
        correct = 0
        total =0
        model.train()  # train 모드로 변경
        
        # 학습데이터로 학습 #############################################################
        print(f'train 학습..........')
        for input_ids_batch, attention_masks_batch, y_batch in train_loader:
            optimizer.zero_grad()  # 그래디언트 초기화
            y_batch = y_batch.to(device)  # y_batch를 gpu 올림
            y_pred = model(input_ids_batch.to(device), attention_mask = attention_masks_batch.to(device))[0]  # 순전파
            loss = F.cross_entropy(y_pred, y_batch)  # loss 계산
            loss.backward()  # 역전파
            optimizer.step()  # 가중치 업데이트
            total_loss += loss.item()
            _, predicted = torch.max(y_pred, 1)
            correct += (predicted == y_batch).sum()
            total += len(y_batch)
            batches += 1
            if batches % 100 == 0:
                acc = correct.float() / total
                print(f'iteration 누적 : {batches}, Train Loss: {total_loss:.4f}, Train Accuracy : {acc.item():.4f}')
        acc = correct.float() / total
        print(f'iteration 누적 : {batches}, Train Loss: {total_loss:.4f}, Train Accuracy : {acc.item():.4f}')
        # 학습데이터로 학습 #############################################################


        # 검증데이터로 검증 #############################################################
        val_loss = []
        val_acc = []
        print(f'validation 검증..........')
        for input_ids_batch, attention_masks_batch, y_batch in valid_loader:
            
            model.eval()
            with torch.no_grad():
                y_pred = model(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0]
                valid_loss = F.cross_entropy(y_pred,y_batch.to(device)).cpu().detach().numpy()

                preds = torch.argmax(y_pred,1)
                preds = preds.cpu().detach().numpy()
                y_batch = y_batch.cpu().detach().numpy()
                batch_acc = (preds==y_batch).mean()
                val_loss.append(valid_loss)
                val_acc.append(batch_acc)

        val_loss = np.mean(val_loss)
        val_acc = np.mean(val_acc)
        scheduler.step()
        print(f'Valid Loss: {val_loss:.4f}, Valid Accuracy : {val_acc:.4f}')
        print(f'Learning rate : {optimizer.param_groups[0]["lr"]:.6f}')
        # 검증데이터로 검증 #############################################################

        if valid_acc_max < val_acc:  # 이전보다 좋으면 best_models에 모델을 추가
            valid_acc_max = val_acc
            best_models.append(model)
            torch.save(model, f'koelectra-adddata{model_num}.pth')  # 모델 저장
            model_num += 1
            print(f'model \'koelectra-adddata{model_num}.pth\' save. the number of best_models: {len(best_models)}, model val acc : {val_acc:.6f}******************')
        print(f'--------------------------------------------------------------------------------')

    print(f'{fold+1}fold elapsed time : {time.time() - start_time}')
    



Downloading:   0%|          | 0.00/467 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/431M [00:00<?, ?B/s]

Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: 

Downloading:   0%|          | 0.00/61.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/257k [00:00<?, ?B/s]

epoch : 0-----------------------------------------------------------------
train 학습..........
Batch : 100, Train Loss: 109.9769, Train Accuracy : 0.3262
Batch : 200, Train Loss: 219.9861, Train Accuracy : 0.3253
Batch : 300, Train Loss: 329.8607, Train Accuracy : 0.3300
Batch : 400, Train Loss: 439.8445, Train Accuracy : 0.3293
Batch : 500, Train Loss: 549.7905, Train Accuracy : 0.3274
Batch : 600, Train Loss: 659.7315, Train Accuracy : 0.3276
Batch : 700, Train Loss: 769.7412, Train Accuracy : 0.3280
Batch 누적 : 700, Train Loss: 769.7412, Train Accuracy : 0.3280
validation 검증..........
Valid Loss: 1.0994, Valid Accuracy : 0.3179
Learning rate : 0.000010
--------------------------------------------------------------------------------
epoch : 1-----------------------------------------------------------------
train 학습..........
Batch : 100, Train Loss: 104.8061, Train Accuracy : 0.4834
Batch : 200, Train Loss: 170.7515, Train Accuracy : 0.6255
Batch : 300, Train Loss: 223.2042, Train Accu

Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: 

epoch : 0-----------------------------------------------------------------
train 학습..........
Batch : 100, Train Loss: 109.8392, Train Accuracy : 0.3550
Batch : 200, Train Loss: 219.8459, Train Accuracy : 0.3487
Batch : 300, Train Loss: 329.6248, Train Accuracy : 0.3522
Batch : 400, Train Loss: 439.6365, Train Accuracy : 0.3496
Batch : 500, Train Loss: 549.5495, Train Accuracy : 0.3489
Batch : 600, Train Loss: 659.5355, Train Accuracy : 0.3465
Batch : 700, Train Loss: 769.5228, Train Accuracy : 0.3460
Batch 누적 : 700, Train Loss: 769.5228, Train Accuracy : 0.3460
validation 검증..........
Valid Loss: 1.0986, Valid Accuracy : 0.3395
Learning rate : 0.000010
--------------------------------------------------------------------------------
epoch : 1-----------------------------------------------------------------
train 학습..........
Batch : 100, Train Loss: 103.7221, Train Accuracy : 0.4887
Batch : 200, Train Loss: 165.9060, Train Accuracy : 0.6383
Batch : 300, Train Loss: 219.8783, Train Accu

Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: 

epoch : 0-----------------------------------------------------------------
train 학습..........
Batch : 100, Train Loss: 109.7429, Train Accuracy : 0.3541
Batch : 200, Train Loss: 219.7632, Train Accuracy : 0.3469
Batch : 300, Train Loss: 329.7021, Train Accuracy : 0.3470
Batch : 400, Train Loss: 439.8077, Train Accuracy : 0.3439
Batch : 500, Train Loss: 549.9281, Train Accuracy : 0.3408
Batch : 600, Train Loss: 659.8600, Train Accuracy : 0.3407
Batch : 700, Train Loss: 769.7101, Train Accuracy : 0.3423
Batch 누적 : 700, Train Loss: 769.7101, Train Accuracy : 0.3423
validation 검증..........
Valid Loss: 1.0989, Valid Accuracy : 0.3431
Learning rate : 0.000010
--------------------------------------------------------------------------------
epoch : 1-----------------------------------------------------------------
train 학습..........
Batch : 100, Train Loss: 105.5377, Train Accuracy : 0.4762
Batch : 200, Train Loss: 172.7548, Train Accuracy : 0.6170
Batch : 300, Train Loss: 229.6980, Train Accu

Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: 

epoch : 0-----------------------------------------------------------------
train 학습..........
Batch : 100, Train Loss: 110.2279, Train Accuracy : 0.3475
Batch : 200, Train Loss: 220.2652, Train Accuracy : 0.3428
Batch : 300, Train Loss: 330.5144, Train Accuracy : 0.3410
Batch : 400, Train Loss: 440.7746, Train Accuracy : 0.3366
Batch : 500, Train Loss: 550.9394, Train Accuracy : 0.3368
Batch : 600, Train Loss: 661.1805, Train Accuracy : 0.3351
Batch : 700, Train Loss: 771.3084, Train Accuracy : 0.3355
Batch 누적 : 700, Train Loss: 771.3084, Train Accuracy : 0.3355
validation 검증..........
Valid Loss: 1.1026, Valid Accuracy : 0.3326
Learning rate : 0.000010
--------------------------------------------------------------------------------
epoch : 1-----------------------------------------------------------------
train 학습..........
Batch : 100, Train Loss: 104.5985, Train Accuracy : 0.4753
Batch : 200, Train Loss: 172.4253, Train Accuracy : 0.6109
Batch : 300, Train Loss: 223.9137, Train Accu

Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: 

epoch : 0-----------------------------------------------------------------
train 학습..........
Batch : 100, Train Loss: 110.4316, Train Accuracy : 0.3253
Batch : 200, Train Loss: 220.7977, Train Accuracy : 0.3311
Batch : 300, Train Loss: 331.0129, Train Accuracy : 0.3327
Batch : 400, Train Loss: 441.5261, Train Accuracy : 0.3348
Batch : 500, Train Loss: 551.9383, Train Accuracy : 0.3328
Batch : 600, Train Loss: 662.4597, Train Accuracy : 0.3314
Batch : 700, Train Loss: 772.8713, Train Accuracy : 0.3324
Batch 누적 : 700, Train Loss: 772.8713, Train Accuracy : 0.3324
validation 검증..........
Valid Loss: 1.1030, Valid Accuracy : 0.3342
Learning rate : 0.000010
--------------------------------------------------------------------------------
epoch : 1-----------------------------------------------------------------
train 학습..........
Batch : 100, Train Loss: 103.6276, Train Accuracy : 0.4822
Batch : 200, Train Loss: 168.2963, Train Accuracy : 0.6234
Batch : 300, Train Loss: 222.7940, Train Accu

In [None]:
test_dataset = CustomDataset(clean_test,'test')
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)

preds = []
for idx, m in enumerate(best_models): 
    print(f'{idx+1}/{len(best_models)}번째 모델 예측 진행중')
    bestm = m
    bestm.eval()
    answer = []
    with torch.no_grad():
        for input_ids_batch, attention_masks_batch in tqdm(test_loader):
            y_pred = bestm(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0].detach().cpu().numpy()
            answer.extend(y_pred.argmax(axis=1))
            
    preds.append(answer)

1/6번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.12it/s]


2/6번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.17it/s]


3/6번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.17it/s]


4/6번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.17it/s]


5/6번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.17it/s]


6/6번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.17it/s]


In [None]:
from collections import Counter

np_pred = np.array(preds).T

pred = []
for i in range(1666):
    cnt = Counter(np_pred[i])
    pred.append(cnt.most_common()[0][0])
    

In [None]:
label_dict1 = {0:"entailment" , 1: "contradiction" , 2:"neutral"}

sample_submission['label'] = [label_dict1[_] for _ in pred]

In [None]:
sample_submission

Unnamed: 0,index,label
0,0,contradiction
1,1,neutral
2,2,entailment
3,3,contradiction
4,4,contradiction
...,...,...
1661,1661,neutral
1662,1662,entailment
1663,1663,neutral
1664,1664,neutral


In [None]:
sample_submission.to_csv(suv + 'aaaaaaaaa.csv', index=False)

In [None]:
suv + 'kc_roberta-large_3fold.csv'

'data/kc_roberta-large_3fold.csv'

In [None]:
os.getcwd()

'/content/drive/MyDrive/015GithubRepos/Dacon_sentence_classification'

In [None]:
for _ in np_pred[:50]:
    print(f'{_}\t{Counter(_)}')

[1 1 1 1 1 1]	Counter({1: 6})
[2 2 2 2 2 2]	Counter({2: 6})
[0 0 0 0 0 0]	Counter({0: 6})
[1 1 1 1 1 1]	Counter({1: 6})
[1 1 1 1 1 1]	Counter({1: 6})
[2 2 2 2 2 2]	Counter({2: 6})
[1 1 1 2 2 2]	Counter({1: 3, 2: 3})
[2 2 2 0 0 0]	Counter({2: 3, 0: 3})
[0 0 0 0 0 0]	Counter({0: 6})
[2 2 2 1 1 1]	Counter({2: 3, 1: 3})
[1 1 1 1 1 1]	Counter({1: 6})
[0 0 0 0 0 0]	Counter({0: 6})
[1 1 1 1 1 1]	Counter({1: 6})
[0 0 0 0 0 0]	Counter({0: 6})
[2 2 2 2 2 2]	Counter({2: 6})
[2 2 2 2 2 2]	Counter({2: 6})
[2 2 2 2 2 2]	Counter({2: 6})
[2 2 2 2 2 2]	Counter({2: 6})
[1 1 1 1 1 1]	Counter({1: 6})
[2 2 2 2 2 2]	Counter({2: 6})
[1 1 1 1 1 1]	Counter({1: 6})
[2 2 2 2 2 2]	Counter({2: 6})
[0 0 0 2 2 2]	Counter({0: 3, 2: 3})
[0 0 0 0 0 0]	Counter({0: 6})
[1 1 1 1 1 1]	Counter({1: 6})
[1 1 1 1 1 1]	Counter({1: 6})
[0 0 0 2 2 2]	Counter({0: 3, 2: 3})
[0 0 0 0 0 0]	Counter({0: 6})
[0 0 0 0 0 0]	Counter({0: 6})
[2 2 2 2 2 2]	Counter({2: 6})
[1 1 1 1 1 1]	Counter({1: 6})
[1 1 1 1 1 1]	Counter({1: 6})
[1 1 1 1 1

In [None]:
for _ in range(11, 11+len(best_models)):
    torch.save(best_models[_-11], f'roberta-large{_}.pth')

In [None]:
torch.load()

# 불러와서 예측

In [None]:
import time

test_dataset = CustomDataset(clean_test,'test')
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)
preds = []
for _ in range(13):
    start = time.time()
    print(f'{_+1}/13 번째 모델 예측 진행중')
    m = torch.load(f'roberta-large-adddata{_}.pth')
    m.eval()
    answer = []
    with torch.no_grad():
        for input_ids_batch, attention_masks_batch in tqdm(test_loader):
            y_pred = m(
                input_ids_batch.to(device),
                attention_mask=attention_masks_batch.to(device)
                )[0].detach().cpu().numpy()
            answer.extend(y_pred.argmax(axis=1))
            
    preds.append(answer)
    print(f'elapsed time : {time.time() - start}')

1/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.15it/s]


elapsed time : 20.160715579986572
2/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.15it/s]


elapsed time : 21.847057104110718
3/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.16it/s]


elapsed time : 24.572529792785645
4/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.15it/s]


elapsed time : 19.216731071472168
5/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.16it/s]


elapsed time : 20.553385972976685
6/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.16it/s]


elapsed time : 19.54999041557312
7/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.16it/s]


elapsed time : 19.084368228912354
8/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.15it/s]


elapsed time : 19.55402183532715
9/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.16it/s]


elapsed time : 17.870598793029785
10/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.15it/s]


elapsed time : 17.861972093582153
11/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.16it/s]


elapsed time : 17.829195976257324
12/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.16it/s]


elapsed time : 18.046534776687622
13/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.15it/s]

elapsed time : 17.859816551208496





In [None]:
from collections import Counter

np_pred = np.array(preds).T

pred = []
for i in range(1666):
    cnt = Counter(np_pred[i])
    pred.append(cnt.most_common()[0][0])

In [None]:
label_dict1 = {0:"entailment" , 1: "contradiction" , 2:"neutral"}

sample_submission['label'] = [label_dict1[_] for _ in pred]

In [None]:
for _ in np_pred[:50]:
    print(f'{_}\t{Counter(_).most_common()[0][0]}\t{Counter(_)}')

In [None]:
sample_submission.to_csv(suv + 'roberta-large-adddata.csv', index=False)

# 불러와서 앙상블

In [57]:
def soft_max(x):
    
    max = np.max(x,axis=1,keepdims=True) #returns max of each row and keeps same dims
    e_x = np.exp(x - max) #subtracts each row with its max value
    sum = np.sum(e_x,axis=1,keepdims=True) #returns sum of each row and keeps same dims
    f_x = e_x / sum 
    return f_x

In [77]:
import time

print(f'불러오는 model name : {modelname}')
test_dataset = CustomDataset(clean_test,'test', modelname)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)
sum_probs = np.zeros((test_dataset.dataset.shape[0], 3), dtype = 'f')

for _ in range(13):
    # 모델 1개로 예측 ----------------------------------------------------------
    start = time.time()
    print(f'{_+1}/13 번째 모델 예측 진행중')
    m = torch.load(f'roberta-large-adddata{_}.pth')
    m.eval()
    probs = np.empty((0,3), float)

    with torch.no_grad():
        for input_ids_batch, attention_masks_batch in tqdm(test_loader):
            y_pred = m(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0].detach().cpu().numpy()
            probs = np.vstack((probs, soft_max(np.array(y_pred))))  # 3가지 클래스에 대한 확률값. 행렬 shape : 사이즈 X 3
            
    sum_probs += probs  # 예측 끝나면 더해줌 ********************************
    print(f'elapsed time : {time.time() - start}')
    # 모델 1개로 예측 ----------------------------------------------------------

불러오는 model name : klue/roberta-large
1/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.13it/s]


elapsed time : 15.155702829360962
2/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.15it/s]


elapsed time : 17.991841554641724
3/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.15it/s]


elapsed time : 17.89419913291931
4/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.14it/s]


elapsed time : 17.921390533447266
5/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.15it/s]


elapsed time : 17.901498079299927
6/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.14it/s]


elapsed time : 17.77363133430481
7/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.14it/s]


elapsed time : 17.686158418655396
8/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.15it/s]


elapsed time : 17.878915309906006
9/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.15it/s]


elapsed time : 19.047866344451904
10/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.15it/s]


elapsed time : 21.194138288497925
11/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.14it/s]


elapsed time : 21.39119243621826
12/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.13it/s]


elapsed time : 21.056869745254517
13/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:12<00:00,  2.14it/s]

elapsed time : 23.942049741744995





In [80]:
for _ in range(23):
    # 모델 1개로 예측 ----------------------------------------------------------
    start = time.time()
    print(f'{_+1}/23 번째 모델 예측 진행중')
    m = torch.load(f'koelectra-adddata{_}.pth')
    m.eval()
    probs = np.empty((0,3), float)

    with torch.no_grad():
        for input_ids_batch, attention_masks_batch in tqdm(test_loader):
            y_pred = m(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0].detach().cpu().numpy()
            probs = np.vstack((probs, soft_max(np.array(y_pred))))  # 3가지 클래스에 대한 확률값 배치만큼 쌓기. 행렬 shape : 사이즈 X 3
            
    sum_probs += probs  # 모델 예측 끝나면 더해줌 ********************************
    print(f'elapsed time : {time.time() - start}')
    # 모델 1개로 예측 ----------------------------------------------------------

1/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.11it/s]


elapsed time : 7.340185642242432
2/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.14it/s]


elapsed time : 7.902773141860962
3/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  5.90it/s]


elapsed time : 8.583792448043823
4/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.11it/s]


elapsed time : 8.964864015579224
5/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.09it/s]


elapsed time : 7.811848163604736
6/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.12it/s]


elapsed time : 8.005470037460327
7/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.13it/s]


elapsed time : 8.252460956573486
8/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.10it/s]


elapsed time : 8.193335056304932
9/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.15it/s]


elapsed time : 7.675370454788208
10/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  5.99it/s]


elapsed time : 8.700007677078247
11/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  5.99it/s]


elapsed time : 7.560232400894165
12/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.04it/s]


elapsed time : 9.878109216690063
13/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.09it/s]


elapsed time : 8.3476722240448
14/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  5.96it/s]


elapsed time : 8.220571279525757
15/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.13it/s]


elapsed time : 8.693399667739868
16/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.16it/s]


elapsed time : 8.267611980438232
17/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.11it/s]


elapsed time : 8.438199520111084
18/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.11it/s]


elapsed time : 8.641277313232422
19/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.13it/s]


elapsed time : 8.357936382293701
20/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.12it/s]


elapsed time : 8.37936520576477
21/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.15it/s]


elapsed time : 8.022168397903442
22/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.13it/s]


elapsed time : 8.532229900360107
23/13 번째 모델 예측 진행중


100%|██████████| 27/27 [00:04<00:00,  6.00it/s]

elapsed time : 8.447734355926514





In [85]:
pred = sum_probs.argmax(axis=1)

In [86]:
label_dict1 = {0:"entailment" , 1: "contradiction" , 2:"neutral"}

sample_submission['label'] = [label_dict1[_] for _ in pred]

In [89]:
sample_submission.head(5)

Unnamed: 0,index,label
0,0,contradiction
1,1,entailment
2,2,entailment
3,3,contradiction
4,4,contradiction


In [90]:
sample_submission.to_csv(suv + 'robertaNkoelectra_ensem.csv', index=False)