# 월간 데이콘 발화자의 감정인식 AI 경진대회

- Baseline code를 기준으로 작성되었습니다.  

- emoberta-large 모델, StratifiedKFold를 적용하고, 문맥이나 발화 순서를 고려하지 않고 학습시키는 방향으로 수행했습니다.

In [2]:
!nvidia-smi

Sun Dec 11 01:29:57 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 410.72       Driver Version: 410.72       CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla V100-PCIE...  Off  | 00000000:AF:00.0 Off |                    0 |
| N/A   27C    P0    26W / 250W |      0MiB / 32480MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

## Import

In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
import random
import os

In [5]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

from transformers import AutoModel, AutoConfig, AutoTokenizer
from torch.optim import Adam

import matplotlib as mpl
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings(action='ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
from transformers import AutoModel, AutoConfig, AutoTokenizer

In [7]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Hyperparameter Setting

In [8]:
CFG = {
    'EPOCHS': 5,
    'LEARNING_RATE': 1e-6,
    'BATCH_SIZE': 8,
    'SEED': 42,
    'PLM': "tae898/emoberta-large",
    'OPTIMIZER': "Adam",
    'split': '8-2',
    'NFOLD': 7
}

## Fixed RandomSeed

In [9]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Data Load

In [10]:
path = "/MyFiles/DACON/Speaker_emotion/data/"
data = pd.read_csv(path + 'train.csv')
data.tail()

Unnamed: 0,ID,Utterance,Speaker,Dialogue_ID,Target
9984,TRAIN_9984,You or me?,Chandler,1038,neutral
9985,TRAIN_9985,"I got it. Uh, Joey, women don't have Adam's ap...",Ross,1038,neutral
9986,TRAIN_9986,"You guys are messing with me, right?",Joey,1038,surprise
9987,TRAIN_9987,Yeah.,All,1038,neutral
9988,TRAIN_9988,"That was a good one. For a second there, I was...",Joey,1038,joy


#### Speaker 제외

In [11]:
train_ds = data[["ID", "Utterance", "Dialogue_ID", "Target"]]
train_ds

Unnamed: 0,ID,Utterance,Dialogue_ID,Target
0,TRAIN_0000,also I was the point person on my company’s tr...,0,neutral
1,TRAIN_0001,You must’ve had your hands full.,0,neutral
2,TRAIN_0002,That I did. That I did.,0,neutral
3,TRAIN_0003,So let’s talk a little bit about your duties.,0,neutral
4,TRAIN_0004,My duties? All right.,0,surprise
...,...,...,...,...
9984,TRAIN_9984,You or me?,1038,neutral
9985,TRAIN_9985,"I got it. Uh, Joey, women don't have Adam's ap...",1038,neutral
9986,TRAIN_9986,"You guys are messing with me, right?",1038,surprise
9987,TRAIN_9987,Yeah.,1038,neutral


In [12]:
train_ds["Target"].value_counts()

neutral     4710
joy         1743
surprise    1205
anger       1109
sadness      683
disgust      271
fear         268
Name: Target, dtype: int64

## Label encoding

In [14]:
le = LabelEncoder()
le = le.fit(train_ds['Target'])
train_ds['Target']=le.transform(train_ds['Target'])

In [15]:
for i, label in enumerate(le.classes_):
    print(i, '->', label)    

0 -> anger
1 -> disgust
2 -> fear
3 -> joy
4 -> neutral
5 -> sadness
6 -> surprise


## Row Shuffling

In [16]:
train_ds = train_ds.sample(frac=1, random_state=42).reset_index(drop=True)  # shuffling하고 index reset
train_ds

Unnamed: 0,ID,Utterance,Dialogue_ID,Target
0,TRAIN_5839,"Can I just say, I really admire what you’re do...",619,4
1,TRAIN_3058,"Look, things like last night they don’t just h...",320,4
2,TRAIN_8746,Damn! I thought that was going to be romantic ...,919,6
3,TRAIN_2664,I’m wearing his briefs right now.,281,4
4,TRAIN_0035,"What, what, what?!",3,6
...,...,...,...,...
9984,TRAIN_5734,I was,610,6
9985,TRAIN_5191,Okay!,551,4
9986,TRAIN_5390,Don’t you talk to my husband like that you stu...,572,0
9987,TRAIN_0860,This sucks! I didn’t know I had to stay up all...,89,0


## StratifiedKFold

In [17]:
from sklearn.model_selection import StratifiedKFold, KFold

skf = StratifiedKFold(n_splits=CFG['NFOLD'], shuffle=True, random_state=CFG['SEED'])

for fold, ( _, val_) in enumerate(skf.split(X=train_ds, y=train_ds.Target)):
    train_ds.loc[val_ , "Kfold"] = int(fold)

train_ds["Kfold"] = train_ds["Kfold"].astype(int)
train_ds.head()

Unnamed: 0,ID,Utterance,Dialogue_ID,Target,Kfold
0,TRAIN_5839,"Can I just say, I really admire what you’re do...",619,4,1
1,TRAIN_3058,"Look, things like last night they don’t just h...",320,4,2
2,TRAIN_8746,Damn! I thought that was going to be romantic ...,919,6,2
3,TRAIN_2664,I’m wearing his briefs right now.,281,4,4
4,TRAIN_0035,"What, what, what?!",3,6,3


In [18]:
CFG['NFOLD'], CFG['EPOCHS']

(7, 5)

## Train/Validation split

In [19]:
train_df = train_ds[train_ds.Kfold != fold].reset_index(drop=True)
valid_df = train_ds[train_ds.Kfold == fold].reset_index(drop=True)

train_len=len(train_df)
val_len=len(valid_df)

print(train_len)
print(val_len)

8562
1427


## Tokenizer Define

In [20]:
tokenizers = AutoTokenizer.from_pretrained(CFG["PLM"])

## CustomDataset

In [21]:
class CustomDataset(Dataset):
  
    def __init__(self, data, mode = "train"):
        self.dataset = data
        self.tokenizer = tokenizers
        self.mode = mode
        
    def __len__(self):
        return len(self.dataset)
  
    def __getitem__(self, idx):
        text = self.dataset['Utterance'][idx]
        inputs = self.tokenizer(text, padding='max_length', max_length = 512, truncation=True, return_tensors="pt")
        input_ids = inputs['input_ids'][0]
        attention_mask = inputs['attention_mask'][0]
    
        if self.mode == "train":
            y = self.dataset['Target'][idx]
            return input_ids, attention_mask, y
        
        else:
            return input_ids, attention_mask

In [22]:
train = CustomDataset(train_df, mode = "train")
valid = CustomDataset(valid_df, mode = "train")

train_dataloader = torch.utils.data.DataLoader(train, batch_size= CFG['BATCH_SIZE'], shuffle=True)
val_dataloader = torch.utils.data.DataLoader(valid, batch_size= CFG['BATCH_SIZE'], shuffle=False)

## Model Define

In [23]:
class BaseModel(nn.Module):

    def __init__(self, dropout=0.5, num_classes=len(le.classes_)):

        super(BaseModel, self).__init__()

        self.bert = AutoModel.from_pretrained(CFG["PLM"])

        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(1024, num_classes)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):

        _, pooled_output = self.bert(input_ids= input_id, attention_mask=mask,
                                     return_dict=False
                                    )
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        final_layer = self.relu(linear_output)

        return final_layer

## Train

In [25]:
def train(model, optimizer, train_loader, test_loader, device, fold=CFG["NFOLD"]):

    model.to(device)

    criterion = nn.CrossEntropyLoss().to(device)

    best_score = 0
    best_model = "None"
    for epoch_num in range(CFG["EPOCHS"]):

        model.train()
        train_loss = []
        for input_ids, attention_mask, train_label in tqdm(train_loader):

            optimizer.zero_grad()

            train_label = train_label.to(device)
            input_id = input_ids.to(device)
            mask = attention_mask.to(device)

            output = model(input_id, mask)     
    
            batch_loss = criterion(output, train_label.long()) 
            train_loss.append(batch_loss.item())
            
            batch_loss.backward()
            optimizer.step()

        val_loss, val_score = validation(model, criterion, test_loader, device)
        print(f'Epoch [{epoch_num}] of {fold}th Fold, Train Loss : [{np.mean(train_loss) :.5f}] Val Loss : [{np.mean(val_loss) :.5f}] Val F1 Score : [{val_score:.5f}]')

        if best_score < val_score:
            best_model = model
            best_score = val_score
            torch.save(model.state_dict(), os.path.join(RECORDER_DIR, f"best_model-Fold-{fold}.pt"))
        
    return best_model                         

In [26]:
def competition_metric(true, pred):
    return f1_score(true, pred, average="macro")

def validation(model, criterion, test_loader, device):
    model.eval()

    val_loss = []
    model_preds = []
    true_labels = []  
    with torch.no_grad():
        for input_ids, attention_mask, valid_label in tqdm(test_loader):
            
            valid_label = valid_label.to(device)
            input_id = input_ids.to(device)
            mask = attention_mask.to(device)

            output = model(input_id, mask)     
    
            batch_loss = criterion(output, valid_label.long()) 
            val_loss.append(batch_loss.item())      
            
            model_preds += output.argmax(1).detach().cpu().numpy().tolist()
            true_labels += valid_label.detach().cpu().numpy().tolist()
        val_f1 = competition_metric(true_labels, model_preds)
    return val_loss, val_f1    

## Run!!

In [27]:
from datetime import datetime, timezone, timedelta
# 시간 고유값 
PROJECT_DIR = './'
os.chdir(PROJECT_DIR)
kst = timezone(timedelta(hours=9))        
train_serial = datetime.now(tz=kst).strftime("%Y%m%d_%H%M%S")

# 기록 경로
RECORDER_DIR = os.path.join(PROJECT_DIR, 'results', train_serial)
# 현재 시간 기준 폴더 생성
os.makedirs(RECORDER_DIR, exist_ok=True)    

In [28]:
for fold in range(0, CFG['NFOLD']):
    print(f"======== Fold: {fold} =========")

    model = BaseModel()
    # model.eval()
    optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])

    infer_model = train(model, optimizer, train_dataloader, val_dataloader, device, fold)



Some weights of the model checkpoint at tae898/emoberta-large were not used when initializing RobertaModel: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at tae898/emoberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 1071/

Epoch [0] of 0th Fold, Train Loss : [1.05248] Val Loss : [0.78328] Val F1 Score : [0.51210]


100%|██████████| 1071/1071 [15:05<00:00,  1.18it/s]
100%|██████████| 179/179 [00:49<00:00,  3.65it/s]


Epoch [1] of 0th Fold, Train Loss : [0.79200] Val Loss : [0.76008] Val F1 Score : [0.55343]


100%|██████████| 1071/1071 [15:05<00:00,  1.18it/s]
100%|██████████| 179/179 [00:49<00:00,  3.65it/s]


Epoch [2] of 0th Fold, Train Loss : [0.72169] Val Loss : [0.75015] Val F1 Score : [0.64240]


100%|██████████| 1071/1071 [15:05<00:00,  1.18it/s]
100%|██████████| 179/179 [00:49<00:00,  3.65it/s]


Epoch [3] of 0th Fold, Train Loss : [0.66617] Val Loss : [0.74237] Val F1 Score : [0.65562]


100%|██████████| 1071/1071 [15:06<00:00,  1.18it/s]
100%|██████████| 179/179 [00:49<00:00,  3.64it/s]


Epoch [4] of 0th Fold, Train Loss : [0.60494] Val Loss : [0.75199] Val F1 Score : [0.66331]


Some weights of the model checkpoint at tae898/emoberta-large were not used when initializing RobertaModel: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at tae898/emoberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 1071/

Epoch [0] of 1th Fold, Train Loss : [1.03196] Val Loss : [0.78223] Val F1 Score : [0.53664]


100%|██████████| 1071/1071 [15:05<00:00,  1.18it/s]
100%|██████████| 179/179 [00:49<00:00,  3.65it/s]


Epoch [1] of 1th Fold, Train Loss : [0.78709] Val Loss : [0.73613] Val F1 Score : [0.64157]


 32%|███▏      | 345/1071 [04:51<10:14,  1.18it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 1071/1071 [15:06<00:00,  1.18it/s]
100%|██████████| 179/179 [00:49<00:00,  3.65it/s]


Epoch [0] of 6th Fold, Train Loss : [1.01972] Val Loss : [0.76816] Val F1 Score : [0.50725]


100%|██████████| 1071/1071 [15:06<00:00,  1.18it/s]
100%|██████████| 179/179 [00:49<00:00,  3.62it/s]


Epoch [1] of 6th Fold, Train Loss : [0.78568] Val Loss : [0.76635] Val F1 Score : [0.62561]


100%|██████████| 1071/1071 [15:06<00:00,  1.18it/s]
100%|██████████| 179/179 [00:49<00:00,  3.65it/s]


Epoch [2] of 6th Fold, Train Loss : [0.71192] Val Loss : [0.73057] Val F1 Score : [0.66525]


100%|██████████| 1071/1071 [15:07<00:00,  1.18it/s]
100%|██████████| 179/179 [00:49<00:00,  3.65it/s]


Epoch [3] of 6th Fold, Train Loss : [0.65489] Val Loss : [0.73549] Val F1 Score : [0.68189]


100%|██████████| 1071/1071 [15:06<00:00,  1.18it/s]
100%|██████████| 179/179 [00:49<00:00,  3.65it/s]

Epoch [4] of 6th Fold, Train Loss : [0.60812] Val Loss : [0.74850] Val F1 Score : [0.66108]





## Inference

In [None]:
base_path = f'/MyFiles/DACON/Speaker_emotion/results/{train_serial}/'
print(base_path)

model_paths = [
    base_path + "best_model-Fold-0.pt",
    base_path + "best_model-Fold-1.pt",
    base_path + "best_model-Fold-2.pt",
    base_path + "best_model-Fold-3.pt",
    base_path + "best_model-Fold-4.pt",
    base_path + "best_model-Fold-5.pt",
    base_path + "best_model-Fold-6.pt",
    ]

In [30]:
test = pd.read_csv(path + 'test.csv')

In [31]:
test = CustomDataset(test, mode = "test")
test_dataloader = torch.utils.data.DataLoader(test, batch_size= 10, #CFG['BATCH_SIZE'], 
                                              shuffle=False)

In [32]:
def inference(model_paths, test_loader, device):

    test_predicts = []

    with torch.no_grad():

        for i, path in enumerate(model_paths):  
            test_predict = []
            
            model = BaseModel().to(device)
            model.load_state_dict(torch.load(path))
            model.eval()

            print(f"Prediction for model {i+1}")
            for input_ids, attention_mask in tqdm(test_loader):
                input_id = input_ids.to(device)
                mask = attention_mask.to(device)
                y_pred = model(input_id, mask)
                test_predict.append(y_pred.detach().cpu().numpy())

            test_predict1 = np.concatenate(np.array(test_predict), axis = 0) # test_predict1: [total_bs, 7]
            print(test_predict1.shape)
            
            test_predicts.append(test_predict1) # test_predicts: [[total_bs, 7],  [total_bs, 7],  .... ]

    test_predicts_final = np.mean(test_predicts, axis=0)
    
    return test_predicts_final

In [34]:
preds = inference(model_paths, test_dataloader, device)

Some weights of the model checkpoint at tae898/emoberta-large were not used when initializing RobertaModel: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at tae898/emoberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Prediction for model 1


100%|██████████| 261/261 [01:25<00:00,  3.06it/s]


(2610, 7)


Some weights of the model checkpoint at tae898/emoberta-large were not used when initializing RobertaModel: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at tae898/emoberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Prediction for model 2


100%|██████████| 261/261 [01:25<00:00,  3.06it/s]


(2610, 7)


Some weights of the model checkpoint at tae898/emoberta-large were not used when initializing RobertaModel: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at tae898/emoberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Prediction for model 3


100%|██████████| 261/261 [01:25<00:00,  3.06it/s]


(2610, 7)


Some weights of the model checkpoint at tae898/emoberta-large were not used when initializing RobertaModel: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at tae898/emoberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Prediction for model 4


100%|██████████| 261/261 [01:25<00:00,  3.06it/s]


(2610, 7)


Some weights of the model checkpoint at tae898/emoberta-large were not used when initializing RobertaModel: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at tae898/emoberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Prediction for model 5


100%|██████████| 261/261 [01:25<00:00,  3.06it/s]


(2610, 7)


Some weights of the model checkpoint at tae898/emoberta-large were not used when initializing RobertaModel: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at tae898/emoberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Prediction for model 6


100%|██████████| 261/261 [01:25<00:00,  3.06it/s]


(2610, 7)


Some weights of the model checkpoint at tae898/emoberta-large were not used when initializing RobertaModel: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at tae898/emoberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Prediction for model 7


100%|██████████| 261/261 [01:25<00:00,  3.06it/s]

(2610, 7)





In [35]:
n_preds = np.argmax(preds, axis = 1)
N_preds = le.inverse_transform(n_preds) 

## Submit

In [38]:
submit = pd.read_csv(path + 'sample_submission.csv')
submit.head()

Unnamed: 0,ID,Target
0,TEST_0000,0
1,TEST_0001,0
2,TEST_0002,0
3,TEST_0003,0
4,TEST_0004,0


In [40]:
submit['Target'] = N_preds
submit.head()

Unnamed: 0,ID,Target
0,TEST_0000,surprise
1,TEST_0001,neutral
2,TEST_0002,neutral
3,TEST_0003,neutral
4,TEST_0004,joy


In [44]:
submit.to_csv(f"/MyFiles/DACON/Speaker_emotion/results/submit.csv", index=False)