### If this notebook is helpful, please upvote [the original version](https://www.kaggle.com/code/arvissu/roberta-base-inference-v2-0)! (score: 0.649)

# 1. Imports, Definitions, Settings & Data Loading

In [None]:
import gc
import pickle

import numpy as np
import pandas as pd

from tqdm import tqdm

import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader

from transformers import AutoModel, AutoTokenizer

import warnings
warnings.simplefilter('ignore')

In [None]:
def fetch_essay(essay_id, dir_name):
    """Read and return the full text of one essay file.

    The file is looked up at ``{COMP_DIR}{dir_name}/{essay_id}.txt``.

    Args:
        essay_id: Identifier used as the file name (without the ``.txt``
            extension).
        dir_name: Sub-directory of ``COMP_DIR`` that holds the essay.

    Returns:
        The complete file content as a single string.

    Raises:
        OSError: If the essay file cannot be opened.
    """
    essay_path = f"{COMP_DIR}{dir_name}/{essay_id}.txt"

    # Use a context manager so the handle is closed deterministically; this
    # function is called once per row, so leaked handles would pile up.
    with open(essay_path, 'r') as essay_file:
        essay_text = essay_file.read()

    return essay_text


def essay_test(essay_id: str):
    """Return the text of the essay ``essay_id`` stored under ``test``."""
    return fetch_essay(essay_id=essay_id, dir_name='test')


def essay_train(essay_id: str):
    """Return the text of the essay ``essay_id`` stored under ``train``."""
    return fetch_essay(essay_id=essay_id, dir_name='train')


def inference_fn(test_loader, model, device):
    """Run ``model`` over every batch of ``test_loader`` and return probabilities.

    Args:
        test_loader: Sized iterable yielding batches as mappings whose values
            support ``.to(device)``.
        model: Module called with one batch mapping; its output is treated as
            logits. As a side effect it is switched to eval mode and moved to
            ``device``.
        device: Device on which the forward passes are executed.

    Returns:
        numpy.ndarray: The softmax outputs of all batches, concatenated along
        the first axis in loader order.

    Raises:
        ValueError: Raised by ``np.concatenate`` when the loader yields no
            batches.
    """
    preds = []
    model.eval()
    model.to(device)
    tk0 = tqdm(test_loader, total=len(test_loader))

    for inputs in tk0:
        # Move every tensor of the batch to the target device in place.
        for k, v in inputs.items():
            inputs[k] = v.to(device)

        with torch.no_grad():
            output = model(inputs)
            # Name the class axis explicitly: calling softmax without `dim`
            # is deprecated and picks the axis implicitly from the rank.
            probs = F.softmax(output, dim=-1)

        preds.append(probs.to('cpu').numpy())

    return np.concatenate(preds)


def show_gradient(df, n_row=None):
    """Style the first rows of ``df`` with a row-wise colour gradient.

    A column ``all_mean`` holding the mean of each row is appended before
    styling. When ``n_row`` is falsy, 5 rows are shown.

    Returns:
        The pandas Styler produced by ``background_gradient`` using the
        module-level colour map ``cm``.
    """
    row_count = n_row if n_row else 5

    preview = df.head(row_count)
    with_mean = preview.assign(all_mean=preview.mean(axis=1))

    return with_mean.style.background_gradient(cmap=cm, axis=1)

In [None]:
# Display settings: 4 decimal places and a light-green colour map for tables.
pd.options.display.precision = 4
cm = sns.light_palette(color='green', as_cmap=True)
props_param = "color:white; font-weight:bold; background-color:green;"

# Number of rows shown in the preview tables.
N_ROW = 10

# Root directory of the competition data.
COMP_DIR = "../input/feedback-prize-effectiveness/"

# Run on CUDA when it is available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')

In [None]:
# Test rows to score.
test_path = COMP_DIR + "test.csv"
test_origin = pd.read_csv(filepath_or_buffer=test_path)

# Template whose columns are later filled with the predictions.
submission_path = COMP_DIR + "sample_submission.csv"
submission_origin = pd.read_csv(filepath_or_buffer=submission_path)

In [None]:
# Preview the first rows of the loaded test table.
test_origin.head()

# 2. Extract predictions

In [None]:
def prepare_input(cfg, text1, text2):
    """Tokenize a text pair and convert every field to a ``torch.long`` tensor.

    Args:
        cfg: Object providing ``tokenizer`` (callable on two texts) and
            ``max_len`` (length used for padding and truncation).
        text1: First text of the pair.
        text2: Second text of the pair.

    Returns:
        The tokenizer's output mapping, with each value replaced in place by
        a ``torch.long`` tensor.
    """
    encoded = cfg.tokenizer(
        text1,
        text2,
        add_special_tokens=True,
        truncation=True,
        padding="max_length",
        max_length=cfg.max_len,
    )

    # Snapshot the keys first, then overwrite each value with its tensor form.
    for field in list(encoded.keys()):
        encoded[field] = torch.tensor(encoded[field], dtype=torch.long)

    return encoded


class TestDataset(Dataset):
    """Dataset that tokenizes (``target_text``, ``essay_text``) pairs of a frame."""

    def __init__(self, cfg, df):
        """Store ``cfg`` and the two text columns of ``df`` as arrays."""
        self.cfg = cfg
        self.essay = df['essay_text'].values
        self.discourse = df['target_text'].values

    def __len__(self):
        """Return the number of rows taken from the source frame."""
        return len(self.discourse)

    def __getitem__(self, item):
        """Return the tokenized inputs for row ``item``.

        The discourse text is passed as the first segment and the essay text
        as the second.
        """
        return prepare_input(self.cfg, self.discourse[item], self.essay[item])
        
class FeedBackModel(nn.Module):
    """Pretrained backbone followed by a linear classification head.

    Args:
        model_path: Name or path handed to ``AutoModel.from_pretrained``.
        hidden_size: Width of the backbone's hidden states fed to the head.
            Defaults to 768, the value that used to be hard-coded.
        num_labels: Number of output logits. Defaults to 3, the value that
            used to be hard-coded.
    """

    def __init__(self, model_path, hidden_size=768, num_labels=3):
        super().__init__()
        self.model = AutoModel.from_pretrained(model_path)
        self.linear = nn.Linear(hidden_size, num_labels)

    def forward(self, inputs):
        """Return the logits for one batch.

        Args:
            inputs: Mapping unpacked as keyword arguments into the backbone.

        Returns:
            Output of the linear head applied to the hidden state at
            sequence position 0 of the backbone's first output.
        """
        # First backbone output, sliced to the first token of every sequence.
        last_hidden_states = self.model(**inputs)[0][:, 0, :]
        outputs = self.linear(last_hidden_states)

        return outputs

In [None]:
# Load the pickled collection of per-fold models.
# NOTE(review): unpickling executes code embedded in the file, so only load
# pickles from a trusted source.
# The context manager closes the file handle once loading has finished.
with open("../input/feedback-roberta-ep1/roberta_modellist_ep2.pkl", "rb") as model_file:
    model_list = pickle.load(model_file)

class CFG:
    """Static inference settings, read as class attributes."""

    # Directory handed to AutoTokenizer.from_pretrained.
    path = "../input/roberta-base/"

    # Tokenizer padding / truncation length.
    max_len = 512

    # Number of entries of model_list that are evaluated.
    folds = 5

    # DataLoader batch size and worker count.
    batch = 16
    num_workers = 2

In [None]:
# Load the tokenizer from CFG.path and attach it to CFG, where prepare_input
# reads it as cfg.tokenizer.
CFG.tokenizer = AutoTokenizer.from_pretrained(CFG.path)

In [None]:
# Separator placed between the discourse type and the discourse text.
txt_sep = " "

# Work on a copy so test_origin keeps its original columns.
df = test_origin.copy()

# First model segment: lower-cased, stripped "<type> <text>".
df['target_text'] = (
    df['discourse_type'].str.lower().str.strip()
    + txt_sep
    + df['discourse_text'].str.lower().str.strip()
)

# Second model segment: the full essay, lower-cased and stripped.
df['essay_text'] = df['essay_id'].map(essay_test).str.lower().str.strip()
df.head()

In [None]:
# Wrap the prepared frame; batches keep the row order (no shuffling) and the
# final partial batch is kept so every row receives a prediction.
test_dataset = TestDataset(cfg=CFG, df=df)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=CFG.batch,
    shuffle=False,
    drop_last=False,
    num_workers=CFG.num_workers,
    pin_memory=True,
)

In [None]:
# Collect one probability array per fold model.
roberta_predicts = []
for i in range(CFG.folds):
    model = model_list[i]

    prediction = inference_fn(test_loader, model, DEVICE)
    roberta_predicts.append(prediction)

    # inference_fn moved this model to DEVICE in place and model_list still
    # references it, so `del model` alone cannot release accelerator memory.
    # Move the weights back to the host before clearing the cache so the
    # fold models do not accumulate on the device.
    model.to('cpu')

    del model, prediction
    # Collect first so freed tensors can actually be returned by empty_cache.
    gc.collect()
    torch.cuda.empty_cache()

del model_list
gc.collect()

In [None]:
# Split every fold's probability array into one list per class column:
# column 0 -> ineffective, column 1 -> adequate, column 2 -> effective.
rob_ineffective = [fold_probs[:, 0] for fold_probs in roberta_predicts]
rob_adequate = [fold_probs[:, 1] for fold_probs in roberta_predicts]
rob_effective = [fold_probs[:, 2] for fold_probs in roberta_predicts]

In [None]:
# Transpose so that each list entry becomes one column of the frame.
rob_ineffective = pd.DataFrame(rob_ineffective).transpose()

show_gradient(df=rob_ineffective, n_row=N_ROW)

In [None]:
# Transpose so that each list entry becomes one column of the frame.
rob_adequate = pd.DataFrame(rob_adequate).transpose()

show_gradient(df=rob_adequate, n_row=N_ROW)

In [None]:
# Transpose so that each list entry becomes one column of the frame.
rob_effective = pd.DataFrame(rob_effective).transpose()

show_gradient(df=rob_effective, n_row=N_ROW)

# 3. Create submission

In [None]:
# Fill the submission template with the row-wise mean of each class frame.
submission = submission_origin.copy().assign(
    Ineffective=rob_ineffective.mean(axis=1),
    Adequate=rob_adequate.mean(axis=1),
    Effective=rob_effective.mean(axis=1),
)

submission.head(N_ROW)

In [None]:
# 0	a261b6e14276	0.0187	0.5211	0.4602
# 1	5a88900e7dc1	0.0298	0.7725	0.1977
# 2	9790d835736b	0.0149	0.6092	0.3759
# 3	75ce6d68b67b	0.0135	0.5422	0.4443
# 4	93578d946723	0.0159	0.5700	0.4141
# 5	2e214524dbe3	0.0072	0.3674	0.6254
# 6	84812fc2ab9f	0.0114	0.3854	0.6032
# 7	c668ff840720	0.0088	0.4532	0.5380
# 8	739a6d00f44a	0.0076	0.3373	0.6551
# 9	bcfae2c9a244	0.0201	0.7737	0.2063

In [None]:
# Write the final predictions to submission.csv without the index column.
submission.to_csv('submission.csv',index=False)