<center><b>Method: Concatenate Last 4 Layers - Roberta Large</b></center>

Method: **Conv1D Pooling** - **LB:0.500** - Version-3 <br>
Method: **LSTM/GRU Pooling** - **LB:0.494** - Version-4 <br>
Method: **Concatenate Pooling Roberta Base** - **LB:0.478** - Version-5 <br>
Method: **Concatenate Pooling Roberta Large** - Version-6 <br>
Method: **Concatenate Pooling Roberta Large Inference** - Version-7 <br>

**Code Credits:** <br>
**Pretrained Models:** https://www.kaggle.com/maunish/clrp-roberta-base <br>
**Fine Tuning Code:** https://www.kaggle.com/maunish/clrp-pytorch-roberta-finetune <br>
**Techniques:** https://www.kaggle.com/rhtsingh/utilizing-transformer-representations-efficiently

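Concatenate Pooling is a technique in which the outputs of several transformer layers are concatenated into a single representation. In the experiments reported by the BERT authors, concatenating the last 4 hidden layers gave the best results among the feature-based variants they compared. In this notebook the last 4 hidden layers are concatenated and the first-token vector of the result is passed to a linear regression head.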

**Inference: Roberta Large Concatenate Pooling**

In [None]:
import os
import gc
import sys
import math
import time
import tqdm
import random
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# from accelerate import Accelerator
from transformers import (AutoModel,AutoConfig,
                          AutoTokenizer,get_cosine_schedule_with_warmup)

from colorama import Fore, Back, Style
# Short aliases for colorama foreground colours, used to tint console output.
r_, b_, c_ = Fore.RED, Fore.BLUE, Fore.CYAN
g_, y_, m_ = Fore.GREEN, Fore.YELLOW, Fore.MAGENTA
# Style reset code, emitted after a coloured span.
sr_ = Style.RESET_ALL

In [None]:
# Competition tables: labelled training excerpts, the test excerpts to score,
# and the submission template.
train_data = pd.read_csv('../input/commonlitreadabilityprize/train.csv')
test_data = pd.read_csv('../input/commonlitreadabilityprize/test.csv')
sample = pd.read_csv('../input/commonlitreadabilityprize/sample_submission.csv')

# Strip newline characters from the training excerpts (no space is inserted).
train_data['excerpt'] = [text.replace('\n','') for text in train_data['excerpt']]

# Discretise the continuous target into floor(1 + log2(n)) equal-width bins;
# the bin index serves as the stratification label for the K-fold split.
num_bins = int(np.floor(np.log2(len(train_data)) + 1))
train_data['bins'] = pd.cut(train_data['target'], bins=num_bins, labels=False)

bins = train_data['bins'].to_numpy()
target = train_data['target'].to_numpy()

def rmse_score(y_true,y_pred):
    """Return the root-mean-squared error between targets and predictions.

    Args:
        y_true: Ground-truth values, forwarded to ``mean_squared_error``.
        y_pred: Predicted values, forwarded to ``mean_squared_error``.

    Returns:
        The square root of the mean squared error.
    """
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)


# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

In [None]:
# Run settings shared by the notebook cells.
config = dict(
    lr=2e-5,          # learning rate
    wd=0.01,          # weight decay
    batch_size=12,
    valid_step=10,
    max_len=256,      # token length every excerpt is padded/truncated to
    epochs=3,
    nfolds=5,
    seed=42,
    model_path='../input/clrp-roberta-large-pretrain/clrp_roberta_large',
)

# One output directory per fold: model0 ... model{nfolds-1}.
for fold_id in range(config['nfolds']):
    os.makedirs(f'model{fold_id}', exist_ok=True)

def seed_everything(seed=42):
    """Seed every random number generator used here, for reproducible runs.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # The variable Python reads is PYTHONHASHSEED. Setting it here cannot
    # change hashing in the already-running interpreter; it is exported for
    # any interpreter started afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick a different kernel on each run, which
    # defeats the determinism requested above, so keep it disabled.
    torch.backends.cudnn.benchmark = False

# Fix all RNG seeds before the folds are drawn.
seed_everything(seed=config['seed'])

# Every row starts unassigned (-1) and then receives the index of the fold in
# which it is part of the validation split. Folds are stratified on the
# binned target.
train_data['Fold'] = -1
kfold = StratifiedKFold(
    n_splits=config['nfolds'],
    shuffle=True,
    random_state=config['seed'],
)
for fold_id, (_, holdout_idx) in enumerate(kfold.split(X=train_data, y=bins)):
    train_data.loc[holdout_idx, 'Fold'] = fold_id

In [None]:
class Model(nn.Module):
    """Transformer encoder with a linear regression head on concatenated layers.

    The first-token vectors of the last four hidden layers are concatenated
    (last layer first) and projected to a single scalar per example.
    """

    def __init__(self,path):
        """Load the encoder found at ``path`` and build the regression head.

        Args:
            path: Model directory or identifier understood by
                ``AutoConfig.from_pretrained`` / ``AutoModel.from_pretrained``.
        """
        super().__init__()
        self.config = AutoConfig.from_pretrained(path)
        # All hidden states are needed for concatenate pooling; hidden dropout
        # is set to zero in the config.
        self.config.update({'output_hidden_states':True,"hidden_dropout_prob": 0.0})
        self.roberta = AutoModel.from_pretrained(path,config=self.config)
        # Four layers are concatenated, hence 4 * hidden_size input features.
        # `bias` is spelled out: the previous positional `1` only enabled the
        # bias by being truthy.
        self.linear = nn.Linear(self.config.hidden_size*4, 1, bias=True)

    def forward(self,**xb):
        """Return one prediction per example.

        Args:
            **xb: Keyword tensors forwarded unchanged to the encoder.

        Returns:
            Output of the linear head applied to the concatenated first-token
            vectors; the last dimension has size 1.
        """
        encoder_out = self.roberta(**xb)
        # Element 2 of the encoder output is indexed as the per-layer hidden
        # states (assumes the (sequence output, pooled output, hidden states)
        # layout - TODO confirm for other architectures).
        hidden_states = encoder_out[2]
        # Select the first token of each layer *before* concatenating. This is
        # numerically identical to stacking every layer and concatenating full
        # sequences, but avoids materialising those large tensors.
        first_tokens = [hidden_states[layer][:, 0] for layer in (-1, -2, -3, -4)]
        pooled = torch.cat(first_tokens, dim=-1)
        return self.linear(pooled)

**Inference**

In [None]:
class CLRPDataset(Dataset):
    """Dataset that tokenizes one excerpt per item.

    Args:
        df: Table with an ``'excerpt'`` column holding the raw texts.
        tokenizer: Callable invoked as ``tokenizer(text, return_tensors='pt',
            max_length=..., padding='max_length', truncation=True)``.
        max_len: Optional token length to pad/truncate to. When omitted, the
            module-level ``config['max_len']`` is read at item-access time,
            which is the previous behaviour.
    """

    def __init__(self,df,tokenizer,max_len=None):
        self.excerpt = df['excerpt'].to_numpy()
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __getitem__(self,idx):
        """Return the tokenizer output for the excerpt at position ``idx``."""
        max_length = config['max_len'] if self.max_len is None else self.max_len
        # Padding every item to the same length lets a DataLoader batch the
        # encodings without a custom collate function.
        return self.tokenizer(self.excerpt[idx],return_tensors='pt',
                              max_length=max_length,
                              padding='max_length',truncation=True)

    def __len__(self):
        """Return the number of excerpts."""
        return len(self.excerpt)

In [None]:
def get_prediction(df,path,model_path,device='cuda'):
    """Predict a score for every excerpt in ``df`` with one checkpoint.

    Args:
        df: Table with an ``'excerpt'`` column.
        path: File holding the fine-tuned ``state_dict`` to load.
        model_path: Pretrained model directory, used both for the model
            architecture and for the tokenizer.
        device: Device on which inference runs.

    Returns:
        1-D NumPy array of predictions, in the row order of ``df``.
    """
    model = Model(model_path)
    # NOTE: torch.load unpickles the file; only load trusted checkpoints.
    model.load_state_dict(torch.load(path,map_location=device))
    model.to(device)
    model.eval()

    tokenizer = AutoTokenizer.from_pretrained(model_path)

    test_ds = CLRPDataset(df,tokenizer)
    test_dl = DataLoader(test_ds,
                        batch_size = config["batch_size"],
                        shuffle=False,
                        num_workers = 4,
                        pin_memory=True)

    predictions = []
    # Inference only: without no_grad() an autograd graph is recorded for
    # every batch, wasting memory.
    with torch.no_grad():
        # Iterating the loader directly lets tqdm pick up its length.
        for inputs in tqdm(test_dl):
            # Flatten everything after the batch dimension so each tensor is
            # (batch, -1) before it is handed to the model.
            inputs = {key:val.reshape(val.shape[0],-1).to(device) for key,val in inputs.items()}
            outputs = model(**inputs)
            predictions.extend(outputs.cpu().numpy().ravel().tolist())

    # Drop the model before emptying the cache; while it is still referenced
    # its memory cannot be handed back.
    del model
    gc.collect()
    torch.cuda.empty_cache()
    return np.array(predictions)

In [None]:
# Score the test set once per fold checkpoint (model0 ... model4).
# NOTE(review): the checkpoint dataset is named "distilroberta-baseuncased"
# while the architecture is loaded from the roberta-large pretrain directory -
# confirm these weights belong to this model.
pred1, pred2, pred3, pred4, pred5 = [
    get_prediction(
        test_data,
        f'../input/clrp-distilroberta-baseuncased-finetune/model{fold}/model{fold}.bin',
        '../input/clrp-roberta-large-pretrain/clrp_roberta_large',
    )
    for fold in range(5)
]

In [None]:
# Ensemble: unweighted mean of the five per-fold prediction arrays,
# accumulated in the order pred1 ... pred5.
fold_outputs = (pred1, pred2, pred3, pred4, pred5)
running_total = fold_outputs[0]
for fold_output in fold_outputs[1:]:
    running_total = running_total + fold_output
predictions = running_total / 5

# Fill the submission template and write it without the index column.
sample['target'] = predictions
sample.to_csv('submission.csv',index=False)