In [None]:
# Offline install: --no-index disables package-index lookups and --find-links points
# pip at a local directory under /kaggle/input, so no network access is needed.
# NOTE(review): `%pip` (instead of `!pip`) would target the running kernel's
# environment explicitly — consider switching.
!pip install -U transformers --no-index --find-links=file:///kaggle/input/trans-47-whl/transformers==4.8.1/

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from transformers import AutoTokenizer, AutoModel, AutoConfig
from transformers import RobertaModel
import torch.nn as nn
import os
import torch
from torch.utils.data import Dataset, DataLoader
import gc
gc.enable()
# Use the first GPU when one is visible; otherwise fall back to CPU so that
# `.to(device)` calls later in the notebook do not raise on a CUDA-less machine.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [None]:
# Directory holding the pretrained RoBERTa files. Custom_bert reads this
# module-level name when it is instantiated, so it must be set before run_fold runs.
model_dir = "../input/roberta-large"
# Module-level tokenizer picked up by run_fold; local_files_only=True keeps the load
# offline and model_max_length=256 matches the max_length used in CLRPDataset.
tokenizer = AutoTokenizer.from_pretrained(model_dir, local_files_only=True, model_max_length=256)

In [None]:
def init_params(module_lst):
    """Xavier-uniform initialise every weight matrix of the given modules, in place.

    Parameters with fewer than two dimensions (biases, for instance) are left
    untouched.

    Args:
        module_lst: iterable of ``nn.Module`` objects to re-initialise.

    Returns:
        None.
    """
    matrices = (
        tensor
        for mod in module_lst
        for tensor in mod.parameters()
        if tensor.dim() > 1
    )
    for tensor in matrices:
        torch.nn.init.xavier_uniform_(tensor)

class SpatialDropout(nn.Module):
    """Dropout that removes whole slices along the last axis of a 3-D tensor.

    The input's last axis is moved into the channel position expected by
    ``nn.Dropout2d``, so each index of that axis is kept or zeroed as a unit
    for a given sample. Presumably the input is (batch, seq_len, features) —
    TODO confirm against callers.

    Args:
        p: drop probability forwarded to ``nn.Dropout2d``.
    """

    def __init__(self, p=0.5):
        super().__init__()
        self.dropout2D = nn.Dropout2d(p=p)

    def forward(self, x):
        """Return ``x`` with channel-wise dropout applied; the shape is unchanged."""
        # (d0, d1, d2) -> (d0, d1, 1, d2) -> (d0, d2, 1, d1): last axis becomes the channel axis.
        channel_first = x.unsqueeze(2).permute(0, 3, 2, 1)
        dropped = self.dropout2D(channel_first)
        # Undo the permutation and remove the singleton axis added above.
        return dropped.permute(0, 3, 2, 1).squeeze(2)

class Custom_bert(nn.Module):
    """Scalar regressor on top of a pretrained encoder loaded with ``AutoModel``.

    Pipeline (see ``forward``):
      1. run the encoder with ``output_hidden_states=True``;
      2. mix the encoder-layer outputs with softmax-normalised learnable weights;
      3. pool over dim 1 with a learned attention (``nn.Softmax(dim=1)``);
      4. map the pooled vector to one value with a linear layer.

    Attribute names (``roberta``, ``layer_weights``, ``attention``, ``cls``) are
    kept unchanged because they determine the ``state_dict`` keys that saved
    checkpoints are loaded against.

    Args:
        model_path: path/identifier passed to ``from_pretrained``. When omitted,
            the notebook-level ``model_dir`` is read at construction time.
    """

    def __init__(self, model_path=None):
        super().__init__()

        # Keep ``Custom_bert()`` working for existing callers.
        if model_path is None:
            model_path = model_dir

        config = AutoConfig.from_pretrained(model_path)
        config.update({"output_hidden_states": True,
                       "hidden_dropout_prob": 0.0,
                       "layer_norm_eps": 1e-7})

        self.roberta = AutoModel.from_pretrained(model_path, config=config)

        # Take sizes from the config instead of hard-coding 1024 / 24, so the head
        # always matches the loaded encoder.
        dim = config.hidden_size
        n_weights = config.num_hidden_layers

        self.dropout = nn.Dropout(p=0.2)
        self.high_dropout = nn.Dropout(p=0.5)

        # Every layer weight starts at -3 except the last (0), so the initial
        # softmax mix is dominated by the final encoder layer.
        weights_init = torch.zeros(n_weights).float()
        weights_init.data[:-1] = -3
        self.layer_weights = torch.nn.Parameter(weights_init)

        self.attention = nn.Sequential(
            nn.Linear(dim, dim),
            nn.Tanh(),
            nn.Linear(dim, 1),
            nn.Softmax(dim=1)
        )
        self.cls = nn.Sequential(
            nn.Linear(dim, 1)
        )
        init_params([self.cls, self.attention])

    def forward(self, input_ids, attention_mask):
        """Return one prediction per input row, shape ``(batch, 1)``.

        Args:
            input_ids: token ids forwarded to the encoder.
            attention_mask: mask forwarded to the encoder. It is not applied in
                the pooling softmax below.
        """
        roberta_output = self.roberta(input_ids=input_ids,
                                      attention_mask=attention_mask)

        # Select hidden states by name rather than by tuple position, and keep one
        # entry per learnable layer weight (the trailing entries of the tuple).
        n_layers = self.layer_weights.shape[0]
        hidden_states = roberta_output["hidden_states"][-n_layers:]

        cls_outputs = torch.stack(
            [self.dropout(layer) for layer in hidden_states], dim=0
        )
        # Broadcast the per-layer weights over the stacked 4-D tensor and sum the layers out.
        layer_mix = torch.softmax(self.layer_weights, dim=0)[:, None, None, None]
        cls_output = (layer_mix * cls_outputs).sum(0)

        # Average five dropout samples of the attention pooling. In eval mode
        # nn.Dropout is the identity, so the five samples coincide.
        logits = torch.mean(
            torch.stack(
                [torch.sum(self.attention(self.high_dropout(cls_output)) * cls_output, dim=1) for _ in range(5)],
                dim=0,
            ),
            dim=0,
        )
        return self.cls(logits)

In [None]:
class CLRPDataset(Dataset):
    """Map-style dataset that tokenises the ``excerpt`` column of a DataFrame on access.

    Args:
        df: DataFrame with an ``excerpt`` column.
        tokenizer: callable invoked as
            ``tokenizer(text, return_tensors='pt', max_length=..., padding='max_length', truncation=True)``
            and returning a mapping with ``input_ids`` and ``attention_mask``.
        max_length: pad/truncate length forwarded to the tokenizer (default 256,
            the value that was previously hard-coded).
    """

    def __init__(self, df, tokenizer, max_length=256):
        self.excerpt = df['excerpt'].to_numpy()
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __getitem__(self, idx):
        """Return ``{'input_ids', 'attention_mask'}`` for row ``idx``."""
        encode = self.tokenizer(self.excerpt[idx], return_tensors='pt',
                                max_length=self.max_length,
                                padding='max_length', truncation=True)
        # Index [0] takes the first entry along the leading axis of each returned
        # tensor, so the DataLoader can stack items into batches itself.
        encoded = {'input_ids': encode['input_ids'][0],
                   'attention_mask': encode['attention_mask'][0]
                  }

        return encoded

    def __len__(self):
        """Number of excerpts."""
        return len(self.excerpt)

In [None]:
# Submission template; its 'target' column is overwritten at the end of the notebook.
sample = pd.read_csv('../input/commonlitreadabilityprize/sample_submission.csv')
# Test set; CLRPDataset reads its 'excerpt' column inside run_fold.
test_data = pd.read_csv('../input/commonlitreadabilityprize/test.csv')

In [None]:
def run_fold(fold_num, pth, ckpt_prefix="roberta_large"):
    """Predict on ``test_data`` with the checkpoint of one cross-validation fold.

    Reads the notebook-level ``device``, ``test_data`` and ``tokenizer``.

    Args:
        fold_num: fold index used in the checkpoint file name.
        pth: directory containing ``{ckpt_prefix}_{fold_num}.pt``.
        ckpt_prefix: checkpoint file-name prefix (default ``"roberta_large"``).

    Returns:
        ``np.ndarray`` of shape ``(len(test_data), 1)`` with the model outputs.
    """
    import warnings  # local import: keeps this cell self-contained

    model = Custom_bert().to(device)
    model.eval()

    # Load onto CPU first so the checkpoint opens regardless of the device it was
    # saved from; load_state_dict then copies the values into the model's tensors.
    state = torch.load(f"{pth}/{ckpt_prefix}_{fold_num}.pt", map_location="cpu")
    load_result = model.load_state_dict(state, strict=False)
    del state
    # strict=False tolerates key mismatches; report them instead of ignoring them,
    # so a checkpoint that does not fit the model is noticed.
    if load_result.missing_keys or load_result.unexpected_keys:
        warnings.warn(
            f"{ckpt_prefix}_{fold_num}.pt loaded with strict=False: "
            f"missing keys={list(load_result.missing_keys)}, "
            f"unexpected keys={list(load_result.unexpected_keys)}"
        )

    test_ds = CLRPDataset(test_data, tokenizer)
    test_dl = DataLoader(test_ds,
                         batch_size=16,
                         shuffle=False,
                         pin_memory=True)

    batch_preds = []
    with torch.no_grad():
        for batch in test_dl:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            output = model(input_ids, attention_mask)
            batch_preds.append(output.detach().cpu().numpy())

    # Drop the model and loader before the next fold is built.
    del model, test_dl, test_ds
    gc.collect()
    torch.cuda.empty_cache()

    if not batch_preds:
        # Empty test set: keep the 2-D (n, 1) layout that callers concatenate on axis 1.
        return np.empty((0, 1), dtype=np.float32)
    return np.concatenate(batch_preds, axis=0)

In [None]:
def get_single_model(pth, n_folds=5):
    """Average the predictions of folds ``0 .. n_folds-1`` stored under ``pth``.

    ``run_fold`` is looked up when this function is called, so whichever
    definition is current at that point is the one used.

    Args:
        pth: directory passed through to ``run_fold``.
        n_folds: number of fold checkpoints to average (default 5).

    Returns:
        ``np.ndarray`` of shape ``(n_rows, 1)``: the per-row mean over folds.
    """
    fold_preds = [run_fold(fold, pth) for fold in range(n_folds)]
    # Each fold yields (n_rows, 1); place them side by side and average across folds.
    stacked = np.concatenate(fold_preds, axis=1)
    return np.mean(stacked, axis=1, keepdims=True)

In [None]:
# Fold-averaged predictions of the RoBERTa checkpoints; uses the Custom_bert,
# run_fold and tokenizer definitions that are current at this point of the notebook.
pred_1 = get_single_model('../input/clrp-bag2/4808 LB448')

In [None]:
# Rebinds the module-level model_dir and tokenizer to DeBERTa. Everything run
# after this cell (Custom_bert(), run_fold) picks up these values, so cells must be
# executed in order.
model_dir = "../input/deberta-l"
tokenizer = AutoTokenizer.from_pretrained(model_dir, local_files_only=True, model_max_length=256)

In [None]:
def init_params(module_lst):
    """Re-initialise, in place, all parameters with more than one dimension.

    Uses ``torch.nn.init.xavier_uniform_``; one-dimensional parameters such as
    biases keep their current values.

    Args:
        module_lst: iterable of ``nn.Module`` objects.

    Returns:
        None.
    """
    for mod in module_lst:
        weight_matrices = [p for p in mod.parameters() if p.dim() > 1]
        for weight in weight_matrices:
            torch.nn.init.xavier_uniform_(weight)

class Custom_bert(nn.Module):
    """Scalar regressor on top of a pretrained encoder loaded with ``AutoModel``.

    Pipeline (see ``forward``):
      1. run the encoder with ``output_hidden_states=True``;
      2. mix the encoder-layer outputs with softmax-normalised learnable weights;
      3. pool over dim 1 with a learned attention (``nn.Softmax(dim=1)``);
      4. map the pooled vector to one value with a linear layer.

    Attribute names (``base``, ``layer_weights``, ``attention``, ``cls``) are kept
    unchanged because they determine the ``state_dict`` keys that saved
    checkpoints are loaded against.

    Args:
        model_path: path/identifier passed to ``from_pretrained``. When omitted,
            the notebook-level ``model_dir`` is read at construction time.
    """

    def __init__(self, model_path=None):
        super().__init__()

        # Keep ``Custom_bert()`` working for existing callers.
        if model_path is None:
            model_path = model_dir

        config = AutoConfig.from_pretrained(model_path)
        config.update({"output_hidden_states": True,
                       "hidden_dropout_prob": 0.0,
                       "layer_norm_eps": 1e-7})

        self.base = AutoModel.from_pretrained(model_path, config=config)

        # Take sizes from the config instead of hard-coding 1024 / 24, so the head
        # always matches the loaded encoder.
        dim = config.hidden_size
        n_weights = config.num_hidden_layers

        self.dropout = nn.Dropout(p=0.2)
        self.high_dropout = nn.Dropout(p=0.5)

        # Every layer weight starts at -3 except the last (0), so the initial
        # softmax mix is dominated by the final encoder layer.
        weights_init = torch.zeros(n_weights).float()
        weights_init.data[:-1] = -3
        self.layer_weights = torch.nn.Parameter(weights_init)

        self.attention = nn.Sequential(
            nn.Linear(dim, dim),
            nn.Tanh(),
            nn.Linear(dim, 1),
            nn.Softmax(dim=1)
        )
        self.cls = nn.Sequential(
            nn.Linear(dim, 1)
        )
        init_params([self.cls, self.attention])

    def forward(self, input_ids, attention_mask):
        """Return one prediction per input row, shape ``(batch, 1)``.

        Args:
            input_ids: token ids forwarded to the encoder.
            attention_mask: mask forwarded to the encoder. It is not applied in
                the pooling softmax below.
        """
        base_output = self.base(input_ids=input_ids,
                                attention_mask=attention_mask)

        # Keep one hidden state per learnable layer weight (the trailing entries).
        n_layers = self.layer_weights.shape[0]
        hidden_states = base_output['hidden_states'][-n_layers:]

        cls_outputs = torch.stack(
            [self.dropout(layer) for layer in hidden_states], dim=0
        )
        # Broadcast the per-layer weights over the stacked 4-D tensor and sum the layers out.
        layer_mix = torch.softmax(self.layer_weights, dim=0)[:, None, None, None]
        cls_output = (layer_mix * cls_outputs).sum(0)

        # Average five dropout samples of the attention pooling. In eval mode
        # nn.Dropout is the identity, so the five samples coincide.
        logits = torch.mean(
            torch.stack(
                [torch.sum(self.attention(self.high_dropout(cls_output)) * cls_output, dim=1) for _ in range(5)],
                dim=0,
            ),
            dim=0,
        )
        return self.cls(logits)

In [None]:
def run_fold(fold_num, pth, ckpt_prefix="deberta_large"):
    """Predict on ``test_data`` with the checkpoint of one cross-validation fold.

    Reads the notebook-level ``device``, ``test_data`` and ``tokenizer``.

    Args:
        fold_num: fold index used in the checkpoint file name.
        pth: directory containing ``{ckpt_prefix}_{fold_num}.pt``.
        ckpt_prefix: checkpoint file-name prefix (default ``"deberta_large"``).

    Returns:
        ``np.ndarray`` of shape ``(len(test_data), 1)`` with the model outputs.
    """
    import warnings  # local import: keeps this cell self-contained

    model = Custom_bert().to(device)
    model.eval()

    # Load onto CPU first so the checkpoint opens regardless of the device it was
    # saved from; load_state_dict then copies the values into the model's tensors.
    state = torch.load(f"{pth}/{ckpt_prefix}_{fold_num}.pt", map_location="cpu")
    load_result = model.load_state_dict(state, strict=False)
    del state
    # strict=False tolerates key mismatches; report them instead of ignoring them,
    # so a checkpoint that does not fit the model is noticed.
    if load_result.missing_keys or load_result.unexpected_keys:
        warnings.warn(
            f"{ckpt_prefix}_{fold_num}.pt loaded with strict=False: "
            f"missing keys={list(load_result.missing_keys)}, "
            f"unexpected keys={list(load_result.unexpected_keys)}"
        )

    test_ds = CLRPDataset(test_data, tokenizer)
    test_dl = DataLoader(test_ds,
                         batch_size=16,
                         shuffle=False,
                         pin_memory=True)

    batch_preds = []
    with torch.no_grad():
        for batch in test_dl:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            output = model(input_ids, attention_mask)
            batch_preds.append(output.detach().cpu().numpy())

    # Drop the model and loader before the next fold is built.
    del model, test_dl, test_ds
    gc.collect()
    torch.cuda.empty_cache()

    if not batch_preds:
        # Empty test set: keep the 2-D (n, 1) layout that callers concatenate on axis 1.
        return np.empty((0, 1), dtype=np.float32)
    return np.concatenate(batch_preds, axis=0)

In [None]:
# Fold-averaged predictions for two DeBERTa checkpoint directories; get_single_model
# resolves run_fold at call time, so the DeBERTa run_fold defined above is the one used.
pred_2 = get_single_model('../input/clrp-bag/4724 LB448')
pred_3 = get_single_model('../input/clrp-bag/4714 LB448')

In [None]:
# Show the RoBERTa predictions as the cell output.
pred_1

In [None]:
# Show the first DeBERTa prediction set as the cell output.
pred_2

In [None]:
# Show the second DeBERTa prediction set as the cell output.
pred_3

In [None]:
# Place the three per-model prediction columns side by side: one row per sample,
# one column per model.
cat = np.concatenate((pred_1,pred_2,pred_3),axis=1)

In [None]:
# Sort each row's model predictions in ascending order.
# NOTE(review): the only later use of `sort` is a row-wise mean, which does not
# depend on order (apart from float summation order) — confirm whether a trimmed
# mean or median was intended here.
sort = np.sort(cat, axis=1)

In [None]:
# Final ensemble prediction: per-row mean across the model columns (1-D result).
pred = np.mean(sort,axis=1)

In [None]:
# Show the ensembled predictions as the cell output.
pred

In [None]:
# Positional assignment of the ensembled values into the submission template.
# NOTE(review): assumes sample_submission.csv lists rows in the same order as
# test.csv — confirm.
sample['target'] = pred
# Write the submission file without the DataFrame index column.
sample.to_csv('submission.csv',index=False)

In [None]:
# Show the final submission frame as the cell output.
sample