In [None]:
import numpy as np 
import pandas as pd 

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
# from wtfml.utils import EarlyStopping

from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

import transformers
from transformers import AutoModel, BertTokenizerFast
from transformers import RobertaTokenizer, RobertaModel
from transformers import AdamW
from transformers import AutoConfig

from tqdm import tqdm


In [None]:
# Load the three competition CSVs in one pass; the names bound here (train,
# test, sample) are read as module-level globals by later cells.
train, test, sample = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/commonlitreadabilityprize/train.csv',
        '/kaggle/input/commonlitreadabilityprize/test.csv',
        '/kaggle/input/commonlitreadabilityprize/sample_submission.csv',
    )
)

In [None]:
# parameters
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Fixed sequence length: DRDataset pads/truncates every text to this many tokens.
MAX_LENGTH = 311
# Batch size handed to the test DataLoader in loading_data_test.
BATCH_SIZE = 16
# NOTE(review): none of the settings from NUM_EPOCHS through N_FOLDS below are
# referenced by the cells visible in this notebook; presumably they were
# carried over from the training notebook -- confirm before relying on them.
NUM_EPOCHS = 40
NUM_WORKERS = 6
CHECKPOINT_FILE = ''
PIN_MEMORY = True
SAVE_MODEL = True
LOAD_MODEL = False
PRETRAINED_MODEL = 'roberta-base'
FREEZE = False
PATIENCE = 5
N_FOLDS = 5
# Module-level tokenizer used by DRDataset.__getitem__. It is loaded from a
# local dataset directory, not looked up via the PRETRAINED_MODEL name.
tokenizer = RobertaTokenizer.from_pretrained('../input/roberta-transformers-pytorch/roberta-base')

In [None]:
# creating our dataset
class DRDataset(Dataset):
    """Map-style dataset that tokenizes one text per item.

    Args:
        text_col: Column of raw texts. Must expose ``.shape`` and ``.iloc``
            (e.g. a pandas Series).
        label_col: Column of targets aligned with ``text_col``. It is never
            read when ``train`` is False, so any placeholder (such as ``-1``)
            may be passed for test data.
        train: When True each item carries its real label; when False the
            label slot is filled with the constant ``-1``.
        tokenizer: Optional tokenizer exposing ``encode_plus``. When omitted,
            the module-level ``tokenizer`` is looked up at item-fetch time.
        max_length: Optional padded/truncated sequence length. When omitted,
            the module-level ``MAX_LENGTH`` is looked up at item-fetch time.
    """

    def __init__(self, text_col, label_col, train=True, tokenizer=None, max_length=None):
        super().__init__()
        self.text_col = text_col
        self.label_col = label_col
        self.train = train
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of texts in ``text_col``."""
        return self.text_col.shape[0]

    def __getitem__(self, index):
        """Return ``(input_ids, attention_mask, label)`` tensors for row ``index``."""
        text = self.text_col.iloc[index]

        # Resolve the notebook-level defaults lazily so that instances built
        # without explicit overrides behave exactly as before, while explicit
        # overrides never touch the globals at all.
        active_tokenizer = tokenizer if self.tokenizer is None else self.tokenizer
        active_max_length = MAX_LENGTH if self.max_length is None else self.max_length

        # Tokenize and encode to a fixed length.
        tokens = active_tokenizer.encode_plus(
            text,
            padding='max_length',
            max_length=active_max_length,
            truncation=True,
            return_token_type_ids=False,
        )

        seq = torch.tensor(tokens['input_ids'])
        mask = torch.tensor(tokens['attention_mask'])
        # Test items have no target; -1 is a placeholder so every item keeps
        # the same three-element layout.
        y = torch.tensor(self.label_col.iloc[index]) if self.train else torch.tensor(-1)

        return seq, mask, y
    

In [None]:
# make prediction
def make_prediction(model, loader, output_csv="submission.csv"):
    """Run ``model`` over every batch of ``loader`` and collect its outputs.

    Side effects: the model is put in eval mode for the pass and switched to
    train mode before returning; the ``target`` column of the module-level
    ``sample`` frame is overwritten with the predictions and its head is
    printed. No file is written here.

    Args:
        model: Callable as ``model(seq, mask)``; its output is squeezed on
            dim 1 (assumes an output of shape (batch, 1) -- TODO confirm).
        loader: Iterable of ``(seq, mask, label)`` tensor batches.
        output_csv: Accepted for compatibility but currently unused.

    Returns:
        list: One prediction per row, in loader order.
    """
    model.eval()
    collected = []

    for batch in tqdm(loader):
        # Every tensor in the batch is moved to the device; the label slot is
        # then discarded because it is not needed for inference.
        seq, mask, _ = (tensor.to(device=DEVICE) for tensor in batch)

        with torch.no_grad():
            scores = model(seq, mask).squeeze(1)
        collected.extend(scores.cpu().numpy())

    sample['target'] = collected
    print(sample.head())

    model.train()
    print("Done with predictions")
    return collected

# load model    
def load_checkpoint(checkpoint, model):
    """Copy the weights in ``checkpoint`` into ``model`` in place.

    Args:
        checkpoint: State dict accepted by ``model.load_state_dict``.
        model: Object whose parameters are overwritten.

    Returns:
        None.
    """
    banner = "=> Loading checkpoint"
    print(banner)
    model.load_state_dict(checkpoint)


In [None]:
# model architecture
class Roberta_Arch(nn.Module):
    """Encoder followed by a two-layer regression head producing one value.

    The head is ``Linear(hidden, 512) -> ReLU -> Dropout(0.1) -> Linear(512, 1)``.
    Sub-module names (``roberta``, ``fc1``, ``fc2``) are part of the saved
    state-dict layout and must not be renamed.

    Args:
        roberta: Encoder module. Called as
            ``roberta(sent_id, attention_mask=mask, return_dict=False)`` and
            expected to return a 2-tuple whose second element is the pooled
            representation fed to the head.
    """

    def __init__(self, roberta):
        super(Roberta_Arch, self).__init__()
        self.roberta = roberta
        # Size the head from the encoder's own config so encoders with another
        # hidden size work too; fall back to 768, the width that was
        # previously hard-coded, when no config is exposed.
        hidden_size = getattr(getattr(roberta, "config", None), "hidden_size", 768)
        self.dropout = nn.Dropout(0.1)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(hidden_size, 512)
        self.fc2 = nn.Linear(512, 1)

    def forward(self, sent_id, mask):
        """Return the head's output for a batch of token ids and attention masks."""
        # With return_dict=False the encoder returns a tuple; only its second
        # element is used.
        _, cls_hs = self.roberta(sent_id, attention_mask=mask, return_dict=False)
        x = self.fc1(cls_hs)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)

        return x

In [None]:
# main function
def main_test(test_loader, model_path):
    """Build the model, load one saved checkpoint and predict on ``test_loader``.

    Args:
        test_loader: Loader of test batches, passed straight to
            ``make_prediction``.
        model_path: Path of the saved state dict to load into the model.

    Returns:
        Whatever ``make_prediction`` returns for this model and loader.
    """
    roberta = AutoModel.from_pretrained('../input/roberta-transformers-pytorch/roberta-base')
    model = Roberta_Arch(roberta)
    model = model.to(DEVICE)

    # map_location remaps the stored tensors onto the active device, so a
    # checkpoint saved from a GPU run still loads when DEVICE is "cpu".
    state_dict = torch.load(model_path, map_location=DEVICE)
    load_checkpoint(state_dict, model)
    print(f"model {model_path} loaded successfully")

    return make_prediction(model, test_loader)


In [None]:
# loading the data test
def loading_data_test(data_text):
    """Wrap a column of test texts in an unshuffled DataLoader.

    Args:
        data_text: Column of raw texts handed to ``DRDataset`` as ``text_col``.

    Returns:
        DataLoader yielding batches of ``BATCH_SIZE`` items in input order.
    """
    # Test data has no labels: -1 is passed as a placeholder and train=False
    # tells the dataset not to read it.
    return DataLoader(
        DRDataset(text_col=data_text, label_col=-1, train=False),
        batch_size=BATCH_SIZE,
        shuffle=False,
    )


In [None]:
# pred for each kfold
def test_pred_kfold(folds=(0,)):
    """Predict on the test excerpts with the saved model of each requested fold.

    Args:
        folds: Iterable of fold indices whose checkpoints are loaded. The
            default ``(0,)`` evaluates fold 0 only, matching the previous
            hard-coded behaviour.

    Returns:
        list: One entry per requested fold, in the order given, each being
        the predictions returned by ``main_test`` for that fold's model.
    """
    # The loader is built once and shared by every fold.
    test_loader = loading_data_test(test['excerpt'])

    kfold_preds = []
    for fold in folds:
        model_path = f'../input/commonlitkfoldmodels/model.roberta-base.fold.{fold}.lr.3e-05.wd.0.01'
        kfold_preds.append(main_test(test_loader, model_path))

    return kfold_preds

In [None]:
# prediction for each fold
# Inference runs when this cell executes. The resulting list is kept as the
# module-level `pred_k`, which final_best_pred_auc reads as a global.
pred_k = test_pred_kfold()

In [None]:
# # final pred for submission
# def final_mean_pred_weighted():
# #    pred_k = test_pred_kfold()
#     # auc of each model
#     auc_kfold = torch.tensor([0.49808, 0.56205, 0.50327, 0.51976, 0.51563])    
#     m = nn.Softmax(dim=0)  
#     weights = m(1/auc_kfold)
    
#     final_pred = torch.tensor(pred_k[0])*weights[0] \
#                + torch.tensor(pred_k[1])*weights[1] \
#                + torch.tensor(pred_k[2])*weights[2] \
#                + torch.tensor(pred_k[3])*weights[3] \
#                + torch.tensor(pred_k[4])*weights[4]
                                
#     sample['target'] = final_pred
#     print(sample.head())
#   #  sample.to_csv("submission.csv", index=False)

# final_mean_pred_weighted()

In [None]:
# final pred for submission
def final_best_pred_auc():
    """Write the first fold's predictions to ``submission.csv``.

    Reads the module-level ``pred_k`` and ``sample``: the first entry of
    ``pred_k`` becomes the ``target`` column of ``sample``, the head of the
    frame is printed, and the frame is saved without its index.

    NOTE(review): the function name suggests entry 0 was chosen as the
    best-scoring fold -- confirm against the validation scores.
    """
    sample['target'] = pred_k[0]
    print(sample.head())
    sample.to_csv("submission.csv", index=False)

final_best_pred_auc()

In [None]:
'''
patience = 5
using kfold 
fold 0: val auc 0.49808 attained in epoch 13
fold 1: val auc 0.56205 attained in epoch 1
fold 2: val auc 0.50327 attained in epoch 9
fold 3: val auc 0.51976 attained in epoch 0
fold 4: val auc 0.51563 attained in epoch 3

got 0.499 auc on test set when using mean of kfold preds
got 0.498 auc on test set when using weighted average (1/softmax(auc)) of kfold preds






'''