In [None]:
# ========================================
# library
# ========================================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold, KFold,GroupKFold
from sklearn.metrics import mean_squared_error
%matplotlib inline
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModel,AutoConfig
import transformers
from transformers import RobertaModel,RobertaTokenizer
from transformers import AlbertModel,AlbertTokenizer
from transformers import XLNetModel,XLNetTokenizer,XLNetConfig
from transformers import DebertaModel, DebertaTokenizer
from transformers import ElectraModel, ElectraTokenizer, ElectraForSequenceClassification
from transformers import BartModel
from transformers import AdamW, get_linear_schedule_with_warmup
from transformers import MPNetModel,MPNetTokenizer
from transformers import FunnelModel,FunnelTokenizer, FunnelBaseModel
import logging
import sys
from contextlib import contextmanager
import time
import random
from tqdm import tqdm
import os
import pickle
import gc

In [None]:
# ==================
# Constant
# ==================
# Experiment id; it is interpolated into the log-file name and the model path prefix.
ex = "423"
# Fixed: the extension was misspelled as ".csvv", which would make any read of this path fail.
TRAIN_PATH = "../input/commonlitreadabilityprize/train.csv"
LOGGER_PATH = f"ex{ex}.txt"
FOLD_PATH = "../input/fe001-step-1-create-folds/fe001_train_folds.csv"
MODEL_PATH_BASE = f"ex{ex}"
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# ===============
# Settings
# ===============
# Batch size handed to the inference DataLoader.
BATCH_SIZE = 8
# Token length that CommonLitDataset pads / truncates every excerpt to.
max_len = 256

# Each pair below is a checkpoint location plus the tokenizer loaded for it.
# NOTE: "robeota" is a misspelling of "roberta"; the name is referenced by other
# definitions in this notebook, so the spelling is left untouched here.
robeota_large_MODEL_PATH = '../input/roberta-transformers-pytorch/roberta-large'
robeota_large_tokenizer = RobertaTokenizer.from_pretrained(robeota_large_MODEL_PATH)

roberta_base_MODEL_PATH = '../input/roberta-transformers-pytorch/roberta-base'
roberta_base_tokenizer = RobertaTokenizer.from_pretrained(roberta_base_MODEL_PATH)

# Second roberta-base location (a separate dataset directory); loaded through AutoTokenizer.
roberta_base_MODEL_PATH2 = '../input/clrp-pytorch-roberta-pretrain-fold0/clrp_roberta_base'
roberta_base_tokenizer2 = AutoTokenizer.from_pretrained(roberta_base_MODEL_PATH2)

albert_large_MODEL_PATH = "../input/pretrained-albert-pytorch/albert-large-v2"
albert_large_tokenizer = AlbertTokenizer.from_pretrained(albert_large_MODEL_PATH)

# XLNet: the model path points at a single .bin weight file and the tokenizer at
# the matching sentencepiece .model file, not at a directory.
xlnet_large_MODEL_PATH = '../input/xlnet-pretrained-models-pytorch/xlnet-large-cased-pytorch_model.bin'
xlnet_large_tokenizer = XLNetTokenizer.from_pretrained("../input/xlnet-pretrained-models-pytorch/xlnet-large-cased-spiece.model")

deberta_large_MODEL_PATH = "../input/deberta/large"
deberta_large_tokenizer = DebertaTokenizer.from_pretrained(deberta_large_MODEL_PATH)

xlnet_base_MODEL_PATH = '../input/xlnet-pretrained-models-pytorch/xlnet-base-cased-pytorch_model.bin'
xlnet_base_tokenizer = XLNetTokenizer.from_pretrained("../input/xlnet-pretrained-models-pytorch/xlnet-base-cased-spiece.model")

electra_large_MODEL_PATH = "../input/electra/large-discriminator"
electra_large_tokenizer = ElectraTokenizer.from_pretrained(electra_large_MODEL_PATH)

# BART gets a RobertaTokenizer built from the roberta-large files rather than from
# its own directory (presumably because the vocabularies match -- TODO confirm).
bart_large_MODEL_PATH = '../input/bart-models-hugging-face-model-repository/bart-large'
bart_large_tokenizer = RobertaTokenizer.from_pretrained(robeota_large_MODEL_PATH)

deberta_xlarge_MODEL_PATH = "../input/deberta/v2-xlarge"
deberta_xlarge_tokenizer = AutoTokenizer.from_pretrained(deberta_xlarge_MODEL_PATH)

mpnet_base_MODEL_PATH = "../input/mpnet-base"
mpnet_base_tokenizer = MPNetTokenizer.from_pretrained(mpnet_base_MODEL_PATH)

deberta_v2_xxlarge_MODEL_PATH = "../input/deberta/v2-xxlarge"
deberta_v2_xxlarge_tokenizer = AutoTokenizer.from_pretrained(deberta_v2_xxlarge_MODEL_PATH)

funnel_large_MODEL_PATH = '../input/funnel-large-base-save/funnel-large/'
funnel_large_tokenizer = FunnelTokenizer.from_pretrained(funnel_large_MODEL_PATH )

muppet_roberta_large_MODEL_PATH = "../input/muppet-roberta-large/muppet-roberta-large/"
muppet_roberta_large_tokenizer = RobertaTokenizer.from_pretrained(muppet_roberta_large_MODEL_PATH)

# Tokenizer used by the funnel_medium inference cell of this notebook.
funnel_medium_MODEL_PATH = '../input/funnel-medium-save/funnel-medium'
funnel_medium_tokenizer = FunnelTokenizer.from_pretrained(funnel_medium_MODEL_PATH)

In [None]:
# ===============
# Functions
# ===============

class CommonLitDataset(Dataset):
    """Map-style dataset that tokenizes one excerpt per item.

    Args:
        excerpt: indexable collection of texts (each item is passed through ``str``).
        tokenizer: callable invoked as ``tokenizer(text, max_length=..., ...)`` that
            returns a mapping with ``input_ids``, ``attention_mask`` and
            ``token_type_ids``.
        max_len: length forwarded to the tokenizer as ``max_length``.
        target: optional indexable collection of regression targets; when it is
            ``None`` the returned samples carry no ``"target"`` entry.
    """

    # Tokenizer outputs that are converted to long tensors, in output order.
    _LONG_FIELDS = ("input_ids", "attention_mask", "token_type_ids")

    def __init__(self, excerpt, tokenizer, max_len, target=None):
        self.excerpt, self.tokenizer = excerpt, tokenizer
        self.max_len, self.target = max_len, target

    def __len__(self):
        """Number of excerpts."""
        return len(self.excerpt)

    def __getitem__(self, item):
        """Tokenize excerpt ``item`` and return its tensors as a dict."""
        encoded = self.tokenizer(
            str(self.excerpt[item]),
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_token_type_ids=True,
        )
        sample = {
            field: torch.tensor(encoded[field], dtype=torch.long)
            for field in self._LONG_FIELDS
        }
        # The target is only attached when targets were supplied at construction.
        if self.target is not None:
            sample["target"] = torch.tensor(self.target[item], dtype=torch.float32)
        return sample

    
class roberta_large_model(nn.Module):
    """RoBERTa encoder followed by a LayerNorm + linear regression head.

    The encoder is read from ``robeota_large_MODEL_PATH`` with both dropout
    probabilities overridden to 0. The head takes 1024 features and emits one value.
    """

    def __init__(self):
        super().__init__()
        no_dropout = dict(hidden_dropout_prob=0, attention_probs_dropout_prob=0)
        self.roberta = RobertaModel.from_pretrained(robeota_large_MODEL_PATH, **no_dropout)
        self.ln = nn.LayerNorm(1024)
        self.out = nn.Linear(1024, 1)

    def forward(self, ids, mask, token_type_ids):
        """Run the encoder and regress from the mean of its ``last_hidden_state``."""
        encoded = self.roberta(ids, attention_mask=mask, token_type_ids=token_type_ids)
        # Plain mean along dim 1; the attention mask does not weight the average.
        pooled = encoded["last_hidden_state"].mean(dim=1)
        return self.out(self.ln(pooled))
    

class roberta_base_model(nn.Module):
    """RoBERTa encoder (from ``roberta_base_MODEL_PATH``) with a small MLP head.

    The head is applied to the encoder's ``pooler_output`` in this order:
    dropout -> linear(768, 256) -> LayerNorm -> dropout -> ReLU -> linear(256, 1).
    """

    def __init__(self):
        super().__init__()
        self.roberta = RobertaModel.from_pretrained(roberta_base_MODEL_PATH)
        self.drop = nn.Dropout(0.2)
        self.fc = nn.Linear(768, 256)
        self.layernorm = nn.LayerNorm(256)
        self.drop2 = nn.Dropout(0.2)
        self.relu = nn.ReLU()
        self.out = nn.Linear(256, 1)

    def forward(self, ids, mask, token_type_ids):
        """Return ``(prediction, pooler_output)``; the embedding is returned untouched."""
        emb = self.roberta(ids, attention_mask=mask, token_type_ids=token_type_ids)['pooler_output']
        head_stages = (self.drop, self.fc, self.layernorm, self.drop2, self.relu, self.out)
        prediction = emb
        for stage in head_stages:
            prediction = stage(prediction)
        return prediction, emb
    
class roberta_base_model2(nn.Module):
    """RoBERTa encoder with learned attention pooling and a linear regressor.

    The config is loaded from ``roberta_base_MODEL_PATH2`` and patched to return
    all hidden states, disable hidden dropout and use ``layer_norm_eps=1e-7``.
    The encoder weights are loaded from that same location.

    ``forward(input_ids, attention_mask)`` scores every position of the last
    hidden layer, softmax-normalises the scores along dim 1, and feeds the
    resulting weighted sum of hidden states to a ``Linear(768, 1)`` regressor.
    """

    def __init__(self):
        super().__init__()

        config = AutoConfig.from_pretrained(roberta_base_MODEL_PATH2)
        config.update({"output_hidden_states":True, 
                       "hidden_dropout_prob": 0.0,
                       "layer_norm_eps": 1e-7})                       

        # Fixed: the weights used to come from `roberta_base_MODEL_PATH` while the
        # config (and the matching tokenizer) come from `roberta_base_MODEL_PATH2`.
        # Config and checkpoint are now read from the same location.
        self.roberta = AutoModel.from_pretrained(roberta_base_MODEL_PATH2, config=config)  

        # One score per position, normalised along dim 1.
        self.attention = nn.Sequential(            
            nn.Linear(768, 512),            
            nn.Tanh(),                       
            nn.Linear(512, 1),
            nn.Softmax(dim=1)
        )        

        self.regressor = nn.Sequential(                        
            nn.Linear(768, 1)                        
        )

    def forward(self, input_ids, attention_mask):
        """Return the regressor output for the attention-pooled last hidden layer."""
        roberta_output = self.roberta(input_ids=input_ids,
                                      attention_mask=attention_mask)        

        # hidden_states is available because the config sets output_hidden_states.
        last_layer_hidden_states = roberta_output.hidden_states[-1]
        weights = self.attention(last_layer_hidden_states)
        context_vector = torch.sum(weights * last_layer_hidden_states, dim=1)        
        return self.regressor(context_vector)
    
    

class deberta_large_model(nn.Module):
    """DeBERTa encoder with a LayerNorm + linear head on the first sequence position.

    The encoder is read from ``deberta_large_MODEL_PATH`` with dropout set to 0
    and ``hidden_act`` overridden to ``"gelu_new"``.
    """

    def __init__(self):
        super().__init__()
        overrides = dict(
            hidden_dropout_prob=0,
            attention_probs_dropout_prob=0,
            hidden_act="gelu_new",
        )
        self.deberta_model = DebertaModel.from_pretrained(deberta_large_MODEL_PATH, **overrides)
        self.ln = nn.LayerNorm(1024)
        self.out = nn.Linear(1024, 1)

    def forward(self, ids, mask, token_type_ids):
        """Regress from ``last_hidden_state`` at index 0 along dim 1."""
        encoded = self.deberta_model(ids, attention_mask=mask, token_type_ids=token_type_ids)
        first_position = encoded['last_hidden_state'][:, 0, :]
        return self.out(self.ln(first_position))


class xlnet_base_model(nn.Module):
    """XLNet encoder with a LayerNorm + linear head on mean-pooled hidden states.

    The config is read from the xlnet-base-cased JSON file, three dropout-related
    attributes are set to 0 on it, and the weights come from
    ``xlnet_base_MODEL_PATH``. The head takes 768 features and emits one value.
    """

    def __init__(self):
        super().__init__()
        xlnet_config = XLNetConfig.from_json_file('../input/xlnet-pretrained-models-pytorch/xlnet-base-cased-config.json')
        # Zero every dropout-related attribute before the model is built.
        for dropout_attr in ("hidden_dropout_prob", "attention_probs_dropout_prob", "dropout"):
            setattr(xlnet_config, dropout_attr, 0)
        self.xlnet_model = XLNetModel.from_pretrained(xlnet_base_MODEL_PATH, config=xlnet_config)
        self.ln = nn.LayerNorm(768)
        self.out = nn.Linear(768, 1)

    def forward(self, ids, mask, token_type_ids):
        """Run the encoder and regress from the mean of its ``last_hidden_state``."""
        encoded = self.xlnet_model(ids, attention_mask=mask, token_type_ids=token_type_ids)
        # Plain mean along dim 1; the attention mask does not weight the average.
        pooled = encoded["last_hidden_state"].mean(dim=1)
        return self.out(self.ln(pooled))
    
    
class electra_large_model(nn.Module):
    """Thin wrapper around ``ElectraForSequenceClassification`` with a single label.

    The checkpoint is read from ``electra_large_MODEL_PATH`` with the three
    dropout settings overridden to 0 and ``num_labels=1``.
    """

    def __init__(self):
        super().__init__()
        overrides = dict(
            hidden_dropout_prob=0,
            attention_probs_dropout_prob=0,
            summary_last_dropout=0,
            num_labels=1,
        )
        self.electra = ElectraForSequenceClassification.from_pretrained(
            electra_large_MODEL_PATH, **overrides
        )

    def forward(self, ids, mask, token_type_ids):
        """Return the wrapped model's ``logits`` unchanged."""
        result = self.electra(ids, attention_mask=mask, token_type_ids=token_type_ids)
        return result["logits"]
    
    
    
class bart_large_model(nn.Module):
    """BART model with a LayerNorm + linear head on mean-pooled hidden states.

    The checkpoint is read from ``bart_large_MODEL_PATH`` with ``dropout`` and
    ``attention_dropout`` set to 0.0. Unlike the sibling models, ``forward``
    takes no ``token_type_ids``.
    """

    def __init__(self):
        super().__init__()
        self.bart = BartModel.from_pretrained(
            bart_large_MODEL_PATH, **{"dropout": 0.0, "attention_dropout": 0.0}
        )
        self.ln = nn.LayerNorm(1024)
        self.out = nn.Linear(1024, 1)

    def forward(self, ids, mask):
        """Run the model and regress from the mean of its ``last_hidden_state``."""
        encoded = self.bart(ids, attention_mask=mask)
        # Plain mean along dim 1; the attention mask does not weight the average.
        pooled = encoded['last_hidden_state'].mean(dim=1)
        return self.out(self.ln(pooled))
    
class deberta_xlarge_model(nn.Module):
    """Encoder from ``deberta_xlarge_MODEL_PATH`` with a bare linear head.

    The encoder is loaded through ``AutoModel`` with both dropout probabilities
    set to 0. No LayerNorm is applied: the ``Linear(1536, 1)`` head reads the
    hidden state at index 0 along dim 1 directly.
    """

    def __init__(self):
        super().__init__()
        no_dropout = dict(hidden_dropout_prob=0, attention_probs_dropout_prob=0)
        self.deberta_model = AutoModel.from_pretrained(deberta_xlarge_MODEL_PATH, **no_dropout)
        self.out = nn.Linear(1536, 1)

    def forward(self, ids, mask, token_type_ids):
        """Regress from ``last_hidden_state`` at index 0 along dim 1."""
        encoded = self.deberta_model(ids, attention_mask=mask, token_type_ids=token_type_ids)
        first_position = encoded['last_hidden_state'][:, 0, :]
        return self.out(first_position)
    
class mpnet_base_model(nn.Module):
    """MPNet encoder with a LayerNorm + linear head on mean-pooled hidden states.

    The encoder is read from ``mpnet_base_MODEL_PATH`` with both dropout
    probabilities overridden to 0. The head takes 768 features and emits one value.
    """

    def __init__(self):
        super().__init__()
        no_dropout = dict(hidden_dropout_prob=0, attention_probs_dropout_prob=0)
        self.mpnet = MPNetModel.from_pretrained(mpnet_base_MODEL_PATH, **no_dropout)
        self.ln = nn.LayerNorm(768)
        self.out = nn.Linear(768, 1)

    def forward(self, ids, mask, token_type_ids):
        """Run the encoder and regress from the mean of its ``last_hidden_state``."""
        encoded = self.mpnet(ids, attention_mask=mask, token_type_ids=token_type_ids)
        # Plain mean along dim 1; the attention mask does not weight the average.
        pooled = encoded["last_hidden_state"].mean(dim=1)
        return self.out(self.ln(pooled))
    
class deberta_v2_xxlarge_model(nn.Module):
    """Encoder from ``deberta_v2_xxlarge_MODEL_PATH`` with a bare linear head.

    The encoder is loaded through ``AutoModel`` with both dropout probabilities
    set to 0. No LayerNorm is applied: the ``Linear(1536, 1)`` head reads the
    hidden state at index 0 along dim 1 directly.
    """

    def __init__(self):
        super().__init__()
        no_dropout = dict(hidden_dropout_prob=0, attention_probs_dropout_prob=0)
        self.deberta_model = AutoModel.from_pretrained(deberta_v2_xxlarge_MODEL_PATH, **no_dropout)
        self.out = nn.Linear(1536, 1)

    def forward(self, ids, mask, token_type_ids):
        """Regress from ``last_hidden_state`` at index 0 along dim 1."""
        encoded = self.deberta_model(ids, attention_mask=mask, token_type_ids=token_type_ids)
        first_position = encoded['last_hidden_state'][:, 0, :]
        return self.out(first_position)
    
class funnel_large_model(nn.Module):
    """``FunnelBaseModel`` encoder with a linear head on mean-pooled hidden states.

    The encoder is read from ``funnel_large_MODEL_PATH`` with dropout set to 0
    and ``hidden_act`` overridden to ``"gelu"``.

    ``self.ln`` is constructed but never applied in ``forward``. It is kept so
    the module's parameter set stays as it was (presumably saved checkpoints
    contain its weights -- TODO confirm before removing).
    """

    def __init__(self):
        super().__init__()
        overrides = dict(hidden_dropout=0, attention_dropout=0, hidden_act="gelu")
        self.funnel = FunnelBaseModel.from_pretrained(funnel_large_MODEL_PATH, **overrides)
        self.ln = nn.LayerNorm(1024)
        self.out = nn.Linear(1024, 1)

    def forward(self, ids, mask, token_type_ids):
        """Run the encoder and regress from the mean of its ``last_hidden_state``."""
        encoded = self.funnel(ids, attention_mask=mask, token_type_ids=token_type_ids)
        # Plain mean along dim 1, fed straight into the linear layer (no LayerNorm).
        pooled = encoded["last_hidden_state"].mean(dim=1)
        return self.out(pooled)
    
    
class muppet_roberta_large_model(nn.Module):
    """RoBERTa encoder (muppet checkpoint) with a LayerNorm + linear head.

    The encoder is read from ``muppet_roberta_large_MODEL_PATH`` with both
    dropout probabilities overridden to 0. The head takes 1024 features and
    emits one value.
    """

    def __init__(self):
        super().__init__()
        no_dropout = dict(hidden_dropout_prob=0, attention_probs_dropout_prob=0)
        self.roberta = RobertaModel.from_pretrained(muppet_roberta_large_MODEL_PATH, **no_dropout)
        self.ln = nn.LayerNorm(1024)
        self.out = nn.Linear(1024, 1)

    def forward(self, ids, mask, token_type_ids):
        """Run the encoder and regress from the mean of its ``last_hidden_state``."""
        encoded = self.roberta(ids, attention_mask=mask, token_type_ids=token_type_ids)
        # Plain mean along dim 1; the attention mask does not weight the average.
        pooled = encoded["last_hidden_state"].mean(dim=1)
        return self.out(self.ln(pooled))
    
class funnel_medium_model(nn.Module):
    """``FunnelModel`` encoder with a bare linear head on mean-pooled hidden states.

    The encoder is read from ``funnel_medium_MODEL_PATH`` with ``hidden_dropout``
    and ``attention_dropout`` set to 0. No LayerNorm is applied before the
    ``Linear(768, 1)`` head.
    """

    def __init__(self):
        super().__init__()
        no_dropout = dict(hidden_dropout=0, attention_dropout=0)
        self.funnel = FunnelModel.from_pretrained(funnel_medium_MODEL_PATH, **no_dropout)
        self.out = nn.Linear(768, 1)

    def forward(self, ids, mask, token_type_ids):
        """Run the encoder and regress from the mean of its ``last_hidden_state``."""
        encoded = self.funnel(ids, attention_mask=mask, token_type_ids=token_type_ids)
        # Plain mean along dim 1; the attention mask does not weight the average.
        pooled = encoded["last_hidden_state"].mean(dim=1)
        return self.out(pooled)
    
    
    
def calc_loss(y_true, y_pred):
    """Return the square root of ``mean_squared_error(y_true, y_pred)`` (RMSE)."""
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)
    
def set_seed(seed: int = 42):
    """Seed Python, NumPy and torch RNGs and request deterministic cuDNN.

    Args:
        seed: value handed to every seeding function and exported (as a string)
            through the ``PYTHONHASHSEED`` environment variable.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for seed_rng in seeders:
        seed_rng(seed)
    torch.backends.cudnn.deterministic = True



def setup_logger(out_file=None, stderr=True, stderr_level=logging.INFO, file_level=logging.DEBUG):
    """(Re)configure the module-level ``LOGGER`` and return it.

    Every handler currently attached to ``LOGGER`` is removed and closed, then
    fresh handlers using the module-level ``FORMATTER`` are attached.

    Args:
        out_file: path of a log file; no file handler is added when ``None``.
        stderr: whether to attach a handler writing to ``sys.stderr``.
        stderr_level: level of the stderr handler.
        file_level: level of the file handler.

    Returns:
        The configured ``LOGGER``.
    """
    # Close the old handlers instead of merely dropping the list: re-running the
    # cell would otherwise leave the previous log file handle open.
    for old_handler in list(LOGGER.handlers):
        LOGGER.removeHandler(old_handler)
        old_handler.close()

    # The logger itself must let through whatever the most verbose handler wants.
    LOGGER.setLevel(min(stderr_level, file_level))

    if stderr:
        handler = logging.StreamHandler(sys.stderr)
        handler.setFormatter(FORMATTER)
        handler.setLevel(stderr_level)
        LOGGER.addHandler(handler)

    if out_file is not None:
        handler = logging.FileHandler(out_file)
        handler.setFormatter(FORMATTER)
        handler.setLevel(file_level)
        LOGGER.addHandler(handler)

    LOGGER.info("logger set up")
    return LOGGER


@contextmanager
def timer(name):
    """Context manager that logs how long its ``with`` body took.

    On normal exit it emits ``[name] done in N s`` on ``LOGGER`` at INFO level,
    with N rounded to whole seconds. Nothing is logged when the body raises.
    """
    started_at = time.time()
    yield
    elapsed = time.time() - started_at
    LOGGER.info(f'[{name}] done in {elapsed:.0f} s')
    
    
# Root logger (getLogger() is called without a name); used by setup_logger() and timer().
LOGGER = logging.getLogger()
# Line layout for every handler: timestamp - level name - message.
FORMATTER = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
# With the defaults this logs to stderr and additionally to the LOGGER_PATH file.
setup_logger(out_file=LOGGER_PATH)

In [None]:
# ================================
# Main
# ================================
# Load the competition test set; the inference cell reads its "excerpt" column.
test = pd.read_csv("../input/commonlitreadabilityprize/test.csv")

In [None]:
# ================================
# funnel_medium
# ================================
# Runs the five fold checkpoints of funnel_medium_model over the test set and
# averages their predictions into `y_test_funnel_medium`.
# NOTE: when `test` is empty nothing runs and `y_test_funnel_medium` stays undefined.
if len(test) > 0:
    with timer("funnel_medium"):
        y_test_funnel_medium = []
        # dataset (no targets at inference time)
        test_ = CommonLitDataset(test["excerpt"].values, funnel_medium_tokenizer, max_len, None)

        # loader: shuffle stays off so predictions keep the row order of `test`
        test_loader = DataLoader(dataset=test_, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

        for fold in tqdm(range(5)):

            # model
            model = funnel_medium_model()
            # map_location lets a checkpoint saved from GPU load on the CPU fallback
            # device as well; without it torch.load fails on a CPU-only machine.
            state_dict = torch.load(f"../input/commonlit-ex423/ex423_{fold}.pth", map_location=device)
            model.load_state_dict(state_dict)
            model.to(device)
            model.eval()

            # Collect per-batch outputs and concatenate once per fold instead of
            # re-copying a growing array after every batch.
            batch_outputs = []
            with torch.no_grad():
                # Predicting on the test set
                for d in test_loader:
                    # =========================
                    # data loader
                    # =========================
                    input_ids = d["input_ids"].to(device)
                    mask = d["attention_mask"].to(device)
                    token_type_ids = d["token_type_ids"].to(device)
                    output = model(input_ids, mask, token_type_ids)

                    batch_outputs.append(output.detach().cpu().numpy())

            # The empty float64 (0, 1) seed keeps the dtype and column shape of the
            # accumulator this loop used to grow batch by batch.
            test_preds = np.concatenate([np.empty((0, 1))] + batch_outputs, axis=0)
            y_test_funnel_medium.append(test_preds)
            del model, state_dict
            gc.collect()
        del test_, test_loader
        gc.collect()
        # Average the fold predictions element-wise.
        y_test_funnel_medium = np.mean(y_test_funnel_medium, axis=0)

In [None]:
# Show the fold-averaged funnel_medium predictions as a sanity check before they are written out.
print(y_test_funnel_medium)

In [None]:
# Fill the sample submission's `target` column with the averaged predictions
# and write the file to the working directory.
submission = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")
# Bracket assignment always writes a column. Attribute assignment
# (`submission.target = ...`) only does so while a `target` column already
# exists and otherwise just sets a plain Python attribute on the frame.
submission["target"] = y_test_funnel_medium.reshape(-1)
submission.to_csv("submission.csv", index=False)