## Experiments

<!-- |  |  |  | -->
    
| Experiment Name | CV | LB |
| :--- | ---: | ---: |
| Baseline |  |  |


## Import Library

In [None]:
import os
import random
import pathlib
from typing import Optional

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import transformers

from sklearn.metrics import mean_squared_error
from torch.utils.data import DataLoader
from transformers import (
    AdamW,
    AutoConfig,
    AutoModel,
    AutoTokenizer,
)

## Dataset

In [None]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from transformers import (
    AutoTokenizer,
)

MAX_LEN = 256  # 248


class LitDataset(Dataset):
    """Map-style dataset that tokenizes every excerpt once, up front.

    Args:
        df: Frame exposing an ``excerpt`` column and, unless
            ``inference_only`` is set, a ``target`` column.
        model_name_or_path: Name or path handed to
            ``AutoTokenizer.from_pretrained``.
        inference_only: When true, no targets are stored and items are
            ``(input_ids, attention_mask)`` pairs; otherwise items are
            ``(input_ids, attention_mask, target)`` triples.
    """

    def __init__(self, df, model_name_or_path="roberta-base", inference_only=False):
        super().__init__()

        self.df = df
        self.inference_only = inference_only
        self.text = df.excerpt.tolist()

        if not self.inference_only:
            self.target = torch.tensor(df.target.values, dtype=torch.float32)

        # Every text is padded/truncated to exactly MAX_LEN tokens so that
        # items can be batched without a custom collate function.
        tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
        self.encoded = tokenizer.batch_encode_plus(
            self.text,
            padding="max_length",
            max_length=MAX_LEN,
            truncation=True,
            return_attention_mask=True,
        )

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        # Tensors are materialised lazily, one sample at a time.
        features = tuple(
            torch.tensor(self.encoded[key][index])
            for key in ("input_ids", "attention_mask")
        )
        if self.inference_only:
            return features
        return (*features, self.target[index])

In [None]:
def get_dataloader(model_name):
    """Return an ordered DataLoader over the competition test excerpts.

    Args:
        model_name: Tokenizer name or path forwarded to ``LitDataset``.

    Returns:
        A ``DataLoader`` yielding inference-only batches of 32 samples, in
        file order and without dropping the final partial batch.
    """
    test_frame = pd.read_csv(
        "../input/commonlitreadabilityprize/test.csv",
        usecols=["id", "excerpt"],
    )
    return DataLoader(
        LitDataset(test_frame, model_name, inference_only=True),
        batch_size=32,
        shuffle=False,
        drop_last=False,
        num_workers=4,
    )

## Inference

In [None]:
def load_data():
    """Load the training targets, minus rows where both columns are zero.

    Returns:
        DataFrame with ``target`` and ``standard_error`` columns and a fresh
        0..n-1 index. Rows having ``target == 0`` and ``standard_error == 0``
        are excluded (presumably the benchmark/anchor excerpt -- confirm
        against the competition data description).
    """
    frame = pd.read_csv(
        "../input/commonlitreadabilityprize/train.csv",
        usecols=["target", "standard_error"],
    )
    both_zero = (frame.target == 0) & (frame.standard_error == 0)
    return frame.loc[~both_zero].reset_index(drop=True)

# Training targets as a NumPy array; used below as the reference values when
# scoring the saved out-of-fold predictions.
target = load_data()["target"].to_numpy()

In [None]:
# Accumulators filled by each model section below:
#   all_oof            -- one out-of-fold prediction column per (model, seed)
#   all_pred           -- one test prediction column per (model, seed)
#   all_pred_for_stack -- one seed-averaged test prediction column per model
all_oof, all_pred, all_pred_for_stack = [], [], []

In [None]:
def predict_by_roberta(
    model: nn.Module,
    model_name_or_path: str,
    model_dir: str,
    num_fold: int = 5,
):
    """Average test-set predictions over the per-fold checkpoints of a model.

    Args:
        model: Network whose weights are overwritten, fold by fold, with the
            checkpoints found in ``model_dir``. It is called as
            ``model(input_ids, attention_mask)``.
        model_name_or_path: Tokenizer name or path used to build the test
            dataloader.
        model_dir: Directory containing ``model_0.pth`` ...
            ``model_{num_fold - 1}.pth`` state dicts.
        num_fold: Number of fold checkpoints to load and average.

    Returns:
        NumPy array holding the element-wise mean of the per-fold predictions.

    Raises:
        ValueError: If ``num_fold`` is smaller than 1 (the mean would
            otherwise silently be NaN).
    """
    if num_fold < 1:
        raise ValueError(f"num_fold must be >= 1, got {num_fold}")

    model_dir = pathlib.Path(model_dir)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dataloader = get_dataloader(model_name_or_path)

    # Moving the module is loop-invariant: load_state_dict copies the new
    # weights into the parameters that already live on `device`.
    model = model.to(device)

    pred = []
    for i in range(num_fold):
        # map_location keeps checkpoints that were saved from a GPU loadable
        # when this function falls back to the CPU.
        state_dict = torch.load(
            str(model_dir / f"model_{i}.pth"), map_location=device
        )
        model.load_state_dict(state_dict)
        model.eval()  # Ignore dropout and bn layers.

        pred_by_fold = []
        with torch.no_grad():  # Skip gradient calculation
            for input_ids, attention_mask in dataloader:
                z = model(input_ids.to(device), attention_mask.to(device))
                pred_by_fold.append(z)

        pred_by_fold = torch.cat(pred_by_fold, dim=0).detach().cpu().numpy().copy()
        pred.append(pred_by_fold)

    return np.mean(pred, axis=0)

In [None]:
# Training seeds whose `seed{seed}/` directories (OOF file + fold checkpoints)
# are loaded by the loops that iterate over SEEDS below.
SEEDS = [42, 422, 12, 123, 7]

#### [commonlit-finetuned-roberta-base](https://www.kaggle.com/konumaru/commonlit-finetuned-roberta-base)

In [None]:
import torch
import torch.nn as nn
import transformers
from transformers import (
    AutoConfig,
    AutoModel,
)

class LitModel(nn.Module):
    """Transformer encoder + attention pooling + LayerNorm/linear regressor.

    The encoder is loaded from ``model_name_or_path`` with hidden states
    exposed, hidden dropout disabled and ``layer_norm_eps`` set to 1e-7.
    ``forward`` returns one score per input sequence.
    """

    def __init__(self, model_name_or_path="roberta-base"):
        super().__init__()

        config = AutoConfig.from_pretrained(model_name_or_path)
        config.update(
            {
                "output_hidden_states": True,
                "hidden_dropout_prob": 0.0,
                "layer_norm_eps": 1e-7,
            }
        )
        self.config = config
        self.roberta = AutoModel.from_pretrained(model_name_or_path, config=config)

        width = config.hidden_size
        # Scores every position, then normalises the scores across dim 1.
        self.attention = nn.Sequential(
            nn.Linear(width, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1),
        )
        self.regressor = nn.Sequential(
            nn.LayerNorm(width),
            nn.Linear(width, 1),
        )

        self._init_embed_layers(reinit_layers=4)

    def _init_embed_layers(self, reinit_layers: int = 4):
        """Re-initialise the last ``reinit_layers`` encoder layers in place.

        Linear and embedding weights are drawn from N(0, initializer_range);
        biases and padding rows are zeroed; LayerNorm is reset to identity.
        Nothing happens when ``reinit_layers`` is not positive.
        """
        if reinit_layers <= 0:
            return

        std = self.config.initializer_range
        for encoder_layer in self.roberta.encoder.layer[-reinit_layers:]:
            for sub in encoder_layer.modules():
                if isinstance(sub, nn.Linear):
                    sub.weight.data.normal_(mean=0.0, std=std)
                    if sub.bias is not None:
                        sub.bias.data.zero_()
                elif isinstance(sub, nn.Embedding):
                    sub.weight.data.normal_(mean=0.0, std=std)
                    if sub.padding_idx is not None:
                        sub.weight.data[sub.padding_idx].zero_()
                elif isinstance(sub, nn.LayerNorm):
                    sub.bias.data.zero_()
                    sub.weight.data.fill_(1.0)

    def forward(self, input_ids, attention_mask):
        encoded = self.roberta(input_ids=input_ids, attention_mask=attention_mask)

        # Attention-weighted sum of the final hidden states over dim 1.
        token_states = encoded.hidden_states[-1]
        token_weights = self.attention(token_states)
        pooled = (token_weights * token_states).sum(dim=1)
        # Now we reduce the pooled vector to the prediction score.
        return self.regressor(pooled)

In [None]:
data_dir = "commonlit-finetuned-roberta-base"
model_name_or_path = "../input/roberta-transformers-pytorch/roberta-base"

# For every seed: load the saved out-of-fold predictions as a column vector
# and run fold-averaged test inference with that seed's checkpoints.
for seed in SEEDS:
    oof = np.load(f"../input/{data_dir}/seed{seed}/oof.npy").reshape(-1, 1)
    pred = predict_by_roberta(
        model=LitModel(model_name_or_path),
        model_name_or_path=model_name_or_path,
        model_dir=f"../input/{data_dir}/seed{seed}/models",
    )

    all_oof.append(oof)
    all_pred.append(pred)

# The entries appended by the loop above are the last len(SEEDS) items, so the
# slice follows SEEDS instead of repeating its length as a literal.
all_pred_for_stack.append(np.mean(all_pred[-len(SEEDS):], axis=0))
# np.sqrt(MSE) instead of `squared=False`, which newer scikit-learn releases
# no longer accept.
print("RMSE: ", np.sqrt(mean_squared_error(target, np.mean(all_oof[-len(SEEDS):], axis=0))))

#### [commonlit-finetuned-roberta-base-init-4layers](https://www.kaggle.com/konumaru/commonlit-finetuned-roberta-base-init-4layers)

In [None]:
import torch
import torch.nn as nn
import transformers
from transformers import (
    AutoConfig,
    AutoModel,
)

class LitModel(nn.Module):
    """Transformer encoder + attention pooling + single linear regressor.

    The encoder is loaded from ``model_name_or_path`` with hidden states
    exposed, hidden dropout disabled and ``layer_norm_eps`` set to 1e-7.
    Head widths follow ``config.hidden_size`` (768 for roberta-base), so the
    class is no longer tied to one encoder size.
    """

    def __init__(self, model_name_or_path="roberta-base"):
        super().__init__()

        self.config = AutoConfig.from_pretrained(model_name_or_path)
        self.config.update({
            "output_hidden_states": True,
            "hidden_dropout_prob": 0.0,
            "layer_norm_eps": 1e-7
        })

        self.roberta = AutoModel.from_pretrained(model_name_or_path, config=self.config)

        # Read the width from the config instead of hard-coding 768.
        hidden_size = self.config.hidden_size
        self.attention = nn.Sequential(
            nn.Linear(hidden_size, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1)
        )

        self.regressor = nn.Sequential(
            nn.Linear(hidden_size, 1)
        )

        self._init_embed_layers(reinit_layers=4)

    def _init_embed_layers(self, reinit_layers: int = 4):
        """Re-initialise the last ``reinit_layers`` encoder layers in place.

        Linear and embedding weights are drawn from N(0, initializer_range);
        biases and padding rows are zeroed; LayerNorm is reset to identity.
        Nothing happens when ``reinit_layers`` is not positive.
        """
        if reinit_layers > 0:
            for layer in self.roberta.encoder.layer[-reinit_layers:]:
                for module in layer.modules():
                    if isinstance(module, nn.Linear):
                        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
                        if module.bias is not None:
                            module.bias.data.zero_()
                    elif isinstance(module, nn.Embedding):
                        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
                        if module.padding_idx is not None:
                            module.weight.data[module.padding_idx].zero_()
                    elif isinstance(module, nn.LayerNorm):
                        module.bias.data.zero_()
                        module.weight.data.fill_(1.0)

    def forward(self, input_ids, attention_mask):
        roberta_output = self.roberta(input_ids=input_ids,
                                      attention_mask=attention_mask)

        # hidden_states holds the embedding output followed by one entry per
        # encoder layer; we take the hidden states of the last encoder layer.
        last_layer_hidden_states = roberta_output.hidden_states[-1]

        # The number of cells is MAX_LEN and each cell has a hidden state of
        # size hidden_size. To condense the hidden states of all cells into a
        # context vector we compute a weighted average, with the weight of
        # each cell produced by the attention network.
        weights = self.attention(last_layer_hidden_states)

        # weights.shape is BATCH_SIZE x MAX_LEN x 1
        # last_layer_hidden_states.shape is BATCH_SIZE x MAX_LEN x hidden_size
        # context_vector.shape is BATCH_SIZE x hidden_size
        context_vector = torch.sum(weights * last_layer_hidden_states, dim=1)

        # Now we reduce the context vector to the prediction score.
        return self.regressor(context_vector)

In [None]:
data_dir = "commonlit-finetuned-roberta-base-init-4layers"
model_name_or_path = "../input/roberta-transformers-pytorch/roberta-base"

# One pass per seed: read that seed's saved OOF predictions as a column and
# predict the test set with the fold checkpoints stored next to them.
for seed in SEEDS:
    oof_path = f"../input/{data_dir}/seed{seed}/oof.npy"
    checkpoint_dir = f"../input/{data_dir}/seed{seed}/models"

    oof = np.load(oof_path).reshape(-1, 1)
    pred = predict_by_roberta(
        model=LitModel(model_name_or_path),
        model_name_or_path=model_name_or_path,
        model_dir=checkpoint_dir,
    )

    all_oof.append(oof)
    all_pred.append(pred)

In [None]:
# The preceding loop appended one prediction per entry of SEEDS, so average
# exactly that many trailing entries instead of a hard-coded count.
all_pred_for_stack.append(np.mean(all_pred[-len(SEEDS):], axis=0))

In [None]:
# OOF score of this model family, averaged over the seeds loaded above.
# np.sqrt(MSE) replaces `squared=False`, which newer scikit-learn releases no
# longer accept; the slice follows SEEDS rather than a hard-coded count.
print("RMSE: ", np.sqrt(mean_squared_error(target, np.mean(all_oof[-len(SEEDS):], axis=0))))

#### [commonlit-finetuned-roberta-base-squad2](https://www.kaggle.com/konumaru/commonlit-finetuned-roberta-base-squad2)

In [None]:
import torch
import torch.nn as nn
import transformers
from transformers import (
    AutoConfig,
    AutoModel,
)

class LitModel(nn.Module):
    """Transformer encoder + attention pooling + single linear regressor.

    The encoder is loaded from ``model_name_or_path`` with hidden states
    exposed, hidden dropout disabled and ``layer_norm_eps`` set to 1e-7.
    ``forward`` returns one score per input sequence.
    """

    def __init__(self, model_name_or_path="roberta-base"):
        super().__init__()

        overrides = {
            "output_hidden_states": True,
            "hidden_dropout_prob": 0.0,
            "layer_norm_eps": 1e-7,
        }
        self.config = AutoConfig.from_pretrained(model_name_or_path)
        self.config.update(overrides)

        self.roberta = AutoModel.from_pretrained(
            model_name_or_path, config=self.config
        )

        dim = self.config.hidden_size
        attention_layers = [
            nn.Linear(dim, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1),
        ]
        self.attention = nn.Sequential(*attention_layers)
        self.regressor = nn.Sequential(nn.Linear(dim, 1))

        self._init_embed_layers(reinit_layers=4)

    def _init_embed_layers(self, reinit_layers: int = 4):
        """Reset the parameters of the last ``reinit_layers`` encoder layers.

        Weights of linear/embedding modules are re-drawn from
        N(0, initializer_range), biases and padding rows are zeroed and
        LayerNorm modules become identity. A non-positive count is a no-op.
        """
        if not reinit_layers > 0:
            return

        init_std = self.config.initializer_range
        tail_layers = self.roberta.encoder.layer[-reinit_layers:]
        for tail_layer in tail_layers:
            for child in tail_layer.modules():
                if isinstance(child, nn.Linear):
                    child.weight.data.normal_(mean=0.0, std=init_std)
                    if child.bias is not None:
                        child.bias.data.zero_()
                elif isinstance(child, nn.Embedding):
                    child.weight.data.normal_(mean=0.0, std=init_std)
                    if child.padding_idx is not None:
                        child.weight.data[child.padding_idx].zero_()
                elif isinstance(child, nn.LayerNorm):
                    child.bias.data.zero_()
                    child.weight.data.fill_(1.0)

    def forward(self, input_ids, attention_mask):
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        final_states = outputs.hidden_states[-1]

        # Softmax-normalised scores along dim 1 weight each position's state.
        scores = self.attention(final_states)
        weighted = scores * final_states
        context = torch.sum(weighted, dim=1)
        # Now we reduce the context vector to the prediction score.
        return self.regressor(context)

In [None]:
data_dir = "commonlit-finetuned-roberta-base-squad2"
model_name_or_path = "../input/roberta-transformers-pytorch/roberta-base"

# One pass per seed: read that seed's saved OOF predictions as a column and
# predict the test set with the fold checkpoints stored next to them.
for seed in SEEDS:
    oof_path = f"../input/{data_dir}/seed{seed}/oof.npy"
    checkpoint_dir = f"../input/{data_dir}/seed{seed}/models"

    oof = np.load(oof_path).reshape(-1, 1)
    pred = predict_by_roberta(
        model=LitModel(model_name_or_path),
        model_name_or_path=model_name_or_path,
        model_dir=checkpoint_dir,
    )

    all_oof.append(oof)
    all_pred.append(pred)

In [None]:
# The preceding loop appended one prediction per entry of SEEDS, so average
# exactly that many trailing entries instead of a hard-coded count.
all_pred_for_stack.append(np.mean(all_pred[-len(SEEDS):], axis=0))

In [None]:
# OOF score of this model family, averaged over the seeds loaded above.
# np.sqrt(MSE) replaces `squared=False`, which newer scikit-learn releases no
# longer accept; the slice follows SEEDS rather than a hard-coded count.
print("RMSE: ", np.sqrt(mean_squared_error(target, np.mean(all_oof[-len(SEEDS):], axis=0))))

#### [commonlit-finetuned-roberta-large](https://www.kaggle.com/konumaru/commonlit-finetuned-roberta-large)

In [None]:
import torch
import torch.nn as nn
import transformers
from transformers import (
    AutoConfig,
    AutoModel,
)

class LitModel(nn.Module):
    """Transformer encoder + attention pooling + LayerNorm/linear regressor.

    The encoder is loaded from ``model_name_or_path`` with hidden states
    exposed, hidden dropout disabled and ``layer_norm_eps`` set to 1e-7.
    Head widths follow ``config.hidden_size``.
    """

    def __init__(self, model_name_or_path="roberta-base"):
        super().__init__()

        cfg = AutoConfig.from_pretrained(model_name_or_path)
        cfg.update(
            {
                "output_hidden_states": True,
                "hidden_dropout_prob": 0.0,
                "layer_norm_eps": 1e-7,
            }
        )
        self.config = cfg
        self.roberta = AutoModel.from_pretrained(model_name_or_path, config=cfg)

        n_features = cfg.hidden_size
        self.attention = nn.Sequential(
            nn.Linear(n_features, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1),
        )
        self.regressor = nn.Sequential(
            nn.LayerNorm(n_features),
            nn.Linear(n_features, 1),
        )

        self._init_embed_layers(reinit_layers=4)

    def _init_embed_layers(self, reinit_layers: int = 4):
        """Re-initialise the last ``reinit_layers`` encoder layers in place.

        Linear and embedding weights are drawn from N(0, initializer_range);
        biases and padding rows are zeroed; LayerNorm is reset to identity.
        Nothing happens when ``reinit_layers`` is not positive.
        """
        if reinit_layers <= 0:
            return

        sigma = self.config.initializer_range
        for top_layer in self.roberta.encoder.layer[-reinit_layers:]:
            for part in top_layer.modules():
                if isinstance(part, nn.Linear):
                    part.weight.data.normal_(mean=0.0, std=sigma)
                    if part.bias is not None:
                        part.bias.data.zero_()
                elif isinstance(part, nn.Embedding):
                    part.weight.data.normal_(mean=0.0, std=sigma)
                    if part.padding_idx is not None:
                        part.weight.data[part.padding_idx].zero_()
                elif isinstance(part, nn.LayerNorm):
                    part.bias.data.zero_()
                    part.weight.data.fill_(1.0)

    def forward(self, input_ids, attention_mask):
        encoder_out = self.roberta(input_ids=input_ids, attention_mask=attention_mask)

        # Attention-weighted sum of the final hidden states over dim 1.
        states = encoder_out.hidden_states[-1]
        pooled = torch.sum(self.attention(states) * states, dim=1)
        # Now we reduce the pooled vector to the prediction score.
        return self.regressor(pooled)

In [None]:
data_dir = "commonlit-finetuned-roberta-large"
model_name_or_path = "../input/roberta-transformers-pytorch/roberta-large"

# One pass per seed: read that seed's saved OOF predictions as a column and
# predict the test set with the fold checkpoints stored next to them.
for seed in [42, 422, 12]:
    oof_path = f"../input/{data_dir}/seed{seed}/oof.npy"
    checkpoint_dir = f"../input/{data_dir}/seed{seed}/models"

    oof = np.load(oof_path).reshape(-1, 1)
    pred = predict_by_roberta(
        model=LitModel(model_name_or_path),
        model_name_or_path=model_name_or_path,
        model_dir=checkpoint_dir,
    )

    all_oof.append(oof)
    all_pred.append(pred)

In [None]:
# Average the three most recent test predictions (the preceding loop runs over
# three seeds) into one stacking feature column for this model family.
all_pred_for_stack.append(np.mean(all_pred[-3:], axis=0))

In [None]:
# OOF score of this model family, averaged over its three seeds.
# np.sqrt(MSE) replaces `squared=False`, which newer scikit-learn releases no
# longer accept.
print("RMSE: ", np.sqrt(mean_squared_error(target, np.mean(all_oof[-3:], axis=0))))

#### [commonlit-finetuned-roberta-large-squad2](https://www.kaggle.com/konumaru/commonlit-finetuned-roberta-large-squad2)

In [None]:
import torch
import torch.nn as nn
import transformers
from transformers import (
    AutoConfig,
    AutoModel,
)

class LitModel(nn.Module):
    """Transformer encoder + attention pooling + LayerNorm/linear regressor.

    The encoder is loaded from ``model_name_or_path`` with hidden states
    exposed, hidden dropout disabled and ``layer_norm_eps`` set to 1e-7.
    Head widths follow ``config.hidden_size``.
    """

    def __init__(self, model_name_or_path="roberta-base"):
        super().__init__()

        self.config = AutoConfig.from_pretrained(model_name_or_path)
        self.config.update(
            {
                "output_hidden_states": True,
                "hidden_dropout_prob": 0.0,
                "layer_norm_eps": 1e-7,
            }
        )
        self.roberta = AutoModel.from_pretrained(
            model_name_or_path, config=self.config
        )

        hidden = self.config.hidden_size
        # Per-position scoring network; scores are normalised across dim 1.
        self.attention = nn.Sequential(
            nn.Linear(hidden, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1),
        )
        self.regressor = nn.Sequential(nn.LayerNorm(hidden), nn.Linear(hidden, 1))

        self._init_embed_layers(reinit_layers=4)

    def _init_embed_layers(self, reinit_layers: int = 4):
        """Reset the parameters of the last ``reinit_layers`` encoder layers.

        Weights of linear/embedding modules are re-drawn from
        N(0, initializer_range), biases and padding rows are zeroed and
        LayerNorm modules become identity. A non-positive count is a no-op.
        """
        if reinit_layers <= 0:
            return

        std = self.config.initializer_range
        for block in self.roberta.encoder.layer[-reinit_layers:]:
            for mod in block.modules():
                if isinstance(mod, nn.Linear):
                    mod.weight.data.normal_(mean=0.0, std=std)
                    if mod.bias is not None:
                        mod.bias.data.zero_()
                elif isinstance(mod, nn.Embedding):
                    mod.weight.data.normal_(mean=0.0, std=std)
                    if mod.padding_idx is not None:
                        mod.weight.data[mod.padding_idx].zero_()
                elif isinstance(mod, nn.LayerNorm):
                    mod.bias.data.zero_()
                    mod.weight.data.fill_(1.0)

    def forward(self, input_ids, attention_mask):
        result = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        last_states = result.hidden_states[-1]

        attn = self.attention(last_states)
        summary = (attn * last_states).sum(dim=1)
        # Now we reduce the summary vector to the prediction score.
        return self.regressor(summary)

In [None]:
data_dir = "commonlit-finetuned-roberta-large-squad2"
model_name_or_path = "../input/roberta-transformers-pytorch/roberta-large"

# One pass per seed: read that seed's saved OOF predictions as a column and
# predict the test set with the fold checkpoints stored next to them.
for seed in [42, 422, 12]:
    oof_path = f"../input/{data_dir}/seed{seed}/oof.npy"
    checkpoint_dir = f"../input/{data_dir}/seed{seed}/models"

    oof = np.load(oof_path).reshape(-1, 1)
    pred = predict_by_roberta(
        model=LitModel(model_name_or_path),
        model_name_or_path=model_name_or_path,
        model_dir=checkpoint_dir,
    )

    all_oof.append(oof)
    all_pred.append(pred)

In [None]:
# Average the three most recent test predictions (the preceding loop runs over
# three seeds) into one stacking feature column for this model family.
all_pred_for_stack.append(np.mean(all_pred[-3:], axis=0))

In [None]:
# OOF score of this model family, averaged over its three seeds.
# np.sqrt(MSE) replaces `squared=False`, which newer scikit-learn releases no
# longer accept.
print("RMSE: ", np.sqrt(mean_squared_error(target, np.mean(all_oof[-3:], axis=0))))

#### [commonlit-finetuned-roberta-large-meanpool](https://www.kaggle.com/konumaru/commonlit-finetuned-roberta-large-meanpool)

In [None]:
import torch
import torch.nn as nn
import transformers
from transformers import (
    AutoConfig,
    AutoModel,
)

class LitModel(nn.Module):
    """Transformer encoder + attention-mask-aware mean pooling + linear head.

    The encoder is loaded from ``model_name_or_path`` with hidden states
    exposed, hidden dropout disabled and ``layer_norm_eps`` set to 1e-7.
    ``forward`` averages the encoder's first output over the positions where
    ``attention_mask`` is non-zero and maps the result to a single score.
    """

    def __init__(self, model_name_or_path="roberta-base"):
        super().__init__()

        config = AutoConfig.from_pretrained(model_name_or_path)
        config.update(
            {
                "output_hidden_states": True,
                "hidden_dropout_prob": 0.0,
                "layer_norm_eps": 1e-7,
            }
        )
        self.config = config
        self.roberta = AutoModel.from_pretrained(model_name_or_path, config=config)

        self.regressor = nn.Sequential(nn.Linear(config.hidden_size, 1))

        self._init_embed_layers(reinit_layers=4)

    def _init_embed_layers(self, reinit_layers: int = 4):
        """Re-initialise the last ``reinit_layers`` encoder layers in place.

        Linear and embedding weights are drawn from N(0, initializer_range);
        biases and padding rows are zeroed; LayerNorm is reset to identity.
        Nothing happens when ``reinit_layers`` is not positive.
        """
        if reinit_layers <= 0:
            return

        std = self.config.initializer_range
        for encoder_layer in self.roberta.encoder.layer[-reinit_layers:]:
            for sub in encoder_layer.modules():
                if isinstance(sub, nn.Linear):
                    sub.weight.data.normal_(mean=0.0, std=std)
                    if sub.bias is not None:
                        sub.bias.data.zero_()
                elif isinstance(sub, nn.Embedding):
                    sub.weight.data.normal_(mean=0.0, std=std)
                    if sub.padding_idx is not None:
                        sub.weight.data[sub.padding_idx].zero_()
                elif isinstance(sub, nn.LayerNorm):
                    sub.bias.data.zero_()
                    sub.weight.data.fill_(1.0)

    def forward(self, input_ids, attention_mask):
        token_states = self.roberta(
            input_ids=input_ids, attention_mask=attention_mask
        )[0]

        # Broadcast the mask over the feature axis so masked positions
        # contribute nothing to the sum.
        mask = attention_mask.unsqueeze(-1).expand(token_states.size()).float()
        masked_sum = torch.sum(token_states * mask, 1)
        # Clamp the token count so an all-zero mask cannot divide by zero.
        token_count = torch.clamp(mask.sum(1), min=1e-9)
        pooled = masked_sum / token_count
        # Now we reduce the pooled vector to the prediction score.
        return self.regressor(pooled)

In [None]:
data_dir = "commonlit-finetuned-roberta-large-meanpool"
model_name_or_path = "../input/roberta-transformers-pytorch/roberta-large"

# One pass per seed: read that seed's saved OOF predictions as a column and
# predict the test set with the fold checkpoints stored next to them.
for seed in [42, 422, 12]:
    oof_path = f"../input/{data_dir}/seed{seed}/oof.npy"
    checkpoint_dir = f"../input/{data_dir}/seed{seed}/models"

    oof = np.load(oof_path).reshape(-1, 1)
    pred = predict_by_roberta(
        model=LitModel(model_name_or_path),
        model_name_or_path=model_name_or_path,
        model_dir=checkpoint_dir,
    )

    all_oof.append(oof)
    all_pred.append(pred)

In [None]:
# Average the three most recent test predictions (the preceding loop runs over
# three seeds) into one stacking feature column for this model family.
all_pred_for_stack.append(np.mean(all_pred[-3:], axis=0))

In [None]:
# OOF score of this model family, averaged over its three seeds.
# np.sqrt(MSE) replaces `squared=False`, which newer scikit-learn releases no
# longer accept.
print("RMSE: ", np.sqrt(mean_squared_error(target, np.mean(all_oof[-3:], axis=0))))

## Averaging

In [None]:
# Put every (model, seed) OOF column side by side and score their plain mean.
# NOTE: all_oof is rebound from a list to a 2-D array here, so this cell is
# not meant to be executed twice.
all_oof = np.concatenate(all_oof, axis=1)
# np.sqrt(MSE) replaces `squared=False`, which newer scikit-learn releases no
# longer accept.
print("RMSE: ", np.sqrt(mean_squared_error(target, np.mean(all_oof, axis=1))))

### Feature Engineering for stacking model

In [None]:
import sys
sys.path.append("../input/textfeatmodule")
sys.path.append("../input/textfeatmodule/readability-package")

from textfeat import create_text_feat

In [None]:
# Hand-crafted text features for the test excerpts. `create_text_feat` comes
# from the local `textfeat` module; its result is indexed by column name
# further down (presumably a DataFrame with one column per feature -- confirm
# against the module).
test = pd.read_csv("../input/commonlitreadabilityprize/test.csv")
text_feat = create_text_feat(test)

# Feature columns passed to the stacking models. Commented-out names are
# candidate features that are currently switched off.
feat_cols = [
    'chars_per_word',
    'syll_per_word',
#     'words_per_sent',
#     'kincaid',
#     'ari',
#     'coleman_liau',
#     'flesch',
#     'gunning_fog',
#     'lix',
#     'smog',
#     'rix',
#     'dale_chall',
#     'tobeverb',
#     'auxverb',
#     'conjunction',
#     'pronoun',
#     'preposition',
#     'nominalization',
#     'pronoun_b',
    'interrogative',
#     'article',
#     'subordination',
#     'conjunction_b',
#     'preposition_b',
]

# Disabled: 300 `spacy_{i}` columns.
# feat_cols += [f"spacy_{i}" for i in range(300)]

# Second candidate group (tag-named, punctuation and length/count columns);
# only 'commas' is enabled.
feat_cols += [
#     'CC',
#     'CD',
#     'DT',
#     'EX',
#     'FW',
#     'IN',
#     'JJ',
#     'JJR',
#     'JJS',
#     'LS',
#     'MD',
#     'NN',
#     'NNS',
#     'NNP',
#     'NNPS',
#     'PDT',
#     'POS',
#     'PRP',
#     'RB',
#     'RBR',
#     'RBS',
#     'RP',
#     'TO',
#     'UH',
#     'VB',
#     'VBD',
#     'VBG',
#     'VBZ',
#     'WDT',
#     'WP',
#     'WRB',
#     'periods',
    'commas',
#     'semis',
#     'exclaims',
#     'questions',
#     'num_char',
#     'num_words',
#     'unique_words',
#     'word_diversity',
#     'longest_word',
#     'avg_len_word',
]

In [None]:
# Stacking input: one seed-averaged prediction column per model family,
# followed by the selected hand-crafted text feature columns.
X_stack = np.concatenate(
    [
        np.concatenate(all_pred_for_stack, axis=1),
        text_feat[feat_cols].to_numpy(),
    ],
    axis=1,
)

## Submission

In [None]:
import pickle

# Predict function for stacking.
def predict(
    data: pd.DataFrame,
    model_dir: str,
    seed: int = 42,
    n_splits: int = 5,
    num_seed: int = 5,
) -> np.ndarray:
    """Average the predictions of pickled fold models over folds and seeds.

    Models are read from ``{model_dir}/seed{s}/{k}-fold.pkl`` for
    ``s`` in ``seed .. seed + num_seed - 1`` and ``k`` in ``0 .. n_splits - 1``.

    Args:
        data: Feature matrix handed to every model's ``predict``; only its
            ``shape[0]`` is used directly.
        model_dir: Root directory holding the ``seed*/`` sub-directories.
        seed: First seed; later seeds are consecutive integers.
        n_splits: Number of fold models per seed.
        num_seed: Number of consecutive seeds to average.

    Returns:
        Column vector of shape ``(len(data), 1)`` with the mean prediction.

    NOTE(review): ``pickle.load`` executes arbitrary code from the file; only
    point ``model_dir`` at trusted artifacts.
    """
    per_seed = []
    for current_seed in range(seed, seed + num_seed):
        fold_mean = np.zeros(data.shape[0])
        for fold in range(n_splits):
            pickle_path = os.path.join(
                model_dir, f"seed{current_seed}/{fold}-fold.pkl"
            )
            with open(pickle_path, mode="rb") as handle:
                fold_model = pickle.load(handle)
            # Accumulate the fold average incrementally.
            fold_mean += fold_model.predict(data) / n_splits
        per_seed.append(fold_mean.reshape(-1, 1))
    return np.mean(per_seed, axis=0)

In [None]:
model_names = ("bayesian_ridge", "ridge", "mlp", "svr", "xgb")

# One seed/fold-averaged prediction column per stacking model type. With the
# default starting seed of `predict`, num_seed=7 reads seed42 .. seed48.
pred_stacked = []
for model_name in model_names:
    stack_model_dir = f"../input/k/konumaru/train-stack-models-roberta-base-rsa/{model_name}"
    pred = predict(X_stack, stack_model_dir, num_seed=7)
    pred_stacked.append(pred)

In [None]:
# submission = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")
# # Averaging for submission
# submission["target"] = (
#     0.7 * np.mean(np.concatenate(all_pred, axis=1), axis=1).reshape(-1, 1)
#     + 0.3 * np.mean(np.concatenate(pred_stacked, axis=1), axis=1).reshape(-1, 1)
# )

# submission.head()

In [None]:
submission = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")
# Averaging for submission: 70% plain mean of all transformer predictions,
# 30% mean of the stacked models, then scaled by 1.01.
transformer_mean = np.mean(np.concatenate(all_pred, axis=1), axis=1).reshape(-1, 1)
stacked_mean = np.mean(np.concatenate(pred_stacked, axis=1), axis=1).reshape(-1, 1)
submission["target"] = (0.7 * transformer_mean + 0.3 * stacked_mean) * 1.01

submission.head()

In [None]:
# Write the final predictions to submission.csv without the index column.
submission.to_csv("submission.csv", index=False)