In [13]:
# ====================================================
# Library
# ====================================================

import os
import gc
import re
import ast
import sys
import copy
import json
import time
import math
import string
import pickle
import random
import joblib
import itertools
import warnings
warnings.filterwarnings("ignore")

import scipy as sp
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
from tqdm.auto import tqdm
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F
from torch.optim import Adam, SGD, AdamW
from torch.utils.data import DataLoader, Dataset


import tokenizers
import transformers
print(f"tokenizers.__version__: {tokenizers.__version__}")
print(f"transformers.__version__: {transformers.__version__}")
from transformers import AutoTokenizer, AutoModel, AutoConfig
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
from transformers import DataCollatorWithPadding
%env TOKENIZERS_PARALLELISM=false

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Suppress warnings
import warnings
warnings.filterwarnings("ignore")

tokenizers.__version__: 0.13.1
transformers.__version__: 4.23.1
env: TOKENIZERS_PARALLELISM=false


# CFG

In [14]:
# Ensemble members, listed in the order in which their predictions are blended.
# Keys of each entry:
#   model_name - backbone identifier; also drives the tokenizer branching in prepare_input
#                and the BART-specific handling in CustomModel
#   model_path - directory that holds config.pth and checkpoint_{fold}.pth
#   tok_path   - directory of the saved tokenizer
#   pool       - pooling variant built by CustomModel ("MeanPool", "ConcatPool", "WLP", "GeM")
#   batch_size - DataLoader batch size used for this model at inference time
models = [
    {
        "model_name": "microsoft/deberta-v3-base",
        "model_path": "../PL_MODEL_DIR/exp01/microsoft-deberta-v3-base",
        "tok_path": "../PL_MODEL_DIR/exp01/microsoft-deberta-v3-base/tokenizer/",
        "pool": "MeanPool",
        "batch_size": 24
    },
    {
        "model_name": "microsoft/deberta-v3-large",
        "model_path": "../PL_MODEL_DIR/exp01/microsoft-deberta-v3-large",
        "tok_path": "../PL_MODEL_DIR/exp01/microsoft-deberta-v3-large/tokenizer/",
        "pool": "MeanPool",
        "batch_size": 12
    },
    {
        "model_name": "roberta-large",
        # NOTE(review): these two paths lack the "../" prefix used by every other entry - confirm this is intentional.
        "model_path": "PL_MODEL_DIR/exp12/roberta-large",
        "tok_path": "PL_MODEL_DIR/exp12/roberta-large/tokenizer/",
        "pool": "ConcatPool",
        "batch_size": 12        
    },

    {
        "model_name": "microsoft/deberta-v3-base",
        "model_path": "../PL_MODEL_DIR/exp14/microsoft-deberta-v3-base",
        "tok_path": "../PL_MODEL_DIR/exp14/microsoft-deberta-v3-base/tokenizer/",
        "pool": "GeM",
        "batch_size": 24
    },
    {
        "model_name": "roberta-large",
        "model_path": "../PL_MODEL_DIR/exp14/roberta-large",
        "tok_path": "../PL_MODEL_DIR/exp14/roberta-large/tokenizer/",
        "pool": "GeM",
        "batch_size": 12
    },
    {
        "model_name": "microsoft/deberta-v3-large",
        "model_path": "../PL_MODEL_DIR/exp02/microsoft-deberta-v3-large",
        "tok_path": "../PL_MODEL_DIR/exp02/microsoft-deberta-v3-large/tokenizer/",
        "pool": "ConcatPool",
        "batch_size": 12
    },
    {
        "model_name": "microsoft/deberta-v3-large",
        "model_path": "../PL_MODEL_DIR/exp13/microsoft-deberta-v3-large",
        "tok_path": "../PL_MODEL_DIR/exp13/microsoft-deberta-v3-large/tokenizer/",
        "pool": "WLP",
        "batch_size": 12
    },
    {
        "model_name": "distilbert-base-uncased",
        "model_path": "../PL_MODEL_DIR/exp02/distilbert-base-uncased",
        "tok_path": "../PL_MODEL_DIR/exp02/distilbert-base-uncased/tokenizer/",
        "pool": "ConcatPool",
        "batch_size": 24
    },
    {
        "model_name": "sshleifer/distilbart-cnn-12-6",
        "model_path": "../PL_MODEL_DIR/exp02/sshleifer-distilbart-cnn-12-6",
        "tok_path": "../PL_MODEL_DIR/exp02/sshleifer-distilbart-cnn-12-6/tokenizer/",
        "pool": "ConcatPool",
        "batch_size": 12
    },
    {
        "model_name": "roberta-large",
        "model_path": "../PL_MODEL_DIR/exp03/roberta-large",
        "tok_path": "../PL_MODEL_DIR/exp03/roberta-large/tokenizer/",
        "pool": "WLP",
        "batch_size": 12
    }
    
]


In [15]:
num_workers=4  # worker processes for the inference DataLoader
gradient_checkpointing=False  # read by CustomModel.__init__; when True the backbone's gradient checkpointing is enabled
target_cols=['cohesion', 'syntax', 'vocabulary', 'phraseology', 'grammar', 'conventions']  # label columns that receive the predictions
seed=42  # NOTE(review): not referenced in the visible cells (seed_everything is called with a literal 42)
n_fold=5  # NOTE(review): not referenced in the visible cells
trn_fold=[0, 1, 2, 3, 4]  # NOTE(review): not referenced in the visible cells
fold = 0  # fold whose rows are predicted and whose checkpoint_{fold}.pth is loaded by the inference loop

In [16]:
# Tokenizer of the first ensemble member; the length-sorting cell uses it to count tokens.
# The inference loop loads a separate tokenizer for every model.
tokenizer = AutoTokenizer.from_pretrained(f"{models[0]['tok_path']}")

# Utils

In [17]:
# ====================================================
# Utils
# ====================================================
def MCRMSE(y_trues, y_preds):
    """Mean column-wise root mean squared error.

    Args:
        y_trues: 2-D array-like of shape (n_samples, n_targets) holding the
            reference values.
        y_preds: 2-D array-like holding the predictions. It must have the same
            number of rows as ``y_trues`` and at least as many columns; only
            the first ``n_targets`` columns are scored.

    Returns:
        tuple: ``(mcrmse_score, scores)`` where ``scores`` is a list with one
        RMSE per target column and ``mcrmse_score`` is their mean.

    Raises:
        ValueError: if the inputs are not 2-D, have different row counts, or
            ``y_preds`` has fewer columns than ``y_trues``.
    """
    y_trues = np.asarray(y_trues, dtype=float)
    y_preds = np.asarray(y_preds, dtype=float)
    if y_trues.ndim != 2 or y_preds.ndim != 2:
        raise ValueError("MCRMSE expects 2-D inputs of shape (n_samples, n_targets)")

    n_targets = y_trues.shape[1]
    if y_trues.shape[0] != y_preds.shape[0] or y_preds.shape[1] < n_targets:
        raise ValueError(
            f"Incompatible shapes: y_trues {y_trues.shape} vs y_preds {y_preds.shape}"
        )

    # RMSE is computed directly with NumPy: the `squared=False` argument of
    # sklearn's mean_squared_error is deprecated and absent from newer releases.
    squared_errors = (y_trues - y_preds[:, :n_targets]) ** 2
    scores = list(np.sqrt(squared_errors.mean(axis=0)))
    mcrmse_score = np.mean(scores)
    return mcrmse_score, scores


def get_score(y_trues, y_preds):
    """Score predictions with MCRMSE.

    Returns:
        tuple: ``(mcrmse_score, scores)`` exactly as produced by ``MCRMSE``.
    """
    overall, per_target = MCRMSE(y_trues, y_preds)
    result = (overall, per_target)
    return result


def get_logger(filename='inference'):
    """Return the notebook logger, writing to the console and to ``<filename>.log``.

    The logger is looked up by ``__name__``, so every call returns the same
    object. Handlers are attached only when an equivalent one is not already
    present; without that guard, re-running the cell would add another pair of
    handlers and every message would be emitted multiple times.

    Args:
        filename: path of the log file without the ``.log`` extension.

    Returns:
        logging.Logger: logger set to INFO level with a message-only format.
    """
    from logging import getLogger, INFO, StreamHandler, FileHandler, Formatter
    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # FileHandler stores its target as an absolute path in `baseFilename`.
    log_path = os.path.abspath(f"{filename}.log")
    # `type(...) is StreamHandler` on purpose: FileHandler subclasses StreamHandler.
    has_console = any(type(h) is StreamHandler for h in logger.handlers)
    has_file = any(
        isinstance(h, FileHandler) and h.baseFilename == log_path
        for h in logger.handlers
    )

    if not has_console:
        handler1 = StreamHandler()
        handler1.setFormatter(Formatter("%(message)s"))
        logger.addHandler(handler1)
    if not has_file:
        handler2 = FileHandler(filename=f"{filename}.log")
        handler2.setFormatter(Formatter("%(message)s"))
        logger.addHandler(handler2)
    return logger

# Notebook-wide logger; with the default argument it also writes to "inference.log".
LOGGER = get_logger()


def seed_everything(seed=42):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators.

    Also exports ``PYTHONHASHSEED`` and asks cuDNN for deterministic kernels.

    Args:
        seed: integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True


seed_everything(seed=42)

# Data Loading

In [18]:
# ====================================================
# Data Loading 
# ====================================================

# NOTE: despite its name, `test` is read from the fold-annotated training file
# (train_5folds.csv); the `fold` column is used later to select the rows to predict.
test = pd.read_csv('../data/train_5folds.csv') 
print(f"test.shape: {test.shape}")

test.shape: (3911, 9)


Unnamed: 0,text_id,full_text,cohesion,syntax,vocabulary,phraseology,grammar,conventions,fold
0,0016926B079C,I think that students would benefit from learn...,3.5,3.5,3.0,3.0,4.0,3.0,1
1,0022683E9EA5,When a problem is a change you have to let it ...,2.5,2.5,3.0,2.0,2.0,2.5,0
2,00299B378633,"Dear, Principal\n\nIf u change the school poli...",3.0,3.5,3.0,3.0,3.0,2.5,4
3,003885A45F42,The best time in life is when you become yours...,4.5,4.5,4.5,4.5,4.0,5.0,3
4,0049B1DF5CCC,Small act of kindness can impact in other peop...,2.5,3.0,3.0,3.0,2.5,2.5,1


In [20]:
# sort by length to speed up inference
# Token counts come from the notebook-level `tokenizer` (loaded from models[0]['tok_path']).
# NOTE(review): the inference cell re-reads train_5folds.csv into `test`, so this ordering
# and the `tokenize_length` column do not reach the inference loop as written - confirm intent.
test['tokenize_length'] = [len(tokenizer(text)['input_ids']) for text in test['full_text'].values]
test = test.sort_values('tokenize_length', ascending=True).reset_index(drop=True)

Unnamed: 0,text_id,full_text,cohesion,syntax,vocabulary,phraseology,grammar,conventions,fold,tokenize_length
0,F69C85F4C3CA,the memorable teacher ever had\r\n\r\nwas a te...,1.0,1.0,1.5,1.0,1.0,1.5,2,16
1,7835355C55D8,"Dear TEACHER_NAME,\n\nI WRITE THIS LETTER TO S...",2.5,2.5,3.0,2.5,2.5,3.0,1,57
2,5FFB959ED7BA,"I agree.\n\nI agree with this statement, becau...",2.5,2.5,2.5,2.5,3.0,3.0,0,61
3,9A24CB0724FE,"Dear, principal\r\n\r\nI think that is require...",2.5,2.5,2.5,2.0,3.0,2.5,3,62
4,0DB44DDF91E6,Dear principal.\r\n\r\nWe like to clean litter...,3.0,3.0,3.0,2.5,3.0,3.0,3,62


# Dataset

In [21]:
# ====================================================
# Dataset
# ====================================================
def prepare_input(text,tokenizer, model_type):
    """Tokenize one text and return its encoding as ``torch.long`` tensors.

    For RoBERTa, DistilBERT and BART/DistilBART model types the text is
    truncated and padded to exactly 512 tokens. For every other model type the
    text is encoded at its natural length, with no truncation or padding here.

    Args:
        text: raw text to encode.
        tokenizer: object exposing ``encode_plus`` (a Hugging Face tokenizer).
        model_type: model identifier used to pick the encoding strategy.
            ``None`` is treated as "no special handling".

    Returns:
        The mapping returned by ``tokenizer.encode_plus`` with every value
        converted to a ``torch.long`` tensor.
    """
    fixed_length_families = ("roberta", "distilbert", "facebook/bart", "distilbart")
    # TestDataset defaults model_type to None; `"x" in None` would raise TypeError.
    model_type = model_type or ""

    if any(family in model_type for family in fixed_length_families):
        inputs = tokenizer.encode_plus(
            text,
            return_tensors=None,
            add_special_tokens=True,
            max_length=512,
            # Non-deprecated spelling of the former `pad_to_max_length=True`.
            padding='max_length',
            truncation=True
        )
    else:
        inputs = tokenizer.encode_plus(
            text,
            return_tensors=None,
            add_special_tokens=True,
        )

    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)
    return inputs


class TestDataset(Dataset):
    """Inference dataset that tokenizes the ``full_text`` column on access.

    Args:
        df: frame providing a ``full_text`` column.
        tokenizer: tokenizer forwarded to ``prepare_input``.
        model_type: model identifier forwarded to ``prepare_input``.
    """

    def __init__(self, df,tokenizer, model_type=None):
        self.tokenizer = tokenizer
        self.model_type = model_type
        self.texts = df['full_text'].values

    def __len__(self):
        """Number of texts in the dataset."""
        n_texts = len(self.texts)
        return n_texts

    def __getitem__(self, item):
        """Return the tokenized encoding of the text at position ``item``."""
        raw_text = self.texts[item]
        return prepare_input(raw_text, self.tokenizer, self.model_type)

# Model

In [22]:
# MeanPool
class MeanPooling(nn.Module):
    """Average of the hidden states over dim 1, counting only unmasked positions."""

    def __init__(self):
        super().__init__()

    def forward(self, last_hidden_state, attention_mask):
        """Return the mask-weighted mean of ``last_hidden_state`` along dim 1.

        ``attention_mask`` gets a trailing axis and is expanded to the shape of
        ``last_hidden_state``; the divisor is clamped to 1e-9 so a fully masked
        row produces zeros instead of a division by zero.
        """
        mask = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        token_sum = (last_hidden_state * mask).sum(dim=1)
        token_count = mask.sum(dim=1).clamp(min=1e-9)
        return token_sum / token_count
    
# WLP
class WeightedLayerPooling(nn.Module):
    """Weighted average of the hidden-state layers from ``layer_start`` onwards.

    Args:
        num_hidden_layers: number of transformer layers of the backbone.
        layer_start: index of the first entry of the stacked layer tuple that
            takes part in the average.
        layer_weights: optional weights, one per pooled layer. When omitted, a
            learnable parameter of ones with
            ``num_hidden_layers + 1 - layer_start`` entries is created.
    """

    def __init__(self, num_hidden_layers, layer_start: int = 4, layer_weights = None):
        super().__init__()
        self.layer_start = layer_start
        self.num_hidden_layers = num_hidden_layers
        if layer_weights is None:
            n_pooled = num_hidden_layers + 1 - layer_start
            layer_weights = nn.Parameter(torch.tensor([1] * n_pooled, dtype=torch.float))
        self.layer_weights = layer_weights

    def forward(self, features):
        """Add ``'token_embeddings'`` to ``features`` and return the same mapping.

        ``features['all_layer_embeddings']`` is stacked along a new leading
        axis, sliced from ``layer_start`` and averaged over that axis using
        ``layer_weights`` normalised by their sum.
        """
        stacked = torch.stack(features['all_layer_embeddings'])
        selected = stacked[self.layer_start:, :, :, :]

        # One weight per layer, broadcast over the three remaining axes.
        per_layer = self.layer_weights[:, None, None, None].expand(selected.size())
        pooled = (per_layer * selected).sum(dim=0) / self.layer_weights.sum()

        features.update({'token_embeddings': pooled})
        return features

# GeM
class GeMText(nn.Module):
    """Generalised-mean pooling along ``dim`` with a learnable exponent ``p``.

    Args:
        dim: axis that is reduced.
        cfg: accepted for interface compatibility; not used.
        p: initial value of the learnable exponent.
        eps: lower bound applied to the inputs and to the mask count.
    """

    def __init__(self, dim=1, cfg=None, p=3, eps=1e-6):
        super().__init__()
        self.dim = dim
        self.p = Parameter(torch.ones(1) * p)
        self.eps = eps
        self.feat_mult = 1

    def forward(self, x, attention_mask):
        """Pool ``x`` (passed the backbone's last hidden state by CustomModel) over ``dim``.

        Values are clamped to at least ``eps``, multiplied by the expanded
        mask, raised to ``p``, summed over ``dim``, divided by the mask count
        and finally raised to ``1 / p``.
        """
        mask = attention_mask.unsqueeze(-1).expand(x.shape)
        masked = x.clamp(min=self.eps) * mask
        powered_sum = masked.pow(self.p).sum(self.dim)
        token_count = mask.sum(self.dim).clamp(min=self.eps)
        pooled = powered_sum / token_count
        return pooled.pow(1 / self.p)
    



In [23]:
# ====================================================
# Model
# ====================================================

class CustomModel(nn.Module):
    """Transformer backbone, a pooling layer and a 6-output linear head.

    Args:
        cfg: mapping with at least ``"model_name"`` and ``"pool"``. ``"pool"``
            must be one of ``"MeanPool"``, ``"ConcatPool"``, ``"WLP"``, ``"GeM"``.
        config_path: path of the pickled backbone config (``config.pth``).
        pretrained: when True the backbone weights are loaded with
            ``AutoModel.from_pretrained``; otherwise the backbone is built from
            the config alone (weights are expected to come from a checkpoint).

    Raises:
        ValueError: if ``config_path`` is missing or ``cfg["pool"]`` is unknown.
    """

    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            raise ValueError("config_path is required to build CustomModel")
        # NOTE(security): torch.load unpickles arbitrary objects - only load
        # config files that come from a trusted source.
        self.config = torch.load(config_path)

        if pretrained:
            self.model = AutoModel.from_pretrained(cfg["model_name"], config=self.config)
        else:
            self.model = AutoModel.from_config(self.config)

        # `gradient_checkpointing` is the notebook-level flag from the CFG cell.
        if gradient_checkpointing:
            self.model.gradient_checkpointing_enable()

        pool = cfg["pool"]
        if pool in ("MeanPool", "ConcatPool"):
            self.pooling = MeanPooling()
        elif pool == "WLP":
            self.pooling = WeightedLayerPooling(self.config.num_hidden_layers, layer_start=9)
        elif pool == "GeM":
            self.pooling = GeMText()
        else:
            # Previously an unknown name surfaced only later as a missing
            # `pooling` attribute inside feature().
            raise ValueError(f"Unknown pooling type: {pool!r}")

        # ConcatPool concatenates the pooled output of the last four layers.
        if pool == "ConcatPool":
            self.head = nn.Linear(self.config.hidden_size*4, 6)
        else:
            self.head = nn.Linear(self.config.hidden_size, 6)

        if self._is_bart():
            self.config.use_cache = False
            self.initializer_range = self.config.init_std
        else:
            self.initializer_range = self.config.initializer_range

        self._init_weights(self.head)

    def _is_bart(self):
        """True when ``cfg["model_name"]`` names a BART or DistilBART model."""
        model_name = self.cfg["model_name"]
        return 'facebook/bart' in model_name or 'distilbart' in model_name

    def _init_weights(self, module):
        """Initialise ``module`` in place using ``self.initializer_range`` as std."""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def feature(self, inputs):
        """Run the backbone and pool its output into one vector per sample.

        Args:
            inputs: mapping with ``"input_ids"`` and ``"attention_mask"``
                tensors. The MeanPool/GeM path forwards every entry of the
                mapping to the backbone.

        Returns:
            torch.Tensor: pooled features that are fed to the linear head.
        """
        attention_mask = inputs["attention_mask"]
        input_ids = inputs["input_ids"]
        pool = self.cfg["pool"]

        if pool == "WLP":
            # Hidden states are requested explicitly so this path does not
            # depend on `output_hidden_states` being set in the saved config.
            x = self.model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                output_hidden_states=True,
            )
            tmp = {
                'all_layer_embeddings': x.hidden_states
            }
            # Index 0 along dim 1 of the layer-weighted embeddings is kept.
            feature = self.pooling(tmp)['token_embeddings'][:, 0]

        elif pool == "ConcatPool":
            outputs = self.model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                output_hidden_states=True,
            )
            # For BART-style models the decoder hidden states are pooled.
            if self._is_bart():
                x = torch.stack(outputs.decoder_hidden_states)
            else:
                x = torch.stack(outputs.hidden_states)

            p1 = self.pooling(x[-1], attention_mask)
            p2 = self.pooling(x[-2], attention_mask)
            p3 = self.pooling(x[-3], attention_mask)
            p4 = self.pooling(x[-4], attention_mask)

            feature = torch.cat(
                (p1, p2, p3, p4),-1
            )
        else:
            outputs = self.model(**inputs)
            x = outputs[0]
            feature = self.pooling(x, inputs['attention_mask'])

        return feature

    def forward(self, inputs):
        """Return the 6 regression outputs for a batch of tokenized inputs."""
        feature = self.feature(inputs)
        output = self.head(feature)
        return output

# Inference

In [24]:
# ====================================================
# inference
# ====================================================
def inference_fn(test_loader, model, device):
    """Run ``model`` over ``test_loader`` without gradients and collect the outputs.

    Args:
        test_loader: iterable of batches; each batch is a mapping of tensors.
        model: module called with the whole batch mapping.
        device: device the model and every batch tensor are moved to.

    Returns:
        numpy.ndarray: the per-batch outputs concatenated along the first axis.
    """
    model.eval()
    model.to(device)

    batch_outputs = []
    progress = tqdm(test_loader, total=len(test_loader))
    for batch in progress:
        # Tensors are moved in place so the model receives the same mapping object.
        for key in list(batch.keys()):
            batch[key] = batch[key].to(device)
        with torch.no_grad():
            batch_pred = model(batch)
        batch_outputs.append(batch_pred.to('cpu').numpy())

    return np.concatenate(batch_outputs)

In [81]:
# Per-model predictions: all_preds[i] is a list with one array per predicted
# fold for models[i]. Only one fold is predicted, so the blending cell reads
# all_preds[i][0].
all_preds = []

# The fold-annotated training data is re-read here, so any earlier reordering
# of `test` does not carry over into this cell.
test = pd.read_csv('../data/train_5folds.csv') #.head(1000)

for cfg in models:
    tokenizer = AutoTokenizer.from_pretrained(f"{cfg['tok_path']}")
    batch_size=cfg["batch_size"]

    predictions = []
    # Single-element loop over the notebook-level `fold` setting.
    for fold in [fold]:

        # .copy(): `df` is written to by a later cell, so it must be an
        # independent frame rather than a filtered selection of `test`.
        df = test.loc[test.fold == fold].copy()

        test_dataset = TestDataset(df, tokenizer, cfg["model_name"])

        test_loader = DataLoader(test_dataset,
                         batch_size=batch_size,
                         shuffle=False,
                         collate_fn=DataCollatorWithPadding(tokenizer=tokenizer, padding='longest'),
                         num_workers=num_workers, pin_memory=True, drop_last=False)

        # Architecture comes from the saved config; weights come from the
        # checkpoint of the selected fold.
        model = CustomModel(cfg, config_path=cfg["model_path"]+"/config.pth", pretrained=False)
        state = torch.load(f"{cfg['model_path']}/checkpoint_{fold}.pth",
                       map_location=torch.device('cpu'))

        model.load_state_dict(state['model'])
        prediction = inference_fn(test_loader, model, device)
        predictions.append(prediction)
        # Release the model before the next one is loaded.
        del model, state, prediction; gc.collect()
        torch.cuda.empty_cache()
    # `predictions` is kept as a list (no averaging over folds).
    all_preds.append(predictions)
    del tokenizer, test_dataset, test_loader; gc.collect()



  0%|          | 0/33 [00:00<?, ?it/s]

You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fas

  0%|          | 0/66 [00:00<?, ?it/s]

You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fas

  0%|          | 0/66 [00:00<?, ?it/s]

You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenize

  0%|          | 0/33 [00:00<?, ?it/s]

You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fas

  0%|          | 0/66 [00:00<?, ?it/s]

You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenize

  0%|          | 0/66 [00:00<?, ?it/s]

You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fas

  0%|          | 0/66 [00:00<?, ?it/s]

You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fas

  0%|          | 0/33 [00:00<?, ?it/s]

You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a DistilBertTokenizerFast tokenizer. Please note that with 

  0%|          | 0/66 [00:00<?, ?it/s]

You're using a BartTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a BartTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a BartTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a BartTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a BartTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `_

  0%|          | 0/66 [00:00<?, ?it/s]

You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenize

In [83]:
# Blend weights, paired with the entries of `models` by position
# (first key <-> models[0], second key <-> models[1], ...).
# The blending cell divides by their sum, so they do not need to add up to 1.
params = {
 'w1': 0.9817485733634937,
 'w2': 0.9106920834649074,
 'w3': 0.5578901478122944,
 'w4': 0.6902450546679916,
 'w5': 0.5688885036917446,
 'w6': 0.5169081384036303,
 'w7': 0.7091282443515627,
 'w8': 0.21827946432761933,
 'w9': 0.23860988779596035,
 'w10': 0.30982893640786663
}


In [85]:
# Weighted average of the per-model predictions.
# Weights are paired with models by position, so a length mismatch would blend
# the wrong models (or silently drop some); fail loudly instead.
weights = list(params.values())
if len(weights) != len(all_preds):
    raise ValueError(
        f"Got {len(weights)} blend weights for {len(all_preds)} models; "
        "`params` and `all_preds` must have the same length."
    )

# all_preds[i] is a one-element list (a single predicted fold), hence the [0].
preds = sum(w * model_preds[0] for w, model_preds in zip(weights, all_preds))

param_sum = sum(weights)

predictions = preds/param_sum

In [86]:
# Replace the label columns of the predicted rows with the blended predictions.
# `df` was obtained by filtering `test`; copy it first so the assignment targets
# an independent frame rather than a selection of another DataFrame.
df = df.copy()
df[target_cols] = predictions

In [96]:
# Save the pseudo-labelled rows. `df` contains only the rows whose `fold`
# matches the notebook-level `fold` setting, not the whole training file.
df.to_csv("../data/pl_train.csv", index=False)

## Generate train_pl_df.csv

In [None]:
# Ground-truth labels for every essay.
df1 = pd.read_csv("../data/train_5folds.csv")
df1 = df1.sort_values('text_id').reset_index(drop=True)


# Pseudo-labels written by the inference cells; this file may cover only a
# subset of the essays.
df2 = pd.read_csv("../data/pl_train.csv")
df2 = df2.sort_values('text_id').reset_index(drop=True)


# Align the pseudo-labels with the ground truth on `text_id`. Adding the two
# frames directly pairs rows by position, which is only correct when both files
# contain exactly the same essays. `validate` asserts one pseudo-label row per essay.
pl_aligned = df1[['text_id']].merge(
    df2[['text_id'] + target_cols], on='text_id', how='left', validate='many_to_one'
)

# Average true and pseudo labels where a pseudo-label exists; essays without
# one keep their original labels.
has_pl = pl_aligned[target_cols].notna().all(axis=1)
df1.loc[has_pl, target_cols] = (
    df1.loc[has_pl, target_cols] + pl_aligned.loc[has_pl, target_cols]
) / 2

df1.to_csv("../data/train_pl_df.csv", index=False)