In [1]:
# ====================================================
# Library
# ====================================================
import os
import gc
import re
import ast
import sys
import copy
import json
import time
import math 
import string
import pickle
import random
import joblib
import itertools
import warnings
warnings.filterwarnings("ignore")
from IPython. display import clear_output

import scipy as sp
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
from tqdm.auto import tqdm
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F
from torch.optim import Adam, SGD, AdamW
from torch.utils.data import DataLoader, Dataset

#os.system('pip install iterative-stratification==0.1.7')
#from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
sys.path.append('../input/iterativestratification')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

os.system('pip uninstall -y transformers')
os.system('pip uninstall -y tokenizers')
os.system('python -m pip install --no-index --find-links=../input/fb3-my-pip-wheels transformers')
os.system('python -m pip install --no-index --find-links=../input/fb3-my-pip-wheels tokenizers')
import tokenizers
import transformers
from transformers import AutoTokenizer, AutoModel, AutoConfig
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
from transformers import DataCollatorWithPadding
%env TOKENIZERS_PARALLELISM=false

from nltk import sent_tokenize
from nltk import pos_tag
from nltk.corpus import stopwords
from string import punctuation

clear_output()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"tokenizers.__version__: {tokenizers.__version__}")
print(f"transformers.__version__: {transformers.__version__}")
print('device:', device)

tokenizers.__version__: 0.12.1
transformers.__version__: 4.20.1
device: cuda


In [2]:
# Absolute folder of the competition data (located under /kaggle/input).
BASE_PATH = '/kaggle/input/feedback-prize-english-language-learning'
# CSV files that live directly inside BASE_PATH.
SUBMISSION_PATH = os.path.join(BASE_PATH, 'sample_submission.csv')
TRAIN_PATH = os.path.join(BASE_PATH, 'train.csv')
TEST_PATH = os.path.join(BASE_PATH, 'test.csv')

In [3]:
class Config(dict):
    """Dictionary whose items can also be read, written and deleted as attributes.

    ``cfg.key`` and ``cfg['key']`` address the same entry. Accessing or deleting
    a missing attribute raises ``AttributeError`` (not ``KeyError``) so that
    ``hasattr``, ``getattr(obj, name, default)``, ``copy.deepcopy`` and pickling
    behave as they do for ordinary objects.
    """

    def __init__(self, *args, **kwargs):
        # Accepts everything ``dict`` accepts: a mapping, an iterable of pairs
        # and/or keyword arguments.
        super().__init__(*args, **kwargs)

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails, i.e. for item access.
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name) from None

    # Attribute assignment stores an item, never an instance attribute.
    __setattr__ = dict.__setitem__

    def __delattr__(self, name):
        try:
            del self[name]
        except KeyError:
            raise AttributeError(name) from None

    def init(self, kwargs):
        """Merge the mapping ``kwargs`` into this config (kept for compatibility)."""
        self.update(kwargs)

    def set(self, key, val):
        """Store ``val`` under ``key``; equivalent to ``self[key] = val``."""
        self[key] = val
        
def get_logger(filename='inference'):
    """Return the module logger, writing bare messages to the console and a file.

    Args:
        filename: Path of the log file without the ``.log`` suffix.

    Returns:
        The ``logging.Logger`` named after this module, at INFO level, with
        exactly one stream handler and one file handler attached.
    """
    from logging import getLogger, INFO, StreamHandler, FileHandler, Formatter
    logger = getLogger(__name__)
    logger.propagate = False
    logger.setLevel(INFO)

    # getLogger returns the same object on every call, so handlers from an
    # earlier call (e.g. re-running the cell) would otherwise pile up and every
    # message would be emitted several times.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    formatter = Formatter("%(message)s")
    for handler in (StreamHandler(), FileHandler(filename=f"{filename}.log")):
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger

LOGGER = get_logger()

In [4]:
def seed_everything(seed=42):
    """Seed the Python, NumPy and PyTorch random generators with ``seed``.

    Also stores ``seed`` in the ``PYTHONHASHSEED`` environment variable and,
    when CUDA is available, seeds the CUDA generators and switches cuDNN to
    deterministic, non-benchmarking mode.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # cuDNN needs both switches for repeatable results.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
seed_everything(seed=42)

def mc_rmse(y_true, y_pred):
    """Mean column-wise root mean squared error.

    Computed directly with NumPy so the result does not depend on the
    ``squared=False`` argument of ``sklearn.metrics.mean_squared_error``,
    which newer scikit-learn releases deprecate and remove.

    Args:
        y_true: 2-D array-like of targets, shape ``(n_samples, n_targets)``.
        y_pred: 2-D array-like of predictions with the same shape.

    Returns:
        ``(score, scores)`` where ``scores`` is the list of per-column RMSE
        values and ``score`` is their mean.

    Raises:
        ValueError: If the inputs are not 2-D or their shapes differ.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.ndim != 2 or y_true.shape != y_pred.shape:
        raise ValueError(
            f"expected two 2-D arrays of equal shape, got {y_true.shape} and {y_pred.shape}"
        )

    # RMSE of every target column in one vectorised pass.
    scores = np.sqrt(np.mean((y_true - y_pred) ** 2, axis=0)).tolist()
    score = float(np.mean(scores))
    return score, scores

def get_result(cfg, oof_df):
    """Print the mean and per-target RMSE of the predictions stored in ``oof_df``.

    Targets are read from the columns named in ``cfg.target_cols``; predictions
    from the matching ``pred_<target>`` columns. Nothing is returned.
    """
    pred_cols = [f"pred_{c}" for c in cfg.target_cols]
    y_true = oof_df[cfg.target_cols].values
    y_pred = oof_df[pred_cols].values
    score, scores = mc_rmse(y_true, y_pred)
    print(f'score: {score:<.6f}  scores: {scores}')

In [5]:
# ====================================================
# Dataset
# ====================================================
def encode_text(cfg, text):
    """Tokenize a single text with ``cfg.tokenizer`` and return tensors.

    * ``cfg.pretrained`` truthy: the text is padded/truncated to
      ``cfg.max_len`` and a plain dict of tensors (batch axis squeezed away)
      is returned.
    * otherwise: ``encode_plus`` is called without padding or truncation and
      every value of its result is converted in place to a ``torch.long``
      tensor.
    """
    if not cfg.pretrained:
        encoded = cfg.tokenizer.encode_plus(
            text,
            return_tensors=None,
            add_special_tokens=True,
        )
        for key in list(encoded.keys()):
            encoded[key] = torch.tensor(encoded[key], dtype=torch.long)
        return encoded

    batch = cfg.tokenizer(
        text,
        None,
        add_special_tokens=True,
        padding='max_length',
        truncation=True,
        max_length=cfg.max_len,
        return_tensors='pt',
    )
    # return_tensors='pt' adds a leading batch axis of size one; drop it.
    return {name: tensor.squeeze(0) for name, tensor in batch.items()}

def preprocess(texts):
    """Replace line breaks in a pandas string Series with ``<newline>`` markers.

    ``\\r\\n`` and ``\\n`` each become one marker, then adjacent marker pairs
    are merged in a single left-to-right pass. Returns the underlying array.
    """
    without_crlf = texts.str.replace(r'\r\n', '<newline>', regex=True)
    without_lf = without_crlf.str.replace(r'\n', '<newline>', regex=True)
    merged = without_lf.str.replace('<newline><newline>', '<newline>', regex=False)
    return merged.values

class TestDataset(Dataset):
    """Dataset yielding the tokenized ``full_text`` column of a DataFrame.

    With ``cfg.pretrained`` truthy the raw texts are used; otherwise they are
    first passed through ``preprocess``.
    """

    def __init__(self, cfg, df):
        self.cfg = cfg
        column = df['full_text']
        self.texts = column.values if cfg.pretrained else preprocess(column)

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        # Tokenization happens lazily, one text per access.
        return encode_text(self.cfg, self.texts[item])

In [6]:
def load_config(input_path, inference_weight=1):
    """Build the ``Config`` of a trained model stored in ``input_path``.

    Args:
        input_path: Folder containing ``CFG.json`` and a ``tokenizer`` sub-folder.
        inference_weight: Value stored on the config as ``inference_weight``.

    Returns:
        A ``Config`` holding the JSON entries plus ``path``, ``config_path``
        (``<input_path>/config.pth``), ``tokenizer`` and ``inference_weight``.
    """
    # Context manager so the file handle is closed right after parsing.
    with open(os.path.join(input_path, 'CFG.json'), 'r') as f:
        cfg = Config(**json.load(f))
    cfg.path = input_path
    cfg.config_path = os.path.join(cfg.path, 'config.pth')
    # Tokenizer saved next to the weights.
    cfg.tokenizer = AutoTokenizer.from_pretrained(os.path.join(cfg.path, 'tokenizer'))
    cfg.inference_weight = inference_weight
    return cfg

def load_model(cfg, fold, **model_kwargs):
    """Instantiate ``FB3Model`` and load the best checkpoint of ``fold`` on CPU.

    The checkpoint file is ``<cfg.path>/<cfg.model with '/'→'-'>_fold<fold>_best.pth``
    and its ``'model'`` entry is used as the state dict. Extra keyword
    arguments are forwarded to ``FB3Model``.
    """
    # NOTE(review): FB3Model is not defined in the cells visible here; it must
    # already exist in the kernel.
    network = FB3Model(cfg, config_path=cfg.config_path, pretrained=False, **model_kwargs)
    weights_name = f"{cfg.model.replace('/', '-')}_fold{fold}_best.pth"
    weights_path = os.path.join(cfg.path, weights_name)
    # torch.load unpickles the file, so only load checkpoints you trust.
    checkpoint = torch.load(weights_path, map_location=torch.device('cpu'))
    network.load_state_dict(checkpoint['model'])
    return network

In [7]:
def mean_pooling(model_output, attention_mask):
    """Average ``model_output.last_hidden_state`` over the positions kept by the mask.

    The hidden states are detached and moved to CPU first. The mask is
    broadcast over the last axis, and the denominator is clamped to ``1e-9``
    so a row with an all-zero mask yields zeros instead of dividing by zero.
    """
    hidden = model_output.last_hidden_state.detach().cpu()
    weights = attention_mask.unsqueeze(-1).expand(hidden.size()).float()
    weighted_sum = (hidden * weights).sum(1)
    kept = weights.sum(1).clamp(min=1e-9)
    return weighted_sum / kept

def inference_fn(test_loader, model, device):
    """Run ``model`` over every batch of ``test_loader`` and stack the outputs.

    The model is put in eval mode and moved to ``device``. Each batch is a
    mapping of tensors that is moved to ``device`` in place and passed to the
    model as a single argument under ``torch.no_grad()``. Returns one NumPy
    array with all batch outputs concatenated along the first axis.
    """
    model.eval()
    model.to(device)

    def _predict(batch):
        for key in list(batch.keys()):
            batch[key] = batch[key].to(device)
        with torch.no_grad():
            return model(batch).to('cpu').numpy()

    return np.concatenate([_predict(batch) for batch in test_loader])

class Inferencer:
    """Fold-ensemble prediction and text-embedding extraction for one model config.

    All methods read the configuration from ``self.cfg`` only, never from a
    notebook-level ``cfg`` variable.
    """

    def __init__(self, input_path=None, cfg=None, inference_weight=1):
        """Use ``cfg`` if given, otherwise load one from ``input_path``.

        ``inference_weight`` is only applied when the config is loaded from disk.
        """
        if cfg is None:
            self.cfg = load_config(input_path, inference_weight)
        else:
            self.cfg = cfg

    def predict(self, test_loader, device, stat_fn=np.mean):
        """Predict with every fold in ``cfg.trn_fold`` and aggregate.

        Args:
            test_loader: Iterable of input batches accepted by ``inference_fn``.
            device: Torch device used for inference.
            stat_fn: Aggregator called as ``stat_fn(list_of_fold_preds, axis=0)``.

        Returns:
            The aggregated predictions clipped to ``[1, 5]`` (also stored on
            ``self.preds``).
        """
        preds = []
        start = time.time()
        print('#'*10, self.cfg.path, '#'*10)
        for fold in self.cfg.trn_fold:
            print(f'Predicting fold {fold}...')
            model = load_model(self.cfg, fold, pool=self.cfg.pool_head)
            pred = inference_fn(test_loader, model, device)
            preds.append(pred)
            # Release the fold model before loading the next one.
            del model, pred; gc.collect()
            torch.cuda.empty_cache()
        end = time.time() - start
        print('#'*10, f'ETA: {end:.2f}s', '#'*10, '\n')

        self.preds = stat_fn(preds, axis=0)
        self.preds = np.clip(self.preds, 1, 5)
        return self.preds

    def get_oof_result(self):
        """Print the scores of ``<cfg.path>/oof_df.pkl`` via ``get_result``."""
        # read_pickle unpickles the file, so only use trusted training outputs.
        oof_df = pd.read_pickle(os.path.join(self.cfg.path, 'oof_df.pkl'))
        return get_result(self.cfg, oof_df)

    def get_text_embedding(self, data_loader, device, fold=None):
        """Return one embedding row per sample of ``data_loader`` as a NumPy array.

        With ``cfg.pretrained`` truthy the checkpoint at ``cfg.model`` is loaded
        through ``AutoModel`` and the L2-normalised masked mean of its last
        hidden state is used. Otherwise the model of ``fold`` is loaded and its
        ``feature`` method provides the embedding.
        """
        use_raw_backbone = bool(self.cfg.pretrained)
        if use_raw_backbone:
            model = AutoModel.from_pretrained(self.cfg.model)
        else:
            model = load_model(self.cfg, fold, pool=self.cfg.pool_head)
        model.to(device)
        model.eval()

        fold_emb = []
        for inputs in data_loader:
            for k, v in inputs.items():
                inputs[k] = v.to(device)
            with torch.no_grad():
                if use_raw_backbone:
                    # Not every tokenizer emits token_type_ids, so forward
                    # only the keys that are actually present.
                    model_inputs = {
                        k: inputs[k]
                        for k in ('input_ids', 'attention_mask', 'token_type_ids')
                        if k in inputs
                    }
                    output = model(**model_inputs)
                    emb = mean_pooling(output, inputs['attention_mask'].detach().cpu())
                    # Keep the batch axis even for a batch of one; squeezing it
                    # would make extend() below append scalars instead of rows.
                    emb = F.normalize(emb, p=2, dim=1)
                else:
                    emb = model.feature(**inputs)
            fold_emb.extend(emb.detach().cpu().numpy())
            del emb; gc.collect(); torch.cuda.empty_cache()

        return np.array(fold_emb)

# SVR

## config

In [8]:
##################################################
# One Config per off-the-shelf backbone used as a feature extractor.
#   model            - local checkpoint folder (passed to AutoTokenizer / AutoModel)
#   file_name        - stem of the cached embedding file; get_learner_score loads
#                      ../input/fb3embeddings/train_text_emb_<file_name>.npy
#   pretrained       - True selects the non-fine-tuned code paths
#   inference_weight - forwarded to Inferencer by get_text_embedding
#   max_len          - tokenizer padding/truncation length used by encode_text
# NOTE(review): the number ending each file_name is presumably the embedding
# width - confirm against the .npy files.

# --- DeBERTa v1 / v2 ---
deberta_base = Config(
    model='../input/huggingface-deberta-variants/deberta-base/deberta-base',
    file_name='microsoft_deberta_base_768',
    pretrained=True, inference_weight=1, max_len=1024) #
deberta_large = Config(
    model='../input/huggingface-deberta-variants/deberta-large/deberta-large', 
    file_name='microsoft_deberta_large_1024',
    pretrained=True, inference_weight=1, max_len=1024) #
deberta_xlarge = Config(
    model='../input/huggingface-deberta-variants/deberta-xlarge/deberta-xlarge', 
    file_name='microsoft_deberta_xlarge_1024',
    pretrained=True, inference_weight=1, max_len=1024)
deberta_v2_xlarge = Config(
    model='../input/bert-shopping-mall/deberta-v2-xlarge', 
    file_name='microsoft_deberta_v2_xlarge_1536',
    pretrained=True, inference_weight=1, max_len=1024)
deberta_v2_xxlarge = Config(
    model='../input/bert-shopping-mall/deberta-v2-xxlarge', 
    file_name='microsoft_deberta_v2_xxlarge_1536',
    pretrained=True, inference_weight=1, max_len=1024)

# --- DeBERTa v3 ---
deberta_v3_base = Config(
    model='../input/bert-shopping-mall/deberta-v3-base',
    file_name='microsoft_deberta_v3_base_768',
    pretrained=True, inference_weight=1, max_len=1024) #
deberta_v3_large = Config(
    model='../input/bert-shopping-mall/deberta-v3-large', 
    file_name='microsoft_deberta_v3_large_1024',
    pretrained=True, inference_weight=1, max_len=1024) # 

# --- DeBERTa large, MNLI checkpoint ---
deberta_large_mnli = Config(
    model='../input/huggingface-deberta-variants/deberta-large-mnli/deberta-large-mnli',
    file_name='microsoft_deberta_large_mnli_1024',
    pretrained=True, inference_weight=1, max_len=1024) # 

# --- GPT-2 ---
gpt2 = Config(
    model='../input/hugging-face-gpt2/gpt2',
    file_name='gpt2_768',
    pretrained=True, inference_weight=1, max_len=512) #

# --- RoBERTa ---
roberta_base = Config(
    model='../input/transformers/roberta-base', 
    file_name='roberta_base_768',
    pretrained=True, inference_weight=1, max_len=512) #
roberta_large = Config(
    model='../input/transformers/roberta-large',
    file_name='roberta_large_1024',
    pretrained=True, inference_weight=1, max_len=512) # 

# --- XLNet ---
xlnet_base = Config(
    model='../input/transformers/xlnet-base-cased',
    file_name='xlnet_base_cased_768',
    pretrained=True, inference_weight=1, max_len=1024) #
xlnet_large = Config(
    model='../input/transformers/xlnet-large-cased', 
    file_name='xlnet_large_cased_1024',
    pretrained=True, inference_weight=1, max_len=1024) #

# --- BART ---
bart_base = Config(
    model='../input/transformers/facebook-bart-base',
    file_name='facebook_bart_base_768',
    pretrained=True, inference_weight=1, max_len=1024)
bart_large = Config(
    model='../input/transformers/facebook-bart-large',
    file_name='facebook_bart_large_1024',
    pretrained=True, inference_weight=1, max_len=1024)
bart_large_mnli = Config(
    model='../input/facebook-bart-large-mnli',
    file_name='facebook_bart_large_mnli_1024',
    pretrained=True, inference_weight=1, max_len=1024)

# --- BERT ---
bert_base_uncased = Config(
    model='../input/transformers/bert-base-uncased',
    file_name='bert_base_uncased_768',
    pretrained=True, inference_weight=1, max_len=512)
bert_large_uncased = Config(
    model='../input/transformers/bert-large-uncased',
    file_name='bert_large_uncased_1024',
    pretrained=True, inference_weight=1, max_len=512)

# --- MUPPET RoBERTa ---
muppet_roberta_large = Config(
    model='../input/muppet-roberta-large',
    file_name='facebook_muppet_roberta_large_1024',
    pretrained=True, inference_weight=1, max_len=512)
# muppet_roberta_base = Config(model='facebook/muppet-roberta-base', pretrained=True, inference_weight=1, max_len=512)

# --- Funnel Transformer ---
funnel_small = Config(
    model='../input/transformers/funnel-transformer-small',
    file_name='funnel_transformer_small_768',
    pretrained=True, inference_weight=1, max_len=1024)
funnel_large = Config(
    model='../input/transformers/funnel-transformer-large',
    file_name='funnel_transformer_large_1024',
    pretrained=True, inference_weight=1, max_len=1024)

##################################################

# Target columns; used for stratification, fitting and scoring below.
target_cols = ['cohesion', 'syntax', 'vocabulary', 'phraseology', 'grammar', 'conventions']

## load embeddings

In [9]:
from sklearn.metrics import make_scorer
from joblib import dump, load
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.ensemble import StackingRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import RidgeCV, Ridge, Lasso, BayesianRidge, LinearRegression
from sklearn.metrics import mean_squared_error
from lightgbm import LGBMRegressor

if str(device) == 'cpu':
    from sklearn.svm import SVR
else:
    from cuml.svm import SVR
    import cuml
device

device(type='cuda')

In [10]:
# Training table; the fold column and the regression targets are read from it later.
train = pd.read_csv(TRAIN_PATH)
# Pre-computed hand-crafted features from the fb3-feature-engineering dataset.
# NOTE(review): feature_names and train_fe are not referenced again in the cells shown here.
feature_names = pd.read_csv('../input/fb3-feature-engineering/train_fe_mean_std.csv')['feature'].tolist()
train_fe = pd.read_csv('../input/fb3-feature-engineering/train_fe.csv')
test = pd.read_csv(TEST_PATH)

In [11]:
import sys
sys.path.append('../input/iterativestratification')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
# Number of cross-validation folds assigned to the training rows.
svr_folds = 15

# Multi-label stratified split over the target columns with a fixed seed.
skf = MultilabelStratifiedKFold(n_splits=svr_folds, shuffle=True, random_state=42)
for i,(train_index, val_index) in enumerate(skf.split(train,train[target_cols])):
    # The 'fold' column is created by the first assignment, so it starts as NaN
    # and ends up float-typed (0.0 ... 14.0).
    # NOTE(review): val_index is used as index labels here; this presumably
    # relies on train having the default 0..n-1 index - confirm.
    train.loc[val_index,'fold'] = i

In [12]:
train['fold'].value_counts()

14.0    261
9.0     261
5.0     261
12.0    261
10.0    261
8.0     261
0.0     261
6.0     261
2.0     261
1.0     261
11.0    261
4.0     260
7.0     260
3.0     260
13.0    260
Name: fold, dtype: int64

In [13]:
from glob import glob 

def get_text_embedding(cfg, dfs):
    """Compute text embeddings for every DataFrame in ``dfs`` with the model of ``cfg``.

    Args:
        cfg: Model ``Config``; its ``tokenizer`` entry is (re)loaded from
            ``cfg.model`` as a side effect.
        dfs: Iterable of DataFrames with a ``full_text`` column.

    Returns:
        A list with one embedding array per DataFrame. For fine-tuned models
        (``cfg.pretrained`` falsy) the embeddings of all folds in
        ``cfg.trn_fold`` are averaged.
    """
    cfg.tokenizer = AutoTokenizer.from_pretrained(cfg.model)
    # Tokenizers without a padding token (e.g. GPT-2) cannot pad to max_length.
    # Checking the tokenizer itself works whatever path cfg.model points to,
    # unlike comparing cfg.model with the literal 'gpt2'.
    if cfg.tokenizer.pad_token is None:
        cfg.tokenizer.pad_token = cfg.tokenizer.eos_token
    infer_ = Inferencer(cfg=cfg, inference_weight=cfg.inference_weight)

    text_embs = []
    for df in dfs:
        dataset = TestDataset(cfg, df)
        loader = DataLoader(
            dataset,
            batch_size=4,
            shuffle=False)

        if not cfg.pretrained:
            # One embedding matrix per fold, averaged element-wise.
            fold_embs = [
                infer_.get_text_embedding(loader, device, fold)
                for fold in infer_.cfg.trn_fold
            ]
            text_emb = np.mean(fold_embs, axis=0)
        else:
            text_emb = infer_.get_text_embedding(loader, device)
        text_embs.append(text_emb)
        del dataset, loader; gc.collect(); torch.cuda.empty_cache()
    del infer_; gc.collect(); torch.cuda.empty_cache()
    return text_embs

def learner_cv(features, learner, folds=15, save=False, verbose=False):
    """Cross-validate ``learner`` on ``features`` using the folds stored in ``train``.

    For each fold id in ``range(folds)`` a ``MultiOutputRegressor(learner)`` is
    fitted on the rows of the global ``train`` frame whose ``fold`` differs and
    scored with ``mc_rmse`` on the rows whose ``fold`` matches. ``folds`` only
    limits how many fold ids are evaluated; the split itself is not recomputed.

    Args:
        features: 2-D array indexed by the index values of ``train``.
        learner: Single-output regressor to wrap.
        folds: Number of fold ids (starting at 0) to evaluate.
        save: If true, dump each fitted model to ``svr_<fold>.model``.
        verbose: If true, print the score of every fold.

    Returns:
        Mean of the per-fold scores.
    """
    fold_scores = []
    for fold in range(folds):
        is_valid = train['fold'] == fold
        train_part = train[~is_valid]
        valid_part = train[is_valid]

        x_train = features[train_part.index.tolist(), :]
        x_valid = features[valid_part.index.tolist(), :]

        model = MultiOutputRegressor(learner)
        model.fit(x_train, train_part[target_cols].values)
        valid_pred = model.predict(x_valid)

        fold_score, _ = mc_rmse(valid_part[target_cols].values, valid_pred)
        fold_scores.append(fold_score)

        if verbose:
            print('#'*25)
            print('### Fold',fold+1)
            print("Score: {}".format(fold_score))
        if save:
            dump(model, f'svr_{fold}.model')

    return np.mean(fold_scores)

def get_learner_score(models_cfg, learner, folds=5, manual_features=None, save=False, verbose=False):
    """Score ``learner`` on the concatenated cached embeddings of ``models_cfg``.

    Every config gets a ``model_name`` (last path component of ``model`` with
    ``-`` replaced by ``_``) and, unless it already has one, an ``embedding``
    loaded from the matching ``.npy`` file. The embeddings, plus
    ``manual_features`` when given, are joined column-wise and handed to
    ``learner_cv``.

    Returns:
        ``(score, models_cfg)`` with the configs mutated as described above.
    """
    for model_cfg in models_cfg:
        model_cfg.model_name = model_cfg.model.split('/')[-1].replace('-', '_')
        model_file = f'../input/fb3embeddings/train_text_emb_{model_cfg.file_name}.npy'
        if 'embedding' not in model_cfg:
            with open(model_file, 'rb') as f:
                model_cfg.embedding = np.load(f)

    per_model = [model_cfg.embedding for model_cfg in models_cfg]
    embeddings = np.concatenate(per_model, axis=1)
    if manual_features is not None:
        embeddings = np.concatenate([embeddings, manual_features], axis=1)

    svr_score = learner_cv(embeddings, learner, folds=folds, save=save, verbose=verbose)
    print(f'model_set={[m.model_name for m in models_cfg]};   score={svr_score}')
    return svr_score, models_cfg

# Model selection

In [14]:
# Switch for the greedy model-selection cell further down.
model_selection = False

# Every backbone considered as a candidate feature extractor.
pretrained_models_cfg = [
    deberta_large_mnli,
    gpt2,
    roberta_base,
    roberta_large,
    xlnet_base, 
    xlnet_large,
    deberta_base, 
    deberta_large, 
    deberta_xlarge,
    deberta_v2_xlarge, 
    deberta_v2_xxlarge,
    deberta_v3_base, 
    deberta_v3_large,
    
    bart_base,
    bart_large,
    bart_large_mnli,
    bert_base_uncased,
    bert_large_uncased,
    muppet_roberta_large,
    funnel_small,
    funnel_large
]

In [15]:
# Score every candidate on its own with a Ridge regressor; folds=4 means only
# fold ids 0-3 of the 15 assigned folds are evaluated (see learner_cv).
pretrained_models_cfg_score = []
for cfg in pretrained_models_cfg:
    score, c = get_learner_score([cfg], Ridge(alpha=4.4), folds=4, save=False)
    # c is the one-element config list that was passed in; keep it next to its score.
    pretrained_models_cfg_score.append((score, c[0]))

model_set=['deberta_large_mnli'];   score=0.4780494349953554
model_set=['gpt2'];   score=0.6488122378338913
model_set=['roberta_base'];   score=0.5148243161379941
model_set=['roberta_large'];   score=0.5823614424366776
model_set=['xlnet_base_cased'];   score=0.6012977007579187
model_set=['xlnet_large_cased'];   score=0.6198186255238233
model_set=['deberta_base'];   score=0.5119876822876401
model_set=['deberta_large'];   score=0.4738268967405775
model_set=['deberta_xlarge'];   score=0.4828337051123891
model_set=['deberta_v2_xlarge'];   score=0.5163828322242732
model_set=['deberta_v2_xxlarge'];   score=0.5271556434448376
model_set=['deberta_v3_base'];   score=0.47566183177648963
model_set=['deberta_v3_large'];   score=0.47098341043780884
model_set=['facebook_bart_base'];   score=0.47838965248719434
model_set=['facebook_bart_large'];   score=0.4801957967788051
model_set=['facebook_bart_large_mnli'];   score=0.47818931986075836
model_set=['bert_base_uncased'];   score=0.4867411715776929
mo

In [16]:
# Order the candidates by ascending stand-alone score (lowest RMSE first) ...
pretrained_models_cfg_score = sorted(pretrained_models_cfg_score, key=lambda tup:tup[0], reverse=False)
# ... and rebuild the candidate list in that order.
pretrained_models_cfg = [c for score, c in pretrained_models_cfg_score]

In [17]:
if model_selection:
    # Greedy forward selection, restarted once with every candidate as the
    # seed model. In each step the candidate giving the lowest Ridge CV score
    # (fold ids 0-3) is added; the search stops as soon as the best candidate
    # would make the score worse, and that candidate is NOT added.
    for i, first_model in enumerate(tqdm(pretrained_models_cfg)):
        features = [first_model]
        best_score, _ = get_learner_score(features, Ridge(alpha=4.4), folds=4, save=False)

        while len(features) < len(pretrained_models_cfg):
            chosen = {feat.model for feat in features}
            candidates = [feat for feat in pretrained_models_cfg if feat.model not in chosen]
            if not candidates:
                break

            scores = [
                get_learner_score(features + [feat], Ridge(alpha=4.4), folds=4, save=False)[0]
                for feat in candidates
            ]
            best_idx = int(np.argmin(scores))

            # Check for improvement BEFORE extending the set, so the reported
            # model set and score always belong together.
            if best_score < scores[best_idx]:
                break
            best_score = scores[best_idx]
            features.append(candidates[best_idx])

            del scores, candidates; gc.collect(); torch.cuda.empty_cache()

        # Results noted from an earlier run:
        #BEST SCORE:0.4483291121589826
        #MODELS SET: ['deberta_large_mnli', 'deberta_v3_large', 'deberta_v3_base', 'roberta_large', 'deberta_v3_large', 'deberta_v2_xlarge', 'deberta_v3_large', 'deberta_v2_xlarge', 'deberta_v3_large', 'deberta_v2_xlarge', 'roberta_large', 'deberta_v3_large', 'deberta_v2_xlarge']

        LOGGER.info(f'Interation {i+1}:')
        LOGGER.info(f'model_set={[c.model_name for c in features]} \nbest_score={best_score}')
        LOGGER.info('#'*50)
        LOGGER.info('\n')

In [18]:
# model_set=['deberta_large_mnli', 'deberta_v3_large', 'facebook_bart_large', 'deberta_v3_base', 'muppet_roberta_large', 'funnel_transformer_small', 'roberta_base', '', 'funnel_transformer_large', 'gpt2', 'deberta_v2_xxlarge', 'roberta_large', 'xlnet_base_cased', 'xlnet_large_cased'] 
# best_score=0.45262457850522697

# Fit & save SVR

In [19]:
# Reduced candidate list for the SVR experiments below; this rebinds
# pretrained_models_cfg, replacing the sorted list built earlier.
pretrained_models_cfg = [
    deberta_large_mnli,
    roberta_base,
    roberta_large,
    #xlnet_base, 
    #xlnet_large,
    deberta_base, 
    deberta_large, 
    deberta_xlarge,
    deberta_v2_xlarge, 
    deberta_v2_xxlarge,
    deberta_v3_base, 
    deberta_v3_large
]

In [20]:
# Keep only the backbones whose stand-alone Ridge score (fold ids 0-3) is below 0.5.
filtered_model_cfgs = []
for cfg in pretrained_models_cfg:
    score, c = get_learner_score([cfg], Ridge(alpha=4.4), folds=4, save=False)
    if score < 0.5:
        filtered_model_cfgs.append(cfg)
# Alternative learners are kept commented out for quick switching.
#learner = Ridge(alpha=4.4)
learner = SVR(C=2.0)
#learner = BayesianRidge()
# Full CV over all svr_folds on the concatenated embeddings of the kept backbones.
svr_score, models_cfg = get_learner_score(filtered_model_cfgs, learner, folds=svr_folds, save=False, verbose=True)

model_set=['deberta_large_mnli'];   score=0.4780494349953554
model_set=['roberta_base'];   score=0.5148243161379941
model_set=['roberta_large'];   score=0.5823614424366776
model_set=['deberta_base'];   score=0.5119876822876401
model_set=['deberta_large'];   score=0.4738268967405775
model_set=['deberta_xlarge'];   score=0.4828337051123891
model_set=['deberta_v2_xlarge'];   score=0.5163828322242732
model_set=['deberta_v2_xxlarge'];   score=0.5271556434448376
model_set=['deberta_v3_base'];   score=0.47566183177648963
model_set=['deberta_v3_large'];   score=0.47098341043780884
#########################
### Fold 1
Score: 0.4649014278164291
#########################
### Fold 2
Score: 0.44381013713319484
#########################
### Fold 3
Score: 0.45025832075534894
#########################
### Fold 4
Score: 0.45890894464202114
#########################
### Fold 5
Score: 0.44670712003356017
#########################
### Fold 6
Score: 0.45484160250934164
#########################
### Fold 7


In [21]:
from cuml import LinearRegression, LogisticRegression, Ridge, Lasso, ElasticNet, ForestInference
from cuml.naive_bayes import MultinomialNB, BernoulliNB, GaussianNB, CategoricalNB
from cuml.ensemble import RandomForestRegressor

In [22]:
# best_score = np.inf
# best_model = None
# for model in [
#     LinearRegression,
#     LogisticRegression, 
#     Ridge, 
#     Lasso, 
#     ElasticNet, 
#     SVR,
#     MultinomialNB, 
#     BernoulliNB, 
#     GaussianNB, 
#     CategoricalNB,
#     RandomForestRegressor
# ]:
#     svr_score, models_cfg = get_learner_score(pretrained_models_cfg, model(), folds=svr_folds, save=False, verbose=True)
#     if svr_score < best_score:
#         best_score = svr_score 
#         best_model = model
#         print(best_model)

In [23]:
# Same SVR evaluation, this time on the unfiltered pretrained_models_cfg list.
#learner = Ridge(alpha=2.0)
learner = SVR(C=2.0)
#learner = BayesianRidge()
#learner = LinearRegression(positive=True)
svr_score, models_cfg = get_learner_score(pretrained_models_cfg, learner, folds=svr_folds, save=False, verbose=True)

#########################
### Fold 1
Score: 0.4611352297648386
#########################
### Fold 2
Score: 0.4455631938399686
#########################
### Fold 3
Score: 0.44824615108490207
#########################
### Fold 4
Score: 0.45683002067370887
#########################
### Fold 5
Score: 0.4463190051086067
#########################
### Fold 6
Score: 0.4529797722366456
#########################
### Fold 7
Score: 0.4376779134862339
#########################
### Fold 8
Score: 0.43295210081368674
#########################
### Fold 9
Score: 0.45809241686972846
#########################
### Fold 10
Score: 0.4570004911259682
#########################
### Fold 11
Score: 0.44454187373990606
#########################
### Fold 12
Score: 0.44979555053609893
#########################
### Fold 13
Score: 0.4431794291273042
#########################
### Fold 14
Score: 0.45831270686047226
#########################
### Fold 15
Score: 0.456480321861058
model_set=['deberta_large_mnli', 'roberta_b

In [24]:
len(models_cfg)

10

---

In [34]:
# Backbone set whose fitted models are saved; commented-out entries are excluded.
pretrained_models_cfg = [
    deberta_large_mnli,
    #gpt2,
    roberta_base,
    roberta_large,
    #xlnet_base, 
    #xlnet_large,
    deberta_base, 
    deberta_large, 
    deberta_xlarge,
    deberta_v2_xlarge, 
    deberta_v2_xxlarge,
    deberta_v3_base, 
    deberta_v3_large,
    
    #bart_base,
    bart_large,
    #bart_large_mnli,
    #bert_base_uncased,
    bert_large_uncased,
    #muppet_roberta_large,
    funnel_small,
    funnel_large
]
print(len(pretrained_models_cfg))

# learner = Ridge(alpha=2.0)
learner = SVR(C=2.0, tol=0.0001)
# save=True makes learner_cv dump one fitted model per fold to svr_<fold>.model.
svr_score, models_cfg = get_learner_score(pretrained_models_cfg, learner, folds=svr_folds, save=True, verbose=True)

14
#########################
### Fold 1
Score: 0.46187953648724395
#########################
### Fold 2
Score: 0.44586148247944607
#########################
### Fold 3
Score: 0.44858766598142025
#########################
### Fold 4
Score: 0.4559360917770415
#########################
### Fold 5
Score: 0.44542110910704596
#########################
### Fold 6
Score: 0.4521296113672048
#########################
### Fold 7
Score: 0.4391163709721421
#########################
### Fold 8
Score: 0.43110553529886464
#########################
### Fold 9
Score: 0.45439678510384834
#########################
### Fold 10
Score: 0.4575696147121531
#########################
### Fold 11
Score: 0.44354563236007777
#########################
### Fold 12
Score: 0.4502317118302322
#########################
### Fold 13
Score: 0.44421093988159877
#########################
### Fold 14
Score: 0.4567753881450587
#########################
### Fold 15
Score: 0.45616062403519525
model_set=['deberta_large_mnli', 'rob