In [1]:
import pickle
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
# Load the competition training set; the preview below shows text_id, full_text
# and the six target score columns.
train_df = pd.read_csv("../../input/feedback-prize-english-language-learning/train.csv")
train_df.head(2)

Unnamed: 0,text_id,full_text,cohesion,syntax,vocabulary,phraseology,grammar,conventions
0,0016926B079C,I think that students would benefit from learn...,3.5,3.5,3.0,3.0,4.0,3.0
1,0022683E9EA5,When a problem is a change you have to let it ...,2.5,2.5,3.0,2.0,2.0,2.5


In [3]:
# (path to a model's out-of-fold prediction CSV, blend weight) for every ensemble member.
model_path_list = [
    ('../../14_Baseline4/exp/result/14_v1_01/oof_df.csv',0.2), # deberta-v3-base
    ('../../14_Baseline4/exp/result/14_v1_07/oof_df.csv',0.2), # deberta-v3-base, 10folds
    ('../../14_Baseline4/exp/result/14_v1_08/oof_df.csv',0.1), # deberta-base
    ('../../14_Baseline4/exp/result/14_v1_09/oof_df.csv',0.1), # roberta-base
    ('../../14_Baseline4/exp/result/14_v1_10/oof_df.csv',0.1), # deberta-v3-large
    ('../../14_Baseline4/exp/result/14_v1_11/oof_df.csv',0.1), # xlm-roberta-base
    ('../../14_Baseline4/exp/result/14_v1_12/oof_df.csv',0.2), # deberta-large
]

TARGET_COLS = ['cohesion','syntax','vocabulary','phraseology','grammar','conventions']

# Load every OOF frame and re-order its rows to follow train_df.
# The merged frame must be stored back into the list: rebinding a loop variable
# (as an earlier version did) leaves the list untouched, so the alignment would
# silently not happen even though the metric compares rows positionally.
# validate='one_to_one' makes duplicated text_ids fail loudly instead of
# multiplying rows.
oof_df_list = [
    train_df[['text_id']].merge(
        pd.read_csv(model_path), how='left', on='text_id', validate='one_to_one'
    )
    for model_path, _ in model_path_list
]

# A left merge fills NaN for any essay a model has no prediction for; stop here
# rather than letting NaN propagate into the blended score.
for (model_path, _), oof_df in zip(model_path_list, oof_df_list):
    if oof_df[TARGET_COLS].isna().any().any():
        raise ValueError('missing OOF predictions after aligning {}'.format(model_path))

# Normalise the blend weights so they sum to 1 (total computed once, not per element).
weights_list = [w for _, w in model_path_list]
weight_total = sum(weights_list)
weights_list = [w / weight_total for w in weights_list]

num_models = len(model_path_list)

# Per-model prediction arrays, each of shape (num_data, num_labels).
oofs = [oof_df[TARGET_COLS].values for oof_df in oof_df_list]

# Weighted blend of all models' predictions.
preds = 0
for model_preds, w in zip(oofs, weights_list):
    preds += model_preds * w

In [4]:
# Start from a copy of the first model's OOF frame (keeps its non-target columns)
# and overwrite all target columns with the blended predictions in one assignment.
oof_df = oof_df_list[0].copy()
oof_df[TARGET_COLS] = preds

In [5]:
import numpy as np

def calc_metric(pred, gt):
    '''
    Mean of the per-label RMSE values.

    pred : predictions, (num_data, num_labels)
    gt   : ground truth, (num_data, num_labels)

    The RMSE is computed over the data axis (axis 0) separately for each label,
    and the resulting per-label values are then averaged into one scalar.
    '''
    squared_error = (pred - gt) ** 2
    per_label_rmse = np.sqrt(np.mean(squared_error, axis=0))
    return per_label_rmse.mean()

In [6]:
# Score the weighted blend against the ground-truth labels.
score = calc_metric(
    gt=train_df[TARGET_COLS].values,
    pred=oof_df[TARGET_COLS].values,
)
print('CV={:.4f}'.format(score))

CV=0.4441


In [None]:
# Stack the per-model OOF arrays along a new leading axis:
# (num_models, num_data, num_labels).
ensemble_predictions = np.stack(oofs, axis=0)

In [None]:
def ensemble_score(weights,return_pred=False):
    '''
    Score a weighted blend of the stacked model predictions.

    weights     : one weight per model; normalised here so they sum to 1
    return_pred : when True, return (score, blended_predictions) instead of
                  only the score

    Reads the notebook-level ensemble_predictions and train_df.
    '''
    normalized = np.array(weights)
    # Shape (num_models, 1, 1) so each weight broadcasts over its model's
    # whole prediction matrix.
    normalized = normalized.reshape(-1, 1, 1) / normalized.sum()
    blended = (normalized * ensemble_predictions).sum(0)
    score = calc_metric(blended, train_df[TARGET_COLS])
    return (score, blended) if return_pred else score

In [None]:
from skopt import gp_minimize

# Search one weight per model, each within [0.1, 1], for the blend that
# minimises ensemble_score.
results = gp_minimize(
    func=ensemble_score,
    dimensions=np.array([[0.1, 1] for _ in range(len(ensemble_predictions))]),
    verbose=True,
    random_state=2022,
)

In [None]:
# Normalise the weights returned by the search so they sum to 1.
best_weights = np.array(results['x'])
best_weights = best_weights / sum(results['x'])
print(best_weights)

In [None]:
# Lower values of this metric are better.
score, ensemble_pred = ensemble_score(best_weights, return_pred=True)
score

In [None]:
# Print the score assigned in the preceding cell (the ensemble metric for
# best_weights), formatted to four decimals.
print('CV={:.4f}'.format(score))

# Error Analysis - Check Correlation

In [7]:
# Pairwise correlation between the ground-truth target scores.
# Derived from the shared TARGET_COLS constant (copied, so later edits to
# cols cannot alter it) rather than re-typing the column names.
cols = list(TARGET_COLS)
train_df[cols].corr()

Unnamed: 0,cohesion,syntax,vocabulary,phraseology,grammar,conventions
cohesion,1.0,0.695459,0.666151,0.690058,0.638689,0.666151
syntax,0.695459,1.0,0.680562,0.725467,0.709525,0.700025
vocabulary,0.666151,0.680562,1.0,0.735261,0.654852,0.664292
phraseology,0.690058,0.725467,0.735261,1.0,0.719746,0.666842
grammar,0.638689,0.709525,0.654852,0.719746,1.0,0.673301
conventions,0.666151,0.700025,0.664292,0.666842,0.673301,1.0


In [8]:
# Pairwise correlation between the blended predictions for each target,
# for comparison with the label correlations.
# Derived from the shared TARGET_COLS constant (copied, so later edits to
# cols cannot alter it) rather than re-typing the column names.
cols = list(TARGET_COLS)
oof_df[cols].corr()

Unnamed: 0,cohesion,syntax,vocabulary,phraseology,grammar,conventions
cohesion,1.0,0.941221,0.909484,0.903134,0.822716,0.881044
syntax,0.941221,1.0,0.933178,0.955576,0.916389,0.923403
vocabulary,0.909484,0.933178,1.0,0.952704,0.873637,0.887465
phraseology,0.903134,0.955576,0.952704,1.0,0.944964,0.876956
grammar,0.822716,0.916389,0.873637,0.944964,1.0,0.853665
conventions,0.881044,0.923403,0.887465,0.876956,0.853665,1.0
