In [1]:
import pickle
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
# Load the competition training set and preview its first two rows.
train_csv_path = "../../input/feedback-prize-english-language-learning/train.csv"
train_df = pd.read_csv(train_csv_path)
train_df.head(n=2)

Unnamed: 0,text_id,full_text,cohesion,syntax,vocabulary,phraseology,grammar,conventions
0,0016926B079C,I think that students would benefit from learn...,3.5,3.5,3.0,3.0,4.0,3.0
1,0022683E9EA5,When a problem is a change you have to let it ...,2.5,2.5,3.0,2.0,2.0,2.5


In [3]:
# Out-of-fold (OOF) prediction files, one per model in the ensemble.
model_path_list = [
    '../../14_Baseline4/exp/result/14_v1_01/oof_df.csv', # deberta-v3-base
    '../../14_Baseline4/exp/result/14_v1_04/oof_df.csv', # deberta-v3-large
]
num_models = len(model_path_list)

TARGET_COLS = ['cohesion','syntax','vocabulary','phraseology','grammar','conventions']

# Re-order every OOF frame to follow train_df's row order so that predictions can
# be compared positionally against train_df[TARGET_COLS]. The merged frames must
# be stored in the list itself: rebinding a loop variable to the merge result
# leaves the list holding the original, unaligned frames.
oof_df_list = [
    train_df[['text_id']].merge(pd.read_csv(model_path), how='left', on='text_id')
    for model_path in model_path_list
]

for model_path, oof_df in zip(model_path_list, oof_df_list):
    # A left merge yields extra rows when text_id repeats in the OOF file and NaN
    # rows when an essay has no prediction; fail early with a clear message
    # instead of letting either case distort the scores computed later.
    assert len(oof_df) == len(train_df), f'duplicate text_id rows in {model_path}'
    assert oof_df[TARGET_COLS].notna().all().all(), f'missing OOF predictions in {model_path}'

# Per-model prediction matrices, each (num_data, num_labels).
oofs = [oof_df[TARGET_COLS].values for oof_df in oof_df_list]

# Equal-weight average of all models (each model contributes 1 / num_models).
preds = sum(oof / num_models for oof in oofs)

In [4]:
# Start from a copy of the first model's OOF frame and overwrite its target
# columns with the averaged predictions (column i of preds -> TARGET_COLS[i]).
oof_df = oof_df_list[0].copy()
oof_df[TARGET_COLS] = preds

In [5]:
import numpy as np

def calc_metric(pred, gt):
    '''
    Mean column-wise RMSE between predictions and ground truth.

    pred : (num_data, num_labels) array-like of predictions
    gt : (num_data, num_labels) array-like of ground-truth values

    Both inputs are converted with np.asarray, so nested lists and DataFrames
    are accepted and are always compared positionally (no pandas index
    alignment). NaN values propagate into the result rather than being skipped.

    Returns the RMSE of every label column, averaged over the columns.
    '''
    pred = np.asarray(pred)
    gt = np.asarray(gt)
    # RMSE per label column (reduce over the data axis), then average the columns.
    per_label_rmse = np.sqrt(np.mean((pred - gt)**2, axis=0))
    return per_label_rmse.mean()

In [6]:
# CV of the equal-weight blend held in oof_df, measured against the training labels.
score = calc_metric(
    oof_df[TARGET_COLS].values,
    train_df[TARGET_COLS].values,
)
print('CV={:.4f}'.format(score))

CV=0.4464


In [7]:
# Stack the per-model OOF matrices along a new leading axis:
# (num_models, num_data, num_labels).
ensemble_predictions = np.stack(oofs, axis=0)

In [8]:
def ensemble_score(weights,return_pred=False):
    '''
    Score a weighted blend of the stacked per-model OOF predictions.

    weights : one weight per model (same length as ensemble_predictions);
              they are normalised to sum to 1 before blending.
    return_pred : when True, also return the blended predictions.

    Returns the calc_metric score of the blend against train_df[TARGET_COLS],
    or the tuple (score, blended_predictions) when return_pred is True.
    '''
    weights = np.asarray(weights, dtype=float)
    # Normalise, then shape to (num_models, 1, 1) so that each weight broadcasts
    # over its own model's (num_data, num_labels) prediction matrix.
    weights = (weights / weights.sum()).reshape(-1, 1, 1)
    blended = (weights * ensemble_predictions).sum(axis=0)
    # Compare against a plain ndarray: handing calc_metric a DataFrame routes
    # np.mean through pandas, which silently skips NaN and so can disagree with
    # scores computed from `.values` elsewhere in the notebook.
    score = calc_metric(blended, train_df[TARGET_COLS].values)
    if return_pred:
        return score, blended
    return score

In [9]:
from skopt import gp_minimize

# One [low, high] search interval per model weight. ensemble_score normalises the
# weights, so only their ratios influence the score.
weight_bounds = np.array([[0.1, 1]] * len(ensemble_predictions))
results = gp_minimize(
    ensemble_score,
    weight_bounds,
    verbose=True,
    random_state=2022,
)

Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.0035
Function value obtained: 0.4468
Current minimum: 0.4468
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.0018
Function value obtained: 0.4465
Current minimum: 0.4465
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0017
Function value obtained: 0.4464
Current minimum: 0.4464
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.0016
Function value obtained: 0.4468
Current minimum: 0.4464
Iteration No: 5 started. Evaluating function at random point.
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 0.0016
Function value obtained: 0.4466
Current minimum: 0.4464
Iteration No: 6 started. Evaluating

Iteration No: 41 ended. Search finished for the next optimal point.
Time taken: 0.4769
Function value obtained: 0.4473
Current minimum: 0.4464
Iteration No: 42 started. Searching for the next optimal point.
Iteration No: 42 ended. Search finished for the next optimal point.
Time taken: 0.4586
Function value obtained: 0.4465
Current minimum: 0.4464
Iteration No: 43 started. Searching for the next optimal point.
Iteration No: 43 ended. Search finished for the next optimal point.
Time taken: 0.4486
Function value obtained: 0.4475
Current minimum: 0.4464
Iteration No: 44 started. Searching for the next optimal point.
Iteration No: 44 ended. Search finished for the next optimal point.
Time taken: 0.4432
Function value obtained: 0.4464
Current minimum: 0.4464
Iteration No: 45 started. Searching for the next optimal point.
Iteration No: 45 ended. Search finished for the next optimal point.
Time taken: 0.4648
Function value obtained: 0.4468
Current minimum: 0.4464
Iteration No: 46 started. Sea



Iteration No: 63 ended. Search finished for the next optimal point.
Time taken: 0.5675
Function value obtained: 0.4464
Current minimum: 0.4464
Iteration No: 64 started. Searching for the next optimal point.
Iteration No: 64 ended. Search finished for the next optimal point.
Time taken: 0.7006
Function value obtained: 0.4464
Current minimum: 0.4464
Iteration No: 65 started. Searching for the next optimal point.
Iteration No: 65 ended. Search finished for the next optimal point.
Time taken: 0.5687
Function value obtained: 0.4464
Current minimum: 0.4464
Iteration No: 66 started. Searching for the next optimal point.
Iteration No: 66 ended. Search finished for the next optimal point.
Time taken: 0.6330
Function value obtained: 0.4466
Current minimum: 0.4464
Iteration No: 67 started. Searching for the next optimal point.
Iteration No: 67 ended. Search finished for the next optimal point.
Time taken: 0.6208
Function value obtained: 0.4471
Current minimum: 0.4464
Iteration No: 68 started. Sea

In [10]:
# Normalise the optimiser's best point so the weights sum to 1.
raw_weights = results['x']
best_weights = np.array(raw_weights) / sum(raw_weights)
print(best_weights)

[0.48183549 0.51816451]


In [11]:
# smaller is better for this metric
score,ensemble_pred=ensemble_score(best_weights,True)
score

0.44641308363624127

In [12]:
# Report the current score rounded to four decimals.
cv_text = 'CV={:.4f}'.format(score)
print(cv_text)

CV=0.4464


# Error Analysis - Check Correlations Between Target Columns

In [13]:
# Pairwise correlation between the ground-truth target scores.
# Reuse the shared TARGET_COLS constant rather than a second hard-coded copy,
# so the column list cannot drift out of sync with the rest of the notebook.
cols = list(TARGET_COLS)
train_df[cols].corr()

Unnamed: 0,cohesion,syntax,vocabulary,phraseology,grammar,conventions
cohesion,1.0,0.695459,0.666151,0.690058,0.638689,0.666151
syntax,0.695459,1.0,0.680562,0.725467,0.709525,0.700025
vocabulary,0.666151,0.680562,1.0,0.735261,0.654852,0.664292
phraseology,0.690058,0.725467,0.735261,1.0,0.719746,0.666842
grammar,0.638689,0.709525,0.654852,0.719746,1.0,0.673301
conventions,0.666151,0.700025,0.664292,0.666842,0.673301,1.0


In [14]:
# Pairwise correlation between the target columns of the OOF predictions in oof_df.
# Reuse the shared TARGET_COLS constant rather than a second hard-coded copy,
# so the column list cannot drift out of sync with the rest of the notebook.
cols = list(TARGET_COLS)
oof_df[cols].corr()

Unnamed: 0,cohesion,syntax,vocabulary,phraseology,grammar,conventions
cohesion,1.0,0.935795,0.897036,0.891201,0.810074,0.858698
syntax,0.935795,1.0,0.918322,0.947042,0.902815,0.90818
vocabulary,0.897036,0.918322,1.0,0.943039,0.859324,0.869988
phraseology,0.891201,0.947042,0.943039,1.0,0.936392,0.863915
grammar,0.810074,0.902815,0.859324,0.936392,1.0,0.83142
conventions,0.858698,0.90818,0.869988,0.863915,0.83142,1.0
