# Scope

Ensemble experiments for Feedback Learning models. Read in and prepare OOF files.

Uses MultiOutput models for ensembling.

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error

import pandas as pd
import numpy as np
import os, sys, glob
from pathlib import Path

Adjust the path to the logs here:

In [2]:
path_logs = Path("./logs")

The competition metric:

In [3]:
# | export
def comp_metric_score(preds, targs):
    """This competition is evaluated using "columnwise root mean squared error". Expects numpy arrays."""
    len_target_cols = targs.shape[1]
    score = [0] * len_target_cols
    for i in range(len_target_cols):
        score[i] = np.sqrt(mean_squared_error(preds[:, i], targs[:, i]))
    return np.mean(score)

# Get OOF data

In [4]:
files = glob.glob(str(path_logs) + "/*.csv")
files_oof = [f for f in files if "oof" in f]
files = [f for f in files if not "oof" in f]

Read all the oof files at once:

In [5]:
oof = pd.concat((pd.read_csv(f).assign(learner_name = f) for f in files_oof))

In [6]:
oof['learner_name'] = oof['learner_name'].str.replace("logs/oof_", "")

oof[['experiment','run_id']] = oof['learner_name'].str.split('_202', expand=True)
oof[['run_id', 'fold']] = oof['run_id'].str.split('_fold_', expand=True)
oof['run_id'] = oof['run_id'].str.slice(0, -2)

oof['run_id'] = "202" + oof['run_id'].str.replace("", "", regex=True)

oof['learner_name'] = oof['learner_name'].str.replace(".csv", "", regex = False)
oof['fold'] = oof['fold'].str.replace(".csv", "", regex = False).astype('int')
oof['model'] = oof['experiment'] + "_" + oof['run_id']

In [7]:
oof.shape

(113419, 18)

In [8]:
oof.head()

Unnamed: 0,pred_cohesion,pred_syntax,pred_vocabulary,pred_phraseology,pred_grammar,pred_conventions,targ_cohesion,targ_syntax,targ_vocabulary,targ_phraseology,targ_grammar,targ_conventions,run_id,grid_id,learner_name,experiment,fold,model
0,2.90625,2.601562,3.029297,2.757812,2.605469,2.761719,3.5,3.5,3.5,3.5,3.0,3.5,2022_11_22_03_26,1,blurr_deberta_v3_small_2022_11_22_03_26_1_fold_1,blurr_deberta_v3_small,1,blurr_deberta_v3_small_2022_11_22_03_26
1,3.8125,3.527344,3.777344,3.582031,3.677734,3.916016,4.0,3.5,3.5,4.0,4.5,4.0,2022_11_22_03_26,1,blurr_deberta_v3_small_2022_11_22_03_26_1_fold_1,blurr_deberta_v3_small,1,blurr_deberta_v3_small_2022_11_22_03_26
2,2.720703,2.384766,2.763672,2.400391,2.267578,2.820312,3.0,2.5,2.0,2.0,2.0,3.0,2022_11_22_03_26,1,blurr_deberta_v3_small_2022_11_22_03_26_1_fold_1,blurr_deberta_v3_small,1,blurr_deberta_v3_small_2022_11_22_03_26
3,3.017578,2.707031,3.136719,2.832031,2.673828,2.923828,4.0,2.5,3.0,3.5,3.0,2.5,2022_11_22_03_26,1,blurr_deberta_v3_small_2022_11_22_03_26_1_fold_1,blurr_deberta_v3_small,1,blurr_deberta_v3_small_2022_11_22_03_26
4,3.359375,3.052734,3.410156,3.134766,3.056641,3.326172,3.5,3.5,3.0,3.5,3.0,3.0,2022_11_22_03_26,1,blurr_deberta_v3_small_2022_11_22_03_26_1_fold_1,blurr_deberta_v3_small,1,blurr_deberta_v3_small_2022_11_22_03_26


### Define ensemble models through `run_id`

In [9]:
ensemble_run_id = ['2022_11_23_16_10', '2022_11_23_16_18', '2022_11_25_20_33', '2022_11_23_19_08',
'2022_11_23_21_13', '2022_11_23_21_16', '2022_11_25_20_40', '2022_11_25_20_54']

### Load the train data folds

In [10]:
train = pd.read_csv("~/kaggle_feedback_ell/data/clean/train_folds.csv")
train.head(2)

Unnamed: 0,text_id,full_text,cohesion,syntax,vocabulary,phraseology,grammar,conventions,k_fold
0,976E2E05C4EB,Some businesses only work 4 days in the week m...,4.0,3.0,3.5,3.5,3.0,3.5,4
1,5AA45740A848,"Dear, TEACHER_NAME\n\nI think phone policy at ...",3.5,2.5,3.5,3.0,3.5,2.5,3


Prepare OOF for best models

In [11]:
oof_ensemble = oof[oof.run_id.isin(ensemble_run_id)].drop(['run_id', 'grid_id', 'learner_name', 'experiment'], axis = "columns").reset_index()
oof_ensemble = oof_ensemble.sort_values(['fold', 'index', 'model']).reset_index(drop = True)
oof_ensemble

Unnamed: 0,index,pred_cohesion,pred_syntax,pred_vocabulary,pred_phraseology,pred_grammar,pred_conventions,targ_cohesion,targ_syntax,targ_vocabulary,targ_phraseology,targ_grammar,targ_conventions,fold,model
0,0,1.700195,1.538086,2.048828,1.802734,1.776367,1.657227,2.0,2.0,2.0,2.5,2.0,2.0,0,bart_large_2022_11_25_20_40
1,0,2.164062,2.009766,2.332031,2.015625,1.941406,2.164062,2.0,2.0,2.0,2.5,2.0,2.0,0,blurr_deberta_large_2022_11_25_20_33
2,0,1.724609,1.588867,2.007812,1.796875,1.785156,1.735352,2.0,2.0,2.0,2.5,2.0,2.0,0,blurr_deberta_v3_base_2022_11_23_16_10
3,0,1.950195,1.750000,2.201172,1.950195,1.857422,1.984375,2.0,2.0,2.0,2.5,2.0,2.0,0,blurr_deberta_v3_large_2022_11_23_16_18
4,0,1.773438,1.750977,2.152344,1.828125,1.786133,1.911133,2.0,2.0,2.0,2.5,2.0,2.0,0,blurr_deberta_v3_small_2022_11_23_19_08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31283,782,3.083984,2.990234,3.183594,3.117188,3.052734,2.667969,3.5,3.5,4.0,4.0,3.5,3.0,4,blurr_deberta_v3_large_2022_11_23_16_18
31284,782,3.169922,3.142578,3.123047,3.210938,3.003906,2.792969,3.5,3.5,4.0,4.0,3.5,3.0,4,blurr_deberta_v3_small_2022_11_23_19_08
31285,782,3.248047,3.044922,3.283203,3.177734,2.996094,2.554688,3.5,3.5,4.0,4.0,3.5,3.0,4,blurr_deberta_xlarge_2022_11_25_20_54
31286,782,3.146484,2.947266,3.082031,3.279297,3.046875,2.500000,3.5,3.5,4.0,4.0,3.5,3.0,4,blurr_roberta_large_2022_11_23_21_13


In [12]:
oof_ensemble[oof_ensemble.model == "blurr_deberta_v3_base_2022_11_23_16_10"]

Unnamed: 0,index,pred_cohesion,pred_syntax,pred_vocabulary,pred_phraseology,pred_grammar,pred_conventions,targ_cohesion,targ_syntax,targ_vocabulary,targ_phraseology,targ_grammar,targ_conventions,fold,model
2,0,1.724609,1.588867,2.007812,1.796875,1.785156,1.735352,2.0,2.0,2.0,2.5,2.0,2.0,0,blurr_deberta_v3_base_2022_11_23_16_10
10,1,2.925781,2.685547,3.074219,2.960938,2.923828,2.882812,3.0,3.5,3.0,3.0,4.0,3.5,0,blurr_deberta_v3_base_2022_11_23_16_10
18,2,3.726562,3.644531,3.869141,3.925781,3.929688,3.835938,3.5,3.5,4.5,4.0,4.0,4.5,0,blurr_deberta_v3_base_2022_11_23_16_10
26,3,3.478516,3.207031,3.427734,3.345703,2.994141,3.228516,3.5,3.5,3.0,2.5,2.5,3.0,0,blurr_deberta_v3_base_2022_11_23_16_10
34,4,3.318359,3.027344,3.302734,3.158203,2.968750,3.304688,2.5,2.5,3.0,3.0,3.0,2.5,0,blurr_deberta_v3_base_2022_11_23_16_10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31250,778,3.332031,3.216797,3.369141,3.414062,3.146484,3.263672,2.5,3.5,3.0,3.0,2.5,3.0,4,blurr_deberta_v3_base_2022_11_23_16_10
31258,779,3.431641,3.326172,3.429688,3.480469,3.238281,3.431641,4.0,3.5,3.5,3.5,3.0,3.0,4,blurr_deberta_v3_base_2022_11_23_16_10
31266,780,3.330078,3.212891,3.376953,3.447266,3.158203,3.259766,3.5,3.5,4.0,3.0,2.5,3.5,4,blurr_deberta_v3_base_2022_11_23_16_10
31274,781,3.199219,3.109375,3.279297,3.343750,3.205078,3.185547,3.5,3.0,3.0,3.5,3.5,3.0,4,blurr_deberta_v3_base_2022_11_23_16_10


Scoring example for single oof & fold - also sanity check as QA for building `oof_ensemble_train`:

In [13]:
check = oof_ensemble[(oof_ensemble.model == "blurr_deberta_v3_base_2022_11_23_16_10") & (oof_ensemble.fold == 0)].reset_index(drop = True)
check_preds = check.loc[:, [col for col in check if col.startswith("pred_")]]
check_targs = check.loc[:, [col for col in check if col.startswith("targ_")]]
comp_metric_score(np.array(check_preds), np.array(check_targs))

0.47893095671523817

In [14]:
check_preds

Unnamed: 0,pred_cohesion,pred_syntax,pred_vocabulary,pred_phraseology,pred_grammar,pred_conventions
0,1.724609,1.588867,2.007812,1.796875,1.785156,1.735352
1,2.925781,2.685547,3.074219,2.960938,2.923828,2.882812
2,3.726562,3.644531,3.869141,3.925781,3.929688,3.835938
3,3.478516,3.207031,3.427734,3.345703,2.994141,3.228516
4,3.318359,3.027344,3.302734,3.158203,2.968750,3.304688
...,...,...,...,...,...,...
777,3.980469,3.750000,3.927734,3.890625,3.523438,3.863281
778,3.041016,2.650391,3.025391,2.722656,2.410156,2.951172
779,3.134766,3.001953,3.220703,3.187500,3.263672,3.283203
780,3.667969,3.515625,3.640625,3.679688,3.580078,3.628906


In [15]:
np.sqrt(mean_squared_error(check_preds.iloc[:, 0], check_targs.iloc[:, 0]))

0.5131803667982215

Preparing the restructure:

In [16]:
oof_cols = oof_ensemble.model.unique()
oof_cols

array(['bart_large_2022_11_25_20_40',
       'blurr_deberta_large_2022_11_25_20_33',
       'blurr_deberta_v3_base_2022_11_23_16_10',
       'blurr_deberta_v3_large_2022_11_23_16_18',
       'blurr_deberta_v3_small_2022_11_23_19_08',
       'blurr_deberta_xlarge_2022_11_25_20_54',
       'blurr_roberta_large_2022_11_23_21_13',
       'xlnet_large_cased_2022_11_23_21_16'], dtype=object)

In [17]:
oof_ensemble.head(5)

Unnamed: 0,index,pred_cohesion,pred_syntax,pred_vocabulary,pred_phraseology,pred_grammar,pred_conventions,targ_cohesion,targ_syntax,targ_vocabulary,targ_phraseology,targ_grammar,targ_conventions,fold,model
0,0,1.700195,1.538086,2.048828,1.802734,1.776367,1.657227,2.0,2.0,2.0,2.5,2.0,2.0,0,bart_large_2022_11_25_20_40
1,0,2.164062,2.009766,2.332031,2.015625,1.941406,2.164062,2.0,2.0,2.0,2.5,2.0,2.0,0,blurr_deberta_large_2022_11_25_20_33
2,0,1.724609,1.588867,2.007812,1.796875,1.785156,1.735352,2.0,2.0,2.0,2.5,2.0,2.0,0,blurr_deberta_v3_base_2022_11_23_16_10
3,0,1.950195,1.75,2.201172,1.950195,1.857422,1.984375,2.0,2.0,2.0,2.5,2.0,2.0,0,blurr_deberta_v3_large_2022_11_23_16_18
4,0,1.773438,1.750977,2.152344,1.828125,1.786133,1.911133,2.0,2.0,2.0,2.5,2.0,2.0,0,blurr_deberta_v3_small_2022_11_23_19_08


The target columns

In [18]:
targ_cols = list(oof_ensemble.columns[oof_ensemble.columns.str.startswith('targ_')])
targ_cols

['targ_cohesion',
 'targ_syntax',
 'targ_vocabulary',
 'targ_phraseology',
 'targ_grammar',
 'targ_conventions']

Pivot to wider: we'll now have each model's predictions as a df column:

In [19]:
foo = oof_ensemble.loc[:, ~oof_ensemble.columns.str.startswith('targ_')]
foo = foo.pivot(index = ["index", "fold"], columns="model").reset_index().reset_index(drop = True)
foo.columns = ['-'.join(col).strip() for col in foo.columns.values]
foo = foo.rename_axis(None, axis=1).rename(columns = {'fold-': 'fold', 'index-': 'index'})

In [20]:
foo

Unnamed: 0,index,fold,pred_cohesion-bart_large_2022_11_25_20_40,pred_cohesion-blurr_deberta_large_2022_11_25_20_33,pred_cohesion-blurr_deberta_v3_base_2022_11_23_16_10,pred_cohesion-blurr_deberta_v3_large_2022_11_23_16_18,pred_cohesion-blurr_deberta_v3_small_2022_11_23_19_08,pred_cohesion-blurr_deberta_xlarge_2022_11_25_20_54,pred_cohesion-blurr_roberta_large_2022_11_23_21_13,pred_cohesion-xlnet_large_cased_2022_11_23_21_16,...,pred_grammar-blurr_roberta_large_2022_11_23_21_13,pred_grammar-xlnet_large_cased_2022_11_23_21_16,pred_conventions-bart_large_2022_11_25_20_40,pred_conventions-blurr_deberta_large_2022_11_25_20_33,pred_conventions-blurr_deberta_v3_base_2022_11_23_16_10,pred_conventions-blurr_deberta_v3_large_2022_11_23_16_18,pred_conventions-blurr_deberta_v3_small_2022_11_23_19_08,pred_conventions-blurr_deberta_xlarge_2022_11_25_20_54,pred_conventions-blurr_roberta_large_2022_11_23_21_13,pred_conventions-xlnet_large_cased_2022_11_23_21_16
0,0,0,1.700195,2.164062,1.724609,1.950195,1.773438,2.570312,1.735352,1.591797,...,1.788086,1.742188,1.657227,2.164062,1.735352,1.984375,1.911133,2.439453,1.679688,1.798828
1,0,1,3.001953,3.177734,2.890625,3.208984,2.855469,3.503906,3.304688,3.804688,...,3.554688,3.312500,2.791016,2.910156,2.726562,2.925781,2.730469,2.871094,2.974609,2.935547
2,0,2,3.101562,3.091797,3.173828,3.289062,3.121094,3.156250,3.185547,3.052734,...,3.269531,3.505859,3.222656,2.892578,3.091797,3.125000,2.976562,3.042969,3.210938,3.287109
3,0,3,2.505859,3.171875,2.767578,3.070312,2.984375,2.802734,2.718750,2.818359,...,3.419922,3.529297,3.140625,2.986328,2.980469,3.308594,2.951172,2.710938,2.763672,3.289062
4,0,4,2.714844,2.386719,2.507812,2.603516,2.490234,2.626953,2.894531,2.707031,...,2.275391,2.275391,2.214844,2.087891,2.357422,2.304688,2.230469,2.035156,2.105469,2.123047
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3906,781,1,2.257812,2.371094,2.417969,2.429688,2.525391,2.423828,2.509766,2.357422,...,2.152344,2.212891,2.521484,2.384766,2.474609,2.658203,2.595703,2.470703,2.453125,2.478516
3907,781,2,4.027344,3.832031,3.472656,3.523438,3.521484,3.578125,3.654297,3.583984,...,3.589844,3.322266,3.425781,3.316406,3.369141,3.376953,3.357422,3.298828,3.421875,3.273438
3908,781,3,2.640625,2.742188,2.966797,3.082031,3.140625,3.115234,2.960938,2.886719,...,2.859375,3.052734,3.181641,3.291016,2.982422,3.273438,3.056641,3.228516,3.109375,3.382812
3909,781,4,3.119141,3.066406,3.199219,3.164062,3.138672,3.330078,2.826172,3.136719,...,3.115234,3.308594,3.083984,3.054688,3.185547,3.064453,3.042969,3.304688,2.808594,2.974609


Sanity checks:

In [21]:
foobar = oof_ensemble.loc[oof_ensemble.model == "xlnet_large_cased_2022_11_23_21_16", ~oof_ensemble.columns.str.startswith('targ_')].reset_index(drop = True)
foobar[foobar.fold == 0]

Unnamed: 0,index,pred_cohesion,pred_syntax,pred_vocabulary,pred_phraseology,pred_grammar,pred_conventions,fold,model
0,0,1.591797,1.541992,2.080078,1.727539,1.742188,1.798828,0,xlnet_large_cased_2022_11_23_21_16
1,1,3.048828,2.986328,3.289062,3.347656,3.376953,3.392578,0,xlnet_large_cased_2022_11_23_21_16
2,2,3.587891,3.562500,3.824219,3.880859,3.779297,3.992188,0,xlnet_large_cased_2022_11_23_21_16
3,3,3.771484,3.275391,3.546875,3.396484,3.072266,3.359375,0,xlnet_large_cased_2022_11_23_21_16
4,4,3.410156,3.056641,3.316406,3.042969,2.966797,3.400391,0,xlnet_large_cased_2022_11_23_21_16
...,...,...,...,...,...,...,...,...,...
777,777,3.974609,3.646484,3.986328,3.769531,3.570312,4.085938,0,xlnet_large_cased_2022_11_23_21_16
778,778,3.263672,3.052734,3.412109,3.132812,2.925781,3.496094,0,xlnet_large_cased_2022_11_23_21_16
779,779,3.111328,3.177734,3.396484,3.277344,3.486328,3.765625,0,xlnet_large_cased_2022_11_23_21_16
780,780,3.695312,3.488281,3.562500,3.603516,3.699219,3.658203,0,xlnet_large_cased_2022_11_23_21_16


In [22]:
foobar = oof_ensemble.loc[oof_ensemble.model == "blurr_deberta_v3_base_2022_11_23_16_10", ~oof_ensemble.columns.str.startswith('targ_')].reset_index(drop = True)
foobar[foobar.fold == 0]

Unnamed: 0,index,pred_cohesion,pred_syntax,pred_vocabulary,pred_phraseology,pred_grammar,pred_conventions,fold,model
0,0,1.724609,1.588867,2.007812,1.796875,1.785156,1.735352,0,blurr_deberta_v3_base_2022_11_23_16_10
1,1,2.925781,2.685547,3.074219,2.960938,2.923828,2.882812,0,blurr_deberta_v3_base_2022_11_23_16_10
2,2,3.726562,3.644531,3.869141,3.925781,3.929688,3.835938,0,blurr_deberta_v3_base_2022_11_23_16_10
3,3,3.478516,3.207031,3.427734,3.345703,2.994141,3.228516,0,blurr_deberta_v3_base_2022_11_23_16_10
4,4,3.318359,3.027344,3.302734,3.158203,2.968750,3.304688,0,blurr_deberta_v3_base_2022_11_23_16_10
...,...,...,...,...,...,...,...,...,...
777,777,3.980469,3.750000,3.927734,3.890625,3.523438,3.863281,0,blurr_deberta_v3_base_2022_11_23_16_10
778,778,3.041016,2.650391,3.025391,2.722656,2.410156,2.951172,0,blurr_deberta_v3_base_2022_11_23_16_10
779,779,3.134766,3.001953,3.220703,3.187500,3.263672,3.283203,0,blurr_deberta_v3_base_2022_11_23_16_10
780,780,3.667969,3.515625,3.640625,3.679688,3.580078,3.628906,0,blurr_deberta_v3_base_2022_11_23_16_10


In [23]:
check

Unnamed: 0,index,pred_cohesion,pred_syntax,pred_vocabulary,pred_phraseology,pred_grammar,pred_conventions,targ_cohesion,targ_syntax,targ_vocabulary,targ_phraseology,targ_grammar,targ_conventions,fold,model
0,0,1.724609,1.588867,2.007812,1.796875,1.785156,1.735352,2.0,2.0,2.0,2.5,2.0,2.0,0,blurr_deberta_v3_base_2022_11_23_16_10
1,1,2.925781,2.685547,3.074219,2.960938,2.923828,2.882812,3.0,3.5,3.0,3.0,4.0,3.5,0,blurr_deberta_v3_base_2022_11_23_16_10
2,2,3.726562,3.644531,3.869141,3.925781,3.929688,3.835938,3.5,3.5,4.5,4.0,4.0,4.5,0,blurr_deberta_v3_base_2022_11_23_16_10
3,3,3.478516,3.207031,3.427734,3.345703,2.994141,3.228516,3.5,3.5,3.0,2.5,2.5,3.0,0,blurr_deberta_v3_base_2022_11_23_16_10
4,4,3.318359,3.027344,3.302734,3.158203,2.968750,3.304688,2.5,2.5,3.0,3.0,3.0,2.5,0,blurr_deberta_v3_base_2022_11_23_16_10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
777,777,3.980469,3.750000,3.927734,3.890625,3.523438,3.863281,4.0,4.5,4.5,4.0,4.5,4.5,0,blurr_deberta_v3_base_2022_11_23_16_10
778,778,3.041016,2.650391,3.025391,2.722656,2.410156,2.951172,2.5,2.5,2.5,3.0,2.5,3.5,0,blurr_deberta_v3_base_2022_11_23_16_10
779,779,3.134766,3.001953,3.220703,3.187500,3.263672,3.283203,3.0,3.0,3.0,2.5,3.0,4.0,0,blurr_deberta_v3_base_2022_11_23_16_10
780,780,3.667969,3.515625,3.640625,3.679688,3.580078,3.628906,3.5,3.0,3.5,3.5,4.0,3.0,0,blurr_deberta_v3_base_2022_11_23_16_10


The targets to join back onto:

In [24]:
bar = oof_ensemble.loc[oof_ensemble.model == oof_ensemble.model[0], ['fold', 'index'] + targ_cols].reset_index(drop = True) # drop(columns = ['fold', 'index'])
bar


Unnamed: 0,fold,index,targ_cohesion,targ_syntax,targ_vocabulary,targ_phraseology,targ_grammar,targ_conventions
0,0,0,2.0,2.0,2.0,2.5,2.0,2.0
1,0,1,3.0,3.5,3.0,3.0,4.0,3.5
2,0,2,3.5,3.5,4.5,4.0,4.0,4.5
3,0,3,3.5,3.5,3.0,2.5,2.5,3.0
4,0,4,2.5,2.5,3.0,3.0,3.0,2.5
...,...,...,...,...,...,...,...,...
3906,4,778,2.5,3.5,3.0,3.0,2.5,3.0
3907,4,779,4.0,3.5,3.5,3.5,3.0,3.0
3908,4,780,3.5,3.5,4.0,3.0,2.5,3.5
3909,4,781,3.5,3.0,3.0,3.5,3.5,3.0


Sanity checks:

In [25]:
bar[bar.fold == 0]

Unnamed: 0,fold,index,targ_cohesion,targ_syntax,targ_vocabulary,targ_phraseology,targ_grammar,targ_conventions
0,0,0,2.0,2.0,2.0,2.5,2.0,2.0
1,0,1,3.0,3.5,3.0,3.0,4.0,3.5
2,0,2,3.5,3.5,4.5,4.0,4.0,4.5
3,0,3,3.5,3.5,3.0,2.5,2.5,3.0
4,0,4,2.5,2.5,3.0,3.0,3.0,2.5
...,...,...,...,...,...,...,...,...
777,0,777,4.0,4.5,4.5,4.0,4.5,4.5
778,0,778,2.5,2.5,2.5,3.0,2.5,3.5
779,0,779,3.0,3.0,3.0,2.5,3.0,4.0
780,0,780,3.5,3.0,3.5,3.5,4.0,3.0


In [26]:
check

Unnamed: 0,index,pred_cohesion,pred_syntax,pred_vocabulary,pred_phraseology,pred_grammar,pred_conventions,targ_cohesion,targ_syntax,targ_vocabulary,targ_phraseology,targ_grammar,targ_conventions,fold,model
0,0,1.724609,1.588867,2.007812,1.796875,1.785156,1.735352,2.0,2.0,2.0,2.5,2.0,2.0,0,blurr_deberta_v3_base_2022_11_23_16_10
1,1,2.925781,2.685547,3.074219,2.960938,2.923828,2.882812,3.0,3.5,3.0,3.0,4.0,3.5,0,blurr_deberta_v3_base_2022_11_23_16_10
2,2,3.726562,3.644531,3.869141,3.925781,3.929688,3.835938,3.5,3.5,4.5,4.0,4.0,4.5,0,blurr_deberta_v3_base_2022_11_23_16_10
3,3,3.478516,3.207031,3.427734,3.345703,2.994141,3.228516,3.5,3.5,3.0,2.5,2.5,3.0,0,blurr_deberta_v3_base_2022_11_23_16_10
4,4,3.318359,3.027344,3.302734,3.158203,2.968750,3.304688,2.5,2.5,3.0,3.0,3.0,2.5,0,blurr_deberta_v3_base_2022_11_23_16_10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
777,777,3.980469,3.750000,3.927734,3.890625,3.523438,3.863281,4.0,4.5,4.5,4.0,4.5,4.5,0,blurr_deberta_v3_base_2022_11_23_16_10
778,778,3.041016,2.650391,3.025391,2.722656,2.410156,2.951172,2.5,2.5,2.5,3.0,2.5,3.5,0,blurr_deberta_v3_base_2022_11_23_16_10
779,779,3.134766,3.001953,3.220703,3.187500,3.263672,3.283203,3.0,3.0,3.0,2.5,3.0,4.0,0,blurr_deberta_v3_base_2022_11_23_16_10
780,780,3.667969,3.515625,3.640625,3.679688,3.580078,3.628906,3.5,3.0,3.5,3.5,4.0,3.0,0,blurr_deberta_v3_base_2022_11_23_16_10


In [27]:
oof_ensemble_train = foo.merge(bar, how = "left", on = ['fold', 'index'])

Everything is together in this combined df now:

In [28]:
oof_ensemble_train

Unnamed: 0,index,fold,pred_cohesion-bart_large_2022_11_25_20_40,pred_cohesion-blurr_deberta_large_2022_11_25_20_33,pred_cohesion-blurr_deberta_v3_base_2022_11_23_16_10,pred_cohesion-blurr_deberta_v3_large_2022_11_23_16_18,pred_cohesion-blurr_deberta_v3_small_2022_11_23_19_08,pred_cohesion-blurr_deberta_xlarge_2022_11_25_20_54,pred_cohesion-blurr_roberta_large_2022_11_23_21_13,pred_cohesion-xlnet_large_cased_2022_11_23_21_16,...,pred_conventions-blurr_deberta_v3_small_2022_11_23_19_08,pred_conventions-blurr_deberta_xlarge_2022_11_25_20_54,pred_conventions-blurr_roberta_large_2022_11_23_21_13,pred_conventions-xlnet_large_cased_2022_11_23_21_16,targ_cohesion,targ_syntax,targ_vocabulary,targ_phraseology,targ_grammar,targ_conventions
0,0,0,1.700195,2.164062,1.724609,1.950195,1.773438,2.570312,1.735352,1.591797,...,1.911133,2.439453,1.679688,1.798828,2.0,2.0,2.0,2.5,2.0,2.0
1,0,1,3.001953,3.177734,2.890625,3.208984,2.855469,3.503906,3.304688,3.804688,...,2.730469,2.871094,2.974609,2.935547,3.5,3.5,3.5,3.5,3.0,3.5
2,0,2,3.101562,3.091797,3.173828,3.289062,3.121094,3.156250,3.185547,3.052734,...,2.976562,3.042969,3.210938,3.287109,3.0,3.0,3.5,3.0,3.5,3.5
3,0,3,2.505859,3.171875,2.767578,3.070312,2.984375,2.802734,2.718750,2.818359,...,2.951172,2.710938,2.763672,3.289062,3.0,2.0,3.0,3.5,3.0,3.0
4,0,4,2.714844,2.386719,2.507812,2.603516,2.490234,2.626953,2.894531,2.707031,...,2.230469,2.035156,2.105469,2.123047,3.0,2.5,2.5,2.0,2.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3906,781,1,2.257812,2.371094,2.417969,2.429688,2.525391,2.423828,2.509766,2.357422,...,2.595703,2.470703,2.453125,2.478516,2.5,2.0,2.5,2.0,2.0,2.5
3907,781,2,4.027344,3.832031,3.472656,3.523438,3.521484,3.578125,3.654297,3.583984,...,3.357422,3.298828,3.421875,3.273438,3.0,3.5,3.5,3.0,3.5,3.0
3908,781,3,2.640625,2.742188,2.966797,3.082031,3.140625,3.115234,2.960938,2.886719,...,3.056641,3.228516,3.109375,3.382812,2.5,2.5,2.5,2.0,2.0,2.0
3909,781,4,3.119141,3.066406,3.199219,3.164062,3.138672,3.330078,2.826172,3.136719,...,3.042969,3.304688,2.808594,2.974609,3.5,3.0,3.0,3.5,3.5,3.0


Final sanity check:

In [29]:
np.sqrt(mean_squared_error(
    oof_ensemble_train.loc[oof_ensemble_train.fold == 0, "pred_cohesion-blurr_deberta_v3_base_2022_11_23_16_10"],
    oof_ensemble_train.loc[oof_ensemble_train.fold == 0, "targ_cohesion"]
    ))

0.5131803667982215

In [30]:
np.sqrt(mean_squared_error(check_preds.iloc[:, 0], check_targs.iloc[:, 0]))

0.5131803667982215

In [31]:
oof_ensemble_train.to_csv("oof_ensemble_train.csv", index = False)

# Models

In [32]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor

from sklearn.multioutput import MultiOutputRegressor

import lightgbm as lgb

from joblib import dump, load

### Simple example with multi-output regressor:

In [33]:
train_oof = oof_ensemble_train[oof_ensemble_train.fold != 0]
valid_oof = oof_ensemble_train[oof_ensemble_train.fold == 0]

In [34]:
X = train_oof.drop(columns = ['fold'] + targ_cols).values
y = train_oof.loc[:, targ_cols].values

In [35]:
X.shape, y.shape

((3129, 49), (3129, 6))

In [36]:
mod = MultiOutputRegressor(Ridge(random_state=4321))
mod.fit(X, y)

MultiOutputRegressor(estimator=Ridge(random_state=4321))

In [37]:
y_pred = mod.predict(X)

In [38]:
y_pred.shape

(3129, 6)

In [39]:
comp_metric_score(y, y_pred)

0.44352825852967975

In a loop:

In [62]:
oof_pred = pd.DataFrame()
oof_scores = []

for i in range(5):
    train_oof = oof_ensemble_train[oof_ensemble_train.fold != i]
    valid_oof = oof_ensemble_train[oof_ensemble_train.fold == i]

    X = train_oof.drop(columns = ['fold'] + targ_cols).values
    y = train_oof.loc[:, targ_cols].values
    
    X_valid = valid_oof.drop(columns = ['fold'] + targ_cols).values
    y_valid = valid_oof.loc[:, targ_cols].values

    mod = MultiOutputRegressor(Ridge(random_state=4321))
    mod.fit(X, y)

    dump(mod, f'mod_ridge_v1_fold_{i}.joblib')    

    y_pred = mod.predict(X_valid)
    oof_scores.append(comp_metric_score(y_valid, y_pred))
    

np.mean(oof_scores), oof_scores

(0.4582612859123323,
 [0.4702346843655463,
  0.4547056870643935,
  0.44685136146328147,
  0.4635779963463332,
  0.45593670032210704])

That's a start. With Ridge Regression only.

### LightGBM

Using default params to demonstrate passing of parameters:

In [63]:
params = {'n_estimators': 100, 'learning_rate': 0.1}

In [64]:
oof_pred = pd.DataFrame()
oof_scores = []

for i in range(5):
    train_oof = oof_ensemble_train[oof_ensemble_train.fold != i]
    valid_oof = oof_ensemble_train[oof_ensemble_train.fold == i]

    X = train_oof.drop(columns = ['fold'] + targ_cols).values
    y = train_oof.loc[:, targ_cols].values

    X_valid = valid_oof.drop(columns = ['fold'] + targ_cols).values
    y_valid = valid_oof.loc[:, targ_cols].values

    mod = MultiOutputRegressor(lgb.LGBMRegressor(random_state=4321, **params))
    mod.fit(X, y)
    
    dump(mod, f'mod_lightgbm_v1_fold_{i}.joblib')

    y_pred = mod.predict(X_valid)
    oof_scores.append(comp_metric_score(y_valid, y_pred))
    

np.mean(oof_scores), oof_scores

(0.47194744454122783,
 [0.4819029230648639,
  0.4715829305227279,
  0.4614212170389221,
  0.47345244943691006,
  0.4713777026427153])

Alright, we get reasonable results. Could do some tuning from here.