In [None]:
import numpy as np
import pandas as pd
import subprocess
import joblib

from time import time
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.svm import SVR
import matplotlib.pyplot as plt

# Sample submission: its 'id' column seeds the `outputs` frame that predictions
# are merged onto, and its row count is checked in the config cell to decide
# whether DEBUG stays enabled.
submission_df = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")

In [None]:
# --- Run configuration -------------------------------------------------------
# `trust` selects one of the candidate model sets listed in _MODEL_SETS below
# (0 = TrustCV, 1 = TrustLB, 2 = TrustDiv, 3 = TrustZoo).
# NOTE(review): the original note on this line read "TrustDiV", but index 3
# selects 'TrustZoo' -- confirm which set is actually intended.
trust = 3
# `option` indexes into "ABCDM" to choose the ensembling strategy; 4 selects "M".
option = 4
# Requested debug flag. It is only honoured (see below) when the sample
# submission has exactly 7 rows; otherwise the full pipeline runs.
DEBUG = True

# (model name, experiment IDs) pairs. IDs are mostly ints but can be strings
# (e.g. '26ish'); in the cells shown they are only interpolated into file and
# column names.
_MODEL_SETS = (
    ('TrustCV', [36, 37, 34, 32, 30, '26ish', 24, 18, 10, 7]),               # Trust CV, C=0.59
    ('TrustLB', [7, 10, 14, 18, 23, 24, '26ish', 36, 37]),                   # Trust LB, C=0.59
    ('TrustDiv', [10, 14, 16, 18, 19, 20, 23, '26ish', 27, 30, 31, 32, 34]), # Trust Diversity, C=0.59
    # Model Zoo = Diversity + LB + CV Models
    ('TrustZoo', [7, 10, 14, 16, 18, 19, 20, 23, 24, '26ish', 27, 30, 31, 32, 34, 36, 37]),
)
ModelName, EIDs = _MODEL_SETS[trust]
EnsembleType = "ABCDM"[option]

# Debug mode survives only if it was requested AND the sample file has 7 rows.
DEBUG = DEBUG and len(submission_df) == 7

# One meta-feature column per selected experiment.
meta_features = [f'oof{eid}' for eid in EIDs]
print(f'{ModelName}/{EnsembleType}', 'Debug' if DEBUG else '')

In [None]:
# Display the experiment IDs selected by `trust` for this run.
EIDs

In [None]:
# Display the derived meta-feature column names (one 'oof<eid>' per experiment ID).
meta_features

In [None]:
# Wuh??
!cp ../input/d/authman/commonlit-ensemble/* ./
ensemble = joblib.load(f'./{ModelName}.jlib')
scripts = [
    f'./EXP{eid}_submission.py'
    for eid in EIDs
]
scripts

In [None]:
# Display the loaded ensemble object. Later cells index it with string keys
# such as 'mean_score', 'ac_ss' and 'bd_ss'.
ensemble

In [None]:
for script in scripts:
    if DEBUG: continue
    t = time()
    !python {script}
    print(f'Finished {script}, {(time()-t)/60:3} mins')

In [None]:
def _read_experiment_predictions(eid):
    """Load one experiment's submission CSV, renaming `target` to `oof<eid>`."""
    predictions = pd.read_csv(f'./EXP{eid}_submission.csv')
    return predictions.rename(columns={'target': f'oof{eid}'})


# `subs` holds one frame per experiment, in the same order as EIDs.
# It is left undefined in DEBUG mode.
if not DEBUG:
    subs = [_read_experiment_predictions(eid) for eid in EIDs]

In [None]:
# Training data; the ensemble cells use its target mean to re-centre predictions.
train_df = pd.read_csv("../input/commonlitreadabilityprize/train.csv")

# Start from the submission ids and attach one `oof<eid>` column per experiment.
# In DEBUG mode `outputs` keeps only the 'id' column.
outputs = submission_df[['id']].copy()

if not DEBUG:
    # `subs` was built from EIDs; a mismatch means a stale cell was not re-run.
    assert len(subs) == len(EIDs), 'subs is out of sync with EIDs; re-run the loading cell'

    for eid, exp_df in zip(EIDs, subs):
        outputs = outputs.merge(
            exp_df[['id', f'oof{eid}']],
            how='inner',
            on='id',
            # Fail immediately if either side has duplicate ids instead of
            # silently multiplying rows.
            validate='one_to_one',
        )

    # An inner merge drops ids missing from any experiment. Catch that here
    # rather than as a length mismatch when `target` is assigned later.
    assert len(outputs) == len(submission_df), (
        f'merge kept {len(outputs)} of {len(submission_df)} submission rows'
    )
    print(outputs.shape)

In [None]:
# Preview the merged per-experiment predictions (only the 'id' column in DEBUG mode).
outputs.head()

In [None]:
# Pairwise correlation between the experiments' predictions.
# Numeric columns are selected explicitly: pandas >= 2.0 no longer drops
# non-numeric columns in `corr()` by default, so a non-numeric `id` column
# would make the bare call raise.
outputs.select_dtypes(include='number').corr()

# Ensemble Mean

In [None]:
if not DEBUG and EnsembleType == "M":
    target_mean = train_df.target.mean()

    # Re-centre every model's predictions on the training target mean,
    # one column at a time.
    for col in meta_features:
        outputs[col] = outputs[col] - outputs[col].mean() + target_mean

    # Plain average across the re-centred models.
    outputs['pred'] = outputs[meta_features].mean(axis=1)

    # Additional final mean hack: re-centre the blended prediction as well,
    # then write it into a freshly loaded sample submission.
    submission_df = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")
    blended = outputs.pred
    submission_df.target = blended.values - blended.mean() + target_mean
    submission_df.to_csv('submission.csv', index=False)
    print('Praying for:', ensemble['mean_score'])

# Ensemble A

In [None]:
if not DEBUG and EnsembleType == "A":
    print('One SVR')
    # Scaler and model stored in the ensemble artifact under 'ac_ss' / 'a_model'.
    ss = ensemble['ac_ss']
    r = ensemble['a_model']

    # Scale a copy so `outputs` keeps the raw predictions. Writing the scaled
    # values back into `outputs` would apply the scaler again on every re-run
    # of this cell.
    scaled = outputs[meta_features].copy()
    scaled[meta_features] = ss.transform(scaled)
    outputs['pred'] = r.predict(scaled)
    print('Praying for:', ensemble['a_score'])

    # Optional final mean hack: shift predictions onto the training target mean.
    submission_df = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")
    submission_df.target = outputs.pred.values - outputs.pred.mean() + train_df.target.mean()
    submission_df.to_csv('submission.csv', index=False)


# Ensemble C

In [None]:
if not DEBUG and EnsembleType == "C":
    print('One LR')
    # Scaler and model stored in the ensemble artifact under 'ac_ss' / 'c_model'.
    ss = ensemble['ac_ss']
    r = ensemble['c_model']

    # Scale a copy so `outputs` keeps the raw predictions. Writing the scaled
    # values back into `outputs` would apply the scaler again on every re-run
    # of this cell.
    scaled = outputs[meta_features].copy()
    scaled[meta_features] = ss.transform(scaled)
    outputs['pred'] = r.predict(scaled)
    print('Praying for:', ensemble['c_score'])

    # Optional final mean hack: shift predictions onto the training target mean.
    submission_df = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")
    submission_df.target = outputs.pred.values - outputs.pred.mean() + train_df.target.mean()
    submission_df.to_csv('submission.csv', index=False)


# Ensemble B

In [None]:
if not DEBUG and EnsembleType == "B":
    print('5 SVRs')
    # Per-fold scalers and models from the ensemble artifact, indexed by fold number.
    fold_scalers = ensemble['bd_ss']
    fold_models = ensemble['b_models']
    NUM_KFOLDS = len(fold_scalers)

    raw_features = outputs[meta_features]
    fold_preds = []
    for fold in range(NUM_KFOLDS):
        # Scale a fresh copy for each fold so `outputs` itself is never modified.
        scaled = raw_features.copy()
        scaled[meta_features] = fold_scalers[fold].transform(raw_features)
        fold_preds.append(fold_models[fold].predict(scaled))

    # Final prediction is the element-wise average over the folds.
    outputs['pred'] = np.mean(fold_preds, axis=0)
    print('Praying for:', ensemble['b_score'])

    # Optional final mean hack: shift predictions onto the training target mean.
    submission_df = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")
    centred = outputs.pred.values - outputs.pred.mean()
    submission_df.target = centred + train_df.target.mean()
    submission_df.to_csv('submission.csv', index=False)

# Ensemble D

In [None]:
if not DEBUG and EnsembleType == "D":
    print('5 LRs')
    # Per-fold scalers and models from the ensemble artifact, indexed by fold number.
    fold_scalers = ensemble['bd_ss']
    fold_models = ensemble['d_models']
    NUM_KFOLDS = len(fold_scalers)

    raw_features = outputs[meta_features]
    fold_preds = []
    for fold in range(NUM_KFOLDS):
        # Scale a fresh copy for each fold so `outputs` itself is never modified.
        scaled = raw_features.copy()
        scaled[meta_features] = fold_scalers[fold].transform(raw_features)
        fold_preds.append(fold_models[fold].predict(scaled))

    # Final prediction is the element-wise average over the folds.
    outputs['pred'] = np.mean(fold_preds, axis=0)
    print('Praying for:', ensemble['d_score'])

    # Optional final mean hack: shift predictions onto the training target mean.
    submission_df = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")
    centred = outputs.pred.values - outputs.pred.mean()
    submission_df.target = centred + train_df.target.mean()
    submission_df.to_csv('submission.csv', index=False)

# Debug fallback: submit the sample file unchanged

In [None]:
# In DEBUG mode no model is run: the sample submission is re-read and written
# back out as-is so that a `submission.csv` still exists.
if DEBUG:
    sample_path = "../input/commonlitreadabilityprize/sample_submission.csv"
    submission_df = pd.read_csv(sample_path)
    submission_df.to_csv('submission.csv', index=False)

# Results

In [None]:
# Preview the frame that was written to submission.csv.
submission_df.head()

# Inspection

In [None]:
# Histogram (35 bins) of the submitted target values, as a quick sanity check
# of the prediction distribution.
fig, ax = plt.subplots()
ax.set_title('Submission')
ax.hist(submission_df.target, 35)
plt.show()