In [6]:
import numpy as np
import pandas as pd
from scipy.stats import rankdata
from sklearn.metrics import cohen_kappa_score, mean_squared_error

# Column labels for the one-row results frame built further down:
# 50 per-fold scores, 50 per-fold RMSEs, the 4 bin borders, then the two
# ensemble metrics.
# `score_cols` has to exist before `cols` is assembled; previously it was only
# defined in a later cell, so this cell raised NameError on a fresh kernel.
qwk_cols = ["qwk{}".format(i) for i in range(50)]  # not used in the cells shown; kept for compatibility
score_cols = ["score{}".format(i) for i in range(50)]
rmse_cols = ["rmse{}".format(i) for i in range(50)]
coef_cols = ["coef{}".format(i) for i in range(4)]
cols = score_cols + rmse_cols + coef_cols + ["ensemble_score", "ensemble_rmse"]

def get_score(y_true, y_pred):
    """Return Cohen's kappa with quadratic weights for ``y_pred`` against ``y_true``."""
    kappa = cohen_kappa_score(y_true, y_pred, weights='quadratic')
    return kappa

def to_bins(x, borders):
    """Return the index of the first border that ``x`` does not exceed.

    Borders are scanned in the order given (they are not sorted here). If
    ``x`` is greater than every border, or ``borders`` is empty, the result
    is ``len(borders)``.
    """
    overflow_bin = len(borders)
    matches = (position for position, upper in enumerate(borders) if x <= upper)
    return next(matches, overflow_bin)

class OptimizedRounder(object):
    """Search for bin borders that maximise ``get_score`` after binning.

    Continuous predictions are turned into integer bins with ``to_bins``;
    ``fit`` tunes four borders one at a time and stores them, ``predict``
    applies a set of borders, and ``coefficients`` returns the fitted ones.
    """

    def __init__(self):
        # Placeholder until fit() replaces it with {'x': [border, ...]}.
        self.coef_ = 0

    def _loss(self, coef, X, y, idx):
        """Return the negated score of ``X`` binned with ``coef`` against ``y``.

        ``idx`` is accepted so existing call sites keep working; it is unused.
        """
        X_p = np.array([to_bins(pred, coef) for pred in X])
        ll = -get_score(y, X_p)
        return ll

    def fit(self, X, y):
        """Tune the four borders coordinate-wise and store them in ``self.coef_``.

        For 10 sweeps, each border in turn is searched inside its fixed
        starting interval while the other borders keep their current values.
        Each search runs 20 steps that move the interval endpoint with the
        larger loss towards the other endpoint by a golden-ratio fraction.
        The border ends each search at whichever endpoint was evaluated last.
        """
        coef = [1.5, 2.0, 2.5, 3.0]
        golden1 = 0.618
        golden2 = 1 - golden1
        # One (low, high) search interval per border; re-used on every sweep.
        ab_start = [(1, 2), (1.5, 2.5), (2, 3), (2.5, 3.5)]
        for it1 in range(10):
            for idx in range(4):
                # golden section search
                a, b = ab_start[idx]
                # calc losses at both interval endpoints
                coef[idx] = a
                la = self._loss(coef, X, y, idx)
                coef[idx] = b
                lb = self._loss(coef, X, y, idx)
                for it in range(20):
                    # move the worse endpoint inwards and re-evaluate it
                    if la > lb:
                        a = b - (b - a) * golden1
                        coef[idx] = a
                        la = self._loss(coef, X, y, idx)
                    else:
                        b = b - (b - a) * golden2
                        coef[idx] = b
                        lb = self._loss(coef, X, y, idx)
        self.coef_ = {'x': coef}

    def predict(self, X, coef=None):
        """Bin every value of ``X`` with ``coef`` and return a NumPy array.

        When ``coef`` is omitted the fitted borders are used, which requires
        a prior call to ``fit`` (otherwise ``RuntimeError`` is raised).
        """
        if coef is None:
            coef = self.coefficients()
        X_p = np.array([to_bins(pred, coef) for pred in X])
        return X_p

    def coefficients(self):
        """Return the list of fitted borders.

        Raises
        ------
        RuntimeError
            If ``fit`` has not stored any borders yet.
        """
        # Before fit() coef_ is the int placeholder; indexing it would fail
        # with an unhelpful "'int' object is not subscriptable".
        if not isinstance(self.coef_, dict):
            raise RuntimeError("OptimizedRounder has not been fitted; call fit() first.")
        return self.coef_['x']

In [7]:
# For every seed, join the five per-fold arrays (in fold order) into one flat
# array of predictions and one of targets.
pred, test = [], []
for seed in [0, 10, 50, 88, 150, 255, 777, 1022, 2019, 2100]:
    y_pred = None
    y_true = None
    for fold_id in range(5):
        fold_pred = np.load("th/pred_seed{}_fold{}.npy".format(seed, fold_id))
        fold_true = np.load("th/true_seed{}_fold{}.npy".format(seed, fold_id))
        # The first fold starts the running array; later folds are appended to it.
        y_pred = fold_pred if y_pred is None else np.append(y_pred, fold_pred)
        y_true = fold_true if y_true is None else np.append(y_true, fold_true)
    pred.append(y_pred)
    test.append(y_true)

In [8]:
# Element-wise average of the per-seed prediction arrays.
predict = np.array(pred).mean(axis=0)
# NOTE(review): uses the first seed's targets for the averaged predictions;
# assumes every seed stores its rows in the same order - confirm.
true = test[0]

# Fit the bin borders on the averaged predictions, then bin with them.
optR = OptimizedRounder()
optR.fit(predict, true)
coefficients = optR.coefficients()
y_pred_opt = optR.predict(predict, coefficients)

ensemble_score = get_score(true, y_pred_opt)
ensemble_rmse = np.sqrt(mean_squared_error(true, y_pred_opt))
(ensemble_score, ensemble_rmse)

(0.4526553519811738, 1.233035350486185)

In [9]:
# Score every (seed, fold) pair separately with the borders fitted on the
# averaged predictions.
scores = []
rmses = []
for seed in [0, 10, 50, 88, 150, 255, 777, 1022, 2019, 2100]:
    score = []
    rmse = []
    for fold_id in range(5):
        y_pred = np.load("th/pred_seed{}_fold{}.npy".format(seed, fold_id))
        y_true = np.load("th/true_seed{}_fold{}.npy".format(seed, fold_id))
        y_pred_opt = optR.predict(y_pred, coefficients)
        score.append(get_score(y_true, y_pred_opt))
        rmse.append(np.sqrt(mean_squared_error(y_true, y_pred_opt)))
    scores.extend(score)
    rmses.extend(rmse)

# Build the labels from what was actually computed and rebuild `cols` here.
# Previously only score_cols/coef_cols were refreshed and `cols` was taken
# from an earlier cell, so the frame's labels depended on hidden kernel state.
score_cols = ["score{}".format(i) for i in range(len(scores))]
rmse_cols = ["rmse{}".format(i) for i in range(len(rmses))]
coef_cols = ["coef{}".format(i) for i in range(len(coefficients))]
cols = score_cols + rmse_cols + coef_cols + ["ensemble_score", "ensemble_rmse"]
df = pd.DataFrame([scores + rmses + list(coefficients) + [ensemble_score, ensemble_rmse]], columns=cols)

In [12]:
# Mean / min / max over the per-fold values followed by the ensemble value:
# first for the score, then for the RMSE.
(
    np.mean(scores), np.min(scores), np.max(scores), ensemble_score,
    np.mean(rmses), np.min(rmses), np.max(rmses), ensemble_rmse,
)

(0.44606188515048667,
 0.41694376161519164,
 0.468932939006086,
 0.4526553519811738,
 1.2410667844762946,
 1.217166312086024,
 1.261372548907773,
 1.233035350486185)

In [13]:
# Keep every (seed, fold) array separately, in seed-major order, so the entry
# for a given pair sits at index fold_id + 5 * (position of the seed).
p_list, t_list = [], []
for seed in [0, 10, 50, 88, 150, 255, 777, 1022, 2019, 2100]:
    for fold_id in range(5):
        y_pred = np.load("th/pred_seed{}_fold{}.npy".format(seed, fold_id))
        y_true = np.load("th/true_seed{}_fold{}.npy".format(seed, fold_id))
        p_list += [y_pred]
        t_list += [y_true]

In [17]:
import GPy
import GPyOpt

# Evaluation log: training() appends one single-element list per call, holding
# scores + rmses + coefficients + [ensemble_score, ensemble_rmse].
a = []


def training(x):
    """Objective for the Bayesian search over the four bin borders.

    Parameters
    ----------
    x : 2-D array
        Columns 0..3 must each hold exactly one value (a single candidate
        row); a column with any other number of values raises ValueError.

    Returns
    -------
    The largest per-fold RMSE plus the ensemble RMSE for this candidate.

    Side effects
    ------------
    Appends the full metric record to the module-level list ``a`` and prints
    a one-line summary.
    """
    # .item() pulls the single value out of each column explicitly; calling
    # float() on a 1-element array relies on an implicit conversion that
    # NumPy has deprecated.
    coefficients = [float(x[:, i].item()) for i in range(4)]

    scores = []
    rmses = []
    for seed in range(10):
        for fold in range(5):
            k = fold + seed * 5  # position of this (seed, fold) pair in p_list / t_list
            y_pred_opt = optR.predict(p_list[k], coefficients)
            scores.append(get_score(t_list[k], y_pred_opt))
            rmses.append(np.sqrt(mean_squared_error(t_list[k], y_pred_opt)))

    # Same borders applied to the seed-averaged predictions.
    y_pred_opt = optR.predict(predict, coefficients)
    ensemble_score = get_score(true, y_pred_opt)
    ensemble_rmse = np.sqrt(mean_squared_error(true, y_pred_opt))

    a.append([scores+rmses+coefficients+[ensemble_score, ensemble_rmse]])

    print(round(np.mean(scores), 5), round(np.min(scores), 5), round(np.max(scores), 5),
          round(ensemble_score, 5), round(np.mean(rmses), 5), round(np.min(rmses), 5), round(np.max(rmses), 5),
          round(ensemble_rmse, 5), [round(c, 5) for c in coefficients])

    return np.max(rmses)+ensemble_rmse

# One continuous search interval per bin border.
bounds = [
    {'name': name, 'type': 'continuous', 'domain': domain}
    for name, domain in zip(
        ['c1', 'c2', 'c3', 'c4'],
        [(1, 2), (1.5, 2.5), (2, 3), (2.5, 3.5)],
    )
]
myBopt = GPyOpt.methods.BayesianOptimization(
    f=training,
    domain=bounds,
    initial_design_numdata=5,
    acquisition_type='EI',
    num_cores=10,
)
myBopt.run_optimization(max_iter=500)

0.43595 0.41341 0.45324 0.43947 1.19197 1.17024 1.2094 1.18824 [1.20163, 2.11346, 2.41091, 3.02192]
0.39572 0.36715 0.41999 0.39966 1.10572 1.08661 1.12045 1.10147 [1.2089, 1.84487, 2.61133, 3.28062]
0.43481 0.40508 0.4566 0.43933 1.17191 1.15864 1.19463 1.16666 [1.23844, 1.92676, 2.60826, 2.87004]
0.40169 0.37866 0.4413 0.40365 1.29936 1.24854 1.32508 1.29627 [1.30436, 1.95448, 2.32262, 2.54156]
0.30639 0.273 0.33859 0.30442 1.1873 1.16281 1.20519 1.1856 [1.60788, 1.70721, 2.02791, 3.24657]
0.32356 0.28339 0.3554 0.32575 1.15 1.12075 1.19477 1.14712 [1.19487, 1.78797, 2.95142, 3.32798]
0.37975 0.35149 0.40017 0.38052 1.08619 1.06945 1.1043 1.08484 [1.13477, 1.68799, 2.54551, 3.28596]
0.34763 0.32795 0.37192 0.34893 1.08595 1.07178 1.11183 1.08392 [1.0, 1.71012, 2.43596, 3.5]
0.32419 0.29265 0.35566 0.32702 1.08174 1.06695 1.10566 1.0784 [1.22913, 1.5, 2.46468, 3.5]
0.33632 0.31312 0.3649 0.33793 1.08876 1.07147 1.11108 1.08687 [1.0, 1.5, 2.36887, 3.36794]
0.36379 0.33827 0.38398 0.366

0.35838 0.3254 0.37848 0.35872 1.08396 1.06695 1.10279 1.08136 [1.35032, 1.63637, 2.54273, 3.39125]
0.34142 0.30811 0.36463 0.34212 1.08058 1.05989 1.09809 1.07877 [1.0, 1.5, 2.5476, 3.40201]
0.34145 0.30811 0.36463 0.34212 1.08058 1.05974 1.09809 1.07877 [1.0, 1.5, 2.54762, 3.40186]
0.33288 0.29304 0.35564 0.33483 1.08619 1.06867 1.09824 1.08352 [1.0402, 1.52884, 2.62495, 3.42901]
0.35351 0.32193 0.36878 0.35345 1.08482 1.06601 1.09912 1.08287 [1.23888, 1.65749, 2.58806, 3.41822]
0.34748 0.32311 0.37641 0.35005 1.08179 1.06851 1.10551 1.078 [1.28482, 1.59053, 2.48773, 3.41856]
0.34135 0.30756 0.3619 0.34438 1.08343 1.06679 1.09733 1.08004 [1.12057, 1.61608, 2.58017, 3.45691]
0.34094 0.30632 0.3645 0.34166 1.0807 1.05958 1.0987 1.07899 [1.0, 1.5, 2.54912, 3.4038]
0.33434 0.30087 0.35638 0.33509 1.08098 1.06068 1.10022 1.07908 [1.10802, 1.5, 2.54959, 3.4403]
0.3414 0.30791 0.36434 0.34194 1.08065 1.05879 1.09855 1.07892 [1.0, 1.5, 2.54932, 3.40166]
0.34297 0.30517 0.36184 0.34311 1.0811

KeyboardInterrupt: 