# In [None]:
import sys
import numpy as np
import pandas as pd
from time import time
sys.path.append('../input/autograd')
from autograd import grad
import autograd.numpy as anp
from scipy.optimize import fsolve


def optimize_weights(ps, labels):
    """Find blend weights for model predictions that minimise binary log loss.

    The weights are constrained to sum to one. The constraint is handled with
    a Lagrange multiplier: the gradient of the Lagrangian (via autograd) plus
    the constraint residual are driven to zero with ``scipy.optimize.fsolve``.

    Args:
        ps: Predictions with shape ``(n_models, d1, d2)``; axis 0 indexes the
            models being blended (the other two axes are presumably samples
            and targets -- confirm against the caller). A list or tuple of
            equally shaped 2-D arrays is stacked along a new leading axis.
        labels: Binary ground truth with ``d1 * d2`` elements in total. A
            pandas object or any array-like is accepted.

    Returns:
        A tuple ``(ws, optimized_cv)`` holding the array of optimised weights
        (one per model) and the log loss of the blended predictions.

    Raises:
        ValueError: If ``ps`` is not 3-D or ``labels`` does not have one
            element per blended prediction.
    """
    if isinstance(ps, (list, tuple)):
        ps = anp.stack(ps)

    # Fail early with a readable message instead of an obscure transpose or
    # broadcasting error raised from inside the optimiser.
    ps_shape = np.shape(ps)
    if len(ps_shape) != 3:
        raise ValueError(
            'ps must have shape (n_models, n_samples, n_targets); got %r'
            % (ps_shape,)
        )
    n_models, n_rows, n_cols = ps_shape

    # Flatten the labels once; they never change during the optimisation.
    y_true = np.asarray(labels).ravel()
    if y_true.size != n_rows * n_cols:
        raise ValueError(
            'labels has %d elements but each model provides %d predictions'
            % (y_true.size, n_rows * n_cols)
        )
    is_positive = y_true == 1

    # Random starting weights on the simplex; the trailing 1 is the initial
    # value of the Lagrange multiplier.
    weights = anp.random.dirichlet(
        [2] * n_models, size=1).reshape(n_models).tolist() + [1]

    def log_loss_numpy(y_pred):
        # Mean binary cross-entropy, clipped to avoid log(0).
        y_pred = anp.array(y_pred).ravel()
        y_pred = anp.clip(y_pred, 1e-15, 1 - 1e-15)
        loss = anp.where(is_positive, -anp.log(y_pred), -anp.log(1 - y_pred))
        return loss.mean()

    def calc_oof_blend(ws, preds):
        # Weighted sum over the model axis, done as a batched matmul.
        return anp.squeeze(anp.matmul(ws.reshape(1, 1, len(ws)),
                                      anp.transpose(preds, [1, 0, 2])))

    def lagrange_func(params):
        # params = [w_0, ..., w_{k-1}, lambda]
        ws = anp.array(params[:-1])
        _lambda = params[-1]
        oof_blend = calc_oof_blend(ws, ps)
        return log_loss_numpy(oof_blend) - _lambda * (ws.sum() - 1.)

    # Build the gradient function once rather than on every fsolve iteration.
    grad_lagrangian = grad(lagrange_func)

    def lagrange_obj(params):
        # Residuals for fsolve: dL/dw for every weight, followed by the
        # sum-to-one constraint.
        ws = params[:-1]
        dLdws = grad_lagrangian(params)[:-1]
        return anp.append(dLdws, sum(ws) - 1.)

    for i, p in enumerate(ps):
        print('M%d Log Loss: %.7f' % (i, log_loss_numpy(p)))

    start_time = time()
    pars = fsolve(lagrange_obj, weights)
    ws = pars[:-1]
    time_elapsed = time() - start_time
    print('Optimized in %.2fs' % time_elapsed)
    print('Optimized Weights:', ws)
    oof_b = calc_oof_blend(ws, ps)
    optimized_cv = log_loss_numpy(oof_b)
    print('Optimised Blend OOF Score: %.7f' % optimized_cv)
    return ws, optimized_cv

# OOF labels: drop the rows whose cp_type is 'ctl_vehicle' and keep only the
# target columns (everything except 'sig_id').
x_develop = pd.read_csv('../input/lish-moa/train_features.csv')
idx = x_develop['cp_type'] != 'ctl_vehicle'
labels = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
target_cols = labels.drop(['sig_id'], axis=1).columns
labels = labels.loc[idx, target_cols]

# OOF predictions.
# NOTE(review): assumes each file holds predictions aligned with the filtered
# label rows -- confirm. optimize_weights needs a (n_models, n_samples,
# n_targets) array, so every file is reshaped to (-1, *labels.shape) before
# joining along the model axis. This matches the previous vstack for files
# that already carry a leading model axis, and also handles plain 2-D
# (n_samples, n_targets) files, which vstack would have merged row-wise.
p1 = anp.load('../input/moa-oof-predictions/T19.npy')
p2 = anp.load('../input/moa-oof-predictions/oof_nn_transfer.npy')
ps = anp.concatenate(
    [anp.reshape(p, (-1,) + labels.shape) for p in (p1, p2)],
    axis=0,
)

# Optimization
ws, cv = optimize_weights(ps, labels)