<b>Ensemble</b> is nothing but a combination of different models.

In [3]:
import numpy as np
def mean_predictions(probas):
    """
    Average the values found in each row of ``probas``.

    :param probas: 2-d array-like of predictions; presumably one row per
        sample and one column per model (TODO confirm against callers)
    :return: 1-d array holding the arithmetic mean of every row
    """
    row_means = np.mean(probas, axis=1)
    return row_means

def max_voting(preds):
    """
    Pick, for every row of ``preds``, the largest value in that row.

    NOTE(review): despite the name, no votes are counted here; the function
    simply returns the row-wise maximum. Confirm this is the intended
    behaviour before relying on it for class-label voting.

    :param preds: 2-d numpy array of predictions
    :return: 2-d array of shape (n_rows, 1) with the maximum of each row
    """
    # Position of the (first) maximum in each row.
    best_cols = np.argmax(preds, axis=1)
    # Turn the flat index vector into a column so it lines up with axis 1.
    best_cols = np.expand_dims(best_cols, axis=1)
    return np.take_along_axis(preds, best_cols, axis=1)

In [4]:
def rank_mena(probas):
    """
    Average the per-column ranks of ``probas`` for every row.

    Each column is converted to ranks with ``scipy.stats.rankdata`` (ties
    receive the average rank), and the ranks are then averaged across the
    columns of each row.

    :param probas: 2-d array-like of predictions; presumably one row per
        sample and one column per model (TODO confirm against callers)
    :return: 1-d array with the mean rank of every row
    """
    # Local import: the notebook never imports scipy.stats at the top level.
    from scipy import stats

    probas = np.asarray(probas)
    # Rank every column independently, then line the rank vectors up again
    # as columns so the row-wise mean combines the ranks of all columns.
    ranked = [stats.rankdata(probas[:, i]) for i in range(probas.shape[1])]
    ranked = np.column_stack(ranked)
    return np.mean(ranked, axis=1)


# Correctly spelled alias; the original (misspelled) name is kept so that
# existing callers continue to work.
rank_mean = rank_mena

<b>Find the best weights of multiple models to optimize
for AUC</b>

In [5]:
import numpy as np
from functools import partial
from scipy.optimize import fmin
from sklearn import metrics
class OptimizeAUC:
    """
    Search for per-column weights that maximise the AUC of a weighted sum.

    The columns of ``X`` are multiplied by one coefficient each and summed
    row-wise; ``scipy.optimize.fmin`` (downhill simplex) tunes the
    coefficients. The search is unconstrained, so the resulting weights are
    not forced to be non-negative or to sum to one.
    """

    def __init__(self):
        # Placeholder until fit() stores the optimised coefficient vector.
        self.coef_ = 0

    @staticmethod
    def _blend(X, weights):
        """Scale every column of ``X`` by its weight and sum across columns."""
        return np.sum(X * weights, axis=1)

    def _auc(self, coef, X, y):
        """
        Objective handed to the optimiser.

        :param coef: candidate coefficients, one per column of ``X``
        :param X: 2-d array of predictions
        :param y: true targets
        :return: negative AUC of the weighted sum (negated because ``fmin``
            minimises)
        """
        blended = self._blend(X, coef)
        return -1.0 * metrics.roc_auc_score(y, blended)

    def fit(self, X, y):
        """
        Optimise the coefficients on ``X``/``y`` and store them in ``coef_``.

        :param X: 2-d array of predictions, one column per coefficient
        :param y: true targets
        """
        # Random starting point drawn from a flat Dirichlet distribution.
        n_columns = X.shape[1]
        start = np.random.dirichlet(np.ones(n_columns), size=1)

        def objective(weights):
            return self._auc(weights, X=X, y=y)

        # disp=True makes fmin print its convergence summary.
        self.coef_ = fmin(objective, start, disp=True)

    def predict(self, X):
        """
        Combine the columns of ``X`` using the stored coefficients.

        :param X: 2-d array of predictions
        :return: 1-d array with the weighted sum of every row
        """
        return self._blend(X, self.coef_)

In [7]:
import xgboost as xgb
from sklearn.datasets import make_classification
from sklearn import ensemble
from sklearn import linear_model
from sklearn import metrics
from sklearn import model_selection

# Fix every source of randomness so that Restart & Run All yields the same
# numbers on every run.
SEED = 42
# OptimizeAUC.fit draws its starting weights from the global NumPy RNG.
np.random.seed(SEED)

# Synthetic classification data set: 10000 samples, 25 features.
X, y = make_classification(n_samples=10000, n_features=25, random_state=SEED)

# Two equally sized, stratified folds; each one is used once for training
# and once as hold-out data.
xfold1, xfold2, yfold1, yfold2 = model_selection.train_test_split(
        X,
        y,
        test_size=0.5,
        stratify=y,
        random_state=SEED
    )

# --- Train on fold 1, predict on fold 2 ---------------------------------
logreg = linear_model.LogisticRegression(random_state=SEED)
rf = ensemble.RandomForestClassifier(random_state=SEED)
xgbc = xgb.XGBClassifier(random_state=SEED)

logreg.fit(xfold1, yfold1)
rf.fit(xfold1, yfold1)
xgbc.fit(xfold1, yfold1)

# Column 1 of predict_proba is the probability of the positive class.
pred_logreg = logreg.predict_proba(xfold2)[:, 1]
pred_rf = rf.predict_proba(xfold2)[:, 1]
pred_xgbc = xgbc.predict_proba(xfold2)[:, 1]

# Plain (unweighted) average of the three models.
avg_pred = (pred_logreg + pred_rf + pred_xgbc) / 3

# One column per model; the last column is the simple average.
fold2_preds = np.column_stack((
        pred_logreg,
        pred_rf,
        pred_xgbc,
        avg_pred
    ))

aucs_fold2 = []
for i in range(fold2_preds.shape[1]):
    auc = metrics.roc_auc_score(yfold2, fold2_preds[:, i])
    aucs_fold2.append(auc)
print(f"Fold-2: LR AUC = {aucs_fold2[0]}")
print(f"Fold-2: RF AUC = {aucs_fold2[1]}")
print(f"Fold-2: XGB AUC = {aucs_fold2[2]}")
print(f"Fold-2: Average Pred AUC = {aucs_fold2[3]}")

# --- Train on fold 2, predict on fold 1 ---------------------------------
# Fresh estimators so nothing learned on fold 1 carries over.
logreg = linear_model.LogisticRegression(random_state=SEED)
rf = ensemble.RandomForestClassifier(random_state=SEED)
xgbc = xgb.XGBClassifier(random_state=SEED)

logreg.fit(xfold2, yfold2)
rf.fit(xfold2, yfold2)
xgbc.fit(xfold2, yfold2)

pred_logreg = logreg.predict_proba(xfold1)[:, 1]
pred_rf = rf.predict_proba(xfold1)[:, 1]
pred_xgbc = xgbc.predict_proba(xfold1)[:, 1]
avg_pred = (pred_logreg + pred_rf + pred_xgbc) / 3

fold1_preds = np.column_stack((
        pred_logreg,
        pred_rf,
        pred_xgbc,
        avg_pred
    ))

aucs_fold1 = []
for i in range(fold1_preds.shape[1]):
    auc = metrics.roc_auc_score(yfold1, fold1_preds[:, i])
    aucs_fold1.append(auc)
print(f"Fold-1: LR AUC = {aucs_fold1[0]}")
print(f"Fold-1: RF AUC = {aucs_fold1[1]}")
print(f"Fold-1: XGB AUC = {aucs_fold1[2]}")
print(f"Fold-1: Average prediction AUC = {aucs_fold1[3]}")

# --- Learn blending weights on one fold, evaluate on the other -----------
# [:, :-1] drops the simple-average column so only the three base models
# are weighted.
opt = OptimizeAUC()

opt.fit(fold1_preds[:, :-1], yfold1)
opt_preds_fold2 = opt.predict(fold2_preds[:, :-1])
auc = metrics.roc_auc_score(yfold2, opt_preds_fold2)
print(f"Optimized AUC, Fold 2 = {auc}")
print(f"Coefficients = {opt.coef_}")

opt = OptimizeAUC()
opt.fit(fold2_preds[:, :-1], yfold2)
opt_preds_fold1 = opt.predict(fold1_preds[:, :-1])
auc = metrics.roc_auc_score(yfold1, opt_preds_fold1)
print(f"Optimized AUC, Fold 1 = {auc}")
print(f"Coefficients = {opt.coef_}")


Fold-2: LR AUC = 0.9690487950478074
Fold-2: RF AUC = 0.9871887179501948
Fold-2: XGB AUC = 0.9856741577078653
Fold-2: Average Pred AUC = 0.9865447978471675
Fold-1: LR AUC = 0.9674886347981816
Fold-1: RF AUC = 0.9860055177608829
Fold-1: XGB AUC = 0.9853077576492412
Fold-1: Average prediction AUC = 0.9854406376705019
Optimization terminated successfully.
         Current function value: -0.986528
         Iterations: 26
         Function evaluations: 63
Optimized AUC, Fold 2 = 0.9875985580157692
Coefficients = [0.03606054 0.26068088 0.75777763]
Optimization terminated successfully.
         Current function value: -0.987644
         Iterations: 62
         Function evaluations: 123
Optimized AUC, Fold 1 = 0.9864788778366205
Coefficients = [0.04720912 0.23062615 0.51369249]
