# LIBRARIES

In [18]:
# packages
import pandas as pd
import numpy as np
from scipy.stats import rankdata
from scipy.stats import spearmanr
from sklearn.metrics import roc_auc_score

# ENSEMBLING SUBMISSIONS

In [26]:
# load the two submission files that will be blended (read in order: a, then b)
a, b = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../submissions/auc8465_data_v4wlp_0_60_under_lgb_2stage.csv",
        "../submissions/auc789084_data_v4mlp_ak2_0_60_under_rec27_lgb_2stage.csv",
    )
)

In [27]:
# geometric mean (alternative blend, currently disabled)
#a.CustomerInterest = (a.CustomerInterest*b.CustomerInterest) ** (1/2)

In [28]:
# arithmetic mean (alternative blend, currently disabled)
#a.CustomerInterest = a.CustomerInterest*0.25 + b.CustomerInterest*0.75

In [29]:
# rank mean: overwrite a.CustomerInterest with the sum of the per-model ranks.
# NOTE(review): assumes a and b list the same rows in the same order — confirm.
# NOTE: this cell is not idempotent — it reads and overwrites a.CustomerInterest,
# so reload the submissions before re-running it.
if len(a) != len(b):
    # fail with a clear message instead of relying on numpy broadcasting rules
    raise ValueError("submissions a and b must have the same number of rows")

# Parentheses replace the trailing backslash, so the optional extra models can be
# commented in or out without affecting line continuation.
a.CustomerInterest = (
    rankdata(a.CustomerInterest)
    + rankdata(b.CustomerInterest)
    # + rankdata(c.CustomerInterest) + rankdata(d.CustomerInterest)
    # + rankdata(e.CustomerInterest)
)

In [30]:
# min-max scaling: shift by the minimum and divide by the (max - min) range
score_min = a.CustomerInterest.min()
score_range = a.CustomerInterest.max() - score_min
a.CustomerInterest = (a.CustomerInterest - score_min) / score_range

In [31]:
# Spearman rank correlation between the current blend and the stored best submission
best = pd.read_csv("../submissions/rankmean_two_models.csv")
spearmanr(a.CustomerInterest, best["CustomerInterest"])

SpearmanrResult(correlation=1.0, pvalue=0.0)

In [25]:
# write the blended submission without the index, scores formatted to 8 decimals
# NOTE(review): this cell's execution count (25) is lower than the blending cells
# (26-31); re-run it after blending so the saved file matches the current `a`.
a.to_csv(
    '../submissions/rankmean_two_models_new.csv',
    index=False,
    float_format="%.8f",
)

# OPTIMIZING WEIGHTS

In [4]:
# load the validation predictions of the two models
# (this rebinds a and b, which earlier cells used for the submission frames)
a, b = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../pred_valid/auc786368_data_v4mlp_ak2_0_60_under_rec27_lgb.csv",
        "../pred_valid/auc789084_data_v4mlp_ak2_0_60_under_rec27_lgb.csv",
    )
)

In [5]:
# validation AUC of each model, one per line
# (CustomerInterest is passed as y_true, TARGET as the predicted score)
print(
    roc_auc_score(a.CustomerInterest, a.TARGET),
    roc_auc_score(b.CustomerInterest, b.TARGET),
    sep="\n",
)

0.7863676465335259
0.7890844311762045


In [6]:
# optimal weights for arithmetic mean
# Sweep the weight w given to model a (model b gets 1 - w) and print the
# validation AUC of the blended prediction for each w.

# The blend is scored against b's labels only, so both frames must carry the
# same labels in the same row order for the AUC to be meaningful.
if not np.array_equal(a.CustomerInterest, b.CustomerInterest):
    raise ValueError("a and b must contain identical CustomerInterest labels in the same order")

# linspace yields exactly 21 weights from 0 to 1 inclusive; arange with a float
# step depends on floating-point rounding for the number of elements.
for w in np.linspace(0, 1, 21):
    target = w*a.TARGET + (1-w)*b.TARGET
    auc = roc_auc_score(b.CustomerInterest, target)
    print(f"{round(float(w), 2)}: auc = {round(float(auc), 6)}")

0.0: auc = 0.789084
0.05: auc = 0.78907
0.1: auc = 0.789047
0.15: auc = 0.789007
0.2: auc = 0.788953
0.25: auc = 0.7889
0.3: auc = 0.788817
0.35: auc = 0.78873
0.4: auc = 0.788627
0.45: auc = 0.788509
0.5: auc = 0.788377
0.55: auc = 0.788224
0.6: auc = 0.78807
0.65: auc = 0.787903
0.7: auc = 0.787733
0.75: auc = 0.787541
0.8: auc = 0.787336
0.85: auc = 0.787113
0.9: auc = 0.786878
0.95: auc = 0.786632
1.0: auc = 0.786368


In [7]:
# optimal weights for rank mean
# Same sweep as for the arithmetic mean, but the blend is built from the ranks
# of each model's predictions instead of the raw scores.

# The blend is scored against b's labels only, so both frames must carry the
# same labels in the same row order for the AUC to be meaningful.
if not np.array_equal(a.CustomerInterest, b.CustomerInterest):
    raise ValueError("a and b must contain identical CustomerInterest labels in the same order")

# The ranks do not depend on w, so compute them once outside the loop.
rank_a = rankdata(a.TARGET)
rank_b = rankdata(b.TARGET)

# linspace yields exactly 21 weights from 0 to 1 inclusive; arange with a float
# step depends on floating-point rounding for the number of elements.
for w in np.linspace(0, 1, 21):
    target = w*rank_a + (1-w)*rank_b
    auc = roc_auc_score(b.CustomerInterest, target)
    print(f"{round(float(w), 2)}: auc = {round(float(auc), 6)}")

0.0: auc = 0.789084
0.05: auc = 0.789067
0.1: auc = 0.789043
0.15: auc = 0.789
0.2: auc = 0.788946
0.25: auc = 0.78889
0.3: auc = 0.788812
0.35: auc = 0.788715
0.4: auc = 0.788609
0.45: auc = 0.788492
0.5: auc = 0.78836
0.55: auc = 0.788206
0.6: auc = 0.788051
0.65: auc = 0.787889
0.7: auc = 0.787714
0.75: auc = 0.787521
0.8: auc = 0.787322
0.85: auc = 0.787099
0.9: auc = 0.786867
0.95: auc = 0.786626
1.0: auc = 0.786368


record: 0.843235