# LIBRARIES

In [1]:
# packages
import pandas as pd
import numpy as np
from scipy.stats import rankdata
from scipy.stats import spearmanr
from sklearn.metrics import roc_auc_score

# ENSEMBLING SUBMISSIONS

In [2]:
# load submissions
# Read the two submission files that the cells below blend. Later cells sort
# each frame by PredictionIdx and combine their CustomerInterest columns.
a, b = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../submissions/auc85239_data_v4_0_80_under_wlp_lm_bm_f34_lgb_2stage.csv",
        "../submissions/auc850434_data_v4_0_80_under_wlp_lm_bm_lgb_2stage.csv",
    )
)

In [3]:
# Bring both submissions into the same PredictionIdx order, then renumber the
# rows 0..n-1.
# Why the reset: sort_values keeps each row's original index label, and pandas
# arithmetic between two Series aligns on those labels, not on position. Without
# it, the commented-out arithmetic / geometric mean cells below would pair rows
# by their original file row number whenever the two files are ordered
# differently. The rank-mean cell uses plain arrays (positional) and is unaffected.
a = a.sort_values("PredictionIdx").reset_index(drop=True)
b = b.sort_values("PredictionIdx").reset_index(drop=True)

In [4]:
# geometric mean
#a.CustomerInterest = (a.CustomerInterest*b.CustomerInterest) ** (1/2)

In [5]:
# arithmetic mean
#a.CustomerInterest = a.CustomerInterest*0.25 + b.CustomerInterest*0.75

In [52]:
# rank mean
# Weighted blend of the two submissions on the rank scale: each CustomerInterest
# column is converted to ranks with scipy.stats.rankdata before weighting.
# rankdata returns plain arrays, so rows of a and b are paired by position; this
# relies on both frames having been sorted by PredictionIdx in the cell above.
# NOTE(review): the weights 0.95 and 0.1 sum to 1.05, and the filename used in
# the save cell mentions "09" / "01" - confirm which weights are intended. The
# result is min-max rescaled in the next cell, so only the ratio of the two
# weights affects the final values.
a["CustomerInterest"] = (
    0.95 * rankdata(a["CustomerInterest"])
    + 0.1 * rankdata(b["CustomerInterest"])
    # + rankdata(c.CustomerInterest) + rankdata(d.CustomerInterest)
    # + rankdata(e.CustomerInterest)
)

In [53]:
# scaling
# Min-max rescale the blended CustomerInterest so its smallest value maps to 0
# and its largest to 1. If every value were identical the denominator would be
# zero and the column would become NaN.
a["CustomerInterest"] = a["CustomerInterest"].pipe(
    lambda score: (score - score.min()) / (score.max() - score.min())
)

In [7]:
# check rank correlation with the best submission
# Load the reference ("best") submission and put it in PredictionIdx order so
# its rows line up positionally with the sorted frames above.
best = pd.read_csv(
    "../submissions/auc850085_data_v4_0_60_under_wlp_lm_bm_lgb_2stage.csv"
).sort_values("PredictionIdx")

# Spearman rank correlation between b's scores and the reference scores; the
# result is the cell's displayed output.
# NOTE(review): this compares b, not the blended frame a - confirm that is the
# intended candidate. Values are paired by position, which presumably assumes
# both files contain the same PredictionIdx values - verify.
spearmanr(b.CustomerInterest, best["CustomerInterest"])

SpearmanrResult(correlation=0.992425938984684, pvalue=0.0)

In [55]:
# Write the blended submission without the index column, formatting floats
# with 8 decimal places.
# NOTE(review): the filename mentions weights "09" / "01" and AUC tags that do
# not match the files loaded or the weights used earlier in this notebook -
# confirm the name before submitting.
a.to_csv(
    "../submissions/rankmean_09_auc789227_ensemble_es_01_auc687042_noRF.csv",
    float_format="%.8f",
    index=False,
)

# OPTIMIZING WEIGHTS

In [93]:
# load predictions
# Read the two validation prediction files whose blend weights are tuned
# below. Note that a and b are rebound here, replacing the submission frames
# loaded earlier in the notebook.
a, b = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../pred_valid_under/auc8465_data_v4wlp_0_60_under_lgb.csv",
        "../pred_valid/auc789084_data_v4mlp_ak2_0_60_under_rec27_lgb.csv",
    )
)

In [94]:
# Order both validation frames by the same (CustomerIdx, IsinIdx, BuySell) key.
a, b = (
    frame.sort_values(["CustomerIdx", "IsinIdx", "BuySell"])
    for frame in (a, b)
)

In [95]:
# compute AUC
# ROC AUC of each model on its own validation file. In these files the
# CustomerInterest column is passed as the true label and the TARGET column
# as the predicted score.
print(roc_auc_score(y_true=a["CustomerInterest"], y_score=a["TARGET"]))
print(roc_auc_score(y_true=b["CustomerInterest"], y_score=b["TARGET"]))

0.8465000963193037
0.7890844311762045


In [96]:
# Relabel a's six columns so that its prediction column is called TARGETA and
# no longer shares a name with b's TARGET column when the frames are merged.
# NOTE(review): the labels are assigned by position - this assumes the file's
# columns are already in exactly this order; verify against the CSV header.
a.columns = [
    "CustomerIdx",
    "IsinIdx",
    "BuySell",
    "Week",
    "CustomerInterest",
    "TARGETA",
]

In [97]:
# Attach a's predictions (TARGETA) to b's rows. No `on=` is given, so pandas
# joins on every column name the two frames share (presumably the key columns
# plus CustomerInterest - verify). The left join keeps every row of b; any row
# of b with no match in a gets NaN in TARGETA.
c = pd.merge(b, a, how="left")

In [101]:
# optimal weights for arithmetic mean
# Sweep the blend weight w over 0.00, 0.05, ..., 1.00 (21 values) and print the
# AUC of the weighted arithmetic mean w*TARGETA + (1-w)*TARGET against the
# CustomerInterest label.
# np.linspace replaces np.arange(0, 1.05, 0.05): with a floating-point step the
# number of elements arange returns depends on rounding of (stop - start) / step,
# and the 1.05 stop was only a fudge to include 1.0. linspace states the
# endpoints and the count explicitly.
for w in np.linspace(0, 1, 21):
    target = w*c.TARGETA + (1-w)*c.TARGET
    auc = roc_auc_score(c.CustomerInterest, target)
    print(str(round(w, 2)) + ": auc = " + str(round(auc, 6)))

0.0: auc = 0.789084
0.05: auc = 0.789118
0.1: auc = 0.789134
0.15: auc = 0.789124
0.2: auc = 0.789106
0.25: auc = 0.789068
0.3: auc = 0.789012
0.35: auc = 0.78893
0.4: auc = 0.78884
0.45: auc = 0.788742
0.5: auc = 0.788615
0.55: auc = 0.788471
0.6: auc = 0.788307
0.65: auc = 0.788129
0.7: auc = 0.787926
0.75: auc = 0.787711
0.8: auc = 0.787474
0.85: auc = 0.787226
0.9: auc = 0.786955
0.95: auc = 0.786667
1.0: auc = 0.786366


In [103]:
# optimal weights for rank mean
# Sweep the blend weight w over 0.00, 0.05, ..., 1.00 (21 values) and print the
# AUC of the weighted mean of the two models' ranks,
# w*rank(TARGETA) + (1-w)*rank(TARGET), against the CustomerInterest label.
# np.linspace replaces np.arange(0, 1.05, 0.05): with a floating-point step the
# number of elements arange returns depends on rounding of (stop - start) / step,
# and the 1.05 stop was only a fudge to include 1.0. linspace states the
# endpoints and the count explicitly.
for w in np.linspace(0, 1, 21):
    target = w*rankdata(c.TARGETA) + (1-w)*rankdata(c.TARGET)
    auc = roc_auc_score(c.CustomerInterest, target)
    print(str(round(w, 2)) + ": auc = " + str(round(auc, 6)))

0.0: auc = 0.789084
0.05: auc = 0.789121
0.1: auc = 0.789139
0.15: auc = 0.789137
0.2: auc = 0.789122
0.25: auc = 0.789091
0.3: auc = 0.789025
0.35: auc = 0.788953
0.4: auc = 0.788863
0.45: auc = 0.788764
0.5: auc = 0.788638
0.55: auc = 0.788486
0.6: auc = 0.788326
0.65: auc = 0.788143
0.7: auc = 0.78794
0.75: auc = 0.787727
0.8: auc = 0.787487
0.85: auc = 0.787234
0.9: auc = 0.786961
0.95: auc = 0.786668
1.0: auc = 0.786366


record: 0.843235