In [19]:
%matplotlib inline

import os
import pandas as pd 
import numpy as np
import seaborn as sns
import datetime as dt
import matplotlib as mpl
from matplotlib import pyplot as plt
from xgboost import plot_importance
from sklearn.metrics import roc_auc_score
from collections import Counter
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from xgboost import XGBClassifier
from IPython.display import display
import warnings
warnings.filterwarnings('ignore')


# Notebook-wide display defaults: very large figures (matplotlib figsize is in
# inches), a larger base font, and wide DataFrames shown without column elision.
mpl.rcParams["figure.figsize"] = (100, 70)
mpl.rcParams["font.size"] = 25

pd.options.display.max_columns = 500

## Loading both predictions

In [20]:
# Read the XGBoost submission file and summarise its predicted scores.
xgb_pred = pd.read_csv(
    "../data/submissions/submission_after_xgb_modeling_final.csv"
)
xgb_pred["redemption_status"].describe()

count    5.022600e+04
mean     3.349414e-03
std      4.139968e-02
min      2.710388e-18
25%      9.786470e-11
50%      4.249922e-09
75%      5.438271e-07
max      9.994196e-01
Name: redemption_status, dtype: float64

In [21]:
# Rows whose XGBoost score is strictly above 0.5.
xgb_pred.loc[xgb_pred["redemption_status"].gt(0.5)]

Unnamed: 0,id,redemption_status
667,1704,0.773372
2085,5224,0.874832
2098,5268,0.641584
2214,5539,0.588712
2459,6139,0.747347
...,...,...
46605,119197,0.961957
46742,119514,0.575438
47770,122161,0.674259
48146,123136,0.815890


In [5]:
# Read the LightGBM submission file and summarise its predicted scores.
lgbm_pred = pd.read_csv(
    "../data/submissions/submission_after_modeling_lgb_final.csv"
)
lgbm_pred["redemption_status"].describe()

count    5.022600e+04
mean     1.388587e-03
std      3.237690e-02
min      0.000000e+00
25%      4.991929e-22
50%      1.907606e-17
75%      4.854364e-13
max      1.000000e+00
Name: redemption_status, dtype: float64

In [23]:
# Rows whose LightGBM score is strictly above 0.5.
lgbm_pred.loc[lgbm_pred["redemption_status"].gt(0.5)]

Unnamed: 0,id,redemption_status
667,1704,0.863877
2645,6577,0.72688
3246,8054,0.905594
4879,12290,0.911047
5600,14135,0.640259
6624,16827,0.923843
6886,17480,0.71715
8255,20956,0.830398
8592,21781,0.927687
8721,22145,0.761658


In [7]:
# Preview the XGBoost predictions frame (pandas truncates the rendered rows).
xgb_pred

Unnamed: 0,id,redemption_status
0,3,4.930832e-02
1,4,1.216368e-05
2,5,1.059312e-06
3,8,2.008840e-11
4,10,1.397163e-08
...,...,...
50221,128584,1.127315e-06
50222,128588,4.976915e-04
50223,128591,9.088521e-05
50224,128593,8.021926e-03


In [10]:
# Build the ensemble frame as a NEW DataFrame, pairing the two models by `id`.
# - A plain `ensembled = xgb_pred` only aliases the XGBoost frame, so any later
#   change to `ensembled` would also rewrite `xgb_pred`.
# - Assigning the LightGBM column directly aligns rows by index label, which
#   silently mis-pairs predictions if the two files differ in row order or ids.
lgbm_scores = lgbm_pred[["id", "redemption_status"]].rename(
    columns={"redemption_status": "lgbm_redemption_status"}
)
# A left merge keeps the row order of `xgb_pred`; `validate` raises if an id is
# duplicated in either file.
ensembled = xgb_pred.merge(lgbm_scores, on="id", how="left", validate="one_to_one")

# Every id scored by XGBoost must also have a LightGBM score to blend with.
unmatched_ids = ~ensembled["id"].isin(lgbm_scores["id"])
assert not unmatched_ids.any(), f"{unmatched_ids.sum()} ids have no LightGBM prediction"
ensembled

Unnamed: 0,id,redemption_status,lgbm_redemption_status
0,3,4.930832e-02,4.836949e-05
1,4,1.216368e-05,1.967550e-11
2,5,1.059312e-06,2.130820e-15
3,8,2.008840e-11,7.601448e-23
4,10,1.397163e-08,1.066333e-13
...,...,...,...
50221,128584,1.127315e-06,1.955858e-11
50222,128588,4.976915e-04,1.978080e-08
50223,128591,9.088521e-05,5.788444e-10
50224,128593,8.021926e-03,1.150258e-08


In [12]:
# Disagreements: XGBoost scores above 0.5 while LightGBM scores below 0.5.
ensembled.loc[
    ensembled["redemption_status"].gt(0.5)
    & ensembled["lgbm_redemption_status"].lt(0.5)
]

Unnamed: 0,id,redemption_status,lgbm_redemption_status
2085,5224,0.874832,0.001815
2098,5268,0.641584,0.000003
2214,5539,0.588712,0.075924
2459,6139,0.747347,0.000390
3667,9182,0.975472,0.150026
...,...,...,...
46370,118541,0.628707,0.006113
46605,119197,0.961957,0.415723
46742,119514,0.575438,0.014614
47770,122161,0.674259,0.380603


In [13]:
# Disagreements: LightGBM scores above 0.5 while XGBoost scores below 0.5.
ensembled.loc[
    ensembled["redemption_status"].lt(0.5)
    & ensembled["lgbm_redemption_status"].gt(0.5)
]

Unnamed: 0,id,redemption_status,lgbm_redemption_status
9303,23646,0.003149557,0.972211
11337,28929,0.001322844,1.0
16699,42778,0.01816256,0.915518
17570,44996,2.020349e-05,1.0
17611,45132,0.01099102,1.0
23042,59037,0.301431,0.998945
24545,62789,1.740025e-07,1.0
27094,69358,5.85369e-08,1.0
28365,72634,0.00290228,1.0
28515,73045,0.000332958,1.0


In [14]:
# Equal-weight (50/50) blend of the two models' scores.
# NOTE: `redemption_status` is both an input and the output here, so running
# this cell a second time blends the already-blended value with LightGBM again.
ensembled["redemption_status"] = (
    ensembled["redemption_status"].mul(0.5)
    .add(ensembled["lgbm_redemption_status"].mul(0.5))
)
ensembled

Unnamed: 0,id,redemption_status,lgbm_redemption_status
0,3,2.467835e-02,4.836949e-05
1,4,6.081848e-06,1.967550e-11
2,5,5.296561e-07,2.130820e-15
3,8,1.004420e-11,7.601448e-23
4,10,6.985868e-09,1.066333e-13
...,...,...,...
50221,128584,5.636671e-07,1.955858e-11
50222,128588,2.488556e-04,1.978080e-08
50223,128591,4.544289e-05,5.788444e-10
50224,128593,4.010969e-03,1.150258e-08


In [15]:
# Remove the helper LightGBM column now that the blended score has been computed.
# Rebinding (instead of `inplace=True`) avoids mutating a frame that other names
# may still reference, and `errors='ignore'` keeps the cell re-runnable once the
# column is already gone instead of raising KeyError.
ensembled = ensembled.drop(columns=['lgbm_redemption_status'], errors='ignore')
ensembled

Unnamed: 0,id,redemption_status
0,3,2.467835e-02
1,4,6.081848e-06
2,5,5.296561e-07
3,8,1.004420e-11
4,10,6.985868e-09
...,...,...
50221,128584,5.636671e-07
50222,128588,2.488556e-04
50223,128591,4.544289e-05
50224,128593,4.010969e-03


## Saving the solution

In [16]:
# Write the blended predictions to the submissions folder; `c` is the filename suffix.
c = 'final'
name = f"../data/submissions/submission_after_ensembling_lgb_xgb_{c}.csv"
ensembled.to_csv(name, index=False)  # index=False: do not write the row index
print("File saved!")

File saved!


In [17]:
# Rows whose blended score is strictly above 0.5.
ensembled.loc[ensembled["redemption_status"].gt(0.5)]

Unnamed: 0,id,redemption_status
667,1704,0.818625
2645,6577,0.685167
3246,8054,0.875424
3667,9182,0.562749
4879,12290,0.947359
...,...,...
47915,122570,0.562337
48146,123136,0.902183
48726,124699,0.693311
49150,125828,0.552783
