# 4. Home Credit Default Risk - Stacking

In [None]:
import pandas as pd 
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from six.moves import cPickle as pickle

In [None]:
# plotting libs
from matplotlib import rcParams
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
#sns.set() # setting seaborn default for plots

In [None]:
# figure size in inches
rcParams['figure.figsize'] = 13.5, 10

In [None]:
import os
data_path="../data/pickles"

In [None]:
# Load the pickled train/test frames. The context managers close each file
# as soon as it has been read instead of leaking the open handles.
# NOTE(review): unpickling can execute arbitrary code - only load pickles
# that were produced by a trusted step of this project.
with open(os.path.join(data_path, "prep_train_EF.pkl"), "rb") as train_file:
  prep_train = pickle.load(train_file)
with open(os.path.join(data_path, "prep_test_EF.pkl"), "rb") as test_file:
  prep_test = pickle.load(test_file)

In [None]:
print(prep_train.shape)
print(prep_test.shape)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train_SK_IDS = prep_train.SK_ID_CURR
X_train = prep_train.drop(columns=['TARGET', 'SK_ID_CURR'])
y_train = prep_train.TARGET

X_test_SK_IDS = prep_test.SK_ID_CURR
X_test = prep_test.drop(columns=['SK_ID_CURR'])

# Delete 
#del prep_train
#del prep_test

## Scaler

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
std_scaler = StandardScaler()
std_scaler.fit(X_train)

In [None]:
X_train = std_scaler.transform(X_train)
X_test = std_scaler.transform(X_test)

## Split prep_train into X_train_base and X_pred_base

In [None]:
X_train_base, X_pred_base, y_train_base, y_pred_base = train_test_split(X_train, y_train, test_size=0.4, random_state=42)

--------
## Define L1-Learners

In [None]:
# Class wrapper for L1-Learners with unique_name property
class L1Learner(object):
  """Pair a base (L1) classifier with a unique name and its predictions.

  Attributes:
    clf: the wrapped classifier object.
    unique_name: label used to tell learners of the same type apart.
    predictions_proba: starts as an empty list; overwritten once
      probability predictions have been made.
    predictions: starts as an empty list; overwritten once class
      predictions have been made.
  """

  def __init__(self, clf, unique_name):
    self.clf, self.unique_name = clf, unique_name
    # Placeholders until the learner has produced predictions
    self.predictions_proba = []
    self.predictions = []

  def __repr__(self):
    # Show the wrapped classifier's class name next to the learner's name
    clf_type_name = self.clf.__class__.__name__
    return 'Type:{} | Name: {}'.format(clf_type_name, self.unique_name)

### LogisticRegression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
clf_logreg = L1Learner(LogisticRegression(), unique_name='LogisticRegression')
clf_logreg.clf.set_params(**{
  'C': 1,
  'class_weight': 'balanced',
  'random_state': 42
})

### RandomForest

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
clf_rf = L1Learner(RandomForestClassifier(), unique_name='RandomForestClassifier')
clf_rf.clf.set_params(**{
  'max_leaf_nodes': 70,
  'n_estimators': 150,
  'random_state': 42
})

In [None]:
clf_rf_2 = L1Learner(RandomForestClassifier(), unique_name='RandomForestClassifier_2')
clf_rf_2.clf.set_params(**{
  'n_estimators': 200,
  'max_features': 0.2,
  'max_depth': 12,
  'min_samples_leaf': 2,
  'random_state': 42
})

### XGBoost

In [None]:
from xgboost import XGBClassifier

In [None]:
clf_xgb = L1Learner(XGBClassifier(), unique_name='XGBClassifier')
clf_xgb.clf.set_params(**{
  "n_estimators": 1000,
  "max_depth": 4,
  "min_child_weight": 4,
  "subsample": 0.8,
  "learning_rate": 0.01,
  "colsample_bytree": 0.8,
  "objective": 'binary:logistic',
  "random_state": 42
})

In [None]:
clf_xgb_2 = L1Learner(XGBClassifier(), unique_name='XGBClassifier_2')
clf_xgb_2.clf.set_params(**{
  'colsample_bytree': 0.7,
  'silent': 1,
  'subsample': 0.7,
  'learning_rate': 0.075,
  'objective': 'binary:logistic',
  'max_depth': 4,
  'min_child_weight': 1,
  'random_state': 42
})

### LightGBM

In [None]:
from lightgbm import LGBMClassifier

In [None]:
clf_lgm = L1Learner(LGBMClassifier(), unique_name='LightGBM')
clf_lgm.clf.set_params(**{
  'n_estimators': 10000,
  'learning_rate': 0.02,
  'num_leaves': 34,
  'colsample_bytree': 0.9497036,
  'subsample': 0.8715623,
  'max_depth': 8,
  'reg_alpha':0.041545473,
  'reg_lambda':0.0735294,
  'min_split_gain': 0.0222415,
  'min_child_weight': 39.3259775
})

In [None]:
clf_lgm_2 = L1Learner(LGBMClassifier(), unique_name='LightGBM_2')
clf_lgm_2.clf.set_params(**{
  'n_estimators':200,
  'learning_rate':0.1,
  'num_leaves':123,
  'colsample_bytree':0.8,
  'subsample':0.9,
  'max_depth':15,
  'reg_alpha':0.1,
  'reg_lambda':0.1,
  'min_split_gain':0.01,
  'min_child_weight':2,
  'random_state': 42
})

### SGDClassifier

In [None]:
from sklearn.linear_model import SGDClassifier

In [None]:
# SGD-trained linear classifier with logistic loss as an L1-Learner.
# 'max_iter' replaces the original 'n_iter': scikit-learn deprecated 'n_iter'
# in 0.19 (in favour of 'max_iter') and removed it in 0.21, where
# set_params() rejects it. Both spellings request a single pass over the data.
# NOTE(review): newer scikit-learn releases rename loss 'log' to 'log_loss';
# adjust the value below if this notebook is moved to such a version.
clf_sgd = L1Learner(SGDClassifier(), unique_name='SGDClassifier')
clf_sgd.clf.set_params(**{
  'max_iter': 1,
  'warm_start': True,
  'penalty': 'l2',
  'loss': 'log',
  'learning_rate': 'constant',
  'eta0': 0.0005,
  'random_state': 42,
  'n_jobs': 4
})

### ExtraTreesClassifier

In [None]:
from sklearn.ensemble import ExtraTreesClassifier

In [None]:
clf_etc = L1Learner(ExtraTreesClassifier(), unique_name='ExtraTreesClassifier')
clf_etc.clf.set_params(**{
  'n_jobs': -1,
  'n_estimators': 200,
  'max_features': 0.5,
  'max_depth': 12,
  'min_samples_leaf': 2,
  'random_state': 42
})

### CatBoost

In [None]:
from catboost import CatBoostClassifier

In [None]:
clf_cat = L1Learner(CatBoostClassifier(), unique_name='CatBoosClassifier')
clf_cat.clf.set_params(**{
  'iterations': 200,
  'learning_rate': 0.5,
  'depth': 3,
  'l2_leaf_reg': 40,
  'bootstrap_type': 'Bernoulli',
  'subsample': 0.7,
  'scale_pos_weight': 5,
  'eval_metric': 'AUC',
  'od_type': 'Iter',
  'allow_writing_files': False,
  'random_state': 42
})

### Multi Layer Perceptron

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
clf_mlp = L1Learner(MLPClassifier(), unique_name='MLPClassifier')
clf_mlp.clf.set_params(**{
  'alpha': 0.1,
  'random_state': 42
})

----
## Train L1-Learners

In [None]:
# Every L1-Learner to train, in the order in which they were defined
l1_learners = [
  clf_logreg,
  clf_rf,
  clf_rf_2,
  clf_xgb,
  clf_xgb_2,
  clf_lgm,
  clf_lgm_2,
  clf_sgd,
  clf_etc,
  clf_cat,
  clf_mlp,
]

In [None]:
import time
from sklearn.metrics import roc_auc_score
from os import listdir

In [None]:
def train_l1_learners(l1_learners, X_t, y_t, X_p, y_p):
  """Fit every learner on (X_t, y_t) and store its predictions for X_p.

  For each learner the wrapped classifier is fitted, then its class
  predictions and probability predictions on X_p are stored on the learner
  (`predictions` / `predictions_proba`). The ROC AUC of the second
  probability column against y_p and the elapsed time are printed.

  Args:
    l1_learners: iterable of learner wrappers exposing `clf`.
    X_t, y_t: features and labels used for fitting.
    X_p, y_p: features to predict on and the labels used for scoring.
  """
  separator = 40 * '-'
  print("Number of Learners to train: {}".format(len(l1_learners)))
  for l1 in l1_learners:
    # The timer covers fitting, predicting and scoring
    t_begin = time.time()
    print('Training {}'.format(l1))
    model = l1.clf
    model.fit(X_t, y_t)

    # Keep both hard labels and probabilities on the learner
    l1.predictions = model.predict(X_p)
    l1.predictions_proba = model.predict_proba(X_p)

    auc_value = roc_auc_score(y_p, l1.predictions_proba[:, 1])
    print("Score on X_pred (ROC AUC): {}".format(auc_value))
    t_finish = time.time()
    elapsed_minutes = (t_finish - t_begin) / 60
    print("End (Elapsed time is {} min.)".format(elapsed_minutes))
    print(separator)

In [None]:
train_l1_learners(l1_learners, X_train_base, y_train_base, X_pred_base, y_pred_base)

### Save/Load models and predictions

In [None]:
# joblib used to be vendored as sklearn.externals.joblib; that module was
# deprecated in scikit-learn 0.21 and removed in 0.23. Keep the original
# import first and fall back to the standalone package on newer versions.
try:
  from sklearn.externals import joblib
except ImportError:
  import joblib

# Directory holding the pickled L1-Learners and meta-learner searches
stacking_models_dir = './stacking_models'

# File-name prefix selecting which set of saved L1-Learners to load/save
# NOTE(review): presumably 60/75 is the share of data used for training
# the L1-Learners - confirm.
stacking_model_prefix = 'stacking_60_'
#stacking_model_prefix = 'stacking_75_'

In [None]:
# load learners
# os.listdir() returns entries in arbitrary order. The learner order decides
# the column order of the meta dataset, so sort the listing to make that
# order reproducible between sessions.
l1_learners = [
  joblib.load(os.path.join(stacking_models_dir, f))
  for f in sorted(os.listdir(stacking_models_dir))
  if f.startswith(stacking_model_prefix)
]

l1_learners

In [None]:
# save learners
for learner in l1_learners:
  joblib.dump(learner, '{}/{}{}.pkl'.format(stacking_models_dir, stacking_model_prefix, learner.unique_name))

In [None]:
from sklearn.metrics import roc_auc_score

def print_l1_learners_rocauc(l1_learners, y_p):
  """Print the ROC AUC of each learner's stored probability predictions.

  Args:
    l1_learners: iterable of learners whose `predictions_proba` is a 2-D
      array; its column 1 is scored.
    y_p: true labels matching the rows of the stored predictions.
  """
  divider = 40 * '-'
  for l1 in l1_learners:
    print('CLF: {}'.format(l1))

    # Score column 1 of the stored probabilities against the labels
    second_column = l1.predictions_proba[:, 1]
    auc_value = roc_auc_score(y_p, second_column)
    print("Score on X_pred (ROC AUC): {}".format(auc_value))
    print(divider)

In [None]:
print_l1_learners_rocauc(l1_learners, y_pred_base)

---
## Create new dataset with L1-Learners predictions + extra features

In [None]:
# Scaler for meta features
from sklearn.preprocessing import StandardScaler

scaler_meta = StandardScaler()
#scaler_meta = None

In [None]:
# add these features to the l1 predictions for training meta classifier
extra_features = ['AMT_INCOME_TOTAL', 'DAYS_BIRTH', 'EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3',
                  'DAYS_EMPLOYED_PERC', 'INCOME_CREDIT_PERC', 'INCOME_PER_PERSON', 'ANNUITY_INCOME_PERC', 
                  'PAYMENT_RATE', 'EXT_SOURCES_MEAN', 'EXT_SOURCES_PRODUCT', 'NEW_PHONE_TO_BIRTH_RATIO']

In [None]:
extra_features = ['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3', 
                 'INCOME_CREDIT_PERC', 'INCOME_PER_PERSON', 'ANNUITY_INCOME_PERC', 'PAYMENT_RATE', 'DAYS_BIRTH', 'DAYS_EMPLOYED']

In [None]:
extra_features = []

In [None]:
def create_meta_dataset(learners, X, extra_features, scaler_meta=None, feature_names=None):
  """Build the meta-classifier training frame from stored L1 predictions.

  One column per learner (named by `unique_name`) holds column 1 of that
  learner's stored `predictions_proba`. Optionally, selected columns of X
  are appended and the whole frame is scaled.

  Args:
    learners: iterable of learners with `unique_name` and a 2-D
      `predictions_proba`.
    X: 2-D array the extra features are taken from (indexed as X[:, idx]);
      only used when `extra_features` is non-empty.
    extra_features: names of the columns of X to append.
    scaler_meta: optional object with `fit` / `transform`. When given it is
      fitted on the meta frame here and then used to transform it.
    feature_names: names of the columns of X, in order. Defaults to
      `prep_train.columns[2:]` (the module-level frame), as before.
      NOTE(review): that default assumes the two dropped columns are the
      first two columns of prep_train - confirm.

  Returns:
    pandas.DataFrame with the learner columns followed by the extra features.

  Raises:
    IndexError: if a name in `extra_features` is not in `feature_names`.
  """
  X_train_meta = pd.DataFrame()
  for learner in learners:
    X_train_meta[learner.unique_name] = learner.predictions_proba[:, 1]

  # Add extra features
  if len(extra_features) > 0:
    if feature_names is None:
      feature_names = prep_train.columns[2:]
    # Resolve the name array once instead of once per feature
    names = np.asarray(feature_names)
    for col in extra_features:
      # position of the column inside X
      idx = np.where(names == col)[0][0]
      X_train_meta[col] = X[:, idx]

  # Scale data; explicit None check so the scaler's truthiness is irrelevant
  if scaler_meta is not None:
    columns = X_train_meta.columns
    scaler_meta.fit(X_train_meta)
    X_train_meta = pd.DataFrame(scaler_meta.transform(X_train_meta), columns=columns)

  return X_train_meta

In [None]:
X_train_meta = create_meta_dataset(l1_learners, X_pred_base, extra_features, scaler_meta)

In [None]:
X_train_meta.head(10)

# Train meta classifier 

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
def print_cv_scores(grid_cv):
  """Print the best score/params of a fitted search and every candidate.

  Args:
    grid_cv: object exposing `best_score_`, `best_params_` and a
      `cv_results_` mapping with 'mean_test_score', 'std_test_score'
      and 'params'.

  Each candidate line shows the mean test score, twice the standard
  deviation, and the candidate's parameters.
  """
  print("Best Score: {}".format(grid_cv.best_score_))
  print("Best Params: {}".format(grid_cv.best_params_))
  candidates = zip(
    grid_cv.cv_results_['mean_test_score'],
    grid_cv.cv_results_['std_test_score'],
    grid_cv.cv_results_['params'],
  )
  for avg_score, score_std, candidate in candidates:
    spread = score_std * 2
    print("%0.3f (+/-%0.03f) for %r" % (avg_score, spread, candidate))

### SGDClassifier

In [None]:
from sklearn.linear_model import SGDClassifier
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical

In [None]:
clf_meta_sgd = SGDClassifier()

clf_meta_grid_sgd = {
  "loss": Categorical(['log', 'huber', 'modified_huber']),
  "penalty": Categorical(['l1', 'l2', 'elasticnet']),
  "l1_ratio": Real(0.0001, 1, prior='log-uniform'),
  "alpha": Real(0.00001, 100, prior='log-uniform'),
  "class_weight": Categorical([None, 'balanced']),
  #'learning_rate': Categorical(['constant', 'optimal', 'invscaling']),
  #'eta0': Integer(0.1, 1),
  'max_iter': Integer(5, 1000)
}

In [None]:
gridcv_meta_sgd = BayesSearchCV(clf_meta_sgd, search_spaces=clf_meta_grid_sgd, cv=5, n_jobs=-1, 
                                verbose=3, scoring='roc_auc', n_iter=20, random_state=42)
gridcv_meta_sgd.fit(X_train_meta, y_pred_base)

In [None]:
print_cv_scores(gridcv_meta_sgd)

In [None]:
joblib.dump(gridcv_meta_sgd, '{}/{}{}.pkl'.format(stacking_models_dir, 'meta_2_', 'sgd_without_domainfeatures'))

### LogisticRegression 

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
clf_meta_logreg = LogisticRegression()
clf_meta_grid_logreg = [
  {
    'C': [10, 100, 1000],
    'tol': [0.0004, 0.003, 0.002, 0.1],
    'class_weight' : [None, 'balanced'],
    'random_state': [42],
  },
]

In [None]:
gridcv_meta_logreg = GridSearchCV(clf_meta_logreg, clf_meta_grid_logreg, cv=5, n_jobs=-1, verbose=3, scoring='roc_auc')
gridcv_meta_logreg.fit(X_train_meta, y_pred_base)

In [None]:
print_cv_scores(gridcv_meta_logreg)

In [None]:
# Persist the logistic-regression search. The original dumped gridcv_meta_sgd
# here, so the logreg file silently contained the SGD search.
# NOTE(review): the file name says "with_domainfeatures"; make sure it matches
# the extra_features that were active when X_train_meta was built.
joblib.dump(gridcv_meta_logreg, '{}/{}{}.pkl'.format(stacking_models_dir, 'meta_2_', 'logreg_with_domainfeatures'))

### SVC 

In [None]:
from sklearn.svm import SVC

In [None]:
clf_meta_svc = SVC()
clf_meta_grid_svc = [
  {
    'C': [0.0001, 0.1, 1],
    'kernel': ['linear'],
#    'gamma': [0.01, 1.0],
    'class_weight': ['balanced'],
    'random_state': [42],
  },
]

In [None]:
gridcv_meta_svc = GridSearchCV(clf_meta_svc, clf_meta_grid_svc, cv=5, n_jobs=5, verbose=3, scoring='roc_auc')
gridcv_meta_svc.fit(X_train_meta, y_pred_base)

In [None]:
print_cv_scores(gridcv_meta_svc)

### XGBoost

In [None]:
from xgboost import XGBClassifier
clf_meta_xgb = XGBClassifier()

#clf_meta_grid_xgb = {
#  "n_estimators": [25, 100, 250, 300],
#  "max_depth": [4, 8 , 16],
#  "num_leaves": [15, 30]
#  "min_child_weight": [4, 8],
#  "subsample": [0.2, 0.8],
#  "learning_rate": [0.01],
#  "colsample_bytree": [0.8, 0.6],
#  "objective": ['binary:logistic'],
#  "random_state": [42]
#}

# best params
clf_meta_grid_xgb = {
  "n_estimators": [200, 300],
  "max_depth": [4],
  "min_child_weight": [8],
  "subsample": [0.2],
  "learning_rate": [0.01],
  "colsample_bytree": [0.8],
  "objective": ['binary:logistic'],
  "random_state": [42]
}

In [None]:
gridcv_meta_gdb = GridSearchCV(clf_meta_xgb, param_grid=clf_meta_grid_xgb, cv=5, n_jobs=-1, verbose=5, scoring='roc_auc')
gridcv_meta_gdb.fit(X_train_meta, y_pred_base)

In [None]:
print_cv_scores(gridcv_meta_gdb)

### XGBoost with Bayesian Optimization

In [None]:
from skopt import BayesSearchCV
from skopt.space import Real, Integer
from xgboost import XGBClassifier

clf_meta_xgb = XGBClassifier(objective='binary:logistic', eval_metric='auc')

opt = BayesSearchCV(
  clf_meta_xgb, 
  search_spaces=
  {
    "n_estimators": Integer(50, 300),
    "max_depth": Integer(1, 50),
    "min_child_weight": Integer(1, 100),
    "subsample": Real(0.1, 0.9, prior='log-uniform'),
    "learning_rate": Real(0.001, 0.1, prior='log-uniform'),
    "colsample_bytree": Real(0.1, 0.9, prior='log-uniform')
  },
  n_iter=20,
  random_state=42,
  verbose=3,
  scoring = 'roc_auc',
  cv=5,
  n_jobs=-1
)

In [None]:
opt.fit(X_train_meta, y_pred_base)

In [None]:
print_cv_scores(opt)

### LightGBM

In [None]:
from lightgbm import LGBMClassifier

In [None]:
clf_meta_lgm = LGBMClassifier()

clf_meta_grid_lgm = {
  'n_estimators': [250, 280, 290, 300],
  'learning_rate': [0.02, 0.001],
  'num_leaves': [25, 30],
  'max_depth': [16, 20, 25],
  'random_state': [42]
}

In [None]:
gridcv_meta_lgm = GridSearchCV(clf_meta_lgm, param_grid=clf_meta_grid_lgm, cv=5, n_jobs=-1, verbose=5, scoring='roc_auc')
gridcv_meta_lgm.fit(X_train_meta, y_pred_base)

In [None]:
print_cv_scores(gridcv_meta_lgm)

### MultiLayer-Perceptron

In [None]:
from sklearn.neural_network import MLPClassifier
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical

In [None]:
clf_meta_mlp = MLPClassifier()

clf_meta_grid_mlp = {
  #'hidden_layer_sizes': Categorical([(8,5)]),
  'hidden_layer_sizes': Categorical([(3,), (5,), (10,), (8,2)]),
  'activation': Categorical(['relu', 'logistic', 'tanh']),
  'epsilon': Real(1e-9, 1e-4, prior='log-uniform'),
  'alpha': Real(0.000000001, 2, prior='log-uniform'),
}

In [None]:
gridcv_meta_mlp = BayesSearchCV(clf_meta_mlp, search_spaces=clf_meta_grid_mlp, cv=5, n_jobs=-1, 
                                verbose=5, random_state=49, scoring='roc_auc', n_iter=70)
gridcv_meta_mlp.fit(X_train_meta, y_pred_base)

In [None]:
print_cv_scores(gridcv_meta_mlp)

In [None]:
joblib.dump(gridcv_meta_mlp, '{}/{}{}.pkl'.format(stacking_models_dir, 'meta_2_', 'mlp_without_domainfeatures'))

# Average L1-Learners

In [None]:
def average_l1_learners(l1_learners, Xtest):
  """Average the L1-Learners' probability predictions row by row.

  Args:
    l1_learners: iterable of learners exposing `clf.predict_proba` and
      `unique_name`.
    Xtest: feature matrix passed to every learner's `predict_proba`.

  Returns:
    The row-wise mean over learners of column 1 of `predict_proba`.
  """
  # One column per learner, named after the learner
  per_learner = pd.DataFrame()
  for l1 in l1_learners:
    second_column = l1.clf.predict_proba(Xtest)[:, 1]
    per_learner[l1.unique_name] = second_column

  row_means = np.mean(per_learner, axis=1)
  return row_means

In [None]:
y_test = average_l1_learners(l1_learners, X_test)

In [None]:
# Make the submission dataframe
submission = pd.DataFrame({'SK_ID_CURR': X_test_SK_IDS, 'TARGET': y_test})
submission.to_csv('average_stacking_submission.csv', index = False)

# Make prediction for test set and submit csv

In [None]:
def make_prediction(l1_learners, meta_clf, Xtest, extra_features, scaler_meta, feature_names=None):
  """Run the full stack: L1 predictions -> meta features -> meta prediction.

  Every learner predicts on Xtest; column 1 of its `predict_proba` becomes
  one meta feature (named by `unique_name`). Optionally, selected columns
  of Xtest are appended and the frame is transformed with `scaler_meta`
  (transform only - the scaler is not refitted here).

  Args:
    l1_learners: iterable of learners exposing `clf.predict_proba` and
      `unique_name`.
    meta_clf: classifier whose `predict_proba` is applied to the meta frame.
    Xtest: 2-D array of features (indexed as Xtest[:, idx]).
    extra_features: names of the columns of Xtest to append.
    scaler_meta: object with `transform`, or None to skip scaling.
    feature_names: names of the columns of Xtest, in order. Defaults to
      `prep_train.columns[2:]` (the module-level frame), as before.
      NOTE(review): that default assumes the two dropped columns are the
      first two columns of prep_train - confirm.

  Returns:
    Whatever `meta_clf.predict_proba` returns for the meta frame.

  Raises:
    IndexError: if a name in `extra_features` is not in `feature_names`.
  """
  x_test_l1 = pd.DataFrame()

  # make prediction with L1-Learners
  for learner in l1_learners:
    x_test_l1[learner.unique_name] = learner.clf.predict_proba(Xtest)[:, 1]

  # Add extra features
  if len(extra_features) > 0:
    if feature_names is None:
      feature_names = prep_train.columns[2:]
    # Resolve the name array once instead of once per feature
    names = np.asarray(feature_names)
    for col in extra_features:
      # position of the column inside Xtest
      idx = np.where(names == col)[0][0]
      x_test_l1[col] = Xtest[:, idx]

  # Explicit None check so the scaler's truthiness is irrelevant
  if scaler_meta is not None:
    columns = x_test_l1.columns
    x_test_l1 = pd.DataFrame(scaler_meta.transform(x_test_l1), columns=columns)

  # make prediction with Meta Learner
  return meta_clf.predict_proba(x_test_l1)

In [None]:
#meta_clf = gridcv_meta_logreg.best_estimator_
#meta_clf = gridcv_meta_gdb.best_estimator_
#meta_clf = gridcv_meta_lgm.best_estimator_
#meta_clf = opt.best_estimator_
#meta_clf = gridcv_meta_sgd.best_estimator_
meta_clf = gridcv_meta_mlp.best_estimator_

y_test = make_prediction(l1_learners, meta_clf, X_test, extra_features, scaler_meta)

In [None]:
# Linear stretch of predictions to [0,1]
y_test_str = y_test[:, 1]
y_test_str = (y_test_str - y_test_str.min()) / (y_test_str.max() - y_test_str.min())

In [None]:
# Make the submission dataframe
submission = pd.DataFrame({'SK_ID_CURR': X_test_SK_IDS, 'TARGET': y_test[:,1]})
#submission = pd.DataFrame({'SK_ID_CURR': X_test_SK_IDS, 'TARGET': y_test_str})
submission.to_csv('./stacking_submissions/meta_paper/meta_2_mlp.csv', index = False)

## Plots

In [None]:
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import roc_curve, auc

In [None]:
# Load Grids 
gridcv_meta_sgd___ = joblib.load("{}/{}.pkl".format(stacking_models_dir, 'meta_2_sgd_without_domainfeatures'))
gridcv_meta_sgd_df = joblib.load("{}/{}.pkl".format(stacking_models_dir, 'meta_2_sgd_with_domainfeatures'))

gridcv_meta_logreg___ = joblib.load("{}/{}.pkl".format(stacking_models_dir, 'meta_2_logreg_without_domainfeatures'))
gridcv_meta_logreg_df = joblib.load("{}/{}.pkl".format(stacking_models_dir, 'meta_2_logreg_with_domainfeatures'))

gridcv_meta_mlp___ = joblib.load("{}/{}.pkl".format(stacking_models_dir, 'meta_2_mlp_without_domainfeatures'))
gridcv_meta_mlp_df = joblib.load("{}/{}.pkl".format(stacking_models_dir, 'meta_2_mlp_with_domainfeatures'))

In [None]:
# SGD (no domain features)
#sgd_scores_no_domain = cross_val_predict(gridcv_meta_sgd___.best_estimator_, X_train_meta, y_pred_base, cv=5, n_jobs=-1, method='predict_proba')
sgd_scores_with_domain = cross_val_predict(gridcv_meta_sgd_df.best_estimator_, X_train_meta, y_pred_base, cv=5, n_jobs=-1, method='predict_proba')

#logreg_scores_no_domain = cross_val_predict(gridcv_meta_logreg___.best_estimator_, X_train_meta, y_pred_base, cv=5, n_jobs=-1, method='predict_proba')
#logreg_scores_with_domain = cross_val_predict(gridcv_meta_logreg_df.best_estimator_, X_train_meta, y_pred_base, cv=5, n_jobs=-1, method='predict_proba')

#mlp_scores_no_domain = cross_val_predict(gridcv_meta_mlp___.best_estimator_, X_train_meta, y_pred_base, cv=5, n_jobs=-1, method='predict_proba')
#mlp_scores_with_domain = cross_val_predict(gridcv_meta_mlp_df.best_estimator_, X_train_meta, y_pred_base, cv=5, n_jobs=-1, method='predict_proba')

In [None]:
# ROC curves of the meta-learners.
# Each entry: legend label, name of the variable holding the
# cross_val_predict probabilities, and the PubLB / PriLB values shown in the
# legend (presumably public/private leaderboard scores - confirm).
roc_series = [
  ('SGD', 'sgd_scores_no_domain', 0.74842, 0.75080),
  ('SGD with EF', 'sgd_scores_with_domain', 0.75192, 0.75125),
  ('LogReg', 'logreg_scores_no_domain', 0.74802, 0.75065),
  ('LogReg with EF', 'logreg_scores_with_domain', 0.75007, 0.74938),
  ('MLP', 'mlp_scores_no_domain', 0.75456, 0.75693),
  ('MLP with EF', 'mlp_scores_with_domain', 0.75518, 0.75633),
]

for label, scores_name, pub_lb, pri_lb in roc_series:
  # Some score arrays may not exist (their cross_val_predict call can be
  # commented out); skip those instead of failing with a NameError.
  scores = globals().get(scores_name)
  if scores is None:
    print('Skipping {}: {} has not been computed'.format(label, scores_name))
    continue
  fpr, tpr, threshold = roc_curve(y_pred_base, scores[:, 1])
  roc_auc = auc(fpr, tpr)
  plt.plot(fpr, tpr, label='%s (CV=%0.5f, PubLB=%0.5f, PriLB=%0.5f)' % (label, roc_auc, pub_lb, pri_lb))

plt.grid()
plt.title('ROC Meta-Learners with Stacking (Method A)', fontsize=14)
plt.xlabel('False positive rate', fontsize=14)
plt.ylabel('True positive rate', fontsize=14)
plt.legend(loc="lower right", prop={'size': 14})
# Unlabelled diagonal reference line
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.show()

In [None]:
sns.reset_orig()