In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _, names in os.walk('/kaggle/input'):
    # os.walk yields (directory, sub-directories, file names); only files are listed.
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
from sklearn.pipeline import Pipeline
from category_encoders import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder, StandardScaler,PolynomialFeatures
from sklearn.model_selection import train_test_split,GridSearchCV
import lightgbm as lgb
from sklearn.impute import SimpleImputer
import numpy as np
from sklearn.metrics import roc_auc_score

In [None]:
# Load the feature table (X) and the training labels (y) from the "creditriskessemble" input dataset.
# NOTE(review): y is kept as a whole DataFrame rather than a single column; presumably
# labels_train.csv holds one target column aligned row-for-row with features_43.csv — verify.
X = pd.read_csv("../input/creditriskessemble/features_43.csv")
y = pd.read_csv("../input/creditriskessemble/labels_train.csv")

In [None]:
def train_model(model,X,y):
    """Fit ``model`` on a training split and return its hold-out ROC AUC.

    The data is split 67/33 with a fixed seed. The model is fitted on the
    training part only, so the 33% hold-out that drives early stopping and
    produces the returned score is never seen during training. Fitting on
    the full ``X``/``y`` would leak the hold-out rows into the model and
    inflate the score.

    Parameters
    ----------
    model : estimator whose ``fit`` accepts the LightGBM scikit-learn keywords
        used below (``eval_metric``, ``eval_set``, ``eval_names``,
        ``early_stopping_rounds``, ``verbose``) and which implements
        ``predict_proba``. It is fitted in place.
    X : feature table accepted by ``train_test_split``.
    y : labels aligned row-for-row with ``X``.

    Returns
    -------
    The value of ``roc_auc_score`` computed from the second ``predict_proba``
    column on the hold-out split.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
    # NOTE: ``early_stopping_rounds`` and ``verbose`` are fit() keywords of LightGBM < 4.0;
    # newer releases expect the equivalent callbacks instead.
    model.fit(X_train, y_train, eval_metric = 'auc',
          eval_set = [(X_train, y_train), (X_test, y_test)],
          eval_names = ['train', 'valid'],
          early_stopping_rounds = 100, verbose = 200)
    prob = model.predict_proba(X_test)[:,1]
    return roc_auc_score(y_test,prob)

In [None]:
# LightGBM classifier limited to 100 boosting rounds, scored through train_model.
model_lgmb_100 = lgb.LGBMClassifier(
    n_estimators=100,
    objective='binary',
    class_weight='balanced',
    learning_rate=0.05,
    reg_alpha=0.1,
    reg_lambda=0.1,
    subsample=0.8,
    # LightGBM only bags rows when subsample_freq > 0 (its default is 0), so
    # without this setting the subsample=0.8 above would be silently ignored.
    subsample_freq=1,
    n_jobs=-1,
    random_state=50,
)

roc = train_model(model_lgmb_100, X, y)
print("LGBM n_estimators=100 roc_auc_score", roc)

In [None]:
# LightGBM classifier limited to 400 boosting rounds, scored through train_model.
model_lgmb_400 = lgb.LGBMClassifier(
    n_estimators=400,
    objective='binary',
    class_weight='balanced',
    learning_rate=0.05,
    reg_alpha=0.1,
    reg_lambda=0.1,
    subsample=0.8,
    # LightGBM only bags rows when subsample_freq > 0 (its default is 0), so
    # without this setting the subsample=0.8 above would be silently ignored.
    subsample_freq=1,
    n_jobs=-1,
    random_state=50,
)

roc = train_model(model_lgmb_400, X, y)
print("LGBM n_estimators=400 roc_auc_score", roc)

In [None]:
# LightGBM classifier limited to 1000 boosting rounds, scored through train_model.
model_lgmb_1000 = lgb.LGBMClassifier(
    n_estimators=1000,
    objective='binary',
    class_weight='balanced',
    learning_rate=0.05,
    reg_alpha=0.1,
    reg_lambda=0.1,
    subsample=0.8,
    # LightGBM only bags rows when subsample_freq > 0 (its default is 0), so
    # without this setting the subsample=0.8 above would be silently ignored.
    subsample_freq=1,
    n_jobs=-1,
    random_state=50,
)

roc = train_model(model_lgmb_1000, X, y)
print("LGBM n_estimators=1000 roc_auc_score", roc)

In [None]:
# LightGBM classifier limited to 1500 boosting rounds, scored through train_model.
model_lgmb_1500 = lgb.LGBMClassifier(
    n_estimators=1500,
    objective='binary',
    class_weight='balanced',
    learning_rate=0.05,
    reg_alpha=0.1,
    reg_lambda=0.1,
    subsample=0.8,
    # LightGBM only bags rows when subsample_freq > 0 (its default is 0), so
    # without this setting the subsample=0.8 above would be silently ignored.
    subsample_freq=1,
    n_jobs=-1,
    random_state=50,
)

roc = train_model(model_lgmb_1500, X, y)
print("LGBM n_estimators=1500 roc_auc_score", roc)

In [None]:
# LightGBM classifier limited to 2000 boosting rounds, scored through train_model.
model_lgmb_2000 = lgb.LGBMClassifier(
    n_estimators=2000,
    objective='binary',
    class_weight='balanced',
    learning_rate=0.05,
    reg_alpha=0.1,
    reg_lambda=0.1,
    subsample=0.8,
    # LightGBM only bags rows when subsample_freq > 0 (its default is 0), so
    # without this setting the subsample=0.8 above would be silently ignored.
    subsample_freq=1,
    n_jobs=-1,
    random_state=50,
)

roc = train_model(model_lgmb_2000, X, y)
print("LGBM n_estimators=2000 roc_auc_score", roc)

In [None]:
!pip install xgboost

In [None]:
import xgboost

# 67/33 hold-out split with a fixed seed; the resulting X_train / X_test /
# y_train / y_test are module-level names that later cells also use.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

# XGBoost baseline, configured through a parameter dict.
xgb_params = {
    "eval_metric": "auc",
    "max_depth": 6,
    "learning_rate": 0.25,
    "gamma": 4,
    "use_label_encoder": False,
}
xgboost_model = xgboost.XGBClassifier(**xgb_params)
xgboost_model.fit(X_train, y_train)

# Score the second predict_proba column against the hold-out labels.
prob = xgboost_model.predict_proba(X_test)[:, 1]
print("xgboost_model roc_auc_score", roc_auc_score(y_test, prob))

In [None]:
from sklearn.ensemble import VotingClassifier

# Soft-voting ensemble over the XGBoost model and the five LightGBM models.
# Every base model is listed exactly once so each contributes equally to the
# averaged probabilities; to favour one model deliberately, pass ``weights``
# to VotingClassifier instead of registering the same estimator twice.
model_votting = VotingClassifier(
    estimators=[
        ('xgboost', xgboost_model),
        ('model_lgmb_100', model_lgmb_100),
        ('model_lgmb_400', model_lgmb_400),
        ('model_lgmb_1000', model_lgmb_1000),
        ('model_lgmb_1500', model_lgmb_1500),
        ('model_lgmb_2000', model_lgmb_2000),
    ],
    voting='soft',
)
model_votting.fit(X_train, y_train)

# Hold-out ROC AUC from the second predict_proba column.
prob = model_votting.predict_proba(X_test)[:,1]
print("ROC",roc_auc_score(y_test,prob))

In [None]:
# Base learners handed to the blending / stacking ensembles below:
# the five LightGBM models followed by the XGBoost model.
blend_models = [
    model_lgmb_100,
    model_lgmb_400,
    model_lgmb_1000,
    model_lgmb_1500,
    model_lgmb_2000,
    xgboost_model,
]

In [None]:
import pickle
# Save the list of base estimators to disk (relative path, i.e. the current working directory).
# NOTE(review): the estimators are pickled in whatever fitted state they are in at this
# point; BlendingModel.fit later refits these same objects in place, and that refit is
# not reflected in this file.
filename = "blending_models.pk"  

with open(filename, 'wb') as file:  
    pickle.dump(blend_models, file)

In [None]:
class BlendingModel():
    """Two-level blending ensemble with an XGBoost meta-learner.

    The base models are fitted on one part of the data passed to ``fit``; the
    meta-learner ("blender") is then fitted on the base models' predicted
    probabilities for a held-out part that the base models have not seen.
    Training the blender on predictions for the very rows the base models were
    fitted on would teach it to trust whichever base model overfits the most.

    Parameters
    ----------
    models : list of estimators exposing ``fit(X, y)`` and ``predict_proba(X)``.
        The estimators are fitted in place (they are not cloned).
    holdout_size : float, int or None, default 0.33
        ``test_size`` passed to ``train_test_split`` to reserve rows for the
        blender. ``None`` or ``0`` disables the split: base models and blender
        are then fitted on the same rows.
    random_state : int, default 42
        Seed for the hold-out split.
    """
    def __init__(self, models, holdout_size=0.33, random_state=42):
        self.models = models
        self.holdout_size = holdout_size
        self.random_state = random_state
        self.blender = xgboost.XGBClassifier(eval_metric="auc",max_depth=3,learning_rate=0.25,gamma=4,use_label_encoder=False)

    def _meta_features(self, X):
        """Return a DataFrame with one column per base model (keyed by its
        position in ``self.models``) holding the second ``predict_proba`` column."""
        return pd.DataFrame(
            {i: model.predict_proba(X)[:,1] for i, model in enumerate(self.models)}
        )

    def fit(self,X,y):
        """Fit the base models, then the blender; returns ``self``."""
        if self.holdout_size:
            X_base, X_blend, y_base, y_blend = train_test_split(
                X, y, test_size=self.holdout_size, random_state=self.random_state
            )
        else:
            # Split disabled: everything is fitted on the same rows.
            X_base, X_blend, y_base, y_blend = X, X, y, y
        for model in self.models:
            model.fit(X_base, y_base)
        self.blender.fit(self._meta_features(X_blend), y_blend)
        return self

    def predict_proba(self,X):
        """Return the blender's ``predict_proba`` on the base models' probabilities for ``X``."""
        return self.blender.predict_proba(self._meta_features(X))
        
        

In [None]:
# Wrap the base learners in the custom blending ensemble.
blending = BlendingModel(models=blend_models)

In [None]:
# Train the blending ensemble on the training split; the base learners are refit in place.
blending.fit(X=X_train, y=y_train)

In [None]:
# Hold-out ROC AUC of the blended prediction (second predict_proba column).
prob = blending.predict_proba(X_test)[:, 1]
blending_auc = roc_auc_score(y_test, prob)
print("Blending ROC", blending_auc)

In [None]:
# Save the fitted BlendingModel instance (base learners plus blender) to disk.
# NOTE(review): pickle stores only a reference to the BlendingModel class, so loading
# this file presumably requires the same class definition to be available in the
# loading session — confirm before relying on the artifact elsewhere.
filename = "blending_model.pk"  

with open(filename, 'wb') as file:  
    pickle.dump(blending, file)

In [None]:
!pip install mlens

In [None]:

from mlens.ensemble import SuperLearner
# NOTE(review): LogisticRegression, RandomForestClassifier and SVC are imported here
# but not referenced anywhere in the visible notebook.
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# --- Build ---

# Passing a scoring function will create cv scores during fitting
# the scorer should be a simple function accepting two vectors and returning a scalar
ensemble = SuperLearner(scorer=roc_auc_score, random_state=12345)

# Build the first layer
ensemble.add(blend_models)

# Attach the final meta estimator
# NOTE(review): xgboost_model is also the last element of blend_models, so the same
# estimator object is passed both as a base learner and as the meta learner —
# confirm mlens works on copies rather than on this shared instance.
ensemble.add_meta(xgboost_model)

# --- Use ---

# Fit ensemble
ensemble.fit(X_train,y_train)

# Predict
# NOTE(review): the AUC below is computed from the output of predict(); if that output
# is hard class labels rather than probabilities, the score is not comparable with the
# probability-based AUCs printed by the other cells — verify and score probabilities.
preds = ensemble.predict(X_test) 
print("SuperLearner ROC",roc_auc_score(y_test,preds))

In [None]:
# Echo the SuperLearner predictions stored in `preds` as the cell output.
preds

In [None]:
 # Call predict_proba on the fitted SuperLearner for the hold-out features and show the result.
 # NOTE(review): this statement starts with a stray space; IPython appears to tolerate
 # that, but the same line is an IndentationError in a plain Python script — confirm.
 ensemble.predict_proba(X_test) 