In [1]:
## ======================================================================
#            Importing the necessary modules and tools
## ======================================================================

import pandas as pd; import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

# Import DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier

# Import BaggingClassifier
from sklearn.ensemble import BaggingClassifier

# Import the evaluation metrics
from sklearn.metrics import accuracy_score


# Notebook display settings
# -------------------------
# Show floats in DataFrame output with thousands separators and 3 decimals.
pd.set_option('display.float_format', '{:,.3f}'.format)
# IPython magic: select the inline matplotlib backend so figures are rendered
# in the notebook output instead of a separate window.
%matplotlib inline 

In [2]:
# Location of the loan dataset (CSV file hosted on GitHub).
url = 'https://raw.githubusercontent.com/DrSaadLa/PythonTuts/main/TreeBasedModels/loan_data.csv'

# Download and parse the CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer=url)

In [3]:
# Data Preprocessing
# ------------------
# Replace the 'purpose' column with the integer codes produced by a
# LabelEncoder fitted on that same column.
from sklearn.preprocessing import LabelEncoder

purpose_codes = LabelEncoder().fit_transform(df['purpose'])
df['purpose'] = purpose_codes

In [4]:
# Name of the column the models are trained to predict.
target_col = 'credit.policy'

# Target: the label column on its own
y = df[target_col]
# Features: every remaining column
X = df.drop(columns=target_col)

In [5]:
# Hold out 30% of the rows as a test set. The split is stratified on the
# target and seeded so that it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
    stratify=y,
)

In [6]:
## ========================================================
#      AdaBoost hyperparameter tuning with a decision tree
#      as the base learner, using GridSearchCV
## ========================================================
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import AdaBoostClassifier

# Metrics used when evaluating the tuned model
from sklearn.metrics import roc_auc_score, roc_curve, auc

# Create the estimator with dtree
# -------------------------------
# The weak learner is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name
# was removed in 1.4, whereas the first positional slot works on every version.
ada_tree = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=3, random_state=1),
    random_state=1,
)

# Candidate values: 2 x 2 = 4 hyperparameter combinations.
ada_param_grid = {
    'n_estimators': [10, 100],
    'learning_rate': [0.01, 1],
}

# cv=5 spells out the 5-fold default this cell was run with.
# No `scoring` is given, so candidates are ranked by the estimator's own
# score method.
ada_gs = GridSearchCV(
    estimator=ada_tree,
    param_grid=ada_param_grid,
    cv=5,
    verbose=1,
)

# Fitting the grid
# ---------------
# `fit` returns the search object itself, so `ada_gs_model` and `ada_gs`
# refer to the same fitted GridSearchCV instance.
ada_gs_model = ada_gs.fit(X_train, y_train)

Fitting 5 folds for each of 4 candidates, totalling 20 fits


In [7]:
# Report the winning hyperparameter combination and its mean
# cross-validated score, framed by separator lines.
separator = "*"*40
row_template = "{:15s} : {}"

print(separator)

for hyper_name, val in ada_gs_model.best_params_.items():
    print(row_template.format(hyper_name, val))

print(separator)
print("The best score: {:.4f}".format(ada_gs_model.best_score_))
print(separator)

****************************************
learning_rate   : 1
n_estimators    : 100
****************************************
The best score: 0.9908
****************************************


In [8]:
## =======================================================
#      Fitting the tuned adaboost
## =======================================================

# Weak learner: same configuration as the tree used in the grid search.
dtree = DecisionTreeClassifier(max_depth=3, random_state=1)

# Build the final model from the hyperparameters the grid search selected
# rather than retyping them, so this cell cannot drift out of sync with the
# search. The weak learner is passed positionally because its keyword differs
# between scikit-learn versions (`base_estimator` before 1.2, `estimator` after).
ada_tuned = AdaBoostClassifier(
    dtree,
    random_state=1,
    **ada_gs_model.best_params_,
)

ada_tuned.fit(X_train, y_train)

# The accuracy of tuned model
# ---------------------------
print("*"*40)
print('The Tuned model accuracy is: {:.4}'.format(
    ada_tuned.score(X_test, y_test)))
print("*"*40)

# The Tuned model AUC score
# -------------------------
# predict_proba returns one column per class; column 1 is used as the score
# of the positive class (presumably credit.policy == 1 -- confirm against
# ada_tuned.classes_).
y_pred_ada_tuned = ada_tuned.predict_proba(X_test)
fpr_ab_tuned, tpr_ab_tuned, thresholds = roc_curve(y_test, y_pred_ada_tuned[:, 1])
auc_adaboost_tune = auc(fpr_ab_tuned, tpr_ab_tuned)

print('The Tuned Adaboost AUC Score is: {:.4f}'.format(auc_adaboost_tune))
print("*"*40)

****************************************
The Tuned model accuracy is: 0.9923
****************************************
The Tuned Adaboost AUC Score is: 0.9981
****************************************
