In [1]:
## ======================================================================
#            Importing the necessary modules and tools
## ======================================================================

import pandas as pd; import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

# Import DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier

# Import BaggingClassifier
from sklearn.ensemble import BaggingClassifier

# from sklearn.metrics import necessary metrics
from sklearn.metrics import accuracy_score


# Set notebook options
# --------------------
pd.options.display.float_format = '{:,.3f}'.format
%matplotlib inline 

In [2]:
url = 'https://raw.githubusercontent.com/DrSaadLa/PythonTuts/main/TreeBasedModels/loan_data.csv'
df = pd.read_csv(url)

In [3]:
# Data Preprocessing
from sklearn.preprocessing import LabelEncoder
df['purpose']=LabelEncoder().fit_transform(df['purpose'])

In [4]:
# Features
X = df.drop('credit.policy', axis = 1)
# Target
y = df['credit.policy']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3 , random_state= 1,stratify= y)

In [6]:
## =========================================================
#.      Gradient Boosting Classifier Hyperparameter Tuning 
#.      using GridSearch CV
## =========================================================

# Import GridsearchCV
# -------------------
from sklearn.model_selection import GridSearchCV
# Import GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingClassifier

# Create a dictionary of the grid
# -----------------------------
gbm_grid = {
    "n_estimators":[100,150,200],
    "learning_rate": [0.01, 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 1],
    "max_depth":[3, 5, 8],
    "max_features":["log2","sqrt"],
    }

# Instantiate the grid search CV
# ------------------------------
grid_gb = GridSearchCV(
            estimator=GradientBoostingClassifier(random_state=1),
            param_grid= gbm_grid,
            cv=5, 
            n_jobs=8,
            verbose = 1) 

In [7]:
# Fit the grid seach object to the training data
# ---------------------------------------------
grid_gb.fit(X_train, y_train)

Fitting 5 folds for each of 144 candidates, totalling 720 fits


GridSearchCV(cv=5, estimator=GradientBoostingClassifier(random_state=1),
             n_jobs=8,
             param_grid={'learning_rate': [0.01, 0.05, 0.1, 0.3, 0.5, 0.7, 0.9,
                                           1],
                         'max_depth': [3, 5, 8],
                         'max_features': ['log2', 'sqrt'],
                         'n_estimators': [100, 150, 200]},
             verbose=1)

In [8]:
# Print the best parameters
# -------------------------

print("*"*40)
for hyper_name, val in grid_gb.best_params_.items():
    print("{0:18}: {1}".format(hyper_name, val))

# print the best score
# ---------------------
print("*"*40)
print('The best achieved score: {:.5f}'.format(grid_gb.best_score_))
print("*"*40)

****************************************
learning_rate     : 0.1
max_depth         : 5
max_features      : log2
n_estimators      : 200
****************************************
The best achieved score: 0.99090
****************************************


In [10]:
# from sklearn.metrics import necessary metrics

from sklearn.metrics import roc_auc_score , accuracy_score
from sklearn.metrics import classification_report, confusion_matrix

# Train the best classifier
# --------------------------
best_clf = GradientBoostingClassifier(
              random_state=1,
              learning_rate=0.7, 
              max_depth=3,
              max_features='sqrt', 
              n_estimators=100)
# Fit the best classifier
# ------------------------
best_clf.fit(X_train, y_train)

# Make predictions
# ----------------

best_preds = best_clf.predict(X_test)

print("The Gradient Boosting Classification report".center(60))
print("*"*60)
print(classification_report(y_test, best_preds))
print("*"*60)

        The Gradient Boosting Classification report         
************************************************************
              precision    recall  f1-score   support

           0       0.98      0.96      0.97       561
           1       0.99      1.00      0.99      2313

    accuracy                           0.99      2874
   macro avg       0.99      0.98      0.98      2874
weighted avg       0.99      0.99      0.99      2874

************************************************************
