In [1]:
## ======================================================================
#            Importing the necessary modules and tools
## ======================================================================

import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

# Evaluation metrics from sklearn.metrics
from sklearn.metrics import accuracy_score


# Set notebook options
# --------------------
pd.options.display.float_format = '{:,.3f}'.format
%matplotlib inline 

In [2]:
# Read the loan dataset from its raw GitHub URL (needs network access).
url = 'https://raw.githubusercontent.com/DrSaadLa/PythonTuts/main/TreeBasedModels/loan_data.csv'
df = pd.read_csv(filepath_or_buffer=url)

In [3]:
# Data preprocessing
# ------------------
# Replace the 'purpose' column with integer codes so that every feature
# handed to the estimators is numeric.
from sklearn.preprocessing import LabelEncoder

purpose_encoder = LabelEncoder()
df['purpose'] = purpose_encoder.fit_transform(df['purpose'])

In [4]:
# Target: the 'credit.policy' column.
y = df.loc[:, 'credit.policy']
# Features: every remaining column of the frame.
X = df.drop(columns=['credit.policy'])

In [5]:
# Hold out 30% of the rows for testing. Stratifying on y keeps the class
# proportions of the target equal in both splits, and the fixed seed makes
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
    stratify=y,
)

In [6]:
## =============================================================
#         Training Bagging Classifier
## =============================================================
# Fits a bagged ensemble of decision trees on the training split, then
# reports accuracy on the held-out test split next to the out-of-bag (OOB)
# accuracy computed during fitting.
# accuracy_score is already imported in the notebook's first cell.

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier

# Instantiate the base decision tree
# ----------------------------------
dt = DecisionTreeClassifier(min_samples_leaf=8,
                            random_state=1)

# Instantiate bagging classifier
# ------------------------------
# The base estimator is passed positionally on purpose: its keyword was
# renamed from `base_estimator` to `estimator` in scikit-learn 1.2 and the
# old name was removed in 1.4. The first positional slot is the base
# estimator under both names, so this form runs on old and new releases.
bc = BaggingClassifier(dt,
                       n_estimators=500,
                       oob_score=True,   # keep the OOB estimate for later
                       random_state=1)

# Fit bc to the training set
# --------------------------
bc.fit(X_train, y_train)

# Predict test set labels
# -----------------------
y_pred = bc.predict(X_test)

# Evaluate test set accuracy
# --------------------------
# Documented argument order is (y_true, y_pred).
test_acc = accuracy_score(y_test, y_pred)

# Evaluate OOB accuracy
# ---------------------
# oob_score_ is populated by fit() because oob_score=True was requested.
oob_acc = bc.oob_score_

# Print test_acc and oob_acc
# --------------------------
print("*"*50)
print('Test set accuracy: {:.3f}'.format(test_acc))
print("*"*50)
print('The OOB accuracy: {:.3f}'.format(oob_acc))

**************************************************
Test set accuracy: 0.991
**************************************************
The OOB accuracy: 0.989


In [7]:
# Per-class precision, recall and F1 on the test split.
# accuracy_score and confusion_matrix are imported here but not used in
# this cell.
from sklearn.metrics import (
    classification_report,
    accuracy_score,
    confusion_matrix,
)

report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.99      0.97      0.98       561
           1       0.99      1.00      0.99      2313

    accuracy                           0.99      2874
   macro avg       0.99      0.98      0.99      2874
weighted avg       0.99      0.99      0.99      2874

