In [2]:
## ======================================================================
#            Importing the necessary modules and tools
## ======================================================================

import pandas as pd;
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split


# Set notebook options
# --------------------
pd.options.display.float_format = '{:,.3f}'.format
%matplotlib inline

In [3]:
url = 'https://raw.githubusercontent.com/DrSaadLa/PythonTuts/main/TreeBasedModels/loan_data.csv'
df = pd.read_csv(url)

In [4]:
# Data Preprocessing
from sklearn.preprocessing import LabelEncoder
df['purpose']=LabelEncoder().fit_transform(df['purpose'])

In [5]:
# Features
X = df.drop('credit.policy', axis = 1)
# Target
y = df['credit.policy']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3 , random_state= 1,stratify= y)

In [7]:
# Scaling the data
# ---------------
# Import Preprocessing tools
from sklearn.preprocessing import StandardScaler

scaler=StandardScaler()

X_train = scaler.fit_transform(X_train)

X_test = scaler.transform(X_test)

In [10]:
# Import models that make the ensemble 
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.ensemble import RandomForestClassifier
# from sklearn.metrics import necessary metrics
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
# Instantiate logreg
logreg = LogisticRegression(random_state=1)

# Instantiate knn
knn = KNN(n_neighbors=27)

# Instantiate dt
dt = DecisionTreeClassifier(min_samples_leaf= 0.13, 
                            random_state=1)

# Define the list classifiers
classifiers = [('Logistic Regression', logreg), 
               ('K Nearest Neighbours', knn), 
               ('Classification Tree', dt)]

# Iterate over the pre-defined list of classifiers
for clf_name, clf in classifiers:    
 
    # Fit clf to the training set
    clf.fit(X_train, y_train)    
   
    # Predict y_pred
    y_pred = clf.predict(X_test)
    
    # Calculate accuracy
    accuracy = accuracy_score(y_pred, y_test) 
   
    # Evaluate clf's accuracy on the test set
    print('{:20}: {:.3f}'.format(clf_name,  accuracy))

Logistic Regression : 0.905
K Nearest Neighbours: 0.891
Classification Tree : 0.841


In [11]:
# Import VotingClassifier from sklearn.ensemble
from sklearn.ensemble import VotingClassifier

# Instantiate a VotingClassifier vc
vc = VotingClassifier(estimators=classifiers)     

# Fit vc to the training set
vc.fit(X_train, y_train)   

# Evaluate the test set predictions
y_pred = vc.predict(X_test)

# Calculate accuracy score
accuracy = accuracy_score(y_pred, y_test)
print('Voting Classifier: {:.3f}'.format(accuracy))

Voting Classifier: 0.900


In [12]:
# from sklearn.metrics import necessary metrics
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.85      0.59      0.70       561
           1       0.91      0.97      0.94      2313

    accuracy                           0.90      2874
   macro avg       0.88      0.78      0.82      2874
weighted avg       0.90      0.90      0.89      2874

