In [None]:
#17.7.2 Predict Loan Application Approval
# Initial imports
import pandas as pd
from path import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report


In [None]:
# Read the encoded loans dataset from the Resources folder into a DataFrame.
file_path = Path("./Resources/loans_data_encoded.csv")
df_loans = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first rows to sanity-check the load.
df_loans.head()

In [None]:
# Features: an independent copy of the loans frame with the "bad" target column removed.
X = df_loans.copy().drop(columns="bad")
X.head()

In [None]:
# Target: the "bad" column as a one-dimensional array of labels.
y = df_loans.loc[:, "bad"].values

# Peek at the first five labels.
y[:5]

In [None]:
# Split features and target into training and testing sets using the default
# split proportions; the fixed seed keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=78,
)


In [None]:
# Report the dimensions of each training/testing partition, one per line.
print(
    *(part.shape for part in (X_train, X_test, y_train, y_test)),
    sep="\n",
)

In [None]:
(375, 10)
(125, 10)
(375,)
(125,)

In [None]:
# Alternative partition: keep 80% of the rows for training and 20% for testing.
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    X,
    y,
    train_size=0.80,
    random_state=78,
)


In [None]:
# Report the dimensions of each partition from the 80/20 split, one per line.
print(
    *(part.shape for part in (X_train2, X_test2, y_train2, y_test2)),
    sep="\n",
)

In [None]:
(400, 10)
(100, 10)
(400,)
(100,)

In [None]:
# Build the scaler and learn its statistics from the training features only.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the same fitted transformation to both partitions.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
# import numpy as np
# np.mean(X_train_scaled___)
# np.mean(X_test_scaled___)
# np.std(X_train_scaled___)
# np.std(X_test_scaled___)
# Which of the following should go in the blank spaces (___) above to compute the mean and the standard deviation of the first column of the scaled data?
# Answer: [:,0]

In [None]:
# 17.7.3 Make Predictions and Evaluate Results
# Creating the decision tree classifier instance.
# A fixed random_state makes the fitted tree reproducible: scikit-learn randomly
# permutes the features at each split, so an unseeded tree (and every metric
# computed from it) can differ from one run to the next. The seed matches the
# one used for the train/test split.
model = tree.DecisionTreeClassifier(random_state=78)
# Fitting the model on the scaled training features and training labels.
model = model.fit(X_train_scaled, y_train)

In [None]:
# Predict a label for every row of the scaled testing features.
predictions = model.predict(X=X_test_scaled)

In [None]:
# Tabulate actual vs. predicted labels for the testing set.
cm = confusion_matrix(y_true=y_test, y_pred=predictions)

# Wrap the raw matrix in a labelled DataFrame: rows are actual classes,
# columns are predicted classes.
cm_df = pd.DataFrame(
    data=cm,
    index=["Actual 0", "Actual 1"],
    columns=["Predicted 0", "Predicted 1"],
)

cm_df

In [None]:
# What is the recall (sensitivity) for bad loans (Actual 1)?
# There are a total of 41 (22 + 19) bad loans. Nineteen out of the 41 were correctly classified as bad loans.
# Therefore, the recall is 19/41 ≈ 0.46 or 46%.


In [None]:
# Fraction of testing rows whose predicted label equals the true label.
acc_score = accuracy_score(y_true=y_test, y_pred=predictions)


In [None]:
# Accuracy = (True Positives (TP) + True Negatives (TN)) / Total = (50 + 19) / 125 = 0.552


In [None]:
# Summarise the evaluation: confusion matrix, accuracy, then the per-class report.
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print(
    "Classification Report",
    classification_report(y_test, predictions),
    sep="\n",
)