<a href="https://colab.research.google.com/github/njgeorge000158/Credit-Risk-Classification-Using-Scikit-Learn/blob/main/credit_risk_classification_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#*******************************************************************************************
 #
 #  File Name:  credit_risk_classification_colab.ipynb
 #
 #  File Description:
 #      This interactive Python notebook, credit_risk_classification_colab.ipynb, reads
 #      a csv file, lending_data.csv, in the folder, resources, and applies supervised
 #      learning methods to a dataset of historical lending activity from a
 #      peer-to-peer lending services company in order to compare models that can
 #      identify the creditworthiness of borrowers. Here is a list of the models:
 #
 #      logistic regression
 #      decision tree
 #      random forest
 #      support vector machine (SVM)
 #      k-nearest neighbor (KNN)
 #      gaussian naive bayes (GNB)
 #
 #
 #  Date            Description                             Programmer
 #  ----------      ------------------------------------    ------------------
 #  11/25/2023      Initial Development                     Nicholas J. George
 #
 #******************************************************************************************/

# Mount Google Drive at /content/gdrive/ so the project folder stored there is reachable.
from google.colab import drive
drive.mount('/content/gdrive/')

# Put the project folder first on the module search path.
# NOTE(review): the path is relative, so it only resolves when the working directory
# is the parent of the `gdrive` mount point -- confirm that holds for this runtime.
import sys
sys.path.insert(0,'./gdrive/MyDrive/credit_risk_classification')

# NOTE(review): HV_DOC_HTML is presumably read by HoloViews/hvPlot to choose HTML output -- confirm.
import os
os.environ['HV_DOC_HTML'] = 'true'

# Install system and Python dependencies into the runtime.
# NOTE(review): -U upgrades to the latest release and nothing is version-pinned,
# so a rerun may pick up different package versions.
!apt-get update
!apt install firefox
!pip install -U geckodriver
!pip install -U dataframe_image
!pip install -U selenium
!pip install -U kaleido
!pip install -U hvplot
!pip install -U plotly
!pip install -U panel
!pip install -U bokeh
!pip install -U imblearn

import hvplot
import hvplot.pandas

import pandas as pd

import holoviews as hv
hv.extension('bokeh')

import logx
logx.set_logs_directory_path('./gdrive/MyDrive/credit_risk_classification/logs')
logx.set_images_directory_path('./gdrive/MyDrive/credit_risk_classification/images')

import pandasx
pandasx.set_google_colab(True)

import classificationsx
import credit_risk_constants

import copy
import pickle

import numpy as np
import pandas as pd

from IPython.display import clear_output

from imblearn.combine import SMOTEENN
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import ClusterCentroids
from imblearn.under_sampling import RandomUnderSampler

from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

pd.options.mode.chained_assignment = None

Mounted at /content/gdrive/
Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [110 kB]
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Get:3 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [119 kB]
Hit:6 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease
Get:7 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease [18.1 kB]
Hit:8 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Get:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease [24.3 kB]
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:11 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1,077 kB]
Get:12 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [1,756 kB]
Get:13 http

In [None]:
# Name of this notebook file.
CONSTANT_LOCAL_FILE_NAME = 'credit_risk_classification_colab.ipynb'

# Location of the lending data CSV on the mounted Google Drive. The drive is
# mounted at /content/gdrive/, so the path has to start there; the previous
# value began with '/gdrive/', a root-level directory the mount does not create.
CONSTANT_LENDING_DATA_CSV_FILE_PATH \
    = '/content/gdrive/MyDrive/credit_risk_classification/resources/lending_data.csv'


# NOTE(review): presumably these switch off logx's log-file and image-file
# output -- confirm against the logx module.
logx.set_log_mode(False)

logx.set_image_mode(False)


logx.begin_program('credit_risk_classification_colab')

Program execution begins...



# <br> **Section 1: Extraction and Transformation**

## **1.1: Read the CSV data into a Pandas DataFrame**

In [None]:
# Explicit dtype for every column of the lending CSV.
data_type_dictionary = dict(
    loan_size=float,
    interest_rate=float,
    borrower_income=int,
    debt_to_income=float,
    num_of_accounts=int,
    derogatory_marks=int,
    total_debt=int,
    loan_status=int,
)

# Read the CSV with those dtypes, then log the resulting DataFrame.
lending_dataframe = pd.read_csv(
    credit_risk_constants.CONSTANT_INPUT_FILE_PATH,
    dtype=data_type_dictionary,
)

logx.log_write_object(lending_dataframe)

## **1.2: Display Lending DataFrame**

In [None]:
# Show the lending table with currency and percent formatting on selected columns.
(
    pandasx.return_formatted_table(lending_dataframe, 'Table 1.1: Lending Data Table')
    .format(
        {
            'loan_size': '${:,.0f}',
            'interest_rate': '{:.2f}%',
            'borrower_income': '${:,.0f}',
            'total_debt': '${:,.0f}',
        }
    )
)

loan_size,interest_rate,borrower_income,debt_to_income,num_of_accounts,derogatory_marks,total_debt,loan_status
"$10,700",7.67%,"$52,800",0.431818,5,1,"$22,800",0
"$8,400",6.69%,"$43,600",0.311927,3,0,"$13,600",0
"$9,000",6.96%,"$46,100",0.349241,3,0,"$16,100",0
"$10,700",7.66%,"$52,700",0.43074,5,1,"$22,700",0
"$10,800",7.70%,"$53,000",0.433962,5,1,"$23,000",0
"$10,100",7.44%,"$50,600",0.407115,4,1,"$20,600",0
"$10,300",7.49%,"$51,100",0.412916,4,1,"$21,100",0
"$8,800",6.86%,"$45,100",0.334812,3,0,"$15,100",0
"$9,300",7.10%,"$47,400",0.367089,3,0,"$17,400",0
"$9,700",7.25%,"$48,800",0.385246,4,0,"$18,800",0


## **1.3: Create the labels series (`y`) from the `loan_status` column, and then create the features (`X`) DataFrame from the remaining columns.**

### **Separate the Y Variable, The Labels**

In [None]:
# Labels: the loan_status column of the lending data.
y_series = lending_dataframe['loan_status']

logx.log_write_object(y_series)

### **Review the Y Series**

In [None]:
# Render the label series as a one-column table with the pandasx table helper.
pandasx.return_formatted_table(y_series.to_frame(), 'Table 1.3.1: Credit Risk Target Series')

loan_status
0
0
0
0
0
0
0
0
0
0


### **Check the Balance of the Labels Variable (`y`) by Using the `value_counts` Function.**

In [None]:
# Count the rows per loan_status value to see how imbalanced the labels are.
y_series.value_counts()

loan_status
0    75036
1     2500
Name: count, dtype: int64

### **Separate the X Variable, the Features**

In [None]:
# Features: every column except the loan_status label. `columns=` already
# names the column axis, so the redundant `axis = 1` argument is not passed.
x_dataframe = lending_dataframe.drop(columns = 'loan_status')

logx.log_write_object(x_dataframe)

### **Review the X DataFrame**

In [None]:
# Render the features DataFrame with the pandasx table helper.
pandasx.return_formatted_table(x_dataframe, 'Table 1.3.2: Credit Risk Features DataFrame')

loan_size,interest_rate,borrower_income,debt_to_income,num_of_accounts,derogatory_marks,total_debt
10700.0,7.67,52800,0.43,5,1,22800
8400.0,6.69,43600,0.31,3,0,13600
9000.0,6.96,46100,0.35,3,0,16100
10700.0,7.66,52700,0.43,5,1,22700
10800.0,7.7,53000,0.43,5,1,23000
10100.0,7.44,50600,0.41,4,1,20600
10300.0,7.49,51100,0.41,4,1,21100
8800.0,6.86,45100,0.33,3,0,15100
9300.0,7.1,47400,0.37,3,0,17400
9700.0,7.25,48800,0.39,4,0,18800


## **1.4: Split the Data into Training and Testing Datasets by Using `train_test_split`.**

In [None]:
# Split features and labels into training and test sets (default split size),
# seeded so the split is repeatable.
(
    x_train_dataframe,
    x_test_dataframe,
    y_train_series,
    y_test_series,
) = train_test_split(
    x_dataframe,
    y_series,
    random_state=credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
)

# Log each of the four pieces.
logx.log_write_object(x_train_dataframe)

logx.log_write_object(x_test_dataframe)

logx.log_write_object(y_train_series)

logx.log_write_object(y_test_series)

## **1.5: Use the StandardScaler to Scale the X Variables**

### **Scale Training and Test Data as Numpy Arrays**

In [None]:
# Standardize the training features; the scaler is fitted on the training data itself.
x_train_scaled_nparray = (
    StandardScaler()
    .fit_transform(x_train_dataframe)
)

logx.log_write_object(x_train_scaled_nparray)

In [None]:
# Scale the test features with statistics learned from the TRAINING features only.
# Fitting a separate scaler on the test set (as before) puts the two sets on
# different scales and lets test-set statistics leak into the evaluation.
x_test_scaled_nparray \
    = StandardScaler().fit(x_train_dataframe).transform(x_test_dataframe)

logx.log_write_object(x_test_scaled_nparray)

### **Create Scaled X Variable DataFrames**

In [None]:
# Wrap the scaled training array in a DataFrame that reuses the original
# column names and row index.
x_train_scaled_dataframe = pd.DataFrame(
    data=x_train_scaled_nparray,
    index=x_train_dataframe.index,
    columns=x_train_dataframe.columns,
)

logx.log_write_object(x_train_scaled_dataframe)

In [None]:
# Wrap the scaled test array in a DataFrame that reuses the original
# column names and row index.
x_test_scaled_dataframe = pd.DataFrame(
    data=x_test_scaled_nparray,
    index=x_test_dataframe.index,
    columns=x_test_dataframe.columns,
)

logx.log_write_object(x_test_scaled_dataframe)

### **Display Scaled Training and Testing Data**

In [None]:
# Show the scaled training features.
pandasx.return_formatted_table(
    x_train_scaled_dataframe,
    'Table 1.5.1: Credit Risk Scaled Features Training Data',
)

loan_size,interest_rate,borrower_income,debt_to_income,num_of_accounts,derogatory_marks,total_debt
-0.34,-0.33,-0.34,-0.29,-0.43,-0.67,-0.34
-0.57,-0.6,-0.6,-0.68,-0.43,-0.67,-0.6
0.38,0.39,0.39,0.63,0.61,1.04,0.39
-0.57,-0.57,-0.56,-0.63,-0.43,-0.67,-0.56
-1.0,-0.98,-0.98,-1.33,-0.96,-0.67,-0.98
-0.1,-0.11,-0.11,0.02,0.09,-0.67,-0.11
-0.77,-0.78,-0.78,-0.98,-0.96,-0.67,-0.78
-1.05,-1.06,-1.06,-1.49,-0.96,-0.67,-1.06
-0.96,-0.97,-0.97,-1.31,-0.96,-0.67,-0.97
0.19,0.17,0.18,0.38,0.09,1.04,0.18


In [None]:
# Show the scaled test features.
pandasx.return_formatted_table(
    x_test_scaled_dataframe,
    'Table 1.5.2: Credit Risk Scaled Features Test Data',
)

loan_size,interest_rate,borrower_income,debt_to_income,num_of_accounts,derogatory_marks,total_debt
-0.15,-0.13,-0.14,-0.02,0.09,-0.68,-0.14
0.04,0.05,0.04,0.22,0.09,-0.68,0.04
-0.68,-0.66,-0.65,-0.77,-0.44,-0.68,-0.65
-0.97,-0.96,-0.95,-1.29,-0.97,-0.68,-0.95
0.33,0.35,0.36,0.59,0.09,1.04,0.36
0.24,0.25,0.25,0.47,0.09,1.04,0.25
-0.05,-0.05,-0.04,0.11,0.09,-0.68,-0.04
-0.15,-0.17,-0.17,-0.06,0.09,-0.68,-0.17
0.04,0.03,0.03,0.2,0.09,-0.68,0.03
0.33,0.35,0.35,0.58,0.09,1.04,0.35


# <br> **Section 2: Undersampled and Oversampled Credit Risk Data**

## **2.1: Instantiate the Random Undersampler Instance**

In [None]:
# Randomly undersample the scaled training data (seeded).
x_train_scaled_undersampled_dataframe, y_train_undersampled_series = (
    RandomUnderSampler(random_state=credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1)
    .fit_resample(x_train_scaled_dataframe, y_train_series)
)

In [None]:
# Log the undersampled training features and labels.
logx.log_write_object(x_train_scaled_undersampled_dataframe)

logx.log_write_object(y_train_undersampled_series)

## **2.2: Instantiate the Random Oversampler Instance**

In [None]:
# Randomly oversample the scaled training data (seeded).
x_train_scaled_oversampled_dataframe, y_train_oversampled_series = (
    RandomOverSampler(random_state=credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1)
    .fit_resample(x_train_scaled_dataframe, y_train_series)
)

In [None]:
# Log the oversampled training features and labels produced by the
# RandomOverSampler cell (this cell previously re-logged the undersampled
# objects instead).
logx.log_write_object(x_train_scaled_oversampled_dataframe)

logx.log_write_object(y_train_oversampled_series)

## **2.3: Instantiate the Cluster Centroids Instance**

In [None]:
# Resample the scaled training data with ClusterCentroids, using a seeded
# KMeans as its estimator.
x_train_scaled_cluster_centroids_dataframe, y_train_cluster_centroids_series = (
    ClusterCentroids(
        estimator=KMeans(
            n_init='auto',
            random_state=credit_risk_constants.CONSTANT_ML_RANDOM_STATE_2,
        ),
        random_state=credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
    )
    .fit_resample(x_train_scaled_dataframe, y_train_series)
)

In [None]:
# Log the cluster-centroids training features and labels.
logx.log_write_object(x_train_scaled_cluster_centroids_dataframe)

logx.log_write_object(y_train_cluster_centroids_series)

## **2.4: Instantiate the SMOTE Instance**

In [None]:
# Resample the scaled training data with SMOTE (seeded, 'auto' sampling strategy).
x_train_scaled_SMOTE_dataframe, y_train_SMOTE_series = (
    SMOTE(
        random_state=credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
        sampling_strategy='auto',
    )
    .fit_resample(x_train_scaled_dataframe, y_train_series)
)

In [None]:
# Log the SMOTE training features and labels.
logx.log_write_object(x_train_scaled_SMOTE_dataframe)

logx.log_write_object(y_train_SMOTE_series)

## **2.5: Instantiate the SMOTEENN Instance**

In [None]:
# Resample the scaled training data with SMOTEENN (seeded).
x_train_scaled_SMOTEENN_dataframe, y_train_SMOTEENN_series = (
    SMOTEENN(random_state=credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1)
    .fit_resample(x_train_scaled_dataframe, y_train_series)
)

In [None]:
# Log the SMOTEENN training features and labels.
logx.log_write_object(x_train_scaled_SMOTEENN_dataframe)

logx.log_write_object(y_train_SMOTEENN_series)

## **2.6: Check the Balance of the Labels Variable (`y`) by Using the `value_counts` Function.**

In [None]:
# Class counts after random undersampling.
y_train_undersampled_series.value_counts()

loan_status
0    1890
1    1890
Name: count, dtype: int64

In [None]:
# Class counts after random oversampling.
y_train_oversampled_series.value_counts()

loan_status
0    56262
1    56262
Name: count, dtype: int64

In [None]:
# Class counts after cluster-centroids resampling.
y_train_cluster_centroids_series.value_counts()

loan_status
0    1890
1    1890
Name: count, dtype: int64

In [None]:
# Class counts after SMOTE resampling.
y_train_SMOTE_series.value_counts()

loan_status
0    56262
1    56262
Name: count, dtype: int64

In [None]:
# Class counts after SMOTEENN resampling.
y_train_SMOTEENN_series.value_counts()

loan_status
0    55873
1    54849
Name: count, dtype: int64

## **2.7: Display Scaled Resampled Training Data**

In [None]:
# Show the undersampled scaled training features.
pandasx.return_formatted_table(
    x_train_scaled_undersampled_dataframe,
    'Table 2.7.1: Scaled Features Training Undersampled Data',
)

loan_size,interest_rate,borrower_income,debt_to_income,num_of_accounts,derogatory_marks,total_debt
-0.62,-0.63,-0.63,-0.74,-0.43,-0.67,-0.63
1.05,1.05,1.06,1.3,1.14,1.04,1.06
0.14,0.12,0.12,0.31,0.09,1.04,0.12
0.19,0.21,0.2,0.41,0.09,1.04,0.2
-0.1,-0.1,-0.1,0.04,0.09,-0.67,-0.1
0.71,0.71,0.71,0.97,0.61,1.04,0.71
-0.81,-0.81,-0.81,-1.04,-0.96,-0.67,-0.81
-0.48,-0.49,-0.49,-0.52,-0.43,-0.67,-0.49
-0.62,-0.64,-0.63,-0.74,-0.43,-0.67,-0.63
0.62,0.6,0.61,0.86,0.61,1.04,0.61


In [None]:
# Show the oversampled scaled training features.
pandasx.return_formatted_table(
    x_train_scaled_oversampled_dataframe,
    'Table 2.7.2: Scaled Features Training Oversampled Data',
)

loan_size,interest_rate,borrower_income,debt_to_income,num_of_accounts,derogatory_marks,total_debt
-0.34,-0.33,-0.34,-0.29,-0.43,-0.67,-0.34
-0.57,-0.6,-0.6,-0.68,-0.43,-0.67,-0.6
0.38,0.39,0.39,0.63,0.61,1.04,0.39
-0.57,-0.57,-0.56,-0.63,-0.43,-0.67,-0.56
-1.0,-0.98,-0.98,-1.33,-0.96,-0.67,-0.98
-0.1,-0.11,-0.11,0.02,0.09,-0.67,-0.11
-0.77,-0.78,-0.78,-0.98,-0.96,-0.67,-0.78
-1.05,-1.06,-1.06,-1.49,-0.96,-0.67,-1.06
-0.96,-0.97,-0.97,-1.31,-0.96,-0.67,-0.97
0.19,0.17,0.18,0.38,0.09,1.04,0.18


In [None]:
# Show the cluster-centroids scaled training features.
pandasx.return_formatted_table(
    x_train_scaled_cluster_centroids_dataframe,
    'Table 2.7.3: Scaled Features Training Cluster Centroids Data',
)

loan_size,interest_rate,borrower_income,debt_to_income,num_of_accounts,derogatory_marks,total_debt
0.81,0.8,0.8,1.05,0.61,1.04,0.8
-0.48,-0.48,-0.48,-0.5,-0.43,-0.67,-0.48
-1.1,-1.08,-1.09,-1.53,-0.96,-0.67,-1.09
4.81,4.8,4.8,3.52,4.81,2.76,4.8
0.28,0.3,0.3,0.52,0.09,1.04,0.3
-0.05,-0.05,-0.05,0.1,0.09,-0.67,-0.05
-0.77,-0.75,-0.74,-0.92,-0.96,-0.67,-0.74
-0.29,-0.27,-0.28,-0.21,-0.43,-0.67,-0.28
-1.62,-1.63,-1.62,-2.69,-1.48,-0.67,-1.62
0.38,0.37,0.37,0.6,0.61,1.04,0.37


In [None]:
# Show the SMOTE scaled training features.
pandasx.return_formatted_table(
    x_train_scaled_SMOTE_dataframe,
    'Table 2.7.4: Scaled Features Training SMOTE Data',
)

loan_size,interest_rate,borrower_income,debt_to_income,num_of_accounts,derogatory_marks,total_debt
-0.34,-0.33,-0.34,-0.29,-0.43,-0.67,-0.34
-0.57,-0.6,-0.6,-0.68,-0.43,-0.67,-0.6
0.38,0.39,0.39,0.63,0.61,1.04,0.39
-0.57,-0.57,-0.56,-0.63,-0.43,-0.67,-0.56
-1.0,-0.98,-0.98,-1.33,-0.96,-0.67,-0.98
-0.1,-0.11,-0.11,0.02,0.09,-0.67,-0.11
-0.77,-0.78,-0.78,-0.98,-0.96,-0.67,-0.78
-1.05,-1.06,-1.06,-1.49,-0.96,-0.67,-1.06
-0.96,-0.97,-0.97,-1.31,-0.96,-0.67,-0.97
0.19,0.17,0.18,0.38,0.09,1.04,0.18


In [None]:
# Show the SMOTEENN scaled training features.
pandasx.return_formatted_table(
    x_train_scaled_SMOTEENN_dataframe,
    'Table 2.7.5: Scaled Features Training SMOTEENN Data',
)

loan_size,interest_rate,borrower_income,debt_to_income,num_of_accounts,derogatory_marks,total_debt
-0.34,-0.33,-0.34,-0.29,-0.43,-0.67,-0.34
-0.57,-0.6,-0.6,-0.68,-0.43,-0.67,-0.6
0.38,0.39,0.39,0.63,0.61,1.04,0.39
-0.57,-0.57,-0.56,-0.63,-0.43,-0.67,-0.56
-1.0,-0.98,-0.98,-1.33,-0.96,-0.67,-0.98
-0.1,-0.11,-0.11,0.02,0.09,-0.67,-0.11
-0.77,-0.78,-0.78,-0.98,-0.96,-0.67,-0.78
-1.05,-1.06,-1.06,-1.49,-0.96,-0.67,-1.06
-0.96,-0.97,-0.97,-1.31,-0.96,-0.67,-0.97
0.19,0.17,0.18,0.38,0.09,1.04,0.18


# <br> **Section 3: Logistic Regression Models**

## **3.1: Fit Models by Using the Scaled Training Data**

### **Original**

In [None]:
# Load the pickled grid-search object. The context manager closes the file
# handle, which the previous `pickle.load(open(...))` left open.
# NOTE: pickle can execute arbitrary code while loading; only load files
# produced by this project.
with open(credit_risk_constants.CONSTANT_LR_GRID_SEARCH_MODEL_FILE_PATH, 'rb') as grid_search_file:
    lr_grid_search_model = pickle.load(grid_search_file)

# Fit a logistic regression on the scaled training data using the
# class_weight and solver stored in the search object's best_params_.
logistic_regression_model \
    = LogisticRegression \
        (class_weight = lr_grid_search_model.best_params_['class_weight'],
         solver = lr_grid_search_model.best_params_['solver'],
         max_iter = credit_risk_constants.CONSTANT_ML_LR_MAX_ITERATIONS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_dataframe, y_train_series)

### **Random Undersampling**

In [None]:
# Load the pickled grid-search object. The context manager closes the file
# handle, which the previous `pickle.load(open(...))` left open.
# NOTE: pickle can execute arbitrary code while loading; only load files
# produced by this project.
with open(credit_risk_constants.CONSTANT_LR_UNDERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    lr_undersampled_grid_search_model = pickle.load(grid_search_file)

# Fit a logistic regression on the undersampled scaled training data using
# the class_weight and solver stored in the search object's best_params_.
logistic_regression_undersampled_model \
    = LogisticRegression \
        (class_weight = lr_undersampled_grid_search_model.best_params_['class_weight'],
         solver = lr_undersampled_grid_search_model.best_params_['solver'],
         max_iter = credit_risk_constants.CONSTANT_ML_LR_MAX_ITERATIONS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_undersampled_dataframe, y_train_undersampled_series)

### **Random Oversampling**

In [None]:
# Load the pickled grid-search object. The context manager closes the file
# handle, which the previous `pickle.load(open(...))` left open.
# NOTE: pickle can execute arbitrary code while loading; only load files
# produced by this project.
with open(credit_risk_constants.CONSTANT_LR_OVERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    lr_oversampled_grid_search_model = pickle.load(grid_search_file)

# Fit a logistic regression on the oversampled scaled training data using
# the class_weight and solver stored in the search object's best_params_.
logistic_regression_oversampled_model \
    = LogisticRegression \
        (class_weight = lr_oversampled_grid_search_model.best_params_['class_weight'],
         solver = lr_oversampled_grid_search_model.best_params_['solver'],
         max_iter = credit_risk_constants.CONSTANT_ML_LR_MAX_ITERATIONS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_oversampled_dataframe, y_train_oversampled_series)

### **Cluster Centroids**

In [None]:
# Load the pickled grid-search object. The context manager closes the file
# handle, which the previous `pickle.load(open(...))` left open.
# NOTE: pickle can execute arbitrary code while loading; only load files
# produced by this project.
with open(credit_risk_constants.CONSTANT_LR_CLUSTER_CENTROIDS_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    lr_cluster_centroids_grid_search_model = pickle.load(grid_search_file)

# Fit a logistic regression on the cluster-centroids scaled training data
# using the class_weight and solver stored in the search object's best_params_.
logistic_regression_cluster_centroids_model \
    = LogisticRegression \
        (class_weight = lr_cluster_centroids_grid_search_model.best_params_['class_weight'],
         solver = lr_cluster_centroids_grid_search_model.best_params_['solver'],
         max_iter = credit_risk_constants.CONSTANT_ML_LR_MAX_ITERATIONS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_cluster_centroids_dataframe, y_train_cluster_centroids_series)

### **SMOTE**

In [None]:
# Load the pickled grid-search object. The context manager closes the file
# handle, which the previous `pickle.load(open(...))` left open.
# NOTE: pickle can execute arbitrary code while loading; only load files
# produced by this project.
with open(credit_risk_constants.CONSTANT_LR_SMOTE_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    lr_SMOTE_grid_search_model = pickle.load(grid_search_file)

# Fit a logistic regression on the SMOTE scaled training data using the
# class_weight and solver stored in the search object's best_params_.
logistic_regression_SMOTE_model \
    = LogisticRegression \
        (class_weight = lr_SMOTE_grid_search_model.best_params_['class_weight'],
         solver = lr_SMOTE_grid_search_model.best_params_['solver'],
         max_iter = credit_risk_constants.CONSTANT_ML_LR_MAX_ITERATIONS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_SMOTE_dataframe, y_train_SMOTE_series)

### **SMOTEENN**

In [None]:
# Load the pickled grid-search object. The context manager closes the file
# handle, which the previous `pickle.load(open(...))` left open.
# NOTE: pickle can execute arbitrary code while loading; only load files
# produced by this project.
with open(credit_risk_constants.CONSTANT_LR_SMOTEENN_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    lr_SMOTEENN_grid_search_model = pickle.load(grid_search_file)

# Fit a logistic regression on the SMOTEENN scaled training data using the
# class_weight and solver stored in the search object's best_params_.
logistic_regression_SMOTEENN_model \
    = LogisticRegression \
        (class_weight = lr_SMOTEENN_grid_search_model.best_params_['class_weight'],
         solver = lr_SMOTEENN_grid_search_model.best_params_['solver'],
         max_iter = credit_risk_constants.CONSTANT_ML_LR_MAX_ITERATIONS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_SMOTEENN_dataframe, y_train_SMOTEENN_series)

## **3.2: Display the Model Scores Using the Scaled Training and Testing Data**

### **Original**

In [None]:
# Score of the baseline logistic regression on the scaled training data, as a percentage.
accuracy_score_train_float = (
    logistic_regression_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Print and log the score, wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The logistic regression model score from scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe logistic regression model score from scaled training data is 99.44%[0m


In [None]:
# Score of the baseline logistic regression on the scaled test data, as a percentage.
accuracy_score_test_float = (
    logistic_regression_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Print and log the score, wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The logistic regression model score from scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe logistic regression model score from scaled test data is 99.33%[0m


### **Random Undersampling**

In [None]:
# Score of the undersampled-data model, evaluated on the original
# (non-resampled) scaled training data, as a percentage.
accuracy_score_train_float = (
    logistic_regression_undersampled_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Print and log the score, wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The logistic regression model score from undersampled scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe logistic regression model score from undersampled scaled training data is 99.44%[0m


In [None]:
# Score of the undersampled-data model on the scaled test data, as a percentage.
accuracy_score_test_float = (
    logistic_regression_undersampled_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Print and log the score, wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The logistic regression model score from undersampled scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe logistic regression model score from undersampled scaled test data is 99.34%[0m


### **Random Oversampling**

In [None]:
# Score of the oversampled-data model, evaluated on the original
# (non-resampled) scaled training data, as a percentage.
accuracy_score_train_float \
    = logistic_regression_oversampled_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

# Print and log the score in ANSI bold. The message previously misspelled
# "oversampled" as "overersampled".
logx.print_and_log_text \
    ('\033[1m'
     + 'The logistic regression model score from oversampled scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

[1mThe logistic regression model score from overersampled scaled training data is 99.43%[0m


In [None]:
# Score of the oversampled-data model on the scaled test data, as a percentage.
accuracy_score_test_float = (
    logistic_regression_oversampled_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Print and log the score, wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The logistic regression model score from oversampled scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe logistic regression model score from oversampled scaled test data is 99.34%[0m


### **Cluster Centroids**

In [None]:
# Score of the cluster-centroids model, evaluated on the original
# (non-resampled) scaled training data, as a percentage.
accuracy_score_train_float = (
    logistic_regression_cluster_centroids_model.score(x_train_scaled_dataframe, y_train_series)
    * 100
)

# Print and log the score, wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The logistic regression model score from cluster centroids scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe logistic regression model score from cluster centroids scaled training data is 99.46%[0m


In [None]:
# Score of the cluster-centroids model on the scaled test data, as a percentage.
accuracy_score_test_float = (
    logistic_regression_cluster_centroids_model.score(x_test_scaled_dataframe, y_test_series)
    * 100
)

# Print and log the score, wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The logistic regression model score from cluster centroids scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe logistic regression model score from cluster centroids scaled test data is 99.37%[0m


### **SMOTE**

In [None]:
# Score of the SMOTE model, evaluated on the original (non-resampled)
# scaled training data, as a percentage.
accuracy_score_train_float = (
    logistic_regression_SMOTE_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Print and log the score, wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The logistic regression model score from SMOTE scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe logistic regression model score from SMOTE scaled training data is 99.43%[0m


In [None]:
# Score of the SMOTE model on the scaled test data, as a percentage.
accuracy_score_test_float = (
    logistic_regression_SMOTE_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Print and log the score, wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The logistic regression model score from SMOTE scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe logistic regression model score from SMOTE scaled test data is 99.34%[0m


### **SMOTEENN**

In [None]:
# Score of the SMOTEENN model, evaluated on the original (non-resampled)
# scaled training data, as a percentage.
accuracy_score_train_float = (
    logistic_regression_SMOTEENN_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Print and log the score, wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The logistic regression model score from SMOTEENN scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe logistic regression model score from SMOTEENN scaled training data is 99.42%[0m


In [None]:
# Score of the SMOTEENN model on the scaled test data, as a percentage.
accuracy_score_test_float = (
    logistic_regression_SMOTEENN_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Print and log the score, wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The logistic regression model score from SMOTEENN scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe logistic regression model score from SMOTEENN scaled test data is 99.30%[0m


## **3.3: Calculate Training and Test Predictions**

### **Original**

In [None]:
# Predictions table for the baseline model on the scaled training data.
# NOTE(review): the table layout is defined by classificationsx -- confirm there.
lr_train_predictions_dataframe = classificationsx.return_predictions_dataframe(
    logistic_regression_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(lr_train_predictions_dataframe)

In [None]:
# Predictions table for the baseline model on the scaled test data.
# NOTE(review): the table layout is defined by classificationsx -- confirm there.
lr_test_predictions_dataframe = classificationsx.return_predictions_dataframe(
    logistic_regression_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(lr_test_predictions_dataframe)

### **Random Undersampling**

In [None]:
# Predictions table for the undersampled-data model on the original scaled training data.
lr_train_undersampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    logistic_regression_undersampled_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(lr_train_undersampled_predictions_dataframe)

In [None]:
# Predictions table for the undersampled-data model on the scaled test data.
lr_test_undersampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    logistic_regression_undersampled_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(lr_test_undersampled_predictions_dataframe)

### **Random Oversampling**

In [None]:
# Predictions table for the oversampled-data model on the original scaled training data.
lr_train_oversampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    logistic_regression_oversampled_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(lr_train_oversampled_predictions_dataframe)

In [None]:
# Predictions table for the oversampled-data model on the scaled test data.
lr_test_oversampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    logistic_regression_oversampled_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(lr_test_oversampled_predictions_dataframe)

### **Cluster Centroids**

In [None]:
# Predictions table for the cluster-centroids model on the original scaled training data.
lr_train_cluster_centroids_predictions_dataframe = classificationsx.return_predictions_dataframe(
    logistic_regression_cluster_centroids_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(lr_train_cluster_centroids_predictions_dataframe)

In [None]:
# Predictions table for the cluster-centroids model on the scaled test data.
lr_test_cluster_centroids_predictions_dataframe = classificationsx.return_predictions_dataframe(
    logistic_regression_cluster_centroids_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(lr_test_cluster_centroids_predictions_dataframe)

### **SMOTE**

In [None]:
# Predictions table for the SMOTE model on the original scaled training data.
lr_train_SMOTE_predictions_dataframe = classificationsx.return_predictions_dataframe(
    logistic_regression_SMOTE_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(lr_train_SMOTE_predictions_dataframe)

In [None]:
# Predictions table for the SMOTE model on the scaled test data.
lr_test_SMOTE_predictions_dataframe = classificationsx.return_predictions_dataframe(
    logistic_regression_SMOTE_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(lr_test_SMOTE_predictions_dataframe)

### **SMOTEENN**

In [None]:
# Predictions table for the SMOTEENN model on the original scaled training data.
lr_train_SMOTEENN_predictions_dataframe = classificationsx.return_predictions_dataframe(
    logistic_regression_SMOTEENN_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(lr_train_SMOTEENN_predictions_dataframe)

In [None]:
# Predictions table for the SMOTEENN model on the scaled test data.
# This cell previously passed logistic_regression_SMOTE_model, so the
# "SMOTEENN" test predictions were really the SMOTE model's predictions.
lr_test_SMOTEENN_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (logistic_regression_SMOTEENN_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(lr_test_SMOTEENN_predictions_dataframe)

# <br> **Section 4: Decision Tree Models**

## **4.1: Fit Models by Using the Scaled Training Data**

### **Original**

In [None]:
# Load the pickled grid-search object. The context manager closes the file
# handle, which the previous `pickle.load(open(...))` left open.
# NOTE: pickle can execute arbitrary code while loading; only load files
# produced by this project.
with open(credit_risk_constants.CONSTANT_DT_GRID_SEARCH_MODEL_FILE_PATH, 'rb') as grid_search_file:
    dt_grid_search_model = pickle.load(grid_search_file)

# Fit a decision tree on the scaled training data using the criterion,
# splitter and class_weight stored in the search object's best_params_.
decision_tree_model \
    = DecisionTreeClassifier \
        (criterion = dt_grid_search_model.best_params_['criterion'],
         splitter = dt_grid_search_model.best_params_['splitter'],
         class_weight = dt_grid_search_model.best_params_['class_weight'],
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_dataframe, y_train_series)

### **Random Undersampling**

In [None]:
# Load the pickled grid-search object. The context manager closes the file
# handle, which the previous `pickle.load(open(...))` left open.
# NOTE: pickle can execute arbitrary code while loading; only load files
# produced by this project.
with open(credit_risk_constants.CONSTANT_DT_UNDERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    dt_undersampled_grid_search_model = pickle.load(grid_search_file)

# Fit a decision tree on the undersampled scaled training data using the
# criterion, splitter and class_weight stored in the search object's best_params_.
decision_tree_undersampled_model \
    = DecisionTreeClassifier \
        (criterion = dt_undersampled_grid_search_model.best_params_['criterion'],
         splitter = dt_undersampled_grid_search_model.best_params_['splitter'],
         class_weight = dt_undersampled_grid_search_model.best_params_['class_weight'],
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_undersampled_dataframe, y_train_undersampled_series)

### **Random Oversampling**

In [None]:
# Load the pickled grid-search object. The context manager closes the file
# handle, which the previous `pickle.load(open(...))` left open.
# NOTE: pickle can execute arbitrary code while loading; only load files
# produced by this project.
with open(credit_risk_constants.CONSTANT_DT_OVERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    dt_oversampled_grid_search_model = pickle.load(grid_search_file)

# Fit a decision tree on the oversampled scaled training data using the
# criterion, splitter and class_weight stored in the search object's best_params_.
decision_tree_oversampled_model \
    = DecisionTreeClassifier \
        (criterion = dt_oversampled_grid_search_model.best_params_['criterion'],
         splitter = dt_oversampled_grid_search_model.best_params_['splitter'],
         class_weight = dt_oversampled_grid_search_model.best_params_['class_weight'],
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_oversampled_dataframe, y_train_oversampled_series)

### **Cluster Centroids**

In [None]:
# Load the pickled grid-search object. The context manager closes the file
# handle, which the previous `pickle.load(open(...))` left open.
# NOTE: pickle can execute arbitrary code while loading; only load files
# produced by this project.
with open(credit_risk_constants.CONSTANT_DT_CLUSTER_CENTROIDS_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    dt_cluster_centroids_grid_search_model = pickle.load(grid_search_file)

# Fit a decision tree on the cluster-centroids scaled training data using the
# criterion, splitter and class_weight stored in the search object's best_params_.
decision_tree_cluster_centroids_model \
    = DecisionTreeClassifier \
        (criterion = dt_cluster_centroids_grid_search_model.best_params_['criterion'],
         splitter = dt_cluster_centroids_grid_search_model.best_params_['splitter'],
         class_weight = dt_cluster_centroids_grid_search_model.best_params_['class_weight'],
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_cluster_centroids_dataframe, y_train_cluster_centroids_series)

### **SMOTE**

In [None]:
# Load the pickled grid-search object. The context manager closes the file
# handle, which the previous `pickle.load(open(...))` left open.
# NOTE: pickle can execute arbitrary code while loading; only load files
# produced by this project.
with open(credit_risk_constants.CONSTANT_DT_SMOTE_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    dt_SMOTE_grid_search_model = pickle.load(grid_search_file)

# Fit a decision tree on the SMOTE scaled training data using the criterion,
# splitter and class_weight stored in the search object's best_params_.
decision_tree_SMOTE_model \
    = DecisionTreeClassifier \
        (criterion = dt_SMOTE_grid_search_model.best_params_['criterion'],
         splitter = dt_SMOTE_grid_search_model.best_params_['splitter'],
         class_weight = dt_SMOTE_grid_search_model.best_params_['class_weight'],
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_SMOTE_dataframe, y_train_SMOTE_series)

### **SMOTEENN**

In [None]:
# Load the pickled grid-search object; the context manager guarantees the
# file handle is closed once loading finishes.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open(
    credit_risk_constants.CONSTANT_DT_SMOTEENN_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    dt_SMOTEENN_grid_search_model = pickle.load(grid_search_file)

# Fit a decision tree on the SMOTEENN scaled training data, taking criterion,
# splitter, and class_weight from the grid search's best_params_.
decision_tree_SMOTEENN_model = DecisionTreeClassifier(
    criterion = dt_SMOTEENN_grid_search_model.best_params_['criterion'],
    splitter = dt_SMOTEENN_grid_search_model.best_params_['splitter'],
    class_weight = dt_SMOTEENN_grid_search_model.best_params_['class_weight'],
    random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
).fit(x_train_scaled_SMOTEENN_dataframe, y_train_SMOTEENN_series)

## **4.2: Display the Model Scores Using the Scaled Training and Testing Data**

### **Original**

In [None]:
# Percent accuracy of the baseline decision tree on the scaled training data.
accuracy_score_train_float = (
    decision_tree_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The decision tree model score from scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe decision tree model score from scaled training data is 99.63%[0m


In [None]:
# Percent accuracy of the baseline decision tree on the scaled test data.
accuracy_score_test_float = (
    decision_tree_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The decision tree model score from scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe decision tree model score from scaled test data is 98.88%[0m


### **Random Undersampling**

In [None]:
# Percent accuracy of the undersampled-fit decision tree, measured on the
# non-resampled scaled training data.
accuracy_score_train_float = (
    decision_tree_undersampled_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The decision tree model score from undersampled scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe decision tree model score from undersampled scaled training data is 99.05%[0m


In [None]:
# Percent accuracy of the undersampled-fit decision tree on the scaled test data.
accuracy_score_test_float = (
    decision_tree_undersampled_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The decision tree model score from undersampled scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe decision tree model score from undersampled scaled test data is 98.82%[0m


### **Random Oversampling**

In [None]:
# Percent accuracy of the oversampled-fit decision tree, measured on the
# non-resampled scaled training data.
accuracy_score_train_float = (
    decision_tree_oversampled_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
# The wording matches the companion test-data message ("oversampled").
logx.print_and_log_text(
    '\033[1m'
    + 'The decision tree model score from oversampled scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe decision tree model score from overersampled scaled training data is 99.57%[0m


In [None]:
# Percent accuracy of the oversampled-fit decision tree on the scaled test data.
accuracy_score_test_float = (
    decision_tree_oversampled_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The decision tree model score from oversampled scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe decision tree model score from oversampled scaled test data is 98.81%[0m


### **Cluster Centroids**

In [None]:
# Percent accuracy of the cluster-centroids-fit decision tree, measured on the
# non-resampled scaled training data.
accuracy_score_train_float = (
    decision_tree_cluster_centroids_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The decision tree model score from cluster centroids scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe decision tree model score from cluster centroids scaled training data is 99.38%[0m


In [None]:
# Percent accuracy of the cluster-centroids-fit decision tree on the scaled test data.
accuracy_score_test_float = (
    decision_tree_cluster_centroids_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The decision tree model score from cluster centroids scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe decision tree model score from cluster centroids scaled test data is 99.01%[0m


### **SMOTE**

In [None]:
# Percent accuracy of the SMOTE-fit decision tree, measured on the
# non-resampled scaled training data.
accuracy_score_train_float = (
    decision_tree_SMOTE_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The decision tree model score from SMOTE scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe decision tree model score from SMOTE scaled training data is 99.75%[0m


In [None]:
# Percent accuracy of the SMOTE-fit decision tree on the scaled test data.
accuracy_score_test_float = (
    decision_tree_SMOTE_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The decision tree model score from SMOTE scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe decision tree model score from SMOTE scaled test data is 55.65%[0m


### **SMOTEENN**

In [None]:
# Percent accuracy of the SMOTEENN-fit decision tree, measured on the
# non-resampled scaled training data.
accuracy_score_train_float = (
    decision_tree_SMOTEENN_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The decision tree model score from SMOTEENN scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe decision tree model score from SMOTEENN scaled training data is 99.47%[0m


In [None]:
# Percent accuracy of the SMOTEENN-fit decision tree on the scaled test data.
accuracy_score_test_float = (
    decision_tree_SMOTEENN_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The decision tree model score from SMOTEENN scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe decision tree model score from SMOTEENN scaled test data is 87.83%[0m


## **4.3: Calculate Training and Test Predictions.**

### **Original**

In [None]:
# Run the classificationsx prediction helper for the baseline decision tree
# on the scaled training data.
dt_train_predictions_dataframe = classificationsx.return_predictions_dataframe(
    decision_tree_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(dt_train_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the baseline decision tree
# on the scaled test data.
dt_test_predictions_dataframe = classificationsx.return_predictions_dataframe(
    decision_tree_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(dt_test_predictions_dataframe)

### **Random Undersampling**

In [None]:
# Run the classificationsx prediction helper for the undersampled-fit
# decision tree on the (non-resampled) scaled training data.
dt_train_undersampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    decision_tree_undersampled_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(dt_train_undersampled_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the undersampled-fit
# decision tree on the scaled test data.
dt_test_undersampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    decision_tree_undersampled_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(dt_test_undersampled_predictions_dataframe)

### **Random Oversampling**

In [None]:
# Run the classificationsx prediction helper for the oversampled-fit
# decision tree on the (non-resampled) scaled training data.
dt_train_oversampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    decision_tree_oversampled_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(dt_train_oversampled_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the oversampled-fit
# decision tree on the scaled test data.
dt_test_oversampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    decision_tree_oversampled_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(dt_test_oversampled_predictions_dataframe)

### **Cluster Centroids**

In [None]:
# Run the classificationsx prediction helper for the cluster-centroids-fit
# decision tree on the (non-resampled) scaled training data.
dt_train_cluster_centroids_predictions_dataframe = classificationsx.return_predictions_dataframe(
    decision_tree_cluster_centroids_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(dt_train_cluster_centroids_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the cluster-centroids-fit
# decision tree on the scaled test data.
dt_test_cluster_centroids_predictions_dataframe = classificationsx.return_predictions_dataframe(
    decision_tree_cluster_centroids_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(dt_test_cluster_centroids_predictions_dataframe)

### **SMOTE**

In [None]:
# Run the classificationsx prediction helper for the SMOTE-fit decision tree
# on the (non-resampled) scaled training data.
dt_train_SMOTE_predictions_dataframe = classificationsx.return_predictions_dataframe(
    decision_tree_SMOTE_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(dt_train_SMOTE_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the SMOTE-fit decision tree
# on the scaled test data.
dt_test_SMOTE_predictions_dataframe = classificationsx.return_predictions_dataframe(
    decision_tree_SMOTE_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(dt_test_SMOTE_predictions_dataframe)

### **SMOTEENN**

In [None]:
# Run the classificationsx prediction helper for the SMOTEENN-fit decision
# tree on the (non-resampled) scaled training data.
dt_train_SMOTEENN_predictions_dataframe = classificationsx.return_predictions_dataframe(
    decision_tree_SMOTEENN_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(dt_train_SMOTEENN_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the SMOTEENN-fit decision
# tree on the scaled test data. The model passed here must be the SMOTEENN
# one (not the SMOTE model) so the result matches this variable's name and
# the SMOTEENN training-predictions cell.
dt_test_SMOTEENN_predictions_dataframe = classificationsx.return_predictions_dataframe(
    decision_tree_SMOTEENN_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(dt_test_SMOTEENN_predictions_dataframe)

# <br> **Section 5: Random Forest Models**

## **5.1: Fit Models by Using the Scaled Training Data.**

### **Original**

In [None]:
# Load the pickled grid-search object; the context manager guarantees the
# file handle is closed once loading finishes.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open(
    credit_risk_constants.CONSTANT_RF_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    rf_grid_search_model = pickle.load(grid_search_file)

# Fit a random forest on the scaled training data, taking criterion and
# class_weight from the grid search's best_params_.
random_forest_model = RandomForestClassifier(
    criterion = rf_grid_search_model.best_params_['criterion'],
    class_weight = rf_grid_search_model.best_params_['class_weight'],
    n_estimators = credit_risk_constants.CONSTANT_ML_RF_N_ESTIMATORS,
    random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
).fit(x_train_scaled_dataframe, y_train_series)

### **Random Undersampling**

In [None]:
# Load the pickled grid-search object; the context manager guarantees the
# file handle is closed once loading finishes.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open(
    credit_risk_constants.CONSTANT_RF_UNDERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    rf_undersampled_grid_search_model = pickle.load(grid_search_file)

# Fit a random forest on the undersampled scaled training data, taking
# criterion and class_weight from the grid search's best_params_.
random_forest_undersampled_model = RandomForestClassifier(
    criterion = rf_undersampled_grid_search_model.best_params_['criterion'],
    class_weight = rf_undersampled_grid_search_model.best_params_['class_weight'],
    n_estimators = credit_risk_constants.CONSTANT_ML_RF_N_ESTIMATORS,
    random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
).fit(x_train_scaled_undersampled_dataframe, y_train_undersampled_series)

### **Random Oversampling**

In [None]:
# Load the pickled grid-search object; the context manager guarantees the
# file handle is closed once loading finishes.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open(
    credit_risk_constants.CONSTANT_RF_OVERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    rf_oversampled_grid_search_model = pickle.load(grid_search_file)

# Fit a random forest on the oversampled scaled training data, taking
# criterion and class_weight from the grid search's best_params_.
random_forest_oversampled_model = RandomForestClassifier(
    criterion = rf_oversampled_grid_search_model.best_params_['criterion'],
    class_weight = rf_oversampled_grid_search_model.best_params_['class_weight'],
    n_estimators = credit_risk_constants.CONSTANT_ML_RF_N_ESTIMATORS,
    random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
).fit(x_train_scaled_oversampled_dataframe, y_train_oversampled_series)

### **Cluster Centroids**

In [None]:
# Load the pickled grid-search object; the context manager guarantees the
# file handle is closed once loading finishes.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open(
    credit_risk_constants.CONSTANT_RF_CLUSTER_CENTROIDS_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    rf_cluster_centroids_grid_search_model = pickle.load(grid_search_file)

# Fit a random forest on the cluster-centroids scaled training data, taking
# criterion and class_weight from the grid search's best_params_.
random_forest_cluster_centroids_model = RandomForestClassifier(
    criterion = rf_cluster_centroids_grid_search_model.best_params_['criterion'],
    class_weight = rf_cluster_centroids_grid_search_model.best_params_['class_weight'],
    n_estimators = credit_risk_constants.CONSTANT_ML_RF_N_ESTIMATORS,
    random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
).fit(x_train_scaled_cluster_centroids_dataframe, y_train_cluster_centroids_series)

### **SMOTE**

In [None]:
# Load the pickled grid-search object; the context manager guarantees the
# file handle is closed once loading finishes.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open(
    credit_risk_constants.CONSTANT_RF_SMOTE_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    rf_SMOTE_grid_search_model = pickle.load(grid_search_file)

# Fit a random forest on the SMOTE scaled training data, taking criterion
# and class_weight from the grid search's best_params_.
random_forest_SMOTE_model = RandomForestClassifier(
    criterion = rf_SMOTE_grid_search_model.best_params_['criterion'],
    class_weight = rf_SMOTE_grid_search_model.best_params_['class_weight'],
    n_estimators = credit_risk_constants.CONSTANT_ML_RF_N_ESTIMATORS,
    random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
).fit(x_train_scaled_SMOTE_dataframe, y_train_SMOTE_series)

### **SMOTEENN**

In [None]:
# Load the pickled grid-search object; the context manager guarantees the
# file handle is closed once loading finishes.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open(
    credit_risk_constants.CONSTANT_RF_SMOTEENN_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    rf_SMOTEENN_grid_search_model = pickle.load(grid_search_file)

# Fit a random forest on the SMOTEENN scaled training data, taking criterion
# and class_weight from the grid search's best_params_.
random_forest_SMOTEENN_model = RandomForestClassifier(
    criterion = rf_SMOTEENN_grid_search_model.best_params_['criterion'],
    class_weight = rf_SMOTEENN_grid_search_model.best_params_['class_weight'],
    n_estimators = credit_risk_constants.CONSTANT_ML_RF_N_ESTIMATORS,
    random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
).fit(x_train_scaled_SMOTEENN_dataframe, y_train_SMOTEENN_series)

## **5.2: Display the Model Scores Using the Scaled Training and Testing Data**

### **Original**

In [None]:
# Percent accuracy of the baseline random forest on the scaled training data.
accuracy_score_train_float = (
    random_forest_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe random forest model score from scaled training data is 99.75%[0m


In [None]:
# Percent accuracy of the baseline random forest on the scaled test data.
accuracy_score_test_float = (
    random_forest_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe random forest model score from scaled test data is 99.25%[0m


### **Random Undersampling**

In [None]:
# Percent accuracy of the undersampled-fit random forest, measured on the
# non-resampled scaled training data.
accuracy_score_train_float = (
    random_forest_undersampled_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from undersampled scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe random forest model score from undersampled scaled training data is 99.10%[0m


In [None]:
# Percent accuracy of the undersampled-fit random forest on the scaled test data.
accuracy_score_test_float = (
    random_forest_undersampled_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from undersampled scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe random forest model score from undersampled scaled test data is 99.25%[0m


### **Random Oversampling**

In [None]:
# Percent accuracy of the oversampled-fit random forest, measured on the
# non-resampled scaled training data.
accuracy_score_train_float = (
    random_forest_oversampled_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
# The wording matches the companion test-data message ("oversampled").
logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from oversampled scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe random forest model score from overersampled scaled training data is 99.57%[0m


In [None]:
# Percent accuracy of the oversampled-fit random forest on the scaled test data.
accuracy_score_test_float = (
    random_forest_oversampled_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from oversampled scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe random forest model score from oversampled scaled test data is 99.15%[0m


### **Cluster Centroids**

In [None]:
# Percent accuracy of the cluster-centroids-fit random forest, measured on
# the non-resampled scaled training data.
accuracy_score_train_float = (
    random_forest_cluster_centroids_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from cluster centroids scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe random forest model score from cluster centroids scaled training data is 99.43%[0m


In [None]:
# Percent accuracy of the cluster-centroids-fit random forest on the scaled test data.
accuracy_score_test_float = (
    random_forest_cluster_centroids_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from cluster centroids scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe random forest model score from cluster centroids scaled test data is 99.10%[0m


### **SMOTE**

In [None]:
# Percent accuracy of the SMOTE-fit random forest, measured on the
# non-resampled scaled training data.
accuracy_score_train_float = (
    random_forest_SMOTE_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from SMOTE scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe random forest model score from SMOTE scaled training data is 99.75%[0m


In [None]:
# Percent accuracy of the SMOTE-fit random forest on the scaled test data.
accuracy_score_test_float = (
    random_forest_SMOTE_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from SMOTE scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe random forest model score from SMOTE scaled test data is 74.56%[0m


### **SMOTEENN**

In [None]:
# Percent accuracy of the SMOTEENN-fit random forest, measured on the
# non-resampled scaled training data.
accuracy_score_train_float = (
    random_forest_SMOTEENN_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from SMOTEENN scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe random forest model score from SMOTEENN scaled training data is 99.47%[0m


In [None]:
# Percent accuracy of the SMOTEENN-fit random forest on the scaled test data.
accuracy_score_test_float = (
    random_forest_SMOTEENN_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from SMOTEENN scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe random forest model score from SMOTEENN scaled test data is 90.09%[0m


## **5.3: Calculate Training and Test Predictions.**

### **Original**

In [None]:
# Run the classificationsx prediction helper for the baseline random forest
# on the scaled training data.
rf_train_predictions_dataframe = classificationsx.return_predictions_dataframe(
    random_forest_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(rf_train_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the baseline random forest
# on the scaled test data.
rf_test_predictions_dataframe = classificationsx.return_predictions_dataframe(
    random_forest_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(rf_test_predictions_dataframe)

### **Random Undersampling**

In [None]:
# Run the classificationsx prediction helper for the undersampled-fit
# random forest on the (non-resampled) scaled training data.
rf_train_undersampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    random_forest_undersampled_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(rf_train_undersampled_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the undersampled-fit
# random forest on the scaled test data.
rf_test_undersampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    random_forest_undersampled_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(rf_test_undersampled_predictions_dataframe)

### **Random Oversampling**

In [None]:
# Run the classificationsx prediction helper for the oversampled-fit
# random forest on the (non-resampled) scaled training data.
rf_train_oversampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    random_forest_oversampled_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(rf_train_oversampled_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the oversampled-fit
# random forest on the scaled test data.
rf_test_oversampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    random_forest_oversampled_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(rf_test_oversampled_predictions_dataframe)

### **Cluster Centroids**

In [None]:
# Run the classificationsx prediction helper for the cluster-centroids-fit
# random forest on the (non-resampled) scaled training data.
rf_train_cluster_centroids_predictions_dataframe = classificationsx.return_predictions_dataframe(
    random_forest_cluster_centroids_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(rf_train_cluster_centroids_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the cluster-centroids-fit
# random forest on the scaled test data.
rf_test_cluster_centroids_predictions_dataframe = classificationsx.return_predictions_dataframe(
    random_forest_cluster_centroids_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(rf_test_cluster_centroids_predictions_dataframe)

### **SMOTE**

In [None]:
# Run the classificationsx prediction helper for the SMOTE-fit random forest
# on the (non-resampled) scaled training data.
rf_train_SMOTE_predictions_dataframe = classificationsx.return_predictions_dataframe(
    random_forest_SMOTE_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(rf_train_SMOTE_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the SMOTE-fit random forest
# on the scaled test data.
rf_test_SMOTE_predictions_dataframe = classificationsx.return_predictions_dataframe(
    random_forest_SMOTE_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(rf_test_SMOTE_predictions_dataframe)

### **SMOTEENN**

In [None]:
# Run the classificationsx prediction helper for the SMOTEENN-fit random
# forest on the (non-resampled) scaled training data.
rf_train_SMOTEENN_predictions_dataframe = classificationsx.return_predictions_dataframe(
    random_forest_SMOTEENN_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(rf_train_SMOTEENN_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the SMOTEENN-fit random
# forest on the scaled test data. The model passed here must be the SMOTEENN
# one (not the SMOTE model) so the result matches this variable's name and
# the SMOTEENN training-predictions cell.
rf_test_SMOTEENN_predictions_dataframe = classificationsx.return_predictions_dataframe(
    random_forest_SMOTEENN_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(rf_test_SMOTEENN_predictions_dataframe)

# <br> **Section 6: Support Vector Machine (SVM) Models**

## **6.1: Fit Models by Using the Scaled Training Data.**

### **Original**

In [None]:
# Fit a support vector classifier on the scaled training data; the
# probability flag and random seed come from credit_risk_constants.
svm_model = SVC(
    probability = credit_risk_constants.CONSTANT_ML_SVM_PROBABILITY,
    random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
).fit(x_train_scaled_dataframe, y_train_series)

### **Random Undersampling**

In [None]:
# Fit a support vector classifier on the undersampled scaled training data;
# the probability flag and random seed come from credit_risk_constants.
svm_undersampled_model = SVC(
    probability = credit_risk_constants.CONSTANT_ML_SVM_PROBABILITY,
    random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
).fit(x_train_scaled_undersampled_dataframe, y_train_undersampled_series)

### **Random Oversampling**

In [None]:
# Fit a support vector classifier on the oversampled scaled training data;
# the probability flag and random seed come from credit_risk_constants.
svm_oversampled_model = SVC(
    probability = credit_risk_constants.CONSTANT_ML_SVM_PROBABILITY,
    random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
).fit(x_train_scaled_oversampled_dataframe, y_train_oversampled_series)

### **Cluster Centroids**

In [None]:
# Fit a support vector classifier on the cluster-centroids scaled training
# data; the probability flag and random seed come from credit_risk_constants.
svm_cluster_centroids_model = SVC(
    probability = credit_risk_constants.CONSTANT_ML_SVM_PROBABILITY,
    random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
).fit(x_train_scaled_cluster_centroids_dataframe, y_train_cluster_centroids_series)

### **SMOTE**

In [None]:
# Fit a support vector classifier on the SMOTE scaled training data; the
# probability flag and random seed come from credit_risk_constants.
svm_SMOTE_model = SVC(
    probability = credit_risk_constants.CONSTANT_ML_SVM_PROBABILITY,
    random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
).fit(x_train_scaled_SMOTE_dataframe, y_train_SMOTE_series)

### **SMOTEENN**

In [None]:
# Fit a support vector classifier on the SMOTEENN scaled training data; the
# probability flag and random seed come from credit_risk_constants.
svm_SMOTEENN_model = SVC(
    probability = credit_risk_constants.CONSTANT_ML_SVM_PROBABILITY,
    random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
).fit(x_train_scaled_SMOTEENN_dataframe, y_train_SMOTEENN_series)

## **6.2: Display the Model Scores Using the Scaled Training and Testing Data**

### **Original**

In [None]:
# Percent accuracy of the baseline SVM on the scaled training data.
accuracy_score_train_float = (
    svm_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe svm model score from scaled training data is 99.47%[0m


In [None]:
# Percent accuracy of the baseline SVM on the scaled test data.
accuracy_score_test_float = (
    svm_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe svm model score from scaled test data is 99.37%[0m


### **Random Undersampling**

In [None]:
# Percent accuracy of the undersampled-fit SVM, measured on the
# non-resampled scaled training data.
accuracy_score_train_float = (
    svm_undersampled_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from undersampled scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe svm model score from undersampled scaled training data is 99.45%[0m


In [None]:
# Percent accuracy of the undersampled-fit SVM on the scaled test data.
accuracy_score_test_float = (
    svm_undersampled_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from undersampled scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe svm model score from undersampled scaled test data is 99.35%[0m


### **Random Oversampling**

In [None]:
# Percent accuracy of the oversampled-fit SVM, measured on the
# non-resampled scaled training data.
accuracy_score_train_float = (
    svm_oversampled_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
# The wording matches the companion test-data message ("oversampled").
logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from oversampled scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe svm model score from overersampled scaled training data is 99.45%[0m


In [None]:
# Percent accuracy of the oversampled-fit SVM on the scaled test data.
accuracy_score_test_float = (
    svm_oversampled_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from oversampled scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe svm model score from oversampled scaled test data is 99.34%[0m


### **Cluster Centroids**

In [None]:
# Percent accuracy of the cluster-centroids-fit SVM, measured on the
# non-resampled scaled training data.
accuracy_score_train_float = (
    svm_cluster_centroids_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from cluster centroids scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe svm model score from cluster centroids scaled training data is 99.44%[0m


In [None]:
# Percent accuracy of the cluster-centroids-fit SVM on the scaled test data.
accuracy_score_test_float = (
    svm_cluster_centroids_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from cluster centroids scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe svm model score from cluster centroids scaled test data is 99.33%[0m


### **SMOTE**

In [None]:
# Percent accuracy of the SMOTE-fit SVM, measured on the non-resampled
# scaled training data.
accuracy_score_train_float = (
    svm_SMOTE_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from SMOTE scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe svm model score from SMOTE scaled training data is 99.44%[0m


In [None]:
# Percent accuracy of the SMOTE-fit SVM on the scaled test data.
accuracy_score_test_float = (
    svm_SMOTE_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from SMOTE scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe svm model score from SMOTE scaled test data is 99.34%[0m


### **SMOTEENN**

In [None]:
# Percent accuracy of the SMOTEENN-fit SVM, measured on the non-resampled
# scaled training data.
accuracy_score_train_float = (
    svm_SMOTEENN_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from SMOTEENN scaled training data is {:.2f}%'.format(
        accuracy_score_train_float
    )
    + '\033[0m'
)

[1mThe svm model score from SMOTEENN scaled training data is 99.46%[0m


In [None]:
# Percent accuracy of the SMOTEENN-fit SVM on the scaled test data.
accuracy_score_test_float = (
    svm_SMOTEENN_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# Emit the bold (ANSI-escaped) score message via logx.print_and_log_text.
logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from SMOTEENN scaled test data is {:.2f}%'.format(
        accuracy_score_test_float
    )
    + '\033[0m'
)

[1mThe svm model score from SMOTEENN scaled test data is 99.36%[0m


## **6.3: Calculate Training and Test Predictions.**

### **Original**

In [None]:
# Run the classificationsx prediction helper for the baseline SVM on the
# scaled training data.
svm_train_predictions_dataframe = classificationsx.return_predictions_dataframe(
    svm_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(svm_train_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the baseline SVM on the
# scaled test data.
svm_test_predictions_dataframe = classificationsx.return_predictions_dataframe(
    svm_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(svm_test_predictions_dataframe)

### **Random Undersampling**

In [None]:
# Run the classificationsx prediction helper for the undersampled-fit SVM
# on the (non-resampled) scaled training data.
svm_train_undersampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    svm_undersampled_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(svm_train_undersampled_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the undersampled-fit SVM
# on the scaled test data.
svm_test_undersampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    svm_undersampled_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(svm_test_undersampled_predictions_dataframe)

### **Random Oversampling**

In [None]:
# Run the classificationsx prediction helper for the oversampled-fit SVM
# on the (non-resampled) scaled training data.
svm_train_oversampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    svm_oversampled_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(svm_train_oversampled_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the oversampled-fit SVM
# on the scaled test data.
svm_test_oversampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    svm_oversampled_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(svm_test_oversampled_predictions_dataframe)

### **Cluster Centroids**

In [None]:
# Run the classificationsx prediction helper for the cluster-centroids-fit
# SVM on the (non-resampled) scaled training data.
svm_train_cluster_centroids_predictions_dataframe = classificationsx.return_predictions_dataframe(
    svm_cluster_centroids_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(svm_train_cluster_centroids_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the cluster-centroids-fit
# SVM on the scaled test data.
svm_test_cluster_centroids_predictions_dataframe = classificationsx.return_predictions_dataframe(
    svm_cluster_centroids_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(svm_test_cluster_centroids_predictions_dataframe)

### **SMOTE**

In [None]:
# Run the classificationsx prediction helper for the SMOTE-fit SVM on the
# (non-resampled) scaled training data.
svm_train_SMOTE_predictions_dataframe = classificationsx.return_predictions_dataframe(
    svm_SMOTE_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(svm_train_SMOTE_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the SMOTE-fit SVM on the
# scaled test data.
svm_test_SMOTE_predictions_dataframe = classificationsx.return_predictions_dataframe(
    svm_SMOTE_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(svm_test_SMOTE_predictions_dataframe)

### **SMOTEENN**

In [None]:
# Run the classificationsx prediction helper for the SMOTEENN-fit SVM on
# the (non-resampled) scaled training data.
svm_train_SMOTEENN_predictions_dataframe = classificationsx.return_predictions_dataframe(
    svm_SMOTEENN_model, x_train_scaled_dataframe, y_train_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(svm_train_SMOTEENN_predictions_dataframe)

In [None]:
# Run the classificationsx prediction helper for the SMOTEENN-fit SVM on the
# scaled test data. The model passed here must be the SMOTEENN one (not the
# SMOTE model) so the result matches this variable's name and the SMOTEENN
# training-predictions cell.
svm_test_SMOTEENN_predictions_dataframe = classificationsx.return_predictions_dataframe(
    svm_SMOTEENN_model, x_test_scaled_dataframe, y_test_series
)

# Record the result through logx.log_write_object.
logx.log_write_object(svm_test_SMOTEENN_predictions_dataframe)

# <br> **Section 7: K-Nearest Neighbor (KNN) Models**

## **7.1: Fit Models by Using the Scaled Training Data.**

### **Original**

In [None]:
# Load the pickled grid-search object inside a context manager so the file handle
# is closed as soon as loading finishes.
# NOTE(review): pickle.load can execute arbitrary code; only load files produced by this project.
with open(credit_risk_constants.CONSTANT_KNN_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as knn_grid_search_model_file:
    knn_grid_search_model = pickle.load(knn_grid_search_model_file)

# Fit KNN on the scaled training data, taking the algorithm from the grid search's
# best parameters and the leaf size from the project constants.
knn_model \
    = KNeighborsClassifier \
        (algorithm = knn_grid_search_model.best_params_['algorithm'],
         leaf_size = credit_risk_constants.CONSTANT_ML_KNN_LEAF_SIZE) \
            .fit(x_train_scaled_dataframe, y_train_series)

### **Random Undersampling**

In [None]:
# Load the pickled grid-search object inside a context manager so the file handle
# is closed as soon as loading finishes.
# NOTE(review): pickle.load can execute arbitrary code; only load files produced by this project.
with open(credit_risk_constants.CONSTANT_KNN_UNDERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as knn_undersampled_grid_search_model_file:
    knn_undersampled_grid_search_model \
        = pickle.load(knn_undersampled_grid_search_model_file)

# Fit KNN on the randomly undersampled scaled training data, taking the algorithm
# from the grid search's best parameters and the leaf size from the project constants.
knn_undersampled_model \
    = KNeighborsClassifier \
        (algorithm = knn_undersampled_grid_search_model.best_params_['algorithm'],
         leaf_size = credit_risk_constants.CONSTANT_ML_KNN_LEAF_SIZE) \
            .fit(x_train_scaled_undersampled_dataframe, y_train_undersampled_series)

### **Random Oversampling**

In [None]:
# Load the pickled grid-search object inside a context manager so the file handle
# is closed as soon as loading finishes.
# NOTE(review): pickle.load can execute arbitrary code; only load files produced by this project.
with open(credit_risk_constants.CONSTANT_KNN_OVERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as knn_oversampled_grid_search_model_file:
    knn_oversampled_grid_search_model \
        = pickle.load(knn_oversampled_grid_search_model_file)

# Fit KNN on the randomly oversampled scaled training data, taking the algorithm
# from the grid search's best parameters and the leaf size from the project constants.
knn_oversampled_model \
    = KNeighborsClassifier \
        (algorithm = knn_oversampled_grid_search_model.best_params_['algorithm'],
         leaf_size = credit_risk_constants.CONSTANT_ML_KNN_LEAF_SIZE) \
            .fit(x_train_scaled_oversampled_dataframe, y_train_oversampled_series)

### **Cluster Centroids**

In [None]:
# Load the pickled grid-search object inside a context manager so the file handle
# is closed as soon as loading finishes.
# NOTE(review): pickle.load can execute arbitrary code; only load files produced by this project.
with open(credit_risk_constants.CONSTANT_KNN_CLUSTER_CENTROIDS_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as knn_cluster_centroids_grid_search_model_file:
    knn_cluster_centroids_grid_search_model \
        = pickle.load(knn_cluster_centroids_grid_search_model_file)

# Fit KNN on the cluster-centroids resampled scaled training data, taking the algorithm
# from the grid search's best parameters and the leaf size from the project constants.
knn_cluster_centroids_model \
    = KNeighborsClassifier \
        (algorithm = knn_cluster_centroids_grid_search_model.best_params_['algorithm'],
         leaf_size = credit_risk_constants.CONSTANT_ML_KNN_LEAF_SIZE) \
            .fit(x_train_scaled_cluster_centroids_dataframe, y_train_cluster_centroids_series)

### **SMOTE**

In [None]:
# Load the pickled grid-search object inside a context manager so the file handle
# is closed as soon as loading finishes.
# NOTE(review): pickle.load can execute arbitrary code; only load files produced by this project.
with open(credit_risk_constants.CONSTANT_KNN_SMOTE_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as knn_SMOTE_grid_search_model_file:
    knn_SMOTE_grid_search_model \
        = pickle.load(knn_SMOTE_grid_search_model_file)

# Fit KNN on the SMOTE-resampled scaled training data, taking the algorithm
# from the grid search's best parameters and the leaf size from the project constants.
knn_SMOTE_model \
    = KNeighborsClassifier \
        (algorithm = knn_SMOTE_grid_search_model.best_params_['algorithm'],
         leaf_size = credit_risk_constants.CONSTANT_ML_KNN_LEAF_SIZE) \
            .fit(x_train_scaled_SMOTE_dataframe, y_train_SMOTE_series)

### **SMOTEENN**

In [None]:
# Load the pickled grid-search object inside a context manager so the file handle
# is closed as soon as loading finishes.
# NOTE(review): pickle.load can execute arbitrary code; only load files produced by this project.
with open(credit_risk_constants.CONSTANT_KNN_SMOTEENN_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as knn_SMOTEENN_grid_search_model_file:
    knn_SMOTEENN_grid_search_model \
        = pickle.load(knn_SMOTEENN_grid_search_model_file)

# Fit KNN on the SMOTEENN-resampled scaled training data, taking the algorithm
# from the grid search's best parameters and the leaf size from the project constants.
knn_SMOTEENN_model \
    = KNeighborsClassifier \
        (algorithm = knn_SMOTEENN_grid_search_model.best_params_['algorithm'],
         leaf_size = credit_risk_constants.CONSTANT_ML_KNN_LEAF_SIZE) \
            .fit(x_train_scaled_SMOTEENN_dataframe, y_train_SMOTEENN_series)

## **7.2: Display the Model Scores Using the Scaled Training and Testing Data**

### **Original**

In [None]:
# Mean accuracy of the original KNN model on the scaled training split, as a percentage.
accuracy_score_train_float = (
    knn_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from scaled training data is {:.2f}%'.format(accuracy_score_train_float)
    + '\033[0m'
)

[1mThe knn model score from scaled training data is 99.48%[0m


In [None]:
# Mean accuracy of the original KNN model on the scaled test split, as a percentage.
accuracy_score_test_float = (
    knn_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from scaled test data is {:.2f}%'.format(accuracy_score_test_float)
    + '\033[0m'
)

[1mThe knn model score from scaled test data is 99.28%[0m


### **Random Undersampling**

In [None]:
# Mean accuracy (percentage) of the undersampled KNN model, scored on the
# original (non-resampled) scaled training split.
accuracy_score_train_float = (
    knn_undersampled_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from undersampled scaled training data is {:.2f}%'.format(accuracy_score_train_float)
    + '\033[0m'
)

[1mThe knn model score from undersampled scaled training data is 99.45%[0m


In [None]:
# Mean accuracy (percentage) of the undersampled KNN model on the scaled test split.
accuracy_score_test_float = (
    knn_undersampled_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from undersampled scaled test data is {:.2f}%'.format(accuracy_score_test_float)
    + '\033[0m'
)

[1mThe knn model score from undersampled scaled test data is 99.35%[0m


### **Random Oversampling**

In [None]:
# Mean accuracy (percentage) of the oversampled KNN model, scored on the
# original (non-resampled) scaled training split.
accuracy_score_train_float \
    = knn_oversampled_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

# Message wording matches the companion test-data cell ('oversampled');
# the ANSI escapes render it in bold.
logx.print_and_log_text \
    ('\033[1m'
     + 'The knn model score from oversampled scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

[1mThe knn model score from overersampled scaled training data is 99.47%[0m


In [None]:
# Mean accuracy (percentage) of the oversampled KNN model on the scaled test split.
accuracy_score_test_float = (
    knn_oversampled_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from oversampled scaled test data is {:.2f}%'.format(accuracy_score_test_float)
    + '\033[0m'
)

[1mThe knn model score from oversampled scaled test data is 99.31%[0m


### **Cluster Centroids**

In [None]:
# Mean accuracy (percentage) of the cluster-centroids KNN model, scored on the
# original (non-resampled) scaled training split.
accuracy_score_train_float = (
    knn_cluster_centroids_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from cluster centroids scaled training data is {:.2f}%'.format(accuracy_score_train_float)
    + '\033[0m'
)

[1mThe knn model score from cluster centroids scaled training data is 99.47%[0m


In [None]:
# Mean accuracy (percentage) of the cluster-centroids KNN model on the scaled test split.
accuracy_score_test_float = (
    knn_cluster_centroids_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from cluster centroids scaled test data is {:.2f}%'.format(accuracy_score_test_float)
    + '\033[0m'
)

[1mThe knn model score from cluster centroids scaled test data is 99.37%[0m


### **SMOTE**

In [None]:
# Mean accuracy (percentage) of the SMOTE KNN model, scored on the
# original (non-resampled) scaled training split.
accuracy_score_train_float = (
    knn_SMOTE_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from SMOTE scaled training data is {:.2f}%'.format(accuracy_score_train_float)
    + '\033[0m'
)

[1mThe knn model score from SMOTE scaled training data is 99.47%[0m


In [None]:
# Mean accuracy (percentage) of the SMOTE KNN model on the scaled test split.
accuracy_score_test_float = (
    knn_SMOTE_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from SMOTE scaled test data is {:.2f}%'.format(accuracy_score_test_float)
    + '\033[0m'
)

[1mThe knn model score from SMOTE scaled test data is 94.70%[0m


### **SMOTEENN**

In [None]:
# Mean accuracy (percentage) of the SMOTEENN KNN model, scored on the
# original (non-resampled) scaled training split.
accuracy_score_train_float = (
    knn_SMOTEENN_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from SMOTEENN scaled training data is {:.2f}%'.format(accuracy_score_train_float)
    + '\033[0m'
)

[1mThe knn model score from SMOTEENN scaled training data is 99.46%[0m


In [None]:
# Mean accuracy (percentage) of the SMOTEENN KNN model on the scaled test split.
accuracy_score_test_float = (
    knn_SMOTEENN_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from SMOTEENN scaled test data is {:.2f}%'.format(accuracy_score_test_float)
    + '\033[0m'
)

[1mThe knn model score from SMOTEENN scaled test data is 97.45%[0m


## **7.3: Calculate Training and Test Predictions.**

### **Original**

In [None]:
# Hand the original KNN model and the scaled training split to the predictions helper, then log the result.
knn_train_predictions_dataframe = classificationsx.return_predictions_dataframe(
    knn_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(knn_train_predictions_dataframe)

In [None]:
# Hand the original KNN model and the scaled test split to the predictions helper, then log the result.
knn_test_predictions_dataframe = classificationsx.return_predictions_dataframe(
    knn_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(knn_test_predictions_dataframe)

### **Random Undersampling**

In [None]:
# Hand the undersampled KNN model and the scaled training split to the predictions helper, then log the result.
knn_train_undersampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    knn_undersampled_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(knn_train_undersampled_predictions_dataframe)

In [None]:
# Hand the undersampled KNN model and the scaled test split to the predictions helper, then log the result.
knn_test_undersampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    knn_undersampled_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(knn_test_undersampled_predictions_dataframe)

### **Random Oversampling**

In [None]:
# Hand the oversampled KNN model and the scaled training split to the predictions helper, then log the result.
knn_train_oversampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    knn_oversampled_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(knn_train_oversampled_predictions_dataframe)

In [None]:
# Hand the oversampled KNN model and the scaled test split to the predictions helper, then log the result.
knn_test_oversampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    knn_oversampled_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(knn_test_oversampled_predictions_dataframe)

### **Cluster Centroids**

In [None]:
# Hand the cluster-centroids KNN model and the scaled training split to the predictions helper, then log the result.
knn_train_cluster_centroids_predictions_dataframe = classificationsx.return_predictions_dataframe(
    knn_cluster_centroids_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(knn_train_cluster_centroids_predictions_dataframe)

In [None]:
# Hand the cluster-centroids KNN model and the scaled test split to the predictions helper, then log the result.
knn_test_cluster_centroids_predictions_dataframe = classificationsx.return_predictions_dataframe(
    knn_cluster_centroids_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(knn_test_cluster_centroids_predictions_dataframe)

### **SMOTE**

In [None]:
# Hand the SMOTE-trained KNN model and the scaled training split to the predictions helper, then log the result.
knn_train_SMOTE_predictions_dataframe = classificationsx.return_predictions_dataframe(
    knn_SMOTE_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(knn_train_SMOTE_predictions_dataframe)

In [None]:
# Hand the SMOTE-trained KNN model and the scaled test split to the predictions helper, then log the result.
knn_test_SMOTE_predictions_dataframe = classificationsx.return_predictions_dataframe(
    knn_SMOTE_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(knn_test_SMOTE_predictions_dataframe)

### **SMOTEENN**

In [None]:
# Hand the SMOTEENN-trained KNN model and the scaled training split to the predictions helper, then log the result.
knn_train_SMOTEENN_predictions_dataframe = classificationsx.return_predictions_dataframe(
    knn_SMOTEENN_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(knn_train_SMOTEENN_predictions_dataframe)

In [None]:
# Test-split predictions for the SMOTEENN-trained KNN model.
# The model argument must be knn_SMOTEENN_model (matching the training cell of this
# subsection); passing knn_SMOTE_model here would store SMOTE results under the
# SMOTEENN name.
knn_test_SMOTEENN_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (knn_SMOTEENN_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(knn_test_SMOTEENN_predictions_dataframe)

# <br> **Section 8: Gaussian Naive Bayes (GNB) Models**

## **8.1: Fit Models by Using the Scaled Training Data.**

### **Original**

In [None]:
# Fit a default-configured Gaussian naive Bayes classifier on the scaled training split.
gnb_model = GaussianNB().fit(
    x_train_scaled_dataframe,
    y_train_series,
)

### **Random Undersampling**

In [None]:
# Fit a default-configured Gaussian naive Bayes classifier on the randomly undersampled training data.
gnb_undersampled_model = GaussianNB().fit(
    x_train_scaled_undersampled_dataframe,
    y_train_undersampled_series,
)

### **Random Oversampling**

In [None]:
# Fit a default-configured Gaussian naive Bayes classifier on the randomly oversampled training data.
gnb_oversampled_model = GaussianNB().fit(
    x_train_scaled_oversampled_dataframe,
    y_train_oversampled_series,
)

### **Cluster Centroids**

In [None]:
# Fit a default-configured Gaussian naive Bayes classifier on the cluster-centroids resampled training data.
gnb_cluster_centroids_model = GaussianNB().fit(
    x_train_scaled_cluster_centroids_dataframe,
    y_train_cluster_centroids_series,
)

### **SMOTE**

In [None]:
# Fit a default-configured Gaussian naive Bayes classifier on the SMOTE-resampled training data.
gnb_SMOTE_model = GaussianNB().fit(
    x_train_scaled_SMOTE_dataframe,
    y_train_SMOTE_series,
)

### **SMOTEENN**

In [None]:
# Fit a default-configured Gaussian naive Bayes classifier on the SMOTEENN-resampled training data.
gnb_SMOTEENN_model = GaussianNB().fit(
    x_train_scaled_SMOTEENN_dataframe,
    y_train_SMOTEENN_series,
)

## **8.2: Display the Model Scores Using the Scaled Training and Testing Data**

### **Original**

In [None]:
# Mean accuracy of the original GNB model on the scaled training split, as a percentage.
accuracy_score_train_float = (
    gnb_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from scaled training data is {:.2f}%'.format(accuracy_score_train_float)
    + '\033[0m'
)

[1mThe gnb model score from scaled training data is 99.47%[0m


In [None]:
# Mean accuracy of the original GNB model on the scaled test split, as a percentage.
accuracy_score_test_float = (
    gnb_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from scaled test data is {:.2f}%'.format(accuracy_score_test_float)
    + '\033[0m'
)

[1mThe gnb model score from scaled test data is 99.37%[0m


### **Random Undersampling**

In [None]:
# Mean accuracy (percentage) of the undersampled GNB model, scored on the
# original (non-resampled) scaled training split.
accuracy_score_train_float = (
    gnb_undersampled_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from undersampled scaled training data is {:.2f}%'.format(accuracy_score_train_float)
    + '\033[0m'
)

[1mThe gnb model score from undersampled scaled training data is 99.47%[0m


In [None]:
# Mean accuracy (percentage) of the undersampled GNB model on the scaled test split.
accuracy_score_test_float = (
    gnb_undersampled_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from undersampled scaled test data is {:.2f}%'.format(accuracy_score_test_float)
    + '\033[0m'
)

[1mThe gnb model score from undersampled scaled test data is 99.37%[0m


### **Random Oversampling**

In [None]:
# Mean accuracy (percentage) of the oversampled GNB model, scored on the
# original (non-resampled) scaled training split.
accuracy_score_train_float \
    = gnb_oversampled_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

# Message wording matches the companion test-data cell ('oversampled');
# the ANSI escapes render it in bold.
logx.print_and_log_text \
    ('\033[1m'
     + 'The gnb model score from oversampled scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

[1mThe gnb model score from overersampled scaled training data is 99.47%[0m


In [None]:
# Mean accuracy (percentage) of the oversampled GNB model on the scaled test split.
accuracy_score_test_float = (
    gnb_oversampled_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from oversampled scaled test data is {:.2f}%'.format(accuracy_score_test_float)
    + '\033[0m'
)

[1mThe gnb model score from oversampled scaled test data is 99.37%[0m


### **Cluster Centroids**

In [None]:
# Mean accuracy (percentage) of the cluster-centroids GNB model, scored on the
# original (non-resampled) scaled training split.
accuracy_score_train_float = (
    gnb_cluster_centroids_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from cluster centroids scaled training data is {:.2f}%'.format(accuracy_score_train_float)
    + '\033[0m'
)

[1mThe gnb model score from cluster centroids scaled training data is 99.44%[0m


In [None]:
# Mean accuracy (percentage) of the cluster-centroids GNB model on the scaled test split.
accuracy_score_test_float = (
    gnb_cluster_centroids_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from cluster centroids scaled test data is {:.2f}%'.format(accuracy_score_test_float)
    + '\033[0m'
)

[1mThe gnb model score from cluster centroids scaled test data is 99.33%[0m


### **SMOTE**

In [None]:
# Mean accuracy (percentage) of the SMOTE GNB model, scored on the
# original (non-resampled) scaled training split.
accuracy_score_train_float = (
    gnb_SMOTE_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from SMOTE scaled training data is {:.2f}%'.format(accuracy_score_train_float)
    + '\033[0m'
)

[1mThe gnb model score from SMOTE scaled training data is 99.47%[0m


In [None]:
# Mean accuracy (percentage) of the SMOTE GNB model on the scaled test split.
accuracy_score_test_float = (
    gnb_SMOTE_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from SMOTE scaled test data is {:.2f}%'.format(accuracy_score_test_float)
    + '\033[0m'
)

[1mThe gnb model score from SMOTE scaled test data is 99.37%[0m


### **SMOTEENN**

In [None]:
# Mean accuracy (percentage) of the SMOTEENN GNB model, scored on the
# original (non-resampled) scaled training split.
accuracy_score_train_float = (
    gnb_SMOTEENN_model.score(x_train_scaled_dataframe, y_train_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from SMOTEENN scaled training data is {:.2f}%'.format(accuracy_score_train_float)
    + '\033[0m'
)

[1mThe gnb model score from SMOTEENN scaled training data is 99.46%[0m


In [None]:
# Mean accuracy (percentage) of the SMOTEENN GNB model on the scaled test split.
accuracy_score_test_float = (
    gnb_SMOTEENN_model.score(x_test_scaled_dataframe, y_test_series) * 100
)

# The ANSI escapes render the message in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from SMOTEENN scaled test data is {:.2f}%'.format(accuracy_score_test_float)
    + '\033[0m'
)

[1mThe gnb model score from SMOTEENN scaled test data is 99.36%[0m


## **8.3: Calculate Training and Test Predictions.**

### **Original**

In [None]:
# Hand the original GNB model and the scaled training split to the predictions helper, then log the result.
gnb_train_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(gnb_train_predictions_dataframe)

In [None]:
# Hand the original GNB model and the scaled test split to the predictions helper, then log the result.
gnb_test_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(gnb_test_predictions_dataframe)

### **Random Undersampling**

In [None]:
# Hand the undersampled GNB model and the scaled training split to the predictions helper, then log the result.
gnb_train_undersampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_undersampled_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(gnb_train_undersampled_predictions_dataframe)

In [None]:
# Hand the undersampled GNB model and the scaled test split to the predictions helper, then log the result.
gnb_test_undersampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_undersampled_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(gnb_test_undersampled_predictions_dataframe)

### **Random Oversampling**

In [None]:
# Hand the oversampled GNB model and the scaled training split to the predictions helper, then log the result.
gnb_train_oversampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_oversampled_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(gnb_train_oversampled_predictions_dataframe)

In [None]:
# Hand the oversampled GNB model and the scaled test split to the predictions helper, then log the result.
gnb_test_oversampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_oversampled_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(gnb_test_oversampled_predictions_dataframe)

### **Cluster Centroids**

In [None]:
# Hand the cluster-centroids GNB model and the scaled training split to the predictions helper, then log the result.
gnb_train_cluster_centroids_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_cluster_centroids_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(gnb_train_cluster_centroids_predictions_dataframe)

In [None]:
# Hand the cluster-centroids GNB model and the scaled test split to the predictions helper, then log the result.
gnb_test_cluster_centroids_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_cluster_centroids_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(gnb_test_cluster_centroids_predictions_dataframe)

### **SMOTE**

In [None]:
# Hand the SMOTE-trained GNB model and the scaled training split to the predictions helper, then log the result.
gnb_train_SMOTE_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_SMOTE_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(gnb_train_SMOTE_predictions_dataframe)

In [None]:
# Hand the SMOTE-trained GNB model and the scaled test split to the predictions helper, then log the result.
gnb_test_SMOTE_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_SMOTE_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(gnb_test_SMOTE_predictions_dataframe)

### **SMOTEENN**

In [None]:
# Hand the SMOTEENN-trained GNB model and the scaled training split to the predictions helper, then log the result.
gnb_train_SMOTEENN_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_SMOTEENN_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(gnb_train_SMOTEENN_predictions_dataframe)

In [None]:
# Test-split predictions for the SMOTEENN-trained GNB model.
# The model argument must be gnb_SMOTEENN_model (matching the training cell of this
# subsection); passing gnb_SMOTE_model here would store SMOTE results under the
# SMOTEENN name.
gnb_test_SMOTEENN_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (gnb_SMOTEENN_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(gnb_test_SMOTEENN_predictions_dataframe)

# <br> **Section 9: Evaluate Model Performance**

## **9.1: Logistic Regression**

### **Original**

In [None]:
# Show the estimator's parameters; as the cell's last expression, the dict renders as the cell output.
logistic_regression_model.get_params()

{'C': 1.0,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 10000000,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': 21,
 'solver': 'lbfgs',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [None]:
# Imported locally so this cell does not depend on the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict labels for the scaled test split with the original logistic regression model.
lr_predictions_nparray = logistic_regression_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score (the mean of per-class recall) so the value matches the
# "balanced accuracy" name and message; plain accuracy_score is dominated by the
# majority class on this imbalanced test split.
lr_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, lr_predictions_nparray) * 100

# The ANSI escapes render the message in bold.
logx.print_and_log_text \
    ('\033[1m'
     + 'The balanced accuracy score for logistic regression from actual vs. test predictions is {:.2f}%' \
         .format(lr_balanced_accuracy_score_float)
     + '\033[0m')

[1mThe balanced accuracy score for logistic regression from actual vs. test predictions is 99.33%[0m


In [None]:
# Evaluate the original logistic regression test predictions; the helper hands back a
# score, a confusion-matrix frame, and a classification-report string.
# NOTE(review): the score shown in this cell's output (98.7%) matches the balanced accuracy
# of the confusion matrix rather than plain accuracy — confirm which metric the helper returns.
(lr_accuracy_score_float,
 lr_confusion_matrix_dataframe,
 lr_classification_report_string) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    lr_predictions_nparray,
    'LOGISTIC REGRESSION MODEL',
    'Healthy', 'High-Risk',
)

# Start both performance dictionaries here: one list of scores per model family,
# and one score per individual model variant.
model_performance_dictionary = {'logistic_regression': [lr_accuracy_score_float * 100]}

model_performance_ranking_dictionary = {'logistic_regression': lr_accuracy_score_float * 100}

[1mLOGISTIC REGRESSION MODEL
[0m
1) [1mOverall Accuracy Score: [0m98.7%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18656                  118
Actual High-Risk                 12                  598

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.84      0.98      0.90       610

    accuracy                           0.99     19384
   macro avg       0.92      0.99      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **Random Undersampling**

In [None]:
# Show the undersampled estimator's parameters; as the cell's last expression, the dict renders as the cell output.
logistic_regression_undersampled_model.get_params()

{'C': 1.0,
 'class_weight': 'balanced',
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 10000000,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': 21,
 'solver': 'lbfgs',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [None]:
# Imported locally so this cell does not depend on the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict labels for the scaled test split with the undersampled logistic regression model.
lr_undersampled_predictions_nparray \
    = logistic_regression_undersampled_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score (the mean of per-class recall) so the value matches the
# "balanced accuracy" name and message; plain accuracy_score is dominated by the
# majority class on this imbalanced test split.
lr_undersampled_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, lr_undersampled_predictions_nparray) * 100

# The ANSI escapes render the message in bold.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for logistic regression undersampled from actual vs. test predictions is {:.2f}%' \
         .format(lr_undersampled_balanced_accuracy_score_float) \
     + '\033[0m')

[1mThe balanced accuracy score for logistic regression undersampled from actual vs. test predictions is 99.34%[0m


In [None]:
# Evaluate the undersampled logistic regression test predictions; the helper hands back a
# score, a confusion-matrix frame, and a classification-report string.
(lr_undersampled_accuracy_score_float,
 lr_undersampled_confusion_matrix_dataframe,
 lr_undersampled_classification_report_string) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    lr_undersampled_predictions_nparray,
    'LOGISTIC REGRESSION MODEL (Undersampled)',
    'Healthy', 'High-Risk',
)

# Record the score in the per-family list and under this variant's own ranking key.
model_performance_dictionary['logistic_regression'].append(lr_undersampled_accuracy_score_float * 100)

model_performance_ranking_dictionary['logistic_regression_undersampled'] = lr_undersampled_accuracy_score_float * 100

[1mLOGISTIC REGRESSION MODEL (Undersampled)
[0m
1) [1mOverall Accuracy Score: [0m99.5%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18649                  125
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      1.00      0.91       610

    accuracy                           0.99     19384
   macro avg       0.91      1.00      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **Random Oversampling**

In [None]:
# Show the oversampled estimator's parameters; as the cell's last expression, the dict renders as the cell output.
logistic_regression_oversampled_model.get_params()

{'C': 1.0,
 'class_weight': 'balanced',
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 10000000,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': 21,
 'solver': 'liblinear',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [None]:
# Imported locally so this cell does not depend on the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict labels for the scaled test split with the oversampled logistic regression model.
lr_oversampled_predictions_nparray \
    = logistic_regression_oversampled_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score (the mean of per-class recall) so the value matches the
# "balanced accuracy" name and message; plain accuracy_score is dominated by the
# majority class on this imbalanced test split.
lr_oversampled_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, lr_oversampled_predictions_nparray) * 100

# The ANSI escapes render the message in bold.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for logistic regression oversampled from actual vs. test predictions is {:.2f}%' \
         .format(lr_oversampled_balanced_accuracy_score_float) \
     + '\033[0m')

[1mThe balanced accuracy score for logistic regression oversampled from actual vs. test predictions is 99.34%[0m


In [None]:
# Evaluate the oversampled logistic regression test predictions; the helper hands back a
# score, a confusion-matrix frame, and a classification-report string.
(lr_oversampled_accuracy_score_float,
 lr_oversampled_confusion_matrix_dataframe,
 lr_oversampled_classification_report_string) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    lr_oversampled_predictions_nparray,
    'LOGISTIC REGRESSION MODEL (Oversampled)',
    'Healthy', 'High-Risk',
)

# Record the score in the per-family list and under this variant's own ranking key.
model_performance_dictionary['logistic_regression'].append(lr_oversampled_accuracy_score_float * 100)

model_performance_ranking_dictionary['logistic_regression_oversampled'] = lr_oversampled_accuracy_score_float * 100

[1mLOGISTIC REGRESSION MODEL (Oversampled)
[0m
1) [1mOverall Accuracy Score: [0m99.5%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18648                  126
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      1.00      0.90       610

    accuracy                           0.99     19384
   macro avg       0.91      1.00      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **Cluster Centroids**

In [None]:
# Show the cluster-centroids estimator's parameters; as the cell's last expression, the dict renders as the cell output.
logistic_regression_cluster_centroids_model.get_params()

{'C': 1.0,
 'class_weight': 'balanced',
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 10000000,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': 21,
 'solver': 'liblinear',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [None]:
# Imported locally so this cell does not depend on the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict labels for the scaled test split with the cluster-centroids logistic regression model.
lr_cluster_centroids_predictions_nparray \
    = logistic_regression_cluster_centroids_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score (the mean of per-class recall) so the value matches the
# "balanced accuracy" name and message; plain accuracy_score is dominated by the
# majority class on this imbalanced test split.
lr_cluster_centroids_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, lr_cluster_centroids_predictions_nparray) * 100

# The ANSI escapes render the message in bold.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for logistic regression cluster centroids from actual vs. test predictions is {:.2f}%' \
         .format(lr_cluster_centroids_balanced_accuracy_score_float) \
     + '\033[0m')

[1mThe balanced accuracy score for logistic regression cluster centroids from actual vs. test predictions is 99.37%[0m


In [None]:
# Evaluate the cluster-centroids logistic regression test predictions; the helper hands back a
# score, a confusion-matrix frame, and a classification-report string.
(lr_cluster_centroids_accuracy_score_float,
 lr_cluster_centroids_confusion_matrix_dataframe,
 lr_cluster_centroids_classification_report_string) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    lr_cluster_centroids_predictions_nparray,
    'LOGISTIC REGRESSION MODEL (Cluster Centroids)',
    'Healthy', 'High-Risk',
)

# Record the score in the per-family list and under this variant's own ranking key.
model_performance_dictionary['logistic_regression'].append(lr_cluster_centroids_accuracy_score_float * 100)

model_performance_ranking_dictionary['logistic_regression_cluster_centroids'] = lr_cluster_centroids_accuracy_score_float * 100

[1mLOGISTIC REGRESSION MODEL (Cluster Centroids)
[0m
1) [1mOverall Accuracy Score: [0m99.52%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18654                  120
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.84      1.00      0.91       610

    accuracy                           0.99     19384
   macro avg       0.92      1.00      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **SMOTE**

In [None]:
# Show the SMOTE estimator's parameters; as the cell's last expression, the dict renders as the cell output.
logistic_regression_SMOTE_model.get_params()

{'C': 1.0,
 'class_weight': 'balanced',
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 10000000,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': 21,
 'solver': 'liblinear',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [None]:
# Imported locally so this cell does not depend on the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict labels for the scaled test split with the SMOTE logistic regression model.
lr_SMOTE_predictions_nparray \
    = logistic_regression_SMOTE_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score (the mean of per-class recall) so the value matches the
# "balanced accuracy" name and message; plain accuracy_score is dominated by the
# majority class on this imbalanced test split.
lr_SMOTE_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, lr_SMOTE_predictions_nparray) * 100

# The ANSI escapes render the message in bold.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for logistic regression SMOTE from actual vs. test predictions is {:.2f}%' \
         .format(lr_SMOTE_balanced_accuracy_score_float) \
     + '\033[0m')

[1mThe balanced accuracy score for logistic regression SMOTE from actual vs. test predictions is 99.34%[0m


In [None]:
# Evaluate the SMOTE logistic regression test predictions; the helper hands back a
# score, a confusion-matrix frame, and a classification-report string.
(lr_SMOTE_accuracy_score_float,
 lr_SMOTE_confusion_matrix_dataframe,
 lr_SMOTE_classification_report_string) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    lr_SMOTE_predictions_nparray,
    'LOGISTIC REGRESSION MODEL (SMOTE)',
    'Healthy', 'High-Risk',
)

# Record the score in the per-family list and under this variant's own ranking key.
model_performance_dictionary['logistic_regression'].append(lr_SMOTE_accuracy_score_float * 100)

model_performance_ranking_dictionary['logistic_regression_smote'] = lr_SMOTE_accuracy_score_float * 100

[1mLOGISTIC REGRESSION MODEL (SMOTE)
[0m
1) [1mOverall Accuracy Score: [0m99.5%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18648                  126
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      1.00      0.90       610

    accuracy                           0.99     19384
   macro avg       0.91      1.00      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **SMOTEENN**

In [None]:
# Show the SMOTEENN estimator's parameters; as the cell's last expression, the dict renders as the cell output.
logistic_regression_SMOTEENN_model.get_params()

{'C': 1.0,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 10000000,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': 21,
 'solver': 'liblinear',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [None]:
# Imported locally so this cell does not depend on the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict labels for the scaled test split with the SMOTEENN logistic regression model.
lr_SMOTEENN_predictions_nparray \
    = logistic_regression_SMOTEENN_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score (the mean of per-class recall) so the value matches the
# "balanced accuracy" name and message; plain accuracy_score is dominated by the
# majority class on this imbalanced test split.
lr_SMOTEENN_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, lr_SMOTEENN_predictions_nparray) * 100

# The ANSI escapes render the message in bold.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for logistic regression SMOTEENN from actual vs. test predictions is {:.2f}%' \
         .format(lr_SMOTEENN_balanced_accuracy_score_float) \
     + '\033[0m')

[1mThe balanced accuracy score for logistic regression SMOTEENN from actual vs. test predictions is 99.30%[0m


In [None]:
# Evaluate the SMOTEENN logistic regression predictions against the test
# labels; the helper's three return values are unpacked into the names below.
(
    lr_SMOTEENN_accuracy_score_float,
    lr_SMOTEENN_confusion_matrix_dataframe,
    lr_SMOTEENN_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    lr_SMOTEENN_predictions_nparray,
    'LOGISTIC REGRESSION MODEL (SMOTEENN)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage in both summary dictionaries.
# NOTE: .append is not idempotent; re-running this cell adds a duplicate entry.
model_performance_dictionary['logistic_regression'].append(
    lr_SMOTEENN_accuracy_score_float * 100
)

model_performance_ranking_dictionary['logistic_regression_smoteen'] = (
    lr_SMOTEENN_accuracy_score_float * 100
)

[1mLOGISTIC REGRESSION MODEL (SMOTEENN)
[0m
1) [1mOverall Accuracy Score: [0m99.48%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18641                  133
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.82      1.00      0.90       610

    accuracy                           0.99     19384
   macro avg       0.91      0.99      0.95     19384
weighted avg       0.99      0.99      0.99     19384




## **9.2: Decision Tree**

### **Original**

In [None]:
# Display the hyperparameter settings of the original decision tree model.
decision_tree_model.get_params()

{'ccp_alpha': 0.0,
 'class_weight': 'balanced',
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'random_state': 21,
 'splitter': 'best'}

In [None]:
# Imported locally so this cell is self-contained; it can be moved to the
# notebook's main import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict test-set labels with the original decision tree model.
dt_predictions_nparray = (
    decision_tree_model.predict(x_test_scaled_dataframe)
)

# Balanced accuracy (mean of per-class recall) as a percentage. Plain
# accuracy_score is dominated by the majority class on this imbalanced test
# set and does not match the variable name or the logged message.
dt_balanced_accuracy_score_float = (
    balanced_accuracy_score(y_test_series, dt_predictions_nparray) * 100
)

# Log the score in bold (ANSI escape codes wrap the message).
logx.print_and_log_text(
    '\033[1m'
    + 'The balanced accuracy score for decision tree from actual vs. test predictions is {:.2f}%'
        .format(dt_balanced_accuracy_score_float)
    + '\033[0m'
)

[1mThe balanced accuracy score for decision tree from actual vs. test predictions is 98.88%[0m


In [None]:
# Evaluate the original decision tree predictions against the test labels;
# the helper's three return values are unpacked into the names below.
(
    dt_accuracy_score_float,
    dt_confusion_matrix_dataframe,
    dt_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    dt_predictions_nparray,
    'DECISION TREE MODEL',
    'Healthy',
    'High-Risk',
)

# Start the decision tree score list with this model's percentage and record
# the same value in the ranking dictionary.
model_performance_dictionary['decision_tree'] = [
    dt_accuracy_score_float * 100
]

model_performance_ranking_dictionary['decision_tree'] = (
    dt_accuracy_score_float * 100
)

[1mDECISION TREE MODEL
[0m
1) [1mOverall Accuracy Score: [0m92.12%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18648                  126
Actual High-Risk                 92                  518

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      0.99     18774
   high-risk       0.80      0.85      0.83       610

    accuracy                           0.99     19384
   macro avg       0.90      0.92      0.91     19384
weighted avg       0.99      0.99      0.99     19384




### **Random Undersampling**

In [None]:
# Display the hyperparameter settings of the undersampled decision tree model.
decision_tree_undersampled_model.get_params()

{'ccp_alpha': 0.0,
 'class_weight': 'balanced',
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'random_state': 21,
 'splitter': 'random'}

In [None]:
# Imported locally so this cell is self-contained; it can be moved to the
# notebook's main import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict test-set labels with the undersampled decision tree model.
dt_undersampled_predictions_nparray = (
    decision_tree_undersampled_model.predict(x_test_scaled_dataframe)
)

# Balanced accuracy (mean of per-class recall) as a percentage. Plain
# accuracy_score is dominated by the majority class on this imbalanced test
# set and does not match the variable name or the logged message.
dt_undersampled_balanced_accuracy_score_float = (
    balanced_accuracy_score(y_test_series, dt_undersampled_predictions_nparray) * 100
)

# Log the score in bold (ANSI escape codes wrap the message).
logx.print_and_log_text(
    '\033[1m'
    + 'The balanced accuracy score for decision tree undersampled from actual vs. test predictions is {:.2f}%'
        .format(dt_undersampled_balanced_accuracy_score_float)
    + '\033[0m'
)

[1mThe balanced accuracy score for decision tree undersampled from actual vs. test predictions is 98.82%[0m


In [None]:
# Evaluate the undersampled decision tree predictions against the test labels;
# the helper's three return values are unpacked into the names below.
(
    dt_undersampled_accuracy_score_float,
    dt_undersampled_confusion_matrix_dataframe,
    dt_undersampled_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    dt_undersampled_predictions_nparray,
    'DECISION TREE MODEL (Undersampled)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage in both summary dictionaries.
# NOTE: .append is not idempotent; re-running this cell adds a duplicate entry.
model_performance_dictionary['decision_tree'].append(
    dt_undersampled_accuracy_score_float * 100
)

model_performance_ranking_dictionary['decision_tree_undersampling'] = (
    dt_undersampled_accuracy_score_float * 100
)

[1mDECISION TREE MODEL (Undersampled)
[0m
1) [1mOverall Accuracy Score: [0m99.0%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18551                  223
Actual High-Risk                  5                  605

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      0.99     18774
   high-risk       0.73      0.99      0.84       610

    accuracy                           0.99     19384
   macro avg       0.87      0.99      0.92     19384
weighted avg       0.99      0.99      0.99     19384




### **Random Oversampling**

In [None]:
# Display the hyperparameter settings of the oversampled decision tree model.
decision_tree_oversampled_model.get_params()

{'ccp_alpha': 0.0,
 'class_weight': 'balanced',
 'criterion': 'entropy',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'random_state': 21,
 'splitter': 'best'}

In [None]:
# Imported locally so this cell is self-contained; it can be moved to the
# notebook's main import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict test-set labels with the oversampled decision tree model.
dt_oversampled_predictions_nparray = (
    decision_tree_oversampled_model.predict(x_test_scaled_dataframe)
)

# Balanced accuracy (mean of per-class recall) as a percentage. Plain
# accuracy_score is dominated by the majority class on this imbalanced test
# set and does not match the variable name or the logged message.
dt_oversampled_balanced_accuracy_score_float = (
    balanced_accuracy_score(y_test_series, dt_oversampled_predictions_nparray) * 100
)

# Log the score in bold (ANSI escape codes wrap the message).
logx.print_and_log_text(
    '\033[1m'
    + 'The balanced accuracy score for decision tree oversampled from actual vs. test predictions is {:.2f}%'
        .format(dt_oversampled_balanced_accuracy_score_float)
    + '\033[0m'
)

[1mThe balanced accuracy score for decision tree oversampled from actual vs. test predictions is 98.81%[0m


In [None]:
# Evaluate the oversampled decision tree predictions against the test labels;
# the helper's three return values are unpacked into the names below.
(
    dt_oversampled_accuracy_score_float,
    dt_oversampled_confusion_matrix_dataframe,
    dt_oversampled_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    dt_oversampled_predictions_nparray,
    'DECISION TREE MODEL (Oversampled)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage in both summary dictionaries.
# NOTE: .append is not idempotent; re-running this cell adds a duplicate entry.
model_performance_dictionary['decision_tree'].append(
    dt_oversampled_accuracy_score_float * 100
)

model_performance_ranking_dictionary['decision_tree_oversampling'] = (
    dt_oversampled_accuracy_score_float * 100
)

[1mDECISION TREE MODEL (Oversampled)
[0m
1) [1mOverall Accuracy Score: [0m93.36%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18619                  155
Actual High-Risk                 76                  534

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      0.99     18774
   high-risk       0.78      0.88      0.82       610

    accuracy                           0.99     19384
   macro avg       0.89      0.93      0.91     19384
weighted avg       0.99      0.99      0.99     19384




### **Cluster Centroids**

In [None]:
# Display the hyperparameter settings of the cluster-centroids decision tree model.
decision_tree_cluster_centroids_model.get_params()

{'ccp_alpha': 0.0,
 'class_weight': 'balanced',
 'criterion': 'entropy',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'random_state': 21,
 'splitter': 'random'}

In [None]:
# Imported locally so this cell is self-contained; it can be moved to the
# notebook's main import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict test-set labels with the cluster-centroids decision tree model.
dt_cluster_centroids_predictions_nparray = (
    decision_tree_cluster_centroids_model.predict(x_test_scaled_dataframe)
)

# Balanced accuracy (mean of per-class recall) as a percentage. Plain
# accuracy_score is dominated by the majority class on this imbalanced test
# set and does not match the variable name or the logged message.
dt_cluster_centroids_balanced_accuracy_score_float = (
    balanced_accuracy_score(y_test_series, dt_cluster_centroids_predictions_nparray) * 100
)

# Log the score in bold (ANSI escape codes wrap the message).
logx.print_and_log_text(
    '\033[1m'
    + 'The balanced accuracy score for decision tree cluster centroids from actual vs. test predictions is {:.2f}%'
        .format(dt_cluster_centroids_balanced_accuracy_score_float)
    + '\033[0m'
)

[1mThe balanced accuracy score for decision tree cluster centroids from actual vs. test predictions is 99.01%[0m


In [None]:
# Evaluate the cluster-centroids decision tree predictions against the test
# labels; the helper's three return values are unpacked into the names below.
(
    dt_cluster_centroids_accuracy_score_float,
    dt_cluster_centroids_confusion_matrix_dataframe,
    dt_cluster_centroids_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    dt_cluster_centroids_predictions_nparray,
    'DECISION TREE MODEL (Cluster Centroids)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage in both summary dictionaries.
# NOTE: .append is not idempotent; re-running this cell adds a duplicate entry.
model_performance_dictionary['decision_tree'].append(
    dt_cluster_centroids_accuracy_score_float * 100
)

model_performance_ranking_dictionary['decision_tree_cluster_centroids'] = (
    dt_cluster_centroids_accuracy_score_float * 100
)

[1mDECISION TREE MODEL (Cluster Centroids)
[0m
1) [1mOverall Accuracy Score: [0m93.94%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18653                  121
Actual High-Risk                 70                  540

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      0.99     18774
   high-risk       0.82      0.89      0.85       610

    accuracy                           0.99     19384
   macro avg       0.91      0.94      0.92     19384
weighted avg       0.99      0.99      0.99     19384




### **SMOTE**

In [None]:
# Display the hyperparameter settings of the SMOTE decision tree model.
decision_tree_SMOTE_model.get_params()

{'ccp_alpha': 0.0,
 'class_weight': 'balanced',
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'random_state': 21,
 'splitter': 'best'}

In [None]:
# Imported locally so this cell is self-contained; it can be moved to the
# notebook's main import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict test-set labels with the SMOTE decision tree model.
dt_SMOTE_predictions_nparray = (
    decision_tree_SMOTE_model.predict(x_test_scaled_dataframe)
)

# Balanced accuracy (mean of per-class recall) as a percentage. Plain
# accuracy_score is dominated by the majority class on this imbalanced test
# set and does not match the variable name or the logged message.
dt_SMOTE_balanced_accuracy_score_float = (
    balanced_accuracy_score(y_test_series, dt_SMOTE_predictions_nparray) * 100
)

# Log the score in bold (ANSI escape codes wrap the message).
logx.print_and_log_text(
    '\033[1m'
    + 'The balanced accuracy score for decision tree SMOTE from actual vs. test predictions is {:.2f}%'
        .format(dt_SMOTE_balanced_accuracy_score_float)
    + '\033[0m'
)

[1mThe balanced accuracy score for decision tree SMOTE from actual vs. test predictions is 55.65%[0m


In [None]:
# Evaluate the SMOTE decision tree predictions against the test labels;
# the helper's three return values are unpacked into the names below.
(
    dt_SMOTE_accuracy_score_float,
    dt_SMOTE_confusion_matrix_dataframe,
    dt_SMOTE_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    dt_SMOTE_predictions_nparray,
    'DECISION TREE MODEL (SMOTE)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage in both summary dictionaries.
# NOTE: .append is not idempotent; re-running this cell adds a duplicate entry.
model_performance_dictionary['decision_tree'].append(
    dt_SMOTE_accuracy_score_float * 100
)

model_performance_ranking_dictionary['decision_tree_smote'] = (
    dt_SMOTE_accuracy_score_float * 100
)

[1mDECISION TREE MODEL (SMOTE)
[0m
1) [1mOverall Accuracy Score: [0m72.74%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                10233                 8541
Actual High-Risk                 55                  555

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       0.99      0.55      0.70     18774
   high-risk       0.06      0.91      0.11       610

    accuracy                           0.56     19384
   macro avg       0.53      0.73      0.41     19384
weighted avg       0.97      0.56      0.69     19384




### **SMOTEENN**

In [None]:
# Display the hyperparameter settings of the SMOTEENN decision tree model.
decision_tree_SMOTEENN_model.get_params()

{'ccp_alpha': 0.0,
 'class_weight': 'balanced',
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'random_state': 21,
 'splitter': 'best'}

In [None]:
# Imported locally so this cell is self-contained; it can be moved to the
# notebook's main import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict test-set labels with the SMOTEENN decision tree model.
dt_SMOTEENN_predictions_nparray = (
    decision_tree_SMOTEENN_model.predict(x_test_scaled_dataframe)
)

# Balanced accuracy (mean of per-class recall) as a percentage. Plain
# accuracy_score is dominated by the majority class on this imbalanced test
# set and does not match the variable name or the logged message.
dt_SMOTEENN_balanced_accuracy_score_float = (
    balanced_accuracy_score(y_test_series, dt_SMOTEENN_predictions_nparray) * 100
)

# Log the score in bold (ANSI escape codes wrap the message).
logx.print_and_log_text(
    '\033[1m'
    + 'The balanced accuracy score for decision tree SMOTEENN from actual vs. test predictions is {:.2f}%'
        .format(dt_SMOTEENN_balanced_accuracy_score_float)
    + '\033[0m'
)

[1mThe balanced accuracy score for decision tree SMOTEENN from actual vs. test predictions is 87.83%[0m


In [None]:
# Evaluate the SMOTEENN decision tree predictions against the test labels;
# the helper's three return values are unpacked into the names below.
(
    dt_SMOTEENN_accuracy_score_float,
    dt_SMOTEENN_confusion_matrix_dataframe,
    dt_SMOTEENN_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    dt_SMOTEENN_predictions_nparray,
    'DECISION TREE MODEL (SMOTEENN)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage in both summary dictionaries.
# NOTE: .append is not idempotent; re-running this cell adds a duplicate entry.
model_performance_dictionary['decision_tree'].append(
    dt_SMOTEENN_accuracy_score_float * 100
)

model_performance_ranking_dictionary['decision_tree_smoteen'] = (
    dt_SMOTEENN_accuracy_score_float * 100
)

[1mDECISION TREE MODEL (SMOTEENN)
[0m
1) [1mOverall Accuracy Score: [0m93.71%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                16414                 2360
Actual High-Risk                  0                  610

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.87      0.93     18774
   high-risk       0.21      1.00      0.34       610

    accuracy                           0.88     19384
   macro avg       0.60      0.94      0.64     19384
weighted avg       0.97      0.88      0.91     19384




## **9.3: Random Forest**

### **Original**

In [None]:
# Display the hyperparameter settings of the original random forest model.
random_forest_model.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': 'balanced_subsample',
 'criterion': 'entropy',
 'max_depth': None,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 200,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 21,
 'verbose': 0,
 'warm_start': False}

In [None]:
# Imported locally so this cell is self-contained; it can be moved to the
# notebook's main import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict test-set labels with the original random forest model.
# (The 'rt_' prefix is kept as-is because a later cell reads this name.)
rt_predictions_nparray = (
    random_forest_model.predict(x_test_scaled_dataframe)
)

# Balanced accuracy (mean of per-class recall) as a percentage. Plain
# accuracy_score is dominated by the majority class on this imbalanced test
# set and does not match the variable name or the logged message.
rf_balanced_accuracy_score_float = (
    balanced_accuracy_score(y_test_series, rt_predictions_nparray) * 100
)

# Log the score in bold (ANSI escape codes wrap the message).
logx.print_and_log_text(
    '\033[1m'
    + 'The balanced accuracy score for random forest from actual vs. test predictions is {:.2f}%'
        .format(rf_balanced_accuracy_score_float)
    + '\033[0m'
)

[1mThe balanced accuracy score for random forest from actual vs. test predictions is 99.25%[0m


In [None]:
# Evaluate the original random forest predictions against the test labels;
# the helper's three return values are unpacked into the names below.
(
    rf_accuracy_score_float,
    rf_confusion_matrix_dataframe,
    rf_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    rt_predictions_nparray,
    'RANDOM FOREST MODEL',
    'Healthy',
    'High-Risk',
)

# Start the random forest score list with this model's percentage and record
# the same value in the ranking dictionary.
model_performance_dictionary['random_forest'] = [
    rf_accuracy_score_float * 100
]

model_performance_ranking_dictionary['random_forest'] = (
    rf_accuracy_score_float * 100
)

[1mRANDOM FOREST MODEL
[0m
1) [1mOverall Accuracy Score: [0m97.55%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18655                  119
Actual High-Risk                 26                  584

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      0.96      0.89       610

    accuracy                           0.99     19384
   macro avg       0.91      0.98      0.94     19384
weighted avg       0.99      0.99      0.99     19384




### **Random Undersampling**

In [None]:
# Display the hyperparameter settings of the undersampled random forest model.
random_forest_undersampled_model.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': 'balanced',
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 200,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 21,
 'verbose': 0,
 'warm_start': False}

In [None]:
# Imported locally so this cell is self-contained; it can be moved to the
# notebook's main import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict test-set labels with the undersampled random forest model.
# (The 'rt_' prefix is kept as-is because a later cell reads this name.)
rt_undersampled_predictions_nparray = (
    random_forest_undersampled_model.predict(x_test_scaled_dataframe)
)

# Balanced accuracy (mean of per-class recall) as a percentage. Plain
# accuracy_score is dominated by the majority class on this imbalanced test
# set and does not match the variable name or the logged message.
rf_undersampled_balanced_accuracy_score_float = (
    balanced_accuracy_score(y_test_series, rt_undersampled_predictions_nparray) * 100
)

# Log the score in bold (ANSI escape codes wrap the message).
logx.print_and_log_text(
    '\033[1m'
    + 'The balanced accuracy score for random forest undersampled from actual vs. test predictions is {:.2f}%'
        .format(rf_undersampled_balanced_accuracy_score_float)
    + '\033[0m'
)

[1mThe balanced accuracy score for random forest undersampled from actual vs. test predictions is 99.25%[0m


In [None]:
# Evaluate the undersampled random forest predictions against the test labels;
# the helper's three return values are unpacked into the names below.
(
    rf_undersampled_accuracy_score_float,
    rf_undersampled_confusion_matrix_dataframe,
    rf_undersampled_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    rt_undersampled_predictions_nparray,
    'RANDOM FOREST MODEL (Undersampled)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage in both summary dictionaries.
# NOTE: .append is not idempotent; re-running this cell adds a duplicate entry.
model_performance_dictionary['random_forest'].append(
    rf_undersampled_accuracy_score_float * 100
)

model_performance_ranking_dictionary['random_forest_undersampled'] = (
    rf_undersampled_accuracy_score_float * 100
)

[1mRANDOM FOREST MODEL (Undersampled)
[0m
1) [1mOverall Accuracy Score: [0m99.46%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18631                  143
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.81      1.00      0.89       610

    accuracy                           0.99     19384
   macro avg       0.90      0.99      0.94     19384
weighted avg       0.99      0.99      0.99     19384




### **Random Oversampling**

In [None]:
# Display the hyperparameter settings of the oversampled random forest model.
random_forest_oversampled_model.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': 'balanced',
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 200,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 21,
 'verbose': 0,
 'warm_start': False}

In [None]:
# Imported locally so this cell is self-contained; it can be moved to the
# notebook's main import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict test-set labels with the oversampled random forest model.
# (The 'rt_' prefix is kept as-is because a later cell reads this name.)
rt_oversampled_predictions_nparray = (
    random_forest_oversampled_model.predict(x_test_scaled_dataframe)
)

# Balanced accuracy (mean of per-class recall) as a percentage. Plain
# accuracy_score is dominated by the majority class on this imbalanced test
# set and does not match the variable name or the logged message.
rf_oversampled_balanced_accuracy_score_float = (
    balanced_accuracy_score(y_test_series, rt_oversampled_predictions_nparray) * 100
)

# Log the score in bold (ANSI escape codes wrap the message).
logx.print_and_log_text(
    '\033[1m'
    + 'The balanced accuracy score for random forest oversampled from actual vs. test predictions is {:.2f}%'
        .format(rf_oversampled_balanced_accuracy_score_float)
    + '\033[0m'
)

[1mThe balanced accuracy score for random forest oversampled from actual vs. test predictions is 99.15%[0m


In [None]:
# Evaluate the oversampled random forest predictions against the test labels;
# the helper's three return values are unpacked into the names below.
(
    rf_oversampled_accuracy_score_float,
    rf_oversampled_confusion_matrix_dataframe,
    rf_oversampled_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    rt_oversampled_predictions_nparray,
    'RANDOM FOREST MODEL (Oversampled)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage in both summary dictionaries.
# NOTE: .append is not idempotent; re-running this cell adds a duplicate entry.
model_performance_dictionary['random_forest'].append(
    rf_oversampled_accuracy_score_float * 100
)

model_performance_ranking_dictionary['random_forest_oversampled'] = (
    rf_oversampled_accuracy_score_float * 100
)

[1mRANDOM FOREST MODEL (Oversampled)
[0m
1) [1mOverall Accuracy Score: [0m98.22%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18627                  147
Actual High-Risk                 17                  593

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.80      0.97      0.88       610

    accuracy                           0.99     19384
   macro avg       0.90      0.98      0.94     19384
weighted avg       0.99      0.99      0.99     19384




### **Cluster Centroids**

In [None]:
# Display the hyperparameter settings of the cluster-centroids random forest model.
random_forest_cluster_centroids_model.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': 'balanced',
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 200,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 21,
 'verbose': 0,
 'warm_start': False}

In [None]:
# Imported locally so this cell is self-contained; it can be moved to the
# notebook's main import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict test-set labels with the cluster-centroids random forest model.
rf_cluster_centroids_predictions_nparray = (
    random_forest_cluster_centroids_model.predict(x_test_scaled_dataframe)
)

# Balanced accuracy (mean of per-class recall) as a percentage. Plain
# accuracy_score is dominated by the majority class on this imbalanced test
# set and does not match the variable name or the logged message.
rf_cluster_centroids_balanced_accuracy_score_float = (
    balanced_accuracy_score(y_test_series, rf_cluster_centroids_predictions_nparray) * 100
)

# Log the score in bold (ANSI escape codes wrap the message).
logx.print_and_log_text(
    '\033[1m'
    + 'The balanced accuracy score for random forest cluster centroids from actual vs. test predictions is {:.2f}%'
        .format(rf_cluster_centroids_balanced_accuracy_score_float)
    + '\033[0m'
)

[1mThe balanced accuracy score for random forest cluster centroids from actual vs. test predictions is 99.10%[0m


In [None]:
# Evaluate the cluster-centroids random forest predictions against the test
# labels; the helper's three return values are unpacked into the names below.
(
    rf_cluster_centroids_accuracy_score_float,
    rf_cluster_centroids_confusion_matrix_dataframe,
    rf_cluster_centroids_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    rf_cluster_centroids_predictions_nparray,
    'RANDOM FOREST MODEL (Cluster Centroids)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage in both summary dictionaries.
# NOTE: .append is not idempotent; re-running this cell adds a duplicate entry.
model_performance_dictionary['random_forest'].append(
    rf_cluster_centroids_accuracy_score_float * 100
)

model_performance_ranking_dictionary['random_forest_cluster_centroids'] = (
    rf_cluster_centroids_accuracy_score_float * 100
)

[1mRANDOM FOREST MODEL (Cluster Centroids)
[0m
1) [1mOverall Accuracy Score: [0m98.03%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18618                  156
Actual High-Risk                 19                  591

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.79      0.97      0.87       610

    accuracy                           0.99     19384
   macro avg       0.90      0.98      0.93     19384
weighted avg       0.99      0.99      0.99     19384




### **SMOTE**

In [None]:
# Display the hyperparameter settings of the SMOTE random forest model.
random_forest_SMOTE_model.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': 'balanced_subsample',
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 200,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 21,
 'verbose': 0,
 'warm_start': False}

In [None]:
# Imported locally so this cell is self-contained; it can be moved to the
# notebook's main import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict test-set labels with the SMOTE random forest model.
rf_SMOTE_predictions_nparray = (
    random_forest_SMOTE_model.predict(x_test_scaled_dataframe)
)

# Balanced accuracy (mean of per-class recall) as a percentage. Plain
# accuracy_score is dominated by the majority class on this imbalanced test
# set and does not match the variable name or the logged message.
rf_SMOTE_balanced_accuracy_score_float = (
    balanced_accuracy_score(y_test_series, rf_SMOTE_predictions_nparray) * 100
)

# Log the score in bold (ANSI escape codes wrap the message).
logx.print_and_log_text(
    '\033[1m'
    + 'The balanced accuracy score for random forest SMOTE from actual vs. test predictions is {:.2f}%'
        .format(rf_SMOTE_balanced_accuracy_score_float)
    + '\033[0m'
)

[1mThe balanced accuracy score for random forest SMOTE from actual vs. test predictions is 74.56%[0m


In [None]:
# Evaluate the SMOTE random forest predictions against the test labels;
# the helper's three return values are unpacked into the names below.
(
    rf_SMOTE_accuracy_score_float,
    rf_SMOTE_confusion_matrix_dataframe,
    rf_SMOTE_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    rf_SMOTE_predictions_nparray,
    'RANDOM FOREST MODEL (SMOTE)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage in both summary dictionaries.
# NOTE: .append is not idempotent; re-running this cell adds a duplicate entry.
model_performance_dictionary['random_forest'].append(
    rf_SMOTE_accuracy_score_float * 100
)

model_performance_ranking_dictionary['random_forest_smote'] = (
    rf_SMOTE_accuracy_score_float * 100
)

[1mRANDOM FOREST MODEL (SMOTE)
[0m
1) [1mOverall Accuracy Score: [0m86.55%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                13846                 4928
Actual High-Risk                  4                  606

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.74      0.85     18774
   high-risk       0.11      0.99      0.20       610

    accuracy                           0.75     19384
   macro avg       0.55      0.87      0.52     19384
weighted avg       0.97      0.75      0.83     19384




### **SMOTEENN**

In [None]:
# Display the hyperparameter settings of the SMOTEENN random forest model.
random_forest_SMOTEENN_model.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': 'balanced_subsample',
 'criterion': 'entropy',
 'max_depth': None,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 200,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 21,
 'verbose': 0,
 'warm_start': False}

In [None]:
# Imported locally so this cell is self-contained; it can be moved to the
# notebook's main import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict test-set labels with the SMOTEENN random forest model.
rf_SMOTEENN_predictions_nparray = (
    random_forest_SMOTEENN_model.predict(x_test_scaled_dataframe)
)

# Balanced accuracy (mean of per-class recall) as a percentage. Plain
# accuracy_score is dominated by the majority class on this imbalanced test
# set and does not match the variable name or the logged message.
rf_SMOTEENN_balanced_accuracy_score_float = (
    balanced_accuracy_score(y_test_series, rf_SMOTEENN_predictions_nparray) * 100
)

# Log the score in bold (ANSI escape codes wrap the message).
logx.print_and_log_text(
    '\033[1m'
    + 'The balanced accuracy score for random forest SMOTEENN from actual vs. test predictions is {:.2f}%'
        .format(rf_SMOTEENN_balanced_accuracy_score_float)
    + '\033[0m'
)

[1mThe balanced accuracy score for random forest SMOTEENN from actual vs. test predictions is 90.09%[0m


In [None]:
# Evaluate the SMOTEENN random forest predictions against the test labels;
# the helper's three return values are unpacked into the names below.
(
    rf_SMOTEENN_accuracy_score_float,
    rf_SMOTEENN_confusion_matrix_dataframe,
    rf_SMOTEENN_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    rf_SMOTEENN_predictions_nparray,
    'RANDOM FOREST MODEL (SMOTEENN)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage in both summary dictionaries.
# NOTE: .append is not idempotent; re-running this cell adds a duplicate entry.
model_performance_dictionary['random_forest'].append(
    rf_SMOTEENN_accuracy_score_float * 100
)

model_performance_ranking_dictionary['random_forest_smoteen'] = (
    rf_SMOTEENN_accuracy_score_float * 100
)

[1mRANDOM FOREST MODEL (SMOTEENN)
[0m
1) [1mOverall Accuracy Score: [0m94.88%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                16853                 1921
Actual High-Risk                  0                  610

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.90      0.95     18774
   high-risk       0.24      1.00      0.39       610

    accuracy                           0.90     19384
   macro avg       0.62      0.95      0.67     19384
weighted avg       0.98      0.90      0.93     19384




## **9.4: Support Vector Machine (SVM)**

### **Original**

In [None]:
# Display the hyperparameter settings of the original SVM model.
svm_model.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': True,
 'random_state': 21,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [None]:
# Imported locally so this cell is self-contained; it can be moved to the
# notebook's main import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict test-set labels with the original SVM model.
svm_predictions_nparray = (
    svm_model.predict(x_test_scaled_dataframe)
)

# Balanced accuracy (mean of per-class recall) as a percentage. Plain
# accuracy_score is dominated by the majority class on this imbalanced test
# set and does not match the variable name or the logged message.
svm_balanced_accuracy_score_float = (
    balanced_accuracy_score(y_test_series, svm_predictions_nparray) * 100
)

# Log the score in bold (ANSI escape codes wrap the message).
logx.print_and_log_text(
    '\033[1m'
    + 'The balanced accuracy score for SVM from actual vs. test predictions is {:.2f}%'
        .format(svm_balanced_accuracy_score_float)
    + '\033[0m'
)

[1mThe balanced accuracy score for SVM from actual vs. test predictions is 99.37%[0m


In [None]:
# Evaluate the original SVM predictions against the test labels;
# the helper's three return values are unpacked into the names below.
(
    svm_accuracy_score_float,
    svm_confusion_matrix_dataframe,
    svm_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    svm_predictions_nparray,
    'SVM MODEL',
    'Healthy',
    'High-Risk',
)

# Start the SVM score list with this model's percentage and record the same
# value in the ranking dictionary.
model_performance_dictionary['svm'] = [
    svm_accuracy_score_float * 100
]

model_performance_ranking_dictionary['svm'] = (
    svm_accuracy_score_float * 100
)

[1mSVM MODEL
[0m
1) [1mOverall Accuracy Score: [0m99.51%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18653                  121
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      1.00      0.91       610

    accuracy                           0.99     19384
   macro avg       0.92      1.00      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **Random Undersampling**

In [None]:
# Display the hyperparameter settings of the undersampled SVM model.
svm_undersampled_model.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': True,
 'random_state': 21,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [None]:
# Imported locally so this cell is self-contained; it can be moved to the
# notebook's main import cell.
from sklearn.metrics import balanced_accuracy_score

# Predict test-set labels with the undersampled SVM model.
svm_undersampled_predictions_nparray = (
    svm_undersampled_model.predict(x_test_scaled_dataframe)
)

# Balanced accuracy (mean of per-class recall) as a percentage. Plain
# accuracy_score is dominated by the majority class on this imbalanced test
# set and does not match the variable name or the logged message.
svm_undersampled_balanced_accuracy_score_float = (
    balanced_accuracy_score(y_test_series, svm_undersampled_predictions_nparray) * 100
)

# Log the score in bold (ANSI escape codes wrap the message).
logx.print_and_log_text(
    '\033[1m'
    + 'The balanced accuracy score for svm undersampled from actual vs. test predictions is {:.2f}%'
        .format(svm_undersampled_balanced_accuracy_score_float)
    + '\033[0m'
)

[1mThe balanced accuracy score for svm undersampled from actual vs. test predictions is 99.35%[0m


In [None]:
# Evaluate the undersampled SVM predictions against the test labels;
# the helper's three return values are unpacked into the names below.
(
    svm_undersampled_accuracy_score_float,
    svm_undersampled_confusion_matrix_dataframe,
    svm_undersampled_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    svm_undersampled_predictions_nparray,
    'SVM MODEL (Undersampled)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage in both summary dictionaries.
# NOTE: .append is not idempotent; re-running this cell adds a duplicate entry.
model_performance_dictionary['svm'].append(
    svm_undersampled_accuracy_score_float * 100
)

model_performance_ranking_dictionary['svm_undersampled'] = (
    svm_undersampled_accuracy_score_float * 100
)

[1mSVM MODEL (Undersampled)
[0m
1) [1mOverall Accuracy Score: [0m99.51%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18650                  124
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      1.00      0.91       610

    accuracy                           0.99     19384
   macro avg       0.92      1.00      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **Random Oversampling**

In [None]:
# Display the parameter dictionary of the estimator held in svm_oversampled_model.
svm_oversampled_model.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': True,
 'random_state': 21,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [None]:
# Predict test-set labels with the estimator held in svm_oversampled_model.
svm_oversampled_predictions_nparray = (
    svm_oversampled_model.predict(x_test_scaled_dataframe))

# accuracy_score (presumably sklearn.metrics.accuracy_score) gives plain
# accuracy -- the fraction of correct predictions -- not balanced accuracy,
# so the logged message says "accuracy".  The variable keeps its original
# name because other cells may reference it.
svm_oversampled_balanced_accuracy_score_float = (
    accuracy_score(y_test_series, svm_oversampled_predictions_nparray) * 100)

logx.print_and_log_text(
    '\033[1m'
    + 'The accuracy score for svm oversampled from actual vs. test predictions is {:.2f}%'
        .format(svm_oversampled_balanced_accuracy_score_float)
    + '\033[0m')

[1mThe balanced accuracy score for svm oversampled from actual vs. test predictions is 99.34%[0m


In [None]:
# Evaluate the predictions against the test labels; the helper's three return
# values are unpacked as score, confusion matrix, and classification report.
(svm_oversampled_accuracy_score_float,
 svm_oversampled_confusion_matrix_dataframe,
 svm_oversampled_classification_report_string) = (
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        svm_oversampled_predictions_nparray,
        'SVM MODEL (Oversampled)',
        'Healthy',
        'High-Risk'))

# Record the score as a percentage for the performance matrix and the ranking.
# NOTE: append() is not idempotent -- re-running this cell adds a duplicate
# entry to model_performance_dictionary['svm'].
model_performance_dictionary['svm'].append(
    svm_oversampled_accuracy_score_float * 100)

model_performance_ranking_dictionary['svm_oversampled'] = (
    svm_oversampled_accuracy_score_float * 100)

[1mSVM MODEL (Oversampled)
[0m
1) [1mOverall Accuracy Score: [0m99.5%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18649                  125
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      1.00      0.91       610

    accuracy                           0.99     19384
   macro avg       0.91      1.00      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **Cluster Centroids**

In [None]:
# Display the parameter dictionary of the estimator held in svm_cluster_centroids_model.
svm_cluster_centroids_model.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': True,
 'random_state': 21,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [None]:
# Predict test-set labels with the estimator held in svm_cluster_centroids_model.
svm_cluster_centroids_predictions_nparray = (
    svm_cluster_centroids_model.predict(x_test_scaled_dataframe))

# accuracy_score (presumably sklearn.metrics.accuracy_score) gives plain
# accuracy -- the fraction of correct predictions -- not balanced accuracy,
# so the logged message says "accuracy".  The variable keeps its original
# name because other cells may reference it.
svm_cluster_centroids_balanced_accuracy_score_float = (
    accuracy_score(y_test_series, svm_cluster_centroids_predictions_nparray) * 100)

logx.print_and_log_text(
    '\033[1m'
    + 'The accuracy score for svm cluster centroids from actual vs. test predictions is {:.2f}%'
        .format(svm_cluster_centroids_balanced_accuracy_score_float)
    + '\033[0m')

[1mThe balanced accuracy score for svm cluster centroids from actual vs. test predictions is 99.33%[0m


In [None]:
# Evaluate the predictions against the test labels; the helper's three return
# values are unpacked as score, confusion matrix, and classification report.
(svm_cluster_centroids_accuracy_score_float,
 svm_cluster_centroids_confusion_matrix_dataframe,
 svm_cluster_centroids_classification_report_string) = (
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        svm_cluster_centroids_predictions_nparray,
        'SVM MODEL (Cluster Centroids)',
        'Healthy',
        'High-Risk'))

# Record the score as a percentage for the performance matrix and the ranking.
# NOTE: append() is not idempotent -- re-running this cell adds a duplicate
# entry to model_performance_dictionary['svm'].
model_performance_dictionary['svm'].append(
    svm_cluster_centroids_accuracy_score_float * 100)

model_performance_ranking_dictionary['svm_cluster_centroids'] = (
    svm_cluster_centroids_accuracy_score_float * 100)

[1mSVM MODEL (Cluster Centroids)
[0m
1) [1mOverall Accuracy Score: [0m98.7%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18656                  118
Actual High-Risk                 12                  598

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.84      0.98      0.90       610

    accuracy                           0.99     19384
   macro avg       0.92      0.99      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **SMOTE**

In [None]:
# Display the parameter dictionary of the estimator held in svm_SMOTE_model.
svm_SMOTE_model.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': True,
 'random_state': 21,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [None]:
# Predict test-set labels with the estimator held in svm_SMOTE_model.
svm_SMOTE_predictions_nparray = (
    svm_SMOTE_model.predict(x_test_scaled_dataframe))

# accuracy_score (presumably sklearn.metrics.accuracy_score) gives plain
# accuracy -- the fraction of correct predictions -- not balanced accuracy,
# so the logged message says "accuracy".  The variable keeps its original
# name because other cells may reference it.
svm_SMOTE_balanced_accuracy_score_float = (
    accuracy_score(y_test_series, svm_SMOTE_predictions_nparray) * 100)

logx.print_and_log_text(
    '\033[1m'
    + 'The accuracy score for svm SMOTE from actual vs. test predictions is {:.2f}%'
        .format(svm_SMOTE_balanced_accuracy_score_float)
    + '\033[0m')

[1mThe balanced accuracy score for svm SMOTE from actual vs. test predictions is 99.34%[0m


In [None]:
# Evaluate the predictions against the test labels; the helper's three return
# values are unpacked as score, confusion matrix, and classification report.
(svm_SMOTE_accuracy_score_float,
 svm_SMOTE_confusion_matrix_dataframe,
 svm_SMOTE_classification_report_string) = (
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        svm_SMOTE_predictions_nparray,
        'SVM MODEL (SMOTE)',
        'Healthy',
        'High-Risk'))

# Record the score as a percentage for the performance matrix and the ranking.
# NOTE: append() is not idempotent -- re-running this cell adds a duplicate
# entry to model_performance_dictionary['svm'].
model_performance_dictionary['svm'].append(
    svm_SMOTE_accuracy_score_float * 100)

model_performance_ranking_dictionary['svm_smote'] = (
    svm_SMOTE_accuracy_score_float * 100)

[1mSVM MODEL (SMOTE)
[0m
1) [1mOverall Accuracy Score: [0m99.5%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18649                  125
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      1.00      0.91       610

    accuracy                           0.99     19384
   macro avg       0.91      1.00      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **SMOTEENN**

In [None]:
# Display the parameter dictionary of the estimator held in svm_SMOTEENN_model.
svm_SMOTEENN_model.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': True,
 'random_state': 21,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [None]:
# Predict test-set labels with the estimator held in svm_SMOTEENN_model.
svm_SMOTEENN_predictions_nparray = (
    svm_SMOTEENN_model.predict(x_test_scaled_dataframe))

# accuracy_score (presumably sklearn.metrics.accuracy_score) gives plain
# accuracy -- the fraction of correct predictions -- not balanced accuracy,
# so the logged message says "accuracy".  The variable keeps its original
# name because other cells may reference it.
svm_SMOTEENN_balanced_accuracy_score_float = (
    accuracy_score(y_test_series, svm_SMOTEENN_predictions_nparray) * 100)

logx.print_and_log_text(
    '\033[1m'
    + 'The accuracy score for svm SMOTEENN from actual vs. test predictions is {:.2f}%'
        .format(svm_SMOTEENN_balanced_accuracy_score_float)
    + '\033[0m')

[1mThe balanced accuracy score for svm SMOTEENN from actual vs. test predictions is 99.36%[0m


In [None]:
# Evaluate the predictions against the test labels; the helper's three return
# values are unpacked as score, confusion matrix, and classification report.
(svm_SMOTEENN_accuracy_score_float,
 svm_SMOTEENN_confusion_matrix_dataframe,
 svm_SMOTEENN_classification_report_string) = (
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        svm_SMOTEENN_predictions_nparray,
        'SVM MODEL (SMOTEENN)',
        'Healthy',
        'High-Risk'))

# Record the score as a percentage for the performance matrix and the ranking.
# NOTE: append() is not idempotent -- re-running this cell adds a duplicate
# entry to model_performance_dictionary['svm'].
model_performance_dictionary['svm'].append(
    svm_SMOTEENN_accuracy_score_float * 100)

model_performance_ranking_dictionary['svm_smoteen'] = (
    svm_SMOTEENN_accuracy_score_float * 100)

[1mSVM MODEL (SMOTEENN)
[0m
1) [1mOverall Accuracy Score: [0m99.51%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18651                  123
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      1.00      0.91       610

    accuracy                           0.99     19384
   macro avg       0.92      1.00      0.95     19384
weighted avg       0.99      0.99      0.99     19384




## **9.5: K-Nearest Neighbor (KNN)**

### **Original**

In [None]:
# Display the parameter dictionary of the estimator held in knn_model.
knn_model.get_params()

{'algorithm': 'auto',
 'leaf_size': 2,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [None]:
# Predict test-set labels with the estimator held in knn_model.
knn_predictions_nparray = knn_model.predict(x_test_scaled_dataframe)

# accuracy_score (presumably sklearn.metrics.accuracy_score) gives plain
# accuracy -- the fraction of correct predictions -- not balanced accuracy,
# so the logged message says "accuracy".  The variable keeps its original
# name because other cells may reference it.
knn_balanced_accuracy_score_float = (
    accuracy_score(y_test_series, knn_predictions_nparray) * 100)

logx.print_and_log_text(
    '\033[1m'
    + 'The accuracy score for KNN from actual vs. test predictions is {:.2f}%'
        .format(knn_balanced_accuracy_score_float)
    + '\033[0m')

[1mThe balanced accuracy score for KNN from actual vs. test predictions is 99.28%[0m


In [None]:
# Evaluate the predictions against the test labels; the helper's three return
# values are unpacked as score, confusion matrix, and classification report.
(knn_accuracy_score_float,
 knn_confusion_matrix_dataframe,
 knn_classification_report_string) = (
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        knn_predictions_nparray,
        'KNN MODEL',
        'Healthy',
        'High-Risk'))

# Start the 'knn' score list with this score as a percentage (assigning a
# fresh list replaces any earlier contents) and record it in the ranking.
model_performance_dictionary['knn'] = [knn_accuracy_score_float * 100]

model_performance_ranking_dictionary['knn'] = knn_accuracy_score_float * 100

[1mKNN MODEL
[0m
1) [1mOverall Accuracy Score: [0m98.12%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18653                  121
Actual High-Risk                 19                  591

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      0.97      0.89       610

    accuracy                           0.99     19384
   macro avg       0.91      0.98      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **Random Undersampling**

In [None]:
# Display the parameter dictionary of the estimator held in knn_undersampled_model.
knn_undersampled_model.get_params()

{'algorithm': 'auto',
 'leaf_size': 2,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [None]:
# Predict test-set labels with the estimator held in knn_undersampled_model.
knn_undersampled_predictions_nparray = (
    knn_undersampled_model.predict(x_test_scaled_dataframe))

# accuracy_score (presumably sklearn.metrics.accuracy_score) gives plain
# accuracy -- the fraction of correct predictions -- not balanced accuracy,
# so the logged message says "accuracy".  The variable keeps its original
# name because other cells may reference it.
knn_undersampled_balanced_accuracy_score_float = (
    accuracy_score(y_test_series, knn_undersampled_predictions_nparray) * 100)

logx.print_and_log_text(
    '\033[1m'
    + 'The accuracy score for knn undersampled from actual vs. test predictions is {:.2f}%'
        .format(knn_undersampled_balanced_accuracy_score_float)
    + '\033[0m')

[1mThe balanced accuracy score for knn undersampled from actual vs. test predictions is 99.35%[0m


In [None]:
# Evaluate the predictions against the test labels; the helper's three return
# values are unpacked as score, confusion matrix, and classification report.
(knn_undersampled_accuracy_score_float,
 knn_undersampled_confusion_matrix_dataframe,
 knn_undersampled_classification_report_string) = (
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        knn_undersampled_predictions_nparray,
        'KNN MODEL (Undersampled)',
        'Healthy',
        'High-Risk'))

# Record the score as a percentage for the performance matrix and the ranking.
# NOTE: append() is not idempotent -- re-running this cell adds a duplicate
# entry to model_performance_dictionary['knn'].
model_performance_dictionary['knn'].append(
    knn_undersampled_accuracy_score_float * 100)

model_performance_ranking_dictionary['knn_undersampled'] = (
    knn_undersampled_accuracy_score_float * 100)

[1mKNN MODEL (Undersampled)
[0m
1) [1mOverall Accuracy Score: [0m99.51%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18650                  124
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      1.00      0.91       610

    accuracy                           0.99     19384
   macro avg       0.92      1.00      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **Random Oversampling**

In [None]:
# Display the parameter dictionary of the estimator held in knn_oversampled_model.
knn_oversampled_model.get_params()

{'algorithm': 'auto',
 'leaf_size': 2,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [None]:
# Predict test-set labels with the estimator held in knn_oversampled_model.
knn_oversampled_predictions_nparray = (
    knn_oversampled_model.predict(x_test_scaled_dataframe))

# accuracy_score (presumably sklearn.metrics.accuracy_score) gives plain
# accuracy -- the fraction of correct predictions -- not balanced accuracy,
# so the logged message says "accuracy".  The variable keeps its original
# name because other cells may reference it.
knn_oversampled_balanced_accuracy_score_float = (
    accuracy_score(y_test_series, knn_oversampled_predictions_nparray) * 100)

logx.print_and_log_text(
    '\033[1m'
    + 'The accuracy score for knn oversampled from actual vs. test predictions is {:.2f}%'
        .format(knn_oversampled_balanced_accuracy_score_float)
    + '\033[0m')

[1mThe balanced accuracy score for knn oversampled from actual vs. test predictions is 99.31%[0m


In [None]:
# Evaluate the predictions against the test labels; the helper's three return
# values are unpacked as score, confusion matrix, and classification report.
(knn_oversampled_accuracy_score_float,
 knn_oversampled_confusion_matrix_dataframe,
 knn_oversampled_classification_report_string) = (
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        knn_oversampled_predictions_nparray,
        'KNN MODEL (Oversampled)',
        'Healthy',
        'High-Risk'))

# Record the score as a percentage for the performance matrix and the ranking.
# NOTE: append() is not idempotent -- re-running this cell adds a duplicate
# entry to model_performance_dictionary['knn'].
model_performance_dictionary['knn'].append(
    knn_oversampled_accuracy_score_float * 100)

model_performance_ranking_dictionary['knn_oversampled'] = (
    knn_oversampled_accuracy_score_float * 100)

[1mKNN MODEL (Oversampled)
[0m
1) [1mOverall Accuracy Score: [0m99.33%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18645                  129
Actual High-Risk                  4                  606

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.82      0.99      0.90       610

    accuracy                           0.99     19384
   macro avg       0.91      0.99      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **Cluster Centroids**

In [None]:
# Display the parameter dictionary of the estimator held in knn_cluster_centroids_model.
knn_cluster_centroids_model.get_params()

{'algorithm': 'auto',
 'leaf_size': 2,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [None]:
# Predict test-set labels with the estimator held in knn_cluster_centroids_model.
knn_cluster_centroids_predictions_nparray = (
    knn_cluster_centroids_model.predict(x_test_scaled_dataframe))

# accuracy_score (presumably sklearn.metrics.accuracy_score) gives plain
# accuracy -- the fraction of correct predictions -- not balanced accuracy,
# so the logged message says "accuracy".  The variable keeps its original
# name because other cells may reference it.
knn_cluster_centroids_balanced_accuracy_score_float = (
    accuracy_score(y_test_series, knn_cluster_centroids_predictions_nparray) * 100)

logx.print_and_log_text(
    '\033[1m'
    + 'The accuracy score for knn cluster centroids from actual vs. test predictions is {:.2f}%'
        .format(knn_cluster_centroids_balanced_accuracy_score_float)
    + '\033[0m')

[1mThe balanced accuracy score for knn cluster centroids from actual vs. test predictions is 99.37%[0m


In [None]:
# Evaluate the predictions against the test labels; the helper's three return
# values are unpacked as score, confusion matrix, and classification report.
(knn_cluster_centroids_accuracy_score_float,
 knn_cluster_centroids_confusion_matrix_dataframe,
 knn_cluster_centroids_classification_report_string) = (
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        knn_cluster_centroids_predictions_nparray,
        'KNN MODEL (Cluster Centroids)',
        'Healthy',
        'High-Risk'))

# Record the score as a percentage for the performance matrix and the ranking.
# NOTE: append() is not idempotent -- re-running this cell adds a duplicate
# entry to model_performance_dictionary['knn'].
model_performance_dictionary['knn'].append(
    knn_cluster_centroids_accuracy_score_float * 100)

model_performance_ranking_dictionary['knn_cluster_centroids'] = (
    knn_cluster_centroids_accuracy_score_float * 100)

[1mKNN MODEL (Cluster Centroids)
[0m
1) [1mOverall Accuracy Score: [0m99.51%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18653                  121
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      1.00      0.91       610

    accuracy                           0.99     19384
   macro avg       0.92      1.00      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **SMOTE**

In [None]:
# Display the parameter dictionary of the estimator held in knn_SMOTE_model.
knn_SMOTE_model.get_params()

{'algorithm': 'auto',
 'leaf_size': 2,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [None]:
# Predict test-set labels with the estimator held in knn_SMOTE_model.
knn_SMOTE_predictions_nparray = (
    knn_SMOTE_model.predict(x_test_scaled_dataframe))

# accuracy_score (presumably sklearn.metrics.accuracy_score) gives plain
# accuracy -- the fraction of correct predictions -- not balanced accuracy,
# so the logged message says "accuracy".  The variable keeps its original
# name because other cells may reference it.
knn_SMOTE_balanced_accuracy_score_float = (
    accuracy_score(y_test_series, knn_SMOTE_predictions_nparray) * 100)

logx.print_and_log_text(
    '\033[1m'
    + 'The accuracy score for knn SMOTE from actual vs. test predictions is {:.2f}%'
        .format(knn_SMOTE_balanced_accuracy_score_float)
    + '\033[0m')

[1mThe balanced accuracy score for knn SMOTE from actual vs. test predictions is 94.70%[0m


In [None]:
# Evaluate the predictions against the test labels; the helper's three return
# values are unpacked as score, confusion matrix, and classification report.
(knn_SMOTE_accuracy_score_float,
 knn_SMOTE_confusion_matrix_dataframe,
 knn_SMOTE_classification_report_string) = (
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        knn_SMOTE_predictions_nparray,
        'KNN MODEL (SMOTE)',
        'Healthy',
        'High-Risk'))

# Record the score as a percentage for the performance matrix and the ranking.
# NOTE: append() is not idempotent -- re-running this cell adds a duplicate
# entry to model_performance_dictionary['knn'].
model_performance_dictionary['knn'].append(
    knn_SMOTE_accuracy_score_float * 100)

model_performance_ranking_dictionary['knn_smote'] = (
    knn_SMOTE_accuracy_score_float * 100)

[1mKNN MODEL (SMOTE)
[0m
1) [1mOverall Accuracy Score: [0m97.02%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                17749                 1025
Actual High-Risk                  3                  607

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.95      0.97     18774
   high-risk       0.37      1.00      0.54       610

    accuracy                           0.95     19384
   macro avg       0.69      0.97      0.76     19384
weighted avg       0.98      0.95      0.96     19384




### **SMOTEENN**

In [None]:
# Display the parameter dictionary of the estimator held in knn_SMOTEENN_model.
knn_SMOTEENN_model.get_params()

{'algorithm': 'auto',
 'leaf_size': 2,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [None]:
# Predict test-set labels with the estimator held in knn_SMOTEENN_model.
knn_SMOTEENN_predictions_nparray = (
    knn_SMOTEENN_model.predict(x_test_scaled_dataframe))

# accuracy_score (presumably sklearn.metrics.accuracy_score) gives plain
# accuracy -- the fraction of correct predictions -- not balanced accuracy,
# so the logged message says "accuracy".  The variable keeps its original
# name because other cells may reference it.
knn_SMOTEENN_balanced_accuracy_score_float = (
    accuracy_score(y_test_series, knn_SMOTEENN_predictions_nparray) * 100)

logx.print_and_log_text(
    '\033[1m'
    + 'The accuracy score for knn SMOTEENN from actual vs. test predictions is {:.2f}%'
        .format(knn_SMOTEENN_balanced_accuracy_score_float)
    + '\033[0m')

[1mThe balanced accuracy score for knn SMOTEENN from actual vs. test predictions is 97.45%[0m


In [None]:
# Evaluate the predictions against the test labels; the helper's three return
# values are unpacked as score, confusion matrix, and classification report.
(knn_SMOTEENN_accuracy_score_float,
 knn_SMOTEENN_confusion_matrix_dataframe,
 knn_SMOTEENN_classification_report_string) = (
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        knn_SMOTEENN_predictions_nparray,
        'KNN MODEL (SMOTEENN)',
        'Healthy',
        'High-Risk'))

# Record the score as a percentage for the performance matrix and the ranking.
# NOTE: append() is not idempotent -- re-running this cell adds a duplicate
# entry to model_performance_dictionary['knn'].
model_performance_dictionary['knn'].append(
    knn_SMOTEENN_accuracy_score_float * 100)

model_performance_ranking_dictionary['knn_smoteen'] = (
    knn_SMOTEENN_accuracy_score_float * 100)

[1mKNN MODEL (SMOTEENN)
[0m
1) [1mOverall Accuracy Score: [0m98.61%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18281                  493
Actual High-Risk                  1                  609

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.97      0.99     18774
   high-risk       0.55      1.00      0.71       610

    accuracy                           0.97     19384
   macro avg       0.78      0.99      0.85     19384
weighted avg       0.99      0.97      0.98     19384




## **9.6: Gaussian Naive Bayes (GNB)**

### **Original**

In [None]:
# Display the parameter dictionary of the estimator held in gnb_model.
gnb_model.get_params()

{'priors': None, 'var_smoothing': 1e-09}

In [None]:
# Predict test-set labels with the estimator held in gnb_model.
gnb_predictions_nparray = gnb_model.predict(x_test_scaled_dataframe)

# accuracy_score (presumably sklearn.metrics.accuracy_score) gives plain
# accuracy -- the fraction of correct predictions -- not balanced accuracy,
# so the logged message says "accuracy".  The variable keeps its original
# name because other cells may reference it.
gnb_balanced_accuracy_score_float = (
    accuracy_score(y_test_series, gnb_predictions_nparray) * 100)

logx.print_and_log_text(
    '\033[1m'
    + 'The accuracy score for GNB from actual vs. test predictions is {:.2f}%'
        .format(gnb_balanced_accuracy_score_float)
    + '\033[0m')

[1mThe balanced accuracy score for GNB from actual vs. test predictions is 99.37%[0m


In [None]:
# Evaluate the predictions against the test labels; the helper's three return
# values are unpacked as score, confusion matrix, and classification report.
(gnb_accuracy_score_float,
 gnb_confusion_matrix_dataframe,
 gnb_classification_report_string) = (
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        gnb_predictions_nparray,
        'GNB MODEL',
        'Healthy',
        'High-Risk'))

# Start the 'gnb' score list with this score as a percentage (assigning a
# fresh list replaces any earlier contents) and record it in the ranking.
model_performance_dictionary['gnb'] = [gnb_accuracy_score_float * 100]

model_performance_ranking_dictionary['gnb'] = gnb_accuracy_score_float * 100

[1mGNB MODEL
[0m
1) [1mOverall Accuracy Score: [0m99.51%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18653                  121
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      1.00      0.91       610

    accuracy                           0.99     19384
   macro avg       0.92      1.00      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **Random Undersampling**

In [None]:
# Display the parameter dictionary of the estimator held in gnb_undersampled_model.
gnb_undersampled_model.get_params()

{'priors': None, 'var_smoothing': 1e-09}

In [None]:
# Predict test-set labels with the estimator held in gnb_undersampled_model.
gnb_undersampled_predictions_nparray = (
    gnb_undersampled_model.predict(x_test_scaled_dataframe))

# accuracy_score (presumably sklearn.metrics.accuracy_score) gives plain
# accuracy -- the fraction of correct predictions -- not balanced accuracy,
# so the logged message says "accuracy".  The variable keeps its original
# name because other cells may reference it.
gnb_undersampled_balanced_accuracy_score_float = (
    accuracy_score(y_test_series, gnb_undersampled_predictions_nparray) * 100)

logx.print_and_log_text(
    '\033[1m'
    + 'The accuracy score for gnb undersampled from actual vs. test predictions is {:.2f}%'
        .format(gnb_undersampled_balanced_accuracy_score_float)
    + '\033[0m')

[1mThe balanced accuracy score for gnb undersampled from actual vs. test predictions is 99.37%[0m


In [None]:
# Evaluate the predictions against the test labels; the helper's three return
# values are unpacked as score, confusion matrix, and classification report.
(gnb_undersampled_accuracy_score_float,
 gnb_undersampled_confusion_matrix_dataframe,
 gnb_undersampled_classification_report_string) = (
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        gnb_undersampled_predictions_nparray,
        'GNB MODEL (Undersampled)',
        'Healthy',
        'High-Risk'))

# Record the score as a percentage for the performance matrix and the ranking.
# NOTE: append() is not idempotent -- re-running this cell adds a duplicate
# entry to model_performance_dictionary['gnb'].
model_performance_dictionary['gnb'].append(
    gnb_undersampled_accuracy_score_float * 100)

model_performance_ranking_dictionary['gnb_undersampled'] = (
    gnb_undersampled_accuracy_score_float * 100)

[1mGNB MODEL (Undersampled)
[0m
1) [1mOverall Accuracy Score: [0m99.51%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18653                  121
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      1.00      0.91       610

    accuracy                           0.99     19384
   macro avg       0.92      1.00      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **Random Oversampling**

In [None]:
# Display the parameter dictionary of the estimator held in gnb_oversampled_model.
gnb_oversampled_model.get_params()

{'priors': None, 'var_smoothing': 1e-09}

In [None]:
# Predict test-set labels with the estimator held in gnb_oversampled_model.
gnb_oversampled_predictions_nparray = (
    gnb_oversampled_model.predict(x_test_scaled_dataframe))

# accuracy_score (presumably sklearn.metrics.accuracy_score) gives plain
# accuracy -- the fraction of correct predictions -- not balanced accuracy,
# so the logged message says "accuracy".  The variable keeps its original
# name because other cells may reference it.
gnb_oversampled_balanced_accuracy_score_float = (
    accuracy_score(y_test_series, gnb_oversampled_predictions_nparray) * 100)

logx.print_and_log_text(
    '\033[1m'
    + 'The accuracy score for gnb oversampled from actual vs. test predictions is {:.2f}%'
        .format(gnb_oversampled_balanced_accuracy_score_float)
    + '\033[0m')

[1mThe balanced accuracy score for gnb oversampled from actual vs. test predictions is 99.37%[0m


In [None]:
# Evaluate the predictions against the test labels; the helper's three return
# values are unpacked as score, confusion matrix, and classification report.
(gnb_oversampled_accuracy_score_float,
 gnb_oversampled_confusion_matrix_dataframe,
 gnb_oversampled_classification_report_string) = (
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        gnb_oversampled_predictions_nparray,
        'GNB MODEL (Oversampled)',
        'Healthy',
        'High-Risk'))

# Record the score as a percentage for the performance matrix and the ranking.
# NOTE: append() is not idempotent -- re-running this cell adds a duplicate
# entry to model_performance_dictionary['gnb'].
model_performance_dictionary['gnb'].append(
    gnb_oversampled_accuracy_score_float * 100)

model_performance_ranking_dictionary['gnb_oversampled'] = (
    gnb_oversampled_accuracy_score_float * 100)

[1mGNB MODEL (Oversampled)
[0m
1) [1mOverall Accuracy Score: [0m99.51%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18653                  121
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      1.00      0.91       610

    accuracy                           0.99     19384
   macro avg       0.92      1.00      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **Cluster Centroids**

In [None]:
# Display the parameter dictionary of the estimator held in gnb_cluster_centroids_model.
gnb_cluster_centroids_model.get_params()

{'priors': None, 'var_smoothing': 1e-09}

In [None]:
# Predict test-set labels with the estimator held in gnb_cluster_centroids_model.
gnb_cluster_centroids_predictions_nparray = (
    gnb_cluster_centroids_model.predict(x_test_scaled_dataframe))

# accuracy_score (presumably sklearn.metrics.accuracy_score) gives plain
# accuracy -- the fraction of correct predictions -- not balanced accuracy,
# so the logged message says "accuracy".  The variable keeps its original
# name because other cells may reference it.
gnb_cluster_centroids_balanced_accuracy_score_float = (
    accuracy_score(y_test_series, gnb_cluster_centroids_predictions_nparray) * 100)

# The message names the GNB model; it previously said "knn" (copy-paste slip).
logx.print_and_log_text(
    '\033[1m'
    + 'The accuracy score for gnb cluster centroids from actual vs. test predictions is {:.2f}%'
        .format(gnb_cluster_centroids_balanced_accuracy_score_float)
    + '\033[0m')

[1mThe balanced accuracy score for knn cluster centroids from actual vs. test predictions is 99.33%[0m


In [None]:
# Evaluate the predictions against the test labels; the helper's three return
# values are unpacked as score, confusion matrix, and classification report.
(gnb_cluster_centroids_accuracy_score_float,
 gnb_cluster_centroids_confusion_matrix_dataframe,
 gnb_cluster_centroids_classification_report_string) = (
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        gnb_cluster_centroids_predictions_nparray,
        'GNB MODEL (Cluster Centroids)',
        'Healthy',
        'High-Risk'))

# Record the score as a percentage for the performance matrix and the ranking.
# NOTE: append() is not idempotent -- re-running this cell adds a duplicate
# entry to model_performance_dictionary['gnb'].
model_performance_dictionary['gnb'].append(
    gnb_cluster_centroids_accuracy_score_float * 100)

model_performance_ranking_dictionary['gnb_cluster_centroids'] = (
    gnb_cluster_centroids_accuracy_score_float * 100)

[1mGNB MODEL (Cluster Centroids)
[0m
1) [1mOverall Accuracy Score: [0m98.7%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18656                  118
Actual High-Risk                 12                  598

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.84      0.98      0.90       610

    accuracy                           0.99     19384
   macro avg       0.92      0.99      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **SMOTE**

In [None]:
# Display the parameter dictionary of the estimator held in gnb_SMOTE_model.
gnb_SMOTE_model.get_params()

{'priors': None, 'var_smoothing': 1e-09}

In [None]:
# Predict test-set labels with the estimator held in gnb_SMOTE_model.
gnb_SMOTE_predictions_nparray = (
    gnb_SMOTE_model.predict(x_test_scaled_dataframe))

# accuracy_score (presumably sklearn.metrics.accuracy_score) gives plain
# accuracy -- the fraction of correct predictions -- not balanced accuracy,
# so the logged message says "accuracy".  The variable keeps its original
# name because other cells may reference it.
gnb_SMOTE_balanced_accuracy_score_float = (
    accuracy_score(y_test_series, gnb_SMOTE_predictions_nparray) * 100)

logx.print_and_log_text(
    '\033[1m'
    + 'The accuracy score for gnb SMOTE from actual vs. test predictions is {:.2f}%'
        .format(gnb_SMOTE_balanced_accuracy_score_float)
    + '\033[0m')

[1mThe balanced accuracy score for gnb SMOTE from actual vs. test predictions is 99.37%[0m


In [None]:
# Evaluate the predictions against the test labels; the helper's three return
# values are unpacked as score, confusion matrix, and classification report.
(gnb_SMOTE_accuracy_score_float,
 gnb_SMOTE_confusion_matrix_dataframe,
 gnb_SMOTE_classification_report_string) = (
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        gnb_SMOTE_predictions_nparray,
        'GNB MODEL (SMOTE)',
        'Healthy',
        'High-Risk'))

# Record the score as a percentage for the performance matrix and the ranking.
# NOTE: append() is not idempotent -- re-running this cell adds a duplicate
# entry to model_performance_dictionary['gnb'].
model_performance_dictionary['gnb'].append(
    gnb_SMOTE_accuracy_score_float * 100)

model_performance_ranking_dictionary['gnb_smote'] = (
    gnb_SMOTE_accuracy_score_float * 100)

[1mGNB MODEL (SMOTE)
[0m
1) [1mOverall Accuracy Score: [0m99.51%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18653                  121
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      1.00      0.91       610

    accuracy                           0.99     19384
   macro avg       0.92      1.00      0.95     19384
weighted avg       0.99      0.99      0.99     19384




### **SMOTEENN**

In [None]:
# Display the parameter dictionary of the estimator held in gnb_SMOTEENN_model.
gnb_SMOTEENN_model.get_params()

{'priors': None, 'var_smoothing': 1e-09}

In [None]:
# Predict test-set labels with the estimator held in gnb_SMOTEENN_model.
gnb_SMOTEENN_predictions_nparray = (
    gnb_SMOTEENN_model.predict(x_test_scaled_dataframe))

# accuracy_score (presumably sklearn.metrics.accuracy_score) gives plain
# accuracy -- the fraction of correct predictions -- not balanced accuracy,
# so the logged message says "accuracy".  The variable keeps its original
# name because other cells may reference it.
gnb_SMOTEENN_balanced_accuracy_score_float = (
    accuracy_score(y_test_series, gnb_SMOTEENN_predictions_nparray) * 100)

logx.print_and_log_text(
    '\033[1m'
    + 'The accuracy score for gnb SMOTEENN from actual vs. test predictions is {:.2f}%'
        .format(gnb_SMOTEENN_balanced_accuracy_score_float)
    + '\033[0m')

[1mThe balanced accuracy score for gnb SMOTEENN from actual vs. test predictions is 99.36%[0m


In [None]:
# Evaluate the predictions against the test labels; the helper's three return
# values are unpacked as score, confusion matrix, and classification report.
(gnb_SMOTEENN_accuracy_score_float,
 gnb_SMOTEENN_confusion_matrix_dataframe,
 gnb_SMOTEENN_classification_report_string) = (
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        gnb_SMOTEENN_predictions_nparray,
        'GNB MODEL (SMOTEENN)',
        'Healthy',
        'High-Risk'))

# Record the score as a percentage for the performance matrix and the ranking.
# NOTE: append() is not idempotent -- re-running this cell adds a duplicate
# entry to model_performance_dictionary['gnb'].
model_performance_dictionary['gnb'].append(
    gnb_SMOTEENN_accuracy_score_float * 100)

model_performance_ranking_dictionary['gnb_smoteen'] = (
    gnb_SMOTEENN_accuracy_score_float * 100)

[1mGNB MODEL (SMOTEENN)
[0m
1) [1mOverall Accuracy Score: [0m99.51%

2) [1mConfusion Matrix:
[0m
                  Predicted Healthy  Predicted High-Risk
Actual Healthy                18651                  123
Actual High-Risk                  2                  608

3) [1mClassification Report:
[0m
              precision    recall  f1-score   support

     healthy       1.00      0.99      1.00     18774
   high-risk       0.83      1.00      0.91       610

    accuracy                           0.99     19384
   macro avg       0.92      1.00      0.95     19384
weighted avg       0.99      0.99      0.99     19384




## **9.7: Model Performance Results**

### **Performance Matrix**

In [None]:
# Row labels for the performance matrix, one per resampling method, listed in
# the order the scores are added to each model's list in the cells above.
index_string_list = [
    'original',
    'undersampled',
    'oversampled',
    'cluster centroids',
    'smote',
    'smoteen',
]

# Turn the {model: [scores]} dictionary into a frame with one column per model
# and one row per resampling method, then index the rows by method label.
model_performace_dataframe = (
    pd.DataFrame.from_dict(model_performance_dictionary, orient = 'index')
        .transpose()
        .assign(resampling_method = index_string_list)
        .set_index('resampling_method', drop = True))

logx.log_write_object(model_performace_dataframe)

In [None]:
# Build the formatted table for the performance matrix and apply the
# one-decimal percent format string to the returned object; as the last
# expression in the cell, the result is what the notebook displays.
(
    pandasx
    .return_formatted_table(
        model_performace_dataframe,
        'Table 9.7.1: Model Performance Matrix',
        line_count_integer=36,
        hide_index_boolean=False,
    )
    .format('{:,.1f}%')
)

Unnamed: 0_level_0,logistic_regression,decision_tree,random_forest,svm,knn,gnb
resampling_method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
original,98.7%,92.1%,97.6%,99.5%,98.1%,99.5%
undersampled,99.5%,99.0%,99.5%,99.5%,99.5%,99.5%
oversampled,99.5%,93.4%,98.2%,99.5%,99.3%,99.5%
cluster centroids,99.5%,93.9%,98.0%,98.7%,99.5%,98.7%
smote,99.5%,72.7%,86.5%,99.5%,97.0%,99.5%
smoteen,99.5%,93.7%,94.9%,99.5%,98.6%,99.5%


### **Performance Ranking**

In [None]:
# Order the (model, accuracy) pairs from highest to lowest accuracy.
temp_dictionary = dict(
    sorted(
        model_performance_ranking_dictionary.items(),
        key=lambda name_and_score: name_and_score[1],
        reverse=True,
    )
)

# One row per model, a single 'accuracy' column, index labelled 'model'.
model_performace_rankings_dataframe = (
    pd.DataFrame
    .from_dict(temp_dictionary, orient='index', columns=['accuracy'])
    .rename_axis('model')
)

logx.log_write_object(model_performace_rankings_dataframe)

In [None]:
# Build the formatted table for the rankings and apply the one-decimal
# percent format string to the 'accuracy' column; as the last expression in
# the cell, the result is what the notebook displays.
(
    pandasx
    .return_formatted_table(
        model_performace_rankings_dataframe,
        'Table 9.7.2: Model Performance Rankings',
        line_count_integer=36,
        hide_index_boolean=False,
    )
    .format({'accuracy': '{:,.1f}%'})
)

Unnamed: 0_level_0,accuracy
model,Unnamed: 1_level_1
logistic_regression_cluster_centroids,99.5%
svm,99.5%
knn_cluster_centroids,99.5%
gnb,99.5%
gnb_undersampled,99.5%
gnb_oversampled,99.5%
gnb_smote,99.5%
svm_smoteen,99.5%
gnb_smoteen,99.5%
svm_undersampled,99.5%


# <br> **Section 10: Save Models To Files**

## **10.1: Logistic Regression**

### **Original**

In [None]:
# Pickle logistic_regression_model to its configured path; the context
# manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_LR_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(logistic_regression_model, model_file)

### **Random Undersampling**

In [None]:
# Pickle logistic_regression_undersampled_model to its configured path; the
# context manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_LR_UNDERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(logistic_regression_undersampled_model, model_file)

### **Random Oversampling**

In [None]:
# Pickle logistic_regression_oversampled_model to its configured path; the
# context manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_LR_OVERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(logistic_regression_oversampled_model, model_file)

### **Cluster Centroids**

In [None]:
# Pickle logistic_regression_cluster_centroids_model to its configured path;
# the context manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_LR_CLUSTER_CENTROIDS_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(logistic_regression_cluster_centroids_model, model_file)

### **SMOTE**

In [None]:
# Pickle logistic_regression_SMOTE_model to its configured path; the context
# manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_LR_SMOTE_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(logistic_regression_SMOTE_model, model_file)

### **SMOTEENN**

In [None]:
# Pickle logistic_regression_SMOTEENN_model to its configured path; the
# context manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_LR_SMOTEENN_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(logistic_regression_SMOTEENN_model, model_file)

## **10.2: Decision Tree**

### **Original**

In [None]:
# Pickle decision_tree_model to its configured path; the context manager
# closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_DT_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(decision_tree_model, model_file)

### **Random Undersampling**

In [None]:
# Pickle decision_tree_undersampled_model to its configured path; the
# context manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_DT_UNDERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(decision_tree_undersampled_model, model_file)

### **Random Oversampling**

In [None]:
# Pickle decision_tree_oversampled_model to its configured path; the context
# manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_DT_OVERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(decision_tree_oversampled_model, model_file)

### **Cluster Centroids**

In [None]:
# Pickle decision_tree_cluster_centroids_model to its configured path; the
# context manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_DT_CLUSTER_CENTROIDS_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(decision_tree_cluster_centroids_model, model_file)

### **SMOTE**

In [None]:
# Pickle decision_tree_SMOTE_model to its configured path; the context
# manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_DT_SMOTE_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(decision_tree_SMOTE_model, model_file)

### **SMOTEENN**

In [None]:
# Pickle decision_tree_SMOTEENN_model to its configured path; the context
# manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_DT_SMOTEENN_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(decision_tree_SMOTEENN_model, model_file)

## **10.3: Random Forest**

### **Original**

In [None]:
# Pickle random_forest_model to its configured path; the context manager
# closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_RF_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(random_forest_model, model_file)

### **Random Undersampling**

In [None]:
# Pickle random_forest_undersampled_model to its configured path; the
# context manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_RF_UNDERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(random_forest_undersampled_model, model_file)

### **Random Oversampling**

In [None]:
# Pickle random_forest_oversampled_model to its configured path; the context
# manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_RF_OVERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(random_forest_oversampled_model, model_file)

### **Cluster Centroids**

In [None]:
# Pickle random_forest_cluster_centroids_model to its configured path; the
# context manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_RF_CLUSTER_CENTROIDS_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(random_forest_cluster_centroids_model, model_file)

### **SMOTE**

In [None]:
# Pickle random_forest_SMOTE_model to its configured path; the context
# manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_RF_SMOTE_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(random_forest_SMOTE_model, model_file)

### **SMOTEENN**

In [None]:
# Pickle random_forest_SMOTEENN_model to its configured path; the context
# manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_RF_SMOTEENN_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(random_forest_SMOTEENN_model, model_file)

## **10.4: Support Vector Machine (SVM)**

### **Original**

In [None]:
# Pickle svm_model to its configured path; the context manager closes the
# file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_SVM_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(svm_model, model_file)

### **Random Undersampling**

In [None]:
# Pickle svm_undersampled_model to its configured path; the context manager
# closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_SVM_UNDERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(svm_undersampled_model, model_file)

### **Random Oversampling**

In [None]:
# Pickle svm_oversampled_model to its configured path; the context manager
# closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_SVM_OVERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(svm_oversampled_model, model_file)

### **Cluster Centroids**

In [None]:
# Pickle svm_cluster_centroids_model to its configured path; the context
# manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_SVM_CLUSTER_CENTROIDS_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(svm_cluster_centroids_model, model_file)

### **SMOTE**

In [None]:
# Pickle svm_SMOTE_model to its configured path; the context manager closes
# the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_SVM_SMOTE_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(svm_SMOTE_model, model_file)

### **SMOTEENN**

In [None]:
# Pickle svm_SMOTEENN_model to its configured path; the context manager
# closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_SVM_SMOTEENN_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(svm_SMOTEENN_model, model_file)

## **10.5: K-Nearest Neighbor (KNN)**

### **Original**

In [None]:
# Pickle knn_model to its configured path; the context manager closes the
# file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_KNN_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(knn_model, model_file)

### **Random Undersampling**

In [None]:
# Pickle knn_undersampled_model to its configured path; the context manager
# closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_KNN_UNDERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(knn_undersampled_model, model_file)

### **Random Oversampling**

In [None]:
# Pickle knn_oversampled_model to its configured path; the context manager
# closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_KNN_OVERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(knn_oversampled_model, model_file)

### **Cluster Centroids**

In [None]:
# Pickle knn_cluster_centroids_model to its configured path; the context
# manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_KNN_CLUSTER_CENTROIDS_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(knn_cluster_centroids_model, model_file)

### **SMOTE**

In [None]:
# Pickle knn_SMOTE_model to its configured path; the context manager closes
# the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_KNN_SMOTE_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(knn_SMOTE_model, model_file)

### **SMOTEENN**

In [None]:
# Pickle knn_SMOTEENN_model to its configured path; the context manager
# closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_KNN_SMOTEENN_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(knn_SMOTEENN_model, model_file)

## **10.6: Gaussian Naive Bayes (GNB)**

### **Original**

In [None]:
# Pickle gnb_model to its configured path; the context manager closes the
# file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_GNB_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(gnb_model, model_file)

### **Random Undersampling**

In [None]:
# Pickle gnb_undersampled_model to its configured path; the context manager
# closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_GNB_UNDERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(gnb_undersampled_model, model_file)

### **Random Oversampling**

In [None]:
# Pickle gnb_oversampled_model to its configured path; the context manager
# closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_GNB_OVERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(gnb_oversampled_model, model_file)

### **Cluster Centroids**

In [None]:
# Pickle gnb_cluster_centroids_model to its configured path; the context
# manager closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_GNB_CLUSTER_CENTROIDS_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(gnb_cluster_centroids_model, model_file)

### **SMOTE**

In [None]:
# Pickle gnb_SMOTE_model to its configured path; the context manager closes
# the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_GNB_SMOTE_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(gnb_SMOTE_model, model_file)

### **SMOTEENN**

In [None]:
# Pickle gnb_SMOTEENN_model to its configured path; the context manager
# closes the file handle even if pickling raises.
with open(credit_risk_constants.CONSTANT_GNB_SMOTEENN_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(gnb_SMOTEENN_model, model_file)

In [None]:
# logx.end_program()