In [None]:
#*******************************************************************************************
 #
 #  File Name:  credit_risk_classification_colab.ipynb
 #
 #  File Description:
 #      This interactive Python notebook, credit_risk_classification_colab.ipynb, reads
 #      a csv file, lending_data.csv, in the folder, resources, and uses Python and
 #      supervised learning methods to use a dataset of historical lending activity from
 #      a peer-to-peer lending services company to compare models that can identify
 #      the creditworthiness of borrowers.
 #
 #
 #  Date            Description                             Programmer
 #  ----------      ------------------------------------    ------------------
 #  11/25/2023      Initial Development                     Nicholas J. George
 #
 #******************************************************************************************/

from google.colab import drive
drive.mount('/content/gdrive/')

import sys
sys.path.insert(0,'./gdrive/MyDrive/credit_risk_classification')

import os
os.environ['HV_DOC_HTML'] = 'true'

!apt-get update
!apt install firefox firefox-geckodriver
!pip install -U dataframe_image
!pip install -U selenium
!pip install -U kaleido
!pip install -U hvplot
!pip install -U plotly
!pip install -U panel
!pip install -U bokeh

import hvplot
import hvplot.pandas

import pandas as pd

import holoviews as hv
hv.extension('bokeh')

import logx
logx.set_logs_directory_path('./gdrive/MyDrive/credit_risk_classification/logs')
logx.set_images_directory_path('./gdrive/MyDrive/credit_risk_classification/images')

import pandasx
pandasx.set_google_colab(True)

import classificationsx
import credit_risk_constants

import copy
import pickle

import numpy as np
import pandas as pd

from IPython.display import clear_output

from imblearn.combine import SMOTEENN
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import ClusterCentroids
from imblearn.under_sampling import RandomUnderSampler

from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

pd.options.mode.chained_assignment = None

In [None]:
CONSTANT_LOCAL_FILE_NAME = 'credit_risk_classification_colab.ipynb'

# NOTE(review): this constant is not referenced in the visible cells (the CSV is read
# through credit_risk_constants.CONSTANT_INPUT_FILE_PATH) and its '/gdrive/...' root
# differs from the './gdrive/...' paths used in the setup cell -- confirm before use.
CONSTANT_LENDING_DATA_CSV_FILE_PATH = '/gdrive/MyDrive/credit_risk_classification/resources/lending_data.csv'


# The logging helper is imported as logx in the setup cell; log_subroutines is not
# imported there, so the previous calls raised a NameError.
# NOTE(review): assumes logx exposes set_log_mode, set_image_mode and begin_program
# with the same signatures -- confirm against the logx module.
logx.set_log_mode(False)

logx.set_image_mode(False)


logx.begin_program('credit_risk_classification_colab')

# <br> **Section 1: Extraction and Transformation**

## **1.1: Read the CSV data into a Pandas DataFrame**

In [None]:
# Explicit dtype for every column of the lending CSV so pandas does not have to infer them.
data_type_dictionary = {
    'loan_size': float,
    'interest_rate': float,
    'borrower_income': int,
    'debt_to_income': float,
    'num_of_accounts': int,
    'derogatory_marks': int,
    'total_debt': int,
    'loan_status': int,
}

# Read the CSV from the path configured in credit_risk_constants, then log the frame.
lending_dataframe = pd.read_csv(
    credit_risk_constants.CONSTANT_INPUT_FILE_PATH,
    dtype = data_type_dictionary)

logx.log_write_object(lending_dataframe)

## **1.2: Display Lending DataFrame**

In [None]:
pandasx.return_formatted_table(lending_dataframe, 'Table 1.1: Lending Data Table') \
    .format({'loan_size': '${:,.0f}', 'interest_rate': '{:.2f}%',
             'borrower_income': '${:,.0f}', 'total_debt': '${:,.0f}'})

loan_size,interest_rate,borrower_income,debt_to_income,num_of_accounts,derogatory_marks,total_debt,loan_status
"$10,700",7.67%,"$52,800",0.431818,5,1,"$22,800",0
"$8,400",6.69%,"$43,600",0.311927,3,0,"$13,600",0
"$9,000",6.96%,"$46,100",0.349241,3,0,"$16,100",0
"$10,700",7.66%,"$52,700",0.43074,5,1,"$22,700",0
"$10,800",7.70%,"$53,000",0.433962,5,1,"$23,000",0
"$10,100",7.44%,"$50,600",0.407115,4,1,"$20,600",0
"$10,300",7.49%,"$51,100",0.412916,4,1,"$21,100",0
"$8,800",6.86%,"$45,100",0.334812,3,0,"$15,100",0
"$9,300",7.10%,"$47,400",0.367089,3,0,"$17,400",0
"$9,700",7.25%,"$48,800",0.385246,4,0,"$18,800",0


## **1.3: Create the labels series (`y`) from the `loan_status` column, and then create the features (`X`) DataFrame from the remaining columns.**

### **Separate the Y Variable, The Labels**

In [None]:
y_series = lending_dataframe['loan_status']

logx.log_write_object(y_series)

### **Review the Y Series**

In [None]:
pandasx.return_formatted_table(y_series.to_frame(), 'Table 1.3.1: Credit Risk Target Series')

### **Check the Balance of the Labels Variable (`y`) by Using the `value_counts` Function.**

In [None]:
y_series.value_counts()

### **Separate the X Variable, the Features**

In [None]:
# Features are every column except the loan_status label column.
x_dataframe = lending_dataframe.drop(columns = ['loan_status'])

logx.log_write_object(x_dataframe)

### **Review the X DataFrame**

In [None]:
pandasx.return_formatted_table(x_dataframe, 'Table 1.3.2: Credit Risk Features DataFrame')

loan_status
0
0
0
0
0
0
0
0
0
0


## **1.4: Split the Data into Training and Testing Datasets by Using `train_test_split`.**

In [None]:
# Split features and labels into training and test partitions.  No test_size is
# passed, so scikit-learn's default proportion applies; random_state is fixed so the
# split is reproducible.
(x_train_dataframe,
 x_test_dataframe,
 y_train_series,
 y_test_series) = train_test_split(
    x_dataframe,
    y_series,
    random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1)

# Log each of the four partitions.
logx.log_write_object(x_train_dataframe)
logx.log_write_object(x_test_dataframe)
logx.log_write_object(y_train_series)
logx.log_write_object(y_test_series)

## **1.5: Use the StandardScaler to Scale the X Variables**

### **Scale Training and Test Data as Numpy Arrays**

In [None]:
x_train_scaled_nparray = StandardScaler().fit_transform(x_train_dataframe)

logx.log_write_object(x_train_scaled_nparray)

In [None]:
# Scale the test features with the mean / standard deviation learned from the TRAINING
# features.  Fitting a separate scaler on the test set puts the two sets on different
# scales and lets test-set statistics influence the evaluation.
x_test_scaled_nparray \
    = StandardScaler() \
        .fit(x_train_dataframe) \
        .transform(x_test_dataframe)

logx.log_write_object(x_test_scaled_nparray)

### **Create Scaled X Variable DataFrames**

In [None]:
x_train_scaled_dataframe \
    = pd.DataFrame \
        (x_train_scaled_nparray,
         columns = x_train_dataframe.columns,
         index = x_train_dataframe.index)

logx.log_write_object(x_train_scaled_dataframe)

In [None]:
x_test_scaled_dataframe \
    = pd.DataFrame \
        (x_test_scaled_nparray,
         columns = x_test_dataframe.columns,
         index = x_test_dataframe.index)

logx.log_write_object(x_test_scaled_dataframe)

### **Display Scaled Training and Testing Data**

In [None]:
pandasx.return_formatted_table \
    (x_train_scaled_dataframe,
     'Table 1.5.1: Credit Risk Scaled Features Training Data')

In [None]:
pandasx.return_formatted_table \
    (x_test_scaled_dataframe,
     'Table 1.5.2: Credit Risk Scaled Features Test Data')

# <br> **Section 2: Undersampled and Oversampled Lending Data**

## **2.1: Instantiate the Random Undersampler Instance**

In [None]:
x_train_scaled_undersampled_dataframe, y_train_undersampled_series \
    = RandomUnderSampler(random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
        .fit_resample(x_train_scaled_dataframe, y_train_series)

In [None]:
logx.log_write_object(x_train_scaled_undersampled_dataframe)

logx.log_write_object(y_train_undersampled_series)

## **2.2: Instantiate the Random Oversampler Instance**

In [None]:
x_train_scaled_oversampled_dataframe, y_train_oversampled_series \
    = RandomOverSampler(random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
        .fit_resample(x_train_scaled_dataframe, y_train_series)

In [None]:
# Log the objects produced by the RandomOverSampler cell above (the previous version
# logged the undersampled objects a second time).
logx.log_write_object(x_train_scaled_oversampled_dataframe)

logx.log_write_object(y_train_oversampled_series)

## **2.3: Instantiate the Cluster Centroids Instance**

In [None]:
x_train_scaled_cluster_centroids_dataframe, y_train_cluster_centroids_series \
    = ClusterCentroids \
        (estimator \
             = KMeans(n_init = 'auto', random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_2),
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
        .fit_resample(x_train_scaled_dataframe, y_train_series)

In [None]:
logx.log_write_object(x_train_scaled_cluster_centroids_dataframe)

logx.log_write_object(y_train_cluster_centroids_series)

## **2.4: Instantiate the SMOTE Instance**

In [None]:
x_train_scaled_SMOTE_dataframe, y_train_SMOTE_series \
    = SMOTE(random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1, sampling_strategy = 'auto') \
        .fit_resample(x_train_scaled_dataframe, y_train_series)

In [None]:
logx.log_write_object(x_train_scaled_SMOTE_dataframe)

logx.log_write_object(y_train_SMOTE_series)

## **2.5: Instantiate the SMOTEENN Instance**

In [None]:
x_train_scaled_SMOTEENN_dataframe, y_train_SMOTEENN_series \
    = SMOTEENN(random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
        .fit_resample(x_train_scaled_dataframe, y_train_series)

In [None]:
logx.log_write_object(x_train_scaled_SMOTEENN_dataframe)

logx.log_write_object(y_train_SMOTEENN_series)

## **2.6: Check the Balance of the Labels Variable (`y`) by Using the `value_counts` Function.**

In [None]:
y_train_undersampled_series.value_counts()

In [None]:
y_train_oversampled_series.value_counts()

In [None]:
y_train_cluster_centroids_series.value_counts()

In [None]:
y_train_SMOTE_series.value_counts()

In [None]:
y_train_SMOTEENN_series.value_counts()

## **2.7: Display Normalized Resampled Training and Testing Data**

In [None]:
pandasx.return_formatted_table \
    (x_train_scaled_undersampled_dataframe,
     'Table 2.7.1: Scaled Features Training Undersampled Data')

In [None]:
pandasx.return_formatted_table \
    (x_train_scaled_oversampled_dataframe,
     'Table 2.7.2: Scaled Features Training Oversampled Data')

In [None]:
pandasx.return_formatted_table \
    (x_train_scaled_cluster_centroids_dataframe,
     'Table 2.7.3: Scaled Features Training Cluster Centroids Data')

In [None]:
pandasx.return_formatted_table \
    (x_train_scaled_SMOTE_dataframe,
     'Table 2.7.4: Scaled Features Training SMOTE Data')

In [None]:
pandasx.return_formatted_table \
    (x_train_scaled_SMOTEENN_dataframe,
     'Table 2.7.5: Scaled Features Training SMOTEENN Data')

# <br> **Section 3: Logistic Regression Models**

## **3.1: Fit Models by Using the Scaled Training Data.**

### **Original**

In [None]:
# Load the pickled grid-search result; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load model files from a trusted location.
with open(credit_risk_constants.CONSTANT_LR_GRID_SEARCH_MODEL_FILE_PATH, 'rb') as grid_search_file:
    lr_grid_search_model = pickle.load(grid_search_file)

# Fit a logistic regression on the scaled training data using the class_weight and
# solver stored in the grid search's best_params_.
logistic_regression_model \
    = LogisticRegression \
        (class_weight = lr_grid_search_model.best_params_['class_weight'],
         solver = lr_grid_search_model.best_params_['solver'],
         max_iter = credit_risk_constants.CONSTANT_ML_LR_MAX_ITERATIONS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_dataframe, y_train_series)

### **Random Undersampling**

In [None]:
# Load the pickled grid-search result; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load model files from a trusted location.
with open(credit_risk_constants.CONSTANT_LR_UNDERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    lr_undersampled_grid_search_model = pickle.load(grid_search_file)

# Fit a logistic regression on the randomly undersampled training data using the
# class_weight and solver stored in the grid search's best_params_.
logistic_regression_undersampled_model \
    = LogisticRegression \
        (class_weight = lr_undersampled_grid_search_model.best_params_['class_weight'],
         solver = lr_undersampled_grid_search_model.best_params_['solver'],
         max_iter = credit_risk_constants.CONSTANT_ML_LR_MAX_ITERATIONS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_undersampled_dataframe, y_train_undersampled_series)

### **Random Oversampling**

In [None]:
# Load the pickled grid-search result; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load model files from a trusted location.
with open(credit_risk_constants.CONSTANT_LR_OVERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    lr_oversampled_grid_search_model = pickle.load(grid_search_file)

# Fit a logistic regression on the randomly oversampled training data using the
# class_weight and solver stored in the grid search's best_params_.
logistic_regression_oversampled_model \
    = LogisticRegression \
        (class_weight = lr_oversampled_grid_search_model.best_params_['class_weight'],
         solver = lr_oversampled_grid_search_model.best_params_['solver'],
         max_iter = credit_risk_constants.CONSTANT_ML_LR_MAX_ITERATIONS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_oversampled_dataframe, y_train_oversampled_series)

### **Cluster Centroids**

In [None]:
# Load the pickled grid-search result; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load model files from a trusted location.
with open(credit_risk_constants.CONSTANT_LR_CENTROIDS_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    lr_cluster_centroids_grid_search_model = pickle.load(grid_search_file)

# Fit a logistic regression on the cluster-centroids resampled training data using the
# class_weight and solver stored in the grid search's best_params_.
logistic_regression_cluster_centroids_model \
    = LogisticRegression \
        (class_weight = lr_cluster_centroids_grid_search_model.best_params_['class_weight'],
         solver = lr_cluster_centroids_grid_search_model.best_params_['solver'],
         max_iter = credit_risk_constants.CONSTANT_ML_LR_MAX_ITERATIONS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_cluster_centroids_dataframe, y_train_cluster_centroids_series)

### **SMOTE**

In [None]:
# Load the pickled grid-search result; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load model files from a trusted location.
with open(credit_risk_constants.CONSTANT_LR_SMOTE_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    lr_SMOTE_grid_search_model = pickle.load(grid_search_file)

# Fit a logistic regression on the SMOTE resampled training data using the
# class_weight and solver stored in the grid search's best_params_.
logistic_regression_SMOTE_model \
    = LogisticRegression \
        (class_weight = lr_SMOTE_grid_search_model.best_params_['class_weight'],
         solver = lr_SMOTE_grid_search_model.best_params_['solver'],
         max_iter = credit_risk_constants.CONSTANT_ML_LR_MAX_ITERATIONS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_SMOTE_dataframe, y_train_SMOTE_series)

### **SMOTEENN**

In [None]:
# Load the pickled grid-search result; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load model files from a trusted location.
with open(credit_risk_constants.CONSTANT_LR_SMOTEENN_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    lr_SMOTEENN_grid_search_model = pickle.load(grid_search_file)

# Fit a logistic regression on the SMOTEENN resampled training data using the
# class_weight and solver stored in the grid search's best_params_.
logistic_regression_SMOTEENN_model \
    = LogisticRegression \
        (class_weight = lr_SMOTEENN_grid_search_model.best_params_['class_weight'],
         solver = lr_SMOTEENN_grid_search_model.best_params_['solver'],
         max_iter = credit_risk_constants.CONSTANT_ML_LR_MAX_ITERATIONS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_SMOTEENN_dataframe, y_train_SMOTEENN_series)

## **3.2: Display the Model Scores Using the Scaled Training and Testing data.**

### **Original**

In [None]:
accuracy_score_train_float \
    = logistic_regression_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The logistic regression model score from scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

In [None]:
accuracy_score_test_float \
    = logistic_regression_model.score \
        (x_test_scaled_dataframe, y_test_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The logistic regression model score from scaled test data is {:.2f}%' \
         .format(accuracy_score_test_float)
     + '\033[0m')

### **Random Undersampling**

In [None]:
accuracy_score_train_float \
    = logistic_regression_undersampled_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The logistic regression model score from undersampled scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

In [None]:
accuracy_score_test_float \
    = logistic_regression_undersampled_model.score \
        (x_test_scaled_dataframe, y_test_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The logistic regression model score from undersampled scaled test data is {:.2f}%' \
         .format(accuracy_score_test_float)
     + '\033[0m')

### **Random Oversampling**

In [None]:
# Mean accuracy (as a percentage) of the oversampled-data model on the scaled training set.
accuracy_score_train_float \
    = logistic_regression_oversampled_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

# Message typo fixed: 'overersampled' -> 'oversampled'.
logx.print_and_log_text \
    ('\033[1m'
     + 'The logistic regression model score from oversampled scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

In [None]:
accuracy_score_test_float \
    = logistic_regression_oversampled_model.score \
        (x_test_scaled_dataframe, y_test_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The logistic regression model score from oversampled scaled test data is {:.2f}%' \
         .format(accuracy_score_test_float)
     + '\033[0m')

### **Cluster Centroids**

In [None]:
accuracy_score_train_float \
    = logistic_regression_cluster_centroids_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The logistic regression model score from cluster centroids scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

In [None]:
accuracy_score_test_float \
    = logistic_regression_cluster_centroids_model.score \
        (x_test_scaled_dataframe, y_test_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The logistic regression model score from cluster centroids scaled test data is {:.2f}%' \
         .format(accuracy_score_test_float)
     + '\033[0m')

### **SMOTE**

In [None]:
accuracy_score_train_float \
    = logistic_regression_SMOTE_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The logistic regression model score from SMOTE scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

In [None]:
accuracy_score_test_float \
    = logistic_regression_SMOTE_model.score \
        (x_test_scaled_dataframe, y_test_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The logistic regression model score from SMOTE scaled test data is {:.2f}%' \
         .format(accuracy_score_test_float)
     + '\033[0m')

### **SMOTEENN**

In [None]:
accuracy_score_train_float \
    = logistic_regression_SMOTEENN_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The logistic regression model score from SMOTEENN scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

In [None]:
accuracy_score_test_float \
    = logistic_regression_SMOTEENN_model.score \
        (x_test_scaled_dataframe, y_test_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The logistic regression model score from SMOTEENN scaled test data is {:.2f}%' \
         .format(accuracy_score_test_float)
     + '\033[0m')

## **3.3: Calculate Training and Test Predictions.**

### **Original**

In [None]:
lr_train_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (logistic_regression_model,
         x_train_scaled_dataframe,
         y_train_series)

logx.log_write_object(lr_train_predictions_dataframe)

In [None]:
lr_test_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (logistic_regression_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(lr_test_predictions_dataframe)

### **Random Undersampling**

In [None]:
lr_train_undersampled_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (logistic_regression_undersampled_model,
         x_train_scaled_dataframe,
         y_train_series)

logx.log_write_object(lr_train_undersampled_predictions_dataframe)

In [None]:
lr_test_undersampled_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (logistic_regression_undersampled_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(lr_test_undersampled_predictions_dataframe)

### **Random Oversampling**

In [None]:
lr_train_oversampled_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (logistic_regression_oversampled_model,
         x_train_scaled_dataframe,
         y_train_series)

logx.log_write_object(lr_train_oversampled_predictions_dataframe)

In [None]:
lr_test_oversampled_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (logistic_regression_oversampled_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(lr_test_oversampled_predictions_dataframe)

### **Cluster Centroids**

In [None]:
lr_train_cluster_centroids_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (logistic_regression_cluster_centroids_model,
         x_train_scaled_dataframe,
         y_train_series)

logx.log_write_object(lr_train_cluster_centroids_predictions_dataframe)

In [None]:
lr_test_cluster_centroids_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (logistic_regression_cluster_centroids_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(lr_test_cluster_centroids_predictions_dataframe)

### **SMOTE**

In [None]:
lr_train_SMOTE_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (logistic_regression_SMOTE_model,
         x_train_scaled_dataframe,
         y_train_series)

logx.log_write_object(lr_train_SMOTE_predictions_dataframe)

In [None]:
lr_test_SMOTE_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (logistic_regression_SMOTE_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(lr_test_SMOTE_predictions_dataframe)

### **SMOTEENN**

In [None]:
lr_train_SMOTEENN_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (logistic_regression_SMOTEENN_model,
         x_train_scaled_dataframe,
         y_train_series)

logx.log_write_object(lr_train_SMOTEENN_predictions_dataframe)

In [None]:
# Test-set predictions for the SMOTEENN-trained logistic regression.  The previous
# version passed logistic_regression_SMOTE_model here, so the SMOTEENN results were
# actually a copy of the SMOTE results.
lr_test_SMOTEENN_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (logistic_regression_SMOTEENN_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(lr_test_SMOTEENN_predictions_dataframe)

# <br> **Section 4: Decision Tree Models**

## **4.1: Fit Models by Using the Scaled Training Data.**

### **Original**

In [None]:
# Load the pickled grid-search result; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load model files from a trusted location.
with open(credit_risk_constants.CONSTANT_DT_GRID_SEARCH_MODEL_FILE_PATH, 'rb') as grid_search_file:
    dt_grid_search_model = pickle.load(grid_search_file)

# Fit a decision tree on the scaled training data using the criterion, splitter and
# class_weight stored in the grid search's best_params_.
decision_tree_model \
    = DecisionTreeClassifier \
        (criterion = dt_grid_search_model.best_params_['criterion'],
         splitter = dt_grid_search_model.best_params_['splitter'],
         class_weight = dt_grid_search_model.best_params_['class_weight'],
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_dataframe, y_train_series)

### **Random Undersampling**

In [None]:
# Load the pickled grid-search result; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load model files from a trusted location.
with open(credit_risk_constants.CONSTANT_DT_UNDERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    dt_undersampled_grid_search_model = pickle.load(grid_search_file)

# Fit a decision tree on the randomly undersampled training data using the criterion,
# splitter and class_weight stored in the grid search's best_params_.
decision_tree_undersampled_model \
    = DecisionTreeClassifier \
        (criterion = dt_undersampled_grid_search_model.best_params_['criterion'],
         splitter = dt_undersampled_grid_search_model.best_params_['splitter'],
         class_weight = dt_undersampled_grid_search_model.best_params_['class_weight'],
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_undersampled_dataframe, y_train_undersampled_series)

### **Random Oversampling**

In [None]:
# Load the pickled grid-search result; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load model files from a trusted location.
with open(credit_risk_constants.CONSTANT_DT_OVERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    dt_oversampled_grid_search_model = pickle.load(grid_search_file)

# Fit a decision tree on the randomly oversampled training data using the criterion,
# splitter and class_weight stored in the grid search's best_params_.
decision_tree_oversampled_model \
    = DecisionTreeClassifier \
        (criterion = dt_oversampled_grid_search_model.best_params_['criterion'],
         splitter = dt_oversampled_grid_search_model.best_params_['splitter'],
         class_weight = dt_oversampled_grid_search_model.best_params_['class_weight'],
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_oversampled_dataframe, y_train_oversampled_series)

### **Cluster Centroids**

In [None]:
# Load the pickled grid-search result; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load model files from a trusted location.
with open(credit_risk_constants.CONSTANT_DT_CENTROIDS_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    dt_cluster_centroids_grid_search_model = pickle.load(grid_search_file)

# Fit a decision tree on the cluster-centroids resampled training data using the
# criterion, splitter and class_weight stored in the grid search's best_params_.
decision_tree_cluster_centroids_model \
    = DecisionTreeClassifier \
        (criterion = dt_cluster_centroids_grid_search_model.best_params_['criterion'],
         splitter = dt_cluster_centroids_grid_search_model.best_params_['splitter'],
         class_weight = dt_cluster_centroids_grid_search_model.best_params_['class_weight'],
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_cluster_centroids_dataframe, y_train_cluster_centroids_series)

### **SMOTE**

In [None]:
# Load the pickled grid-search result; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load model files from a trusted location.
with open(credit_risk_constants.CONSTANT_DT_SMOTE_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    dt_SMOTE_grid_search_model = pickle.load(grid_search_file)

# Fit a decision tree on the SMOTE resampled training data using the criterion,
# splitter and class_weight stored in the grid search's best_params_.
decision_tree_SMOTE_model \
    = DecisionTreeClassifier \
        (criterion = dt_SMOTE_grid_search_model.best_params_['criterion'],
         splitter = dt_SMOTE_grid_search_model.best_params_['splitter'],
         class_weight = dt_SMOTE_grid_search_model.best_params_['class_weight'],
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_SMOTE_dataframe, y_train_SMOTE_series)

### **SMOTEENN**

In [None]:
# Load the pickled grid-search result; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load model files from a trusted location.
with open(credit_risk_constants.CONSTANT_DT_SMOTEENN_GRID_SEARCH_MODEL_FILE_PATH, 'rb') \
        as grid_search_file:
    dt_SMOTEENN_grid_search_model = pickle.load(grid_search_file)

# Fit a decision tree on the SMOTEENN resampled training data using the criterion,
# splitter and class_weight stored in the grid search's best_params_.
decision_tree_SMOTEENN_model \
    = DecisionTreeClassifier \
        (criterion = dt_SMOTEENN_grid_search_model.best_params_['criterion'],
         splitter = dt_SMOTEENN_grid_search_model.best_params_['splitter'],
         class_weight = dt_SMOTEENN_grid_search_model.best_params_['class_weight'],
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_SMOTEENN_dataframe, y_train_SMOTEENN_series)

## **4.2: Display the Model Scores Using the Scaled Training and Testing data.**

### **Original**

In [None]:
accuracy_score_train_float \
    = decision_tree_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The decision tree model score from scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

In [None]:
accuracy_score_test_float \
    = decision_tree_model.score \
        (x_test_scaled_dataframe, y_test_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The decision tree model score from scaled test data is {:.2f}%' \
         .format(accuracy_score_test_float)
     + '\033[0m')

### **Random Undersampling**

In [None]:
accuracy_score_train_float \
    = decision_tree_undersampled_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The decision tree model score from undersampled scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

In [None]:
accuracy_score_test_float \
    = decision_tree_undersampled_model.score \
        (x_test_scaled_dataframe, y_test_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The decision tree model score from undersampled scaled test data is {:.2f}%' \
         .format(accuracy_score_test_float)
     + '\033[0m')

### **Random Oversampling**

In [None]:
# Mean accuracy (as a percentage) of the oversampled-data model on the scaled training set.
accuracy_score_train_float \
    = decision_tree_oversampled_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

# Message typo fixed: 'overersampled' -> 'oversampled'.
logx.print_and_log_text \
    ('\033[1m'
     + 'The decision tree model score from oversampled scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

In [None]:
accuracy_score_test_float \
    = decision_tree_oversampled_model.score \
        (x_test_scaled_dataframe, y_test_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The decision tree model score from oversampled scaled test data is {:.2f}%' \
         .format(accuracy_score_test_float)
     + '\033[0m')

### **Cluster Centroids**

In [None]:
accuracy_score_train_float \
    = decision_tree_cluster_centroids_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The decision tree model score from cluster centroids scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

In [None]:
accuracy_score_test_float \
    = decision_tree_cluster_centroids_model.score \
        (x_test_scaled_dataframe, y_test_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The decision tree model score from cluster centroids scaled test data is {:.2f}%' \
         .format(accuracy_score_test_float)
     + '\033[0m')

### **SMOTE**

In [None]:
accuracy_score_train_float \
    = decision_tree_SMOTE_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The decision tree model score from SMOTE scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

In [None]:
accuracy_score_test_float \
    = decision_tree_SMOTE_model.score \
        (x_test_scaled_dataframe, y_test_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The decision tree model score from SMOTE scaled test data is {:.2f}%' \
         .format(accuracy_score_test_float)
     + '\033[0m')

### **SMOTEENN**

In [None]:
accuracy_score_train_float \
    = decision_tree_SMOTEENN_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The decision tree model score from SMOTEENN scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

In [None]:
accuracy_score_test_float \
    = decision_tree_SMOTEENN_model.score \
        (x_test_scaled_dataframe, y_test_series) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The decision tree model score from SMOTEENN scaled test data is {:.2f}%' \
         .format(accuracy_score_test_float)
     + '\033[0m')

## **4.3: Calculate Training and Test Predictions.**

### **Original**

In [None]:
dt_train_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (decision_tree_model,
         x_train_scaled_dataframe,
         y_train_series)

logx.log_write_object(dt_train_predictions_dataframe)

In [None]:
dt_test_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (decision_tree_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(dt_test_predictions_dataframe)

### **Random Undersampling**

In [None]:
dt_train_undersampled_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (decision_tree_undersampled_model,
         x_train_scaled_dataframe,
         y_train_series)

logx.log_write_object(dt_train_undersampled_predictions_dataframe)

In [None]:
dt_test_undersampled_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (decision_tree_undersampled_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(dt_test_undersampled_predictions_dataframe)

### **Random Oversampling**

In [None]:
dt_train_oversampled_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (decision_tree_oversampled_model,
         x_train_scaled_dataframe,
         y_train_series)

logx.log_write_object(dt_train_oversampled_predictions_dataframe)

In [None]:
dt_test_oversampled_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (decision_tree_oversampled_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(dt_test_oversampled_predictions_dataframe)

### **Cluster Centroids**

In [None]:
# Run the cluster-centroids decision tree model against the scaled
# training data and log the resulting predictions object.
dt_train_cluster_centroids_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        decision_tree_cluster_centroids_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(dt_train_cluster_centroids_predictions_dataframe)

In [None]:
# Run the cluster-centroids decision tree model against the scaled
# test data and log the resulting predictions object.
dt_test_cluster_centroids_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        decision_tree_cluster_centroids_model,
        x_test_scaled_dataframe,
        y_test_series,
    )
)

logx.log_write_object(dt_test_cluster_centroids_predictions_dataframe)

### **SMOTE**

In [None]:
# Run the SMOTE decision tree model against the scaled training data
# and log the resulting predictions object.
dt_train_SMOTE_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        decision_tree_SMOTE_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(dt_train_SMOTE_predictions_dataframe)

In [None]:
# Run the SMOTE decision tree model against the scaled test data
# and log the resulting predictions object.
dt_test_SMOTE_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        decision_tree_SMOTE_model,
        x_test_scaled_dataframe,
        y_test_series,
    )
)

logx.log_write_object(dt_test_SMOTE_predictions_dataframe)

### **SMOTEENN**

In [None]:
# Run the SMOTEENN decision tree model against the scaled training data
# and log the resulting predictions object.
dt_train_SMOTEENN_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        decision_tree_SMOTEENN_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(dt_train_SMOTEENN_predictions_dataframe)

In [None]:
# Run the SMOTEENN decision tree model against the scaled test data and log
# the resulting predictions object. The model passed here must be the SMOTEENN
# one (not the SMOTE one), otherwise these results merely repeat the SMOTE
# test predictions.
dt_test_SMOTEENN_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (decision_tree_SMOTEENN_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(dt_test_SMOTEENN_predictions_dataframe)

# <br> **Section 5: Random Forest Models**

## **5.1: Fit Models by Using the Scaled Training Data.**

### **Original**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_RF_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    rf_grid_search_model = pickle.load(grid_search_file)

# Fit a random forest on the scaled training data, reusing the criterion and
# class_weight stored in the grid search's best_params_.
random_forest_model \
    = RandomForestClassifier \
        (criterion = rf_grid_search_model.best_params_['criterion'],
         class_weight = rf_grid_search_model.best_params_['class_weight'],
         n_estimators = credit_risk_constants.CONSTANT_ML_RF_N_ESTIMATORS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_dataframe, y_train_series)

### **Random Undersampling**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_RF_UNDERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    rf_undersampled_grid_search_model = pickle.load(grid_search_file)

# Fit a random forest on the undersampled scaled training data, reusing the
# criterion, max_features, and class_weight from the grid search's best_params_.
random_forest_undersampled_model \
    = RandomForestClassifier \
        (criterion = rf_undersampled_grid_search_model.best_params_['criterion'],
         max_features = rf_undersampled_grid_search_model.best_params_['max_features'],
         class_weight = rf_undersampled_grid_search_model.best_params_['class_weight'],
         n_estimators = credit_risk_constants.CONSTANT_ML_RF_N_ESTIMATORS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_undersampled_dataframe, y_train_undersampled_series)

### **Random Oversampling**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_RF_OVERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    rf_oversampled_grid_search_model = pickle.load(grid_search_file)

# Fit a random forest on the oversampled scaled training data, reusing the
# criterion and class_weight stored in the grid search's best_params_.
random_forest_oversampled_model \
    = RandomForestClassifier \
        (criterion = rf_oversampled_grid_search_model.best_params_['criterion'],
         class_weight = rf_oversampled_grid_search_model.best_params_['class_weight'],
         n_estimators = credit_risk_constants.CONSTANT_ML_RF_N_ESTIMATORS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_oversampled_dataframe, y_train_oversampled_series)

### **Cluster Centroids**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_RF_CENTROIDS_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    rf_cluster_centroids_grid_search_model = pickle.load(grid_search_file)

# Fit a random forest on the cluster-centroids scaled training data, reusing
# the criterion and class_weight stored in the grid search's best_params_.
random_forest_cluster_centroids_model \
    = RandomForestClassifier \
        (criterion = rf_cluster_centroids_grid_search_model.best_params_['criterion'],
         class_weight = rf_cluster_centroids_grid_search_model.best_params_['class_weight'],
         n_estimators = credit_risk_constants.CONSTANT_ML_RF_N_ESTIMATORS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_cluster_centroids_dataframe, y_train_cluster_centroids_series)

### **SMOTE**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_RF_SMOTE_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    rf_SMOTE_grid_search_model = pickle.load(grid_search_file)

# Fit a random forest on the SMOTE scaled training data, reusing the
# criterion and class_weight stored in the grid search's best_params_.
random_forest_SMOTE_model \
    = RandomForestClassifier \
        (criterion = rf_SMOTE_grid_search_model.best_params_['criterion'],
         class_weight = rf_SMOTE_grid_search_model.best_params_['class_weight'],
         n_estimators = credit_risk_constants.CONSTANT_ML_RF_N_ESTIMATORS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_SMOTE_dataframe, y_train_SMOTE_series)

### **SMOTEENN**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_RF_SMOTEENN_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    rf_SMOTEENN_grid_search_model = pickle.load(grid_search_file)

# Fit a random forest on the SMOTEENN scaled training data, reusing the
# criterion and class_weight stored in the grid search's best_params_.
random_forest_SMOTEENN_model \
    = RandomForestClassifier \
        (criterion = rf_SMOTEENN_grid_search_model.best_params_['criterion'],
         class_weight = rf_SMOTEENN_grid_search_model.best_params_['class_weight'],
         n_estimators = credit_risk_constants.CONSTANT_ML_RF_N_ESTIMATORS,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_SMOTEENN_dataframe, y_train_SMOTEENN_series)

## **5.2: Display the Model Scores Using the Scaled Training and Testing Data.**

### **Original**

In [None]:
# Accuracy (in percent) of the original random forest model on the scaled
# training data.
accuracy_score_train_float = random_forest_model.score(
    x_train_scaled_dataframe,
    y_train_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from scaled training data is {:.2f}%'
        .format(accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Accuracy (in percent) of the original random forest model on the scaled
# test data.
accuracy_score_test_float = random_forest_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

### **Random Undersampling**

In [None]:
# Accuracy (in percent) of the random-undersampling random forest model on
# the scaled training data.
accuracy_score_train_float = random_forest_undersampled_model.score(
    x_train_scaled_dataframe,
    y_train_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from undersampled scaled training data is {:.2f}%'
        .format(accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Accuracy (in percent) of the random-undersampling random forest model on
# the scaled test data.
accuracy_score_test_float = random_forest_undersampled_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from undersampled scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

### **Random Oversampling**

In [None]:
# Accuracy (in percent) of the random-oversampling random forest model on
# the scaled training data.
accuracy_score_train_float \
    = random_forest_oversampled_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

# The logged label reads "oversampled" so it matches the test-data message.
logx.print_and_log_text \
    ('\033[1m'
     + 'The random forest model score from oversampled scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

In [None]:
# Accuracy (in percent) of the random-oversampling random forest model on
# the scaled test data.
accuracy_score_test_float = random_forest_oversampled_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from oversampled scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

### **Cluster Centroids**

In [None]:
# Accuracy (in percent) of the cluster-centroids random forest model on the
# scaled training data.
accuracy_score_train_float = random_forest_cluster_centroids_model.score(
    x_train_scaled_dataframe,
    y_train_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from cluster centroids scaled training data is {:.2f}%'
        .format(accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Accuracy (in percent) of the cluster-centroids random forest model on the
# scaled test data.
accuracy_score_test_float = random_forest_cluster_centroids_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from cluster centroids scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

### **SMOTE**

In [None]:
# Accuracy (in percent) of the SMOTE random forest model on the scaled
# training data.
accuracy_score_train_float = random_forest_SMOTE_model.score(
    x_train_scaled_dataframe,
    y_train_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from SMOTE scaled training data is {:.2f}%'
        .format(accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Accuracy (in percent) of the SMOTE random forest model on the scaled
# test data.
accuracy_score_test_float = random_forest_SMOTE_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from SMOTE scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

### **SMOTEENN**

In [None]:
# Accuracy (in percent) of the SMOTEENN random forest model on the scaled
# training data.
accuracy_score_train_float = random_forest_SMOTEENN_model.score(
    x_train_scaled_dataframe,
    y_train_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from SMOTEENN scaled training data is {:.2f}%'
        .format(accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Accuracy (in percent) of the SMOTEENN random forest model on the scaled
# test data.
accuracy_score_test_float = random_forest_SMOTEENN_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The random forest model score from SMOTEENN scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

## **5.3: Calculate Training and Test Predictions.**

### **Original**

In [None]:
# Run the original random forest model against the scaled training data
# and log the resulting predictions object.
rf_train_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        random_forest_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(rf_train_predictions_dataframe)

In [None]:
# Run the original random forest model against the scaled test data
# and log the resulting predictions object.
rf_test_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        random_forest_model,
        x_test_scaled_dataframe,
        y_test_series,
    )
)

logx.log_write_object(rf_test_predictions_dataframe)

### **Random Undersampling**

In [None]:
# Run the random-undersampling random forest model against the scaled
# training data and log the resulting predictions object.
rf_train_undersampled_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        random_forest_undersampled_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(rf_train_undersampled_predictions_dataframe)

In [None]:
# Run the random-undersampling random forest model against the scaled
# test data and log the resulting predictions object.
rf_test_undersampled_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        random_forest_undersampled_model,
        x_test_scaled_dataframe,
        y_test_series,
    )
)

logx.log_write_object(rf_test_undersampled_predictions_dataframe)

### **Random Oversampling**

In [None]:
# Run the random-oversampling random forest model against the scaled
# training data and log the resulting predictions object.
rf_train_oversampled_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        random_forest_oversampled_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(rf_train_oversampled_predictions_dataframe)

In [None]:
# Run the random-oversampling random forest model against the scaled
# test data and log the resulting predictions object.
rf_test_oversampled_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        random_forest_oversampled_model,
        x_test_scaled_dataframe,
        y_test_series,
    )
)

logx.log_write_object(rf_test_oversampled_predictions_dataframe)

### **Cluster Centroids**

In [None]:
# Run the cluster-centroids random forest model against the scaled
# training data and log the resulting predictions object.
rf_train_cluster_centroids_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        random_forest_cluster_centroids_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(rf_train_cluster_centroids_predictions_dataframe)

In [None]:
# Run the cluster-centroids random forest model against the scaled
# test data and log the resulting predictions object.
rf_test_cluster_centroids_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        random_forest_cluster_centroids_model,
        x_test_scaled_dataframe,
        y_test_series,
    )
)

logx.log_write_object(rf_test_cluster_centroids_predictions_dataframe)

### **SMOTE**

In [None]:
# Run the SMOTE random forest model against the scaled training data
# and log the resulting predictions object.
rf_train_SMOTE_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        random_forest_SMOTE_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(rf_train_SMOTE_predictions_dataframe)

In [None]:
# Run the SMOTE random forest model against the scaled test data
# and log the resulting predictions object.
rf_test_SMOTE_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        random_forest_SMOTE_model,
        x_test_scaled_dataframe,
        y_test_series,
    )
)

logx.log_write_object(rf_test_SMOTE_predictions_dataframe)

### **SMOTEENN**

In [None]:
# Run the SMOTEENN random forest model against the scaled training data
# and log the resulting predictions object.
rf_train_SMOTEENN_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        random_forest_SMOTEENN_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(rf_train_SMOTEENN_predictions_dataframe)

In [None]:
# Run the SMOTEENN random forest model against the scaled test data and log
# the resulting predictions object. The model passed here must be the SMOTEENN
# one (not the SMOTE one), otherwise these results merely repeat the SMOTE
# test predictions.
rf_test_SMOTEENN_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (random_forest_SMOTEENN_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(rf_test_SMOTEENN_predictions_dataframe)

# <br> **Section 6: Support Vector Machine (SVM) Models**

## **6.1: Fit Models by Using the Scaled Training Data.**

### **Original**

In [None]:
# Fit a support vector classifier on the original scaled training data.
svm_model = SVC(
    probability = credit_risk_constants.CONSTANT_ML_SVM_PROBABILITY,
    random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1,
).fit(x_train_scaled_dataframe, y_train_series)

### **Random Undersampling**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_SVM_UNDERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    svm_undersampled_grid_search_model = pickle.load(grid_search_file)

# Fit a support vector classifier on the undersampled scaled training data.
# NOTE(review): the grid-search result loaded above is not used when building
# this SVC -- confirm whether its best_params_ should be applied here.
svm_undersampled_model \
    = SVC \
        (probability = credit_risk_constants.CONSTANT_ML_SVM_PROBABILITY,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_undersampled_dataframe, y_train_undersampled_series)

### **Random Oversampling**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_SVM_OVERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    svm_oversampled_grid_search_model = pickle.load(grid_search_file)

# Fit a support vector classifier on the oversampled scaled training data.
# NOTE(review): the grid-search result loaded above is not used when building
# this SVC -- confirm whether its best_params_ should be applied here.
svm_oversampled_model \
    = SVC \
        (probability = credit_risk_constants.CONSTANT_ML_SVM_PROBABILITY,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_oversampled_dataframe, y_train_oversampled_series)

### **Cluster Centroids**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_SVM_CENTROIDS_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    svm_cluster_centroids_grid_search_model = pickle.load(grid_search_file)

# Fit a support vector classifier on the cluster-centroids scaled training data.
# NOTE(review): the grid-search result loaded above is not used when building
# this SVC -- confirm whether its best_params_ should be applied here.
svm_cluster_centroids_model \
    = SVC \
        (probability = credit_risk_constants.CONSTANT_ML_SVM_PROBABILITY,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_cluster_centroids_dataframe, y_train_cluster_centroids_series)

### **SMOTE**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_SVM_SMOTE_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    svm_SMOTE_grid_search_model = pickle.load(grid_search_file)

# Fit a support vector classifier on the SMOTE scaled training data.
# NOTE(review): the grid-search result loaded above is not used when building
# this SVC -- confirm whether its best_params_ should be applied here.
svm_SMOTE_model \
    = SVC \
        (probability = credit_risk_constants.CONSTANT_ML_SVM_PROBABILITY,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_SMOTE_dataframe, y_train_SMOTE_series)

### **SMOTEENN**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_SVM_SMOTEENN_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    svm_SMOTEENN_grid_search_model = pickle.load(grid_search_file)

# Fit a support vector classifier on the SMOTEENN scaled training data.
# NOTE(review): the grid-search result loaded above is not used when building
# this SVC -- confirm whether its best_params_ should be applied here.
svm_SMOTEENN_model \
    = SVC \
        (probability = credit_risk_constants.CONSTANT_ML_SVM_PROBABILITY,
         random_state = credit_risk_constants.CONSTANT_ML_RANDOM_STATE_1) \
            .fit(x_train_scaled_SMOTEENN_dataframe, y_train_SMOTEENN_series)

## **6.2: Display the Model Scores Using the Scaled Training and Testing Data.**

### **Original**

In [None]:
# Accuracy (in percent) of the original SVM model on the scaled training data.
accuracy_score_train_float = svm_model.score(
    x_train_scaled_dataframe,
    y_train_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from scaled training data is {:.2f}%'
        .format(accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Accuracy (in percent) of the original SVM model on the scaled test data.
accuracy_score_test_float = svm_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

### **Random Undersampling**

In [None]:
# Accuracy (in percent) of the random-undersampling SVM model on the scaled
# training data.
accuracy_score_train_float = svm_undersampled_model.score(
    x_train_scaled_dataframe,
    y_train_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from undersampled scaled training data is {:.2f}%'
        .format(accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Accuracy (in percent) of the random-undersampling SVM model on the scaled
# test data.
accuracy_score_test_float = svm_undersampled_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from undersampled scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

### **Random Oversampling**

In [None]:
# Accuracy (in percent) of the random-oversampling SVM model on the scaled
# training data.
accuracy_score_train_float \
    = svm_oversampled_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

# The logged label reads "oversampled" so it matches the test-data message.
logx.print_and_log_text \
    ('\033[1m'
     + 'The svm model score from oversampled scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

In [None]:
# Accuracy (in percent) of the random-oversampling SVM model on the scaled
# test data.
accuracy_score_test_float = svm_oversampled_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from oversampled scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

### **Cluster Centroids**

In [None]:
# Accuracy (in percent) of the cluster-centroids SVM model on the scaled
# training data.
accuracy_score_train_float = svm_cluster_centroids_model.score(
    x_train_scaled_dataframe,
    y_train_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from cluster centroids scaled training data is {:.2f}%'
        .format(accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Accuracy (in percent) of the cluster-centroids SVM model on the scaled
# test data.
accuracy_score_test_float = svm_cluster_centroids_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from cluster centroids scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

### **SMOTE**

In [None]:
# Accuracy (in percent) of the SMOTE SVM model on the scaled training data.
accuracy_score_train_float = svm_SMOTE_model.score(
    x_train_scaled_dataframe,
    y_train_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from SMOTE scaled training data is {:.2f}%'
        .format(accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Accuracy (in percent) of the SMOTE SVM model on the scaled test data.
accuracy_score_test_float = svm_SMOTE_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from SMOTE scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

### **SMOTEENN**

In [None]:
# Accuracy (in percent) of the SMOTEENN SVM model on the scaled training data.
accuracy_score_train_float = svm_SMOTEENN_model.score(
    x_train_scaled_dataframe,
    y_train_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from SMOTEENN scaled training data is {:.2f}%'
        .format(accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Accuracy (in percent) of the SMOTEENN SVM model on the scaled test data.
accuracy_score_test_float = svm_SMOTEENN_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The svm model score from SMOTEENN scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

## **6.3: Calculate Training and Test Predictions.**

### **Original**

In [None]:
# Run the original SVM model against the scaled training data
# and log the resulting predictions object.
svm_train_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        svm_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(svm_train_predictions_dataframe)

In [None]:
# Run the original SVM model against the scaled test data
# and log the resulting predictions object.
svm_test_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        svm_model,
        x_test_scaled_dataframe,
        y_test_series,
    )
)

logx.log_write_object(svm_test_predictions_dataframe)

### **Random Undersampling**

In [None]:
# Run the random-undersampling SVM model against the scaled training data
# and log the resulting predictions object.
svm_train_undersampled_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        svm_undersampled_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(svm_train_undersampled_predictions_dataframe)

In [None]:
# Run the random-undersampling SVM model against the scaled test data
# and log the resulting predictions object.
svm_test_undersampled_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        svm_undersampled_model,
        x_test_scaled_dataframe,
        y_test_series,
    )
)

logx.log_write_object(svm_test_undersampled_predictions_dataframe)

### **Random Oversampling**

In [None]:
# Run the random-oversampling SVM model against the scaled training data
# and log the resulting predictions object.
svm_train_oversampled_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        svm_oversampled_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(svm_train_oversampled_predictions_dataframe)

In [None]:
# Run the random-oversampling SVM model against the scaled test data
# and log the resulting predictions object.
svm_test_oversampled_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        svm_oversampled_model,
        x_test_scaled_dataframe,
        y_test_series,
    )
)

logx.log_write_object(svm_test_oversampled_predictions_dataframe)

### **Cluster Centroids**

In [None]:
# Run the cluster-centroids SVM model against the scaled training data
# and log the resulting predictions object.
svm_train_cluster_centroids_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        svm_cluster_centroids_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(svm_train_cluster_centroids_predictions_dataframe)

In [None]:
# Run the cluster-centroids SVM model against the scaled test data
# and log the resulting predictions object.
svm_test_cluster_centroids_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        svm_cluster_centroids_model,
        x_test_scaled_dataframe,
        y_test_series,
    )
)

logx.log_write_object(svm_test_cluster_centroids_predictions_dataframe)

### **SMOTE**

In [None]:
# Run the SMOTE SVM model against the scaled training data
# and log the resulting predictions object.
svm_train_SMOTE_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        svm_SMOTE_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(svm_train_SMOTE_predictions_dataframe)

In [None]:
# Run the SMOTE SVM model against the scaled test data
# and log the resulting predictions object.
svm_test_SMOTE_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        svm_SMOTE_model,
        x_test_scaled_dataframe,
        y_test_series,
    )
)

logx.log_write_object(svm_test_SMOTE_predictions_dataframe)

### **SMOTEENN**

In [None]:
# Run the SMOTEENN SVM model against the scaled training data
# and log the resulting predictions object.
svm_train_SMOTEENN_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        svm_SMOTEENN_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(svm_train_SMOTEENN_predictions_dataframe)

In [None]:
# Run the SMOTEENN SVM model against the scaled test data and log the
# resulting predictions object. The model passed here must be the SMOTEENN
# one (not the SMOTE one), otherwise these results merely repeat the SMOTE
# test predictions.
svm_test_SMOTEENN_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (svm_SMOTEENN_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(svm_test_SMOTEENN_predictions_dataframe)

# <br> **Section 7: K-Nearest Neighbor (KNN) Models**

## **7.1: Fit Models by Using the Scaled Training Data.**

### **Original**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_KNN_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    knn_grid_search_model = pickle.load(grid_search_file)

# Fit a k-nearest-neighbors classifier on the scaled training data, reusing
# the algorithm stored in the grid search's best_params_.
knn_model \
    = KNeighborsClassifier \
        (algorithm = knn_grid_search_model.best_params_['algorithm'],
         leaf_size = credit_risk_constants.CONSTANT_ML_KNN_LEAF_SIZE) \
            .fit(x_train_scaled_dataframe, y_train_series)

### **Random Undersampling**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_KNN_UNDERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    knn_undersampled_grid_search_model = pickle.load(grid_search_file)

# Fit a k-nearest-neighbors classifier on the undersampled scaled training
# data, reusing the algorithm stored in the grid search's best_params_.
knn_undersampled_model \
    = KNeighborsClassifier \
        (algorithm = knn_undersampled_grid_search_model.best_params_['algorithm'],
         leaf_size = credit_risk_constants.CONSTANT_ML_KNN_LEAF_SIZE) \
            .fit(x_train_scaled_undersampled_dataframe, y_train_undersampled_series)

### **Random Oversampling**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_KNN_OVERSAMPLED_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    knn_oversampled_grid_search_model = pickle.load(grid_search_file)

# Fit a k-nearest-neighbors classifier on the oversampled scaled training
# data, reusing the algorithm stored in the grid search's best_params_.
knn_oversampled_model \
    = KNeighborsClassifier \
        (algorithm = knn_oversampled_grid_search_model.best_params_['algorithm'],
         leaf_size = credit_risk_constants.CONSTANT_ML_KNN_LEAF_SIZE) \
            .fit(x_train_scaled_oversampled_dataframe, y_train_oversampled_series)

### **Cluster Centroids**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_KNN_CENTROIDS_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    knn_cluster_centroids_grid_search_model = pickle.load(grid_search_file)

# Fit a k-nearest-neighbors classifier on the cluster-centroids scaled training
# data, reusing the algorithm stored in the grid search's best_params_.
knn_cluster_centroids_model \
    = KNeighborsClassifier \
        (algorithm = knn_cluster_centroids_grid_search_model.best_params_['algorithm'],
         leaf_size = credit_risk_constants.CONSTANT_ML_KNN_LEAF_SIZE) \
            .fit(x_train_scaled_cluster_centroids_dataframe, y_train_cluster_centroids_series)

### **SMOTE**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_KNN_SMOTE_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    knn_SMOTE_grid_search_model = pickle.load(grid_search_file)

# Fit a k-nearest-neighbors classifier on the SMOTE scaled training data,
# reusing the algorithm stored in the grid search's best_params_.
knn_SMOTE_model \
    = KNeighborsClassifier \
        (algorithm = knn_SMOTE_grid_search_model.best_params_['algorithm'],
         leaf_size = credit_risk_constants.CONSTANT_ML_KNN_LEAF_SIZE) \
            .fit(x_train_scaled_SMOTE_dataframe, y_train_SMOTE_series)

### **SMOTEENN**

In [None]:
# Load the pickled grid-search result; the context manager guarantees the
# file handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open(
    credit_risk_constants.CONSTANT_KNN_SMOTEENN_GRID_SEARCH_MODEL_FILE_PATH, 'rb'
) as grid_search_file:
    knn_SMOTEENN_grid_search_model = pickle.load(grid_search_file)

# Fit a k-nearest-neighbors classifier on the SMOTEENN scaled training data,
# reusing the algorithm stored in the grid search's best_params_.
knn_SMOTEENN_model \
    = KNeighborsClassifier \
        (algorithm = knn_SMOTEENN_grid_search_model.best_params_['algorithm'],
         leaf_size = credit_risk_constants.CONSTANT_ML_KNN_LEAF_SIZE) \
            .fit(x_train_scaled_SMOTEENN_dataframe, y_train_SMOTEENN_series)

## **7.2: Display the Model Scores Using the Scaled Training and Testing Data.**

### **Original**

In [None]:
# Accuracy (in percent) of the original KNN model on the scaled training data.
accuracy_score_train_float = knn_model.score(
    x_train_scaled_dataframe,
    y_train_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from scaled training data is {:.2f}%'
        .format(accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Accuracy (in percent) of the original KNN model on the scaled test data.
accuracy_score_test_float = knn_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

### **Random Undersampling**

In [None]:
# Accuracy (in percent) of the random-undersampling KNN model on the scaled
# training data.
accuracy_score_train_float = knn_undersampled_model.score(
    x_train_scaled_dataframe,
    y_train_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from undersampled scaled training data is {:.2f}%'
        .format(accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Accuracy (in percent) of the random-undersampling KNN model on the scaled
# test data.
accuracy_score_test_float = knn_undersampled_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from undersampled scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

### **Random Oversampling**

In [None]:
# Accuracy (in percent) of the random-oversampling KNN model on the scaled
# training data.
accuracy_score_train_float \
    = knn_oversampled_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

# The logged label reads "oversampled" so it matches the test-data message.
logx.print_and_log_text \
    ('\033[1m'
     + 'The knn model score from oversampled scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

In [None]:
# Accuracy (in percent) of the random-oversampling KNN model on the scaled
# test data.
accuracy_score_test_float = knn_oversampled_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from oversampled scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

### **Cluster Centroids**

In [None]:
# Accuracy (in percent) of the cluster-centroids KNN model on the scaled
# training data.
accuracy_score_train_float = knn_cluster_centroids_model.score(
    x_train_scaled_dataframe,
    y_train_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from cluster centroids scaled training data is {:.2f}%'
        .format(accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Accuracy (in percent) of the cluster-centroids KNN model on the scaled
# test data.
accuracy_score_test_float = knn_cluster_centroids_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from cluster centroids scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

### **SMOTE**

In [None]:
# Accuracy (in percent) of the SMOTE KNN model on the scaled training data.
accuracy_score_train_float = knn_SMOTE_model.score(
    x_train_scaled_dataframe,
    y_train_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from SMOTE scaled training data is {:.2f}%'
        .format(accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Accuracy (in percent) of the SMOTE KNN model on the scaled test data.
accuracy_score_test_float = knn_SMOTE_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from SMOTE scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

### **SMOTEENN**

In [None]:
# Accuracy (in percent) of the SMOTEENN KNN model on the scaled training data.
accuracy_score_train_float = knn_SMOTEENN_model.score(
    x_train_scaled_dataframe,
    y_train_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from SMOTEENN scaled training data is {:.2f}%'
        .format(accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Accuracy (in percent) of the SMOTEENN KNN model on the scaled test data.
accuracy_score_test_float = knn_SMOTEENN_model.score(
    x_test_scaled_dataframe,
    y_test_series,
) * 100

logx.print_and_log_text(
    '\033[1m'
    + 'The knn model score from SMOTEENN scaled test data is {:.2f}%'
        .format(accuracy_score_test_float)
    + '\033[0m'
)

## **7.3: Calculate Training and Test Predictions.**

### **Original**

In [None]:
# Run the original KNN model against the scaled training data
# and log the resulting predictions object.
knn_train_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        knn_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(knn_train_predictions_dataframe)

In [None]:
# Run the original KNN model against the scaled test data
# and log the resulting predictions object.
knn_test_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        knn_model,
        x_test_scaled_dataframe,
        y_test_series,
    )
)

logx.log_write_object(knn_test_predictions_dataframe)

### **Random Undersampling**

In [None]:
# Run the random-undersampling KNN model against the scaled training data
# and log the resulting predictions object.
knn_train_undersampled_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        knn_undersampled_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(knn_train_undersampled_predictions_dataframe)

In [None]:
# Run the random-undersampling KNN model against the scaled test data
# and log the resulting predictions object.
knn_test_undersampled_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        knn_undersampled_model,
        x_test_scaled_dataframe,
        y_test_series,
    )
)

logx.log_write_object(knn_test_undersampled_predictions_dataframe)

### **Random Oversampling**

In [None]:
# Run the random-oversampling KNN model against the scaled training data
# and log the resulting predictions object.
knn_train_oversampled_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        knn_oversampled_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(knn_train_oversampled_predictions_dataframe)

In [None]:
# Run the random-oversampling KNN model against the scaled test data
# and log the resulting predictions object.
knn_test_oversampled_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        knn_oversampled_model,
        x_test_scaled_dataframe,
        y_test_series,
    )
)

logx.log_write_object(knn_test_oversampled_predictions_dataframe)

### **Cluster Centroids**

In [None]:
# Run the cluster-centroids KNN model against the scaled training data
# and log the resulting predictions object.
knn_train_cluster_centroids_predictions_dataframe = (
    classificationsx.return_predictions_dataframe(
        knn_cluster_centroids_model,
        x_train_scaled_dataframe,
        y_train_series,
    )
)

logx.log_write_object(knn_train_cluster_centroids_predictions_dataframe)

In [None]:
# Predictions dataframe for knn_cluster_centroids_model on the scaled test split.
knn_test_cluster_centroids_predictions_dataframe = classificationsx.return_predictions_dataframe(
    knn_cluster_centroids_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(knn_test_cluster_centroids_predictions_dataframe)

### **SMOTE**

In [None]:
# Predictions dataframe for knn_SMOTE_model on the scaled training split.
knn_train_SMOTE_predictions_dataframe = classificationsx.return_predictions_dataframe(
    knn_SMOTE_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(knn_train_SMOTE_predictions_dataframe)

In [None]:
# Predictions dataframe for knn_SMOTE_model on the scaled test split.
knn_test_SMOTE_predictions_dataframe = classificationsx.return_predictions_dataframe(
    knn_SMOTE_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(knn_test_SMOTE_predictions_dataframe)

### **SMOTEENN**

In [None]:
# Predictions dataframe for knn_SMOTEENN_model on the scaled training split.
knn_train_SMOTEENN_predictions_dataframe = classificationsx.return_predictions_dataframe(
    knn_SMOTEENN_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(knn_train_SMOTEENN_predictions_dataframe)

In [None]:
# Predictions dataframe for the SMOTEENN KNN model on the scaled test split.
# The model must be knn_SMOTEENN_model (not knn_SMOTE_model): otherwise this
# dataframe is a duplicate of the SMOTE test predictions and does not match
# the SMOTEENN training predictions computed in the preceding cell.
knn_test_SMOTEENN_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (knn_SMOTEENN_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(knn_test_SMOTEENN_predictions_dataframe)

# <br> **Section 8: Gaussian Naive Bayes (GNB) Models**

## **8.1: Fit Models by Using the Scaled Training Data.**

### **Original**

In [None]:
# Gaussian Naive Bayes classifier fitted on the scaled, non-resampled training set.
gnb_model = GaussianNB().fit(
    X=x_train_scaled_dataframe,
    y=y_train_series,
)

### **Random Undersampling**

In [None]:
# Gaussian Naive Bayes classifier fitted on the undersampled scaled training set.
gnb_undersampled_model = GaussianNB().fit(
    X=x_train_scaled_undersampled_dataframe,
    y=y_train_undersampled_series,
)

### **Random Oversampling**

In [None]:
# Gaussian Naive Bayes classifier fitted on the oversampled scaled training set.
gnb_oversampled_model = GaussianNB().fit(
    X=x_train_scaled_oversampled_dataframe,
    y=y_train_oversampled_series,
)

### **Cluster Centroids**

In [None]:
# Gaussian Naive Bayes classifier fitted on the cluster-centroids scaled training set.
gnb_cluster_centroids_model = GaussianNB().fit(
    X=x_train_scaled_cluster_centroids_dataframe,
    y=y_train_cluster_centroids_series,
)

### **SMOTE**

In [None]:
# Gaussian Naive Bayes classifier fitted on the SMOTE scaled training set.
gnb_SMOTE_model = GaussianNB().fit(
    X=x_train_scaled_SMOTE_dataframe,
    y=y_train_SMOTE_series,
)

### **SMOTEENN**

In [None]:
# Gaussian Naive Bayes classifier fitted on the SMOTEENN scaled training set.
gnb_SMOTEENN_model = GaussianNB().fit(
    X=x_train_scaled_SMOTEENN_dataframe,
    y=y_train_SMOTEENN_series,
)

## **8.2: Display the Model Scores Using the Scaled Training and Testing Data.**

### **Original**

In [None]:
# Score gnb_model on the scaled training split, expressed as a percentage.
accuracy_score_train_float = gnb_model.score(
    x_train_scaled_dataframe, y_train_series
) * 100

# Print and log the score wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from scaled training data is {:.2f}%'.format(
        accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Score gnb_model on the scaled test split, expressed as a percentage.
accuracy_score_test_float = gnb_model.score(
    x_test_scaled_dataframe, y_test_series
) * 100

# Print and log the score wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from scaled test data is {:.2f}%'.format(
        accuracy_score_test_float)
    + '\033[0m'
)

### **Random Undersampling**

In [None]:
# Score gnb_undersampled_model on the scaled training split, as a percentage.
accuracy_score_train_float = gnb_undersampled_model.score(
    x_train_scaled_dataframe, y_train_series
) * 100

# Print and log the score wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from undersampled scaled training data is {:.2f}%'.format(
        accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Score gnb_undersampled_model on the scaled test split, as a percentage.
accuracy_score_test_float = gnb_undersampled_model.score(
    x_test_scaled_dataframe, y_test_series
) * 100

# Print and log the score wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from undersampled scaled test data is {:.2f}%'.format(
        accuracy_score_test_float)
    + '\033[0m'
)

### **Random Oversampling**

In [None]:
# Score gnb_oversampled_model on the scaled training split, as a percentage.
accuracy_score_train_float \
    = gnb_oversampled_model.score \
        (x_train_scaled_dataframe, y_train_series) * 100

# Print and log the score in bold. The message text now reads "oversampled"
# (it was misspelled "overersampled"), matching the test-data message.
logx.print_and_log_text \
    ('\033[1m'
     + 'The gnb model score from oversampled scaled training data is {:.2f}%' \
         .format(accuracy_score_train_float)
     + '\033[0m')

In [None]:
# Score gnb_oversampled_model on the scaled test split, as a percentage.
accuracy_score_test_float = gnb_oversampled_model.score(
    x_test_scaled_dataframe, y_test_series
) * 100

# Print and log the score wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from oversampled scaled test data is {:.2f}%'.format(
        accuracy_score_test_float)
    + '\033[0m'
)

### **Cluster Centroids**

In [None]:
# Score gnb_cluster_centroids_model on the scaled training split, as a percentage.
accuracy_score_train_float = gnb_cluster_centroids_model.score(
    x_train_scaled_dataframe, y_train_series
) * 100

# Print and log the score wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from cluster centroids scaled training data is {:.2f}%'.format(
        accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Score gnb_cluster_centroids_model on the scaled test split, as a percentage.
accuracy_score_test_float = gnb_cluster_centroids_model.score(
    x_test_scaled_dataframe, y_test_series
) * 100

# Print and log the score wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from cluster centroids scaled test data is {:.2f}%'.format(
        accuracy_score_test_float)
    + '\033[0m'
)

### **SMOTE**

In [None]:
# Score gnb_SMOTE_model on the scaled training split, as a percentage.
accuracy_score_train_float = gnb_SMOTE_model.score(
    x_train_scaled_dataframe, y_train_series
) * 100

# Print and log the score wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from SMOTE scaled training data is {:.2f}%'.format(
        accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Score gnb_SMOTE_model on the scaled test split, as a percentage.
accuracy_score_test_float = gnb_SMOTE_model.score(
    x_test_scaled_dataframe, y_test_series
) * 100

# Print and log the score wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from SMOTE scaled test data is {:.2f}%'.format(
        accuracy_score_test_float)
    + '\033[0m'
)

### **SMOTEENN**

In [None]:
# Score gnb_SMOTEENN_model on the scaled training split, as a percentage.
accuracy_score_train_float = gnb_SMOTEENN_model.score(
    x_train_scaled_dataframe, y_train_series
) * 100

# Print and log the score wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from SMOTEENN scaled training data is {:.2f}%'.format(
        accuracy_score_train_float)
    + '\033[0m'
)

In [None]:
# Score gnb_SMOTEENN_model on the scaled test split, as a percentage.
accuracy_score_test_float = gnb_SMOTEENN_model.score(
    x_test_scaled_dataframe, y_test_series
) * 100

# Print and log the score wrapped in ANSI bold on/off codes.
logx.print_and_log_text(
    '\033[1m'
    + 'The gnb model score from SMOTEENN scaled test data is {:.2f}%'.format(
        accuracy_score_test_float)
    + '\033[0m'
)

## **8.3: Calculate Training and Test Predictions.**

### **Original**

In [None]:
# Predictions dataframe for gnb_model on the scaled training split,
# then handed to logx.log_write_object.
gnb_train_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(gnb_train_predictions_dataframe)

In [None]:
# Predictions dataframe for gnb_model on the scaled test split,
# then handed to logx.log_write_object.
gnb_test_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(gnb_test_predictions_dataframe)

### **Random Undersampling**

In [None]:
# Predictions dataframe for gnb_undersampled_model on the scaled training split.
gnb_train_undersampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_undersampled_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(gnb_train_undersampled_predictions_dataframe)

In [None]:
# Predictions dataframe for gnb_undersampled_model on the scaled test split.
gnb_test_undersampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_undersampled_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(gnb_test_undersampled_predictions_dataframe)

### **Random Oversampling**

In [None]:
# Predictions dataframe for gnb_oversampled_model on the scaled training split.
gnb_train_oversampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_oversampled_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(gnb_train_oversampled_predictions_dataframe)

In [None]:
# Predictions dataframe for gnb_oversampled_model on the scaled test split.
gnb_test_oversampled_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_oversampled_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(gnb_test_oversampled_predictions_dataframe)

### **Cluster Centroids**

In [None]:
# Predictions dataframe for gnb_cluster_centroids_model on the scaled training split.
gnb_train_cluster_centroids_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_cluster_centroids_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(gnb_train_cluster_centroids_predictions_dataframe)

In [None]:
# Predictions dataframe for gnb_cluster_centroids_model on the scaled test split.
gnb_test_cluster_centroids_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_cluster_centroids_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(gnb_test_cluster_centroids_predictions_dataframe)

### **SMOTE**

In [None]:
# Predictions dataframe for gnb_SMOTE_model on the scaled training split.
gnb_train_SMOTE_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_SMOTE_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(gnb_train_SMOTE_predictions_dataframe)

In [None]:
# Predictions dataframe for gnb_SMOTE_model on the scaled test split.
gnb_test_SMOTE_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_SMOTE_model,
    x_test_scaled_dataframe,
    y_test_series,
)

logx.log_write_object(gnb_test_SMOTE_predictions_dataframe)

### **SMOTEENN**

In [None]:
# Predictions dataframe for gnb_SMOTEENN_model on the scaled training split.
gnb_train_SMOTEENN_predictions_dataframe = classificationsx.return_predictions_dataframe(
    gnb_SMOTEENN_model,
    x_train_scaled_dataframe,
    y_train_series,
)

logx.log_write_object(gnb_train_SMOTEENN_predictions_dataframe)

In [None]:
# Predictions dataframe for the SMOTEENN GNB model on the scaled test split.
# The model must be gnb_SMOTEENN_model (not gnb_SMOTE_model): otherwise this
# dataframe is a duplicate of the SMOTE test predictions and does not match
# the SMOTEENN training predictions computed in the preceding cell.
gnb_test_SMOTEENN_predictions_dataframe \
    = classificationsx.return_predictions_dataframe \
        (gnb_SMOTEENN_model,
         x_test_scaled_dataframe,
         y_test_series)

logx.log_write_object(gnb_test_SMOTEENN_predictions_dataframe)

# <br> **Section 9: Evaluate Model Performance**

## **9.1: Logistic Regression**

### **Original**

In [None]:
# Display the parameters reported by logistic_regression_model.
logistic_regression_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
lr_predictions_nparray = logistic_regression_model.predict(x_test_scaled_dataframe)

lr_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, lr_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The balanced accuracy score for logistic regression from actual vs. test predictions is {:.2f}%' \
         .format(lr_balanced_accuracy_score_float)
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# original logistic regression test predictions.
(
    lr_accuracy_score_float,
    lr_confusion_matrix_dataframe,
    lr_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    lr_predictions_nparray,
    'LOGISTIC REGRESSION MODEL',
    'Healthy',
    'High-Risk',
)

# Create both result dictionaries, seeded with this model's score as a percentage.
model_performance_dictionary = {
    'logistic_regression': [lr_accuracy_score_float * 100],
}

model_performance_ranking_dictionary = {
    'logistic_regression': lr_accuracy_score_float * 100,
}

### **Random Undersampling**

In [None]:
# Display the parameters reported by logistic_regression_undersampled_model.
logistic_regression_undersampled_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
lr_undersampled_predictions_nparray \
    = logistic_regression_undersampled_model.predict(x_test_scaled_dataframe)

lr_undersampled_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, lr_undersampled_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for logistic regression undersampled from actual vs. test predictions is {:.2f}%' \
         .format(lr_undersampled_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# undersampled logistic regression test predictions.
(
    lr_undersampled_accuracy_score_float,
    lr_undersampled_confusion_matrix_dataframe,
    lr_undersampled_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    lr_undersampled_predictions_nparray,
    'LOGISTIC REGRESSION MODEL (Undersampled)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage. Re-running this cell appends a duplicate
# entry to the list.
model_performance_dictionary['logistic_regression'].append(
    lr_undersampled_accuracy_score_float * 100
)

model_performance_ranking_dictionary['logistic_regression_undersampled'] = (
    lr_undersampled_accuracy_score_float * 100
)

### **Random Oversampling**

In [None]:
# Display the parameters reported by logistic_regression_oversampled_model.
logistic_regression_oversampled_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
lr_oversampled_predictions_nparray \
    = logistic_regression_oversampled_model.predict(x_test_scaled_dataframe)

lr_oversampled_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, lr_oversampled_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for logistic regression oversampled from actual vs. test predictions is {:.2f}%' \
         .format(lr_oversampled_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# oversampled logistic regression test predictions.
(
    lr_oversampled_accuracy_score_float,
    lr_oversampled_confusion_matrix_dataframe,
    lr_oversampled_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    lr_oversampled_predictions_nparray,
    'LOGISTIC REGRESSION MODEL (Oversampled)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage. Re-running this cell appends a duplicate
# entry to the list.
model_performance_dictionary['logistic_regression'].append(
    lr_oversampled_accuracy_score_float * 100
)

model_performance_ranking_dictionary['logistic_regression_oversampled'] = (
    lr_oversampled_accuracy_score_float * 100
)

### **Cluster Centroids**

In [None]:
# Display the parameters reported by logistic_regression_cluster_centroids_model.
logistic_regression_cluster_centroids_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
lr_cluster_centroids_predictions_nparray \
    = logistic_regression_cluster_centroids_model.predict(x_test_scaled_dataframe)

lr_cluster_centroids_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, lr_cluster_centroids_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for logistic regression cluster centroids from actual vs. test predictions is {:.2f}%' \
         .format(lr_cluster_centroids_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# cluster-centroids logistic regression test predictions.
(
    lr_cluster_centroids_accuracy_score_float,
    lr_cluster_centroids_confusion_matrix_dataframe,
    lr_cluster_centroids_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    lr_cluster_centroids_predictions_nparray,
    'LOGISTIC REGRESSION MODEL (Cluster Centroids)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage. Re-running this cell appends a duplicate
# entry to the list.
model_performance_dictionary['logistic_regression'].append(
    lr_cluster_centroids_accuracy_score_float * 100
)

model_performance_ranking_dictionary['logistic_regression_cluster_centroids'] = (
    lr_cluster_centroids_accuracy_score_float * 100
)

### **SMOTE**

In [None]:
# Display the parameters reported by logistic_regression_SMOTE_model.
logistic_regression_SMOTE_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
lr_SMOTE_predictions_nparray \
    = logistic_regression_SMOTE_model.predict(x_test_scaled_dataframe)

lr_SMOTE_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, lr_SMOTE_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for logistic regression SMOTE from actual vs. test predictions is {:.2f}%' \
         .format(lr_SMOTE_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# SMOTE logistic regression test predictions.
(
    lr_SMOTE_accuracy_score_float,
    lr_SMOTE_confusion_matrix_dataframe,
    lr_SMOTE_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    lr_SMOTE_predictions_nparray,
    'LOGISTIC REGRESSION MODEL (SMOTE)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage. Re-running this cell appends a duplicate
# entry to the list.
model_performance_dictionary['logistic_regression'].append(
    lr_SMOTE_accuracy_score_float * 100
)

model_performance_ranking_dictionary['logistic_regression_smote'] = (
    lr_SMOTE_accuracy_score_float * 100
)

### **SMOTEENN**

In [None]:
# Display the parameters reported by logistic_regression_SMOTEENN_model.
logistic_regression_SMOTEENN_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
lr_SMOTEENN_predictions_nparray \
    = logistic_regression_SMOTEENN_model.predict(x_test_scaled_dataframe)

lr_SMOTEENN_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, lr_SMOTEENN_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for logistic regression SMOTEENN from actual vs. test predictions is {:.2f}%' \
         .format(lr_SMOTEENN_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# SMOTEENN logistic regression test predictions.
(
    lr_SMOTEENN_accuracy_score_float,
    lr_SMOTEENN_confusion_matrix_dataframe,
    lr_SMOTEENN_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    lr_SMOTEENN_predictions_nparray,
    'LOGISTIC REGRESSION MODEL (SMOTEENN)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage. Re-running this cell appends a duplicate
# entry to the list.
model_performance_dictionary['logistic_regression'].append(
    lr_SMOTEENN_accuracy_score_float * 100
)

model_performance_ranking_dictionary['logistic_regression_smoteen'] = (
    lr_SMOTEENN_accuracy_score_float * 100
)

## **9.2: Decision Tree**

### **Original**

In [None]:
# Display the parameters reported by decision_tree_model.
decision_tree_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
dt_predictions_nparray \
    = decision_tree_model.predict(x_test_scaled_dataframe)

dt_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, dt_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The balanced accuracy score for decision tree from actual vs. test predictions is {:.2f}%' \
         .format(dt_balanced_accuracy_score_float)
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# original decision tree test predictions.
(
    dt_accuracy_score_float,
    dt_confusion_matrix_dataframe,
    dt_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    dt_predictions_nparray,
    'DECISION TREE MODEL',
    'Healthy',
    'High-Risk',
)

# Start the decision-tree list and ranking entry with the score as a percentage.
model_performance_dictionary['decision_tree'] = [dt_accuracy_score_float * 100]

model_performance_ranking_dictionary['decision_tree'] = dt_accuracy_score_float * 100

### **Random Undersampling**

In [None]:
# Display the parameters reported by decision_tree_undersampled_model.
decision_tree_undersampled_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
dt_undersampled_predictions_nparray \
    = decision_tree_undersampled_model.predict(x_test_scaled_dataframe)

dt_undersampled_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, dt_undersampled_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for decision tree undersampled from actual vs. test predictions is {:.2f}%' \
         .format(dt_undersampled_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# undersampled decision tree test predictions.
(
    dt_undersampled_accuracy_score_float,
    dt_undersampled_confusion_matrix_dataframe,
    dt_undersampled_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    dt_undersampled_predictions_nparray,
    'DECISION TREE MODEL (Undersampled)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage. Re-running this cell appends a duplicate
# entry to the list.
model_performance_dictionary['decision_tree'].append(
    dt_undersampled_accuracy_score_float * 100
)

model_performance_ranking_dictionary['decision_tree_undersampling'] = (
    dt_undersampled_accuracy_score_float * 100
)

### **Random Oversampling**

In [None]:
# Display the parameters reported by decision_tree_oversampled_model.
decision_tree_oversampled_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
dt_oversampled_predictions_nparray \
    = decision_tree_oversampled_model.predict(x_test_scaled_dataframe)

dt_oversampled_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, dt_oversampled_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for decision tree oversampled from actual vs. test predictions is {:.2f}%' \
         .format(dt_oversampled_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# oversampled decision tree test predictions.
(
    dt_oversampled_accuracy_score_float,
    dt_oversampled_confusion_matrix_dataframe,
    dt_oversampled_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    dt_oversampled_predictions_nparray,
    'DECISION TREE MODEL (Oversampled)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage. Re-running this cell appends a duplicate
# entry to the list.
model_performance_dictionary['decision_tree'].append(
    dt_oversampled_accuracy_score_float * 100
)

model_performance_ranking_dictionary['decision_tree_oversampling'] = (
    dt_oversampled_accuracy_score_float * 100
)

### **Cluster Centroids**

In [None]:
# Display the parameters reported by decision_tree_cluster_centroids_model.
decision_tree_cluster_centroids_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
dt_cluster_centroids_predictions_nparray \
    = decision_tree_cluster_centroids_model.predict(x_test_scaled_dataframe)

dt_cluster_centroids_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, dt_cluster_centroids_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for decision tree cluster centroids from actual vs. test predictions is {:.2f}%' \
         .format(dt_cluster_centroids_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# cluster-centroids decision tree test predictions.
(
    dt_cluster_centroids_accuracy_score_float,
    dt_cluster_centroids_confusion_matrix_dataframe,
    dt_cluster_centroids_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    dt_cluster_centroids_predictions_nparray,
    'DECISION TREE MODEL (Cluster Centroids)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage. Re-running this cell appends a duplicate
# entry to the list.
model_performance_dictionary['decision_tree'].append(
    dt_cluster_centroids_accuracy_score_float * 100
)

model_performance_ranking_dictionary['decision_tree_cluster_centroids'] = (
    dt_cluster_centroids_accuracy_score_float * 100
)

### **SMOTE**

In [None]:
# Display the parameters reported by decision_tree_SMOTE_model.
decision_tree_SMOTE_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
dt_SMOTE_predictions_nparray \
    = decision_tree_SMOTE_model.predict(x_test_scaled_dataframe)

dt_SMOTE_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, dt_SMOTE_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for decision tree SMOTE from actual vs. test predictions is {:.2f}%' \
         .format(dt_SMOTE_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# SMOTE decision tree test predictions.
(
    dt_SMOTE_accuracy_score_float,
    dt_SMOTE_confusion_matrix_dataframe,
    dt_SMOTE_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    dt_SMOTE_predictions_nparray,
    'DECISION TREE MODEL (SMOTE)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage. Re-running this cell appends a duplicate
# entry to the list.
model_performance_dictionary['decision_tree'].append(
    dt_SMOTE_accuracy_score_float * 100
)

model_performance_ranking_dictionary['decision_tree_smote'] = (
    dt_SMOTE_accuracy_score_float * 100
)

### **SMOTEENN**

In [None]:
# Display the parameters reported by decision_tree_SMOTEENN_model.
decision_tree_SMOTEENN_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
dt_SMOTEENN_predictions_nparray \
    = decision_tree_SMOTEENN_model.predict(x_test_scaled_dataframe)

dt_SMOTEENN_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, dt_SMOTEENN_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for decision tree SMOTEENN from actual vs. test predictions is {:.2f}%' \
         .format(dt_SMOTEENN_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# SMOTEENN decision tree test predictions.
(
    dt_SMOTEENN_accuracy_score_float,
    dt_SMOTEENN_confusion_matrix_dataframe,
    dt_SMOTEENN_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    dt_SMOTEENN_predictions_nparray,
    'DECISION TREE MODEL (SMOTEENN)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage. Re-running this cell appends a duplicate
# entry to the list.
model_performance_dictionary['decision_tree'].append(
    dt_SMOTEENN_accuracy_score_float * 100
)

model_performance_ranking_dictionary['decision_tree_smoteen'] = (
    dt_SMOTEENN_accuracy_score_float * 100
)

## **9.3: Random Forest**

### **Original**

In [None]:
# Display the parameters reported by random_forest_model.
random_forest_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
# (The "rt_" prefix is kept as-is because that cell reads this exact name.)
rt_predictions_nparray \
    = random_forest_model.predict(x_test_scaled_dataframe)

rf_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, rt_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for random forest from actual vs. test predictions is {:.2f}%' \
         .format(rf_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# original random forest test predictions.
(
    rf_accuracy_score_float,
    rf_confusion_matrix_dataframe,
    rf_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    rt_predictions_nparray,
    'RANDOM FOREST MODEL',
    'Healthy',
    'High-Risk',
)

# Start the random-forest list and ranking entry with the score as a percentage.
model_performance_dictionary['random_forest'] = [rf_accuracy_score_float * 100]

model_performance_ranking_dictionary['random_forest'] = rf_accuracy_score_float * 100

### **Random Undersampling**

In [None]:
# Display the parameters reported by random_forest_undersampled_model.
random_forest_undersampled_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
# (The "rt_" prefix is kept as-is because that cell reads this exact name.)
rt_undersampled_predictions_nparray \
    = random_forest_undersampled_model.predict(x_test_scaled_dataframe)

rf_undersampled_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, rt_undersampled_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for random forest undersampled from actual vs. test predictions is {:.2f}%' \
         .format(rf_undersampled_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# undersampled random forest test predictions.
(
    rf_undersampled_accuracy_score_float,
    rf_undersampled_confusion_matrix_dataframe,
    rf_undersampled_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    rt_undersampled_predictions_nparray,
    'RANDOM FOREST MODEL (Undersampled)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage. Re-running this cell appends a duplicate
# entry to the list.
model_performance_dictionary['random_forest'].append(
    rf_undersampled_accuracy_score_float * 100
)

model_performance_ranking_dictionary['random_forest_undersampled'] = (
    rf_undersampled_accuracy_score_float * 100
)

### **Random Oversampling**

In [None]:
# Display the parameters reported by random_forest_oversampled_model.
random_forest_oversampled_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
# (The "rt_" prefix is kept as-is because that cell reads this exact name.)
rt_oversampled_predictions_nparray \
    = random_forest_oversampled_model.predict(x_test_scaled_dataframe)

rf_oversampled_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, rt_oversampled_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for random forest oversampled from actual vs. test predictions is {:.2f}%' \
         .format(rf_oversampled_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# oversampled random forest test predictions.
(
    rf_oversampled_accuracy_score_float,
    rf_oversampled_confusion_matrix_dataframe,
    rf_oversampled_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    rt_oversampled_predictions_nparray,
    'RANDOM FOREST MODEL (Oversampled)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage. Re-running this cell appends a duplicate
# entry to the list.
model_performance_dictionary['random_forest'].append(
    rf_oversampled_accuracy_score_float * 100
)

model_performance_ranking_dictionary['random_forest_oversampled'] = (
    rf_oversampled_accuracy_score_float * 100
)

### **Cluster Centroids**

In [None]:
# Display the parameters reported by random_forest_cluster_centroids_model.
random_forest_cluster_centroids_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
rf_cluster_centroids_predictions_nparray \
    = random_forest_cluster_centroids_model.predict(x_test_scaled_dataframe)

rf_cluster_centroids_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, rf_cluster_centroids_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for random forest cluster centroids from actual vs. test predictions is {:.2f}%' \
         .format(rf_cluster_centroids_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# cluster-centroids random forest test predictions.
(
    rf_cluster_centroids_accuracy_score_float,
    rf_cluster_centroids_confusion_matrix_dataframe,
    rf_cluster_centroids_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    rf_cluster_centroids_predictions_nparray,
    'RANDOM FOREST MODEL (Cluster Centroids)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage. Re-running this cell appends a duplicate
# entry to the list.
model_performance_dictionary['random_forest'].append(
    rf_cluster_centroids_accuracy_score_float * 100
)

model_performance_ranking_dictionary['random_forest_cluster_centroids'] = (
    rf_cluster_centroids_accuracy_score_float * 100
)

### **SMOTE**

In [None]:
# Display the parameters reported by random_forest_SMOTE_model.
random_forest_SMOTE_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
rf_SMOTE_predictions_nparray \
    = random_forest_SMOTE_model.predict(x_test_scaled_dataframe)

rf_SMOTE_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, rf_SMOTE_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for random forest SMOTE from actual vs. test predictions is {:.2f}%' \
         .format(rf_SMOTE_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# SMOTE random forest test predictions.
(
    rf_SMOTE_accuracy_score_float,
    rf_SMOTE_confusion_matrix_dataframe,
    rf_SMOTE_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    rf_SMOTE_predictions_nparray,
    'RANDOM FOREST MODEL (SMOTE)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage. Re-running this cell appends a duplicate
# entry to the list.
model_performance_dictionary['random_forest'].append(
    rf_SMOTE_accuracy_score_float * 100
)

model_performance_ranking_dictionary['random_forest_smote'] = (
    rf_SMOTE_accuracy_score_float * 100
)

### **SMOTEENN**

In [None]:
# Display the parameters reported by random_forest_SMOTEENN_model.
random_forest_SMOTEENN_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
rf_SMOTEENN_predictions_nparray \
    = random_forest_SMOTEENN_model.predict(x_test_scaled_dataframe)

rf_SMOTEENN_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, rf_SMOTEENN_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for random forest SMOTEENN from actual vs. test predictions is {:.2f}%' \
         .format(rf_SMOTEENN_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# SMOTEENN random forest test predictions.
(
    rf_SMOTEENN_accuracy_score_float,
    rf_SMOTEENN_confusion_matrix_dataframe,
    rf_SMOTEENN_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    rf_SMOTEENN_predictions_nparray,
    'RANDOM FOREST MODEL (SMOTEENN)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage. Re-running this cell appends a duplicate
# entry to the list.
model_performance_dictionary['random_forest'].append(
    rf_SMOTEENN_accuracy_score_float * 100
)

model_performance_ranking_dictionary['random_forest_smoteen'] = (
    rf_SMOTEENN_accuracy_score_float * 100
)

## **9.4: Support Vector Machine (SVM)**

### **Original**

In [None]:
# Display the parameters reported by svm_model.
svm_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
svm_predictions_nparray \
    = svm_model.predict(x_test_scaled_dataframe)

svm_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, svm_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for SVM from actual vs. test predictions is {:.2f}%' \
         .format(svm_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# original SVM test predictions.
(
    svm_accuracy_score_float,
    svm_confusion_matrix_dataframe,
    svm_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    svm_predictions_nparray,
    'SVM MODEL',
    'Healthy',
    'High-Risk',
)

# Start the SVM list and ranking entry with the score as a percentage.
model_performance_dictionary['svm'] = [svm_accuracy_score_float * 100]

model_performance_ranking_dictionary['svm'] = svm_accuracy_score_float * 100

### **Random Undersampling**

In [None]:
# Display the parameters reported by svm_undersampled_model.
svm_undersampled_model.get_params()

In [None]:
# The variable name and the logged text promise *balanced* accuracy, so use
# balanced_accuracy_score (mean of per-class recall) instead of accuracy_score.
# TODO: hoist this import into the notebook's import cell.
from sklearn.metrics import balanced_accuracy_score

# Test-set predictions; reused by the confusion-matrix cell that follows.
svm_undersampled_predictions_nparray \
    = svm_undersampled_model.predict(x_test_scaled_dataframe)

svm_undersampled_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, svm_undersampled_predictions_nparray) * 100

logx.print_and_log_text \
    ('\033[1m'
     + 'The balanced accuracy score for svm undersampled from actual vs. test predictions is {:.2f}%' \
         .format(svm_undersampled_balanced_accuracy_score_float)
     + '\033[0m')

In [None]:
# Unpack the three values returned by the classificationsx helper for the
# undersampled SVM test predictions.
(
    svm_undersampled_accuracy_score_float,
    svm_undersampled_confusion_matrix_dataframe,
    svm_undersampled_classification_report_string,
) = classificationsx.return_binary_classification_confusion_matrix(
    y_test_series,
    svm_undersampled_predictions_nparray,
    'SVM MODEL (Undersampled)',
    'Healthy',
    'High-Risk',
)

# Record the score as a percentage. Re-running this cell appends a duplicate
# entry to the list.
model_performance_dictionary['svm'].append(
    svm_undersampled_accuracy_score_float * 100
)

model_performance_ranking_dictionary['svm_undersampled'] = (
    svm_undersampled_accuracy_score_float * 100
)

### **Random Oversampling**

In [None]:
# Show the parameter settings of the oversampled SVM model as the cell output.
svm_oversampled_model.get_params()

In [None]:
# Imported in this cell so it does not depend on which metrics were imported earlier.
from sklearn.metrics import balanced_accuracy_score

# Predict the test-set labels with the oversampled SVM model.
svm_oversampled_predictions_nparray \
    = svm_oversampled_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score rather than accuracy_score so the value matches
# the variable name and the logged message; scaled to a percentage.
svm_oversampled_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, svm_oversampled_predictions_nparray) * 100

# Print and log the score; the escape codes switch ANSI bold on and off.
logx.print_and_log_text \
    ('\033[1m'
     + 'The balanced accuracy score for svm oversampled from actual vs. test predictions is {:.2f}%' \
         .format(svm_oversampled_balanced_accuracy_score_float)
     + '\033[0m')

In [None]:
# Evaluate the oversampled SVM predictions against the test labels; the helper's
# three results are unpacked as accuracy, confusion matrix, and classification report.
svm_oversampled_accuracy_score_float, \
svm_oversampled_confusion_matrix_dataframe, \
svm_oversampled_classification_report_string \
    = classificationsx.return_binary_classification_confusion_matrix \
        (y_test_series,
         svm_oversampled_predictions_nparray,
         'SVM MODEL (Oversampled)',
         'Healthy', 'High-Risk')

# Slot 2 of the per-model list is the oversampled score.  Slice assignment
# appends on a first run and overwrites on a re-run, so executing this cell
# twice does not add a duplicate entry that would misalign the performance matrix.
model_performance_dictionary['svm'][2:3] \
    = [svm_oversampled_accuracy_score_float * 100]

model_performance_ranking_dictionary['svm_oversampled'] \
    = svm_oversampled_accuracy_score_float * 100

### **Cluster Centroids**

In [None]:
# Show the parameter settings of the cluster-centroids SVM model as the cell output.
svm_cluster_centroids_model.get_params()

In [None]:
# Imported in this cell so it does not depend on which metrics were imported earlier.
from sklearn.metrics import balanced_accuracy_score

# Predict the test-set labels with the cluster-centroids SVM model.
svm_cluster_centroids_predictions_nparray \
    = svm_cluster_centroids_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score rather than accuracy_score so the value matches
# the variable name and the logged message; scaled to a percentage.
svm_cluster_centroids_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, svm_cluster_centroids_predictions_nparray) * 100

# Print and log the score; the escape codes switch ANSI bold on and off.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for svm cluster centroids from actual vs. test predictions is {:.2f}%' \
         .format(svm_cluster_centroids_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Evaluate the cluster-centroids SVM predictions against the test labels; the helper's
# three results are unpacked as accuracy, confusion matrix, and classification report.
svm_cluster_centroids_accuracy_score_float, \
svm_cluster_centroids_confusion_matrix_dataframe, \
svm_cluster_centroids_classification_report_string \
    = classificationsx.return_binary_classification_confusion_matrix \
        (y_test_series,
         svm_cluster_centroids_predictions_nparray,
         'SVM MODEL (Cluster Centroids)',
         'Healthy', 'High-Risk')

# Slot 3 of the per-model list is the cluster-centroids score.  Slice assignment
# appends on a first run and overwrites on a re-run, so executing this cell
# twice does not add a duplicate entry that would misalign the performance matrix.
model_performance_dictionary['svm'][3:4] \
    = [svm_cluster_centroids_accuracy_score_float * 100]

model_performance_ranking_dictionary['svm_cluster_centroids'] \
    = svm_cluster_centroids_accuracy_score_float * 100

### **SMOTE**

In [None]:
# Show the parameter settings of the SMOTE SVM model as the cell output.
svm_SMOTE_model.get_params()

In [None]:
# Imported in this cell so it does not depend on which metrics were imported earlier.
from sklearn.metrics import balanced_accuracy_score

# Predict the test-set labels with the SMOTE SVM model.
svm_SMOTE_predictions_nparray \
    = svm_SMOTE_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score rather than accuracy_score so the value matches
# the variable name and the logged message; scaled to a percentage.
svm_SMOTE_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, svm_SMOTE_predictions_nparray) * 100

# Print and log the score; the escape codes switch ANSI bold on and off.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for svm SMOTE from actual vs. test predictions is {:.2f}%' \
         .format(svm_SMOTE_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Evaluate the SMOTE SVM predictions against the test labels; the helper's
# three results are unpacked as accuracy, confusion matrix, and classification report.
svm_SMOTE_accuracy_score_float, \
svm_SMOTE_confusion_matrix_dataframe, \
svm_SMOTE_classification_report_string \
    = classificationsx.return_binary_classification_confusion_matrix \
        (y_test_series,
         svm_SMOTE_predictions_nparray,
         'SVM MODEL (SMOTE)',
         'Healthy', 'High-Risk')

# Slot 4 of the per-model list is the SMOTE score.  Slice assignment
# appends on a first run and overwrites on a re-run, so executing this cell
# twice does not add a duplicate entry that would misalign the performance matrix.
model_performance_dictionary['svm'][4:5] \
    = [svm_SMOTE_accuracy_score_float * 100]

model_performance_ranking_dictionary['svm_smote'] \
    = svm_SMOTE_accuracy_score_float * 100

### **SMOTEENN**

In [None]:
# Show the parameter settings of the SMOTEENN SVM model as the cell output.
svm_SMOTEENN_model.get_params()

In [None]:
# Imported in this cell so it does not depend on which metrics were imported earlier.
from sklearn.metrics import balanced_accuracy_score

# Predict the test-set labels with the SMOTEENN SVM model.
svm_SMOTEENN_predictions_nparray \
    = svm_SMOTEENN_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score rather than accuracy_score so the value matches
# the variable name and the logged message; scaled to a percentage.
svm_SMOTEENN_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, svm_SMOTEENN_predictions_nparray) * 100

# Print and log the score; the escape codes switch ANSI bold on and off.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for svm SMOTEENN from actual vs. test predictions is {:.2f}%' \
         .format(svm_SMOTEENN_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Evaluate the SMOTEENN SVM predictions against the test labels; the helper's
# three results are unpacked as accuracy, confusion matrix, and classification report.
svm_SMOTEENN_accuracy_score_float, \
svm_SMOTEENN_confusion_matrix_dataframe, \
svm_SMOTEENN_classification_report_string \
    = classificationsx.return_binary_classification_confusion_matrix \
        (y_test_series,
         svm_SMOTEENN_predictions_nparray,
         'SVM MODEL (SMOTEENN)',
         'Healthy', 'High-Risk')

# Slot 5 of the per-model list is the SMOTEENN score.  Slice assignment
# appends on a first run and overwrites on a re-run, so executing this cell
# twice does not add a duplicate entry that would misalign the performance matrix.
model_performance_dictionary['svm'][5:6] \
    = [svm_SMOTEENN_accuracy_score_float * 100]

model_performance_ranking_dictionary['svm_smoteen'] \
    = svm_SMOTEENN_accuracy_score_float * 100

## **9.5: K-Nearest Neighbor (KNN)**

### **Original**

In [None]:
# Show the parameter settings of the KNN model as the cell output.
knn_model.get_params()

In [None]:
# Imported in this cell so it does not depend on which metrics were imported earlier.
from sklearn.metrics import balanced_accuracy_score

# Predict the test-set labels with the KNN model.
knn_predictions_nparray = knn_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score rather than accuracy_score so the value matches
# the variable name and the logged message; scaled to a percentage.
knn_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, knn_predictions_nparray) * 100

# Print and log the score; the escape codes switch ANSI bold on and off.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for KNN from actual vs. test predictions is {:.2f}%' \
         .format(knn_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Evaluate the KNN predictions against the test labels; the helper's three
# results are unpacked as accuracy, confusion matrix, and classification report.
(knn_accuracy_score_float,
 knn_confusion_matrix_dataframe,
 knn_classification_report_string) = \
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        knn_predictions_nparray,
        'KNN MODEL',
        'Healthy', 'High-Risk')

# Record the score (as a percentage) for the ranking table ...
model_performance_ranking_dictionary['knn'] = knn_accuracy_score_float * 100

# ... and start this model's per-resampling-method score list with it.
model_performance_dictionary['knn'] = [knn_accuracy_score_float * 100]

### **Random Undersampling**

In [None]:
# Show the parameter settings of the undersampled KNN model as the cell output.
knn_undersampled_model.get_params()

In [None]:
# Imported in this cell so it does not depend on which metrics were imported earlier.
from sklearn.metrics import balanced_accuracy_score

# Predict the test-set labels with the undersampled KNN model.
knn_undersampled_predictions_nparray \
    = knn_undersampled_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score rather than accuracy_score so the value matches
# the variable name and the logged message; scaled to a percentage.
knn_undersampled_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, knn_undersampled_predictions_nparray) * 100

# Print and log the score; the escape codes switch ANSI bold on and off.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for knn undersampled from actual vs. test predictions is {:.2f}%' \
         .format(knn_undersampled_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Evaluate the undersampled KNN predictions against the test labels; the helper's
# three results are unpacked as accuracy, confusion matrix, and classification report.
knn_undersampled_accuracy_score_float, \
knn_undersampled_confusion_matrix_dataframe, \
knn_undersampled_classification_report_string \
    = classificationsx.return_binary_classification_confusion_matrix \
        (y_test_series,
         knn_undersampled_predictions_nparray,
         'KNN MODEL (Undersampled)',
         'Healthy', 'High-Risk')

# Slot 1 of the per-model list is the undersampled score.  Slice assignment
# appends on a first run and overwrites on a re-run, so executing this cell
# twice does not add a duplicate entry that would misalign the performance matrix.
model_performance_dictionary['knn'][1:2] \
    = [knn_undersampled_accuracy_score_float * 100]

model_performance_ranking_dictionary['knn_undersampled'] \
    = knn_undersampled_accuracy_score_float * 100

### **Random Oversampling**

In [None]:
# Show the parameter settings of the oversampled KNN model as the cell output.
knn_oversampled_model.get_params()

In [None]:
# Imported in this cell so it does not depend on which metrics were imported earlier.
from sklearn.metrics import balanced_accuracy_score

# Predict the test-set labels with the oversampled KNN model.
knn_oversampled_predictions_nparray \
    = knn_oversampled_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score rather than accuracy_score so the value matches
# the variable name and the logged message; scaled to a percentage.
knn_oversampled_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, knn_oversampled_predictions_nparray) * 100

# Print and log the score; the escape codes switch ANSI bold on and off.
logx.print_and_log_text \
    ('\033[1m'
     + 'The balanced accuracy score for knn oversampled from actual vs. test predictions is {:.2f}%' \
         .format(knn_oversampled_balanced_accuracy_score_float)
     + '\033[0m')

In [None]:
# Evaluate the oversampled KNN predictions against the test labels; the helper's
# three results are unpacked as accuracy, confusion matrix, and classification report.
knn_oversampled_accuracy_score_float, \
knn_oversampled_confusion_matrix_dataframe, \
knn_oversampled_classification_report_string \
    = classificationsx.return_binary_classification_confusion_matrix \
        (y_test_series,
         knn_oversampled_predictions_nparray,
         'KNN MODEL (Oversampled)',
         'Healthy', 'High-Risk')

# Slot 2 of the per-model list is the oversampled score.  Slice assignment
# appends on a first run and overwrites on a re-run, so executing this cell
# twice does not add a duplicate entry that would misalign the performance matrix.
model_performance_dictionary['knn'][2:3] \
    = [knn_oversampled_accuracy_score_float * 100]

model_performance_ranking_dictionary['knn_oversampled'] \
    = knn_oversampled_accuracy_score_float * 100

### **Cluster Centroids**

In [None]:
# Show the parameter settings of the cluster-centroids KNN model as the cell output.
knn_cluster_centroids_model.get_params()

In [None]:
# Imported in this cell so it does not depend on which metrics were imported earlier.
from sklearn.metrics import balanced_accuracy_score

# Predict the test-set labels with the cluster-centroids KNN model.
knn_cluster_centroids_predictions_nparray \
    = knn_cluster_centroids_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score rather than accuracy_score so the value matches
# the variable name and the logged message; scaled to a percentage.
knn_cluster_centroids_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, knn_cluster_centroids_predictions_nparray) * 100

# Print and log the score; the escape codes switch ANSI bold on and off.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for knn cluster centroids from actual vs. test predictions is {:.2f}%' \
         .format(knn_cluster_centroids_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Evaluate the cluster-centroids KNN predictions against the test labels; the helper's
# three results are unpacked as accuracy, confusion matrix, and classification report.
knn_cluster_centroids_accuracy_score_float, \
knn_cluster_centroids_confusion_matrix_dataframe, \
knn_cluster_centroids_classification_report_string \
    = classificationsx.return_binary_classification_confusion_matrix \
        (y_test_series,
         knn_cluster_centroids_predictions_nparray,
         'KNN MODEL (Cluster Centroids)',
         'Healthy', 'High-Risk')

# Slot 3 of the per-model list is the cluster-centroids score.  Slice assignment
# appends on a first run and overwrites on a re-run, so executing this cell
# twice does not add a duplicate entry that would misalign the performance matrix.
model_performance_dictionary['knn'][3:4] \
    = [knn_cluster_centroids_accuracy_score_float * 100]

model_performance_ranking_dictionary['knn_cluster_centroids'] \
    = knn_cluster_centroids_accuracy_score_float * 100

### **SMOTE**

In [None]:
# Show the parameter settings of the SMOTE KNN model as the cell output.
knn_SMOTE_model.get_params()

In [None]:
# Imported in this cell so it does not depend on which metrics were imported earlier.
from sklearn.metrics import balanced_accuracy_score

# Predict the test-set labels with the SMOTE KNN model.
knn_SMOTE_predictions_nparray \
    = knn_SMOTE_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score rather than accuracy_score so the value matches
# the variable name and the logged message; scaled to a percentage.
knn_SMOTE_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, knn_SMOTE_predictions_nparray) * 100

# Print and log the score; the escape codes switch ANSI bold on and off.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for knn SMOTE from actual vs. test predictions is {:.2f}%' \
         .format(knn_SMOTE_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Evaluate the SMOTE KNN predictions against the test labels; the helper's
# three results are unpacked as accuracy, confusion matrix, and classification report.
knn_SMOTE_accuracy_score_float, \
knn_SMOTE_confusion_matrix_dataframe, \
knn_SMOTE_classification_report_string \
    = classificationsx.return_binary_classification_confusion_matrix \
        (y_test_series,
         knn_SMOTE_predictions_nparray,
         'KNN MODEL (SMOTE)',
         'Healthy', 'High-Risk')

# Slot 4 of the per-model list is the SMOTE score.  Slice assignment
# appends on a first run and overwrites on a re-run, so executing this cell
# twice does not add a duplicate entry that would misalign the performance matrix.
model_performance_dictionary['knn'][4:5] \
    = [knn_SMOTE_accuracy_score_float * 100]

model_performance_ranking_dictionary['knn_smote'] \
    = knn_SMOTE_accuracy_score_float * 100

### **SMOTEENN**

In [None]:
# Show the parameter settings of the SMOTEENN KNN model as the cell output.
knn_SMOTEENN_model.get_params()

In [None]:
# Imported in this cell so it does not depend on which metrics were imported earlier.
from sklearn.metrics import balanced_accuracy_score

# Predict the test-set labels with the SMOTEENN KNN model.
knn_SMOTEENN_predictions_nparray \
    = knn_SMOTEENN_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score rather than accuracy_score so the value matches
# the variable name and the logged message; scaled to a percentage.
knn_SMOTEENN_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, knn_SMOTEENN_predictions_nparray) * 100

# Print and log the score; the escape codes switch ANSI bold on and off.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for knn SMOTEENN from actual vs. test predictions is {:.2f}%' \
         .format(knn_SMOTEENN_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Evaluate the SMOTEENN KNN predictions against the test labels; the helper's
# three results are unpacked as accuracy, confusion matrix, and classification report.
knn_SMOTEENN_accuracy_score_float, \
knn_SMOTEENN_confusion_matrix_dataframe, \
knn_SMOTEENN_classification_report_string \
    = classificationsx.return_binary_classification_confusion_matrix \
        (y_test_series,
         knn_SMOTEENN_predictions_nparray,
         'KNN MODEL (SMOTEENN)',
         'Healthy', 'High-Risk')

# Slot 5 of the per-model list is the SMOTEENN score.  Slice assignment
# appends on a first run and overwrites on a re-run, so executing this cell
# twice does not add a duplicate entry that would misalign the performance matrix.
model_performance_dictionary['knn'][5:6] \
    = [knn_SMOTEENN_accuracy_score_float * 100]

model_performance_ranking_dictionary['knn_smoteen'] \
    = knn_SMOTEENN_accuracy_score_float * 100

## **9.6: Gaussian Naive Bayes (GNB)**

### **Original**

In [None]:
# Show the parameter settings of the GNB model as the cell output.
gnb_model.get_params()

In [None]:
# Imported in this cell so it does not depend on which metrics were imported earlier.
from sklearn.metrics import balanced_accuracy_score

# Predict the test-set labels with the GNB model.
gnb_predictions_nparray = gnb_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score rather than accuracy_score so the value matches
# the variable name and the logged message; scaled to a percentage.
gnb_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, gnb_predictions_nparray) * 100

# Print and log the score; the escape codes switch ANSI bold on and off.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for GNB from actual vs. test predictions is {:.2f}%' \
         .format(gnb_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Evaluate the GNB predictions against the test labels; the helper's three
# results are unpacked as accuracy, confusion matrix, and classification report.
(gnb_accuracy_score_float,
 gnb_confusion_matrix_dataframe,
 gnb_classification_report_string) = \
    classificationsx.return_binary_classification_confusion_matrix(
        y_test_series,
        gnb_predictions_nparray,
        'GNB MODEL',
        'Healthy', 'High-Risk')

# Record the score (as a percentage) for the ranking table ...
model_performance_ranking_dictionary['gnb'] = gnb_accuracy_score_float * 100

# ... and start this model's per-resampling-method score list with it.
model_performance_dictionary['gnb'] = [gnb_accuracy_score_float * 100]

### **Random Undersampling**

In [None]:
# Show the parameter settings of the undersampled GNB model as the cell output.
gnb_undersampled_model.get_params()

In [None]:
# Imported in this cell so it does not depend on which metrics were imported earlier.
from sklearn.metrics import balanced_accuracy_score

# Predict the test-set labels with the undersampled GNB model.
gnb_undersampled_predictions_nparray \
    = gnb_undersampled_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score rather than accuracy_score so the value matches
# the variable name and the logged message; scaled to a percentage.
gnb_undersampled_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, gnb_undersampled_predictions_nparray) * 100

# Print and log the score; the escape codes switch ANSI bold on and off.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for gnb undersampled from actual vs. test predictions is {:.2f}%' \
         .format(gnb_undersampled_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Evaluate the undersampled GNB predictions against the test labels; the helper's
# three results are unpacked as accuracy, confusion matrix, and classification report.
gnb_undersampled_accuracy_score_float, \
gnb_undersampled_confusion_matrix_dataframe, \
gnb_undersampled_classification_report_string \
    = classificationsx.return_binary_classification_confusion_matrix \
        (y_test_series,
         gnb_undersampled_predictions_nparray,
         'GNB MODEL (Undersampled)',
         'Healthy', 'High-Risk')

# Slot 1 of the per-model list is the undersampled score.  Slice assignment
# appends on a first run and overwrites on a re-run, so executing this cell
# twice does not add a duplicate entry that would misalign the performance matrix.
model_performance_dictionary['gnb'][1:2] \
    = [gnb_undersampled_accuracy_score_float * 100]

model_performance_ranking_dictionary['gnb_undersampled'] \
    = gnb_undersampled_accuracy_score_float * 100

### **Random Oversampling**

In [None]:
# Show the parameter settings of the oversampled GNB model as the cell output.
gnb_oversampled_model.get_params()

In [None]:
# Imported in this cell so it does not depend on which metrics were imported earlier.
from sklearn.metrics import balanced_accuracy_score

# Predict the test-set labels with the oversampled GNB model.
gnb_oversampled_predictions_nparray \
    = gnb_oversampled_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score rather than accuracy_score so the value matches
# the variable name and the logged message; scaled to a percentage.
gnb_oversampled_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, gnb_oversampled_predictions_nparray) * 100

# Print and log the score; the escape codes switch ANSI bold on and off.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for gnb oversampled from actual vs. test predictions is {:.2f}%' \
         .format(gnb_oversampled_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Evaluate the oversampled GNB predictions against the test labels; the helper's
# three results are unpacked as accuracy, confusion matrix, and classification report.
gnb_oversampled_accuracy_score_float, \
gnb_oversampled_confusion_matrix_dataframe, \
gnb_oversampled_classification_report_string \
    = classificationsx.return_binary_classification_confusion_matrix \
        (y_test_series,
         gnb_oversampled_predictions_nparray,
         'GNB MODEL (Oversampled)',
         'Healthy', 'High-Risk')

# Slot 2 of the per-model list is the oversampled score.  Slice assignment
# appends on a first run and overwrites on a re-run, so executing this cell
# twice does not add a duplicate entry that would misalign the performance matrix.
model_performance_dictionary['gnb'][2:3] \
    = [gnb_oversampled_accuracy_score_float * 100]

model_performance_ranking_dictionary['gnb_oversampled'] \
    = gnb_oversampled_accuracy_score_float * 100

### **Cluster Centroids**

In [None]:
# Show the parameter settings of the cluster-centroids GNB model as the cell output.
gnb_cluster_centroids_model.get_params()

In [None]:
# Imported in this cell so it does not depend on which metrics were imported earlier.
from sklearn.metrics import balanced_accuracy_score

# Predict the test-set labels with the cluster-centroids GNB model.
gnb_cluster_centroids_predictions_nparray \
    = gnb_cluster_centroids_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score rather than accuracy_score so the value matches
# the variable name and the logged message; scaled to a percentage.
gnb_cluster_centroids_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, gnb_cluster_centroids_predictions_nparray) * 100

# Print and log the score; the escape codes switch ANSI bold on and off.
# The message names the GNB model (it previously said "knn", a copy-paste slip).
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for gnb cluster centroids from actual vs. test predictions is {:.2f}%' \
         .format(gnb_cluster_centroids_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Evaluate the cluster-centroids GNB predictions against the test labels; the helper's
# three results are unpacked as accuracy, confusion matrix, and classification report.
gnb_cluster_centroids_accuracy_score_float, \
gnb_cluster_centroids_confusion_matrix_dataframe, \
gnb_cluster_centroids_classification_report_string \
    = classificationsx.return_binary_classification_confusion_matrix \
        (y_test_series,
         gnb_cluster_centroids_predictions_nparray,
         'GNB MODEL (Cluster Centroids)',
         'Healthy', 'High-Risk')

# Slot 3 of the per-model list is the cluster-centroids score.  Slice assignment
# appends on a first run and overwrites on a re-run, so executing this cell
# twice does not add a duplicate entry that would misalign the performance matrix.
model_performance_dictionary['gnb'][3:4] \
    = [gnb_cluster_centroids_accuracy_score_float * 100]

model_performance_ranking_dictionary['gnb_cluster_centroids'] \
    = gnb_cluster_centroids_accuracy_score_float * 100

### **SMOTE**

In [None]:
# Show the parameter settings of the SMOTE GNB model as the cell output.
gnb_SMOTE_model.get_params()

In [None]:
# Imported in this cell so it does not depend on which metrics were imported earlier.
from sklearn.metrics import balanced_accuracy_score

# Predict the test-set labels with the SMOTE GNB model.
gnb_SMOTE_predictions_nparray \
    = gnb_SMOTE_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score rather than accuracy_score so the value matches
# the variable name and the logged message; scaled to a percentage.
gnb_SMOTE_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, gnb_SMOTE_predictions_nparray) * 100

# Print and log the score; the escape codes switch ANSI bold on and off.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for gnb SMOTE from actual vs. test predictions is {:.2f}%' \
         .format(gnb_SMOTE_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Evaluate the SMOTE GNB predictions against the test labels; the helper's
# three results are unpacked as accuracy, confusion matrix, and classification report.
gnb_SMOTE_accuracy_score_float, \
gnb_SMOTE_confusion_matrix_dataframe, \
gnb_SMOTE_classification_report_string \
    = classificationsx.return_binary_classification_confusion_matrix \
        (y_test_series,
         gnb_SMOTE_predictions_nparray,
         'GNB MODEL (SMOTE)',
         'Healthy', 'High-Risk')

# Slot 4 of the per-model list is the SMOTE score.  Slice assignment
# appends on a first run and overwrites on a re-run, so executing this cell
# twice does not add a duplicate entry that would misalign the performance matrix.
model_performance_dictionary['gnb'][4:5] \
    = [gnb_SMOTE_accuracy_score_float * 100]

model_performance_ranking_dictionary['gnb_smote'] \
    = gnb_SMOTE_accuracy_score_float * 100

### **SMOTEENN**

In [None]:
# Show the parameter settings of the SMOTEENN GNB model as the cell output.
gnb_SMOTEENN_model.get_params()

In [None]:
# Imported in this cell so it does not depend on which metrics were imported earlier.
from sklearn.metrics import balanced_accuracy_score

# Predict the test-set labels with the SMOTEENN GNB model.
gnb_SMOTEENN_predictions_nparray \
    = gnb_SMOTEENN_model.predict(x_test_scaled_dataframe)

# Use balanced_accuracy_score rather than accuracy_score so the value matches
# the variable name and the logged message; scaled to a percentage.
gnb_SMOTEENN_balanced_accuracy_score_float \
    = balanced_accuracy_score(y_test_series, gnb_SMOTEENN_predictions_nparray) * 100

# Print and log the score; the escape codes switch ANSI bold on and off.
logx.print_and_log_text \
    ('\033[1m' \
     + 'The balanced accuracy score for gnb SMOTEENN from actual vs. test predictions is {:.2f}%' \
         .format(gnb_SMOTEENN_balanced_accuracy_score_float) \
     + '\033[0m')

In [None]:
# Evaluate the SMOTEENN GNB predictions against the test labels; the helper's
# three results are unpacked as accuracy, confusion matrix, and classification report.
gnb_SMOTEENN_accuracy_score_float, \
gnb_SMOTEENN_confusion_matrix_dataframe, \
gnb_SMOTEENN_classification_report_string \
    = classificationsx.return_binary_classification_confusion_matrix \
        (y_test_series,
         gnb_SMOTEENN_predictions_nparray,
         'GNB MODEL (SMOTEENN)',
         'Healthy', 'High-Risk')

# Slot 5 of the per-model list is the SMOTEENN score.  Slice assignment
# appends on a first run and overwrites on a re-run, so executing this cell
# twice does not add a duplicate entry that would misalign the performance matrix.
model_performance_dictionary['gnb'][5:6] \
    = [gnb_SMOTEENN_accuracy_score_float * 100]

model_performance_ranking_dictionary['gnb_smoteen'] \
    = gnb_SMOTEENN_accuracy_score_float * 100

## **9.7: Model Performance Results**

### **Performance Matrix**

In [None]:
# Row labels for the performance matrix, in the order the scores were recorded.
index_string_list = [
    'original', 'undersampled', 'oversampled',
    'cluster centroids', 'smote', 'smoteen',
]

# Build the matrix in one chain: dictionary keys (models) become columns after
# the transpose, and the resampling-method labels become the row index.
model_performace_dataframe = (
    pd.DataFrame
    .from_dict(model_performance_dictionary, orient='index')
    .transpose()
    .assign(resampling_method=index_string_list)
    .set_index('resampling_method', drop=True)
)

logx.log_write_object(model_performace_dataframe)

In [None]:
# Pass the performance matrix to the project's table helper, then format every
# value as a one-decimal percentage for display as the cell output.
# NOTE(review): the imports visible at the top of the notebook show `pandasx`,
# not `pandas_processx` -- confirm this name is imported before this cell runs.
(
    pandas_processx
    .return_formatted_table(
        model_performace_dataframe,
        'Table 9.7.1: Model Performance Matrix',
        line_count_integer=36,
        hide_index_boolean=False,
    )
    .format('{:,.1f}%')
)

### **Performance Ranking**

In [None]:
# Order the (model, accuracy) pairs from highest to lowest accuracy.
temp_dictionary = dict(
    sorted(
        model_performance_ranking_dictionary.items(),
        key=lambda name_score_pair: name_score_pair[1],
        reverse=True,
    )
)

# One row per model, a single 'accuracy' column, index labelled 'model'.
model_performace_rankings_dataframe = (
    pd.DataFrame
    .from_dict(temp_dictionary, orient='index', columns=['accuracy'])
    .rename_axis('model')
)

logx.log_write_object(model_performace_rankings_dataframe)

In [None]:
# Pass the rankings to the project's table helper, then format the 'accuracy'
# column as a one-decimal percentage for display as the cell output.
# NOTE(review): the imports visible at the top of the notebook show `pandasx`,
# not `pandas_processx` -- confirm this name is imported before this cell runs.
(
    pandas_processx
    .return_formatted_table(
        model_performace_rankings_dataframe,
        'Table 9.7.2: Model Performance Rankings',
        line_count_integer=36,
        hide_index_boolean=False,
    )
    .format({'accuracy': '{:,.1f}%'})
)

## **Question:**
### How well does the logistic regression model predict both the `0` (healthy loan) and `1` (high-risk loan) labels?

## **Answer:**
### This logistic regression model does an excellent job predicting healthy loans with a small number of false positives and negatives leading to a precision score of 100%, a recall score of 100%, and an f1-score of 100%.  Nevertheless, this model less accurately predicts high-risk loans with a precision of 87%, a recall of 92%, and an f1-score of 90%. The balanced accuracy, 99%, is higher than the actual accuracy, 96%, because of the significant difference in label value counts, 75,036 vs. 2,500. The model's potential for an increase in accuracy and the comparatively inadequate performance in predicting high-risk loans vs. healthy loans are concerning. Thus, the model warrants further optimization either by closing the value count gap with additional data or random oversampling.

## **Question:**
### How well does the logistic regression model, fit with oversampled data, predict both the `0` (healthy loan) and `1` (high-risk loan) labels?

## **Answer:**

### In terms of accuracy, this Logistic Regression model with random oversampling matches the first model for predicting healthy loans and outperforms it for high-risk loans. For instance, the number of accepted healthy loans falls (18,642 to 18,632); the number of rejected high-risk loans expands (604 to 650); the number of false positives increases slightly (102 to 116); and the number of false negatives significantly drops (49 to 3). Moreover, using random oversampling to generate additional synthetic samples for the minority label's class eliminates the label value count discrepancy, leading to, among other things, the balanced accuracy score matching the overall accuracy score, 99%. For healthy loans, both models have 100% precision, 99% recall, and 100% f1-scores; for high-risk loans, the precision remains at 87%, the recall, 92%, increases by 8 percentage points to 100%, and the f1-score, 90%, increases by 3 percentage points to 93%. Consequently, using random oversampling with the Logistic Regression model maintains its identification of healthy loans while improving its identification of high-risk loans.

# <br> **Section 10: Save Models To Files**

## **10.1: Logistic Regression**

### **Original**

In [None]:
# Pickle the logistic regression model; the context manager closes the file
# (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_LR_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(logistic_regression_model, model_file)

### **Random Undersampling**

In [None]:
# Pickle the undersampled logistic regression model; the context manager closes
# the file (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_LR_UNDERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(logistic_regression_undersampled_model, model_file)

### **Random Oversampling**

In [None]:
# Pickle the oversampled logistic regression model; the context manager closes
# the file (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_LR_OVERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(logistic_regression_oversampled_model, model_file)

### **Cluster Centroids**

In [None]:
# Pickle the cluster-centroids logistic regression model; the context manager closes
# the file (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_LR_CENTROIDS_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(logistic_regression_cluster_centroids_model, model_file)

### **SMOTE**

In [None]:
# Pickle the SMOTE logistic regression model; the context manager closes the
# file (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_LR_SMOTE_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(logistic_regression_SMOTE_model, model_file)

### **SMOTEENN**

In [None]:
# Pickle the SMOTEENN logistic regression model; the context manager closes the
# file (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_LR_SMOTEENN_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(logistic_regression_SMOTEENN_model, model_file)

## **10.2: Decision Tree**

### **Original**

In [None]:
# Pickle the decision tree model; the context manager closes the file
# (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_DT_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(decision_tree_model, model_file)

### **Random Undersampling**

In [None]:
# Pickle the undersampled decision tree model; the context manager closes the
# file (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_DT_UNDERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(decision_tree_undersampled_model, model_file)

### **Random Oversampling**

In [None]:
# Pickle the oversampled decision tree model; the context manager closes the
# file (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_DT_OVERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(decision_tree_oversampled_model, model_file)

### **Cluster Centroids**

In [None]:
# Pickle the cluster-centroids decision tree model; the context manager closes the
# file (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_DT_CENTROIDS_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(decision_tree_cluster_centroids_model, model_file)

### **SMOTE**

In [None]:
# Pickle the SMOTE decision tree model; the context manager closes the file
# (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_DT_SMOTE_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(decision_tree_SMOTE_model, model_file)

### **SMOTEENN**

In [None]:
# Pickle the SMOTEENN decision tree model; the context manager closes the file
# (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_DT_SMOTEENN_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(decision_tree_SMOTEENN_model, model_file)

## **10.3: Random Forest**

### **Original**

In [None]:
# Pickle the random forest model; the context manager closes the file
# (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_RF_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(random_forest_model, model_file)

### **Random Undersampling**

In [None]:
# Pickle the undersampled random forest model; the context manager closes the
# file (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_RF_UNDERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(random_forest_undersampled_model, model_file)

### **Random Oversampling**

In [None]:
# Pickle the oversampled random forest model; the context manager closes the
# file (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_RF_OVERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(random_forest_oversampled_model, model_file)

### **Cluster Centroids**

In [None]:
# Pickle the cluster-centroids random forest model; the context manager closes the
# file (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_RF_CENTROIDS_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(random_forest_cluster_centroids_model, model_file)

### **SMOTE**

In [None]:
# Pickle the SMOTE random forest model; the context manager closes the file
# (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_RF_SMOTE_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(random_forest_SMOTE_model, model_file)

### **SMOTEENN**

In [None]:
# Pickle the SMOTEENN random forest model; the context manager closes the file
# (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_RF_SMOTEENN_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(random_forest_SMOTEENN_model, model_file)

## **10.4: Support Vector Machine (SVM)**

### **Original**

In [None]:
# Pickle the SVM model; the context manager closes the file (flushing the
# data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_SVM_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(svm_model, model_file)

### **Random Undersampling**

In [None]:
# Pickle the undersampled SVM model; the context manager closes the file
# (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_SVM_UNDERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(svm_undersampled_model, model_file)

### **Random Oversampling**

In [None]:
# Pickle the oversampled SVM model; the context manager closes the file
# (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_SVM_OVERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(svm_oversampled_model, model_file)

### **Cluster Centroids**

In [None]:
# Pickle the cluster-centroids SVM model; the context manager closes the file
# (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_SVM_CENTROIDS_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(svm_cluster_centroids_model, model_file)

### **SMOTE**

In [None]:
# Pickle the SMOTE SVM model; the context manager closes the file (flushing
# the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_SVM_SMOTE_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(svm_SMOTE_model, model_file)

### **SMOTEENN**

In [None]:
# Pickle the SMOTEENN SVM model; the context manager closes the file (flushing
# the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_SVM_SMOTEENN_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(svm_SMOTEENN_model, model_file)

## **10.5: K-Nearest Neighbor (KNN)**

### **Original**

In [None]:
# Pickle the KNN model; the context manager closes the file (flushing the
# data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_KNN_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(knn_model, model_file)

### **Random Undersampling**

In [None]:
# Pickle the undersampled KNN model; the context manager closes the file
# (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_KNN_UNDERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(knn_undersampled_model, model_file)

### **Random Oversampling**

In [None]:
# Pickle the oversampled KNN model; the context manager closes the file
# (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_KNN_OVERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(knn_oversampled_model, model_file)

### **Cluster Centroids**

In [None]:
# Pickle the cluster-centroids KNN model; the context manager closes the file
# (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_KNN_CENTROIDS_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(knn_cluster_centroids_model, model_file)

### **SMOTE**

In [None]:
# Pickle the SMOTE KNN model; the context manager closes the file (flushing
# the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_KNN_SMOTE_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(knn_SMOTE_model, model_file)

### **SMOTEENN**

In [None]:
# Pickle the SMOTEENN KNN model; the context manager closes the file (flushing
# the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_KNN_SMOTEENN_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(knn_SMOTEENN_model, model_file)

## **10.6: Gaussian Naive Bayes (GNB)**

### **Original**

In [None]:
# Pickle the GNB model; the context manager closes the file (flushing the
# data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_GNB_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(gnb_model, model_file)

### **Random Undersampling**

In [None]:
# Pickle the undersampled GNB model; the context manager closes the file
# (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_GNB_UNDERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(gnb_undersampled_model, model_file)

### **Random Oversampling**

In [None]:
# Pickle the oversampled GNB model; the context manager closes the file
# (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_GNB_OVERSAMPLED_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(gnb_oversampled_model, model_file)

### **Cluster Centroids**

In [None]:
# Pickle the cluster-centroids GNB model; the context manager closes the file
# (flushing the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_GNB_CENTROIDS_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(gnb_cluster_centroids_model, model_file)

### **SMOTE**

In [None]:
# Pickle the SMOTE GNB model; the context manager closes the file (flushing
# the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_GNB_SMOTE_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(gnb_SMOTE_model, model_file)

### **SMOTEENN**

In [None]:
# Pickle the SMOTEENN GNB model; the context manager closes the file (flushing
# the data) even if pickling raises, instead of leaking the handle.
with open(credit_risk_constants.CONSTANT_GNB_SMOTEENN_MODEL_FILE_PATH, 'wb') as model_file:
    pickle.dump(gnb_SMOTEENN_model, model_file)

In [None]:
# logx.end_program()