In [1]:
from scipy.stats import ttest_rel
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score



In [2]:
# Read the data
df = pd.read_csv('data.csv')
df.rename(columns=lambda x: x.strip(), inplace=True) # Remove leading/trailing whitespaces from column names
df.drop(columns=["Net Income Flag"], inplace=True)   # Drop the 'Net Income Flag' column as it's not needed

# Separate features and target variable
X = df.drop(columns=["Bankrupt?"])   # Features
y = df["Bankrupt?"]                   # Target variable


In [3]:
# 10-fold cross-validation setup
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

In [4]:
# Dictionary to store performance metrics for Artificial Neural Network (ANN)
ann_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': [], 'auc': []}

# Dictionary to store performance metrics for Naive Bayes (NB)
nb_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': [], 'auc': []}


In [5]:
# Define the Artificial Neural Network (ANN) model
ann = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)
# Parameters:
# - hidden_layer_sizes: Tuple, length = n_layers - 2, default=(100,)
#   The ith element represents the number of neurons in the ith hidden layer.
# - max_iter: int, default=200
#   Maximum number of iterations. The solver iterates until convergence or this number of iterations.
# - random_state: int, RandomState instance, default=None
#   Determines random number generation for weights and bias initialization, training data shuffling, etc.

# Define the Naive Bayes (NB) model
nb = GaussianNB()
# Parameters:
# GaussianNB does not have any specific parameters to define.


In [6]:
# Evaluate models using 10-fold cross-validation
for train_index, test_index in skf.split(X, y):
    # Split the data into training and testing sets
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    # Train and evaluate the Artificial Neural Network (ANN) model
    ann.fit(X_train, y_train)                # Train the ANN model
    y_pred_ann = ann.predict(X_test)         # Predict using the ANN model
    y_proba_ann = ann.predict_proba(X_test)[:, 1]  # Predict probabilities using the ANN model
    
    # Store performance metrics for the ANN model
    ann_metrics['accuracy'].append(accuracy_score(y_test, y_pred_ann))
    ann_metrics['precision'].append(precision_score(y_test, y_pred_ann))
    ann_metrics['recall'].append(recall_score(y_test, y_pred_ann))
    ann_metrics['f1'].append(f1_score(y_test, y_pred_ann))
    ann_metrics['auc'].append(roc_auc_score(y_test, y_proba_ann))
    
    # Train and evaluate the Naive Bayes (NB) model
    nb.fit(X_train, y_train)                 # Train the NB model
    y_pred_nb = nb.predict(X_test)           # Predict using the NB model
    y_proba_nb = nb.predict_proba(X_test)[:, 1]   # Predict probabilities using the NB model
    
    # Store performance metrics for the NB model
    nb_metrics['accuracy'].append(accuracy_score(y_test, y_pred_nb))
    nb_metrics['precision'].append(precision_score(y_test, y_pred_nb))
    nb_metrics['recall'].append(recall_score(y_test, y_pred_nb))
    nb_metrics['f1'].append(f1_score(y_test, y_pred_nb))
    nb_metrics['auc'].append(roc_auc_score(y_test, y_proba_nb))





In [7]:
# Function to perform paired t-test between two sets of metrics
def perform_ttest(metric1, metric2, metric_name):
 
    # Perform paired t-test
    t_stat, p_value = ttest_rel(metric1, metric2)
    
    # Print t-statistic and p-value
    print(f"{metric_name} - t-statistic: {t_stat:.4f}, p-value: {p_value:.4f}")
    
    # Interpret the test results based on the p-value
    if p_value < 0.05:
        print(f"Reject the null hypothesis: There is a significant difference in {metric_name} between the models.")
    else:
        print(f"Fail to reject the null hypothesis: No significant difference in {metric_name} between the models.")
    print()


In [8]:
# Perform t-test for each performance metric
for metric in ann_metrics.keys():
    # Call the perform_ttest function for each metric
    perform_ttest(ann_metrics[metric], nb_metrics[metric], metric.capitalize())


Accuracy - t-statistic: 23.2105, p-value: 0.0000
Reject the null hypothesis: There is a significant difference in Accuracy between the models.

Precision - t-statistic: 2.1127, p-value: 0.0638
Fail to reject the null hypothesis: No significant difference in Precision between the models.

Recall - t-statistic: -19.8578, p-value: 0.0000
Reject the null hypothesis: There is a significant difference in Recall between the models.

F1 - t-statistic: 1.1751, p-value: 0.2701
Fail to reject the null hypothesis: No significant difference in F1 between the models.

Auc - t-statistic: -3.4304, p-value: 0.0075
Reject the null hypothesis: There is a significant difference in Auc between the models.



In [9]:
# Print performance metrics 
print("ANN Metrics (10-fold CV):")
for metric, values in ann_metrics.items():
    print(f"{metric.capitalize()}: Mean = {np.mean(values):.4f}, Std = {np.std(values):.4f}")

print("\nNaive Bayes Metrics (10-fold CV):")
for metric, values in nb_metrics.items():
    print(f"{metric.capitalize()}: Mean = {np.mean(values):.4f}, Std = {np.std(values):.4f}")

ANN Metrics (10-fold CV):
Accuracy: Mean = 0.9402, Std = 0.0258
Precision: Mean = 0.1232, Std = 0.1283
Recall: Mean = 0.1000, Std = 0.0808
F1: Mean = 0.0915, Std = 0.0715
Auc: Mean = 0.5302, Std = 0.0329

Naive Bayes Metrics (10-fold CV):
Accuracy: Mean = 0.1016, Std = 0.1097
Precision: Mean = 0.0324, Std = 0.0021
Recall: Mean = 0.9273, Std = 0.1098
F1: Mean = 0.0626, Std = 0.0038
Auc: Mean = 0.6536, Std = 0.0958
