In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (confusion_matrix, accuracy_score, precision_score,
                             recall_score, f1_score, classification_report, roc_auc_score)
import pickle
import os


# 1. Load Test Data
#
# Reads the held-out CSV and splits it into the feature frame (X_test) and
# the 'Bankrupt?' label column (y_test).

test_data_path = ""  # Add the test data set here

# The path above ships as an empty placeholder. Fail with an actionable
# message instead of letting read_csv complain about a file named ''.
# Only the empty case is guarded so that any location read_csv itself
# understands keeps working unchanged.
if not test_data_path:
    raise FileNotFoundError(
        "test_data_path is empty. Set it to the location of the test CSV before running."
    )

test_df = pd.read_csv(test_data_path)

# Separate features and target
X_test = test_df.drop(columns=['Bankrupt?'])
y_test = test_df['Bankrupt?']


# 2. Feature Selection
#
# Column names used to subset the test frame before scaling and prediction.
# Every name starts with a single space (presumably mirroring the raw CSV
# headers -- confirm against the data file), so do not strip or reformat them.

selected_features = [
    " Cash flow rate",
    " Tax rate (A)",
    " Net Value Per Share (A)",
    " Persistent EPS in the Last Four Seasons",
    " Cash Flow Per Share",
    " Operating Profit Per Share (Yuan ¥)",
    " Total debt/Total net worth",
    " Debt ratio %",
    " Borrowing dependency",
    " Contingent liabilities/Net worth",
    " Operating profit per person",
    " Working Capital to Total Assets",
    " Quick Assets/Total Assets",
    " Cash/Total Assets",
    " Current Liability to Assets",
    " Operating Funds to Liability",
    " Working Capital/Equity",
    " Long-term Liability to Current Assets",
    " Retained Earnings to Total Assets",
    " Total expense/Assets",
    " Fixed Assets to Assets",
    " Equity to Long-term Liability",
    " Cash Flow to Total Assets",
    " CFO to Assets",
    " Current Liability to Current Assets",
    " Liability-Assets Flag",
    " Net Income to Total Assets",
    " Gross Profit to Sales",
    " Net Income to Stockholder's Equity",
    " Equity to Liability",
]

X_test_selected = X_test[selected_features]


# 3. Scale the Test Data
#
# Applies the pickled scaler (presumably the one fitted at training time --
# confirm) to the selected columns; nothing is re-fitted here.

# Guard clause: stop immediately when the scaler artifact is missing.
if not os.path.exists('scaler.pkl'):
    raise FileNotFoundError("scaler.pkl not found. Ensure the scaler file is available.")

# NOTE: unpickling executes code embedded in the file, so scaler.pkl must
# come from a trusted source.
with open('scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

X_test_scaled = scaler.transform(X_test_selected)


# 4. Load the Saved Models
#
# Loads the Keras network and the pickled classifier used for the ensemble.
# Each artifact path is defined once so the existence check, the load call
# and the error message always refer to the same file (the previous message
# for the pickled model named a different file than the one actually opened).

dnn_model_path = 'dnn_model.h5'
gnb_model_path = 'GaussianNB_model (1).pkl'

if not os.path.exists(dnn_model_path):
    raise FileNotFoundError(f"{dnn_model_path} not found. Ensure the model file is available.")
dnn_model = load_model(dnn_model_path)

if not os.path.exists(gnb_model_path):
    raise FileNotFoundError(f"{gnb_model_path} not found. Ensure the model file is available.")
# NOTE: unpickling executes code embedded in the file, so the model pickle
# must come from a trusted source.
with open(gnb_model_path, 'rb') as f:
    gnb_model = pickle.load(f)


# 5. Generate Predictions
#
# Per-model scores: column 1 of predict_proba for the pickled classifier
# (the class-1 probability, assuming its classes are ordered [0, 1]) and the
# flattened predict() output of the Keras model.

y_pred_gnb_prob = gnb_model.predict_proba(X_test_scaled)[:, 1]
y_pred_dnn_prob = dnn_model.predict(X_test_scaled).flatten()

# Combine predictions using soft voting (unweighted mean of the two scores)
combined_prob = (y_pred_gnb_prob + y_pred_dnn_prob) / 2

# Tune threshold for best F1-score
# NOTE(review): the threshold is selected using y_test, the same labels the
# metrics in section 6 are computed on, so those scores are optimistic.
# Consider tuning on a separate validation split.
thresholds = np.arange(0.30, 0.60, 0.01)
f1_by_threshold = [
    (f1_score(y_test, (combined_prob > cutoff).astype(int)), cutoff)
    for cutoff in thresholds
]

# max() returns the first of several equal maxima, so ties resolve to the
# lowest threshold, exactly as a strict ">" scan over ascending values would.
top_f1, top_cutoff = max(f1_by_threshold, key=lambda pair: pair[0])

# When no candidate reaches a positive F1, keep the 0 / 0.5 defaults.
best_f1, best_thresh = (top_f1, top_cutoff) if top_f1 > 0 else (0, 0.5)

print(f"\nBest threshold: {best_thresh:.2f} with F1-Score: {best_f1:.4f}")

# Final predictions using the best threshold
y_pred_final = (combined_prob > best_thresh).astype(int)


# 6. Evaluation Metrics
#
# Accuracy, precision, recall and F1 are computed from the thresholded
# labels (y_pred_final); ROC-AUC is computed from the averaged scores
# (combined_prob) and therefore does not depend on the chosen threshold.

accuracy = accuracy_score(y_test, y_pred_final)
precision = precision_score(y_test, y_pred_final)
recall = recall_score(y_test, y_pred_final)
f1 = f1_score(y_test, y_pred_final)
roc_auc = roc_auc_score(y_test, combined_prob)

# One print call, one argument per output line.
print(
    "\nFinal Evaluation:",
    f"Accuracy:  {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall:    {recall:.4f}",
    f"F1 Score:  {f1:.4f}",
    f"ROC-AUC:   {roc_auc:.4f}",
    sep="\n",
)

print(
    "\nClassification Report:",
    classification_report(y_test, y_pred_final),
    sep="\n",
)

cm = confusion_matrix(y_test, y_pred_final)
print("\nConfusion Matrix:", cm, sep="\n")




[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step

Best threshold: 0.37 with F1-Score: 0.4262

Final Evaluation:
Accuracy:  0.9489
Precision: 0.4643
Recall:    0.3939
F1 Score:  0.4262
ROC-AUC:   0.8825

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.98      0.97      1303
           1       0.46      0.39      0.43        66

    accuracy                           0.95      1369
   macro avg       0.72      0.69      0.70      1369
weighted avg       0.95      0.95      0.95      1369


Confusion Matrix:
[[1273   30]
 [  40   26]]
