In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, cohen_kappa_score
import matplotlib.pyplot as plt

In [None]:
# Integer class labels used throughout the notebook (bankruptcy is the positive class)
Bankruptcy, Non_Bankruptcy = 1, 0

# Load images
def load_images_from_folder(folder, label, target_size=(224, 224)):
    """Load every JPG/PNG image found directly in ``folder``.

    Args:
        folder: Directory to scan (not recursive).
        label: Value appended to the label list once per loaded image.
        target_size: ``(height, width)`` passed to ``load_img`` so that every
            image is resized on load. Defaults to ``(224, 224)``.

    Returns:
        A ``(images, labels)`` tuple of two equally long lists: the arrays
        produced by ``img_to_array`` and the matching copies of ``label``.
    """
    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the result
    # (and any later slicing of it) reproducible across runs and machines.
    for filename in sorted(os.listdir(folder)):
        # Compare case-insensitively so '.JPG' / '.PNG' files are not skipped.
        if not filename.lower().endswith(('.jpg', '.png')):
            continue
        image_path = os.path.join(folder, filename)
        image = tf.keras.preprocessing.image.load_img(image_path, target_size=target_size)
        images.append(tf.keras.preprocessing.image.img_to_array(image))
        labels.append(label)
    return images, labels

# Read both classes from disk, tagging each image with its class label
bankruptcy_images, bankruptcy_labels = load_images_from_folder(
    folder='FS0/Bankruptcy',
    label=Bankruptcy,
)
non_bankruptcy_images, non_bankruptcy_labels = load_images_from_folder(
    folder='FS0/NonBankruptcy',
    label=Non_Bankruptcy,
)

In [None]:
# Adjust for 1:2 ratio
# Keep at most twice as many non-bankruptcy images as bankruptcy images.
# NOTE: the slice only truncates; if fewer than 2 * min_size non-bankruptcy
# images were loaded, the resulting ratio is lower than 1:2.
min_size = len(bankruptcy_images)  # Number of bankruptcy images
non_bankruptcy_images = non_bankruptcy_images[:2 * min_size]
non_bankruptcy_labels = [Non_Bankruptcy] * len(non_bankruptcy_images)  # Keep labels in sync with the truncated list

# Combine and shuffle the dataset
images = np.array(bankruptcy_images + non_bankruptcy_images)
labels = np.array(bankruptcy_labels + non_bankruptcy_labels)
# Use a seeded permutation so the shuffled order is identical on every
# Restart & Run All (an unseeded shuffle made every downstream split differ).
indices = np.random.default_rng(42).permutation(len(labels))
images = images[indices]
labels = labels[indices]


In [None]:
# Split the data into training and test sets (80/20).
# stratify=labels keeps the class proportions of the imbalanced dataset
# the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.2, random_state=42, stratify=labels
)


In [None]:
# Define the CNN model
# Two Conv -> Dropout -> MaxPool stages feed a dense head that ends in a
# single sigmoid unit for binary classification.
model = Sequential([
    # First convolutional stage (expects 224x224 RGB input)
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    Dropout(0.25),
    MaxPooling2D((2, 2)),
    # Second convolutional stage
    Conv2D(64, (3, 3), activation='relu'),
    Dropout(0.25),
    MaxPooling2D((2, 2)),
    # Classifier head with heavier dropout
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# Adam optimizer, binary cross-entropy loss, accuracy tracked during training
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# Perform 5-fold cross-validation on the training split.
# random_state makes the fold assignment reproducible.
skfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold metric values; averaged in the next cell
avg_accuracy = []
avg_recall = []
avg_precision = []
avg_f1 = []
avg_roc_auc = []
avg_kappa = []
avg_type_ii_error = []


def _fresh_model():
    """Return a compiled copy of `model` with newly initialised weights."""
    clone = tf.keras.models.clone_model(model)
    # Compile settings mirror the ones used where `model` is defined.
    clone.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return clone


for fold, (train_index, test_index) in enumerate(skfold.split(X_train, y_train), 1):
    # Split the data into train and held-out sets for this fold
    X_train_fold, X_test_fold = X_train[train_index], X_train[test_index]
    y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]

    # Train a new network per fold. Re-fitting one shared model would carry
    # weights over from earlier folds, whose training data includes this
    # fold's held-out samples, and would inflate every metric after fold 1.
    fold_model = _fresh_model()
    history = fold_model.fit(X_train_fold, y_train_fold, epochs=10, batch_size=32, validation_split=0.2)

    # Evaluate on the held-out part of the fold; flatten (n, 1) -> (n,)
    y_pred = fold_model.predict(X_test_fold).ravel()
    y_pred_binary = (y_pred > 0.5).astype(int)

    # Calculate metrics for this fold
    accuracy = accuracy_score(y_test_fold, y_pred_binary)
    recall = recall_score(y_test_fold, y_pred_binary)
    precision = precision_score(y_test_fold, y_pred_binary)
    f1 = f1_score(y_test_fold, y_pred_binary)
    roc_auc = roc_auc_score(y_test_fold, y_pred)  # uses raw probabilities
    kappa = cohen_kappa_score(y_test_fold, y_pred_binary)
    type_ii_error = 1 - recall  # false-negative rate

    print(f'Fold {fold}:')
    print(f'  Accuracy: {accuracy}')
    print(f'  Recall: {recall}')
    print(f'  Precision: {precision}')
    print(f'  F1 Score: {f1}')
    print(f'  ROC AUC: {roc_auc}')
    print(f'  Kappa: {kappa}')
    print(f'  type_ii_error: {type_ii_error}')
    print()

    # Append metrics for this fold to the lists
    avg_accuracy.append(accuracy)
    avg_recall.append(recall)
    avg_precision.append(precision)
    avg_f1.append(f1)
    avg_roc_auc.append(roc_auc)
    avg_kappa.append(kappa)
    avg_type_ii_error.append(type_ii_error)

# The fold models were only for evaluation. Refit `model` from scratch on the
# whole training split so the object saved later in the notebook is trained.
model = _fresh_model()
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)



In [None]:
# Calculate average metrics over the cross-validation folds.
# Each avg_* name holds the list of per-fold values on entry and is replaced
# by its mean.
avg_accuracy = np.mean(avg_accuracy)
avg_recall = np.mean(avg_recall)
avg_precision = np.mean(avg_precision)
avg_f1 = np.mean(avg_f1)
avg_roc_auc = np.mean(avg_roc_auc)
avg_kappa = np.mean(avg_kappa)
# Average the per-fold list, not `type_ii_error`, which is only the value
# left over from the last fold.
avg_type_ii_error = np.mean(avg_type_ii_error)

# Print average metrics
print(f'Average Accuracy: {avg_accuracy}')
print(f'Average Recall: {avg_recall}')
print(f'Average Precision: {avg_precision}')
print(f'Average F1 Score: {avg_f1}')
print(f'Average ROC AUC: {avg_roc_auc}')
print(f'Average Kappa: {avg_kappa}')
print(f'Average Type II Error: {avg_type_ii_error}')


In [None]:
# Persist the network to an HDF5 file in the working directory
model.save(filepath='CNNFS_balanced_1_2.h5')