In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, cohen_kappa_score
from sklearn.utils import resample, class_weight

In [None]:
# Integer class labels: Non_Bankruptcy is encoded as 0, Bankruptcy as 1
Non_Bankruptcy, Bankruptcy = 0, 1

def load_images_from_folder(folder, label, target_size=(224, 224)):
    """Load every .jpg/.png image directly inside ``folder`` and tag it with ``label``.

    Files are visited in sorted filename order so the returned lists are
    reproducible from run to run (``os.listdir`` returns entries in arbitrary
    order), and extensions are matched case-insensitively so files such as
    ``scan.JPG`` are not silently skipped.

    Args:
        folder: Directory to scan; sub-directories are not traversed.
        label: Value appended to the label list once per loaded image.
        target_size: ``(height, width)`` forwarded to ``load_img``; every
            image is resized to this shape while loading.

    Returns:
        A tuple ``(images, labels)`` of two equal-length lists. ``images``
        holds the arrays produced by ``img_to_array``; ``labels`` holds one
        copy of ``label`` per image.
    """
    image_extensions = ('.jpg', '.png')
    images = []
    labels = []
    for filename in sorted(os.listdir(folder)):
        # Skip anything that is not a recognised image file
        if not filename.lower().endswith(image_extensions):
            continue
        image_path = os.path.join(folder, filename)
        image = tf.keras.preprocessing.image.load_img(image_path, target_size=target_size)
        images.append(tf.keras.preprocessing.image.img_to_array(image))
        labels.append(label)
    return images, labels

# Read both class folders; each call returns parallel image and label lists
bankruptcy_images, bankruptcy_labels = load_images_from_folder(
    folder='FS0/Bankruptcy', label=Bankruptcy)
non_bankruptcy_images, non_bankruptcy_labels = load_images_from_folder(
    folder='FS0/NonBankruptcy', label=Non_Bankruptcy)

In [None]:
# Data augmentation for the bankruptcy class: every original image is kept and
# AUGMENTED_COPIES_PER_IMAGE randomly transformed copies of it are added.
#
# NOTE(review): this runs before the train/test split further down, so an
# original image and its augmented copy can land on opposite sides of the split
# (and of the cross-validation folds). Consider augmenting only the training
# portion to avoid optimistic scores.
AUGMENTATION_SEED = 42          # base seed so the augmented copies are reproducible
AUGMENTED_COPIES_PER_IMAGE = 1  # extra transformed images generated per original

data_gen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

augmented_bankruptcy_images = []
augmented_bankruptcy_labels = []

for image_index, image in enumerate(bankruptcy_images):
    # Keep the original image
    augmented_bankruptcy_images.append(image)
    augmented_bankruptcy_labels.append(Bankruptcy)

    # flow() expects a rank-4 batch, so wrap the single image in a batch axis
    image_batch = np.expand_dims(image, axis=0)
    for copy_index in range(AUGMENTED_COPIES_PER_IMAGE):
        # A distinct seed per generated copy keeps the transforms varied across
        # images while making the whole augmentation repeatable.
        flow_seed = (AUGMENTATION_SEED
                     + image_index * AUGMENTED_COPIES_PER_IMAGE
                     + copy_index)
        batch = next(data_gen.flow(image_batch, batch_size=1, seed=flow_seed))
        augmented_image = batch[0]
        augmented_bankruptcy_images.append(augmented_image)
        augmented_bankruptcy_labels.append(Bankruptcy)

In [None]:
# Randomly keep half of the non-bankruptcy samples, drawn without replacement;
# images and labels are resampled together so they stay aligned.
non_bankruptcy_images_resampled, non_bankruptcy_labels_resampled = resample(
    non_bankruptcy_images, non_bankruptcy_labels,
    n_samples=len(non_bankruptcy_images) // 2,
    replace=False,
    random_state=42,
)

In [None]:
# Stack the augmented bankruptcy samples and the downsampled non-bankruptcy
# samples along the first axis (images and labels in the same order)
images_combined = np.concatenate(
    [augmented_bankruptcy_images, non_bankruptcy_images_resampled], axis=0)
labels_combined = np.concatenate(
    [augmented_bankruptcy_labels, non_bankruptcy_labels_resampled], axis=0)

In [None]:
# Hold out 20% of the data as a test set. Stratifying on the labels keeps the
# class ratio approximately equal in both partitions, consistent with the
# StratifiedKFold used for cross-validation on the training portion.
X_train, X_test, y_train, y_test = train_test_split(
    images_combined,
    labels_combined,
    test_size=0.2,
    random_state=42,
    stratify=labels_combined,
)


In [None]:
# CNN for binary classification of 224x224 3-channel images: two
# Conv -> Dropout -> MaxPool stages followed by a small dense head.
#
# NOTE(review): in the visible code the img_to_array output is fed to the
# network unscaled — consider rescaling pixel values to [0, 1]; confirm.
model = Sequential([
    # Stage 1: 32 3x3 filters, dropout for regularization, 2x2 pooling
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    Dropout(0.25),
    MaxPooling2D((2, 2)),

    # Stage 2: 64 3x3 filters, same dropout/pooling pattern
    Conv2D(64, (3, 3), activation='relu'),
    Dropout(0.25),
    MaxPooling2D((2, 2)),

    # Dense head: flatten, one hidden layer with heavier dropout, then a
    # single sigmoid unit for the binary output
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# Adam optimizer with binary cross-entropy loss; accuracy tracked during fit
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# Balanced class weights computed from the label frequencies in y_train, so the
# rarer class is weighted more heavily in the loss.
class_labels = np.unique(y_train)
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=class_labels,
    y=y_train
)
# Key every weight by its actual class label rather than by its position in
# the array: positional keys are only correct when the labels present are
# exactly 0..k-1 (e.g. they would be wrong if a class were missing).
class_weight_dict = {
    int(label): float(weight)
    for label, weight in zip(class_labels, class_weights)
}

In [None]:
# 5-fold stratified cross-validation on the training set.
#
# Every fold trains a freshly initialised copy of `model`. Fitting the same
# model instance fold after fold would let it train on samples that a later
# fold uses as its held-out set, which inflates every metric.
skfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)


def _fresh_compiled_model(template):
    """Return a newly initialised, compiled copy of ``template``.

    ``clone_model`` rebuilds the architecture with new layers (and therefore
    new weights). The compile settings mirror the ones used where ``model``
    is defined and must be kept in sync with them.
    """
    fresh_model = tf.keras.models.clone_model(template)
    fresh_model.compile(optimizer='adam',
                        loss='binary_crossentropy',
                        metrics=['accuracy'])
    return fresh_model


# Per-fold scores, one entry per fold
avg_accuracy = []
avg_recall = []
avg_precision = []
avg_f1 = []
avg_roc_auc = []
avg_kappa = []
avg_type_ii_error = []

for fold, (train_index, test_index) in enumerate(skfold.split(X_train, y_train), 1):
    X_train_fold, X_test_fold = X_train[train_index], X_train[test_index]
    y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]

    # Train an independent model on this fold's training part, with class weights
    fold_model = _fresh_compiled_model(model)
    history = fold_model.fit(
        X_train_fold, y_train_fold,
        epochs=10,
        batch_size=32,
        validation_split=0.2,
        class_weight=class_weight_dict
    )

    # Predict on the held-out part; flatten the (n, 1) sigmoid output to 1-D
    y_pred = fold_model.predict(X_test_fold).ravel()
    y_pred_binary = (y_pred > 0.5).astype(int)

    # Calculate metrics; zero_division=0 is used consistently so a fold with no
    # positive predictions scores 0 instead of emitting warnings
    accuracy = accuracy_score(y_test_fold, y_pred_binary)
    recall = recall_score(y_test_fold, y_pred_binary, zero_division=0)
    precision = precision_score(y_test_fold, y_pred_binary, zero_division=0)
    f1 = f1_score(y_test_fold, y_pred_binary, zero_division=0)
    roc_auc = roc_auc_score(y_test_fold, y_pred)  # uses the raw probabilities
    kappa = cohen_kappa_score(y_test_fold, y_pred_binary)
    type_ii_error = 1 - recall  # false-negative rate

    # Append metrics
    avg_accuracy.append(accuracy)
    avg_recall.append(recall)
    avg_precision.append(precision)
    avg_f1.append(f1)
    avg_roc_auc.append(roc_auc)
    avg_kappa.append(kappa)
    avg_type_ii_error.append(type_ii_error)

    print(f'Fold {fold}: accuracy={accuracy:.4f} recall={recall:.4f} '
          f'precision={precision:.4f} f1={f1:.4f} roc_auc={roc_auc:.4f}')

    # Drop the reference so the fold's model can be garbage-collected
    del fold_model

# The fold models were only for estimating performance. Refit a fresh copy on
# the whole training set so that `model` is a trained network for later use.
model = _fresh_compiled_model(model)
model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
    class_weight=class_weight_dict
)
    

In [None]:
# Collapse each list of per-fold scores into its mean (the names are rebound
# from lists to scalars), then report the means.
(avg_accuracy, avg_recall, avg_precision, avg_f1,
 avg_roc_auc, avg_kappa, avg_type_ii_error) = [
    np.mean(fold_scores)
    for fold_scores in (avg_accuracy, avg_recall, avg_precision, avg_f1,
                        avg_roc_auc, avg_kappa, avg_type_ii_error)
]

print(f'Average Accuracy: {avg_accuracy}')
print(f'Average Recall: {avg_recall}')
print(f'Average Precision: {avg_precision}')
print(f'Average F1 Score: {avg_f1}')
print(f'Average ROC AUC: {avg_roc_auc}')
print(f'Average Kappa: {avg_kappa}')
print(f'Average Type II Error: {avg_type_ii_error}')

In [None]:
# Persist the model to disk as an .h5 file in the working directory
model.save(filepath='CNNFS_balanced_equal_a half.h5')