Import necessary libraries

In [None]:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout

from keras.utils import to_categorical
from keras.metrics import AUC
import matplotlib.pyplot as plt

from sklearn.calibration import LabelEncoder
from sklearn.model_selection import train_test_split, KFold
from sklearn.linear_model import LogisticRegression

In [None]:
# Read the image array and the raw class labels from disk.
images, labels = np.load('images.npy'), np.load('labels.npy')

# Map the raw labels to integer ids, then expand the ids into one-hot rows.
encoder = LabelEncoder()
labels = to_categorical(encoder.fit_transform(labels))

# Number of columns in the one-hot label matrix.
num_classes = labels.shape[1]

In [None]:
def getModel():
    """Build the (uncompiled) CNN classifier.

    The network takes 64x64 single-channel inputs and stacks three
    3x3 ReLU convolution stages with 16, 32 and 64 filters, each followed
    by 2x2 max pooling; the first two stages also end with batch
    normalization. The result is flattened and passed through a 256-unit
    ReLU layer, 50% dropout, and a softmax layer with ``num_classes``
    outputs (``num_classes`` is read from module scope).

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    # (filters, add batch-norm after pooling?) for each convolution stage.
    conv_stages = ((16, True), (32, True), (64, False))

    stack = [Input(shape=(64, 64, 1))]
    for filters, normalize in conv_stages:
        stack.append(Conv2D(filters, (3, 3), activation='relu'))
        stack.append(MaxPooling2D(2, 2))
        if normalize:
            stack.append(BatchNormalization())

    # Classification head.
    stack.append(Flatten())
    stack.append(Dense(units=256, activation='relu'))
    stack.append(Dropout(0.5))
    stack.append(Dense(units=num_classes, activation='softmax'))

    return Sequential(stack)

In [None]:
# Hold out 20% of the data as a test split; the cross-validation below only
# uses the remaining 80%.
# NOTE(review): X_test / y_test are not used anywhere in this cell.
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)
k = 5
kfold = KFold(n_splits=k, shuffle=True, random_state=42)

# Validation scores of every fold, each entry ordered as the compiled
# model reports them: [loss, accuracy, AUC].
fold_scores = []

for i, (train, val) in enumerate(kfold.split(X_train, y_train)):
    print(f"Fold {i + 1}/{k}")

    # Start from a freshly initialised model so that folds do not share weights.
    model = getModel()
    # No optimizer is passed, so Keras falls back to its default optimizer.
    model.compile(loss='categorical_crossentropy', metrics=['accuracy', AUC(multi_label=True)])

    validation_data = (X_train[val], y_train[val])

    model.fit(X_train[train], y_train[train], validation_data=validation_data, batch_size=128, epochs=15)

    # Score the fold on its held-out samples and keep the result. The previous
    # version evaluated on the training indices and discarded the return value,
    # so no per-fold score was ever produced.
    scores = model.evaluate(validation_data[0], validation_data[1], verbose=0)
    fold_scores.append(scores)
    print(f"Fold {i + 1} validation - loss: {scores[0]:.4f}, accuracy: {scores[1]:.4f}, AUC: {scores[2]:.4f}")

# Summarise cross-validation performance across all folds.
mean_loss, mean_accuracy, mean_auc = np.mean(fold_scores, axis=0)
print(f"Mean over {k} folds - loss: {mean_loss:.4f}, accuracy: {mean_accuracy:.4f}, AUC: {mean_auc:.4f}")
    