In [11]:
import numpy as np
import cv2
import glob
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score
# from my_model import MyModel # Import your model here
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.vgg16 import VGG16
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import keras

# Define the path to the two folders containing the image files
folder1_path = "/kaggle/input/fundus-images/Data_Processed_Gray/Diseased/"
folder2_path = "/kaggle/input/fundus-images/Data_Processed_Gray/Healthy/"

# Define the image size and number of channels

# Define the image size and number of channels
img_size = (224, 224)
num_channels = 3
NUM_CLASSES=2
BATCH_SIZE=32
EPOCHS=200
# Define the number of folds for cross-validation
n_splits = 2


# Initialize lists to store image data and labels
X = []
y = []

# Load image files from folder1 and append to X and y lists
for filepath in glob.glob(folder1_path + "*.jpg"):
    img = cv2.imread(filepath)
    img = cv2.resize(img, img_size)
    X.append(img/255.)
    y.append(0) # Set label to 0 for images in folder1

# Load image files from folder2 and append to X and y lists
for filepath in glob.glob(folder2_path + "*.jpg"):
    img = cv2.imread(filepath)
    img = cv2.resize(img, img_size)
    X.append(img/255.)
    y.append(1) # Set label to 1 for images in folder2

# Convert X and y to numpy arrays
X = np.array(X)
y = np.array(y)

print('Loading Complete')
# Define the VGG16 model
vgg_model = VGG16(weights='imagenet', include_top=False, input_shape=(img_size[0], img_size[1], 3))

# Freeze the weights of the VGG16 layers
for layer in vgg_model.layers:
    layer.trainable = False

# Add a new top layer to the model
x = Flatten()(vgg_model.output)
x = Dense(512, activation='relu')(x)
x = Dense(NUM_CLASSES, activation='softmax')(x)
model = Model(inputs=vgg_model.input, outputs=x)

# Define the data augmentation generator
data_augmentation = ImageDataGenerator(
        rotation_range=10,
#         width_shift_range=0.1,
#         height_shift_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='nearest'
)


# Initialize lists to store evaluation metrics for each fold
accuracies = []
precisions = []
f1scores = []
recalls = []

# Initialize a KFold object
kf = KFold(n_splits=n_splits, shuffle=True)

# Perform cross-validation
fold = 1
for train_index, test_index in kf.split(X):
    print(f"Fold {fold}")
    X_train, X_test = X[train_index], X[test_index]
#     print(X)
    y_train, y_test = y[train_index], y[test_index]

    # Train your model on the training data
    optimizer = Adam(lr=0.0001)
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    # Calculate class weights based on the training data
    class_weights = {0: len(y_train[y_train== 0])/len(y_train), 
                     1: len(y_train[y_train == 1])/len(y_train)}
    
    print(class_weights)
    # Create a checkpoint to save the best model weights
    checkpoint_path = f'fold{fold}-best.h5'
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_accuracy',
        mode='max',
        save_best_only=True,
#         save_weights_only=True,
        verbose=1
    )
    
    # Fit the model with data augmentation and the checkpoint
    history = model.fit(
        data_augmentation.flow(X_train, y_train, batch_size=BATCH_SIZE),
        steps_per_epoch=len(X_train) // BATCH_SIZE,
        epochs=EPOCHS,
        class_weight=class_weights,
        validation_data=(X_test, y_test),
        callbacks=[checkpoint]
    )
    
    model = keras.models.load_model(f'fold{fold}-best.h5')

    # Make predictions on the test data using the trained model
    y_pred = model.predict(X_test)

    # Evaluate the predictions
    y_pred=np.argmax(y_pred,axis=1)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    f1score = f1_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')

    # Print the evaluation metrics
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"F1 Score: {f1score}")
    print(f"Recall: {recall}")

    # Append the evaluation metrics to the lists
    accuracies.append(accuracy)
    precisions.append(precision)
    f1scores.append(f1score)
    recalls.append(recall)

# Write the evaluation metrics to a text file
with open('evaluation_metrics_VGG.txt', 'w') as f:
    f.write(f"Accuracy: {accuracies}\n")
    f.write(f"Precision: {precisions}\n")
    f.write(f"F1 Score: {f1scores}\n")
    f.write(f"Recall: {recalls}\n")


Loading Complete
Fold 1
{0: 0.7397260273972602, 1: 0.2602739726027397}
Epoch 1/200
Epoch 1: val_accuracy improved from -inf to 0.71233, saving model to fold1-best.h5
Epoch 2/200
Epoch 2: val_accuracy did not improve from 0.71233
Epoch 3/200
Epoch 3: val_accuracy did not improve from 0.71233
Epoch 4/200
Epoch 4: val_accuracy improved from 0.71233 to 0.84932, saving model to fold1-best.h5
Epoch 5/200
Epoch 5: val_accuracy did not improve from 0.84932
Epoch 6/200
Epoch 6: val_accuracy improved from 0.84932 to 0.94521, saving model to fold1-best.h5
Epoch 7/200
Epoch 7: val_accuracy did not improve from 0.94521
Epoch 8/200
Epoch 8: val_accuracy did not improve from 0.94521
Epoch 9/200
Epoch 9: val_accuracy did not improve from 0.94521
Epoch 10/200
Epoch 10: val_accuracy did not improve from 0.94521
Epoch 11/200
Epoch 11: val_accuracy did not improve from 0.94521
Epoch 12/200
Epoch 12: val_accuracy improved from 0.94521 to 0.96575, saving model to fold1-best.h5
Epoch 13/200
Epoch 13: val_acc