# K-Fold Cross Validation

### Load Data

In [None]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import KFold
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation, InputLayer
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Ask TensorFlow for a GPU device name; an empty string means none was found,
# in which case the CPU device is used instead.
device_name = tf.test.gpu_device_name()
if not device_name:
    device_name = "/device:CPU:0"
    print("No GPU, using {}.".format(device_name))
else:
    print("Found GPU at: {}".format(device_name))

Found GPU at: /device:GPU:0


In [None]:
! pip install -q kaggle

In [None]:
from google.colab import files

# Upload your Kaggle API key (kaggle.json).
# The result is bound to a name on purpose: files.upload() returns a
# {filename: file contents} mapping, and leaving the call as the last bare
# expression of the cell would display the key's contents in the cell output
# and store the secret in the saved notebook.
uploaded_files = files.upload()
print("Uploaded:", list(uploaded_files))

In [None]:
! mkdir ~/.kaggle

! cp kaggle.json ~/.kaggle/

In [None]:
# Restrict the API key file to owner read/write only (mode 600).
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the muffin-vs-chihuahua dataset archive with the Kaggle CLI.
!kaggle datasets download -d samuelcortinhas/muffin-vs-chihuahua-image-classification

Downloading muffin-vs-chihuahua-image-classification.zip to /content
100% 474M/474M [00:12<00:00, 42.7MB/s]
100% 474M/474M [00:12<00:00, 40.8MB/s]


In [None]:
!unzip -q ./muffin-vs-chihuahua-image-classification.zip -d .

In [None]:
from google.colab import drive

# Mount Google Drive under /content/drive, forcing a remount if it is already mounted.
drive.mount(
    '/content/drive',
    force_remount=True,
)

Mounted at /content/drive


In [None]:
# Locations of the train and test image directories
train_dir, test_dir = '/content/train', '/content/test'

# Target image size (square) used when loading images, and the loader batch size
img_height = img_width = 100
batch_size = 32

# Colour mode passed to the dataset loaders
color_mode = 'rgb'

In [None]:
# Build the batched training dataset from the images under train_dir.
# Shuffling is left at the loader's default and made repeatable via the seed.
train_ds = tf.keras.utils.image_dataset_from_directory(
    directory=train_dir,
    color_mode=color_mode,
    batch_size=batch_size,
    image_size=(img_height, img_width),
    seed=56,
)

Found 4733 files belonging to 2 classes.


In [None]:
# Build the batched test dataset from the images under test_dir.
# shuffle=False keeps the files in the loader's deterministic order.
test_ds = tf.keras.utils.image_dataset_from_directory(
    directory=test_dir,
    color_mode=color_mode,
    batch_size=batch_size,
    image_size=(img_height, img_width),
    shuffle=False,
    seed=56,
)

Found 1184 files belonging to 2 classes.


### Import functions

In [None]:
from utilities import *

In [None]:
# Turn the tf.data datasets into (images, labels) pairs using the helpers
# brought in by `from utilities import *`.
# NOTE(review): the results are later indexed with integer index arrays and fed
# to a model with a (100, 100, 1) input, so the helpers presumably return NumPy
# arrays of single-channel images -- confirm against utilities.py.
train_images, train_labels = preprocess_train_dataset(train_ds)
test_images, test_labels = preprocess_test_dataset(test_ds)

In [None]:
# Per-fold results: validation accuracy and zero-one loss (1 - accuracy)
folds_accuracy, folds_loss = [], []

In [None]:
# 5-fold splitter; shuffling with a fixed random_state keeps the folds repeatable.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=56,
)

In [None]:
# Stop training once validation accuracy has not improved for 15 epochs and
# roll the model back to its best weights.
es = EarlyStopping(
    monitor='val_accuracy',
    patience=15,
    restore_best_weights=True,
)

In [None]:
# Start from empty result lists every time this cell runs, so re-executing it
# does not append to values left over from a previous run and skew the averages.
folds_accuracy = []
folds_loss = []  # zero-one loss


def _build_fold_model():
    """Return a freshly initialised, compiled CNN for one cross-validation fold.

    The network takes 100x100 single-channel inputs, stacks three
    Conv2D -> MaxPooling2D -> BatchNormalization stages followed by a
    regularised dense head, and ends in a single sigmoid unit. It is compiled
    with binary cross-entropy, Adam (learning rate 0.001) and the accuracy
    metric.
    """
    # NOTE(review): the input is single-channel although the datasets are loaded
    # with color_mode='rgb' -- presumably the preprocess helpers convert to
    # grayscale; confirm against utilities.py.
    model = Sequential([
        InputLayer(input_shape=(100, 100, 1)),
        Conv2D(64, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((4, 4)),
        BatchNormalization(),
        Conv2D(128, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((4, 4)),
        BatchNormalization(),
        Conv2D(128, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((4, 4)),
        BatchNormalization(),
        Flatten(),
        Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
        Dropout(0.7),
        BatchNormalization(),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(
        loss='binary_crossentropy',
        optimizer=keras.optimizers.Adam(0.001),
        metrics=['accuracy'],
    )
    return model


# kf.split yields (training indices, held-out indices); the held-out part of
# each split is used as that fold's validation set.
for train_index, test_index in kf.split(train_images):
    # Models are created in a loop: release Keras' global state left by the
    # previous fold so memory use does not grow from fold to fold.
    keras.backend.clear_session()

    # Subset the training data into training and validation sets
    Train_images_cv, Val_images_cv = train_images[train_index], train_images[test_index]
    Train_labels_cv, Val_labels_cv = train_labels[train_index], train_labels[test_index]

    with tf.device(device_name):
        model_cv = _build_fold_model()
        history_cv = model_cv.fit(
            Train_images_cv,
            Train_labels_cv,
            epochs=50,
            batch_size=32,
            validation_data=(Val_images_cv, Val_labels_cv),
            callbacks=[es],
        )

        # NOTE(review): the same fold drives early stopping (with best-weight
        # restoration) and the score below, so the per-fold accuracy is likely
        # optimistic -- confirm this is acceptable for the analysis.
        loss, acc = model_cv.evaluate(Val_images_cv, Val_labels_cv)

    zero_one_loss = 1 - acc
    folds_loss.append(zero_one_loss)
    folds_accuracy.append(acc)

    # Running summary over the folds completed so far.
    print("Accuracies per fold:", folds_accuracy)
    print("Zero-One Loss per fold:", folds_loss)
    average_accuracy = np.mean(folds_accuracy)
    print("Average Accuracy:", average_accuracy)
    average_loss = np.mean(folds_loss)
    print("Average Zero-One Loss:", average_loss)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Accuracies per fold: [0.9017951488494873]
Zero-One Loss per fold: [0.0982048511505127]
Average Accuracy: 0.9017951488494873
Average Zero-One Loss: 0.0982048511505127
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
