### Imports

In [1]:
from utils import *
import cv2
import numpy as np
from tensorflow.keras.applications import VGG16
from tensorflow.keras import models, layers, optimizers, losses
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report

### Read/Preprocess Data

In [2]:
# Side length (in pixels) of the square model input; also read by vgg16_model().
IMG_SIZE=224

# read_images / resize_and_rescale / split_and_shuffle come from `utils` (star import).
# NOTE(review): presumably X holds single-channel images and y integer class labels — confirm in utils.
X, y = read_images()
X = resize_and_rescale(X, IMG_SIZE=IMG_SIZE, rescale=False)
X_train, y_train, X_test, y_test = split_and_shuffle(X, y)

# convert to rgb images
# Replicate every pixel value into three identical channels by stacking the
# array with itself along a new axis 3. For (N, H, W) input this yields
# (N, H, W, 3), the same result as building [x, x, x] per pixel, but
# vectorised instead of looping over every pixel in Python.
X_train = np.stack((X_train,) * 3, axis=3)
X_test = np.stack((X_test,) * 3, axis=3)

### Model

In [3]:
def vgg16_model():
    """Build and compile a two-class classifier on top of a frozen VGG16 base.

    The convolutional part of VGG16 (ImageNet weights, no top) is used as a
    fixed feature extractor for inputs of shape (IMG_SIZE, IMG_SIZE, 3). Its
    feature maps are globally average-pooled and passed through dropout and
    two 4096-unit ReLU layers to a 2-unit softmax output.

    Returns:
        A compiled ``Sequential`` model using Adam (learning rate 0.001),
        sparse categorical cross-entropy on probabilities, and the
        ``accuracy`` metric. Labels are expected as integer class indices.
    """
    vgg16 = VGG16(include_top=False, weights="imagenet", input_shape=(IMG_SIZE, IMG_SIZE, 3))

    # prevent training of VGG16
    vgg16.trainable = False

    model = models.Sequential()
    model.add(vgg16)
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(4096, activation="relu"))
    model.add(layers.Dense(4096, activation="relu"))
    # Softmax (not sigmoid): the loss below is configured with
    # from_logits=False and therefore expects the two outputs to form a
    # probability distribution over the classes.
    model.add(layers.Dense(2, activation="softmax"))
    model.compile(optimizer=optimizers.Adam(learning_rate=0.001),
                  loss=losses.SparseCategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy'])
    return model

In [4]:
# Build one model instance and print its layer-by-layer summary.
# This `model` object is the one trained and evaluated in the final cells.
model = vgg16_model()
model.summary()

2022-04-30 20:42:06.290746: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-30 20:42:06.335395: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-30 20:42:06.335602: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-30 20:42:06.336620: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 7, 7, 512)         14714688  
                                                                 
 global_average_pooling2d (G  (None, 512)              0         
 lobalAveragePooling2D)                                          
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense (Dense)               (None, 4096)              2101248   
                                                                 
 dense_1 (Dense)             (None, 4096)              16781312  
                                                                 
 dense_2 (Dense)             (None, 2)                 8194      
                                                        

### Cross Validation and Testing

In [5]:
def cross_validation(X, y, epochs=10, n_splits=4, batch_size=32):
    """Run K-fold cross-validation and return fold-averaged training curves.

    A fresh model from ``vgg16_model()`` is trained for every fold, so no
    weights are shared between folds. ``KFold`` is used without shuffling,
    i.e. folds follow the current order of ``X``.

    Args:
        X: Samples; must support indexing with an integer index array
            (e.g. a NumPy array).
        y: Labels aligned with ``X``, indexable the same way.
        epochs: Number of training epochs per fold.
        n_splits: Number of folds (default 4).
        batch_size: Mini-batch size passed to ``model.fit`` (default 32).

    Returns:
        dict with keys ``"accuracy"``, ``"val_accuracy"``, ``"loss"`` and
        ``"val_loss"``; each value is a list of length ``epochs`` holding the
        metric averaged over all folds for that epoch.
    """
    fold_histories = []

    kf = KFold(n_splits=n_splits)
    for train_index, val_index in kf.split(X):
        model = vgg16_model()
        res = model.fit(X[train_index], y[train_index],
                        validation_data=(X[val_index], y[val_index]),
                        epochs=epochs, batch_size=batch_size)
        # Keep only the per-epoch metric dict rather than the History
        # callback object, which also references the trained model; this
        # lets each fold's model be released once the fold is done.
        fold_histories.append(res.history)

    # obtain average results throughout the epochs
    metric_names = ("accuracy", "val_accuracy", "loss", "val_loss")
    n_folds = len(fold_histories)
    history = {
        name: [sum(fold[name][i] for fold in fold_histories) / n_folds
               for i in range(epochs)]
        for name in metric_names
    }

    return history

In [None]:
# Cross-validate on the training split only (default arguments of
# cross_validation); `history` is a dict of per-epoch metrics averaged over folds.
history = cross_validation(X_train, y_train)

In [None]:
# Fold-averaged training vs. validation accuracy per epoch.
# plot_accuracy_comparison is not defined in this notebook — presumably provided by `utils`.
plot_accuracy_comparison(history["accuracy"], history["val_accuracy"])

In [None]:
# Fold-averaged training vs. validation loss per epoch.
# plot_loss_comparison is not defined in this notebook — presumably provided by `utils`.
plot_loss_comparison(history["loss"], history["val_loss"])

In [None]:
# Train the model built earlier on the full training split, then score it on
# the held-out test split. The fit result is bound to its own name so the
# cross-validation `history` dict used by the plotting cells is not replaced
# by a Keras History object (which would break re-running those cells).
final_history = model.fit(X_train, y_train, epochs=10, batch_size=32)

# evaluate() returns the loss followed by the compiled metrics (accuracy).
loss, acc = model.evaluate(X_test, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", acc)

In [None]:
# show confusion matrix
# The predicted class is the index of the larger of the two model outputs.
y_pred = np.argmax(model.predict(X_test), axis=-1)
plot_confusion_matrix(y_test, y_pred)

# show other metrics
# classification_report takes (y_true, y_pred); passing them the other way
# round swaps precision and recall in the printed report.
print(classification_report(y_test, y_pred))