In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from tensorflow.keras import datasets, layers, models, losses, Model
from tensorflow.keras.applications.inception_v3 import InceptionV3
from sklearn.model_selection import cross_val_score, cross_validate
#!pip install tensorcross
from tensorcross.model_selection import GridSearch

### Training the data

We will begin witht the very first network we trained, and check if by seeing more data, it can perform a better job at classifying our 4 classes.

In [3]:
#set up some configuration variables
TRAIN_IMAGE_DIR = 'augmented_dataset_resized/Training'
TEST_IMAGE_DIR = 'augmented_dataset_resized/Testing'
img_height=167
img_width=167
batch_size=32

In [4]:
train_ds = tf.keras.utils.image_dataset_from_directory(
  TRAIN_IMAGE_DIR,
  validation_split=0.13,
  subset="training",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

Found 8582 files belonging to 4 classes.
Using 7467 files for training.


2022-12-14 12:03:21.230189: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-12-14 12:03:21.230229: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-12-14 12:03:21.230255: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (c100.local): /proc/driver/nvidia/version does not exist
2022-12-14 12:03:21.230708: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
val_ds = tf.keras.utils.image_dataset_from_directory(
  TRAIN_IMAGE_DIR,
  validation_split=0.2,
  subset="validation",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

Found 8582 files belonging to 4 classes.
Using 1716 files for validation.


In [6]:
test_ds=tf.keras.utils.image_dataset_from_directory(
  TEST_IMAGE_DIR,
  seed=123,
  shuffle=False,
  image_size=(img_height, img_width),
  batch_size=batch_size)

num_classes = 4

Found 1705 files belonging to 4 classes.


In [8]:
simple_cnn_model = tf.keras.Sequential([
  #rescale pixel values to [0,1] interval
  tf.keras.layers.Rescaling(1./255),
  tf.keras.layers.Conv2D(32, 3, activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Conv2D(32, 3, activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Conv2D(32, 3, activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dense(num_classes)
])

simple_cnn_model.compile(
  optimizer='adam',
  loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
  metrics=['accuracy'])

history_simple_cnn=simple_cnn_model.fit(
  train_ds,
  validation_data=val_ds,
  epochs=50
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [9]:
def plot_accuracy(model_history, model_name):
    plt.plot(model_history.history['accuracy'])
    plt.plot(model_history.history['val_accuracy'])
    plt.title(model_name+' accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

def plot_loss(model_history, model_name):
    plt.plot(model_history.history['loss'])
    plt.plot(model_history.history['val_loss'])
    plt.title(model_name+' loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

In [11]:
def compute_accuracy(y_true, y_pred):
    correct_predictions = 0
    # iterate over each label and check
    for true, predicted in zip(y_true, y_pred):
        if true == predicted:
            correct_predictions += 1
    # compute the accuracy
    accuracy = correct_predictions/len(y_true)
    return accuracy

predicted_probabilities=simple_cnn_model.predict(test_ds)
predictions_simple_cnn_model=np.argmax(predicted_probabilities, axis=1)
test_labels = np.concatenate([y for x, y in test_ds], axis=0) 
compute_accuracy(test_labels,predictions_simple_cnn_model)

0.9202346041055719

The results are amazing, we increased the accuracy by 16%!<br>
It seems that data augmentation yielded the greatest improvement by far.
Due to the long training time on so much data, my lack of computing power and lack of time left, I am not able to persue more experiments, but I think the results are great. We achieved our goal of at least 70% accuracy, and actually greatly overcomed it!