# Comparison of two CNNs
- Tadhg Ryan 21310408
- Szymon Szulc 21323208

##### Code executes to end with no errors

## Resources:
- 

In [None]:
import tensorflow as tf
# Show the physical devices (e.g. CPU/GPU) that TensorFlow can see
print(tf.config.list_physical_devices())

In [None]:
# Import dataset
import os

import kagglehub

# Download the latest version of the dataset and point at the image folder
# inside it. os.path.join picks the right separator for the current OS, so the
# path works on Linux/macOS as well as on Windows.
dataset_path = os.path.join(
    kagglehub.dataset_download("muratkokludataset/rice-image-dataset"),
    "Rice_Image_Dataset",
)

print("Path to dataset files:", dataset_path)


In [None]:
# Hyperparameters
BATCH_SIZE = 64  # Images per batch when loading the dataset
IMG_HEIGHT = 250  # Height (px) the images are loaded at
IMG_WIDTH = 250  # Width (px) the images are loaded at
K = 3  # Number of folds for k-fold cross validation
EPOCHS = 10  # Training epochs per fold
LEARNING_RATE = 0.001  # Learning rate passed to the optimiser
MAX_SIZE_DATASET = 1000  # Upper bound on the number of training images used
NUM_OF_BATCHES = MAX_SIZE_DATASET // BATCH_SIZE  # Whole batches kept for training (15 with the values above, i.e. 960 images)
RESIZED_SIZE = 128  # Side length (px) images are resized to before being fed to the models


In [None]:
# Load data in
import tensorflow as tf

# Load the images and split them in one call: validation_split together with
# subset="both" returns a (training, held-out) pair. The held-out 0.4% is used
# as the test set further down.
train_dataset, test_dataset = tf.keras.utils.image_dataset_from_directory(
    dataset_path,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    color_mode='grayscale',  # Load images as single-channel grayscale
    shuffle=True,
    validation_split=0.004,
    subset="both",
    seed=42
)

# Get class names for later use
# NOTE(review): presumably this has to be read before take() below, because
# take() returns a plain tf.data.Dataset without class_names - confirm.
class_names = train_dataset.class_names

# Keep only the first NUM_OF_BATCHES batches to limit the size of the training set
train_dataset = train_dataset.take(NUM_OF_BATCHES)
print(f"Taking {NUM_OF_BATCHES} batches")


In [None]:
# Data Augmentation
from tensorflow.keras import layers

# Define the data augmentation pipeline (random layers, meant for training data)
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal_and_vertical"), 		# Randomly flip horizontally and/or vertically
    layers.RandomZoom(-0.2, 0.2),        				# Random zoom. NOTE(review): the two positional args are height_factor and width_factor, not one (min, max) range - confirm this is intended
    layers.RandomRotation(0.2),          				# Randomly rotate image
    layers.RandomBrightness(factor=(-0.2, 0.2)),		# Randomly darken/brighten; runs before Rescaling, i.e. on the un-normalised pixel values
])
# Resizing step, kept separate so it can also be applied to the test data
data_scaling = tf.keras.Sequential([
    layers.Resizing(RESIZED_SIZE, RESIZED_SIZE),		# Resize to desired dimensions
])
# Normalisation step: multiply every pixel value by 1/255
data_normalisation = tf.keras.Sequential([
    layers.Rescaling(1./255),
])

# Define functions to apply the augmentation
def augment_image(image, label):
    """Apply the random augmentation pipeline to `image`; `label` is passed through unchanged."""
    # training=True keeps the random layers active when called outside model.fit()
    augmented = data_augmentation(image, training=True)
    return augmented, label

def scale_image(image, label):
    """Resize `image` with the `data_scaling` pipeline; `label` is passed through unchanged."""
    resized = data_scaling(image, training=True)
    return resized, label

def normalise_image(image, label):
    """Rescale the pixel values of `image` with `data_normalisation`; `label` is passed through unchanged."""
    normalised = data_normalisation(image)
    return normalised, label


In [None]:
import matplotlib.pyplot as plt
import tensorflow as tf

num_images = 9  # Number of images to display

# Per-batch accumulators: raw images, their augmented versions and the labels
original_images = []
augmented_images = []
labels_list = []

# Pull whole batches from the dataset until enough images have been collected
i = 0
for image_batch, label_batch in train_dataset:
    if i >= num_images:
        break
    # Run the batch through the augment -> resize -> normalise chain
    augmented_image, _ = augment_image(image_batch, None)
    augmented_image, _ = scale_image(augmented_image, None)
    augmented_image, _ = normalise_image(augmented_image, None)

    original_images.append(image_batch)
    augmented_images.append(augmented_image)
    labels_list.append(label_batch)
    i += BATCH_SIZE

# Concatenate the batches into single arrays
original_images_array = tf.concat(original_images, axis=0)
augmented_images_array = tf.concat(augmented_images, axis=0)
labels_array = tf.concat(labels_list, axis=0)

# Select the first `num_images` images and labels to display
original_images_to_display = original_images_array[:num_images]
augmented_images_to_display = augmented_images_array[:num_images]
labels_to_display = labels_array[:num_images]

# Plot the images in a 3x6 grid: each original sits directly left of its augmented version
plt.figure(figsize=(12, 12))

for i in range(num_images):
    class_label = class_names[labels_to_display[i].numpy()]  # Class name shown in both titles
    panels = (
        (f"Original - {class_label}", original_images_to_display[i]),
        (f"Augmented - {class_label}", augmented_images_to_display[i]),
    )
    for offset, (caption, picture) in enumerate(panels, start=1):
        ax = plt.subplot(3, 6, 2 * i + offset)
        plt.imshow(picture.numpy().squeeze(), cmap='gray')
        plt.title(caption)
        plt.axis("off")

plt.tight_layout()
plt.show()

# Let tf.data choose the degree of parallelism for every map() below
map_options = {"num_parallel_calls": tf.data.AUTOTUNE}

# Training data: random augmentation -> resize -> normalise
augmented_train_dataset = train_dataset.map(augment_image, **map_options)
scale_augmented_train_dataset = augmented_train_dataset.map(scale_image, **map_options)
train_dataset = scale_augmented_train_dataset.map(normalise_image, **map_options)

# Test data: resize -> normalise only (no random augmentation)
scale_test_dataset = test_dataset.map(scale_image, **map_options)
test_dataset = scale_test_dataset.map(normalise_image, **map_options)


In [None]:
# Create Model 1

# Step 1: Import necessary libraries
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Flatten, Dense, Dropout, Concatenate, Input
from tensorflow.keras.models import Model

# Step 2: Define the Inception module
def inception_module(x, filters_1x1, filters_3x3_reduce, filters_3x3, filters_5x5_reduce, filters_5x5, filters_pool_proj):
    """Build one Inception block on top of `x` and return its output tensor.

    Four branches are applied to the same input and concatenated along the
    channel axis: a 1x1 convolution, a 1x1 -> 3x3 convolution, a 1x1 -> 5x5
    convolution, and a 3x3 max-pool followed by a 1x1 convolution. Every
    convolution uses 'same' padding and ReLU, and the pooling uses stride 1,
    so all branches keep the spatial size of `x`.

    Args:
        x: Input tensor of the block.
        filters_1x1: Filters of the 1x1 branch.
        filters_3x3_reduce: Filters of the 1x1 convolution preceding the 3x3 one.
        filters_3x3: Filters of the 3x3 convolution.
        filters_5x5_reduce: Filters of the 1x1 convolution preceding the 5x5 one.
        filters_5x5: Filters of the 5x5 convolution.
        filters_pool_proj: Filters of the 1x1 convolution after the max-pool.

    Returns:
        The channel-wise concatenation of the four branch outputs.
    """
    conv_opts = {"padding": "same", "activation": "relu"}

    # Branch 1: plain 1x1 convolution
    branch_1x1 = Conv2D(filters_1x1, (1, 1), **conv_opts)(x)

    # Branch 2: 1x1 reduction followed by a 3x3 convolution
    reduced_3x3 = Conv2D(filters_3x3_reduce, (1, 1), **conv_opts)(x)
    branch_3x3 = Conv2D(filters_3x3, (3, 3), **conv_opts)(reduced_3x3)

    # Branch 3: 1x1 reduction followed by a 5x5 convolution
    reduced_5x5 = Conv2D(filters_5x5_reduce, (1, 1), **conv_opts)(x)
    branch_5x5 = Conv2D(filters_5x5, (5, 5), **conv_opts)(reduced_5x5)

    # Branch 4: 3x3 max-pool followed by a 1x1 projection
    pooled = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(x)
    branch_pool = Conv2D(filters_pool_proj, (1, 1), **conv_opts)(pooled)

    # Join the branches along the channel axis
    return Concatenate(axis=-1)([branch_1x1, branch_3x3, branch_5x5, branch_pool])

# Step 3: Define the GoogleLeNet model
def GoogleLeNet(input_shape=(224, 224, 1), num_classes=5):
    """Build the GoogLeNet-style (Inception) classifier.

    The input is resized to 224x224 inside the model, so `input_shape` may be
    any image size. The network is a convolutional stem, nine Inception blocks
    in three stages separated by max-pooling, then average pooling, dropout
    and a softmax output layer named "outputs".

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras `Model` named "GoogleLeNet".
    """

    def downsample(tensor):
        # 3x3 max-pool with stride 2, used between the stages of the network
        return MaxPooling2D((3, 3), strides=(2, 2), padding='same')(tensor)

    # Filter counts for each Inception block, grouped by stage
    # (a down-sampling max-pool is inserted between consecutive stages)
    inception_stages = (
        (
            (64, 96, 128, 16, 32, 32),
            (128, 128, 192, 32, 96, 64),
        ),
        (
            (192, 96, 208, 16, 48, 64),
            (160, 112, 224, 24, 64, 64),
            (128, 128, 256, 24, 64, 64),
            (112, 144, 288, 32, 64, 64),
            (256, 160, 320, 32, 128, 128),
        ),
        (
            (256, 160, 320, 32, 128, 128),
            (384, 192, 384, 48, 128, 128),
        ),
    )

    # Input layer, resized to the resolution the network is laid out for
    input_layer = Input(shape=input_shape)
    resized_layer = layers.Resizing(224, 224)(input_layer)

    # Stem: 7x7/2 conv -> pool -> 1x1 conv -> 3x3 conv -> pool
    x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', activation='relu')(resized_layer)
    x = downsample(x)
    x = Conv2D(64, (1, 1), padding='same', activation='relu')(x)
    x = Conv2D(192, (3, 3), padding='same', activation='relu')(x)
    x = downsample(x)

    # Inception stages
    last_stage = len(inception_stages) - 1
    for stage_number, stage in enumerate(inception_stages):
        for block_filters in stage:
            x = inception_module(x, *block_filters)
        if stage_number != last_stage:
            x = downsample(x)

    # Classification head: average pooling -> dropout -> flatten -> softmax
    x = AveragePooling2D((3, 3), strides=(1, 1), padding='valid')(x)
    x = Dropout(0.4)(x)
    x = Flatten()(x)
    x = Dense(num_classes, activation='softmax', name="outputs")(x)

    return Model(input_layer, x, name="GoogleLeNet")

# Step 4: Instantiate the model
# Build the model for the image size the dataset is loaded at, one output per class
googleLeNetModel = GoogleLeNet(
    num_classes=len(class_names),
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 1),
)

# Print the layer-by-layer summary
googleLeNetModel.summary()


In [None]:
# Create Model 2
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.models import Sequential

def SimpleNet(input_shape=(250, 250, 1), num_classes=5):
    """Build a small sequential CNN classifier.

    The input is resized to 224x224 inside the model. Four convolution +
    2x2 max-pooling stages with growing filter counts and shrinking kernels
    are followed by flatten, dropout and a softmax output layer named
    "outputs".

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras `Sequential` model.
    """
    # Input layer followed by the in-model resize
    stack = [
        Input(input_shape),
        layers.Resizing(224, 224),
    ]

    # (filters, kernel side) of each convolution; every one is followed by a 2x2 max-pool
    for filters, kernel in ((16, 7), (32, 5), (64, 3), (128, 3)):
        stack.append(Conv2D(filters=filters, kernel_size=(kernel, kernel), activation="relu"))
        stack.append(MaxPooling2D((2, 2)))

    stack.append(Flatten())

    # Dropout layer
    stack.append(Dropout(0.4))

    # Fully connected layer
    stack.append(Dense(num_classes, activation='softmax', name="outputs"))

    return Sequential(stack)

# Build the model with its default arguments just to print the layer summary
SimpleNet().summary()


In [None]:
# VGG16 define

import tensorflow as tf
from tensorflow.keras import layers, models

def VGG16(input_shape=(224, 224, 1), num_classes=5, dropout_rate=0.5):
    """Build a VGG16-style classifier with batch normalisation.

    The input is resized to 224x224 inside the model. Five convolutional
    blocks (3x3 'same' ReLU convolutions, each followed by batch
    normalisation, closed by a 2x2 max-pool) are followed by two 4096-unit
    dense layers with batch normalisation and dropout, and a softmax output.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        num_classes: Number of units in the softmax output layer.
        dropout_rate: Dropout rate applied after each 4096-unit dense layer.

    Returns:
        An uncompiled Keras `Sequential` model.
    """
    model = models.Sequential()

    model.add(layers.Input(input_shape))
    model.add(layers.Resizing(224, 224))

    # (filters, number of convolutions) for convolutional blocks 1 to 5
    conv_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    for filters, conv_count in conv_blocks:
        for _ in range(conv_count):
            model.add(layers.Conv2D(filters, (3, 3), padding='same', activation='relu'))
            model.add(layers.BatchNormalization())
        # Halve the spatial resolution at the end of every block
        model.add(layers.MaxPooling2D((2, 2), strides=(2, 2)))

    # Fully connected layers
    model.add(layers.Flatten())
    for _ in range(2):
        model.add(layers.Dense(4096, activation='relu'))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(dropout_rate))

    # Output layer: one softmax unit per class
    model.add(layers.Dense(num_classes, activation='softmax'))

    return model

# Build the model with its default arguments just to print the layer summary
VGG16().summary()

In [None]:
# Defining k-fold cross validation
from sklearn.model_selection import KFold
import numpy as np

def _select_batches(dataset, batch_indices):
    """Return a dataset made of the batches of `dataset` at positions `batch_indices`."""
    selected = dataset.skip(batch_indices[0]).take(1)
    for index in batch_indices[1:]:
        # Pick the single batch at position `index` and put it in front of the ones chosen so far
        selected = dataset.skip(index).take(1).concatenate(selected)
    return selected


def RunKFold(modelFunction, optimiser_class, loss_function):
    """Train a fresh model on each of K folds of `train_dataset` and return the best one.

    The batches of the global `train_dataset` are split into K folds. For every
    fold a new model is built, compiled and trained for EPOCHS epochs, with the
    fold's held-out batches used as validation data. The fold is scored by the
    validation loss of its final epoch, because those are the weights the
    returned model actually holds.

    Args:
        modelFunction: Callable building an uncompiled model; called with the
            keyword arguments `input_shape` and `num_classes`.
        optimiser_class: Optimiser class; instantiated once per fold with
            `learning_rate=LEARNING_RATE`.
        loss_function: Loss passed to `model.compile`.

    Returns:
        The trained model of the fold with the lowest final validation loss
        (None if no fold improved on infinity, e.g. every loss was NaN).
    """
    # Create a KFold object
    kf = KFold(n_splits=K, shuffle=True)

    # Folds are formed over batch positions, not over individual images
    indices = np.arange(len(train_dataset))

    best_model = None
    best_val_loss = float('inf')

    # Iterate over K folds
    for fold, (train_index, val_index) in enumerate(kf.split(indices)):
        print(f"==================== Fold: {fold+1} ====================")

        # tf.data transformations return a new dataset, so the result has to be
        # assigned for it to take effect. The training split is only prefetched:
        # train_dataset already contains the random augmentation, so caching it
        # would replay the same augmented images every epoch.
        train = _select_batches(train_dataset, train_index).prefetch(tf.data.AUTOTUNE)

        # The validation split is cached so every epoch is scored on the same data
        val = _select_batches(train_dataset, val_index).cache().prefetch(tf.data.AUTOTUNE)

        # Recreate the model to avoid reusing weights
        model = modelFunction(input_shape=(RESIZED_SIZE, RESIZED_SIZE, 1), num_classes=len(class_names))

        optimiser = optimiser_class(learning_rate=LEARNING_RATE)

        # Compile the model
        model.compile(optimizer=optimiser, loss=loss_function, metrics=['accuracy'])

        # Train the model on the training dataset
        history = model.fit(train, epochs=EPOCHS, validation_data=val)

        # Score the fold with the last epoch: the model keeps its final weights,
        # not the weights of whichever epoch had the lowest validation loss
        val_loss = history.history['val_loss'][-1]

        # Update the best model if this fold is better
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model

    return best_model



In [None]:
# Run Models with k-fold cross validation
from tensorflow.keras import optimizers, losses

# train_dataset is deliberately not cached here: it already contains the random
# augmentation, so a cache would replay the same augmented images every epoch.
# (Calling cache()/prefetch() without assigning the result has no effect anyway,
# because tf.data transformations return a new dataset.)

# Model builders and their display names, in matching order
ModelFunctions = [VGG16, GoogleLeNet, SimpleNet]
ModelNames = ["VGG16", "GoogleLeNet", "SimpleNet"]

# Best model per architecture, in the same order as ModelFunctions
bestModels = []

for model in ModelFunctions:
    # The optimiser is passed as a class: RunKFold creates a fresh instance for every fold
    optimiser = optimizers.Adam
    # The models end in a softmax layer, so the loss receives probabilities, not logits
    loss_function = losses.SparseCategoricalCrossentropy(from_logits=False)
    bestModels.append(RunKFold(model, optimiser, loss_function))


In [None]:
# Calculate and display metrics
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, accuracy_score
import numpy as np

def evaluate_models(models, test_dataset, class_names):
    """Print and plot test-set metrics for every model in `models`.

    For each model this shows a confusion-matrix heatmap, prints the accuracy
    and draws per-class precision, recall and F1 bar charts.

    All per-class outputs are computed for the full list of class indices, so
    they always have one entry per name in `class_names`, even when a class is
    absent from the test data and from the predictions.

    Args:
        models: Trained models, in the same order as the global `ModelNames`
            (used for the printed header).
        test_dataset: Iterable of (images, labels) batches with integer labels.
        class_names: Class names indexed by integer label.
    """
    # Fixed label set; keeps matrix and metric shapes in line with class_names
    label_ids = list(range(len(class_names)))

    for i, model in enumerate(models):
        print(f"================ Metrics for {ModelNames[i]} Model ================")

        # Collect labels and predictions batch by batch so the two stay aligned
        y_true = []
        y_pred = []
        for images, labels in test_dataset:
            y_true.extend(labels.numpy())
            predictions = model.predict(images)
            y_pred.extend(np.argmax(predictions, axis=1))

        y_true = np.array(y_true)
        y_pred = np.array(y_pred)

        # Confusion matrix (rows: actual class, columns: predicted class)
        cm = confusion_matrix(y_true, y_pred, labels=label_ids)
        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
        plt.title(f"Confusion Matrix for Model {i+1}")
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        plt.show()

        # zero_division=0 reports 0 instead of warning for classes without predictions/samples
        precision, recall, f1, _ = precision_recall_fscore_support(
            y_true, y_pred, labels=label_ids, zero_division=0
        )
        accuracy = accuracy_score(y_true, y_pred)

        print(f"Accuracy: {accuracy:.4f}")

        metrics = [precision, recall, f1]
        metric_names = ["Precision", "Recall", "F1-Score"]

        # One bar chart per metric, sharing the y axis
        fig, axes = plt.subplots(1, 3, figsize=(15, 4), sharey=True)
        for ax, metric, name in zip(axes, metrics, metric_names):
            ax.bar(class_names, metric, color='skyblue')
            ax.set_title(name)
            ax.set_ylim(0, 1)
            ax.set_xlabel("Class")
            ax.tick_params(axis='x', rotation=45)
        axes[0].set_ylabel("Score")

        plt.tight_layout()
        plt.show()

        print("======================================================\n")

# Evaluate the best model of each architecture on the held-out test set
evaluate_models(bestModels, test_dataset, class_names)


In [None]:
# Compare metrics