In [None]:
from google.colab import drive, files
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing import image
import time

# One fixed seed (the author's student id) is pushed into both NumPy's and
# TensorFlow's global random generators so repeated runs are comparable.
student_id = 404488
for _seed_fn in (np.random.seed, tf.random.set_seed):
    _seed_fn(student_id)

In [None]:
drive.flush_and_unmount
drive.mount('/content/drive')
!unzip -q '/content/drive/MyDrive/rock-paper-scissors.zip' -d '/content/drive/MyDrive/gesture_dataset'

In [None]:
# Dataset layout on Drive: <data_dir>/Rock-Paper-Scissors/{train,test}
data_dir = '/content/drive/MyDrive/gesture_dataset'
train_dir, test_dir = (
    os.path.join(data_dir, 'Rock-Paper-Scissors', split_name)
    for split_name in ('train', 'test')
)

In [None]:
# Image size every picture is resized to, and the batch size applied later
# when the splits are created.
img_height, img_width = 224, 224
batch_size = 32


def _load_unbatched(directory):
    """Read an image folder as an unbatched, unshuffled dataset of (image, label)."""
    return tf.keras.utils.image_dataset_from_directory(
        directory,
        image_size=(img_height, img_width),
        batch_size=None,   # keep single elements; batching happens after the split
        shuffle=False,
        seed=student_id,
    )


train_ds_full = _load_unbatched(train_dir)
test_ds_full = _load_unbatched(test_dir)

# Class names as reported by the loader for the training folder.
class_names = train_ds_full.class_names
print(f"Classes: {class_names}")

In [None]:
#combine into 1 dataset
all_images = train_ds_full.concatenate(test_ds_full)

#shuffle images
# The files are read in directory (i.e. class) order, so the shuffle buffer
# must cover the whole dataset to mix the classes evenly; a smaller buffer
# leaves the head of the stream dominated by the first classes.
# NOTE: a full-size buffer holds every decoded image in memory at once.
_num_images = int(all_images.cardinality())
_shuffle_buffer = _num_images if _num_images > 0 else 1000

# reshuffle_each_iteration=False freezes the order across iterations. With the
# default (True) the take()/skip() splits built from this dataset would contain
# different images on every pass, leaking samples between train/val/test.
all_images = all_images.shuffle(
    buffer_size=_shuffle_buffer,
    seed=student_id,
    reshuffle_each_iteration=False,
)

In [None]:
#get total size
# Ask tf.data for the element count instead of decoding every image into a
# Python list just to take its length.
total_size = int(all_images.cardinality())
if total_size < 0:
    # Cardinality not statically known: count by streaming, without keeping
    # the elements around.
    total_size = sum(1 for _ in all_images)
print(f"Total dataset size: {total_size}")

#get data set sizes (70% train, 10% validation, remainder test)
train_size = int(0.7 * total_size)
val_size = int(0.1 * total_size)
test_size = total_size - train_size - val_size

#create the splits: consecutive, non-overlapping slices of the same stream
train_ds = all_images.take(train_size).batch(batch_size)
remaining = all_images.skip(train_size)
val_ds = remaining.take(val_size).batch(batch_size)
test_ds = remaining.skip(val_size).batch(batch_size)

In [None]:
# The splits are already batched, so the reported cardinality is the number
# of batches in each split.
print("Data set sizes:")
for _split_label, _split_ds in (
    ("Training set", train_ds),
    ("Validation set", val_ds),
    ("Test set", test_ds),
):
    print(f"{_split_label}: {tf.data.experimental.cardinality(_split_ds)}")

In [None]:
#visualise sample images
plt.figure(figsize=(12, 12))
plt.suptitle("Sample Images from Rock-Paper-Scissors Dataset")

# A single batch is enough to fill the 3x3 grid. Taking more batches only
# redraws the same nine subplots again and again.
for images, labels in train_ds.take(1):
    for i in range(min(9, len(images))):
        plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        # Show which class the sample belongs to.
        plt.title(class_names[int(labels[i])])
        plt.axis("off")
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

In [None]:
#visualise class distribution
def count_examples_per_class(dataset, num_classes=None):
    """Count how many examples of each class a dataset contains.

    Args:
        dataset: iterable yielding ``(features, labels)`` pairs. ``labels`` may
            be a batch of integer class ids or a single scalar id; anything
            ``np.asarray`` can convert is accepted.
        num_classes: number of classes to count. Defaults to
            ``len(class_names)`` (the notebook-level list) when omitted.

    Returns:
        A list of ints where position ``k`` is the number of examples labelled ``k``.
    """
    if num_classes is None:
        num_classes = len(class_names)
    class_counts = [0] * num_classes
    for _, labels in dataset:
        # Flatten to plain Python ints so batched and unbatched (scalar)
        # labels are handled the same way.
        for label in np.asarray(labels).ravel().tolist():
            class_counts[label] += 1
    return class_counts

# Per-class example counts for the training, validation and test splits.
train_counts, val_counts, test_counts = (
    count_examples_per_class(_split)
    for _split in (train_ds, val_ds, test_ds)
)

In [None]:
# Grouped bar chart: one group per class, one bar per split.
plt.figure(figsize=(12, 6))
plt.suptitle("Class Distribution Across Datasets")
x = np.arange(len(class_names))
width = 0.25

# Each split is drawn one bar-width away from its neighbour around the tick.
plt.bar(x - width, train_counts, width, label='Training')
for _shift, _counts, _split_label in (
    (0, val_counts, 'Validation'),
    (width, test_counts, 'Test'),
):
    plt.bar(x + _shift, _counts, width, label=_split_label)

plt.xticks(x, class_names, fontsize=12)
plt.xlabel('Class', fontsize=14)
plt.ylabel('Number of Images', fontsize=14)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.legend(fontsize=12)
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

In [None]:
#visualize image characteristics (RGB channels)
plt.figure(figsize=(15, 5))
plt.suptitle("RGB Channel Analysis of Sample Images")

for images, labels in train_ds.take(1):
    # Never index past the end of the batch if it holds fewer than 3 images.
    for i in range(min(3, len(images))):
        img = images[i].numpy().astype("uint8")
        class_label = class_names[int(labels[i])]

        plt.subplot(1, 3, i+1)

        #plot RGB histograms: one overlaid histogram per colour channel
        for j, color in enumerate(['red', 'green', 'blue']):
            plt.hist(img[:, :, j].ravel(), bins=256, alpha=0.5, color=color, label=color)

        plt.title(f"{class_label}")
        plt.xlabel("Pixel Value")
        plt.ylabel("Frequency")
        if i == 0:
            plt.legend()
        # Common y-range so the three panels are directly comparable.
        plt.ylim(0, 1500)

plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

# 3 initial models
Here we test three models (a baseline, a deeper model and a wider model) to see which generalises best.

## Baseline model

In [None]:
# Baseline CNN: two conv + max-pool stages followed by a small dense head.
baseline_model = models.Sequential()

# Stage 1: 32 filters
baseline_model.add(
    layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(224, 224, 3))
)
baseline_model.add(layers.MaxPooling2D((2, 2)))

# Stage 2: 64 filters
baseline_model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
baseline_model.add(layers.MaxPooling2D((2, 2)))

# Classifier head: flatten, one hidden layer, 3-way softmax
baseline_model.add(layers.Flatten())
baseline_model.add(layers.Dense(128, activation='relu'))
baseline_model.add(layers.Dense(3, activation='softmax'))

# Integer labels, so the sparse variant of categorical cross-entropy is used.
baseline_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

print("baseline CNN architecture")
baseline_model.summary()

In [None]:
# Early stopping shared by the training cells: stop after 5 epochs without a
# better validation loss and roll back to the best weights seen.
early_stop = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor='val_loss',
)

# Train the baseline and record the wall-clock duration.
print("Baseline model")
start_time = time.time()
baseline_history = baseline_model.fit(
    train_ds,
    epochs=15,
    validation_data=val_ds,
    callbacks=[early_stop],
)
baseline_time = time.time() - start_time
print(f"\nBaseline train time: {baseline_time} seconds")

# Score on the validation split.
_baseline_val_metrics = baseline_model.evaluate(val_ds)
baseline_val_loss, baseline_val_acc = _baseline_val_metrics
print(f"Baseline model val accuracy: {baseline_val_acc:.4f}")

## Deeper Model

In [None]:
# Deeper CNN: three conv stages, each Conv -> BatchNorm -> Conv -> MaxPool,
# followed by a dense head with dropout.
def _conv_bn_conv_pool(filters, **first_conv_kwargs):
    """Return the four layers of one stage; extra kwargs go to the first Conv2D."""
    return [
        layers.Conv2D(filters, (3, 3), activation='relu', padding='same', **first_conv_kwargs),
        layers.BatchNormalization(),
        layers.Conv2D(filters, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
    ]


deeper_model = models.Sequential([
    *_conv_bn_conv_pool(32, input_shape=(224, 224, 3)),   # first stage declares the input
    *_conv_bn_conv_pool(64),
    *_conv_bn_conv_pool(128),

    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(3, activation='softmax'),
])

# Same optimiser, loss and metric as the other architectures.
deeper_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary.
print("Deeper CNN Architecture:")
deeper_model.summary()

In [None]:
# Fit the deeper model with the same schedule as the baseline and time it.
print("Deeper model")
start_time = time.time()
deeper_history = deeper_model.fit(
    train_ds,
    epochs=15,
    validation_data=val_ds,
    callbacks=[early_stop],
)
deeper_training_time = time.time() - start_time
print(f"\nDeeper model training time: {deeper_training_time:.2f} seconds")

# Validation-split score used in the architecture comparison.
_deeper_val_metrics = deeper_model.evaluate(val_ds)
deeper_val_loss, deeper_val_acc = _deeper_val_metrics
print(f"Deeper model validation accuracy: {deeper_val_acc:.4f}")

## Wider model

In [None]:
# Wider CNN: same depth as the baseline but with twice the filters per conv
# stage and a 512-unit dense layer (plus dropout).
_wide_conv_opts = dict(kernel_size=(3, 3), activation='relu', padding='same')

wider_model = models.Sequential([
    # stage 1: 64 filters
    layers.Conv2D(64, input_shape=(224, 224, 3), **_wide_conv_opts),
    layers.MaxPooling2D((2, 2)),

    # stage 2: 128 filters
    layers.Conv2D(128, **_wide_conv_opts),
    layers.MaxPooling2D((2, 2)),

    # dense head
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(3, activation='softmax'),
])

# Same optimiser, loss and metric as the other architectures.
wider_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary.
print("\nWider CNN Architecture:")
wider_model.summary()

In [None]:
# Fit the wider model with the same schedule as the other two and time it.
print("Wider model")
start_time = time.time()
wider_history = wider_model.fit(
    train_ds,
    epochs=15,
    validation_data=val_ds,
    callbacks=[early_stop],
)
wider_training_time = time.time() - start_time
print(f"\nWider model training time: {wider_training_time:.2f} seconds")

# Validation-split score used in the architecture comparison.
_wider_val_metrics = wider_model.evaluate(val_ds)
wider_val_loss, wider_val_acc = _wider_val_metrics
print(f"Wider model validation accuracy: {wider_val_acc:.4f}")

## Results

In [None]:
# Compare the three model architectures
architecture_comparison = {
    'Model': ['Baseline CNN', 'Deeper CNN', 'Wider CNN'],
    'Validation Accuracy': [baseline_val_acc, deeper_val_acc, wider_val_acc],
    'Training Time (s)': [baseline_time, deeper_training_time, wider_training_time]
}

# Create a bar chart to compare accuracies
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.bar(architecture_comparison['Model'], architecture_comparison['Validation Accuracy'])
plt.title('Validation Accuracy by Architecture')
plt.xlabel('Model Architecture')
plt.ylabel('Validation Accuracy')
plt.ylim(0, 1)  # Set y-axis from 0 to 1 for accuracy

# Create a bar chart to compare training times
plt.subplot(1, 2, 2)
plt.bar(architecture_comparison['Model'], architecture_comparison['Training Time (s)'])
plt.title('Training Time by Architecture')
plt.xlabel('Model Architecture')
plt.ylabel('Training Time (seconds)')

plt.tight_layout()
plt.show()

# Print the comparison as a formatted table.
# The loop variables deliberately avoid the names `time` and `model`: binding
# `time` here would replace the imported `time` module and break every
# `time.time()` call when a training cell is re-run.
print("\nModel Architecture Comparison:")
for arch_name, arch_acc, arch_seconds in zip(
    architecture_comparison['Model'],
    architecture_comparison['Validation Accuracy'],
    architecture_comparison['Training Time (s)'],
):
    print(f"{arch_name}: Accuracy = {arch_acc:.4f}, Training Time = {arch_seconds:.2f} seconds")

In [None]:
# Score each trained architecture on the held-out test split.
test_loss_base, test_acc_base = baseline_model.evaluate(test_ds)
test_loss_deep, test_acc_deep = deeper_model.evaluate(test_ds)
test_loss_wide, test_acc_wide = wider_model.evaluate(test_ds)

print("Test accuracy for each model")
for _label, _accuracy in (
    ("Baseline Model", test_acc_base),
    ("Deeper Model", test_acc_deep),
    ("Wider Model", test_acc_wide),
):
    print(f"{_label}: {_accuracy}")

In [None]:
# Tick labels for the chart. Named `model_labels` rather than `models` so the
# imported `tensorflow.keras.models` module is not overwritten (which would
# break `models.Sequential(...)` when a model-definition cell is re-run).
model_labels = ['Baseline Model', 'Deeper Model', 'Wider Model']
test_acc = [test_acc_base, test_acc_deep, test_acc_wide]
val_acc = [baseline_val_acc, deeper_val_acc, wider_val_acc]

barWidth = 0.3
#set bar positions: test bars on the tick, validation bars one width to the right
r1 = np.arange(len(model_labels))
r2 = [x + barWidth for x in r1]

#create the grouped bar chart
plt.figure(figsize=(10, 6))
plt.bar(r1, test_acc, width=barWidth, label='Test Accuracy', color='blue')
plt.bar(r2, val_acc, width=barWidth, label='Validation Accuracy', color='orange')

#add labels and title
plt.xlabel('Models')
plt.ylabel('Accuracy')
plt.title('Test and Validation Accuracy Comparison')
# Centre each tick between the two bars of its group.
plt.xticks([r + barWidth/2 for r in range(len(model_labels))], model_labels)
plt.ylim(0, 1.1)

plt.legend()

#add grid
plt.grid(axis='y', linestyle='--', alpha=0.7)

plt.tight_layout()
plt.show()

## Test on own images

In [None]:
# Make sure the target folder exists (no error if it is already there).
os.makedirs('/content/my_test_images', exist_ok=True)

# Open Colab's upload dialog; the result maps each uploaded file name to its
# content, and the files are also written to the current working directory.
from google.colab import files
uploaded = files.upload()

# Move every uploaded file into the folder, keeping its name and extension.
for fn in uploaded:
   os.rename(fn, f'/content/my_test_images/{fn}')
   print(f"Saved {fn} to /content/my_test_images/{fn}")

# List what ended up in the folder, restricted to common image extensions.
image_files = []
for f in os.listdir('/content/my_test_images'):
    if f.lower().endswith(('.png', '.jpg', '.jpeg')):
        image_files.append(f)
print(f"\nFound {len(image_files)} image files in /content/my_test_images:")
for img in image_files:
    print(f"- {img}")

In [None]:
# Check if the folder exists
folder_path = '/content/my_test_images'
print(f"Folder exists: {os.path.exists(folder_path)}")

# List contents of the parent directory to verify the folder name
parent_dir = os.path.dirname(folder_path)
print(f"Contents of {parent_dir}:")
print(os.listdir(parent_dir))

# If the folder exists, list its contents
if os.path.exists(folder_path):
    print(f"\nContents of {folder_path}:")
    # Named `folder_entries` rather than `files` so the imported
    # `google.colab.files` module is not overwritten by a plain list.
    folder_entries = os.listdir(folder_path)
    print(folder_entries)

    # Check file extensions
    print("\nFile extensions:")
    for entry in folder_entries:
        _, ext = os.path.splitext(entry)
        print(f"{entry}: {ext}")

In [None]:
# Define variables
model = baseline_model  # Or whichever model you want to test
image_folder = '/content/my_test_images'  # Path to your test images folder
image_size = (224, 224)  # Size to resize images to
# `class_names` is the list reported by the dataset loader earlier in the
# notebook; it is reused here instead of being overwritten with a hard-coded
# list, so predicted indices always map to the labels used in training.

# Get all image files
image_files = [f for f in os.listdir(image_folder)
              if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

print(f"Found {len(image_files)} image files: {image_files}")

if not image_files:
    print(f"No images found in {image_folder}")
else:
    # Set up the plot: one row per image (picture on the left, scores on the right)
    n_images = len(image_files)
    plt.figure(figsize=(15, 4 * n_images))

    # Process each image
    for i, img_file in enumerate(image_files):
        # Load and preprocess image
        img_path = os.path.join(image_folder, img_file)
        img = image.load_img(img_path, target_size=image_size)
        img_array = image.img_to_array(img)
        # Keep the raw 0-255 pixel range: the models above contain no rescaling
        # layer and were trained on unscaled pixels, so dividing by 255 here
        # would feed them inputs unlike anything seen during training.
        img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

        # Make prediction
        predictions = model.predict(img_array, verbose=0)
        predicted_class_idx = int(np.argmax(predictions[0]))
        predicted_class = class_names[predicted_class_idx]
        confidence = predictions[0][predicted_class_idx] * 100

        # Display image and prediction
        plt.subplot(n_images, 2, i*2 + 1)
        plt.imshow(img)
        plt.title(f"File: {img_file}")
        plt.axis('off')

        # Display prediction details
        plt.subplot(n_images, 2, i*2 + 2)
        # Create bar chart of predictions, highlighting the winning class
        bars = plt.bar(class_names, predictions[0])
        bars[predicted_class_idx].set_color('red')
        plt.ylim([0, 1.0])
        plt.title(f"Prediction: {predicted_class} ({confidence:.1f}%)")

    plt.tight_layout()
    plt.show()

# Hyperparameters
Here we apply different hyperparameters to each model to see whether they can be improved further.

## Learning Rate

In [None]:
# Learning rates to compare for every architecture.
learning_rates = [0.01, 0.001, 0.0001]

# lr_results[model_name][lr] -> {'test_accuracy': float, 'history': dict}
lr_results = {}

for model_name, model_architecture in [
    ("baseline", baseline_model),
    ("deeper", deeper_model),
    ("wider", wider_model)
]:

  print(f"\ntesting lr for {model_name} model")

  model_results = {}
  for lr in learning_rates:
    print(f"testing lr: {lr}")

    # Start every run from a freshly initialised copy of the architecture.
    # Re-compiling and re-fitting the already-trained model would carry its
    # weights from one learning rate to the next, so the runs would not be
    # comparable. The module is reached through `tf.keras` so this works even
    # if the short name `models` has been rebound elsewhere in the notebook.
    trial_model = tf.keras.models.clone_model(model_architecture)

    trial_model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    history = trial_model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=10,
        callbacks=[early_stop],
        verbose=1
    )

    # Local names avoid clobbering the `test_acc` list built for the earlier chart.
    # NOTE(review): choosing a learning rate by test accuracy tunes on the test
    # split; consider selecting on validation accuracy instead.
    trial_test_loss, trial_test_acc = trial_model.evaluate(test_ds)

    model_results[lr] = {
       'test_accuracy': trial_test_acc,
       'history': history.history
    }

    print(f"LR={lr}, Test accuracy={trial_test_acc:.4f}")

  lr_results[model_name] = model_results


In [None]:
# One panel per architecture; each curve is the validation accuracy per epoch
# for one learning rate.
plt.figure(figsize=(15, 5))
for _panel, (model_name, _per_lr) in enumerate(lr_results.items(), start=1):
    plt.subplot(1, 3, _panel)

    for lr, results in _per_lr.items():
        _val_curve = results['history']['val_accuracy']
        plt.plot(_val_curve, label=f'LR={lr}')

    plt.title(f'{model_name} Model')
    plt.xlabel('Epoch')
    plt.ylabel('Validation Accuracy')
    plt.legend()
    plt.grid(True)

# Lay out the panels first, then add the figure title and make room for it.
plt.tight_layout()
plt.suptitle('Effect of Learning Rate on Model Convergence', fontsize=16)
plt.subplots_adjust(top=0.85)
plt.show()

In [None]:
# Grouped bars: one group per architecture, one bar per learning rate.
plt.figure(figsize=(12, 6))
model_names = list(lr_results.keys())
x = np.arange(len(model_names))
width = 0.25

for i, lr in enumerate(learning_rates):
    # Test accuracy of every architecture at this learning rate.
    accuracies = []
    for _name in model_names:
        accuracies.append(lr_results[_name][lr]['test_accuracy'])
    _bar_positions = x + i*width
    plt.bar(_bar_positions, accuracies, width, label=f'LR={lr}')

plt.title('Impact of Learning Rate on Test Accuracy')
plt.ylabel('Test Accuracy')
# Put the tick under the middle bar of each group.
plt.xticks(x + width, model_names)
plt.ylim(0, 1.1)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.legend()
plt.show()