In [36]:
# Lib imports
import os
from google.colab import drive
import shutil
import filecmp
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Activation, Add, MaxPooling2D, GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras import regularizers
import numpy as np


In [17]:
# Mount Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [18]:
# 1. Define paths
source_path = '/content/drive/MyDrive/pebbles vs shells'
destination_path = '/content/custom_data'

# HELPER FUNCTION: Recursively check if directories match
def dirs_match(dir1, dir2):
    """
    Compares two directories recursively.
    Returns True if they are identical (same files, same structure), False otherwise.
    """
    # dircmp compares files and directories in 'dir1' and 'dir2'
    # strict=False allows for comparing on file attributes (faster),
    # but for "perfect" copies we rely on diff_files logic.
    comparison = filecmp.dircmp(dir1, dir2)

    # Check for files found only in one directory (left_only/right_only)
    # or files that differ in content/metadata (diff_files)
    if comparison.left_only or comparison.right_only or comparison.diff_files:
        return False

    # Recursively check subdirectories
    for subdir in comparison.common_dirs:
        if not dirs_match(os.path.join(dir1, subdir), os.path.join(dir2, subdir)):
            return False

    return True

# 2. Main Logic: Check, Clean, and Copy
print(f"Source: {source_path}")
print(f"Destination: {destination_path}")
print("-" * 30)

try:
    should_copy = True

    # Step A: Check if destination exists
    if os.path.exists(destination_path):
        print("Folder found. Verifying integrity against Google Drive source...")

        # Step B: Check if it is a 1:1 match
        if dirs_match(source_path, destination_path):
            print("✅ Perfect match confirmed. Skipping copy to save time.")
            should_copy = False
        else:
            print("⚠️ Mismatch detected (files are missing or modified).")
            print("Reinstalling to ensure 1:1 copy...")
            shutil.rmtree(destination_path) # Delete the imperfect copy

    # Step C: Copy if needed (either new or after deletion)
    if should_copy:
        print(f"Copying data from Drive... (This may take a moment)")
        shutil.copytree(source_path, destination_path)
        print(f"✅ Success! Folder copied to: {destination_path}")

except FileNotFoundError:
    print(f"❌ Error: Could not find folder at {source_path}. Check the path/spelling.")
except Exception as e:
    print(f"❌ An error occurred: {e}")

print("-" * 30)

# DATASET DIRECTORY CONFIGURATION
base_dir = destination_path
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')

# Verification
print(f"Training directory set to: {train_dir}")
print(f"Testing directory set to: {test_dir}")

if os.path.exists(train_dir):
    print(f"Verified: Found {len(os.listdir(train_dir))} files/folders in train dir.")
else:
    print(f"Warning: {train_dir} does not exist. Check your folder structure.")

Source: /content/drive/MyDrive/pebbles vs shells
Destination: /content/custom_data
------------------------------
Folder found. Verifying integrity against Google Drive source...
⚠️ Mismatch detected (files are missing or modified).
Reinstalling to ensure 1:1 copy...
Copying data from Drive... (This may take a moment)
✅ Success! Folder copied to: /content/custom_data
------------------------------
Training directory set to: /content/custom_data/train
Testing directory set to: /content/custom_data/test
Verified: Found 2 files/folders in train dir.


In [19]:
# IMAGE PARAMETERS
# Used to resize the input images, also will determine the input size of your input layer.
IMG_SIZE = (128, 128)
BATCH_SIZE = 32

In [20]:
# DATA PREPROCESSING & AUGMENTATION

# 1. Force the class names to ensure Train and Test use the EXACT same mapping
# Make sure these match your actual folder names exactly!
CLASS_NAMES = ['pebbles', 'shells']

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    validation_split=0.2
)
test_datagen = ImageDataGenerator(rescale=1./255)

# 2. Add 'classes=CLASS_NAMES' to all generators
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    classes=CLASS_NAMES,  # <--- FORCE ORDER
    class_mode='binary',
    subset='training'
)

val_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    classes=CLASS_NAMES,  # <--- FORCE ORDER
    class_mode='binary',
    subset='validation'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    classes=CLASS_NAMES,  # <--- FORCE ORDER
    class_mode='binary',
    shuffle=False
)

# 3. Print the mapping so we are 100% sure
print("------------------------------------------------")
print(f"Class Mapping: {train_generator.class_indices}")
print("------------------------------------------------")

Found 2743 images belonging to 2 classes.
Found 685 images belonging to 2 classes.
Found 856 images belonging to 2 classes.
------------------------------------------------
Class Mapping: {'pebbles': 0, 'shells': 1}
------------------------------------------------


In [38]:
# IMPROVED ARCHITECTURE: CUSTOM RESNET

def residual_block(x, filters, kernel_size=3, stride=1):
    """
    Creates a standard ResNet block with a skip connection.
    If the stride is > 1, the input is downsampled to match the output dimensions.
    """
    shortcut = x

    # --- Main Path ---
    # First Convolution
    x = Conv2D(filters, kernel_size, strides=stride, padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Second Convolution
    x = Conv2D(filters, kernel_size, padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)

    # --- Skip Connection (Shortcut) Path ---
    # If the dimensions don't match (due to stride or changing filters),
    # we process the shortcut to match the main path.
    if stride != 1 or shortcut.shape[-1] != filters:
        shortcut = Conv2D(filters, (1, 1), strides=stride, padding='same', use_bias=False)(shortcut)
        shortcut = BatchNormalization()(shortcut)

    # --- Add and Activate ---
    x = Add()([x, shortcut]) # The "Residual" connection
    x = Activation('relu')(x)
    return x

def build_resnet(input_shape, num_classes):
    inputs = Input(shape=input_shape)

    # 1. Initial Convolution & MaxPool
    # We use 64 filters and a 7x7 kernel to capture initial broad features
    x = Conv2D(64, (7, 7), strides=2, padding='same', use_bias=False)(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=2, padding='same')(x)

    # 2. Residual Stages
    # We stack blocks. The filter size doubles as the image size halves.

    # Stage 1: 64 filters (Dimension remains roughly 32x32)
    x = residual_block(x, 64)
    x = residual_block(x, 64)

    # Stage 2: 128 filters (Downsample to 16x16)
    x = residual_block(x, 128, stride=2)
    x = residual_block(x, 128)

    # Stage 3: 256 filters (Downsample to 8x8)
    x = residual_block(x, 256, stride=2)
    x = residual_block(x, 256)

    # Stage 4: 512 filters (Downsample to 4x4)
    x = residual_block(x, 512, stride=2)
    x = residual_block(x, 512)

    # 3. Classification Head
    # ResNet uses Global Average Pooling instead of Flattening
    x = GlobalAveragePooling2D()(x)

    # Final Output
    if num_classes == 1:
        activation = 'sigmoid'
    else:
        activation = 'softmax'

    outputs = Dense(num_classes, activation=activation)(x)

    model = Model(inputs, outputs, name="Custom_ResNet")
    return model

# Instantiate the model
model = build_resnet(input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3), num_classes=1)

# Define Optimizer (ResNets work well with standard Adam or SGD+Momentum)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

model.summary()

In [44]:
# TRAINING CONFIGURATION
EPOCHS = 25
STEPS_PER_EPOCH = train_generator.samples // BATCH_SIZE
VALIDATION_STEPS = val_generator.samples // BATCH_SIZE

In [45]:
# COMPILE THE MODEL
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy']
)

In [46]:
# TRAIN THE MODEL
history = model.fit(
    train_generator,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    validation_data=val_generator,
    validation_steps=VALIDATION_STEPS,
)

Epoch 1/25
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 354ms/step - accuracy: 0.7024 - loss: 0.5752 - val_accuracy: 0.5461 - val_loss: 0.7416
Epoch 2/25
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 42ms/step - accuracy: 0.6562 - loss: 0.6727 - val_accuracy: 0.4673 - val_loss: 0.9138
Epoch 3/25
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 265ms/step - accuracy: 0.7147 - loss: 0.5741 - val_accuracy: 0.5045 - val_loss: 0.7004
Epoch 4/25
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 43ms/step - accuracy: 0.7812 - loss: 0.5131 - val_accuracy: 0.5179 - val_loss: 0.6962
Epoch 5/25
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 266ms/step - accuracy: 0.7426 - loss: 0.5319 - val_accuracy: 0.7277 - val_loss: 0.5337
Epoch 6/25
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 43ms/step - accuracy: 0.7500 - loss: 0.5538 - val_accuracy: 0.7202 - val_loss: 0.5473
Epoch 7/25
[1m85/85[0m [3

In [47]:
# EVALUATE THE MODEL
test_loss, test_acc = model.evaluate(test_generator)
print(f"Test Accuracy: {test_acc}")

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 116ms/step - accuracy: 0.7717 - loss: 0.4489
Test Accuracy: 0.697429895401001


In [53]:

# SAVE THE MODEL
model.save('exercise_6_trained_model_resnet.h5')



In [54]:
# SIMPLE INFERENCE SCRIPT
from tensorflow.keras.preprocessing import image

# 1. define the mapping explicitly (Same as Cell 4)
class_map = {0: 'pebbles', 1: 'shells'}

def predict_image(img_path, model_path='pebbls_vs_shells_cnn_custom.h5'):
    try:
        # Load model and image
        model = tf.keras.models.load_model(model_path)
        img = image.load_img(img_path, target_size=IMG_SIZE)
        img_array = image.img_to_array(img) / 255.0
        img_array = np.expand_dims(img_array, axis=0)

        # Predict
        pred_score = model.predict(img_array)[0,0]

        # Logic: If score > 0.5, it is class 1. If < 0.5, it is class 0.
        if pred_score > 0.5:
            label = class_map[1] # Muffin
            confidence = pred_score
        else:
            label = class_map[0] # Chihuahua
            confidence = 1.0 - pred_score

        print(f"Prediction: {label.upper()} (Confidence: {confidence:.2f})")

    except Exception as e:
        print(f"Error predicting {img_path}: {e}")

In [56]:
base_path = '/content/custom_data/test'
chi_dir = os.path.join(base_path, 'pebbles')
muf_dir = os.path.join(base_path, 'shells')

# 2. Create the specific list of 10 images
image_paths = [
    # Pebbles images
    os.path.join(chi_dir, "Pebbles (342).jpg"),

    # Shells images
    os.path.join(muf_dir, "Shells (373).jpg"),
]

# 3. Loop through and predict
for img_path in image_paths:
    # Safety check: Only try to predict if the file actually exists
    if os.path.exists(img_path):
        print(f"\n--- Testing file: {os.path.basename(img_path)} ---")
        predict_image(img_path)  # <--- Function is now active!
    else:
        # This handles the placeholders so the loop doesn't break
        print(f"Skipping: {os.path.basename(img_path)} (File not found)")


--- Testing file: Pebbles (342).jpg ---




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
Prediction: PEBBLES (Confidence: 0.97)

--- Testing file: Shells (373).jpg ---




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
Prediction: SHELLS (Confidence: 0.83)
