<a href="https://colab.research.google.com/github/safwanny1/apples-and-oranges/blob/main/applesandoranges.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Core Libraries
import shutil
import os
import numpy as np
import matplotlib.pyplot as plt

# TensorFlow and Keras
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image

# KaggleHub for dataset download
import kagglehub

# Dataset details
kaggle_dataset_id = "balraj98/apple2orange-dataset"
# Bare directory name (no leading "./" or trailing "/"). The later cells wrap
# this value in their own path prefixes, so extra separators here only produce
# paths such as "././AppleOrange_dataset//".
local_dataset_name = "AppleOrange_dataset"

try:
  # Download the latest version; `path` is the local directory of the download
  print(f"Attempting to download the latest dataset: {kaggle_dataset_id}")
  path = kagglehub.dataset_download(kaggle_dataset_id)
  print(f"Dataset successfully downloaded to: {path}")
except Exception as e:
  print(f"Error downloading dataset: {e}")
  # Re-raise instead of calling exit(): in a notebook exit() asks the kernel to
  # shut down and discards all state, whereas re-raising stops execution here
  # and keeps the full traceback visible.
  raise

Attempting to download the latest dataset: balraj98/apple2orange-dataset
Downloading from https://www.kaggle.com/api/v1/datasets/download/balraj98/apple2orange-dataset?dataset_version_number=1...


100%|██████████| 74.8M/74.8M [00:00<00:00, 175MB/s]

Extracting files...





Dataset successfully downloaded to: /root/.cache/kagglehub/datasets/balraj98/apple2orange-dataset/versions/1


In [None]:
# Resolve the dataset folder named in block 1 into a clean relative path.
# normpath collapses redundant "./" and "//" segments, so the result is the
# same directory whether or not local_dataset_name carries extra separators.
local_path = os.path.normpath(local_dataset_name)

# Names of the two split directories (also used by the data-loading cell)
training_set_dir_name = "training_set"
testing_set_dir_name = "testing_set"

training_set_path = os.path.join(local_path, training_set_dir_name)
testing_set_path = os.path.join(local_path, testing_set_dir_name)

# Create both split directories; makedirs also creates local_path as their parent
os.makedirs(training_set_path, exist_ok=True)
os.makedirs(testing_set_path, exist_ok=True)

# Map each source directory name to the split directory that receives it
dataset_structure_map = {
    "trainA": training_set_path,
    "trainB": training_set_path,
    "testA": testing_set_path,
    "testB": testing_set_path,
}

# Copy the downloaded dataset into the local train/test layout
print(f"Organizing dataset from {path} to {local_path}")
for item in os.listdir(path):
  destination_root = dataset_structure_map.get(item)
  if destination_root is None:
    # Anything outside the mapping is not part of the train/test layout
    continue

  source = os.path.join(path, item)
  try:
    if os.path.isdir(source):
      # Keep each source folder as its own subdirectory of the split directory
      dest_subdir = os.path.join(destination_root, item)
      print(f"Copying directory {source} to {dest_subdir}")
      # dirs_exist_ok lets the cell be re-run without failing on existing folders
      shutil.copytree(source, dest_subdir, dirs_exist_ok=True)
    else:
      # A mapped name that is a plain file is copied straight into the split directory
      print(f"Copying file {source} to {destination_root}")
      shutil.copy2(source, destination_root)
  except OSError as e:
    # shutil.Error is an OSError subclass, so this covers copytree/copy2 failures.
    # Best-effort: report the failure and continue with the remaining items.
    print(f"Error copying {item}: {e}")

print(f"Dataset organization complete. Saved at: {local_path}")

Organizing dataset from /root/.cache/kagglehub/datasets/balraj98/apple2orange-dataset/versions/1 to ././AppleOrange_dataset//
Copying directory /root/.cache/kagglehub/datasets/balraj98/apple2orange-dataset/versions/1/testB to ././AppleOrange_dataset//testing_set/testB
Copying directory /root/.cache/kagglehub/datasets/balraj98/apple2orange-dataset/versions/1/trainA to ././AppleOrange_dataset//training_set/trainA
Copying directory /root/.cache/kagglehub/datasets/balraj98/apple2orange-dataset/versions/1/trainB to ././AppleOrange_dataset//training_set/trainB
Copying directory /root/.cache/kagglehub/datasets/balraj98/apple2orange-dataset/versions/1/testA to ././AppleOrange_dataset//testing_set/testA
Dataset organization complete. Saved at: ././AppleOrange_dataset//


In [None]:

# Absolute path of the dataset folder created by the organizing cell.
# It is resolved against the current working directory instead of hard-coding
# Colab's "/content", so it always matches where the files were copied.
base_path = os.path.abspath(local_dataset_name)

train_dir = os.path.join(base_path, training_set_dir_name)
test_dir = os.path.join(base_path, testing_set_dir_name)

# Shared loader settings (adjust BATCH_SIZE depending on available memory)
IMAGE_SIZE = (150, 150)
BATCH_SIZE = 32

# Training generator: rescale pixel values by 1/255 and apply data
# augmentation to improve robustness.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20, # Rotate images by up to 20 degrees
    width_shift_range=0.2, # Shift image horizontally by up to 20% of the width
    height_shift_range=0.2, # Shift image vertically by up to 20% of the height
    shear_range=0.2, # Apply shearing transformation
    zoom_range=0.2, # Apply zoom transformation
    horizontal_flip=True, # Flip images horizontally
    fill_mode='nearest' # Fill newly created pixels after transformations
    # You can experiment with adding other augmentations within this section,
    # e.g. brightness_range, channel_shift_range
)

# Only rescale the test set (no augmentation)
test_datagen = ImageDataGenerator(rescale=1./255)

# Load the training dataset.
# class_mode='binary' is used for binary classification; the class labels are
# taken from the subdirectories of train_dir.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

# Load the testing dataset.
# shuffle=False keeps a fixed order so predictions can be matched against
# test_generator.classes; the evaluation metrics do not depend on the order.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

Found 2014 images belonging to 2 classes.
Found 514 images belonging to 2 classes.


In [None]:
# Define the Sequential model: three Conv2D + MaxPooling2D stages followed by
# a dense classifier head.
model = tf.keras.models.Sequential([
    # Declare the input shape with an explicit Input layer. Passing
    # `input_shape` to the first Conv2D instead makes Keras emit a warning
    # when the layer is constructed.
    tf.keras.Input(shape=(150, 150, 3)),

    # First convolutional layer with 32 filters, 3x3 kernel, ReLU activation
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    # First max pooling layer
    tf.keras.layers.MaxPooling2D(2,2),

    # Second convolutional layer with 64 filters, 3x3 kernel, ReLU activation
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    # Second max pooling layer
    tf.keras.layers.MaxPooling2D(2,2),

    # Third convolutional layer with 128 filters, 3x3 kernel, ReLU activation
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    # Third max pooling layer
    tf.keras.layers.MaxPooling2D(2,2),

    # Flatten the output from the convolutional layers
    tf.keras.layers.Flatten(),
    # Dense layer with 512 units and ReLU activation
    tf.keras.layers.Dense(512, activation='relu'),
    # Output dense layer with 1 unit and sigmoid activation for binary classification
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
# Use binary crossentropy as the loss function for binary classification
# Use the Adam optimizer (consider experimenting with other optimizers and learning rates)
# Evaluate the model based on accuracy
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# File the trained model is written to. It is defined before training so the
# save step cannot fail with a NameError after the epochs have already run.
model_path = "apple_orange_model.h5"

# Train the model using the data generators
# Train for a specified number of epochs (consider increasing this for better results)
# Validate the model during training using the testing data
print ("Starting model training...")
history = model.fit(
    train_generator,
    epochs=5,
    validation_data=test_generator
)
print ("Model training has finished")

# Save the trained model to the h5 file named above
model.save(model_path)

Starting model training...


  self._warn_if_super_not_called()


Epoch 1/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 2s/step - accuracy: 0.7089 - loss: 0.7541 - val_accuracy: 0.8969 - val_loss: 0.2671
Epoch 2/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m123s[0m 2s/step - accuracy: 0.9096 - loss: 0.1979 - val_accuracy: 0.9280 - val_loss: 0.2100
Epoch 3/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 2s/step - accuracy: 0.9370 - loss: 0.1714 - val_accuracy: 0.9222 - val_loss: 0.2365
Epoch 4/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 2s/step - accuracy: 0.9451 - loss: 0.1607 - val_accuracy: 0.9241 - val_loss: 0.2216
Epoch 5/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 2s/step - accuracy: 0.9305 - loss: 0.1789 - val_accuracy: 0.9300 - val_loss: 0.2006
Model training has finished


NameError: name 'model_path' is not defined

In [None]:
# === Visualization of training results ===
print("Visualizing training results...")

# One figure, two side-by-side panels: accuracy on the left, loss on the right
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12,5))

# Each row: (axes, training key, validation key, training label,
#            validation label, panel title, y-axis label)
panels = (
    (accuracy_ax, 'accuracy', 'val_accuracy',
     'Training Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
    (loss_ax, 'loss', 'val_loss',
     'Training Loss', 'Validation Loss', 'Model Loss', 'Loss'),
)

for ax, train_key, val_key, train_label, val_label, title, y_label in panels:
    # Training curve first, validation curve second, one point per epoch
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()

# Adjust layout to prevent overlap and display the plot
fig.tight_layout()
plt.show()

In [None]:
# Model evaluation: loss and accuracy of the trained model on the test generator
print("Evaluating model on test set...")
evaluation_results = model.evaluate(test_generator)
# Unpack in the order returned here: loss first, then the accuracy metric
test_loss, test_acc = evaluation_results
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.2f}")

In [None]:
# Reload the trained Keras model from disk when a saved copy is available.
# `load_model` is not imported anywhere in this notebook, so it is reached
# through `tf.keras.models`. `model_path` can be missing if the save step did
# not complete, so fall back to a default file name instead of raising NameError.
model_path = globals().get("model_path", "apple_orange_model.h5")
if os.path.exists(model_path):
    model = tf.keras.models.load_model(model_path)
else:
    # No saved file: keep using the `model` object that is already in memory
    print(f"No saved model found at {model_path}; using the in-memory model.")


def predict_image(img_path):
    """
    Classify a single image file as "Apple" or "Orange" and show it.

    The image is loaded at 150x150, its pixel values are divided by 255, and
    the resulting batch of one is passed to the module-level `model`. A score
    above 0.5 is reported as "Orange", anything else as "Apple". The image is
    displayed with the predicted label as its title.

    Problems are printed rather than raised: a missing file produces an
    "Image not found" message, and any exception during loading, prediction
    or plotting produces an "Error processing image" message.

    Args:
        img_path (str): The path to the image file.

    Returns:
        None.
    """
    # Guard clause: nothing to do when the file is not on disk
    image_is_missing = not os.path.exists(img_path)
    if image_is_missing:
        print(f"Error: Image not found at {img_path}")
        return

    try:
        # Read the file, resized to the 150x150 target size
        picture = image.load_img(img_path, target_size=(150, 150))
        # Convert to a NumPy array of pixel values
        pixels = image.img_to_array(picture)
        # Add a leading batch axis (batch of size 1), then normalize by 255
        batch = np.expand_dims(pixels, axis=0)
        batch = batch / 255.0
        # Run the trained model on the single-image batch
        scores = model.predict(batch)
        # Threshold the score at 0.5 to choose the label
        if scores[0][0] > 0.5:
            label = "Orange"
        else:
            label = "Apple"

        # Show the image with the predicted label as the title
        plt.imshow(picture)
        plt.axis("off")
        plt.title(f"Prediction: {label}", fontsize=14, fontweight='bold')
        plt.show()

    except Exception as e:
        print(f"Error processing image {img_path}: {e}")

# Pick one image from the testA folder of the testing set. The path is built
# from `test_dir` (set in the data-loading cell) rather than repeating a
# hard-coded absolute "/content/..." location.
img_path = os.path.join(test_dir, "testA", "n07740461_11260.jpg")
# predict_image displays the image and prediction itself, so no print is needed here.
predict_image(img_path)