In [1]:
#Importing modules
import tensorflow as tf
import numpy as np
import seaborn as sns
from tensorflow.keras import models, layers
import matplotlib.pyplot as plt
import os
from tensorflow.keras import models, layers
from PIL import Image


In [2]:
# Report the TensorFlow release this notebook is running against.
import tensorflow as tf
print(tf.__version__)

In [3]:
# Notebook-wide settings for the image input pipeline.
STD_IMG_SIZE = 256  # every image is resized to STD_IMG_SIZE x STD_IMG_SIZE
BATCH_SIZE = 32     # images per batch
EPISODES = 2        # passed to model.fit as `epochs`
CHANNELS = 3        # last dimension of the model's input shape

# Load the images under "dataset_images" into a shuffled, batched tf.data dataset.
tomato_image_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    "dataset_images",
    shuffle=True,
    image_size=(STD_IMG_SIZE, STD_IMG_SIZE),
    batch_size=BATCH_SIZE,
)

In [5]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training-time preprocessing: scale pixel values by 1/255 and apply random
# rotations (range 10) and random horizontal flips.
train_data_augmentation = ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=10,
    rescale=1.0 / 255,
)

# Stream training batches from "dataset/train" with sparse (class-index) labels.
train_data_generator = train_data_augmentation.flow_from_directory(
    directory="dataset/train",
    class_mode="sparse",
    batch_size=BATCH_SIZE,
    target_size=(STD_IMG_SIZE, STD_IMG_SIZE),
)


In [6]:
# Show the class-name -> label-index mapping used by the training generator.
train_data_generator.class_indices

In [7]:
# Class names are the keys of the training generator's class_indices mapping,
# kept in the mapping's own order.
class_names = [*train_data_generator.class_indices]
class_names

In [8]:
from PIL import Image
!pip3 install Pillow==6.0.0

In [9]:
# Pull a single batch from the training generator and show the image array's shape.
image_batch, label_batch = next(iter(train_data_generator))
print(image_batch.shape)

In [10]:
# Validation preprocessing: rescale only. Random rotations/flips are a
# training-time augmentation; applying them to validation images would make
# the validation metrics vary from run to run.
validation_data_augmentation = ImageDataGenerator(
    rescale=1./255
)

# Build the validation generator from the validation preprocessor defined
# above. (The earlier code reused train_data_augmentation here, leaving
# validation_data_augmentation unused.)
validation_data_generator = validation_data_augmentation.flow_from_directory(
    "dataset/val",
    target_size=(STD_IMG_SIZE, STD_IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="sparse"
)


In [11]:
# Test preprocessing: rescale only, so evaluation sees unmodified images
# scaled the same way as the training inputs.
test_data_augmentation = ImageDataGenerator(
    rescale=1./255
)

# Build the test generator from the test preprocessor defined above.
# (The earlier code reused train_data_augmentation here, leaving
# test_data_augmentation unused.)
test_data_generator = test_data_augmentation.flow_from_directory(
    "dataset/test",
    target_size=(STD_IMG_SIZE, STD_IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="sparse"
)


In [12]:
# Shape of a single input image: height x width x color channels.
# The batch dimension is not part of this shape.
input_shape = (STD_IMG_SIZE, STD_IMG_SIZE, CHANNELS)

# One output unit per class discovered in the training directory, rather than
# a hard-coded 3, so the network stays in sync with the dataset.
num_classes = len(class_names)

# Plain CNN: six Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages followed by a
# small dense classifier. No augmentation or rescaling layers live in the
# model; that preprocessing is done by the data generators.
# With a 256x256 input the six stages shrink the feature map to 2x2x64.
model = models.Sequential([
    layers.InputLayer(input_shape=input_shape),

    # Stage 1: 32 filters.
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D((2, 2)),

    # Stage 2: 64 filters (as are all remaining stages).
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D((2, 2)),

    # Stage 3.
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D((2, 2)),

    # Stage 4.
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D((2, 2)),

    # Stage 5.
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D((2, 2)),

    # Stage 6.
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D((2, 2)),

    # Flatten the 2D feature maps into a 1D vector for the dense layers.
    layers.Flatten(),

    layers.Dense(64, activation="relu"),
    # Softmax over the classes; pairs with the sparse categorical
    # cross-entropy loss (from_logits=False) used at compile time.
    layers.Dense(num_classes, activation="softmax"),
])


In [13]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

In [14]:
# Number of batches produced by the tf.data input pipeline for tomato_image_dataset.
# Each batch holds up to BATCH_SIZE (32) images, so this value times BATCH_SIZE is an
# upper bound on the total number of images (the final batch may be smaller).
len(tomato_image_dataset)

In [16]:
# The labels yielded by tomato_image_dataset index into that dataset's own
# class list, so take the names from it rather than from the training
# generator (which was built from a different directory).
dataset_class_names = tomato_image_dataset.class_names

# Set figure size and spacing between subplots
plt.figure(figsize=(12, 8))
plt.subplots_adjust(wspace=0.4, hspace=0.4)

# Display a few images to check that the labels correspond to the images
for batch, labels in tomato_image_dataset.take(1):
    # Show at most 15 images; a small dataset may yield a shorter batch.
    n_shown = min(15, int(batch.shape[0]))
    for i in range(n_shown):
        ax = plt.subplot(3, 5, i + 1)  # 3x5 grid
        plt.imshow(batch[i].numpy().astype("uint8"))
        plt.title(dataset_class_names[int(labels[i])])
        plt.axis("off")

plt.show()

👇
model.compile sets up the training configuration for the model, including how it learns (optimizer), how it measures performance (loss function), and what metrics to track during training. Each parameter is carefully chosen to optimize model training and improve classification accuracy.

In [17]:
# Training configuration: Adam optimizer, sparse categorical cross-entropy on
# probabilities (from_logits=False, matching the softmax output), and accuracy
# as the tracked metric.
model.compile(
    metrics=["accuracy"],
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer="adam",
)

In [None]:
# Train the model using model.fit().
# `batch_size` is intentionally not passed: the generators already yield
# batches of BATCH_SIZE, and Keras documents that batch_size must not be
# specified for generator / Sequence inputs.
epoch_history = model.fit(
    train_data_generator,
    epochs=EPISODES,
    verbose=1,
    validation_data=validation_data_generator
)

In [None]:
# Evaluate on the held-out test generator. The generator defined earlier in
# this notebook is `test_data_generator`; `test_generator` does not exist.
# With the compile settings used here, `scores` is [loss, accuracy].
scores = model.evaluate(test_data_generator)

In [None]:

# Seaborn look-and-feel for the curves below.
sns.set_style("whitegrid")
sns.set_palette("Set2")

training_history = epoch_history.history

# Replace infinite entries with NaN in every tracked series
# (written back into the history dict).
for series_key in ("accuracy", "val_accuracy", "loss", "val_loss"):
    raw_series = training_history[series_key]
    training_history[series_key] = np.where(np.isinf(raw_series), np.nan, raw_series)

plt.figure(figsize=(12, 6))

# One panel per metric: (history key, display label, legend position).
panel_specs = (
    ("accuracy", "Accuracy", "lower right"),
    ("loss", "Loss", "upper right"),
)

for panel_index, (metric_key, metric_label, legend_loc) in enumerate(panel_specs, start=1):
    plt.subplot(1, 2, panel_index)

    # Training curve first, then validation curve, on the same axes.
    for curve_key, curve_prefix in ((metric_key, "Training"), (f"val_{metric_key}", "Validation")):
        curve_values = training_history[curve_key]
        sns.lineplot(
            x=range(len(curve_values)),
            y=curve_values,
            label=f"{curve_prefix} {metric_label}",
        )

    plt.legend(loc=legend_loc)
    plt.title(f"Training and Validation {metric_label}")
    plt.xlabel("Epochs")
    plt.ylabel(metric_label)

# Keep the two panels from overlapping, then render.
plt.tight_layout()
plt.show()


👇
Using `expand_dims` to add an extra dimension at index 0 creates a batch of one image, which is necessary when working with models that expect input data in batches, even if you're processing a single image. This ensures compatibility between the input data shape and the model's input requirements.

Why It's Necessary:
Machine learning models, especially deep learning models, are often designed to process data in batches for efficiency and parallelization.
Even if you're working with a single image during inference (making predictions), the model expects input data in batch format.
Adding this extra dimension ensures that the input data conforms to the expected batch format, allowing the model to process the image correctly.

In [None]:
def predict(trained_model, input_image, class_labels=None):
    """Classify a single image and report the winning class with its confidence.

    Args:
        trained_model: Object exposing ``predict(batch)`` that returns one row
            of per-class scores for each image in the batch.
        input_image: A single image accepted by ``img_to_array``. It is passed
            to the model without any rescaling, so it should already be
            preprocessed the same way as the data the model was trained on.
        class_labels: Optional sequence of class names indexed by class id.
            Defaults to the notebook-level ``class_names``.

    Returns:
        Tuple ``(predicted_class, confidence)``; ``confidence`` is the highest
        score expressed as a percentage, rounded to two decimals.
    """
    if class_labels is None:
        # The notebook's class list is `class_names`; the previously used
        # `classes_dataset` is not defined anywhere in this notebook.
        class_labels = class_names

    image_array = tf.keras.preprocessing.image.img_to_array(input_image)
    # The model works on batches, so wrap the image in a batch of one.
    image_batch = tf.expand_dims(image_array, 0)

    # Scores for the only image in the batch.
    scores = trained_model.predict(image_batch)[0]

    predicted_class = class_labels[int(np.argmax(scores))]
    confidence = round(100 * np.max(scores), 2)

    return predicted_class, confidence


## Running prediction on few images

In [None]:
# Create a figure for displaying images
plt.figure(figsize=(18, 18))

# Take one batch from the test generator. (`testing_data` is not defined in
# this notebook; the test images are served by `test_data_generator`.)
batch_images, batch_labels = next(iter(test_data_generator))

# Display at most 15 images; the batch may hold fewer.
for i in range(min(15, len(batch_images))):
    # Create a subplot for each image
    ax = plt.subplot(3, 5, i + 1)

    # The generator yields NumPy float images already rescaled by 1/255, so
    # they are shown as-is; casting to uint8 would blank them out.
    plt.imshow(batch_images[i])

    # The same rescaled image is what the model was trained on, so it is
    # passed to predict() unchanged.
    predicted_class, confidence = predict(model, batch_images[i])

    # Generator labels are class indices stored as floats; convert before
    # indexing into class_names.
    actual_class = class_names[int(batch_labels[i])]

    # Set the title with actual class, predicted class, and confidence
    plt.title(f"Actual: {actual_class},\nPredicted: {predicted_class}.\nConfidence: {confidence}%")

    # Turn off axis for cleaner display
    plt.axis("off")

# Show the plot
plt.show()


## Saving the model

In [None]:
import os
import tensorflow as tf

# Directory that holds one sub-directory per saved model version.
model_dir = "../model_versions"
# exist_ok avoids the separate existence check and its race.
os.makedirs(model_dir, exist_ok=True)


def _version_of(entry_name):
    """Return N for a directory entry named like ``v_N`` / ``v_N.ext``, else None."""
    if not entry_name.startswith('v_'):
        return None
    try:
        return int(entry_name.split('_')[1].split('.')[0])
    except ValueError:
        # Entries such as "v_final" are not version numbers; ignore them
        # instead of aborting the save.
        return None


# Find the maximum version number already present (0 when there is none).
versions = [v for v in map(_version_of, os.listdir(model_dir)) if v is not None]
latest_version = max(versions + [0])
new_version = latest_version + 1

# Save the model in SavedModel format
saved_model_dir = f"{model_dir}/v_{new_version}"
tf.saved_model.save(model, saved_model_dir)

# Also save a copy in .h5 format inside the same version directory
model.save(f"{saved_model_dir}/model_v{new_version}.h5")
