# Dataset with Augmentation

## Details about Notebook

This notebook contains all the steps needed to augment the raw dataset before training a model.

### Importing Libraries

In [None]:
import os
import math
import json
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt

In [None]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    Conv2D,
    MaxPool2D,
    Flatten,
    Dense,
    Dropout,
    GlobalAveragePooling2D,
    BatchNormalization,
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import image_dataset_from_directory, load_img, img_to_array
from tensorflow.keras.applications import InceptionV3
from keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    ConfusionMatrixDisplay
)

In [None]:
# Global Variables
IMAGE_SIZE = 224  # Side length in pixels; images are loaded as IMAGE_SIZE x IMAGE_SIZE
BATCH_SIZE = 32  # Batch size used for the training and validation datasets

### Importing Dataset

#### Dataset Preparation

In [None]:
# Paths to the dataset folders (relative to the current working directory)
TRAIN_DIR = "../datasets/cropped_plant_village_dataset/train"  # Source of the training dataset
VALID_DIR = "../datasets/cropped_plant_village_dataset/valid"  # Source of the validation and test datasets
SAMPLE_IMAGE = "../datasets/cropped_plant_village_dataset/sample_image.JPG"  # Image used by the augmentation previews

##### Training Set

In [None]:
# Training data: shuffled batches of RGB images whose one-hot labels are
# inferred from the sub-directory names of TRAIN_DIR.
training_set = tf.keras.utils.image_dataset_from_directory(
    directory=TRAIN_DIR,
    # Labels
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    # Image loading
    color_mode="rgb",
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    interpolation="bilinear",
    crop_to_aspect_ratio=False,
    follow_links=False,
    # Batching and ordering
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=None,
    # No split is requested: the whole directory is used
    validation_split=None,
    subset=None,
)

##### Validation Set

In [None]:
# Validation data: same loading options as the training set, read from
# VALID_DIR.
validation_set = tf.keras.utils.image_dataset_from_directory(
    directory=VALID_DIR,
    # Labels
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    # Image loading
    color_mode="rgb",
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    interpolation="bilinear",
    crop_to_aspect_ratio=False,
    follow_links=False,
    # Batching and ordering
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=None,
    # No split is requested: the whole directory is used
    validation_split=None,
    subset=None,
)

In [None]:
# Test data: read from the same VALID_DIR folder as the validation set, but
# one image per batch and in a fixed (unshuffled) order.
test_set = tf.keras.utils.image_dataset_from_directory(
    directory=VALID_DIR,
    # Labels
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    # Image loading
    color_mode="rgb",
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    interpolation="bilinear",
    crop_to_aspect_ratio=False,
    follow_links=False,
    # Batching and ordering
    batch_size=1,
    shuffle=False,
    seed=None,
    # No split is requested: the whole directory is used
    validation_split=None,
    subset=None,
)

#### Dataset Details

In [None]:
# Function to count images in each class
def count_images_in_classes(
    dataset_dir, extensions=(".bmp", ".gif", ".jpeg", ".jpg", ".png")
):
    """
    Counts the number of image files in each class within a dataset directory.

    Every immediate sub-directory of ``dataset_dir`` is treated as one class.
    Only regular files whose name ends with one of ``extensions``
    (case-insensitive) are counted, so stray files such as ``.DS_Store`` and
    nested folders do not inflate the totals.

    Args:
        dataset_dir (str): The path to the dataset directory.
        extensions (tuple[str, ...]): File extensions that count as images.
            The default matches the formats listed as supported by Keras'
            ``image_dataset_from_directory``.

    Returns:
        dict: A dictionary where the keys are the class names (inserted in
        sorted order) and the values are the number of images in each class.
    """
    # str.endswith needs a tuple; lower-case it once for the comparison below
    suffixes = tuple(ext.lower() for ext in extensions)

    class_counts = {}
    # Sorted iteration makes the key order independent of os.listdir's order
    for class_name in sorted(os.listdir(dataset_dir)):
        class_path = os.path.join(dataset_dir, class_name)
        if not os.path.isdir(class_path):
            continue  # Files next to the class folders are not classes
        with os.scandir(class_path) as entries:
            class_counts[class_name] = sum(
                1
                for entry in entries
                if entry.is_file() and entry.name.lower().endswith(suffixes)
            )
    return class_counts


# Count images in training and validation sets
train_class_counts = count_images_in_classes(TRAIN_DIR)
valid_class_counts = count_images_in_classes(VALID_DIR)

# Create a DataFrame for better visualization.
# Validation counts are looked up by class name rather than taken
# positionally: the two directory listings are not guaranteed to share the
# same order, and a class missing from the validation folder would otherwise
# make the columns differ in length. Missing classes are reported as 0.
df = pd.DataFrame(
    {
        "Class": list(train_class_counts),
        "Training Images": list(train_class_counts.values()),
        "Validation Images": [
            valid_class_counts.get(cls, 0) for cls in train_class_counts
        ],
    }
).sort_values(by="Class", ascending=True)

In [None]:
# Display the per-class training and validation image counts
print(df)

In [None]:
# Stacked bar chart of the per-class image counts; the axis labels are set
# directly on the Axes object returned by DataFrame.plot
df.plot(
    x="Class",
    kind="bar",
    stacked=True,
    figsize=(20, 8),
    title="Class Distribution",
).set(xlabel="Class", ylabel="Number of Images")

# Vertical tick labels keep long class names legible
plt.xticks(rotation=90, ha="center")

# Fit the rotated labels inside the figure, then render it
plt.tight_layout()
plt.show()

In [None]:
# Function to visualize one image per class
def visualize_sample_images(dataset_dir):
    """
    Visualizes one sample image per class in the dataset directory.

    Every immediate sub-directory of ``dataset_dir`` is treated as a class.
    The sample shown for a class is its first file in sorted name order, so
    the figure is reproducible between runs. Entries that are not directories
    and class folders without any file are skipped.

    Args:
        dataset_dir (str): The path to the dataset directory.

    Displays a grid of images, with one image per class, using matplotlib.
    If no sample can be found, a message is printed and nothing is drawn.
    """
    # Keep only class folders; calling os.listdir on a stray file would raise
    class_names = sorted(
        name
        for name in os.listdir(dataset_dir)
        if os.path.isdir(os.path.join(dataset_dir, name))
    )

    # Pair every non-empty class with the path of its first file
    samples = []
    for class_name in class_names:
        class_path = os.path.join(dataset_dir, class_name)
        file_names = sorted(
            name
            for name in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, name))
        )
        if file_names:
            samples.append((class_name, os.path.join(class_path, file_names[0])))

    # Nothing to draw: avoid creating a figure with zero rows
    if not samples:
        print(f"No sample images found in {dataset_dir}")
        return

    # Calculate the number of rows and columns for the subplot grid
    num_cols = 5  # You can adjust this number
    num_rows = math.ceil(len(samples) / num_cols)

    plt.figure(figsize=(num_cols * 3, num_rows * 3))
    for i, (class_name, image_path) in enumerate(samples, start=1):
        plt.subplot(num_rows, num_cols, i)  # Subplot indices are 1-based
        plt.imshow(plt.imread(image_path))
        plt.title(class_name)
        plt.axis("off")

    plt.tight_layout()
    plt.show()

In [None]:
# Show one sample image per class from the training set directory
visualize_sample_images(TRAIN_DIR)

### Image Preprocessing

#### Normalization

In [None]:
# Rescaling layer that multiplies every pixel value by 1/255
normalize = tf.keras.layers.Rescaling(1.0 / 255)

# Apply the same rescaling to the training, validation and test sets;
# labels are passed through unchanged
normalized_training_set, normalized_validation_set, normalized_test_set = (
    split.map(lambda images, labels: (normalize(images), labels))
    for split in (training_set, validation_set, test_set)
)

#### Image Augmentation

In [None]:
# NOTE: each name below must be bound to the layer object itself. Writing
# ``(Layer(...),)`` with a trailing comma creates a one-element tuple, which
# can neither be called on an image nor added to a Sequential model.

# Brightness adjustment
brighten = tf.keras.layers.RandomBrightness(
    factor=(-0.1, 0.1),
    value_range=(0.0, 1.0),
)  # Shift brightness by up to ±10% of the [0, 1] value range

# Contrast adjustment
add_contrast = tf.keras.layers.RandomContrast(
    factor=0.7
)  # Contrast factor is drawn from [1 - 0.7, 1 + 0.7]

# Rotation
rotate = tf.keras.layers.RandomRotation(
    factor=0.2,
    fill_mode="constant",
    fill_value=0.0,
)  # Rotate by up to ±20% of a full turn (±72°); uncovered pixels are filled with 0

# Horizontal and vertical flips
flip = tf.keras.layers.RandomFlip(
    mode="horizontal_and_vertical"
)  # Randomly flip horizontally and/or vertically

# Zoom
zoom = tf.keras.layers.RandomZoom(
    height_factor=(-0.2, 0.2),
    width_factor=(-0.2, 0.2),
    fill_mode="constant",
    fill_value=0.0,
)  # Zoom in/out by up to 20% per axis; uncovered pixels are filled with 0

# Gaussian noise
add_noise = tf.keras.layers.GaussianNoise(
    stddev=0.01
)  # Add zero-mean Gaussian noise; this layer is only active when training=True

In [None]:
# Chain the individual augmentation layers into one pipeline
data_augmentation = Sequential(
    [brighten, add_contrast, rotate, flip, zoom, add_noise]
)

# Apply augmentation to the training set.
# training=True is passed explicitly: inside Dataset.map the pipeline is
# called outside of model.fit, and training-only layers such as
# GaussianNoise would otherwise be skipped.
augmented_training_set = normalized_training_set.map(
    lambda x, y: (data_augmentation(x, training=True), y)
)

#### Image Enhancement (not implemented)

In [None]:
# Image Enhancements have not strongly proven to increase the accuracy

#### Image Preprocessing Details

##### Augmentation Details

In [None]:
# Test the augmentation pipeline with individual augmentations
def visualize_individual_augmentations(image_path):
    """
    Visualizes the effect of individual augmentations on an input image.

    Args:
        image_path (str): The path to the input image.

    Loads the image at IMAGE_SIZE x IMAGE_SIZE, scales it to [0, 1], applies
    each module-level augmentation layer on its own and displays the original
    next to the results in a single row.
    """
    # Load and preprocess the image
    image = load_img(image_path, target_size=(IMAGE_SIZE, IMAGE_SIZE))
    image_array = img_to_array(image) / 255.0  # Normalize to [0, 1]
    image_array = tf.expand_dims(image_array, axis=0)  # Add batch dimension

    # (panel title, layer) pairs; None marks the untouched original
    augmentations = [
        ("Original", None),
        ("Random Brightness", brighten),
        ("Random Contrast", add_contrast),
        ("Random Rotation", rotate),
        ("Random Flip", flip),
        ("Random Zoom", zoom),
        ("Gaussian Noise", add_noise),
    ]

    # Apply each augmentation and plot
    plt.figure(figsize=(20, 5))
    for i, (title, layer) in enumerate(augmentations, start=1):
        if layer is None:
            augmented_image = image_array[0]
        else:
            # training=True forces the layer to run; GaussianNoise in
            # particular returns its input unchanged otherwise
            augmented_image = layer(image_array, training=True)[0]

        # Some augmentations may push values outside [0, 1]; clip so imshow
        # receives a valid float RGB image
        augmented_image = tf.clip_by_value(augmented_image, 0.0, 1.0)

        plt.subplot(1, len(augmentations), i)
        plt.imshow(augmented_image.numpy())
        plt.title(title)
        plt.axis("off")

    plt.show()

In [None]:
# Show the effect of each augmentation layer separately on the sample image
visualize_individual_augmentations(SAMPLE_IMAGE)

In [None]:
# Test the augmentation pipeline with a sample image
def visualize_augmentation(image_path, num_augmented=5):
    """
    Visualizes the effect of the augmentation pipeline on a sample image.

    Args:
        image_path (str): The path to the input image.
        num_augmented (int): How many augmented versions to draw next to the
            original. Defaults to 5.

    Applies the full ``data_augmentation`` pipeline to the input image
    ``num_augmented`` times and displays the original image alongside the
    augmented versions in a single row.
    """
    # Load and preprocess the image
    image = load_img(image_path, target_size=(IMAGE_SIZE, IMAGE_SIZE))
    image_array = img_to_array(image) / 255.0  # Normalize to [0, 1]
    image_array = tf.expand_dims(image_array, axis=0)  # Add batch dimension

    # Apply augmentations. training=True forces every layer to run
    # (GaussianNoise is skipped otherwise); the result is clipped so imshow
    # receives a valid [0, 1] float RGB image.
    augmented_images = [
        tf.clip_by_value(
            data_augmentation(image_array, training=True)[0], 0.0, 1.0
        )
        for _ in range(num_augmented)
    ]

    # Plot original and augmented images (figure is 15 x 3 for the default 5)
    num_panels = num_augmented + 1
    plt.figure(figsize=(2.5 * num_panels, 3))
    plt.subplot(1, num_panels, 1)
    plt.imshow(image_array[0].numpy())
    plt.title("Original")
    plt.axis("off")

    for i, aug_img in enumerate(augmented_images, start=2):
        plt.subplot(1, num_panels, i)
        plt.imshow(aug_img.numpy())
        plt.title(f"Augmented {i-1}")
        plt.axis("off")
    plt.show()

In [None]:
# Show the sample image next to versions produced by the full augmentation pipeline
visualize_augmentation(SAMPLE_IMAGE)

## Conclusion