#### 1. Importing libraries

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import Image
import numpy as np
import os

#### 2. Augmentation for Healthy images

In [None]:
input_folder = 'D:\Computer Science Y4\CS Y4S2\Project II\MangoLeafBD Dataset\Healthy'
output_folder = 'D:\Computer Science Y4\CS Y4S2\Project II\MangoLeafBD Dataset\Augmented Healthy'
os.makedirs(output_folder, exist_ok=True)

# Defining ImageDataGenerator for augmentation
datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.7, 1.3]
)

# Loading images and applying augmentation
for img_name in os.listdir(input_folder):
    img_path = os.path.join(input_folder, img_name)
    img = Image.open(img_path)
    img_array = np.array(img)
    img_array = img_array.reshape((1,) + img_array.shape)

    # Generating and saving 10 augmented versions of each image
    i = 0
    for batch in datagen.flow(img_array, batch_size=1, save_to_dir=output_folder,
                              save_prefix=img_name.split('.')[0], save_format='jpg'):
        i += 1
        if i >= 10:  
            break

print("Augmentation complete!")

Augmentation complete!


#### 3. Augmentation for Infected images

In [None]:
input_folder = 'D:\Computer Science Y4\CS Y4S2\Project II\MangoLeafBD Dataset\Powdery Mildew'
output_folder = 'D:\Computer Science Y4\CS Y4S2\Project II\MangoLeafBD Dataset\Augmented Powdery Mildew'
os.makedirs(output_folder, exist_ok=True)

# Defining ImageDataGenerator for augmentation
datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.7, 1.3]
)

# Loading images and applying augmentation
for img_name in os.listdir(input_folder):
    img_path = os.path.join(input_folder, img_name)
    img = Image.open(img_path)
    img_array = np.array(img)
    img_array = img_array.reshape((1,) + img_array.shape) 

    # Generating and saving 10 augmented versions of each image
    i = 0
    for batch in datagen.flow(img_array, batch_size=1, save_to_dir=output_folder,
                              save_prefix=img_name.split('.')[0], save_format='jpg'):
        i += 1
        if i >= 10:  
            break

print("Augmentation complete!")

Augmentation complete!


#### 4. Merging Augmented Images with Original images 

In [4]:
import shutil

In [None]:
#Dataset Structure
dataset_dir = 'D:\Computer Science Y4\CS Y4S2\Project II\MangoLeafBD Dataset'
categories = ['Healthy', 'Powdery Mildew']

#Merging augmented images to respective original folders
for category in categories:
    augmented_dir = os.path.join(dataset_dir, f"Augmented {category}")
    original_dir = os.path.join(dataset_dir, category)
    if os.path.exists(augmented_dir):
        for file in os.listdir(augmented_dir):
            shutil.move(os.path.join(augmented_dir, file), original_dir)
        os.rmdir(augmented_dir)
print("Merged augmented images to respective categories")

Merged augmented images to respective categories


#### 5. Splitting Data into Training, validation and Testing

In [10]:
import random
from sklearn.model_selection import train_test_split

In [11]:
output_dir = "D:\Computer Science Y4\CS Y4S2\Project II\MangoLeafBD Dataset\Split Dataset"
os.makedirs(output_dir, exist_ok=True)

for category in ["Healthy", "Powdery Mildew"]:
    img_list = [os.path.join(dataset_dir, category, img) for img in os.listdir(os.path.join(dataset_dir, category))]
    train_imgs, temp_imgs = train_test_split(img_list, test_size=0.2, random_state=42)
    val_imgs, test_imgs = train_test_split(temp_imgs, test_size=0.5, random_state=42)

    for split, imgs in zip(["train", "validation", "test"], [train_imgs, val_imgs, test_imgs]):
        os.makedirs(os.path.join(output_dir, split, category), exist_ok=True)
        
        for img in imgs:
            shutil.move(img, os.path.join(output_dir, split, category, os.path.basename(img)))

print("Data split into train, validation and test")

Data split into train, validation and test


#### 6. Loading Data for training with TensorFlow/Keras

In [18]:
train_dir = "D:\Computer Science Y4\CS Y4S2\Project II\MangoLeafBD Dataset\Split Dataset\train"
val_dir = "D:\Computer Science Y4\CS Y4S2\Project II\MangoLeafBD Dataset\Split Dataset\validation"
test_dir = "D:\Computer Science Y4\CS Y4S2\Project II\MangoLeafBD Dataset\Split Dataset\test"

train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(train_dir, target_size=(224, 224), batch_size=32, class_mode="binary")
val_generator = train_datagen.flow_from_directory(val_dir, target_size=(224, 224), batch_size=32, class_mode="binary")
test_generator = test_datagen.flow_from_directory(test_dir, target_size=(224, 224), batch_size=32, class_mode="binary")

OSError: [WinError 123] The filename, directory name, or volume label syntax is incorrect: 'D:\\Computer Science Y4\\CS Y4S2\\Project II\\MangoLeafBD Dataset\\Split Dataset\train'