In [1]:
# Make Google Drive visible to this Colab runtime; the mount call prompts for
# authorisation the first time it runs in a session.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
!pip install split-folders

Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


In [3]:
import os
import splitfolders
from PIL import Image
import tensorflow.keras as keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img

## 1. Augmentation
* Rotation
* Width Shift
* Height Shift
* Shear
* Zoom
* Horizontal flip

In [8]:
# ---------------------------------------------------------------------------
# Offline augmentation: for every raw image, write `n_augmented_images`
# randomly transformed 512x512 JPEG copies into a per-class output folder.
# ---------------------------------------------------------------------------

# Classes this notebook expects to find as sub-folders of `root_directory`.
# Only used to sanity-check the folders discovered on disk (see warning below).
disease_names = ['Canker', 'Curling', 'Leaf_spot', 'Nutritional Deficiency', 'Powdery Mildew', 'Rust']

# Colab/Drive locations. For a local run, point these at e.g. 'Raw_Datas' and
# 'Augmented' instead.
root_directory = '/content/drive/MyDrive/Raw_Datas'
root_output_directory = '/content/drive/MyDrive/Raw_Datas/Augmented_Dataset'

# Tunables, hoisted out of the loops so they are set exactly once.
n_augmented_images = 10                      # augmented copies written per source image
target_size = (512, 512)                     # every image is resized to this before augmenting
image_extensions = ('.jpg', '.jpeg', '.png')  # matched case-insensitively


def class_folder_name(name):
    """Return the output folder name for a class: lower-cased, spaces -> underscores."""
    return name.lower().replace(' ', '_')


# Discover the class folders. The output root lives INSIDE `root_directory`, so
# it must be excluded explicitly; otherwise a re-run would treat the previously
# augmented dataset as one more class. Sorting makes the order deterministic
# (os.scandir yields entries in arbitrary order).
_output_root_abs = os.path.abspath(root_output_directory)
subdirectories = sorted(
    entry.path
    for entry in os.scandir(root_directory)
    if entry.is_dir()
    and entry.name != '.ipynb_checkpoints'
    and os.path.abspath(entry.path) != _output_root_abs
)
source_directories = subdirectories

# Derive each output folder from ITS OWN source folder name. Pairing the scanned
# folders positionally with `disease_names` (as a plain zip would) can silently
# write one class's images into another class's folder.
output_directories = [
    os.path.join(root_output_directory, class_folder_name(os.path.basename(src)))
    for src in source_directories
]

# Surface any mismatch between what is on disk and what the notebook expects.
_expected_classes = {class_folder_name(name) for name in disease_names}
_found_classes = {os.path.basename(out_dir) for out_dir in output_directories}
if _found_classes != _expected_classes:
    print(
        "Warning: class folders on disk differ from disease_names. "
        f"Missing: {sorted(_expected_classes - _found_classes)}; "
        f"unexpected: {sorted(_found_classes - _expected_classes)}"
    )

# Create the output directories if they don't exist
for out_dir in output_directories:
    os.makedirs(out_dir, exist_ok=True)

# Define the data augmentation settings
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Iterate over (source, output) class folder pairs
for source_dir, output_dir in zip(source_directories, output_directories):
    image_file_names = sorted(
        f for f in os.listdir(source_dir) if f.lower().endswith(image_extensions)
    )
    print(f"{os.path.basename(source_dir)}: {len(image_file_names)} images -> {output_dir}")

    # Generate and save augmented images for each image in the source directory
    for img_name in image_file_names:
        img_path = os.path.join(source_dir, img_name)

        # Resize the image to 512x512 and add a leading batch axis for flow()
        img = load_img(img_path, target_size=target_size)
        img_array = img_to_array(img)
        img_array = img_array.reshape((1,) + img_array.shape)

        aug_iter = datagen.flow(x=img_array, batch_size=1)

        # Files are written here rather than through flow(save_to_dir=...):
        # with a constant prefix the generator names files with a small random
        # suffix, so different augmentations can overwrite each other. The name
        # below is unique per (source file, augmentation index) and stable across
        # re-runs, so running the cell again replaces files instead of piling up
        # extra ones. The source extension is kept in the name so that e.g.
        # 'leaf.jpg' and 'leaf.png' cannot collide.
        stem, ext = os.path.splitext(img_name)
        for aug_index in range(n_augmented_images):
            # next(iterator) works on both old and new Keras iterators;
            # the `.next()` method is not available on all versions.
            batch = next(aug_iter)
            out_name = f"aug_{stem}_{ext.lstrip('.')}_{aug_index:02d}.jpg"
            augmented = keras.preprocessing.image.array_to_img(batch[0])
            augmented.save(os.path.join(output_dir, out_name))


## 2. Split Data Into Training, Test and Validation Set
* 70% Train
* 15% Test
* 15% Validation

In [9]:
# Split the augmented dataset into train / validation / test folders.
# NOTE(review): the split is done per file AFTER augmentation (group_prefix=None),
# so augmented variants of one source image may end up in different splits --
# confirm this is acceptable, or split the raw images before augmenting.
input_folder = "/content/drive/MyDrive/Raw_Datas/Augmented_Dataset"

split_options = dict(
    output="Output_Datas",   # relative path: created under the current working directory
    seed=42,                 # fixed seed for the shuffle
    ratio=(.7, .15, .15),    # presumably (train, val, test) per the split-folders docs
    group_prefix=None,       # no grouping of related files
)
splitfolders.ratio(input_folder, **split_options)

Copying files: 4225 files [00:00, 6024.96 files/s]
