In [None]:
# import system libs
import os
import time
import shutil
import pathlib
import itertools
import random

In [None]:
# import data handling tools
import cv2
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report



In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Notebook-wide input configuration.
image_size: int = 256  # presumably the side length (pixels) of the square model input - TODO confirm
batch_size: int = 32   # presumably the number of images per generator batch - TODO confirm
channels: int = 3      # presumably the colour channels per image (RGB) - TODO confirm

In [None]:
# Source images: one sub-folder per class is looked up under this directory.
dataset_path = '/content/drive/MyDrive/dataset1/tomato1'
# Destination root that receives the train/val/test copies.
split_dataset_path = '/content/drive/MyDrive/dataset1/splitnew'

# Class folder names; every entry shares the 'Tomato___' prefix.
class_names = [
    f'Tomato___{label}'
    for label in (
        'Tomato_Yellow_Leaf_Curl_Virus',
        'Late_blight',
        'Spider_mites Two-spotted_spider_mite',
        'Septoria_leaf_spot',
        'Early_blight',
        'Bacterial_spot',
        'Target_Spot',
        'Leaf_Mold',
        'Tomato_mosaic_virus',
        'healthy',
    )
]


In [None]:
# Split ratios (fractions of each class assigned to every subset)
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# The test subset receives whatever is left after the train and validation
# slices are taken, so `test_ratio` is informational only. Fail fast if the
# three values are inconsistent instead of silently producing a different
# test share than the one written here.
assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, \
    "train_ratio + val_ratio + test_ratio must sum to 1"

In [None]:
# Make sure <split_dataset_path>/<subset>/<class_name> exists for every
# subset/class combination (existing folders are left untouched).
for subset, class_name in itertools.product(['train', 'val', 'test'], class_names):
    target_dir = os.path.join(split_dataset_path, subset, class_name)
    os.makedirs(target_dir, exist_ok=True)

In [None]:
# Function to split and copy images
def split_and_copy_images(class_path, class_name, dest_root=None, ratios=None, seed=42):
    """Split one class folder into train/val/test subsets and copy the files.

    The regular files directly inside ``class_path`` are sorted, shuffled with
    a seeded private RNG and sliced into three groups. Each group is copied to
    ``<dest_root>/<subset>/<class_name>/``.

    Sorting before the seeded shuffle makes the assignment reproducible:
    running the function again on an unchanged folder sends every image to the
    same subset, so a re-run cannot place one image in two subsets.

    Args:
        class_path: Directory holding the images of a single class.
            Sub-directories inside it are ignored.
        class_name: Name of the class; used as the destination folder name.
        dest_root: Root of the split dataset. Defaults to the notebook-level
            ``split_dataset_path``.
        ratios: ``(train_fraction, val_fraction)``. Defaults to the
            notebook-level ``(train_ratio, val_ratio)``. The test subset
            receives every image not taken by train or validation.
        seed: Seed for the shuffle. Pass ``None`` for a non-reproducible
            shuffle.

    Returns:
        None. The result is the set of files copied to disk.

    Note:
        Files left in ``dest_root`` by an earlier run that used a different
        split are not removed; start from an empty destination in that case.
    """
    if dest_root is None:
        dest_root = split_dataset_path
    if ratios is None:
        ratios = (train_ratio, val_ratio)
    train_fraction, val_fraction = ratios

    # os.listdir returns entries in arbitrary order; sort so that the seeded
    # shuffle below always starts from the same sequence.
    images = sorted(
        entry for entry in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, entry))
    )
    # A private RNG keeps the split independent of the global `random` state.
    random.Random(seed).shuffle(images)

    train_count = int(len(images) * train_fraction)
    val_count = int(len(images) * val_fraction)

    subsets = {
        'train': images[:train_count],
        'val': images[train_count:train_count + val_count],
        # Remainder, so rounding losses from int() above land in the test set.
        'test': images[train_count + val_count:],
    }

    # Copy images to the respective directories
    for subset, subset_images in subsets.items():
        subset_dir = os.path.join(dest_root, subset, class_name)
        # Do not rely on another cell having created the folder tree.
        os.makedirs(subset_dir, exist_ok=True)
        for image in subset_images:
            shutil.copy(os.path.join(class_path, image), os.path.join(subset_dir, image))

# Run the split for every expected class; report class folders that are missing
for class_name in class_names:
    class_path = os.path.join(dataset_path, class_name)
    if not os.path.exists(class_path):
        print(f"Directory does not exist: {class_path}")
        continue
    split_and_copy_images(class_path, class_name)
    print(f"Class '{class_name}' images split into train, validation, and test sets.")


Class 'Tomato___Tomato_Yellow_Leaf_Curl_Virus' images split into train, validation, and test sets.
Class 'Tomato___Late_blight' images split into train, validation, and test sets.
Class 'Tomato___Spider_mites Two-spotted_spider_mite' images split into train, validation, and test sets.
Class 'Tomato___Septoria_leaf_spot' images split into train, validation, and test sets.
Class 'Tomato___Early_blight' images split into train, validation, and test sets.
Class 'Tomato___Bacterial_spot' images split into train, validation, and test sets.
Class 'Tomato___Target_Spot' images split into train, validation, and test sets.
Class 'Tomato___Leaf_Mold' images split into train, validation, and test sets.
Class 'Tomato___Tomato_mosaic_virus' images split into train, validation, and test sets.
Class 'Tomato___healthy' images split into train, validation, and test sets.


In [None]:

# Root of the split dataset and the three subset folders beneath it
split_dataset_path = '/content/drive/MyDrive/dataset1/splitnew'
train_dir, val_dir, test_dir = (
    os.path.join(split_dataset_path, subset_name)
    for subset_name in ('train', 'val', 'test')
)

In [None]:
# Normalization-only generator: `rescale` multiplies every pixel value by
# 1/255; no augmentation options are configured.
datagen = ImageDataGenerator(rescale=1 / 255)

In [None]:
# Load the train / validation / test splits from their directories.
# Input size and batch size come from the notebook-level constants
# (`image_size`, `batch_size`) so the generators cannot drift from the rest of
# the pipeline.
flow_kwargs = dict(
    target_size=(image_size, image_size),
    batch_size=batch_size,
    class_mode='categorical',
)

# Training batches are shuffled (the Keras default, stated explicitly here).
train_generator = datagen.flow_from_directory(train_dir, shuffle=True, **flow_kwargs)

# Evaluation splits keep directory order: with shuffling enabled the order of
# predictions no longer matches `generator.classes`, which makes a confusion
# matrix / classification report built from the two meaningless.
validation_generator = datagen.flow_from_directory(val_dir, shuffle=False, **flow_kwargs)
test_generator = datagen.flow_from_directory(test_dir, shuffle=False, **flow_kwargs)

Found 14000 images belonging to 10 classes.
Found 3000 images belonging to 10 classes.
Found 3000 images belonging to 10 classes.
