In [None]:
from PIL import Image
import os

def clean_dataset(dataset_path):
    """Delete files in the class folders that cannot be loaded as images.

    Walks the ``ripe`` and ``unripe`` sub-folders of ``dataset_path`` and
    removes every regular file that Pillow fails to open or verify.

    Args:
        dataset_path: Directory containing the ``ripe`` and ``unripe`` folders.

    Raises:
        FileNotFoundError: If one of the two class folders does not exist
            (raised by ``os.listdir``).
    """
    for class_folder in ["ripe", "unripe"]:
        class_path = os.path.join(dataset_path, class_folder)
        for image_file in os.listdir(class_path):
            image_path = os.path.join(class_path, image_file)

            # Sub-directories are not images: opening them fails and
            # os.remove() cannot delete them, so leave them untouched.
            if not os.path.isfile(image_path):
                continue

            try:
                # Image.open() only reads the header; verify() checks the
                # rest of the file. The context manager closes the handle
                # before the except clause runs, so the file can be removed
                # even on platforms that lock open files.
                with Image.open(image_path) as img:
                    img.verify()
            except (IOError, SyntaxError):
                # Remove corrupt images
                print(f"Removing corrupt image: {image_path}")
                os.remove(image_path)

            # Additional checks and cleaning steps can be added here

# Root folder of the dataset; clean_dataset looks for "ripe" and "unripe"
# sub-folders inside it. Replace with your actual dataset path.
dataset_path = "C:/Users/user/Desktop/project/test"

# Runs as soon as the cell executes and permanently deletes every file that
# fails to open as an image.
clean_dataset(dataset_path)


In [None]:
import os

def rename_images(dataset_path):
    """Rename the images of each class folder to ``<class>_<NNNN><ext>``.

    Files in the ``ripe`` and ``unripe`` sub-folders whose extension is
    ``.jpg``, ``.jpeg`` or ``.png`` (case-insensitive) are numbered from 1 in
    sorted filename order; the extension is lower-cased. Other files are left
    alone.

    The rename happens in two passes (source -> unique staging name -> final
    name) so that a final name can never overwrite a file that is still
    waiting to be renamed, which also makes the function safe to re-run.

    Args:
        dataset_path: Directory containing the ``ripe`` and ``unripe`` folders.

    Raises:
        FileNotFoundError: If one of the two class folders does not exist.
    """
    import uuid  # only needed to build collision-free staging names

    image_extensions = {".jpg", ".jpeg", ".png"}

    for class_folder in ["ripe", "unripe"]:
        class_path = os.path.join(dataset_path, class_folder)

        # Choose a prefix for the renamed images
        prefix = class_folder + "_"

        # Sorted so that the numbering does not depend on directory order
        image_files = sorted(
            name for name in os.listdir(class_path)
            if os.path.splitext(name)[1].lower() in image_extensions
        )

        # Pass 1: move every image to a staging name that cannot clash with
        # any existing or final name.
        staging_tag = uuid.uuid4().hex
        staged = []
        for index, image_file in enumerate(image_files, start=1):
            extension = os.path.splitext(image_file)[1].lower()
            staged_path = os.path.join(class_path, f"{staging_tag}_{index:04d}{extension}")
            final_path = os.path.join(class_path, f"{prefix}{index:04d}{extension}")
            os.rename(os.path.join(class_path, image_file), staged_path)
            staged.append((staged_path, final_path))

        # Pass 2: all final names are free now
        for staged_path, final_path in staged:
            os.rename(staged_path, final_path)

# Root folder of the dataset; rename_images looks for "ripe" and "unripe"
# sub-folders inside it. Replace with your actual dataset path.
dataset_path = "C:/Users/user/Desktop/project/test"
# Runs as soon as the cell executes and renames the image files in place.
rename_images(dataset_path)


In [None]:
import os
from sklearn.model_selection import train_test_split
import shutil

def split_dataset(dataset_path, train_size=0.7, validation_size=0.15, test_size=0.15, random_state=42):
    """Move each class folder's images into train/validation/test folders.

    For every class directory directly under ``dataset_path`` the ``.jpg``,
    ``.jpeg`` and ``.png`` files are split with ``train_test_split`` and moved
    to ``<dataset_path>/<split>/<class>/``. The ``train``, ``validation`` and
    ``test`` output folders themselves are never treated as classes.

    Args:
        dataset_path: Directory whose immediate sub-folders are the classes.
        train_size: Accepted for backward compatibility but not used; the
            training share is whatever remains after
            ``validation_size + test_size``.
        validation_size: Fraction of each class moved to ``validation``.
        test_size: Fraction of each class moved to ``test``.
        random_state: Seed forwarded to both ``train_test_split`` calls.
    """
    # Create directories for training, validation, and test sets
    split_dirs = {
        name: os.path.join(dataset_path, name)
        for name in ('train', 'validation', 'test')
    }
    for split_dir in split_dirs.values():
        os.makedirs(split_dir, exist_ok=True)

    holdout_fraction = validation_size + test_size

    for class_folder in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_folder)

        # The output folders live inside dataset_path; skipping them keeps
        # the function from trying to split its own results.
        if class_folder in split_dirs or not os.path.isdir(class_path):
            continue

        # Sorted so that a fixed random_state gives the same split every run
        images = sorted(
            img for img in os.listdir(class_path)
            if img.lower().endswith(('.jpg', '.jpeg', '.png'))
        )
        if not images:
            print(f"Skipping class '{class_folder}' due to lack of images.")
            continue

        try:
            # First cut off validation+test together, then divide that part
            train_images, holdout_images = train_test_split(
                images, test_size=holdout_fraction, random_state=random_state)
            validation_images, test_images = train_test_split(
                holdout_images, test_size=test_size / holdout_fraction, random_state=random_state)
        except ValueError as err:
            # train_test_split refuses splits that would leave one side empty
            print(f"Class '{class_folder}' could not be split three ways ({err}); "
                  f"keeping all {len(images)} image(s) for training.")
            train_images, validation_images, test_images = images, [], []

        # Move images to their respective directories
        assignments = {'train': train_images, 'validation': validation_images, 'test': test_images}
        for split_name, split_images in assignments.items():
            if not split_images:
                continue
            dest_dir = os.path.join(split_dirs[split_name], class_folder)
            os.makedirs(dest_dir, exist_ok=True)
            for img in split_images:
                shutil.move(os.path.join(class_path, img), os.path.join(dest_dir, img))

# Root folder whose immediate sub-folders are the image classes.
# Replace with your actual dataset path.
dataset_path = "C:/Users/user/Desktop/project/test"

# Split the dataset. Runs as soon as the cell executes and moves the image
# files out of the class folders into train/validation/test sub-folders.
split_dataset(dataset_path)


In [None]:
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import shutil

def augment_and_split_dataset(dataset_path, train_size=0.7, validation_size=0.15, test_size=0.15, random_state=42):
    """Split every class three ways and write augmented train/validation images.

    Each class directory directly under ``dataset_path`` is split with
    ``train_test_split``. For the training and validation shares, one randomly
    transformed 150x150 JPEG per source image is written to
    ``<dataset_path>/<split>/<class>/augmented_<n>.jpg`` (the sources stay in
    place). The test share is moved, unmodified, to
    ``<dataset_path>/test/<class>/``.

    Augmentation is applied to exactly the files chosen by the split, so an
    image can never end up in more than one of the three sets.

    Args:
        dataset_path: Directory whose immediate sub-folders are the classes.
        train_size: Accepted for backward compatibility but not used; the
            training share is whatever remains after
            ``validation_size + test_size``.
        validation_size: Fraction of each class used for validation.
        test_size: Fraction of each class used for testing.
        random_state: Seed forwarded to both ``train_test_split`` calls.
    """
    # Local import so this cell does not depend on another cell's imports
    from tensorflow.keras.preprocessing.image import img_to_array, load_img, save_img

    # Create directories for training, validation, and test sets
    split_dirs = {
        name: os.path.join(dataset_path, name)
        for name in ('train', 'validation', 'test')
    }
    for split_dir in split_dirs.values():
        os.makedirs(split_dir, exist_ok=True)

    # Geometric augmentation only: pixel rescaling is left to training time
    # so the files written to disk keep their 0-255 value range.
    datagen = ImageDataGenerator(
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True
    )

    def _write_augmented(class_path, file_names, dest_dir):
        """Save one randomly transformed copy of each source image into dest_dir."""
        os.makedirs(dest_dir, exist_ok=True)
        next_index = len(os.listdir(dest_dir)) + 1
        for file_name in file_names:
            pixels = img_to_array(load_img(os.path.join(class_path, file_name), target_size=(150, 150)))
            dest_path = os.path.join(dest_dir, f"augmented_{next_index}.jpg")
            # Never overwrite an image left behind by an earlier run
            while os.path.exists(dest_path):
                next_index += 1
                dest_path = os.path.join(dest_dir, f"augmented_{next_index}.jpg")
            save_img(dest_path, datagen.random_transform(pixels), scale=False)
            next_index += 1

    holdout_fraction = validation_size + test_size

    for class_folder in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_folder)

        # The output folders live inside dataset_path; they are not classes
        if class_folder in split_dirs or not os.path.isdir(class_path):
            continue

        # Sorted so that a fixed random_state gives the same split every run
        images = sorted(
            img for img in os.listdir(class_path)
            if img.lower().endswith(('.jpg', '.jpeg', '.png'))
        )

        # Check if there are images in the class directory
        if not images:
            print(f"Skipping class '{class_folder}' due to lack of images.")
            continue

        try:
            # First cut off validation+test together, then divide that part
            train_images, holdout_images = train_test_split(
                images, test_size=holdout_fraction, random_state=random_state)
            validation_images, test_images = train_test_split(
                holdout_images, test_size=test_size / holdout_fraction, random_state=random_state)
        except ValueError as err:
            # train_test_split refuses splits that would leave one side empty
            print(f"Class '{class_folder}' could not be split three ways ({err}); "
                  f"keeping all {len(images)} image(s) for training.")
            train_images, validation_images, test_images = images, [], []

        _write_augmented(class_path, train_images, os.path.join(split_dirs['train'], class_folder))
        _write_augmented(class_path, validation_images, os.path.join(split_dirs['validation'], class_folder))

        # Test images are moved untouched so evaluation uses real data
        if test_images:
            test_class_dir = os.path.join(split_dirs['test'], class_folder)
            os.makedirs(test_class_dir, exist_ok=True)
            for img in test_images:
                shutil.move(os.path.join(class_path, img), os.path.join(test_class_dir, img))

# Root folder whose immediate sub-folders are the image classes.
# Replace with your actual dataset path.
dataset_path = "C:/Users/user/Desktop/project/test"

# Augment and split the dataset. Runs as soon as the cell executes and
# creates train/validation/test folders under dataset_path.
augment_and_split_dataset(dataset_path)


In [None]:
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import shutil

def augment_and_split_dataset(dataset_path, train_size=0.7, validation_size=0.15, test_size=0.15, random_state=42):
    """Split every class three ways and write augmented train/validation images.

    Each class directory directly under ``dataset_path`` is split with
    ``train_test_split``. For the training and validation shares, one randomly
    transformed 150x150 JPEG per source image is written to
    ``<dataset_path>/<split>/<class>/augmented_<n>.jpg`` (the sources stay in
    place). The test share is moved, unmodified, to
    ``<dataset_path>/test/<class>/``.

    Augmentation is applied to exactly the files chosen by the split, so an
    image can never end up in more than one of the three sets.

    Args:
        dataset_path: Directory whose immediate sub-folders are the classes.
        train_size: Accepted for backward compatibility but not used; the
            training share is whatever remains after
            ``validation_size + test_size``.
        validation_size: Fraction of each class used for validation.
        test_size: Fraction of each class used for testing.
        random_state: Seed forwarded to both ``train_test_split`` calls.
    """
    # Local import so this cell does not depend on another cell's imports
    from tensorflow.keras.preprocessing.image import img_to_array, load_img, save_img

    # Create directories for training, validation, and test sets
    split_dirs = {
        name: os.path.join(dataset_path, name)
        for name in ('train', 'validation', 'test')
    }
    for split_dir in split_dirs.values():
        os.makedirs(split_dir, exist_ok=True)

    # Geometric augmentation only: pixel rescaling is left to training time
    # so the files written to disk keep their 0-255 value range.
    datagen = ImageDataGenerator(
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True
    )

    def _write_augmented(class_path, file_names, dest_dir):
        """Save one randomly transformed copy of each source image into dest_dir."""
        os.makedirs(dest_dir, exist_ok=True)
        next_index = len(os.listdir(dest_dir)) + 1
        for file_name in file_names:
            pixels = img_to_array(load_img(os.path.join(class_path, file_name), target_size=(150, 150)))
            dest_path = os.path.join(dest_dir, f"augmented_{next_index}.jpg")
            # Never overwrite an image left behind by an earlier run
            while os.path.exists(dest_path):
                next_index += 1
                dest_path = os.path.join(dest_dir, f"augmented_{next_index}.jpg")
            save_img(dest_path, datagen.random_transform(pixels), scale=False)
            next_index += 1

    holdout_fraction = validation_size + test_size

    for class_folder in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_folder)

        # The output folders live inside dataset_path; they are not classes
        if class_folder in split_dirs or not os.path.isdir(class_path):
            continue

        # Sorted so that a fixed random_state gives the same split every run
        images = sorted(
            img for img in os.listdir(class_path)
            if img.lower().endswith(('.jpg', '.jpeg', '.png'))
        )

        # Check if there are images in the class directory
        if not images:
            print(f"Skipping class '{class_folder}' due to lack of images.")
            continue

        try:
            # First cut off validation+test together, then divide that part
            train_images, holdout_images = train_test_split(
                images, test_size=holdout_fraction, random_state=random_state)
            validation_images, test_images = train_test_split(
                holdout_images, test_size=test_size / holdout_fraction, random_state=random_state)
        except ValueError as err:
            # train_test_split refuses splits that would leave one side empty
            print(f"Class '{class_folder}' could not be split three ways ({err}); "
                  f"keeping all {len(images)} image(s) for training.")
            train_images, validation_images, test_images = images, [], []

        # Create destination directories and write the augmented images
        _write_augmented(class_path, train_images, os.path.join(split_dirs['train'], class_folder))
        _write_augmented(class_path, validation_images, os.path.join(split_dirs['validation'], class_folder))

        # Test images are moved untouched so evaluation uses real data
        if test_images:
            test_class_dir = os.path.join(split_dirs['test'], class_folder)
            os.makedirs(test_class_dir, exist_ok=True)
            for img in test_images:
                shutil.move(os.path.join(class_path, img), os.path.join(test_class_dir, img))

# Root folder whose immediate sub-folders are the image classes.
# Replace with your actual dataset path.
dataset_path = "C:/Users/user/Desktop/project/test"

# Augment and split the dataset. Runs as soon as the cell executes and
# creates train/validation/test folders under dataset_path.
augment_and_split_dataset(dataset_path)


In [None]:
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import shutil

def augment_and_split_dataset(dataset_path, train_size=0.7, validation_size=0.15, test_size=0.15, random_state=42):
    """Split every class three ways and write augmented train/validation images.

    Each class directory directly under ``dataset_path`` is split with
    ``train_test_split``. For the training and validation shares, one randomly
    transformed 150x150 JPEG per source image is written to
    ``<dataset_path>/<split>/<class>/augmented_<n>.jpg`` (the sources stay in
    place). The test share is moved, unmodified, to
    ``<dataset_path>/test/<class>/``.

    A class with too few images for a three-way split (for any class, not
    only ``unripe``) is used entirely for training.

    Args:
        dataset_path: Directory whose immediate sub-folders are the classes.
        train_size: Accepted for backward compatibility but not used; the
            training share is whatever remains after
            ``validation_size + test_size``.
        validation_size: Fraction of each class used for validation.
        test_size: Fraction of each class used for testing.
        random_state: Seed forwarded to both ``train_test_split`` calls.
    """
    # Local import so this cell does not depend on another cell's imports
    from tensorflow.keras.preprocessing.image import img_to_array, load_img, save_img

    # Create directories for training, validation, and test sets
    split_dirs = {
        name: os.path.join(dataset_path, name)
        for name in ('train', 'validation', 'test')
    }
    for split_dir in split_dirs.values():
        os.makedirs(split_dir, exist_ok=True)

    # Geometric augmentation only: pixel rescaling is left to training time
    # so the files written to disk keep their 0-255 value range.
    datagen = ImageDataGenerator(
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True
    )

    def _write_augmented(class_path, file_names, dest_dir):
        """Save one randomly transformed copy of each source image into dest_dir."""
        os.makedirs(dest_dir, exist_ok=True)
        next_index = len(os.listdir(dest_dir)) + 1
        for file_name in file_names:
            pixels = img_to_array(load_img(os.path.join(class_path, file_name), target_size=(150, 150)))
            dest_path = os.path.join(dest_dir, f"augmented_{next_index}.jpg")
            # Never overwrite an image left behind by an earlier run
            while os.path.exists(dest_path):
                next_index += 1
                dest_path = os.path.join(dest_dir, f"augmented_{next_index}.jpg")
            save_img(dest_path, datagen.random_transform(pixels), scale=False)
            next_index += 1

    holdout_fraction = validation_size + test_size

    for class_folder in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_folder)

        # The output folders live inside dataset_path; they are not classes
        if class_folder in split_dirs or not os.path.isdir(class_path):
            continue

        # Sorted so that a fixed random_state gives the same split every run
        images = sorted(
            img for img in os.listdir(class_path)
            if img.lower().endswith(('.jpg', '.jpeg', '.png'))
        )

        # Check if there are images in the class directory
        if not images:
            print(f"Skipping class '{class_folder}' due to lack of images.")
            continue

        try:
            # First cut off validation+test together, then divide that part
            train_images, holdout_images = train_test_split(
                images, test_size=holdout_fraction, random_state=random_state)
            validation_images, test_images = train_test_split(
                holdout_images, test_size=test_size / holdout_fraction, random_state=random_state)
        except ValueError as err:
            # train_test_split raises instead of returning an empty side, so
            # the "not enough images" case has to be handled here.
            print(f"Class '{class_folder}' could not be split three ways ({err}); "
                  f"keeping all {len(images)} image(s) for training.")
            train_images, validation_images, test_images = images, [], []

        # Create destination directories and write the augmented images
        _write_augmented(class_path, train_images, os.path.join(split_dirs['train'], class_folder))
        _write_augmented(class_path, validation_images, os.path.join(split_dirs['validation'], class_folder))

        # Test images are moved untouched so evaluation uses real data
        if test_images:
            test_class_dir = os.path.join(split_dirs['test'], class_folder)
            os.makedirs(test_class_dir, exist_ok=True)
            for img in test_images:
                shutil.move(os.path.join(class_path, img), os.path.join(test_class_dir, img))

# Root folder whose immediate sub-folders are the image classes.
# Replace with your actual dataset path.
dataset_path = "C:/Users/user/Desktop/project/test"

# Augment and split the dataset. Runs as soon as the cell executes and
# creates train/validation/test folders under dataset_path.
augment_and_split_dataset(dataset_path)


In [None]:
import os
from sklearn.model_selection import train_test_split
import shutil

def split_unripe_dataset(dataset_path, train_size=0.7, validation_size=0.15, test_size=0.15, random_state=42):
    """Copy each class folder's images into train/validation/test folders.

    Despite the name, every class directory directly under ``dataset_path``
    is processed, not only ``unripe``. The ``.jpg``, ``.jpeg`` and ``.png``
    files of each class are split with ``train_test_split`` and copied to
    ``<dataset_path>/<split>/<class>/``; the sources stay in place. The
    ``train``, ``validation`` and ``test`` output folders are never treated
    as classes.

    A class with too few images for a three-way split is copied entirely to
    the training folder.

    Args:
        dataset_path: Directory whose immediate sub-folders are the classes.
        train_size: Accepted for backward compatibility but not used; the
            training share is whatever remains after
            ``validation_size + test_size``.
        validation_size: Fraction of each class copied to ``validation``.
        test_size: Fraction of each class copied to ``test``.
        random_state: Seed forwarded to both ``train_test_split`` calls.
    """
    # Create directories for training, validation, and test sets
    split_dirs = {
        name: os.path.join(dataset_path, name)
        for name in ('train', 'validation', 'test')
    }
    for split_dir in split_dirs.values():
        os.makedirs(split_dir, exist_ok=True)

    holdout_fraction = validation_size + test_size

    for class_folder in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_folder)

        # The output folders live inside dataset_path; skipping them keeps
        # the function from trying to split its own results.
        if class_folder in split_dirs or not os.path.isdir(class_path):
            continue

        # Sorted so that a fixed random_state gives the same split every run
        images = sorted(
            img for img in os.listdir(class_path)
            if img.lower().endswith(('.jpg', '.jpeg', '.png'))
        )

        # Check if there are images in the class directory
        if not images:
            print(f"Skipping class '{class_folder}' due to lack of images.")
            continue

        try:
            # First cut off validation+test together, then divide that part
            train_images, holdout_images = train_test_split(
                images, test_size=holdout_fraction, random_state=random_state)
            validation_images, test_images = train_test_split(
                holdout_images, test_size=test_size / holdout_fraction, random_state=random_state)
        except ValueError as err:
            # train_test_split raises instead of returning an empty side, so
            # the "not enough images" case has to be handled here.
            print(f"Class '{class_folder}' could not be split three ways ({err}); "
                  f"keeping all {len(images)} image(s) for training.")
            train_images, validation_images, test_images = images, [], []

        # Copy images to their respective directories
        assignments = {'train': train_images, 'validation': validation_images, 'test': test_images}
        for split_name, split_images in assignments.items():
            if not split_images:
                continue
            dest_dir = os.path.join(split_dirs[split_name], class_folder)
            os.makedirs(dest_dir, exist_ok=True)
            for img in split_images:
                shutil.copy(os.path.join(class_path, img), os.path.join(dest_dir, img))

# Root folder whose immediate sub-folders are the image classes.
# Replace with your actual dataset path.
dataset_path = "C:/Users/user/Desktop/project/test"

# Split the unripe class without augmentation. Runs as soon as the cell
# executes and copies images into train/validation/test sub-folders.
split_unripe_dataset(dataset_path)


In [None]:
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import shutil

def augment_and_split_dataset(dataset_path, train_size=0.7, validation_size=0.15, test_size=0.15, random_state=42):
    """Split every class three ways and write augmented train/validation images.

    Each class directory directly under ``dataset_path`` is split with
    ``train_test_split``. For the training and validation shares, one randomly
    transformed 150x150 JPEG per source image is written to
    ``<dataset_path>/<split>/<class>/augmented_<n>.jpg``. The test share is
    copied, unmodified, to ``<dataset_path>/test/<class>/``. All source files
    stay in place.

    A class with too few images for a three-way split is used entirely for
    training.

    Args:
        dataset_path: Directory whose immediate sub-folders are the classes.
        train_size: Accepted for backward compatibility but not used; the
            training share is whatever remains after
            ``validation_size + test_size``.
        validation_size: Fraction of each class used for validation.
        test_size: Fraction of each class used for testing.
        random_state: Seed forwarded to both ``train_test_split`` calls.
    """
    # Local import so this cell does not depend on another cell's imports
    from tensorflow.keras.preprocessing.image import img_to_array, load_img, save_img

    # Create directories for training, validation, and test sets
    split_dirs = {
        name: os.path.join(dataset_path, name)
        for name in ('train', 'validation', 'test')
    }
    for split_dir in split_dirs.values():
        os.makedirs(split_dir, exist_ok=True)

    # Geometric augmentation only: pixel rescaling is left to training time
    # so the files written to disk keep their 0-255 value range.
    datagen = ImageDataGenerator(
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True
    )

    def _write_augmented(class_path, file_names, dest_dir):
        """Save one randomly transformed copy of each source image into dest_dir."""
        os.makedirs(dest_dir, exist_ok=True)
        next_index = len(os.listdir(dest_dir)) + 1
        for file_name in file_names:
            pixels = img_to_array(load_img(os.path.join(class_path, file_name), target_size=(150, 150)))
            dest_path = os.path.join(dest_dir, f"augmented_{next_index}.jpg")
            # Never overwrite an image left behind by an earlier run
            while os.path.exists(dest_path):
                next_index += 1
                dest_path = os.path.join(dest_dir, f"augmented_{next_index}.jpg")
            save_img(dest_path, datagen.random_transform(pixels), scale=False)
            next_index += 1

    holdout_fraction = validation_size + test_size

    for class_folder in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_folder)

        # The output folders live inside dataset_path; they are not classes
        if class_folder in split_dirs or not os.path.isdir(class_path):
            continue

        # Sorted so that a fixed random_state gives the same split every run
        images = sorted(
            img for img in os.listdir(class_path)
            if img.lower().endswith(('.jpg', '.jpeg', '.png'))
        )

        # Check if there are images in the class directory
        if not images:
            print(f"Skipping class '{class_folder}' due to lack of images.")
            continue

        try:
            # First cut off validation+test together, then divide that part
            train_images, holdout_images = train_test_split(
                images, test_size=holdout_fraction, random_state=random_state)
            validation_images, test_images = train_test_split(
                holdout_images, test_size=test_size / holdout_fraction, random_state=random_state)
        except ValueError as err:
            # train_test_split raises instead of returning an empty side, so
            # the "not enough images" case has to be handled here.
            print(f"Class '{class_folder}' could not be split three ways ({err}); "
                  f"keeping all {len(images)} image(s) for training.")
            train_images, validation_images, test_images = images, [], []

        # Create destination directories and write the augmented images
        _write_augmented(class_path, train_images, os.path.join(split_dirs['train'], class_folder))
        _write_augmented(class_path, validation_images, os.path.join(split_dirs['validation'], class_folder))

        # Test images are copied untouched so evaluation uses real data
        if test_images:
            test_class_dir = os.path.join(split_dirs['test'], class_folder)
            os.makedirs(test_class_dir, exist_ok=True)
            for img in test_images:
                shutil.copy(os.path.join(class_path, img), os.path.join(test_class_dir, img))

# Root folder whose immediate sub-folders are the image classes.
# Replace with your actual dataset path.
dataset_path = "C:/Users/user/Desktop/project/test"

# Augment and split the dataset. Runs as soon as the cell executes and
# creates train/validation/test folders under dataset_path.
augment_and_split_dataset(dataset_path)


In [None]:
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import shutil

def augment_and_split_dataset(dataset_path, train_size=0.7, validation_size=0.15, test_size=0.15, random_state=42):
    """Copy each class's images into train/validation/test folders.

    Despite the name, no augmentation is applied by this version. Training
    and validation files are plain copies stored under sequential
    ``augmented_<n><ext>`` names (keeping the source's extension), and test
    files are copied under their original names. Source files stay in place.
    The ``train``, ``validation`` and ``test`` output folders are never
    treated as classes.

    A class with too few images for a three-way split is copied entirely to
    the training folder.

    Args:
        dataset_path: Directory whose immediate sub-folders are the classes.
        train_size: Accepted for backward compatibility but not used; the
            training share is whatever remains after
            ``validation_size + test_size``.
        validation_size: Fraction of each class copied to ``validation``.
        test_size: Fraction of each class copied to ``test``.
        random_state: Seed forwarded to both ``train_test_split`` calls.
    """
    # Create directories for training, validation, and test sets
    split_dirs = {
        name: os.path.join(dataset_path, name)
        for name in ('train', 'validation', 'test')
    }
    for split_dir in split_dirs.values():
        os.makedirs(split_dir, exist_ok=True)

    def _copy_numbered(class_path, file_names, dest_dir):
        """Copy files into dest_dir as augmented_<n><ext>, continuing the existing count."""
        os.makedirs(dest_dir, exist_ok=True)
        next_index = len(os.listdir(dest_dir)) + 1
        for file_name in file_names:
            # Keep the real extension so PNG data is not stored as ".jpg"
            extension = os.path.splitext(file_name)[1].lower()
            dest_path = os.path.join(dest_dir, f"augmented_{next_index}{extension}")
            # Never overwrite a file left behind by an earlier run
            while os.path.exists(dest_path):
                next_index += 1
                dest_path = os.path.join(dest_dir, f"augmented_{next_index}{extension}")
            shutil.copy(os.path.join(class_path, file_name), dest_path)
            next_index += 1

    holdout_fraction = validation_size + test_size

    for class_folder in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_folder)

        # The output folders live inside dataset_path; they are not classes
        if class_folder in split_dirs or not os.path.isdir(class_path):
            continue

        # Sorted so that a fixed random_state gives the same split every run
        images = sorted(
            img for img in os.listdir(class_path)
            if img.lower().endswith(('.jpg', '.jpeg', '.png'))
        )

        # Check if there are images in the class directory
        if not images:
            print(f"Skipping class '{class_folder}' due to lack of images.")
            continue

        try:
            # First cut off validation+test together, then divide that part
            train_images, holdout_images = train_test_split(
                images, test_size=holdout_fraction, random_state=random_state)
            validation_images, test_images = train_test_split(
                holdout_images, test_size=test_size / holdout_fraction, random_state=random_state)
        except ValueError as err:
            # train_test_split raises instead of returning an empty side, so
            # the "not enough images" case has to be handled here.
            print(f"Class '{class_folder}' could not be split three ways ({err}); "
                  f"keeping all {len(images)} image(s) for training.")
            train_images, validation_images, test_images = images, [], []

        # Training and validation class folders are always created
        _copy_numbered(class_path, train_images, os.path.join(split_dirs['train'], class_folder))
        _copy_numbered(class_path, validation_images, os.path.join(split_dirs['validation'], class_folder))

        # Test images keep their original file names
        if test_images:
            test_class_dir = os.path.join(split_dirs['test'], class_folder)
            os.makedirs(test_class_dir, exist_ok=True)
            for img in test_images:
                shutil.copy(os.path.join(class_path, img), os.path.join(test_class_dir, img))

# Root folder whose immediate sub-folders are the image classes.
# Replace with your actual dataset path.
dataset_path = "C:/Users/user/Desktop/project/test"

# Augment and split the dataset. Runs as soon as the cell executes and
# copies images into train/validation/test folders under dataset_path.
augment_and_split_dataset(dataset_path)


In [None]:
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import shutil

def augment_and_split_dataset(dataset_path, train_size=0.7, validation_size=0.15, test_size=0.15, random_state=42):
    """Copy the ``ripe`` and ``unripe`` images into train/validation/test folders.

    Despite the name, no augmentation is applied by this version. Only the
    ``ripe`` and ``unripe`` directories under ``dataset_path`` are processed.
    Training and validation files are plain copies stored under sequential
    ``augmented_<n><ext>`` names (keeping the source's extension), and test
    files are copied under their original names. Source files stay in place.

    A class with too few images for a three-way split is copied entirely to
    the training folder.

    Args:
        dataset_path: Directory containing the ``ripe`` and ``unripe`` folders.
        train_size: Accepted for backward compatibility but not used; the
            training share is whatever remains after
            ``validation_size + test_size``.
        validation_size: Fraction of each class copied to ``validation``.
        test_size: Fraction of each class copied to ``test``.
        random_state: Seed forwarded to both ``train_test_split`` calls.
    """
    # Create directories for training, validation, and test sets
    split_dirs = {
        name: os.path.join(dataset_path, name)
        for name in ('train', 'validation', 'test')
    }
    for split_dir in split_dirs.values():
        os.makedirs(split_dir, exist_ok=True)

    def _copy_numbered(class_path, file_names, dest_dir):
        """Copy files into dest_dir as augmented_<n><ext>, continuing the existing count."""
        os.makedirs(dest_dir, exist_ok=True)
        next_index = len(os.listdir(dest_dir)) + 1
        for file_name in file_names:
            # Keep the real extension so PNG data is not stored as ".jpg"
            extension = os.path.splitext(file_name)[1].lower()
            dest_path = os.path.join(dest_dir, f"augmented_{next_index}{extension}")
            # Never overwrite a file left behind by an earlier run
            while os.path.exists(dest_path):
                next_index += 1
                dest_path = os.path.join(dest_dir, f"augmented_{next_index}{extension}")
            shutil.copy(os.path.join(class_path, file_name), dest_path)
            next_index += 1

    holdout_fraction = validation_size + test_size

    for class_folder in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_folder)

        # Only the two known class folders are split; everything else
        # (including the output folders) is ignored.
        if class_folder not in ['ripe', 'unripe'] or not os.path.isdir(class_path):
            continue

        # Sorted so that a fixed random_state gives the same split every run
        images = sorted(
            img for img in os.listdir(class_path)
            if img.lower().endswith(('.jpg', '.jpeg', '.png'))
        )

        # Check if there are images in the class directory
        if not images:
            print(f"Skipping class '{class_folder}' due to lack of images.")
            continue

        try:
            # First cut off validation+test together, then divide that part
            train_images, holdout_images = train_test_split(
                images, test_size=holdout_fraction, random_state=random_state)
            validation_images, test_images = train_test_split(
                holdout_images, test_size=test_size / holdout_fraction, random_state=random_state)
        except ValueError as err:
            # train_test_split raises instead of returning an empty side, so
            # the "not enough images" case has to be handled here.
            print(f"Class '{class_folder}' could not be split three ways ({err}); "
                  f"keeping all {len(images)} image(s) for training.")
            train_images, validation_images, test_images = images, [], []

        # Training and validation class folders are always created
        _copy_numbered(class_path, train_images, os.path.join(split_dirs['train'], class_folder))
        _copy_numbered(class_path, validation_images, os.path.join(split_dirs['validation'], class_folder))

        # Test images keep their original file names
        if test_images:
            test_class_dir = os.path.join(split_dirs['test'], class_folder)
            os.makedirs(test_class_dir, exist_ok=True)
            for img in test_images:
                shutil.copy(os.path.join(class_path, img), os.path.join(test_class_dir, img))

# Root folder of the dataset; this version only processes its "ripe" and
# "unripe" sub-folders. Replace with your actual dataset path.
dataset_path = "C:/Users/user/Desktop/project/test"

# Augment and split the dataset. Runs as soon as the cell executes and
# copies images into train/validation/test folders under dataset_path.
augment_and_split_dataset(dataset_path)


In [None]:
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
import shutil

def _copy_numbered(filenames, src_dir, dest_dir):
    """Copy ``filenames`` from ``src_dir`` into ``dest_dir`` as ``augmented_<n>.jpg``.

    Numbering continues after the entries already present in ``dest_dir`` and
    skips any name that is already taken, so an existing file is never
    overwritten. ``dest_dir`` is created if it does not exist.
    """
    os.makedirs(dest_dir, exist_ok=True)

    # List the directory once instead of once per copied file.
    next_index = len(os.listdir(dest_dir)) + 1

    for filename in filenames:
        dest_path = os.path.join(dest_dir, f"augmented_{next_index}.jpg")
        while os.path.exists(dest_path):
            next_index += 1
            dest_path = os.path.join(dest_dir, f"augmented_{next_index}.jpg")

        # NOTE(review): the ".jpg" suffix is applied to every copy, including
        # .png/.jpeg sources, to keep the established file naming unchanged.
        shutil.copy(os.path.join(src_dir, filename), dest_path)
        next_index += 1


def augment_and_split_dataset(dataset_path, train_size=0.7, validation_size=0.15, test_size=0.15, random_state=42):
    """Split the 'ripe' and 'unripe' image folders into train/validation/test copies.

    Despite the name, no augmentation is performed: image files are copied
    unchanged with ``shutil.copy`` and the originals stay in place.

    Layout written under ``dataset_path``::

        train/<class>/augmented_<n>.jpg
        validation/<class>/augmented_<n>.jpg
        test/<class>/<original file name>

    Args:
        dataset_path: Directory containing the ``ripe`` and ``unripe`` class
            folders. Other entries in it are ignored.
        train_size: Accepted for backward compatibility. The training share is
            whatever remains after ``validation_size + test_size``.
        validation_size: Fraction of each class copied to the validation set.
        test_size: Fraction of each class copied to the test set.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        None. The function works purely through its file-system side effects.

    Notes:
        * Image names are sorted before splitting so the same seed yields the
          same split regardless of the order ``os.listdir`` returns.
        * A class with a single image goes entirely to training, and a hold-out
          of a single image goes entirely to validation, instead of failing
          inside ``train_test_split``.
        * Calling the function again appends further numbered copies to the
          train and validation folders.
        * Errors raised by ``train_test_split`` for fractions it rejects
          propagate unchanged.
    """
    # Create directories for training, validation, and test sets
    train_dir = os.path.join(dataset_path, 'train')
    validation_dir = os.path.join(dataset_path, 'validation')
    test_dir = os.path.join(dataset_path, 'test')

    for split_dir in (train_dir, validation_dir, test_dir):
        os.makedirs(split_dir, exist_ok=True)

    holdout_fraction = validation_size + test_size

    for class_folder in os.listdir(dataset_path):
        class_path = os.path.join(dataset_path, class_folder)

        if not (os.path.isdir(class_path) and class_folder in ('ripe', 'unripe')):
            continue

        # Sorted so the split depends only on the file names and the seed.
        images = sorted(
            img for img in os.listdir(class_path)
            if img.lower().endswith(('.jpg', '.jpeg', '.png'))
        )

        if not images:
            print(f"Skipping class '{class_folder}' due to lack of images.")
            continue

        # First cut: training images versus the combined validation+test hold-out.
        if len(images) < 2:
            print(f"Class '{class_folder}' has a single image; using it for training only.")
            train_images, holdout_images = images, []
        else:
            train_images, holdout_images = train_test_split(
                images, test_size=holdout_fraction, random_state=random_state
            )

        # Second cut: divide the hold-out between validation and test.
        if len(holdout_images) < 2:
            validation_images, test_images = holdout_images, []
        else:
            validation_images, test_images = train_test_split(
                holdout_images,
                test_size=(test_size / holdout_fraction),
                random_state=random_state,
            )

        _copy_numbered(train_images, class_path, os.path.join(train_dir, class_folder))
        _copy_numbered(validation_images, class_path, os.path.join(validation_dir, class_folder))

        # Test images keep their original file names. The class folder is
        # created even when empty so every split has the same sub-folders.
        test_class_dir = os.path.join(test_dir, class_folder)
        os.makedirs(test_class_dir, exist_ok=True)
        for img in test_images:
            shutil.copy(os.path.join(class_path, img), os.path.join(test_class_dir, img))

# Replace with your actual dataset path.
# NOTE(review): absolute, machine-specific path.
dataset_path = "C:/Users/user/Desktop/project/test"

# Split the dataset into train/validation/test folders.
# NOTE: each run copies the images again; train/validation copies are numbered
# from the current folder size, so those folders grow when this cell is re-run.
augment_and_split_dataset(dataset_path)

# Define the directories for training, validation, and test sets
train_dir = os.path.join(dataset_path, 'train')
validation_dir = os.path.join(dataset_path, 'validation')
test_dir = os.path.join(dataset_path, 'test')

# Shared input settings for every generator and for the model's input layer.
IMAGE_SIZE = (150, 150)
BATCH_SIZE = 32

# The generators only scale pixel values by 1/255; no augmentation is applied.
train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

# Define the model architecture: three conv/pool stages followed by a dense
# head with a single sigmoid unit for the binary ripe/unripe decision.
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=IMAGE_SIZE + (3,)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()

# Train the model.
# steps_per_epoch / validation_steps are left unset so Keras uses the length
# of each generator. The former `samples // 32` evaluated to 0 whenever a split
# held fewer than one full batch, and always dropped the final partial batch.
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=validation_generator
)

# Evaluate the model on the test set
test_datagen = ImageDataGenerator(rescale=1./255)

# shuffle=False keeps batches in directory order, so predictions made from
# this generator line up with `test_generator.classes`.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

test_loss, test_acc = model.evaluate(test_generator)
print(f"Test accuracy: {test_acc}")
# Save the model
model.save("tomato_model.h5")
print("Model saved successfully!")


In [None]:
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# `models`, not `model`: tensorflow.keras has no `model` attribute, so the
# original import raised ImportError before anything else in this cell ran.
from tensorflow.keras import layers, models
import seaborn as sns
import matplotlib.pyplot as plt

# Build a dedicated, non-shuffled iterator over the test folder. Predictions
# are compared against `.classes`, which is in directory order, so the batches
# must arrive in that same order (flow_from_directory shuffles by default).
ordered_test_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

# Class names ordered by the integer label the generator assigned to them.
class_indices = ordered_test_generator.class_indices
class_names = sorted(class_indices, key=class_indices.get)
label_ids = list(range(len(class_names)))

# Predict on the test set; flatten the (N, 1) sigmoid output to shape (N,).
predictions = model.predict(ordered_test_generator)
predicted_labels = (predictions > 0.5).astype(int).ravel()

# Get true labels (same order as the predictions above)
true_labels = ordered_test_generator.classes

# Compute confusion matrix
cm = confusion_matrix(true_labels, predicted_labels, labels=label_ids)

# Plot confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

# Print classification report
print(classification_report(true_labels, predicted_labels, labels=label_ids, target_names=class_names))
