In [1]:
import os
os.chdir("../")
%pwd

'e:\\Deep Learning\\TENSORFLOW\\rice_image_detection'

In [2]:
from pathlib import Path
from dataclasses import dataclass

@dataclass(frozen=True)
class DataPreparationConfig:
    """Read-only bundle of the filesystem locations used by data preparation.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Working directory for this pipeline stage.
    root_dir: Path
    # Source dataset folder (scanned for per-class sub-folders by the splitter).
    data_dir: Path
    # Destination folder for the training split.
    train_dir: Path
    # Destination folder for the test split.
    test_dir: Path
    # Destination folder for the validation split.
    val_dir: Path

In [3]:
from src.RICE_IMAGE_DETECTION.constants import *
from src.RICE_IMAGE_DETECTION.utils.common import read_yaml, create_directories

In [4]:
class ConfigureationManager:
    """Loads the project YAML files and hands out per-stage config objects."""

    def __init__(self,
            config_filepath = CONFIG_FILE_PATH,
            params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        # Keep the config-then-params load order; each load emits a log line.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_preparation_config(self) -> DataPreparationConfig:
        """Build the config for the data-preparation stage.

        Creates the stage's ``root_dir`` as a side effect, then copies the
        five path entries of the ``data_preparation`` section verbatim.

        Returns:
            DataPreparationConfig: The populated, immutable stage config.
        """
        section = self.config.data_preparation

        create_directories([section.root_dir])

        return DataPreparationConfig(
            root_dir=section.root_dir,
            data_dir=section.data_dir,
            train_dir=section.train_dir,
            test_dir=section.test_dir,
            val_dir=section.val_dir,
        )

In [5]:
import os
import shutil
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from src.RICE_IMAGE_DETECTION import logger

In [7]:
import sys
import time
import threading

In [8]:
class DataPreparation:
    """Split a folder-per-class image dataset and build Keras data generators.

    The source dataset (``config.data_dir``) is expected to contain one
    sub-folder per class. ``split_data`` copies each class's files into the
    train / validation / test trees, and ``train_test_set`` wraps the train
    and test trees in ``ImageDataGenerator`` iterators.
    """

    def __init__(self, config: DataPreparationConfig):
        """Store the stage configuration and initialise spinner state.

        Args:
            config: Paths for the source data and the three split folders.
        """
        self.config = config
        # Polled by `animate`; `split_data` flips it to stop the spinner thread.
        self.stop_animation = False
        # Kept so existing callers that touch these attributes keep working;
        # the spinner itself does not read them.
        self.current_message = ""
        self.animation_lock = threading.Lock()

    def making_traintest_folder(self):
        """Create the train, test and validation root folders if missing.

        Raises:
            OSError: Propagated from ``os.makedirs`` if a folder cannot be created.
        """
        for folder in (self.config.train_dir, self.config.test_dir, self.config.val_dir):
            os.makedirs(folder, exist_ok=True)
        logger.info("Created validation, test, and train folders")

    def animate(self):
        """Draw a console spinner until ``self.stop_animation`` becomes true.

        Intended to run on a background thread; it rewrites the current
        stdout line roughly every 0.1 seconds.
        """
        symbols = ['-', '\\', '|', '/']
        i = 0
        while not self.stop_animation:
            sys.stdout.write(f"\rSplitting the Data {symbols[i % len(symbols)]}")
            sys.stdout.flush()
            time.sleep(0.1)
            i += 1

    def split_data(self):
        """Copy every class's files into train / validation / test folders.

        Each class is split independently: 30% of the files are held out,
        and one third of that hold-out becomes the test set, i.e. roughly
        70% train, 20% validation and 10% test. Files are copied, not moved,
        so the source dataset is left untouched.

        The file list is sorted before splitting. ``os.listdir`` returns
        entries in arbitrary order, so without sorting the seeded split is not
        reproducible and a re-run could copy the same image into more than one
        split.

        Raises:
            OSError: If the source folder cannot be listed or a copy fails.
            ValueError: Propagated from ``train_test_split`` when a class has
                too few files to populate all three splits.
        """
        data_path = self.config.data_dir
        class_names = os.listdir(data_path)
        destinations = {
            "train": self.config.train_dir,
            "val": self.config.val_dir,
            "test": self.config.test_dir,
        }

        self.stop_animation = False
        # Daemon thread: a stuck spinner must never keep the interpreter alive.
        animation_thread = threading.Thread(target=self.animate, daemon=True)
        animation_thread.start()

        succeeded = False
        try:
            for class_name in class_names:
                class_path = os.path.join(data_path, class_name)
                if not os.path.isdir(class_path):
                    continue

                # Collect the regular files of this class in a stable order.
                candidates = (os.path.join(class_path, f) for f in os.listdir(class_path))
                files = sorted(path for path in candidates if os.path.isfile(path))
                if not files:
                    # train_test_split rejects empty input; skip instead of aborting the run.
                    logger.warning(f"Skipping class folder without files: {class_path}")
                    continue

                # Hold out 30% of the files, then give one third of the hold-out to test.
                train_files, remaining_files = train_test_split(files, test_size=0.3, random_state=42)
                val_files, test_files = train_test_split(remaining_files, test_size=1/3, random_state=42)
                split_files = {"train": train_files, "val": val_files, "test": test_files}

                # Copy each split into <split folder>/<class name>/.
                for split_name, destination_root in destinations.items():
                    class_folder = os.path.join(destination_root, class_name)
                    os.makedirs(class_folder, exist_ok=True)
                    for file in split_files[split_name]:
                        shutil.copy(file, class_folder)

            succeeded = True
        finally:
            self.stop_animation = True
            animation_thread.join()
            # Report the real outcome instead of always claiming completion.
            if succeeded:
                sys.stdout.write("\rSplitting data complete.          \n")
            else:
                sys.stdout.write("\rSplitting data failed.            \n")
            sys.stdout.flush()

        # Logged only after the spinner has stopped so the log line is not
        # appended to a half-drawn spinner line.
        logger.info("The data has been split into train, validation, and test sets")

    def train_test_set(self, target_size=(64, 64), batch_size=32):
        """Build Keras directory iterators for the train and test folders.

        The training generator applies random augmentation (rotation, shifts,
        shear, zoom, horizontal flip) plus 1/255 rescaling; the test generator
        only rescales. The validation folder is not loaded by this method.

        Args:
            target_size: ``(height, width)`` every image is resized to.
            batch_size: Number of images per yielded batch.

        Returns:
            tuple: ``(training_set, test_set)`` directory iterators using
            ``class_mode='categorical'``.
        """
        train_folder = self.config.train_dir
        test_folder = self.config.test_dir
        train_datagen = ImageDataGenerator(
            rotation_range=40,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
            fill_mode='nearest',
            rescale=1./255)
        test_datagen = ImageDataGenerator(rescale=1./255)
        logger.info("-------The train and test datagen created-------")
        training_set = train_datagen.flow_from_directory(
            train_folder,
            target_size=target_size,
            batch_size=batch_size,
            class_mode='categorical',
            shuffle=True)
        test_set = test_datagen.flow_from_directory(
            test_folder,
            target_size=target_size,
            batch_size=batch_size,
            class_mode='categorical',
            shuffle=False)  # Keep test order fixed so predictions line up with labels
        logger.info("-------The test and train set created-------")
        return training_set, test_set

In [9]:
# Run the data-preparation stage end to end: load the config, create the split
# folders, copy the files, then build the Keras generators.
# Errors propagate unchanged; wrapping this in `except Exception as e: raise e`
# would only add a frame to the traceback.
# NOTE: the copy step is slow on the full dataset (about 19 minutes in the
# recorded run), so avoid re-running this cell unnecessarily.
config = ConfigureationManager()
data_preparation_config = config.get_data_preparation_config()
data_preparation = DataPreparation(config=data_preparation_config)
data_preparation.making_traintest_folder()
data_preparation.split_data()
training_set, test_set = data_preparation.train_test_set()

[2024-07-02 13:05:55,869: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-07-02 13:05:55,885: INFO: common: yaml file: params.yaml loaded successfully]
[2024-07-02 13:05:55,886: INFO: common: created directory at: artifacts]
[2024-07-02 13:05:55,886: INFO: common: created directory at: artifacts/data_preparation]
[2024-07-02 13:05:55,886: INFO: 2474412924: Created validation, test, and train folders]
Splitting the Data \[2024-07-02 13:24:41,538: INFO: 2474412924: The data has been split into train, validation, and test sets]
Splitting data complete.          
[2024-07-02 13:24:41,573: INFO: 2474412924: -------The train and test datagen created-------]
Found 52500 images belonging to 5 classes.
Found 7500 images belonging to 5 classes.
[2024-07-02 13:24:45,590: INFO: 2474412924: -------The test and train set created-------]
