In [1]:
%pwd

'e:\\PROJECTS\\ML\\Brain-Tumor-Image-Classfication\\notebook'

In [2]:
# Step up from notebook/ to the repository root: the config file and output
# directories used by the cells below are given as relative paths.
%cd ..


e:\PROJECTS\ML\Brain-Tumor-Image-Classfication


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [3]:
%pwd

'e:\\PROJECTS\\ML\\Brain-Tumor-Image-Classfication'

In [4]:
from dataclasses import dataclass
from typing import Tuple
from brainMRI.logging import logger
import tensorflow as tf
from pathlib import Path
import os

@dataclass
class PrepareDatasets:
    """Build training/validation image datasets from a directory and save them.

    All fields except ``save_dir`` are forwarded unchanged to
    ``tf.keras.utils.image_dataset_from_directory``.
    """

    # Directory passed to image_dataset_from_directory as the image source.
    data_dir: Path
    # Directory that receives class_names.txt, train_dataset/ and val_dataset/.
    # Either a str or a Path is accepted.
    save_dir: Path
    validation_split: float
    image_size: tuple[int, int]
    batch_size: int
    labels: str
    # Must be "both": prepare_datasets unpacks a (train, val) pair.
    subset: str
    seed: int

    def prepare_datasets(self) -> "Tuple[tf.data.Dataset, tf.data.Dataset]":
        """
        Prepare the training and validation datasets for the machine learning model.

        Loads the images under ``data_dir``, writes the class names (one per
        line) to ``<save_dir>/class_names.txt``, prefetches both datasets and
        saves them to ``<save_dir>/train_dataset`` and ``<save_dir>/val_dataset``.
        The class names are also stored on the instance as ``self.class_names``.

        Returns:
            Tuple[tf.data.Dataset, tf.data.Dataset]: The prepared training and validation datasets.

        Raises:
            ValueError: If ``subset`` is not "both", since a single dataset
                cannot be split into the (train, val) pair this method returns.
        """
        # Only subset="both" makes the loader return two datasets; fail early
        # with a clear message instead of an obscure unpacking error.
        if self.subset != "both":
            raise ValueError(
                f"subset must be 'both' to obtain training and validation datasets, got {self.subset!r}"
            )

        # Normalise once so str and Path values both work; concatenating a
        # Path with '+' raises TypeError.
        save_dir = Path(self.save_dir)
        save_dir.mkdir(parents=True, exist_ok=True)

        AUTOTUNE = tf.data.AUTOTUNE
        logger.info("Loading image datasets from directory: %s", self.data_dir)
        train_dataset, val_dataset = tf.keras.utils.image_dataset_from_directory(
            self.data_dir,
            validation_split=self.validation_split,
            image_size=self.image_size,
            batch_size=self.batch_size,
            labels=self.labels,
            subset=self.subset,
            seed=self.seed,
        )
        # class_names must be read before prefetch(): the code below reassigns
        # train_dataset to the prefetched dataset.
        self.class_names = train_dataset.class_names
        logger.info("Saving class names to file: %s/class_names.txt", self.save_dir)
        with open(save_dir / 'class_names.txt', 'w', encoding='utf-8') as f:
            f.write('\n'.join(self.class_names))

        logger.info("Class names: %s", self.class_names)
        logger.info("Prefetching datasets")
        train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
        val_dataset = val_dataset.prefetch(buffer_size=AUTOTUNE)

        logger.info("Saving datasets to directory: %s", self.save_dir)
        # Dataset.save is given plain string paths, as in the original code.
        train_dataset.save(str(save_dir / 'train_dataset'))
        val_dataset.save(str(save_dir / 'val_dataset'))

        logger.info("Datasets prepared successfully")
        return train_dataset, val_dataset

In [5]:
from brainMRI.constants import *
from brainMRI.utils.helpers import load_config, create_directories
class ConfigHandler:
    """Read the project's config and params files and build stage configurations."""

    def __init__(self, file_path=CONFIG_FILE_PATH, params_path = PARAMS_FILE_PATH):
        """Load both files with ``load_config`` and create the configured root directory.

        Args:
            file_path: Location of the main config file.
            params_path: Location of the params file.
        """
        self.config = load_config(file_path)
        self.params = load_config(params_path)
        create_directories([self.config.root_dir])

    def get_prepare_datasets_config(self) -> PrepareDatasets:
        """Assemble the ``PrepareDatasets`` settings from the loaded files.

        The directories come from the ``prepare_datasets`` section of the
        config file and every other field from the same section of the params
        file. The save directory is created before the object is built.

        Returns:
            PrepareDatasets: Populated configuration for the dataset stage.
        """
        stage_paths = self.config.prepare_datasets
        stage_params = self.params.prepare_datasets
        create_directories([stage_paths.save_dir])

        # Locations first, then the tunable values, read in declaration order.
        kwargs = {
            "data_dir": stage_paths.data_dir,
            "save_dir": stage_paths.save_dir,
        }
        for field_name in (
            "validation_split",
            "image_size",
            "batch_size",
            "labels",
            "subset",
            "seed",
        ):
            kwargs[field_name] = getattr(stage_params, field_name)

        return PrepareDatasets(**kwargs)

In [6]:
# Run the dataset-preparation stage: load the configuration, build the stage
# settings, then load, prefetch and save the datasets. Any failure is logged
# and then propagated so the cell still fails visibly.
try:
    config = ConfigHandler()
    prepare_datasets_config = config.get_prepare_datasets_config()
    prepare_datasets_config.prepare_datasets()
except Exception as e:
    logger.error("Error occurred while preparing datasets: %s", str(e))
    # Bare `raise` re-raises the exception being handled with its original traceback.
    raise

[2024-06-01 10:00:31,657: INFO: helpers: YAML file: config\config.yaml loaded successfully]
[2024-06-01 10:00:31,852: INFO: helpers: YAML file: params.yaml loaded successfully]
[2024-06-01 10:00:31,856: INFO: helpers: Created directory at: project_outputs]
[2024-06-01 10:00:31,858: INFO: helpers: Created directory at: project_outputs/data/preprocesses_data]
[2024-06-01 10:00:31,864: INFO: 1209089901: Loading image datasets from directory: project_outputs/data/extracted]
Found 253 files belonging to 2 classes.
Using 203 files for training.
Using 50 files for validation.
[2024-06-01 10:00:35,614: INFO: 1209089901: Saving class names to file: project_outputs/data/preprocesses_data/class_names.txt]
[2024-06-01 10:00:35,616: INFO: 1209089901: Class names: ['no', 'yes']]
[2024-06-01 10:00:35,617: INFO: 1209089901: Prefetching datasets]
[2024-06-01 10:00:35,629: INFO: 1209089901: Saving datasets to directory: project_outputs/data/preprocesses_data]
[2024-06-01 10:00:44,584: INFO: 1209089901: 