In [2]:
# This stage is responsible for processing the data.
# Prerequisite: the data must already be downloaded locally.
%pwd

'e:\\Projects\\large-class-image-classifcation\\notebooks'

In [2]:
import os

# Switch the kernel into the project directory (presumably so that the
# relative config/params paths used later resolve from there - TODO confirm).
# A bare relative chdir is not idempotent: re-running the cell would fail
# because the kernel is already inside the directory, so only change
# directory when we are not there yet. A genuinely missing directory still
# raises FileNotFoundError, as before.
_PROJECT_DIR = "100-class-image-classifaction"
if os.path.basename(os.getcwd()) != _PROJECT_DIR:
    os.chdir(_PROJECT_DIR)

In [2]:
import os
# Sanity check: show the kernel's current working directory as the cell output.
os.getcwd()

'e:\\Projects\\large-class-image-classifcation\\research'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainingConfig:
    """Immutable bundle of settings consumed by the training stage.

    The first group of fields locates the model artefacts; the second group
    describes the dataset and the image-augmentation options that are handed
    to the Keras ``ImageDataGenerator`` / ``flow_from_directory`` calls.
    """

    # --- model artefacts -------------------------------------------------
    root_dir: Path             # working directory of the training stage
    model_path: Path           # model file loaded before training
    trained_model_path: Path   # destination used when the trained model is saved
    model_name: str            # key used to select this model's section in the params file

    # --- dataset / augmentation -----------------------------------------
    dataset_path: str          # directory passed to flow_from_directory
    batch_size: int
    seed: int                  # shuffle seed for the training generator
    rescale: float
    shear_range: float
    zoom_range: float
    width_shift_range: float
    height_shift_range: float
    horizontal_flip: bool
    validation_split: float
    fill_mode: str


In [4]:
from LCIC.constants import *
from LCIC import logger
from LCIC.utils.common import read_yaml, create_directories


In [6]:
class ConfigurationManager:
    """Read the project's config and params YAML files and build stage configs.

    On construction both files are loaded via ``read_yaml`` and the training
    stage's ``root_dir`` is created via ``create_directories``.
    """

    def __init__(self,
                 config_path: Path = CONFIG_FILE_PATH,
                 params_path: Path = PARAMS_FILE_PATH):
        """
        Args:
            config_path: Location of the main configuration YAML.
            params_path: Location of the parameters YAML.
        """
        self.config = read_yaml(config_path)
        self.params = read_yaml(params_path)

        create_directories([self.config.model_training.root_dir])

    def get_model_train_config(self) -> ModelTrainingConfig:
        """Assemble the :class:`ModelTrainingConfig` for the training stage.

        Model artefact paths come from the ``model_training`` section of the
        config; dataset and augmentation options come from ``data_processing``.
        If ``data_processing.dataset_path`` is ``None`` the user is prompted
        for it interactively.

        Returns:
            A frozen ``ModelTrainingConfig`` instance.
        """
        preprocessed_cfg = self.config.data_processing
        model_cfg = self.config.model_training

        # Keep the full params mapping on ``self`` untouched. Rebinding
        # ``self.params`` to the model's sub-section made a second call to this
        # method index the sub-section by the model name again and fail.
        model_params = self.params[model_cfg.model_name]

        logger.info(f"Model configurations: ---> {model_cfg}")
        # Log the parameters themselves (previously the data-processing
        # config was printed under this label).
        logger.info(f"Model parameters: ---> {model_params}")
        logger.info(
            f"Data Preprocessing configurations: ---> {preprocessed_cfg}")

        # Fall back to an interactive prompt only when no path is configured.
        dataset_path = preprocessed_cfg.dataset_path
        if dataset_path is None:
            dataset_path = input("Enter the data path: ")

        return ModelTrainingConfig(
            root_dir=Path(model_cfg.root_dir),
            model_path=Path(model_cfg.model_path),
            trained_model_path=Path(model_cfg.trained_model_path),
            model_name=model_cfg.model_name,

            dataset_path=dataset_path,
            batch_size=preprocessed_cfg.batch_size,
            seed=preprocessed_cfg.seed,
            rescale=preprocessed_cfg.rescale,
            shear_range=preprocessed_cfg.shear_range,
            zoom_range=preprocessed_cfg.zoom_range,
            width_shift_range=preprocessed_cfg.width_shift_range,
            height_shift_range=preprocessed_cfg.height_shift_range,
            horizontal_flip=preprocessed_cfg.horizontal_flip,
            validation_split=preprocessed_cfg.validation_split,
            fill_mode=preprocessed_cfg.fill_mode,
        )

In [9]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
class DataPreprocessing():
    """Build the Keras training and validation image generators.

    Augmentation settings, batch size, seed and dataset location come from the
    supplied ``ModelTrainingConfig``; the target image size and class list come
    from the model's section of the params file.
    """

    def __init__(self,
                 config: ModelTrainingConfig,
                 params_path: Path = PARAMS_FILE_PATH):
        """
        Args:
            config: Training-stage configuration.
            params_path: Location of the parameters YAML; the section keyed by
                ``config.model_name`` is used.
        """
        self.config = config
        # Only the section belonging to the configured model is kept; it is
        # read below for IMAGE_SIZE and CLASSES.
        self.params = read_yaml(params_path)[self.config.model_name]

        self.train_generator = None
        self.valid_generator = None

    def __generator(self):
        """Create the (augmenting) training and (plain) validation generators.

        Returns:
            Tuple ``(train_datagen, validation_datagen)``.
        """
        # ``batch_size`` is not an ImageDataGenerator constructor argument
        # (passing it raises TypeError); it is supplied to
        # ``flow_from_directory`` instead.
        _train_datagen = ImageDataGenerator(
            rescale=self.config.rescale,
            shear_range=self.config.shear_range,
            zoom_range=self.config.zoom_range,
            width_shift_range=self.config.width_shift_range,
            height_shift_range=self.config.height_shift_range,
            horizontal_flip=self.config.horizontal_flip,
            validation_split=self.config.validation_split,
            fill_mode=self.config.fill_mode,
        )

        # The validation generator applies no augmentation, but it must
        #   * use the same rescale factor as training, and
        #   * carry the same validation_split, otherwise subset="validation"
        #     selects an empty slice of the data.
        _test_datagen = ImageDataGenerator(
            rescale=self.config.rescale,
            validation_split=self.config.validation_split,
        )

        return _train_datagen, _test_datagen

    def get_train_and_valid_set(self):
        """Create directory iterators for the training and validation subsets.

        The iterators are also stored on ``self.train_generator`` and
        ``self.valid_generator``.

        Returns:
            Tuple ``(training_set, validation_set)``.
        """
        train_datagen, val_datagen = self.__generator()

        # NOTE(review): flow_from_directory expects target_size as
        # (height, width) and classes as a list of sub-directory names -
        # confirm IMAGE_SIZE / CLASSES in the params file have that shape.
        training_set = train_datagen.flow_from_directory(
            directory=self.config.dataset_path,
            target_size=self.params.IMAGE_SIZE,
            color_mode='rgb',
            classes=self.params.CLASSES,
            class_mode='categorical',
            batch_size=self.config.batch_size,
            shuffle=True,
            seed=self.config.seed,
            interpolation='nearest',
            subset="training"
        )

        validation_set = val_datagen.flow_from_directory(
            directory=self.config.dataset_path,
            target_size=self.params.IMAGE_SIZE,
            color_mode='rgb',
            classes=self.params.CLASSES,
            class_mode='categorical',
            batch_size=self.config.batch_size,
            interpolation='nearest',
            subset="validation"
        )

        self.train_generator = training_set
        self.valid_generator = validation_set

        return self.train_generator, self.valid_generator

In [None]:
class Training():
    """Compile, fit and optionally save the model for the training stage.

    On construction the data generators are built through
    ``DataPreprocessing``, the per-epoch step counts are derived from the
    number of samples and the batch size, and the model is loaded from
    ``config.model_path``.
    """

    def __init__(self, config: ModelTrainingConfig,
                 params_path: Path = PARAMS_FILE_PATH) -> None:
        """
        Args:
            config: Training-stage configuration.
            params_path: Location of the parameters YAML.
        """
        self.config = config
        # NOTE(review): LEARNING_RATE / OPTIMIZER / EPOCHS are read from the
        # top level of the params file here, whereas DataPreprocessing reads
        # the section keyed by model_name - confirm this matches the YAML.
        self.params = read_yaml(params_path)

        # Forward params_path so both classes read the same params file.
        self.training_data, self.validation_data = DataPreprocessing(
            config=self.config,
            params_path=params_path).get_train_and_valid_set()

        self.trains_steps = self.training_data.samples // self.config.batch_size
        self.validation_steps = self.validation_data.samples // self.config.batch_size
        # The loader lives in tf.keras.models; tf.keras has no load_model.
        self.model = tf.keras.models.load_model(self.config.model_path)
        self.history = None

    @staticmethod
    def save_model(model: tf.keras.Model, path: Path):
        """Persist ``model`` to ``path`` using ``model.save``."""
        model.save(path)

    def __getoptimizer(self, optimizer_name: str):
        """Return a Keras optimizer for ``optimizer_name``.

        Supported names (case-insensitive): ``adam``, ``rmsprop``, ``sgd``.
        The learning rate is taken from ``self.params.LEARNING_RATE``.

        Raises:
            ValueError: If the name is not one of the supported optimizers.
        """
        learning_rate = self.params.LEARNING_RATE
        name = str(optimizer_name).lower()

        if name == "adam":
            return tf.keras.optimizers.Adam(
                learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, amsgrad=False)
        if name == "rmsprop":
            return tf.keras.optimizers.RMSprop(
                learning_rate=learning_rate, rho=0.9)
        if name == "sgd":
            return tf.keras.optimizers.SGD(
                learning_rate=learning_rate, momentum=0.0, nesterov=False)

        # Fail loudly instead of silently handing None to model.compile().
        raise ValueError(
            f"Unsupported optimizer {optimizer_name!r}; "
            "expected one of 'adam', 'rmsprop', 'sgd'.")

    def train(self, callbacks_list: list = None, save_model: bool = True, gethistory: bool = True):
        """Compile and fit the model, optionally saving it afterwards.

        Args:
            callbacks_list: Keras callbacks passed to ``fit``; ``None`` means
                no callbacks.
            save_model: When true, save the fitted model to
                ``config.trained_model_path``.
            gethistory: When true, return the ``History`` object from ``fit``.

        Returns:
            The ``fit`` history if ``gethistory`` is true, otherwise ``None``.
        """
        # A fresh list per call avoids sharing one mutable default across calls.
        callbacks = [] if callbacks_list is None else list(callbacks_list)

        self.model.compile(optimizer=self.__getoptimizer(self.params.OPTIMIZER),
                           loss=tf.keras.losses.CategoricalCrossentropy(),
                           metrics=['accuracy'])

        self.history = self.model.fit(self.training_data,
                                      steps_per_epoch=self.trains_steps,
                                      epochs=self.params.EPOCHS,
                                      validation_data=self.validation_data,
                                      validation_steps=self.validation_steps,
                                      callbacks=callbacks)
        if save_model:
            self.save_model(self.model, self.config.trained_model_path)
        if gethistory:
            return self.history
        return None

In [None]:
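# NOTE: stale example kept for reference - the ConfigurationManager defined in
# this notebook exposes get_model_train_config(), not get_preprocess_config().
# try:
#     config_mgr = ConfigurationManager()
#     preprocess_config = config_mgr.get_preprocess_config()
#     data_preprocessing = DataPreprocessing(preprocess_config)
#     training_set, validation_set = data_preprocessing.get_train_and_valid_set()
# except Exception as e:
#     raise e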

In [None]:
# Run the training stage end to end: load configuration, build the trainer
# (which also builds the data generators and loads the model), then fit.
try:
    config_mgr = ConfigurationManager()
    preprocess_config = config_mgr.get_model_train_config()
    training = Training(preprocess_config)
    training.train()
except Exception:
    # Record the failure with its traceback in the project log, then re-raise
    # the original exception unchanged (bare ``raise`` keeps the traceback).
    logger.exception("Model training stage failed")
    raise