In [1]:
import os

In [2]:
%pwd

'c:\\Users\\DELL\\Documents\\Data Science\\Projects\\Chicken_Disease_Classification\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'c:\\Users\\DELL\\Documents\\Data Science\\Projects\\Chicken_Disease_Classification'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class TrainingConfig:
    """Immutable bundle of paths and hyper-parameters used by the training stage.

    Instances are built by ``ConfigurationManager.get_training_config`` and
    read by ``Training``.
    """
    # Working directory of the training stage; created when the config is built.
    root_dir: Path
    # Destination the fitted model is saved to at the end of ``Training.train``.
    trained_model_path: Path
    # Model file loaded by ``Training.get_base_model`` as the starting point.
    updated_base_model_path: Path
    # Directory handed to ``flow_from_directory`` for both data subsets.
    training_data: Path
    # Number of epochs passed to ``model.fit``.
    params_epochs: int
    # Batch size used by both the training and the validation generator.
    params_batch_size: int
    # When true, the training generator applies random augmentations.
    params_is_augmentation: bool
    # All but the last element are used as ``target_size``
    # (presumably [height, width, channels] -- confirm against params.yaml).
    params_image_size: list



@dataclass(frozen=True)
class PrepareCallbacksConfig:
    """Immutable bundle of paths used to build the Keras training callbacks.

    Instances are built by ``ConfigurationManager.get_prepare_callback_config``
    and read by ``PrepareCallback``.
    """
    # Taken from the ``prepare_callbacks.root_dir`` config entry; not read by
    # the callback code in this notebook.
    root_dir: Path
    # Parent directory under which each run gets its own timestamped
    # TensorBoard log folder.
    tensorboard_root_log_dir: Path
    # File path passed to ``ModelCheckpoint`` as its ``filepath``.
    checkpoint_model_filepath: Path

In [6]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories
import tensorflow as tf

]


In [7]:
class ConfigurationManager:
    """Reads the project's YAML settings and hands out per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the hyper-parameter YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_prepare_callback_config(self) -> PrepareCallbacksConfig:
        """Build the callback configuration and create the directories it needs.

        Returns:
            A ``PrepareCallbacksConfig`` whose fields are ``Path`` objects
            taken from the ``prepare_callbacks`` section of the config.
        """
        section = self.config.prepare_callbacks

        # The checkpoint entry names a file, so only its parent directory is created.
        checkpoint_parent = Path(os.path.dirname(section.checkpoint_model_filepath))
        tensorboard_dir = Path(section.tensorboard_root_log_dir)
        create_directories([checkpoint_parent, tensorboard_dir])

        return PrepareCallbacksConfig(
            root_dir=Path(section.root_dir),
            tensorboard_root_log_dir=Path(section.tensorboard_root_log_dir),
            checkpoint_model_filepath=Path(section.checkpoint_model_filepath),
        )

    def get_training_config(self) -> TrainingConfig:
        """Build the training configuration and create the training root directory.

        Returns:
            A ``TrainingConfig`` combining paths from the ``training``,
            ``prepare_base_model`` and ``data_ingestion`` config sections with
            the hyper-parameters from the params file.
        """
        training_section = self.config.training
        base_model_section = self.config.prepare_base_model
        hyperparams = self.params

        # Single-argument join: the configured directory is passed through as-is.
        data_dir = os.path.join(self.config.data_ingestion.train_data_dir)

        stage_root = Path(training_section.root_dir)
        create_directories([stage_root])

        return TrainingConfig(
            root_dir=Path(training_section.root_dir),
            trained_model_path=Path(training_section.trained_model_path),
            updated_base_model_path=Path(base_model_section.updated_base_model_path),
            training_data=Path(data_dir),
            params_epochs=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_is_augmentation=hyperparams.AUGMENTATION,
            params_image_size=hyperparams.IMAGE_SIZE,
        )

In [8]:
import time

In [9]:
class PrepareCallback:
    """Factory for the Keras callbacks (TensorBoard + checkpointing) used in training."""

    def __init__(self, config: PrepareCallbacksConfig):
        """Keep the callback configuration for later use."""
        self.config = config

    @property
    def _create_tb_callbacks(self):
        """A new TensorBoard callback logging into a timestamped sub-directory.

        Every access builds a fresh callback; the directory name embeds the
        local time at the moment of access.
        """
        run_stamp = time.strftime("%Y-%m-%d-%H-%M-%S")
        run_folder = f"tb_logs_at_{run_stamp}"
        run_log_dir = os.path.join(self.config.tensorboard_root_log_dir, run_folder)
        return tf.keras.callbacks.TensorBoard(log_dir=run_log_dir)

    @property
    def _create_ckpt_callbacks(self):
        """A new ModelCheckpoint callback created with ``save_best_only=True``."""
        checkpoint_target = str(self.config.checkpoint_model_filepath)
        return tf.keras.callbacks.ModelCheckpoint(
            filepath=checkpoint_target,
            save_best_only=True,
        )

    def get_tb_ckpt_callbacks(self):
        """Return ``[tensorboard_callback, checkpoint_callback]``, in that order."""
        tensorboard_cb = self._create_tb_callbacks
        checkpoint_cb = self._create_ckpt_callbacks
        return [tensorboard_cb, checkpoint_cb]


In [10]:
import os
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf
import time

In [11]:
class Training:
    """Fits the prepared base model on an image directory and saves the result.

    Expected call order: ``get_base_model()``, then ``train_valid_generator()``,
    then ``train(callback_list)``. ``train`` reads ``self.model``,
    ``self.train_generator`` and ``self.valid_generator`` set by the first two.
    """

    # Annotations naming TrainingConfig / tf are written as strings so they are
    # not evaluated when the class body runs.
    def __init__(self, config: "TrainingConfig"):
        """Store the training configuration.

        Args:
            config: Paths and hyper-parameters for this training run.
        """
        self.config = config

    def get_base_model(self):
        """Load the model at ``config.updated_base_model_path`` into ``self.model``."""
        self.model = tf.keras.models.load_model(
            self.config.updated_base_model_path
        )

    def train_valid_generator(self):
        """Create ``self.valid_generator`` and ``self.train_generator``.

        Both generators read from ``config.training_data`` and share the same
        rescaling and 80/20 ``validation_split``. Random augmentation is applied
        to the training generator only, and only when
        ``config.params_is_augmentation`` is true.
        """
        datagenerator_kwargs = dict(
            rescale = 1./255,
            validation_split=0.20
        )

        dataflow_kwargs = dict(
            # Drop the last element of the configured size
            # (presumably the channel count -- confirm against params.yaml).
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear"
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        # flow_from_directory returns a DirectoryIterator yielding (x, y) tuples:
        # x is a batch of images with shape (batch_size, *target_size, channels)
        # and y holds the corresponding labels.
        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,  # keep validation order fixed across epochs
            **dataflow_kwargs
        )

        if self.config.params_is_augmentation:
            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=60,
                horizontal_flip=True,
                width_shift_range=0.3,
                height_shift_range=0.3,
                shear_range=0.2,
                zoom_range=0.4,
                **datagenerator_kwargs
            )
        else:
            # Without augmentation the training data needs the same
            # preprocessing as validation, so the generator object is reused.
            train_datagenerator = valid_datagenerator

        self.train_generator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="training",
            shuffle=True,
            **dataflow_kwargs
        )

    @staticmethod
    def save_model(path: Path, model: "tf.keras.Model"):
        """Save ``model`` to ``path`` via ``model.save``."""
        model.save(path)

    @staticmethod
    def _steps_to_cover(generator) -> int:
        """Number of batches needed to see every sample of ``generator`` once.

        Uses ceiling division so the trailing partial batch is counted. Floor
        division would skip up to ``batch_size - 1`` samples and give 0 steps
        when the subset is smaller than one batch.
        """
        samples = generator.samples
        batch_size = generator.batch_size
        return (samples + batch_size - 1) // batch_size

    def train(self, callback_list: list):
        """Fit ``self.model`` and save it to ``config.trained_model_path``.

        Args:
            callback_list: Keras callbacks forwarded to ``model.fit``.

        Side effects:
            Sets ``self.steps_per_epoch`` and ``self.validation_steps``.
        """
        self.steps_per_epoch = self._steps_to_cover(self.train_generator)
        self.validation_steps = self._steps_to_cover(self.valid_generator)

        self.model.fit(
            self.train_generator,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=self.valid_generator,
            callbacks=callback_list
        )

        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )

In the code above, the data is split into training and validation sets by combining the validation_split argument of ImageDataGenerator with the subset parameter of the flow_from_directory method. The shuffle parameter then controls whether the images within each subset (training or validation) are reshuffled before each epoch during training.

Let me clarify how the split and shuffling happen in this code:

Data Splitting:

subset="training": This creates a generator (self.train_generator) for the training subset of images, i.e. the portion of the data not reserved for validation (80% here, since validation_split=0.20). Which images end up in the training set is determined by the order in which the files are found in the specified directory (self.config.training_data), not by the shuffle parameter.

subset="validation": This creates a generator (self.valid_generator) for the validation subset of images, i.e. the fraction of the data given by the validation_split parameter (20% in this notebook). As with the training subset, the specific images included are determined by the order in which the files are found in the specified directory.

Shuffling:

shuffle=True for the training set (self.train_generator): This means that the order of images in the training set will be randomized before each epoch during training. This is beneficial for preventing the model from memorizing the order of the training data and helps in achieving better generalization.

shuffle=False for the validation set (self.valid_generator): This means that the order of images in the validation set will remain constant across epochs. This is often done during validation to ensure consistent evaluation results and facilitate comparisons between different models or training runs.

In summary, the data splitting is done using the subset parameter, and shuffling is controlled by the shuffle parameter. The training set is shuffled before each epoch, while the validation set remains constant in order across epochs.

In [12]:
# End-to-end training run: build the callbacks, then load the base model,
# create the data generators and fit. Any exception propagates unchanged so
# the notebook shows the original traceback.
config = ConfigurationManager()
prepare_callbacks_config = config.get_prepare_callback_config()
prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)
callback_list = prepare_callbacks.get_tb_ckpt_callbacks()

training_config = config.get_training_config()
training = Training(config=training_config)
training.get_base_model()
training.train_valid_generator()
training.train(
    callback_list=callback_list
)

[2023-12-03 21:24:08,496: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-12-03 21:24:08,502: INFO: common: yaml file: params.yaml loaded successfully]
[2023-12-03 21:24:08,504: INFO: common: created directory at: artifacts]
[2023-12-03 21:24:08,508: INFO: common: created directory at: artifacts\prepare_callbacks\checkpoint_dir]
[2023-12-03 21:24:08,510: INFO: common: created directory at: artifacts\prepare_callbacks\tensorboard_log_dir]
[2023-12-03 21:24:08,521: INFO: common: created directory at: artifacts\training]


]
]
Found 96 images belonging to 2 classes.
Found 384 images belonging to 2 classes.
Epoch 1/15
]


  saving_api.save_model(


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [12]:
# Without Augmentation and additional layers
# Same pipeline as the main run. Any exception propagates unchanged so the
# notebook shows the original traceback.
config = ConfigurationManager()
prepare_callbacks_config = config.get_prepare_callback_config()
prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)
callback_list = prepare_callbacks.get_tb_ckpt_callbacks()

training_config = config.get_training_config()
training = Training(config=training_config)
training.get_base_model()
training.train_valid_generator()
training.train(
    callback_list=callback_list
)

[2023-12-02 22:27:12,306: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-12-02 22:27:12,312: INFO: common: yaml file: params.yaml loaded successfully]
[2023-12-02 22:27:12,315: INFO: common: created directory at: artifacts]
[2023-12-02 22:27:12,316: INFO: common: created directory at: artifacts\prepare_callbacks\checkpoint_dir]
[2023-12-02 22:27:12,319: INFO: common: created directory at: artifacts\prepare_callbacks\tensorboard_log_dir]
[2023-12-02 22:27:12,334: INFO: common: created directory at: artifacts\training]
Found 78 images belonging to 2 classes.
Found 312 images belonging to 2 classes.
Epoch 1/15

  saving_api.save_model(


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [12]:
# With additional dense layers
# Same pipeline as the main run. Any exception propagates unchanged so the
# notebook shows the original traceback.
config = ConfigurationManager()
prepare_callbacks_config = config.get_prepare_callback_config()
prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)
callback_list = prepare_callbacks.get_tb_ckpt_callbacks()

training_config = config.get_training_config()
training = Training(config=training_config)
training.get_base_model()
training.train_valid_generator()
training.train(
    callback_list=callback_list
)

[2023-12-02 23:08:16,444: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-12-02 23:08:16,451: INFO: common: yaml file: params.yaml loaded successfully]
[2023-12-02 23:08:16,454: INFO: common: created directory at: artifacts]
[2023-12-02 23:08:16,457: INFO: common: created directory at: artifacts\prepare_callbacks\checkpoint_dir]
[2023-12-02 23:08:16,460: INFO: common: created directory at: artifacts\prepare_callbacks\tensorboard_log_dir]
[2023-12-02 23:08:16,465: INFO: common: created directory at: artifacts\training]
Found 78 images belonging to 2 classes.
Found 312 images belonging to 2 classes.
Epoch 1/15

  saving_api.save_model(


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
