In [1]:
import os

In [2]:
# Move the working directory one level up so the relative paths used below
# (config/, params.yaml, artifacts/) resolve from the new location.
# NOTE(review): not idempotent - re-running this cell climbs one more level each time.
os.chdir('../')

In [3]:
ls

[0m[01;34martifacts[0m/        dvc.yaml  params.yaml       setup.py     [01;34mvenv[0m/
[01;34mChicken_project[0m/  LICENSE   README.md         [01;34msrc[0m/
[01;34mconfig[0m/           [01;34mlogs[0m/     requirements.txt  template.py
[01;31mdata.zip[0m          main.py   [01;34mresearch[0m/         [01;34mtemplates[0m/


# Entity

### callback entity

In [4]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class CallbacksConfig:
    """Read-only bundle of paths and hyper-parameters used by the training callbacks.

    Attributes:
        root_dir: Root directory configured for the callbacks stage.
        tensorboard_root_log_dir: Parent directory for TensorBoard run logs.
        checkpoint_model_filepath: File the model checkpoint is written to.
        batch_size: Batch size used for training.
        epochs: Total number of training epochs.
        patience: Number of epochs without improvement of the monitored value
            before the learning rate is adjusted.
        stop_patience: Number of consecutive learning-rate adjustments without
            improvement after which training is stopped.
        threshold: Training-accuracy cut-off; below it training accuracy is
            monitored, otherwise validation loss is monitored.
        factor: Multiplier applied to the learning rate when it is reduced.
        ask_epoch: Number of epochs to run before asking whether to halt training.
    """

    # --- filesystem locations ---
    root_dir: Path
    tensorboard_root_log_dir: Path
    checkpoint_model_filepath: Path

    # --- training / scheduling hyper-parameters ---
    batch_size: int
    epochs: int
    patience: int
    stop_patience: int
    threshold: float
    factor: float
    ask_epoch: int

In [52]:
@dataclass(frozen=True)
class TrainingConfig:
    """Read-only settings for the model-training stage.

    Attributes:
        root_dir: Root directory of the training stage.
        trained_model_path: Where the trained model is stored.
        base_model_path: Location of the base model that gets loaded.
        data_dir: Directory that the file names listed in the CSV are joined onto.
        csv_dir: Path of the CSV file that is read to build the data splits.
        img_size: Two-element image size handed to the generators as ``target_size``.
        channels: Number of image channels.
        color: Colour mode handed to the generators as ``color_mode``.
        epochs: Number of training epochs.
        batch_size: Batch size used by the data generators.
        data_gen_path: Directory where the train/valid/test dataframes are pickled.
    """

    # --- filesystem locations ---
    root_dir: Path
    trained_model_path: Path
    base_model_path: Path
    data_dir: Path
    csv_dir: Path

    # --- image and training parameters ---
    img_size: list
    channels: int
    color: str
    epochs: int
    batch_size: int

    # declared last to keep the original positional field order
    data_gen_path: Path

# Config manager

In [53]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories

In [54]:
class ConfigurationManager:
    """Reads the project's YAML config and params files and builds the typed
    configuration objects (``CallbacksConfig``, ``TrainingConfig``) from them."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main config YAML (defaults to CONFIG_FILE_PATH).
            params_filepath: Path of the params YAML (defaults to PARAMS_FILE_PATH).
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    def get_callback_config(self) -> CallbacksConfig:
        """Build the callbacks configuration.

        Creates the checkpoint file's parent directory and the TensorBoard log
        directory, then combines the ``callbacks`` section of the config file
        with the scheduling hyper-parameters from the params file.

        Returns:
            CallbacksConfig: Paths and hyper-parameters for the callbacks.
        """
        config = self.config.callbacks
        param_config = self.params

        # The checkpoint setting is a file path, so only its parent directory is created.
        model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)
        create_directories([
            Path(model_ckpt_dir),
            Path(config.tensorboard_root_log_dir)
        ])

        return CallbacksConfig(
            root_dir=Path(config.root_dir),
            tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),
            checkpoint_model_filepath=Path(config.checkpoint_model_filepath),
            batch_size=param_config.batch_size,
            epochs=param_config.epochs,
            patience=param_config.patience,
            stop_patience=param_config.stop_patience,
            threshold=param_config.threshold,
            factor=param_config.factor,
            ask_epoch=param_config.ask_epoch,
        )

    def get_training_config(self) -> TrainingConfig:
        """Build the training configuration.

        Creates the training root directory, then combines the ``training``
        section of the config file with the image/training parameters from the
        params file.

        Returns:
            TrainingConfig: Paths and parameters for the training stage.
        """
        training = self.config.training
        params = self.params

        # The previously computed (and never used) data-ingestion path was dropped,
        # so this method no longer requires a `data_ingestion.unzip_dir` config entry.
        create_directories([
            Path(training.root_dir)
        ])

        return TrainingConfig(
            root_dir=Path(training.root_dir),
            trained_model_path=Path(training.trained_model_path),
            base_model_path=Path(training.base_model_path),
            data_dir=Path(training.data_dir),
            csv_dir=Path(training.csv_dir),
            data_gen_path=Path(training.data_gen_path),
            img_size=params.img_size,
            channels=params.channels,
            color=params.color,
            epochs=params.epochs,
            batch_size=params.batch_size
        )

In [55]:
# Build the training config once so the resulting TrainingConfig is shown as this cell's output.
ConfigurationManager().get_training_config()

[2023-12-10 12:08:29,492: INFO: common: yaml file: config/config.yaml loaded successfully]
[2023-12-10 12:08:29,495: INFO: common: yaml file: params.yaml loaded successfully]
[2023-12-10 12:08:29,495: INFO: common: created directory at: artifacts]
[2023-12-10 12:08:29,496: INFO: common: created directory at: artifacts/training]


TrainingConfig(root_dir=PosixPath('artifacts/training'), trained_model_path=PosixPath('artifacts/training/trained_model.h5'), base_model_path=PosixPath('artifacts/base_model/base_model.h5'), data_dir=PosixPath('artifacts/data_ingestion/Train/Train'), csv_dir=PosixPath('artifacts/data_ingestion/Train/train_data.csv'), img_size=BoxList([224, 224]), channels=3, color='rgb', epochs=40, batch_size=40, data_gen_path=PosixPath('artifacts/training'))

# Component callback and training

In [56]:
import os
import time

import dill
import tensorflow as tf
import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import pandas as pd

In [63]:
class MyCallback(keras.callbacks.Callback):
    """Keras callback that prints a progress table and manages the learning rate.

    While training accuracy is below ``threshold`` the callback monitors training
    accuracy; otherwise it monitors validation loss. When the monitored value has
    not improved for ``patience`` consecutive epochs, the learning rate is
    multiplied by ``factor``. After ``stop_patience`` such reductions in a row,
    training is stopped. The best weights seen are restored when training ends.

    Validation data is required: ``val_loss`` and ``val_accuracy`` are read from
    the epoch logs.
    """

    # Header row of the progress table; printed at train start and again after
    # the user extends the run from the interactive prompt.
    _TABLE_HEADER = '{0:^8s}{1:^10s}{2:^9s}{3:^9s}{4:^9s}{5:^9s}{6:^9s}{7:^10s}{8:10s}{9:^8s}'.format(
        'Epoch', 'Loss', 'Accuracy', 'V_loss', 'V_acc', 'LR', 'Next LR', 'Monitor', '% Improv', 'Duration')

    def __init__(self, model, train_gen, config: CallbacksConfig):
        """Capture the model, the scheduling parameters and the initial state.

        Args:
            model: Compiled model whose optimizer learning rate is read and adjusted.
            train_gen: Training generator; ``len(train_gen.labels)`` gives the sample count.
            config: Callback hyper-parameters.
        """
        super().__init__()
        self.config = config

        self.model = model
        self.patience = config.patience # specifies how many epochs without improvement before learning rate is adjusted
        self.stop_patience = config.stop_patience # specifies how many times to adjust lr without improvement to stop training
        self.threshold = config.threshold # specifies training accuracy threshold when lr will be adjusted based on validation loss
        self.factor = config.factor # factor by which to reduce the learning rate
        self.epochs = config.epochs
        self.ask_epoch = config.ask_epoch
        self.ask_epoch_initial = config.ask_epoch # save this value to restore if restarting training

        self.batches = int(np.ceil(len(train_gen.labels) / config.batch_size))    # number of training batch to run per epoch

        # callback variables
        self.count = 0 # consecutive epochs without improvement since the last reset
        self.stop_count = 0 # consecutive lr reductions without improvement
        self.best_epoch = 1   # epoch with the best monitored value
        self.initial_lr = float(tf.keras.backend.get_value(model.optimizer.lr)) # get the initial learning rate and save it
        self.highest_tracc = 0.0 # set highest training accuracy to 0 initially
        self.lowest_vloss = np.inf # set lowest validation loss to infinity initially
        self.best_weights = self.model.get_weights() # set best weights to model's initial weights
        self.initial_weights = self.model.get_weights()   # save initial weights if they have to get restored

    # Define a function that will run when train begins
    def on_train_begin(self, logs= None):
        """Decide whether interactive prompting is enabled, print the table header, start the timer."""
        msg = 'Do you want model asks you to halt the training [y/n] ?'
        print(msg)
        ans = 'n' # can take input from user
        # Anything other than y/Y disables prompting, so ask_permission is always defined.
        self.ask_permission = 1 if ans in ['Y', 'y'] else 0

        print(self._TABLE_HEADER)
        self.start_time = time.time()


    def on_train_end(self, logs= None):
        """Report the elapsed training time and restore the best weights."""
        stop_time = time.time()
        tr_duration = stop_time - self.start_time
        hours = tr_duration // 3600
        minutes = (tr_duration - (hours * 3600)) // 60
        seconds = tr_duration - ((hours * 3600) + (minutes * 60))

        msg = f'training elapsed time was {str(hours)} hours, {minutes:4.1f} minutes, {seconds:4.2f} seconds)'
        print(msg)

        # set the weights of the model to the best weights
        self.model.set_weights(self.best_weights)


    def on_train_batch_end(self, batch, logs= None):
        """Overwrite the current console line with the running batch accuracy and loss."""
        # get batch accuracy and loss
        acc = logs.get('accuracy') * 100
        loss = logs.get('loss')

        # prints over on the same line to show running batch count
        msg = '{0:20s}processing batch {1:} of {2:5s}-   accuracy=  {3:5.3f}   -   loss: {4:8.5f}'.format(' ', str(batch), str(self.batches), acc, loss)
        print(msg, '\r', end= '')


    def on_epoch_begin(self, epoch, logs= None):
        """Record the epoch start time so its duration can be reported."""
        self.ep_start = time.time()


    # Define method runs on the end of each epoch
    def on_epoch_end(self, epoch, logs= None):
        """Update the best-value tracking, adjust the learning rate, print the
        table row and decide whether training should stop.

        Args:
            epoch: Zero-based epoch index supplied by Keras.
            logs: Metric dict; must contain 'accuracy', 'val_accuracy', 'loss', 'val_loss'.
        """
        ep_end = time.time()
        duration = ep_end - self.ep_start

        lr = float(tf.keras.backend.get_value(self.model.optimizer.lr)) # get the current learning rate
        current_lr = lr
        acc = logs.get('accuracy')  # get training accuracy
        v_acc = logs.get('val_accuracy')  # get validation accuracy
        loss = logs.get('loss')  # get training loss for this epoch
        v_loss = logs.get('val_loss')  # get the validation loss for this epoch

        if acc < self.threshold: # if training accuracy is below threshold adjust lr based on training accuracy
            monitor = 'accuracy'
            # No baseline yet (first epoch, or best accuracy still 0): report 0 % instead of dividing by zero.
            if epoch == 0 or self.highest_tracc <= 0:
                pimprov = 0.0
            else:
                pimprov = (acc - self.highest_tracc ) * 100 / self.highest_tracc # define improvement of model progres

            if acc > self.highest_tracc: # training accuracy improved in the epoch
                self.highest_tracc = acc # set new highest training accuracy
                self.best_weights = self.model.get_weights() # training accuracy improved so save the weights
                self.count = 0 # set count to 0 since training accuracy improved
                self.stop_count = 0 # set stop counter to 0
                if v_loss < self.lowest_vloss:
                    self.lowest_vloss = v_loss
                self.best_epoch = epoch + 1  # set the value of best epoch for this epoch

            else:
                # training accuracy did not improve check if this has happened for patience number of epochs
                # if so adjust learning rate
                if self.count >= self.patience - 1: # lr should be adjusted
                    lr = lr * self.factor # adjust the learning by factor
                    tf.keras.backend.set_value(self.model.optimizer.lr, lr) # set the learning rate in the optimizer
                    self.count = 0 # reset the count to 0
                    self.stop_count = self.stop_count + 1 # count the number of consecutive lr adjustments
                    if v_loss < self.lowest_vloss:
                        self.lowest_vloss = v_loss
                else:
                    self.count = self.count + 1 # increment patience counter

        else: # training accuracy is above threshold so adjust learning rate based on validation loss
            monitor = 'val_loss'
            # No usable baseline (first epoch, or best loss still inf / exactly 0): report 0 %
            # instead of producing nan or dividing by zero.
            if epoch == 0 or self.lowest_vloss in (0, np.inf):
                pimprov = 0.0
            else:
                pimprov = (self.lowest_vloss - v_loss ) * 100 / self.lowest_vloss

            if v_loss < self.lowest_vloss: # check if the validation loss improved
                self.lowest_vloss = v_loss # replace lowest validation loss with new validation loss
                self.best_weights = self.model.get_weights() # validation loss improved so save the weights
                self.count = 0 # reset count since validation loss improved
                self.stop_count = 0
                self.best_epoch = epoch + 1 # set the value of the best epoch to this epoch

            else: # validation loss did not improve
                if self.count >= self.patience - 1: # need to adjust lr
                    lr = lr * self.factor # adjust the learning rate
                    self.stop_count = self.stop_count + 1 # increment stop counter because lr was adjusted
                    self.count = 0 # reset counter
                    tf.keras.backend.set_value(self.model.optimizer.lr, lr) # set the learning rate in the optimizer

                else:
                    self.count = self.count + 1 # increment the patience counter

                if acc > self.highest_tracc:
                    self.highest_tracc = acc

        msg = f'{str(epoch + 1):^3s}/{str(self.epochs):4s} {loss:^9.3f}{acc * 100:^9.3f}{v_loss:^9.5f}{v_acc * 100:^9.3f}{current_lr:^9.5f}{lr:^9.5f}{monitor:^11s}{pimprov:^10.2f}{duration:^8.2f}'
        print(msg)

        if self.stop_count > self.stop_patience - 1: # check if learning rate has been adjusted stop_count times with no improvement
            msg = f' training has been halted at epoch {epoch + 1} after {self.stop_patience} adjustments of learning rate with no improvement'
            print(msg)
            self.model.stop_training = True # stop training

        elif self.ask_epoch is not None and self.ask_permission != 0 and epoch + 1 >= self.ask_epoch:
            msg = 'enter H to halt training or an integer for number of epochs to run then ask again'
            print(msg)

            ans = input('')
            if ans == 'H' or ans == 'h':
                msg = f'training has been halted at epoch {epoch + 1} due to user input'
                print(msg)
                self.model.stop_training = True # stop training

            else:
                try:
                    ans = int(ans)
                except ValueError:
                    # Not a number: keep training; the prompt is shown again after the next epoch.
                    print('Invalid')
                else:
                    self.ask_epoch += ans
                    msg = f' training will continue until epoch {str(self.ask_epoch)}'
                    print(msg)
                    print(self._TABLE_HEADER)

In [64]:
class PrepareCallback:
    """Builds the TensorBoard and model-checkpoint callbacks from a CallbacksConfig."""

    def __init__(self, config: CallbacksConfig):
        """Store the callbacks configuration used by the factory properties."""
        self.config = config

    @property
    def _create_tb_callbacks(self):
        """TensorBoard callback that logs into a timestamped sub-directory.

        A new callback (with a new timestamp) is created on every access.
        """
        timestamp = time.strftime("%Y-%m-%d-%H-%M-%S")
        tb_running_log_dir = os.path.join(
            self.config.tensorboard_root_log_dir,
            f"tb_logs_at_{timestamp}",
        )
        return tf.keras.callbacks.TensorBoard(log_dir=tb_running_log_dir)

    @property
    def _create_ckpt_callbacks(self):
        """ModelCheckpoint callback that keeps only the model with the highest
        ``val_accuracy``. A new callback is created on every access."""
        return tf.keras.callbacks.ModelCheckpoint(
            filepath=str(self.config.checkpoint_model_filepath),
            save_best_only=True,
            monitor='val_accuracy',
            mode='max'
        )

    def get_tb_ckpt_callbacks(self):
        """Return ``[tensorboard_callback, checkpoint_callback]``, both freshly created."""
        return [
            self._create_tb_callbacks,
            self._create_ckpt_callbacks
        ]

In [65]:
#     root_dir: Path
#     trained_model_path: Path # to store the trained model
#     base_model_path: Path # load the base model
#     data_dir: Path # load data and csv
#     csv_dir: Path
# img_size
# channels
# color
# batch_size
# data_gen_path # to save the data generators

In [66]:
class Training:
    """Training stage: loads the base model, splits the labelled CSV into
    train/valid/test sets, wraps them in image generators and fits the model."""

    def __init__(self, config: TrainingConfig):
        """Store the configuration; model, history and generators are filled in later."""
        self.config = config
        self.model = None
        self.history = None  # set by train(); was misspelled `histroy`, leaving `history` undefined
        self.train_gen = None
        self.valid_gen = None
        self.test_gen = None

    def get_base_model(self):
        """Load the base model from ``config.base_model_path``, keep it on the instance and return it."""
        self.model = tf.keras.models.load_model(
            self.config.base_model_path
        )
        return self.model

    def split_csv_data(self):
        """Read the labels CSV and split it 80/10/10 into train/valid/test dataframes.

        Only rows labelled 'Coccidiosis' or 'Healthy' are kept. Both splits are
        stratified on the label with a fixed random state, and the three
        dataframes are pickled into ``config.data_gen_path``.

        Returns:
            tuple: ``(train_df, valid_df, test_df)``.
        """
        df = pd.read_csv(self.config.csv_dir)
        # change column name
        df.columns = ["filepaths","labels"]
        df['filepaths'] = df['filepaths'].apply(lambda x : os.path.join(self.config.data_dir, x))

        # only extracting two types of images
        keep_rows = (df['labels'] == 'Coccidiosis') | (df['labels'] == 'Healthy')
        df = df[keep_rows]

        # Create train df (80 %); the remaining 20 % is split again below
        train_df, holdout_df = train_test_split(
            df, train_size= 0.8, shuffle= True, random_state= 123, stratify= df['labels'])

        # valid and test dataframe (half of the hold-out each)
        valid_df, test_df = train_test_split(
            holdout_df, train_size= 0.5, shuffle= True, random_state= 123, stratify= holdout_df['labels'])

        self.save_train_valid_test_df(train_df, valid_df, test_df)

        return train_df, valid_df, test_df

    def save_train_valid_test_df(self, train_df, valid_df, test_df):
        """Pickle the three split dataframes into ``config.data_gen_path``.

        Files written: ``train_df.pkl``, ``valid_df.pkl``, ``test_df.pkl``.
        The directory is created if it does not exist yet.
        """
        os.makedirs(self.config.data_gen_path, exist_ok=True)

        frames = {
            'train_df.pkl': train_df,
            'valid_df.pkl': valid_df,
            'test_df.pkl': test_df,
        }
        for file_name, frame in frames.items():
            path = os.path.join(self.config.data_gen_path, file_name)
            with open(path, 'wb') as f:
                dill.dump(frame, f)

    def create_train_valid_test_generator(self, train_df, valid_df, test_df):
        '''
        Wrap the train, validation and test dataframes in Keras image data
        generators (the model is fed from these generators) and keep them on
        the instance.

        All generators rescale pixel values by 1/255; only the training
        generator additionally applies random horizontal flips.

        Returns:
            tuple: ``(train_gen, valid_gen, test_gen)``.
        '''
        batch_size = self.config.batch_size
        test_batch_size = self.config.batch_size

        # Identity hook handed to ImageDataGenerator as preprocessing_function:
        # it just takes the image and returns it again.
        def scalar(img):
            return img

        tr_gen = ImageDataGenerator(preprocessing_function= scalar, horizontal_flip= True, rescale = 1./255)
        ts_gen = ImageDataGenerator(preprocessing_function= scalar, rescale = 1./255)

        # Options shared by all three generators.
        # NOTE(review): shuffle=False also applies to the training generator, so the
        # sample order is the same in every epoch - confirm this is intended.
        flow_options = dict(
            x_col= 'filepaths', y_col= 'labels', target_size= self.config.img_size,
            class_mode= "binary", color_mode= self.config.color, shuffle= False,
        )

        train_gen = tr_gen.flow_from_dataframe(train_df, batch_size= batch_size, **flow_options)
        valid_gen = ts_gen.flow_from_dataframe(valid_df, batch_size= batch_size, **flow_options)
        test_gen = ts_gen.flow_from_dataframe(test_df, batch_size= test_batch_size, **flow_options)

        self.train_gen = train_gen
        self.valid_gen = valid_gen
        self.test_gen = test_gen

        return train_gen, valid_gen, test_gen

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Save ``model`` to ``path`` via ``model.save``."""
        model.save(path)

    def train(self, callback_list: list):
        """Fit the loaded model on the stored generators and save the result.

        ``get_base_model`` and ``create_train_valid_test_generator`` must have
        been called first. The fit history is kept in ``self.history`` and the
        model is written to ``config.trained_model_path``.

        Args:
            callback_list: Keras callbacks passed to ``fit``.
        """
        self.history = self.model.fit(x= self.train_gen, epochs= self.config.epochs, verbose= 0, callbacks= callback_list,
                        validation_data= self.valid_gen, validation_steps= None, shuffle= False)

        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )

In [67]:
# End-to-end run: load config, prepare model and data, build callbacks, train.
# The former `try: ... except Exception as e: raise e` wrapper was removed: it
# re-raised every exception unchanged and only added a frame to the traceback.
config = ConfigurationManager()
training_config = config.get_training_config()
training = Training(config=training_config)
model = training.get_base_model()
train_df, valid_df, test_df = training.split_csv_data()
train_gen, valid_gen, test_gen = training.create_train_valid_test_generator(train_df, valid_df, test_df)

callbacks_config = config.get_callback_config()
prepare_callbacks = PrepareCallback(config=callbacks_config)
callback_list = prepare_callbacks.get_tb_ckpt_callbacks()

# The custom callback goes first, ahead of the TensorBoard and checkpoint callbacks.
callback_list = [MyCallback(model, train_gen, callbacks_config)] + callback_list

training.train(
    callback_list=callback_list
)

[2023-12-10 12:20:38,504: INFO: common: yaml file: config/config.yaml loaded successfully]
[2023-12-10 12:20:38,508: INFO: common: yaml file: params.yaml loaded successfully]
[2023-12-10 12:20:38,509: INFO: common: created directory at: artifacts]
[2023-12-10 12:20:38,510: INFO: common: created directory at: artifacts/training]
Found 3904 validated image filenames belonging to 2 classes.
Found 488 validated image filenames belonging to 2 classes.
Found 488 validated image filenames belonging to 2 classes.
[2023-12-10 12:20:38,775: INFO: common: created directory at: artifacts/callbacks/checkpoint_dir]
[2023-12-10 12:20:38,776: INFO: common: created directory at: artifacts/callbacks/tensorboard_log_dir]
Do you want model asks you to halt the training [y/n] ?
 Epoch     Loss   Accuracy  V_loss    V_acc     LR     Next LR  Monitor  % Improv  Duration
 1 /2      2.011   88.858   1.93559  65.574   0.00100  0.00100  accuracy     0.00    121.04 


  saving_api.save_model(


 2 /2      0.382   93.391   0.25612  93.033   0.00100  0.00100  val_loss    86.77    125.20 
training elapsed time was 0.0 hours,  4.0 minutes, 7.40 seconds)


In [37]:
import pickle

In [41]:
import dill