In [1]:
import os

# Step one directory up from the notebook's location; the relative paths seen in
# the log output further down (e.g. configs/config.yaml) are resolved from there —
# presumably the project root, TODO confirm.
# NOTE(review): this is not idempotent — re-running the cell moves up another level.
os.chdir("../")

In [2]:
from pydantic import BaseModel , FilePath , FileUrl , DirectoryPath , AnyUrl
from pydantic.dataclasses import dataclass
from pathlib import Path

class TrainingConfig(BaseModel):
    """Validated settings for the training stage.

    Instances are built by ``ConfigurationManager.get_training_config`` and
    consumed by ``Training``. ``DirectoryPath`` / ``FilePath`` are pydantic path
    types that require the path to already exist (as a directory / a file) when
    the model is constructed; plain ``Path`` fields carry no existence check.
    """

    # Output directory of the training stage (created by ConfigurationManager).
    root_dir: DirectoryPath
    # Where Training.train saves the fitted model; need not exist beforehand.
    trained_model_path: Path
    # Model file loaded by Training.get_base_model; must already exist.
    updated_base_model_path: FilePath
    # Directory passed to flow_from_directory for the training split.
    training_data: DirectoryPath
    # Directory passed to flow_from_directory for the validation split.
    validation_data : DirectoryPath
    # Number of epochs passed to model.fit.
    params_epochs: int
    # Batch size used by both data generators.
    params_batch_size: int
    # When True, the training generator applies random augmentation.
    params_is_augmentation: bool
    # Image size as a list, e.g. [224, 224, 3] in the recorded output; Training
    # drops the last element to obtain the generators' target_size.
    params_image_size: list

class PrepareCallbacksConfig (BaseModel):
    """Validated settings for building the Keras training callbacks.

    Instances are built by ``ConfigurationManager.get_prepare_callbacks_config``
    and consumed by ``PrepareCallbacks``. ``DirectoryPath`` fields must point to
    directories that already exist when the model is constructed.
    """

    # Root directory of the callbacks stage.
    # NOTE(review): ConfigurationManager does not create this path explicitly;
    # presumably it exists as a parent of the TensorBoard log dir — confirm.
    root_dir: DirectoryPath
    # Parent directory under which each run gets its own timestamped log dir.
    tensorboard_root_log_dir: DirectoryPath
    # File path handed to ModelCheckpoint; its parent dir is created beforehand.
    checkpoint_model_filepath: Path
    # Passed as `patience` to the EarlyStopping callback.
    early_stopping_patience : int
    # Passed as `monitor` to the EarlyStopping callback.
    # NOTE(review): the double underscore looks like a typo, but the name is
    # used by ConfigurationManager and PrepareCallbacks — rename all together.
    early_stopping__monitor : str

In [3]:
from BirdClassifier.constants import *
from BirdClassifier.utils import create_directories, read_yaml

In [4]:
class ConfigurationManager:
    """Reads the YAML config and params files and builds per-stage config objects."""

    def __init__(
        self, config_file_path=CONFIG_FILE_PATH, param_file_path=PARAMS_FILE_PATH
    ) -> None:
        """Load both YAML files and call ``create_directories`` on the artifacts root.

        Args:
            config_file_path: Location of the main configuration YAML.
            param_file_path: Location of the hyper-parameter YAML.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(param_file_path)
        create_directories([self.config.artifacts_root])

    def get_prepare_callbacks_config(self) -> PrepareCallbacksConfig:
        """Build the callbacks config, creating the directories it points at.

        The TensorBoard log directory and the parent directory of the checkpoint
        file are passed to ``create_directories`` before the config is validated.

        Returns:
            A ``PrepareCallbacksConfig`` filled from the ``prepare_callbacks``
            config section and the early-stopping parameters.
        """
        section = self.config.prepare_callbacks
        checkpoint_parent = os.path.dirname(section.checkpoint_model_filepath)
        create_directories([section.tensorboard_root_log_dir, checkpoint_parent])

        return PrepareCallbacksConfig(
            root_dir=Path(section.root_dir),
            tensorboard_root_log_dir=Path(section.tensorboard_root_log_dir),
            checkpoint_model_filepath=Path(section.checkpoint_model_filepath),
            early_stopping_patience=self.params.EARLY_STOPPING_PATIENCE,
            early_stopping__monitor=self.params.EARLY_STOPPING_MONITOR,
        )

    def get_training_config(self) -> TrainingConfig:
        """Build the training config, creating the training root directory first.

        Returns:
            A ``TrainingConfig`` combining the ``training`` section, the updated
            base model path from ``prepare_base_model``, and the hyper-parameters.
        """
        training_section = self.config.training
        base_model_section = self.config.prepare_base_model
        hyperparams = self.params

        stage_root = Path(training_section.root_dir)
        create_directories([stage_root])

        return TrainingConfig(
            root_dir=stage_root,
            trained_model_path=training_section.trained_model_path,
            updated_base_model_path=Path(base_model_section.updated_base_model_path),
            training_data=training_section.training_data_dir,
            validation_data=training_section.validation_data_dir,
            params_epochs=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_is_augmentation=hyperparams.AUGMENTATION,
            params_image_size=hyperparams.IMAGE_SIZE,
        )
    

In [5]:
# Smoke-check: load the YAML files and build the validated training-stage config.
# NOTE(review): `path` actually holds a TrainingConfig, not a filesystem path.
config = ConfigurationManager()
path = config.get_training_config()

2022-09-30 02:08:13.241 | INFO     | BirdClassifier.utils.common:read_yaml:30 - yaml file: configs/config.yaml loaded successfully
2022-09-30 02:08:13.243 | INFO     | BirdClassifier.utils.common:read_yaml:30 - yaml file: params.yaml loaded successfully
2022-09-30 02:08:13.245 | INFO     | BirdClassifier.utils.common:create_directories:49 - created directory at: artifacts
2022-09-30 02:08:13.245 | INFO     | BirdClassifier.utils.common:create_directories:49 - created directory at: artifacts/training


In [6]:
# Display the TrainingConfig built in the previous cell to inspect its fields.
path

TrainingConfig(root_dir=PosixPath('artifacts/training'), trained_model_path=PosixPath('artifacts/training/model.h5'), updated_base_model_path=PosixPath('artifacts/prepare_base_model/base_model_updated.h5'), training_data=PosixPath('artifacts/data_ingestion/train'), validation_data=PosixPath('artifacts/data_ingestion/valid'), params_epochs=1, params_batch_size=32, params_is_augmentation=True, params_image_size=BoxList([224, 224, 3]))

In [7]:
import tensorflow as tf 

2022-09-30 02:08:15.584380: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-09-30 02:08:15.762165: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-09-30 02:08:16.282452: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2022-09-30 02:08:16.282493: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such

In [8]:
import time


class PrepareCallbacks:
    """Builds the Keras callbacks (TensorBoard, checkpoint, early stopping) from config."""

    def __init__(self, config: PrepareCallbacksConfig):
        """Keep a reference to the callbacks configuration."""
        self.config = config

    @property
    def _create_tb_callbacks(self):
        """New TensorBoard callback writing to a timestamped run directory.

        A fresh callback (and a fresh timestamp) is produced on every access.
        """
        run_stamp = time.strftime("%Y-%m-%d-%H-%M-%S")
        run_log_dir = os.path.join(
            self.config.tensorboard_root_log_dir, f"tb_log_at_{run_stamp}"
        )
        return tf.keras.callbacks.TensorBoard(log_dir=run_log_dir)

    @property
    def _create_ckpt_callbacks(self):
        """New ModelCheckpoint callback that only keeps the best model so far."""
        checkpoint_target = self.config.checkpoint_model_filepath
        return tf.keras.callbacks.ModelCheckpoint(
            filepath=checkpoint_target,
            save_best_only=True,
        )

    @property
    def _create_early_stopping_callbacks(self):
        """New EarlyStopping callback using the configured monitor and patience."""
        settings = self.config
        return tf.keras.callbacks.EarlyStopping(
            monitor=settings.early_stopping__monitor,
            patience=settings.early_stopping_patience,
            restore_best_weights=True,
        )

    def get_tb_ckpt_callbacks(self):
        """Return ``[tensorboard, checkpoint, early_stopping]`` callbacks, in that order."""
        tensorboard_cb = self._create_tb_callbacks
        checkpoint_cb = self._create_ckpt_callbacks
        early_stopping_cb = self._create_early_stopping_callbacks
        return [tensorboard_cb, checkpoint_cb, early_stopping_cb]

In [9]:
class Training:
    """Trains the prepared base model on the image directories named in the config.

    Expected call order: ``get_base_model()`` -> ``train_valid_generator()`` ->
    ``train(callback_list)``; ``train`` reads the attributes the first two set.
    """

    def __init__(self, config: TrainingConfig):
        """Keep a reference to the training configuration."""
        self.config = config

    def get_base_model(self):
        """Load the model at ``config.updated_base_model_path`` into ``self.model``."""
        self.model = tf.keras.models.load_model(
            self.config.updated_base_model_path
        )

    def train_valid_generator(self):
        """Create ``self.valid_generator`` and ``self.train_generator``.

        Both generators rescale pixel values by 1/255, use the configured batch
        size, and resize images to ``params_image_size`` without its last element.
        Random augmentation is applied to the training split only, and only
        when ``params_is_augmentation`` is true.
        """

        # Options shared by the validation and training directory iterators.
        dataflow_kwargs = dict(
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size, class_mode = "categorical"
        )

        valid_data_generator = tf.keras.preprocessing.image.ImageDataGenerator(
            rescale = 1./255
        )

        self.valid_generator = valid_data_generator.flow_from_directory(
            directory=self.config.validation_data,
            **dataflow_kwargs
        )

        if self.config.params_is_augmentation:
            train_data_generator = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=40,
                horizontal_flip=True,
                width_shift_range=0.2,
                height_shift_range=0.2,
                shear_range=0.2,
                zoom_range=0.2,
                rescale = 1./255,
            )
        else:
            # Without augmentation the training split only needs the same rescaling.
            train_data_generator = valid_data_generator

        self.train_generator = train_data_generator.flow_from_directory(
            directory=self.config.training_data,
            **dataflow_kwargs
        )

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Save ``model`` to ``path`` via ``model.save``."""
        model.save(path)

    @staticmethod
    def _steps_for(generator, split_name: str, directory) -> int:
        """Number of batches to draw from ``generator`` per epoch.

        Uses ``samples // batch_size`` as before, but never returns 0: a split
        smaller than one batch still yields a single (partial) batch.

        Raises:
            ValueError: If the generator found no images at all.
        """
        if generator.samples == 0:
            raise ValueError(
                f"No {split_name} images found under {directory}; "
                "expected one sub-directory per class containing image files."
            )
        # A step count of 0 would leave Keras nothing to run for this split.
        return max(1, generator.samples // generator.batch_size)

    def train(self, callback_list: list):
        """Fit ``self.model`` on the generators and save it to ``trained_model_path``.

        Args:
            callback_list: Keras callbacks passed to ``model.fit``.

        Raises:
            ValueError: If the training or validation generator found no images.
        """
        self.steps_per_epoch = self._steps_for(
            self.train_generator, "training", self.config.training_data
        )
        self.validation_steps = self._steps_for(
            self.valid_generator, "validation", self.config.validation_data
        )

        self.model.fit(
            self.train_generator,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=self.valid_generator,
            callbacks=callback_list
        )

        self.save_model(
            path=self.config.trained_model_path,
            model=self.model)

In [11]:
# Run the whole training stage end to end: build the callbacks, then load the
# base model, create the data generators, and fit. Exceptions propagate as-is;
# a try/except that merely re-raises adds nothing but an extra traceback frame.
config = ConfigurationManager()
prepare_callbacks_config = config.get_prepare_callbacks_config()
prepare_callbacks = PrepareCallbacks(config=prepare_callbacks_config)
callback_list = prepare_callbacks.get_tb_ckpt_callbacks()

training_config = config.get_training_config()
training = Training(config=training_config)
training.get_base_model()
training.train_valid_generator()
training.train(callback_list=callback_list)

2022-09-30 02:08:33.165 | INFO     | BirdClassifier.utils.common:read_yaml:30 - yaml file: configs/config.yaml loaded successfully
2022-09-30 02:08:33.167 | INFO     | BirdClassifier.utils.common:read_yaml:30 - yaml file: params.yaml loaded successfully
2022-09-30 02:08:33.168 | INFO     | BirdClassifier.utils.common:create_directories:49 - created directory at: artifacts
2022-09-30 02:08:33.168 | INFO     | BirdClassifier.utils.common:create_directories:49 - created directory at: artifacts/prepare_callbacks/tensorboard_log_dir
2022-09-30 02:08:33.168 | INFO     | BirdClassifier.utils.common:create_directories:49 - created directory at: artifacts/prepare_callbacks/checkpoint_dir
2022-09-30 02:08:33.169 | INFO     | BirdClassifier.utils.common:create_directories:49 - created directory at: artifacts/training


Found 2000 images belonging to 400 classes.
Found 58388 images belonging to 400 classes.


2022-09-30 02:08:35.838961: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8204
2022-09-30 02:08:36.600789: W tensorflow/core/common_runtime/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.02GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2022-09-30 02:08:36.600818: W tensorflow/core/common_runtime/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.02GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2022-09-30 02:08:36.637812: W tensorflow/core/common_runtime/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 939.37MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gai

   2/1824 [..............................] - ETA: 4:25 - loss: 6.5077 - accuracy: 0.0000e+00   

2022-09-30 02:08:41.810870: I tensorflow/stream_executor/cuda/cuda_blas.cc:1614] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.




In [19]:
# NOTE(review): ad-hoc re-run of the generator setup on the `training` object
# created in the pipeline cell above (depends on that kernel state). The first
# "Found ..." line below is the validation split and now reports 0 images,
# versus 2000 in the earlier run — the validation directory appears to have
# changed in between; TODO confirm before training again.
training.train_valid_generator()

Found 0 images belonging to 400 classes.
Found 58388 images belonging to 400 classes.
