In [1]:
import os

In [2]:
# Move from the notebook's folder up to the project root so that relative paths
# (config/config.yaml, params.yaml, artifacts/...) resolve.
# Guarded so that re-running this cell without restarting the kernel does not keep
# climbing one directory higher each time: once config/config.yaml is visible from
# the current directory we are already at the root and stay put.
if not os.path.isfile(os.path.join("config", "config.yaml")):
    os.chdir("../")

In [3]:
# Show the current working directory; after the chdir in the previous cell this should be the project root.
%pwd

'c:\\Users\\anjik\\Desktop\\MLOPs_projects\\Chest_Disease_Image_Classification'

# imports

In [4]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories
from cnnClassifier import logger

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """
    Immutable bundle of paths and hyperparameters consumed by the model-training step.

    Built by ConfigurationManager.get_model_trainer_config from config.yaml
    (paths) and params.yaml (hyperparameters). frozen=True means fields cannot
    be reassigned after construction.
    """
    # Directory created for this component's outputs (config.yaml: model_training.root_dir)
    root_dir: Path
    # File the fitted model is saved to after training
    trained_model_path: Path
    # Model file loaded as the starting point for training (config.yaml: prepare_base_model.updated_base_model_path)
    updated_base_model_path: Path
    # Image directory handed to flow_from_directory for both the training and validation subsets
    training_data: Path
    # Number of epochs passed to model.fit (params.yaml: EPOCHS)
    params_epochs: int
    # Batch size used by both image generators (params.yaml: BATCH_SIZE)
    params_batch_size: int
    # When true, the training generator applies random augmentations (params.yaml: AUGMENTATION)
    params_is_augmentation: bool
    # Image shape list, e.g. [224, 224, 3]; all but the last entry are used as the generators' target_size (params.yaml: IMAGE_SIZE)
    params_image_size: list

In [6]:
# Exploration only: load config.yaml and inspect the section that drives model training.
config = read_yaml(CONFIG_FILE_PATH)
print(config)  # dump the whole config so every section is visible at once
model_trainer_config= config.model_training  # attribute-style access to the "model_training" key
model_trainer_config  # last expression of the cell -> shown as the cell output

[2024-03-16 23:11:18,831: INFO: common: yaml file: config\config.yaml loaded successfully]
{'artifacts_root': 'artifacts', 'data_ingestion': {'root_dir': 'artifacts/data_ingestion', 'source_URL': 'https://drive.google.com/file/d/1z0mreUtRmR-P-magILsDR3T7M6IkGXtY/view?usp=drive_link', 'local_data_file': 'artifacts/data_ingestion/data.zip', 'unzip_dir': 'artifacts/data_ingestion'}, 'prepare_base_model': {'root_dir': 'artifacts/prepare_base_model', 'base_model_path': 'artifacts/prepare_base_model/base_model.h5', 'updated_base_model_path': 'artifacts/prepare_base_model/base_model_updated.h5'}, 'model_training': {'root_dir': 'artifacts/model_training', 'trained_model_path': 'artifacts/model_training/model.h5'}}


ConfigBox({'root_dir': 'artifacts/model_training', 'trained_model_path': 'artifacts/model_training/model.h5'})

In [7]:
# Exploration only: load params.yaml (AUGMENTATION, IMAGE_SIZE, BATCH_SIZE, EPOCHS, ...) and display it.
params = read_yaml(PARAMS_FILE_PATH)
params  # last expression of the cell -> shown as the cell output

[2024-03-16 23:11:18,849: INFO: common: yaml file: params.yaml loaded successfully]


ConfigBox({'AUGMENTATION': True, 'IMAGE_SIZE': [224, 224, 3], 'BATCH_SIZE': 16, 'INCLUDE_TOP': False, 'EPOCHS': 1, 'CLASSES': 2, 'WEIGHTS': 'imagenet', 'LEARNING_RATE': 0.01})

In [8]:
# write configuration manager
class ConfigurationManager:
    """
    Reads config.yaml and params.yaml once and hands out the typed
    configuration object each pipeline component needs.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """
        Params:
            config_filepath: location of config.yaml (artifact paths)
            params_filepath: location of params.yaml (hyperparameters)
        """
        # read_yaml returns a ConfigBox, so keys are reachable as attributes
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Make sure the top-level artifacts directory is present
        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """
        Method: get_model_trainer_config
        Params: none
        Returns: ModelTrainerConfig assembled from the model_training and
                 prepare_base_model sections of config.yaml plus params.yaml.
                 Also creates the model_training root directory.
        """
        logger.info("Entering get_model_trainer_config method of ConfigurationManager")

        training_section = self.config.model_training        # config.yaml: model_training
        base_model_section = self.config.prepare_base_model  # config.yaml: prepare_base_model
        hyperparams = self.params                             # params.yaml

        # Training images live in a fixed sub-folder of the data-ingestion unzip directory
        dataset_dir = Path(self.config.data_ingestion.unzip_dir) / "Chest-CT-Scan-data"

        # Creates artifacts/model_training (or whatever root_dir is configured)
        training_root = Path(training_section.root_dir)
        create_directories([training_root])

        trainer_config = ModelTrainerConfig(
            root_dir=training_root,
            trained_model_path=Path(training_section.trained_model_path),
            updated_base_model_path=Path(base_model_section.updated_base_model_path),
            training_data=dataset_dir,
            params_epochs=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_is_augmentation=hyperparams.AUGMENTATION,
            params_image_size=hyperparams.IMAGE_SIZE,
        )

        logger.info("Then, exiting get_model_trainer_config method of ConfigurationManager")
        return trainer_config

In [9]:
import os
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf
import time

In [10]:
class ModelTraining:
    """
    Trains the updated VGG16 model on the ingested image data and saves the result.

    Expected call order (each step stores state on the instance for the next one):
        1. get_base_model()        -> sets self.model
        2. train_valid_generator() -> sets self.train_generator / self.valid_generator
        3. train()                 -> fits self.model and saves it to trained_model_path
    """
    def __init__(self, model_training_config:ModelTrainerConfig):
        """
        Params:
            model_training_config: paths and hyperparameters for this component
        """
        self.model_training_config= model_training_config


    def get_base_model(self):
        """
        Method Name : get_base_model
        Description : loads the updated base model from updated_base_model_path
                      (artifacts/prepare_base_model/base_model_updated.h5 in config.yaml)
        Output      : None; the loaded model is stored on self.model
        On Failure  : Write an exception log and then re-raise the original exception
        """
        logger.info("Getting base model")
        base_model_path= self.model_training_config.updated_base_model_path
        try:
            self.model= tf.keras.models.load_model(base_model_path)
        except Exception:
            # Record which file could not be loaded (with traceback) before surfacing
            # the error; bare `raise` keeps the original exception and traceback intact.
            logger.exception(f"Failed to load base model from {base_model_path}")
            raise

    def train_valid_generator(self):
        """
        Method Name : train_valid_generator
        Description : builds the training and validation image generators over the
                      training_data directory, holding out 20% of the images for validation
        Output      : None; generators are stored on self.train_generator and self.valid_generator
        On Failure  : Exceptions are not caught here and propagate to the caller
        """
        logger.info("Data split: Training and validation: started!")

        # Settings shared by both generators: scale pixels to [0, 1] and reserve 20% for validation
        data_generator_kwargs= dict(rescale= 1./255,
                                   validation_split= 0.20)

        # params_image_size is e.g. [224, 224, 3]; drop the last entry to get the target_size
        dataflow_kwargs= dict(target_size= self.model_training_config.params_image_size[:-1],
                              batch_size= self.model_training_config.params_batch_size,
                              interpolation= "bilinear")

        # Validation images are never augmented and are read in a fixed order
        valid_data_generator= tf.keras.preprocessing.image.ImageDataGenerator(**data_generator_kwargs)
        self.valid_generator= valid_data_generator.flow_from_directory(directory= self.model_training_config.training_data,
                                                                       subset= "validation",
                                                                       shuffle= False,
                                                                       **dataflow_kwargs)

        if self.model_training_config.params_is_augmentation:
            # Random geometric augmentations, applied to the training subset only
            train_data_generator= tf.keras.preprocessing.image.ImageDataGenerator(rotation_range=40,
                                                                                  horizontal_flip=True,
                                                                                  width_shift_range=0.2,
                                                                                  height_shift_range=0.2,
                                                                                  shear_range=0.2,
                                                                                  zoom_range=0.2,
                                                                                  **data_generator_kwargs)
        else:
            # No augmentation requested: reuse the plain rescaling generator
            train_data_generator= valid_data_generator

        self.train_generator= train_data_generator.flow_from_directory(directory= self.model_training_config.training_data,
                                                                       subset= "training",
                                                                       shuffle= True,
                                                                       **dataflow_kwargs)
        logger.info("Data split: Training and validation: completed!")


    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """
        Method Name : save_model
        Description : saves the given Keras model to the given path
        Output      : None
        """
        model.save(path)


    def train(self):
        """
        Method Name : train
        Description : fits self.model on the training generator, validating on the
                      validation generator, then saves the model to trained_model_path
        Output      : None
        On Failure  : Exceptions are not caught here and propagate to the caller;
                      get_base_model() and train_valid_generator() must have been called first
        """
        # Whole batches only: floor division leaves out a trailing partial batch
        self.steps_per_epoch= self.train_generator.samples // self.train_generator.batch_size
        self.validation_steps= self.valid_generator.samples // self.valid_generator.batch_size

        logger.info("Model is fit")
        self.model.fit(self.train_generator,
                       epochs=self.model_training_config.params_epochs,
                       steps_per_epoch=self.steps_per_epoch,
                       validation_steps=self.validation_steps,
                       validation_data=self.valid_generator
                       )

        logger.info("Trained model is saved")
        self.save_model(path=self.model_training_config.trained_model_path,
                        model= self.model)

# Training pipeline for Model Trainer component

In [12]:
# Run the Model Trainer component end to end: build its configuration, load the
# base model, create the data generators, then train and save the model.
try:
    logger.info("ModelTrainer component started")
    logger.info("Loading of ModelTrainer component configuration started")
    config = ConfigurationManager() # create object for ConfigurationManager class
    model_training_config= config.get_model_trainer_config() # obj.method() returns ModelTrainerConfig
    logger.info("All configuration directories, files needed for ModelTrainer component are ready")

    logger.info("ModelTrainer steps started")
    model_trainer = ModelTraining(model_training_config=model_training_config) # create object for ModelTraining class
    model_trainer.get_base_model() # loads the updated base model onto model_trainer.model
    model_trainer.train_valid_generator() # builds the training / validation generators
    model_trainer.train() # fits the model and saves it to trained_model_path
    logger.info("OK! ModelTrainer component completed")
except Exception:
    # Every step above is logged, so record the failure (with traceback) in the same
    # log before surfacing it; bare `raise` keeps the original exception and traceback.
    logger.exception("ModelTrainer component failed")
    raise

[2024-03-16 23:11:22,854: INFO: 2503225683: ModelTrainer component started]
[2024-03-16 23:11:22,855: INFO: 2503225683: Loading of ModelTrainer component configuration started]
[2024-03-16 23:11:22,858: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-03-16 23:11:22,862: INFO: common: yaml file: params.yaml loaded successfully]
[2024-03-16 23:11:22,866: INFO: common: created directory at: artifacts]
[2024-03-16 23:11:22,870: INFO: 323759356: Entering get_model_trainer_config method of ConfigurationManager]
[2024-03-16 23:11:22,872: INFO: common: created directory at: artifacts\model_training]
[2024-03-16 23:11:22,901: INFO: 323759356: Then, exiting get_model_trainer_config method of ConfigurationManager]
[2024-03-16 23:11:22,903: INFO: 2503225683: All configuration directories, files needed for ModelTrainer component are ready]
[2024-03-16 23:11:22,904: INFO: 2503225683: ModelTrainer steps started]
[2024-03-16 23:11:22,906: INFO: 1851567132: Getting base model]
[2