In [1]:
# Show the directory the kernel starts in; the recorded run started in <project>/research.
!pwd

/f/python_venv/End-to-End-Cancer-Classification-using-MLFlow-DVC/research


In [2]:
import os
os.chdir("../")
!pwd

/f/python_venv/End-to-End-Cancer-Classification-using-MLFlow-DVC


In [3]:
# entity

from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainerConfig:
    """Immutable bundle of the paths and hyperparameters used by the training stage.

    Instances are built by ``ConfigurationManager.get_model_trainer_config`` and
    read by ``ModelTrainer``. ``frozen=True`` means fields cannot be reassigned
    after construction.
    """
    # Working directory of the training stage (taken from config.training.root_dir).
    root_dir: Path
    # Destination the fitted model is saved to at the end of ModelTrainer.train.
    trained_model_path: Path
    # Model file loaded by ModelTrainer.get_base_model before training.
    updated_base_model_path: Path
    # Image directory handed to flow_from_directory for both the training and
    # the validation subset.
    training_data_path: Path
    # Number of epochs passed to model.fit (params.EPOCHS).
    params_epochs: int
    # Batch size used by both data generators (params.BATCH_SIZE).
    params_batch_size: int
    # When truthy, the training generator applies random augmentation
    # (params.AUGMENTATION); the validation generator never does.
    params_is_augmentation: bool
    # All elements except the last are used as the generators' target_size
    # (params.IMAGE_SIZE). Presumably [height, width, channels] - confirm
    # against params.yaml.
    params_image_size: list


In [4]:
# configuration manager

from CNN_Classifier.constants import *
from CNN_Classifier.utils.common import read_yaml, create_directories

class ConfigurationManager:
    """Reads the project's config and params YAML files and builds stage configs."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the config YAML file.
            params_filepath: Location of the params YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> TrainerConfig:
        """Assemble the ``TrainerConfig`` for the training stage.

        Creates the training root directory as a side effect.

        Returns:
            A ``TrainerConfig`` combining paths from the ``training``,
            ``prepare_base_model`` and ``data_ingestion`` config sections with
            the hyperparameters from the params file.
        """
        training_section = self.config.training
        base_model_section = self.config.prepare_base_model
        hyperparams = self.params

        # The images sit in a fixed sub-folder of the data-ingestion unzip directory.
        dataset_dir = Path(self.config.data_ingestion.unzip_dir) / 'Chest-CT-Scan-data'

        create_directories([training_section.root_dir])

        return TrainerConfig(
            root_dir=Path(training_section.root_dir),
            trained_model_path=Path(training_section.trained_model_path),
            updated_base_model_path=Path(base_model_section.updated_base_model_path),
            training_data_path=dataset_dir,
            params_epochs=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_is_augmentation=hyperparams.AUGMENTATION,
            params_image_size=hyperparams.IMAGE_SIZE,
        )



In [5]:
# component

import os
import tensorflow as tf
import time

class ModelTrainer:
    """Trains the prepared base model on an image directory and saves the result.

    Expected call order: ``get_base_model()``, then ``train_valid_generator()``,
    then ``train()``.
    """

    def __init__(self, config: "TrainerConfig"):
        """Store the training configuration.

        Args:
            config: Paths and hyperparameters for the training stage.
        """
        self.config = config
        # Filled in by the setup methods below. Declared here so the object's
        # state is visible in one place and ``train`` can detect a skipped step.
        self.model = None
        self.train_generator = None
        self.valid_generator = None
        self.steps_per_epoch = None
        self.validation_steps = None

    def get_base_model(self):
        """Load the model stored at ``config.updated_base_model_path`` into ``self.model``."""
        self.model = tf.keras.models.load_model(
            self.config.updated_base_model_path
        )

    def train_valid_generator(self):
        """Create ``self.valid_generator`` and ``self.train_generator``.

        Both generators read from ``config.training_data_path`` and share the
        same rescaling, validation split fraction, target size and batch size.
        Only the training generator is shuffled, and only it is augmented
        (when ``config.params_is_augmentation`` is truthy).
        """
        # Shared by both generators so they rescale and split the same way.
        datagenerator_kwargs = dict(
            rescale=1. / 255,
            validation_split=0.20
        )

        dataflow_kwargs = dict(
            # Drop the last element of the configured image size
            # (presumably the channel count - confirm against params.yaml).
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear"
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data_path,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        )

        if self.config.params_is_augmentation:
            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=40,
                horizontal_flip=True,
                width_shift_range=0.2,
                height_shift_range=0.2,
                shear_range=0.2,
                zoom_range=0.2,
                **datagenerator_kwargs
            )
        else:
            # Without augmentation the training data only needs the shared
            # rescale/split settings, so the validation generator is reused.
            train_datagenerator = valid_datagenerator

        self.train_generator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data_path,
            subset="training",
            shuffle=True,
            **dataflow_kwargs
        )

    @staticmethod
    def save_model(path: Path, model: "tf.keras.Model"):
        """Save ``model`` to ``path`` by calling ``model.save(path)``."""
        model.save(path)

    @staticmethod
    def _steps_for(generator, role: str) -> int:
        """Return the number of whole batches in ``generator``, but at least one.

        Plain floor division gives 0 when the subset is smaller than a single
        batch, which leaves ``fit`` with nothing to run; such a subset is
        treated as one (partial) batch instead.

        Raises:
            ValueError: If the generator reports no samples at all.
        """
        samples = generator.samples
        if samples <= 0:
            raise ValueError(f"The {role} generator found no images; cannot train.")
        return max(1, samples // generator.batch_size)

    def train(self):
        """Fit ``self.model`` on the generators and save it to ``config.trained_model_path``.

        Sets ``self.steps_per_epoch`` and ``self.validation_steps`` as a side effect.

        Raises:
            RuntimeError: If ``get_base_model`` or ``train_valid_generator`` has
                not been called yet.
            ValueError: If either generator contains no images.
        """
        if self.model is None:
            raise RuntimeError("Call get_base_model() before train().")
        if self.train_generator is None or self.valid_generator is None:
            raise RuntimeError("Call train_valid_generator() before train().")

        self.steps_per_epoch = self._steps_for(self.train_generator, "training")
        self.validation_steps = self._steps_for(self.valid_generator, "validation")

        self.model.fit(
            self.train_generator,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=self.valid_generator
        )

        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )



In [6]:
# pipeline

# Run the training stage end to end: build the config, load the base model,
# create the data generators, then fit and save the model.
# There is deliberately no try/except here: catching an exception only to
# re-raise it unchanged adds nothing, so errors propagate directly.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
model_trainer = ModelTrainer(config=model_trainer_config)
model_trainer.get_base_model()
model_trainer.train_valid_generator()
model_trainer.train()

[2025-08-04 20:24:06,976] [INFO] [common.read_yaml:33] [yaml file: config\config.yaml loaded successfully]
[2025-08-04 20:24:06,979] [INFO] [common.read_yaml:33] [yaml file: params.yaml loaded successfully]
[2025-08-04 20:24:06,980] [INFO] [common.create_directories:52] [Created directory at: artifacts]
[2025-08-04 20:24:06,984] [INFO] [common.create_directories:52] [Created directory at: artifacts/training]
Found 68 images belonging to 2 classes.
Found 275 images belonging to 2 classes.
