
-------

# **`Preparing Data for Base Model`**

------



In [1]:
# check current directory
!pwd
!

/e/Practice python/Chest Cancer Classification Using MLflow and DVC/research


In [2]:
# Move from research/ up to the project root so the `src` package and the
# relative config/params paths resolve. The guard makes this cell idempotent:
# re-running it no longer climbs one more directory each time.
import os

if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [3]:
# Confirm the working directory after the chdir above (should now be the project root)
!pwd

/e/Practice python/Chest Cancer Classification Using MLflow and DVC


### **Set up Model Configurations**

In [4]:
from dataclasses import dataclass  # Import the dataclass decorator from the dataclasses module
from pathlib import Path  # Import the Path class from the pathlib module for file system paths

@dataclass(frozen=True)
class PrepareBaseModelConfig:
    """Immutable bundle of settings for the base-model preparation stage.

    Instances are frozen: assigning to a field after construction raises.

    Filesystem fields:
        root_dir: root directory of this stage.
        base_model_path: file the base model is saved to.
        updated_base_model_path: file the updated (full) model is saved to.

    Hyperparameter fields:
        params_image_size: handed to VGG16 as its ``input_shape``
            (presumably [height, width, channels]; confirm against params.yaml).
        params_learning_rate: learning rate used when compiling the full model.
        params_include_top: forwarded to VGG16 as ``include_top``.
        params_weights: forwarded to VGG16 as ``weights`` (e.g. 'imagenet').
        params_classes: number of units in the final classification layer.
    """

    # --- filesystem locations ---
    root_dir: Path
    base_model_path: Path
    updated_base_model_path: Path

    # --- model hyperparameters ---
    params_image_size: list
    params_learning_rate: float
    params_include_top: bool
    params_weights: str
    params_classes: int

In [6]:
from src.chest_cancer_classifier import *
from src.chest_cancer_classifier.constants import *
from src.chest_cancer_classifier.utils.common_functions import read_yaml, create_directories

In [10]:
class ConfigurationManager:
    """Reads the project's YAML settings and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: path of the configuration YAML
                (defaults to CONFIG_FILE_PATH).
            params_filepath: path of the parameters YAML
                (defaults to PARAMS_FILE_PATH).
        """
        # Parsed contents are kept on the instance for the getter methods.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Every stage writes beneath this directory, so create it up front.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:
        """Assemble the settings for the base-model preparation stage.

        Creates the stage's root directory as a side effect.

        Returns:
            PrepareBaseModelConfig: paths taken from the ``prepare_base_model``
            section of the config file (wrapped in ``Path``) together with the
            IMAGE_SIZE, LEARNING_RATE, INCLUDE_TOP, WEIGHTS and CLASSES values
            from the params file.
        """
        stage = self.config.prepare_base_model
        hyperparams = self.params

        # The stage directory has to exist before anything is saved into it.
        create_directories([stage.root_dir])

        return PrepareBaseModelConfig(
            # locations, normalised to Path objects
            root_dir=Path(stage.root_dir),
            base_model_path=Path(stage.base_model_path),
            updated_base_model_path=Path(stage.updated_base_model_path),
            # hyperparameters, passed through untouched
            params_image_size=hyperparams.IMAGE_SIZE,
            params_learning_rate=hyperparams.LEARNING_RATE,
            params_include_top=hyperparams.INCLUDE_TOP,
            params_weights=hyperparams.WEIGHTS,
            params_classes=hyperparams.CLASSES,
        )

### **Set Up Model Components**

In [11]:
import os
import tensorflow as tf
from zipfile import ZipFile
import urllib.request as request

In [12]:
class PrepareBaseModel:
    """Builds a VGG16 base model and derives a classifier with a softmax head."""

    def __init__(self, config: PrepareBaseModelConfig):
        """Store the stage configuration used by the other methods.

        Args:
            config: paths and hyperparameters for this stage.
        """
        self.config = config

    def get_base_model(self):
        """Instantiate VGG16 from the configured parameters and save it.

        The network is kept on ``self.model`` and written to
        ``config.base_model_path``.
        """
        cfg = self.config
        self.model = tf.keras.applications.vgg16.VGG16(
            input_shape=cfg.params_image_size,
            weights=cfg.params_weights,
            include_top=cfg.params_include_top,
        )
        self.save_model(path=cfg.base_model_path, model=self.model)

    @staticmethod
    def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):
        """Attach a Flatten + softmax Dense head to ``model`` and compile it.

        Args:
            model: base network whose output feeds the new head.
            classes: number of units in the Dense output layer.
            freeze_all: when truthy, every layer of ``model`` is made
                non-trainable.
            freeze_till: used only when ``freeze_all`` is falsy; if it is a
                positive number, all layers except the last ``freeze_till``
                are made non-trainable.
            learning_rate: learning rate given to the SGD optimizer.

        Returns:
            The compiled model (SGD, categorical cross-entropy, accuracy
            metric). Its summary is printed before returning.
        """
        # Decide which layers to freeze first, then apply in a single pass.
        if freeze_all:
            frozen_layers = model.layers
        elif freeze_till is not None and freeze_till > 0:
            frozen_layers = model.layers[:-freeze_till]
        else:
            frozen_layers = []
        for frozen in frozen_layers:
            frozen.trainable = False

        # New head: flatten the base output, then one softmax Dense layer.
        flattened = tf.keras.layers.Flatten()(model.output)
        softmax_head = tf.keras.layers.Dense(units=classes, activation="softmax")
        prediction = softmax_head(flattened)

        full_model = tf.keras.models.Model(inputs=model.input, outputs=prediction)

        sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate)
        cross_entropy = tf.keras.losses.CategoricalCrossentropy()
        full_model.compile(optimizer=sgd, loss=cross_entropy, metrics=["accuracy"])

        full_model.summary()
        return full_model

    def update_base_model(self):
        """Build the full model from ``self.model`` and save it.

        All base layers are frozen (``freeze_all=True``). The result is kept on
        ``self.full_model`` and written to ``config.updated_base_model_path``.
        Reads ``self.model``, which ``get_base_model`` sets.
        """
        cfg = self.config
        self.full_model = self._prepare_full_model(
            model=self.model,
            classes=cfg.params_classes,
            freeze_all=True,
            freeze_till=None,
            learning_rate=cfg.params_learning_rate,
        )
        self.save_model(path=cfg.updated_base_model_path, model=self.full_model)

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Write ``model`` to ``path`` using the model's own ``save`` method."""
        model.save(path)

In [13]:
# Run the stage end to end: load the configuration, build and save the VGG16
# base model, then attach the classification head and save the updated model.
# No try/except wrapper: a handler that only re-raises adds nothing, so any
# error simply propagates with its original traceback.
config = ConfigurationManager()
prepare_base_model_config = config.get_prepare_base_model_config()
prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)
prepare_base_model.get_base_model()
prepare_base_model.update_base_model()

[2024-11-10 23:18:29,463: INFO: common_functions: YAML file 'config\config.yaml' loaded successfully]
[2024-11-10 23:18:29,476: INFO: common_functions: YAML file 'params.yaml' loaded successfully]
[2024-11-10 23:18:29,479: INFO: common_functions: Directory created at: artifacts]
[2024-11-10 23:18:29,481: INFO: common_functions: Directory created at: artifacts/prepare_base_model]
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 1us/step




-------