In [1]:
import os
# Move the kernel's working directory one level up so that relative paths
# used later in the notebook resolve from the parent directory.
# NOTE(review): os.chdir("../") is relative to the *current* directory, so
# re-running this cell without restarting the kernel moves up one more level.
os.getcwd()  # return value is discarded here; the directory is shown by %pwd below
os.chdir("../")
# IPython magic: display the working directory after the change.
%pwd

'g:\\success_analytics_courses\\internship_project\\pulsar_project'

In [2]:
## step - 1 : config.yaml completed
## step - 2 : params.yaml completed(required in model trainer stage)
## step - 3 : constant completed
## step - 4 : entity

from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfiguration:
    """Immutable settings record for the data-ingestion stage.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Root directory name of the ingestion stage.
    root_dir_name: Path
    # URL the dataset is downloaded from.
    dataset_download_url: str
    # Directory associated with the zipped data.
    zip_data_dir_name: Path
    # Directory associated with the unzipped data.
    unzip_data_dir_name: Path
    
@dataclass(frozen=True)
class DataValidationConfiguration:
    """Immutable settings record for the data-validation stage.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Root directory name of the validation stage.
    validated_root_dir_name: Path
    # Directory for the validated training data.
    validated_train_dir: Path
    # Directory for the validated test data.
    validated_test_dir: Path
    # Status report file for the validation stage.
    validated_status_report_file_name: str
    # Files the validation stage requires.
    validated_required_files: list

@dataclass(frozen=True)
class DataTransformationConfiguration:
    """Immutable settings record for the data-transformation stage.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Root directory of the transformation stage.
    transformed_root_dir_name: Path
    # Directory for the transformed training data.
    transformed_train_dir: Path
    # Directory for the transformed test data.
    transformed_test_dir: Path
    # Directory for the transformed "industrial" data.
    transformed_industrial_data_dir: Path
    # Directory for preprocessing output.
    transformed_preprocess_dir: Path

@dataclass(frozen=True)
class ModelTrainerConfiguration:
    """Immutable settings record for the model-trainer stage.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Root directory of the model-trainer stage.
    trained_model_root_dir_name: Path
    # Path of the trainer's YAML file.
    trained_model_path_yaml_file: str
    # Base accuracy value for trained models.
    trained_model_base_accuracy: float
    # Overfit value for trained models.
    trained_model_overfit_value: float
    # FPR value (presumably false-positive rate -- TODO confirm).
    trained_model_FPR: float
    # RECALL value for trained models.
    trained_model_RECALL: float
    # Model-selection setting.
    trained_model_selection: str

@dataclass(frozen=True)
class ModelEvaluationConfiguration:
    """Immutable settings record for the model-evaluation stage.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Root directory of the evaluation stage.
    evaluated_model_root_dir_name: Path
    # Evaluation result file.
    evaluated_model_result_file_name: str
    # Column names of the evaluation result file.
    evaluated_model_result_file_column_name: list

@dataclass(frozen=True)
class ModelPusherConfiguration:
    """Immutable settings record for the model-pusher stage.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Root directory of the model-pusher stage.
    pushed_model_root_dir_name: Path


In [3]:
## step - 5 : configuration manager in src config

import sys
from pulsarclassification.constants import *
from pulsarclassification.logging import logging
from pulsarclassification.exception import PulsarException
from pulsarclassification.utils.common import read_yaml,create_directories

class ConfigurationManager:
    """Build the per-stage configuration objects from the project's YAML config.

    The YAML file is read once in ``__init__``; each ``get_*`` method then
    resolves the directories for one pipeline stage, passes them to
    ``create_directories`` and returns the matching frozen dataclass.
    The object returned by ``read_yaml`` is accessed both by attribute
    (``config.name``) and by key (``config[KEY]``).

    Every method wraps any failure in ``PulsarException``.
    """

    def __init__(self, config_file_path: str = CONFIG_FILE_PATH):
        """Read the configuration file and prepare the artifacts directory.

        Args:
            config_file_path: Path of the YAML configuration file. Defaults to
                ``CONFIG_FILE_PATH``.

        Raises:
            PulsarException: If reading the file or preparing the directory fails.
        """
        try:
            # Use the path the caller supplied; it was previously ignored in
            # favour of the CONFIG_FILE_PATH constant.
            self.config = read_yaml(config_file_path)
            create_directories(self.config.artifacts_dir_name)
            logging.info(f" Artifacts directory created at : {self.config.artifacts_dir_name} ")
        except Exception as e:
            raise PulsarException(e,sys) from e

    @staticmethod
    def _ensure_dir(*path_parts: str) -> str:
        """Join ``path_parts`` into one path, pass it to ``create_directories`` and return it."""
        dir_path = os.path.join(*path_parts)
        create_directories(dir_path)
        return dir_path

    def get_data_ingestion_config(self) -> DataIngestionConfiguration:
        """Return the data-ingestion configuration.

        Raises:
            PulsarException: On any failure while building the configuration.
        """
        try:
            artifact_dir = self.config.artifacts_dir_name
            config = self.config.data_ingestion_config

            data_ingestion_dir = self._ensure_dir(artifact_dir, config.root_dir_name)
            raw_data_dir = self._ensure_dir(data_ingestion_dir, config.zip_data_dir_name)
            ingested_csv_data_dir = self._ensure_dir(data_ingestion_dir, config.unzip_data_dir_name)

            # NOTE(review): root_dir_name carries the bare configured name, not the
            # joined path under the artifacts directory (unlike the transformation,
            # trainer and evaluation configs). Kept as-is for callers -- confirm intent.
            data_ingestion_config = DataIngestionConfiguration(
                root_dir_name  = config.root_dir_name,
                dataset_download_url = config.dataset_download_url,
                zip_data_dir_name = raw_data_dir,
                unzip_data_dir_name = ingested_csv_data_dir
            )

            logging.info(f" Data ingestion configuration: {data_ingestion_config}")

            return data_ingestion_config

        except Exception as e:
            raise PulsarException(e,sys) from e

    def get_data_validation_configuration(self) -> DataValidationConfiguration:
        """Return the data-validation configuration.

        Raises:
            PulsarException: On any failure while building the configuration.
        """
        try:
            artifact_dir = self.config.artifacts_dir_name
            config = self.config.data_validation_config

            data_validation_dir = self._ensure_dir(artifact_dir, config.validated_root_dir_name)
            data_validation_train_dir = self._ensure_dir(data_validation_dir, config.validated_train_dir)
            data_validation_test_dir = self._ensure_dir(data_validation_dir, config.validated_test_dir)

            # NOTE(review): validated_root_dir_name carries the bare configured name,
            # not the joined path. Kept as-is for callers -- confirm intent.
            data_validation_config = DataValidationConfiguration(
                validated_root_dir_name  = config.validated_root_dir_name,
                validated_train_dir = data_validation_train_dir,
                validated_test_dir = data_validation_test_dir,
                validated_status_report_file_name = os.path.join(data_validation_dir,config.validated_status_report_file_name),
                validated_required_files = config.validated_required_files
            )

            logging.info(f" Data validation configuration: {data_validation_config}")

            return data_validation_config

        except Exception as e:
            raise PulsarException(e,sys) from e

    def get_data_transformation_configuration(self) -> DataTransformationConfiguration:
        """Return the data-transformation configuration.

        Raises:
            PulsarException: On any failure while building the configuration.
        """
        try:
            artifact_dir = self.config.artifacts_dir_name
            config = self.config.data_transformation_config

            data_transformation_dir = self._ensure_dir(artifact_dir, config.transformed_root_dir_name)
            data_transformation_train_dir = self._ensure_dir(data_transformation_dir, config.transformed_train_dir)
            data_transformation_test_dir = self._ensure_dir(data_transformation_dir, config.transformed_test_dir)
            data_transformation_industrial_data_dir = self._ensure_dir(
                data_transformation_dir, config.transformed_industrial_data_dir
            )
            data_transformation_preprocess_data_dir = self._ensure_dir(
                data_transformation_dir, config.transformed_preprocess_dir
            )

            data_transformation_config = DataTransformationConfiguration(
                transformed_root_dir_name = data_transformation_dir,
                transformed_train_dir = data_transformation_train_dir,
                transformed_test_dir =  data_transformation_test_dir,
                transformed_industrial_data_dir =  data_transformation_industrial_data_dir,
                transformed_preprocess_dir = data_transformation_preprocess_data_dir
            )

            logging.info(f" Data transformation configuration: {data_transformation_config}")

            return data_transformation_config

        except Exception as e:
            raise PulsarException(e,sys) from e

    def get_model_trainer_configuration(self) -> ModelTrainerConfiguration:
        """Return the model-trainer configuration.

        Besides the main config, this reads ``MODEL_PARAMETER_FILE_PATH`` to
        obtain the model-selection setting.

        Raises:
            PulsarException: On any failure while building the configuration.
        """
        try:
            artifact_dir = self.config.artifacts_dir_name
            config = self.config.model_trainer_config
            param_config = read_yaml(MODEL_PARAMETER_FILE_PATH)

            model_trainer_dir = self._ensure_dir(artifact_dir, config.trained_model_root_dir_name)

            # Only the path is computed here; this method does not create the file.
            model_trainer_yaml_file = os.path.join(model_trainer_dir,config[MODEL_TRAINER_YAML_FILE_NAME_KEY])

            model_trainer_config = ModelTrainerConfiguration(
                trained_model_root_dir_name = model_trainer_dir,
                trained_model_path_yaml_file = model_trainer_yaml_file,
                trained_model_base_accuracy = config.trained_model_base_accuracy,
                trained_model_overfit_value = config.trained_model_overfit_value,
                trained_model_FPR           = config.trained_model_FPR,
                trained_model_RECALL        = config.trained_model_RECALL,
                trained_model_selection     = param_config[MODEL_SELECTION_KEY]
            )

            logging.info(f" Model trainer configuration: {model_trainer_config}")

            return model_trainer_config

        except Exception as e:
            raise PulsarException(e,sys) from e

    def get_model_evaluation_configuration(self) -> ModelEvaluationConfiguration:
        """Return the model-evaluation configuration.

        Raises:
            PulsarException: On any failure while building the configuration.
        """
        try:
            artifact_dir = self.config.artifacts_dir_name
            config = self.config.model_evaluation_config

            model_evaluation_dir = self._ensure_dir(artifact_dir, config.evaluated_model_root_dir_name)

            # Only the path is computed here; this method does not create the file.
            model_evaluated_csv_file = os.path.join(model_evaluation_dir,config[MODEL_EVALUATION_RESULT_FILE_NAME_KEY])

            model_evaluation_config = ModelEvaluationConfiguration(
                evaluated_model_root_dir_name = model_evaluation_dir,
                evaluated_model_result_file_name = model_evaluated_csv_file,
                evaluated_model_result_file_column_name = config.evaluated_model_result_file_column_name
            )

            logging.info(f" Model evaluation configuration: {model_evaluation_config}")

            return model_evaluation_config

        except Exception as e:
            raise PulsarException(e,sys) from e

    def get_model_pusher_configuration(self) -> ModelPusherConfiguration:
        """Return the model-pusher configuration.

        Unlike the other stages, the pusher directory is resolved against
        ``ROOT_DIR`` rather than the artifacts directory.

        Raises:
            PulsarException: On any failure while building the configuration.
        """
        try:
            config = self.config.model_pusher_config

            model_pusher_dir = self._ensure_dir(ROOT_DIR, config.pushed_model_root_dir_name)
            model_pusher_config = ModelPusherConfiguration(
                pushed_model_root_dir_name = model_pusher_dir
            )

            logging.info(f" Model pusher configuration: {model_pusher_config}")

            return model_pusher_config

        except Exception as e:
            raise PulsarException(e,sys) from e

In [4]:
## step - 6 : updating components

import os
import importlib
import pandas as pd
from pathlib import Path
from pulsarclassification.logging import logging
from pulsarclassification.constants import *
from pulsarclassification.utils.common import pickle_file_saving
from pulsarclassification.entity import ModelEvaluationConfiguration

class ModelPusher:
    """Pick the best evaluated model and save it into the model-pusher directory.

    The evaluation result CSV is filtered to the rows whose status column equals
    1; among those, the row with the highest value in the configured metric
    column supplies the path of the model to push.
    """

    def __init__(self, 
                 modelevaluation_config: ModelEvaluationConfiguration,
                 modelpusher_config: ModelPusherConfiguration):
        """Store the two configuration objects used by ``get_model_pusher``.

        Args:
            modelevaluation_config: Supplies ``evaluated_model_result_file_name``,
                the CSV that is read.
            modelpusher_config: Supplies ``pushed_model_root_dir_name``, the
                directory the selected model is saved into.
        """
        # Plain attribute assignment cannot fail, so no exception wrapping is needed.
        self.modelevaluation_config = modelevaluation_config
        self.modelpusher_config = modelpusher_config

    def get_model_pusher(self):
        """Load the best passing model and save it to the pusher directory.

        Returns:
            None. The result is the saved file plus two log records.

        Raises:
            PulsarException: If the result file cannot be read, if no model row
                qualifies, or if loading or saving the model fails.
        """
        try:
            result_file = pd.read_csv(self.modelevaluation_config.evaluated_model_result_file_name)

            # 1 represents that the model passed all the status measures.
            result_status_file = result_file[result_file[PUSHED_MODEL_STATUS_FEATURE_NAME] == 1]
            if result_status_file.empty:
                # Without this guard the lookup below fails with an opaque IndexError.
                raise ValueError(
                    f"No model in {self.modelevaluation_config.evaluated_model_result_file_name} "
                    f"has {PUSHED_MODEL_STATUS_FEATURE_NAME} == 1; nothing to push"
                )

            metric_values = result_status_file[PUSHED_MODEL_METRIC_EVALUATION_FEATURE_NAME]
            max_value_of_required_metric = metric_values.max()
            best_rows = result_status_file[metric_values == max_value_of_required_metric]
            if best_rows.empty:
                # Reached when every metric value is NaN (NaN never compares equal).
                raise ValueError(
                    f"Column {PUSHED_MODEL_METRIC_EVALUATION_FEATURE_NAME} holds no usable "
                    f"value among the models that passed; nothing to push"
                )

            # On ties the first matching row wins.
            required_model_path = best_rows[PUSHED_MODEL_FILE_PATH_FEATURE_NAME].values[0]
            logging.info(f" {required_model_path} has best test accuracy amomg all the trained models i.e {max_value_of_required_metric}")

            # NOTE(security): unpickling executes arbitrary code. The path comes from
            # the evaluation CSV, so that file must only ever list trusted artifacts.
            final_model = pd.read_pickle(required_model_path)
            # NOTE(review): the third argument is named *_KEY; confirm it is the
            # file name pickle_file_saving expects.
            pickle_file_saving(final_model,self.modelpusher_config.pushed_model_root_dir_name,PUSHED_MODEL_FILE_NAME_KEY)
            logging.info(f"Final model saved in : {self.modelpusher_config.pushed_model_root_dir_name}")
        except Exception as e:
            raise PulsarException(e,sys)

In [5]:
# Driver cell: build the configuration objects and run the model-pusher stage.
# Relies on ConfigurationManager, ModelPusher, PulsarException and sys being
# defined/imported by the earlier cells of this notebook.
try:
    # Reads the YAML config (constructor side effect).
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_configuration()
    model_pusher_config = config.get_model_pusher_configuration()
    model_pusher = ModelPusher(
        modelevaluation_config=model_evaluation_config,
        modelpusher_config=model_pusher_config)
    # Selects the best passing model and saves it to the pusher directory.
    model_pusher.get_model_pusher()
except Exception as e:
    # NOTE(review): the calls above already raise PulsarException, so a failure
    # there ends up wrapped a second time here -- confirm this is intended.
    raise PulsarException(e,sys)

[03-09-2023 21:25:03: INFO: common:  yaml file from this path g:\success_analytics_courses\internship_project\pulsar_project\config\config.yaml read succesfully]
[03-09-2023 21:25:03: INFO: common:  Directory already present: artifacts ]
[03-09-2023 21:25:03: INFO: 3972388464:  Artifacts directory created at : artifacts ]
[03-09-2023 21:25:03: INFO: common:  Directory already present: artifacts\evaluated_model_status ]
[03-09-2023 21:25:03: INFO: 3972388464:  Model evaluation configuration: ModelEvaluationConfiguration(evaluated_model_root_dir_name='artifacts\\evaluated_model_status', evaluated_model_result_file_name='artifacts\\evaluated_model_status\\evaluated_model_result.csv', evaluated_model_result_file_column_name=BoxList(['model_path', 'train_accuracy', 'test_accuracy', 'train_fpr', 'test_fpr', 'train_recall', 'test_recall', 'model_evaluation_status']))]
[03-09-2023 21:25:03: INFO: common:  Directory already present: g:\success_analytics_courses\internship_project\pulsar_project

[03-09-2023 21:25:06: INFO: 3281502157: Final model saved in : g:\success_analytics_courses\internship_project\pulsar_project\ModelRegistry]
