In [1]:
import os
# Inspect the working directory the kernel started in.
os.getcwd()
%pwd
# Step one level up from the notebook's directory.
# NOTE(review): this is a relative chdir, so re-running the cell without
# restarting the kernel moves up one more level each time.
os.chdir("../")
# Confirm the new working directory (presumably the project root - verify).
%pwd

'g:\\success_analytics_courses\\internship_project\\pulsar_project'

In [2]:
## step - 1 : config.yaml completed
## step - 2 : params.yaml completed (required in model trainer stage)
## step - 3 : constant completed
## step - 4 : entity

from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfiguration:
    """Immutable settings for the data-ingestion stage.

    Built by ``ConfigurationManager.get_data_ingestion_config``. The path
    fields are annotated ``Path`` but the manager fills them with the plain
    strings returned by ``os.path.join``.
    """

    # Stage directory name exactly as written in the config file
    # (not joined with the artifacts directory).
    root_dir_name: Path
    # URL the dataset is downloaded from, taken verbatim from the config file.
    dataset_download_url: str
    # <artifacts>/<root_dir_name>/<zip_data_dir_name>; created by the manager.
    zip_data_dir_name: Path
    # <artifacts>/<root_dir_name>/<unzip_data_dir_name>; created by the manager.
    unzip_data_dir_name: Path
    
@dataclass(frozen=True)
class DataValidationConfiguration:
    """Immutable settings for the data-validation stage.

    Built by ``ConfigurationManager.get_data_validation_configuration``. The
    path fields are annotated ``Path`` but the manager fills them with the
    plain strings returned by ``os.path.join``.
    """

    # Stage directory name exactly as written in the config file
    # (not joined with the artifacts directory).
    validated_root_dir_name: Path
    # <artifacts>/<validated_root_dir_name>/<validated_train_dir>; created by the manager.
    validated_train_dir: Path
    # <artifacts>/<validated_root_dir_name>/<validated_test_dir>; created by the manager.
    validated_test_dir: Path
    # Full path of the status report file inside the validation directory
    # (despite the "file_name" suffix, the manager stores a joined path).
    validated_status_report_file_name: str
    # Taken verbatim from the config file; presumably the file names that
    # must be present for validation to pass - verify against the validator.
    validated_required_files:list

@dataclass(frozen=True)
class DataTransformationConfiguration:
    """Immutable settings for the data-transformation stage.

    Built by ``ConfigurationManager.get_data_transformation_configuration``.
    Every field holds a directory that the manager has already created; the
    values are the plain strings returned by ``os.path.join`` even though the
    fields are annotated ``Path``.
    """

    # <artifacts>/<transformed_root_dir_name>
    transformed_root_dir_name: Path
    # Sub-directory of the stage directory; ModelEvaluation reads the
    # transformed training CSV from here.
    transformed_train_dir: Path
    # Sub-directory of the stage directory; ModelEvaluation reads the
    # transformed test CSV from here.
    transformed_test_dir: Path
    # Sub-directory of the stage directory for the "industrial" data set.
    transformed_industrial_data_dir: Path
    # Sub-directory of the stage directory for preprocessing artifacts.
    transformed_preprocess_dir: Path

@dataclass(frozen=True)
class ModelTrainerConfiguration:
    """Immutable settings for the model-trainer stage.

    Built by ``ConfigurationManager.get_model_trainer_configuration``.
    """

    # <artifacts>/<trained_model_root_dir_name>; created by the manager
    # (a plain ``os.path.join`` string despite the ``Path`` annotation).
    trained_model_root_dir_name: Path
    # Path of the YAML file, inside the trainer directory, that lists the
    # saved model files; ModelEvaluation reads it to locate the models.
    trained_model_path_yaml_file: str
    # ModelEvaluation requires train accuracy to be strictly above this value.
    trained_model_base_accuracy: float
    # Read from the config file; not used by the evaluation code in this notebook.
    trained_model_overfit_value: float
    # ModelEvaluation requires the test false-positive rate to be strictly below this value.
    trained_model_FPR: float
    # ModelEvaluation requires the test recall to be strictly above this value.
    trained_model_RECALL: float
    # Value stored under MODEL_SELECTION_KEY in the model-parameter YAML file.
    trained_model_selection:str

@dataclass(frozen=True)
class ModelEvaluationConfiguration:
    """Immutable settings for the model-evaluation stage.

    Built by ``ConfigurationManager.get_model_evaluation_configuration``.
    """

    # <artifacts>/<evaluated_model_root_dir_name>; created by the manager
    # (a plain ``os.path.join`` string despite the ``Path`` annotation).
    evaluated_model_root_dir_name: Path
    # Full path of the result file inside the evaluation directory;
    # ModelEvaluation writes its result table there with ``to_csv``.
    evaluated_model_result_file_name: str
    # Column headers applied to the result table before it is written.
    evaluated_model_result_file_column_name: list


In [3]:
## step - 5 : configuration manager in src config

from pulsarclassification.constants import *
from pulsarclassification.logging import logging
from pulsarclassification.utils.common import read_yaml,create_directories

class ConfigurationManager:
    """Builds the per-stage configuration objects from the project's YAML config.

    On construction the config file is read and the top-level artifacts
    directory is created. Each ``get_*`` method then creates the directories
    its stage needs under the artifacts directory and returns the matching
    frozen configuration dataclass. Any exception raised by the YAML reader,
    the directory helper or a missing config key propagates unchanged.
    """

    def __init__(self, config_file_path: str = CONFIG_FILE_PATH):
        """Load the configuration and make sure the artifacts directory exists.

        Args:
            config_file_path: Location of the main YAML configuration file.
                Defaults to ``CONFIG_FILE_PATH``.
        """
        # Fix: honour the argument. The previous code always read
        # CONFIG_FILE_PATH, silently ignoring a caller-supplied path.
        self.config = read_yaml(config_file_path)
        create_directories(self.config.artifacts_dir_name)
        logging.info(f" Artifacts directory created at : {self.config.artifacts_dir_name} ")

    @staticmethod
    def _ensure_dir(*parts):
        """Join ``parts`` into one path, create that directory, return the path."""
        directory = os.path.join(*parts)
        create_directories(directory)
        return directory

    def get_data_ingestion_config(self) -> DataIngestionConfiguration:
        """Create the ingestion directories and return the ingestion settings.

        Returns:
            DataIngestionConfiguration whose zip/unzip fields are full paths
            under ``<artifacts>/<root_dir_name>``.
        """
        stage = self.config.data_ingestion_config

        data_ingestion_dir = self._ensure_dir(self.config.artifacts_dir_name, stage.root_dir_name)
        raw_data_dir = self._ensure_dir(data_ingestion_dir, stage.zip_data_dir_name)
        ingested_csv_data_dir = self._ensure_dir(data_ingestion_dir, stage.unzip_data_dir_name)

        data_ingestion_config = DataIngestionConfiguration(
            # NOTE(review): this is the bare name from the config file, not the
            # joined stage directory (the transformation/trainer/evaluation
            # getters pass the joined path). Kept as-is because consumers of
            # this field are not visible here.
            root_dir_name=stage.root_dir_name,
            dataset_download_url=stage.dataset_download_url,
            zip_data_dir_name=raw_data_dir,
            unzip_data_dir_name=ingested_csv_data_dir,
        )
        logging.info(f" Data ingestion configuration: {data_ingestion_config}")
        return data_ingestion_config

    def get_data_validation_configuration(self) -> DataValidationConfiguration:
        """Create the validation directories and return the validation settings.

        Returns:
            DataValidationConfiguration whose train/test/report fields are
            full paths under ``<artifacts>/<validated_root_dir_name>``.
        """
        stage = self.config.data_validation_config

        data_validation_dir = self._ensure_dir(self.config.artifacts_dir_name, stage.validated_root_dir_name)
        data_validation_train_dir = self._ensure_dir(data_validation_dir, stage.validated_train_dir)
        data_validation_test_dir = self._ensure_dir(data_validation_dir, stage.validated_test_dir)

        data_validation_config = DataValidationConfiguration(
            # NOTE(review): bare config name, not the joined directory - see
            # the matching note in get_data_ingestion_config.
            validated_root_dir_name=stage.validated_root_dir_name,
            validated_train_dir=data_validation_train_dir,
            validated_test_dir=data_validation_test_dir,
            validated_status_report_file_name=os.path.join(
                data_validation_dir, stage.validated_status_report_file_name
            ),
            validated_required_files=stage.validated_required_files,
        )
        logging.info(f" Data validation configuration: {data_validation_config}")
        return data_validation_config

    def get_data_transformation_configuration(self) -> DataTransformationConfiguration:
        """Create the transformation directories and return their locations.

        Returns:
            DataTransformationConfiguration in which every field is a
            directory path that has just been created (or already existed).
        """
        stage = self.config.data_transformation_config

        data_transformation_dir = self._ensure_dir(
            self.config.artifacts_dir_name, stage.transformed_root_dir_name
        )
        data_transformation_train_dir = self._ensure_dir(data_transformation_dir, stage.transformed_train_dir)
        data_transformation_test_dir = self._ensure_dir(data_transformation_dir, stage.transformed_test_dir)
        data_transformation_industrial_data_dir = self._ensure_dir(
            data_transformation_dir, stage.transformed_industrial_data_dir
        )
        data_transformation_preprocess_data_dir = self._ensure_dir(
            data_transformation_dir, stage.transformed_preprocess_dir
        )

        data_transformation_config = DataTransformationConfiguration(
            transformed_root_dir_name=data_transformation_dir,
            transformed_train_dir=data_transformation_train_dir,
            transformed_test_dir=data_transformation_test_dir,
            transformed_industrial_data_dir=data_transformation_industrial_data_dir,
            transformed_preprocess_dir=data_transformation_preprocess_data_dir,
        )
        logging.info(f" Data transformation configuration: {data_transformation_config}")
        return data_transformation_config

    def get_model_trainer_configuration(self) -> ModelTrainerConfiguration:
        """Create the trainer directory and return the training settings.

        Thresholds come from the main config file; the model selection value
        comes from the separate YAML file at ``MODEL_PARAMETER_FILE_PATH``.

        Returns:
            ModelTrainerConfiguration for the trainer and evaluation stages.
        """
        stage = self.config.model_trainer_config
        param_config = read_yaml(MODEL_PARAMETER_FILE_PATH)

        model_trainer_dir = self._ensure_dir(self.config.artifacts_dir_name, stage.trained_model_root_dir_name)
        # Only the path is built here; the file itself is not created.
        model_trainer_yaml_file = os.path.join(model_trainer_dir, stage[MODEL_TRAINER_YAML_FILE_NAME_KEY])

        model_trainer_config = ModelTrainerConfiguration(
            trained_model_root_dir_name=model_trainer_dir,
            trained_model_path_yaml_file=model_trainer_yaml_file,
            trained_model_base_accuracy=stage.trained_model_base_accuracy,
            trained_model_overfit_value=stage.trained_model_overfit_value,
            trained_model_FPR=stage.trained_model_FPR,
            trained_model_RECALL=stage.trained_model_RECALL,
            trained_model_selection=param_config[MODEL_SELECTION_KEY],
        )
        logging.info(f" Model trainer configuration: {model_trainer_config}")
        return model_trainer_config

    def get_model_evaluation_configuration(self) -> ModelEvaluationConfiguration:
        """Create the evaluation directory and return the evaluation settings.

        Returns:
            ModelEvaluationConfiguration holding the evaluation directory, the
            full path of the result file and the result column headers.
        """
        stage = self.config.model_evaluation_config

        model_evaluation_dir = self._ensure_dir(
            self.config.artifacts_dir_name, stage.evaluated_model_root_dir_name
        )
        # Only the path is built here; the file itself is not created.
        model_evaluated_csv_file = os.path.join(
            model_evaluation_dir, stage[MODEL_EVALUATION_RESULT_FILE_NAME_KEY]
        )

        model_evaluation_config = ModelEvaluationConfiguration(
            evaluated_model_root_dir_name=model_evaluation_dir,
            evaluated_model_result_file_name=model_evaluated_csv_file,
            evaluated_model_result_file_column_name=stage.evaluated_model_result_file_column_name,
        )
        logging.info(f" Model evaluation configuration: {model_evaluation_config}")
        return model_evaluation_config

In [24]:
## step - 6 : updating components

import os
import importlib
import pandas as pd
from pathlib import Path
from sklearn.model_selection import StratifiedShuffleSplit
from pulsarclassification.logging import logging
from pulsarclassification.constants import *
from pulsarclassification.utils.common import read_yaml
from pulsarclassification.entity import DataTransformationConfiguration,ModelTrainerConfiguration
from sklearn import metrics
from sklearn.metrics import confusion_matrix,accuracy_score
class ModelEvaluation:
    """Scores every saved model on the transformed train/test data.

    For each model listed in the trainer's YAML file the class computes
    accuracy, false-positive rate and recall on both splits, decides whether
    the model passes the configured thresholds, and writes one row per model
    to the evaluation result CSV.
    """

    def __init__(self, 
                 transformation_config: DataTransformationConfiguration,
                 modeltrainer_config: ModelTrainerConfiguration,
                 modelevaluation_config: ModelEvaluationConfiguration):
        """Store the stage configurations and load the data schema.

        Args:
            transformation_config: Locations of the transformed train/test data.
            modeltrainer_config: Saved-model YAML path and pass/fail thresholds.
            modelevaluation_config: Result file path and its column headers.
        """
        self.transformation_config = transformation_config
        self.modeltrainer_config = modeltrainer_config
        self.modelevaluation_config = modelevaluation_config
        # The schema supplies the name of the target column.
        self.schema = read_yaml(SCHEMA_FILE_PATH)

    def get_data_for_evaluation(self):
        """Load the transformed train and test CSVs and split off the target.

        Returns:
            Tuple ``(train_X, train_y, test_X, test_y)`` where the ``X`` frames
            are the data without ``schema.target_column`` and the ``y`` values
            are that column alone.
        """
        target = self.schema.target_column

        model_train_data = pd.read_csv(
            os.path.join(self.transformation_config.transformed_train_dir, TRANSFORMED_MODEL_TRAIN_FILE_NAME)
        )
        model_test_data = pd.read_csv(
            os.path.join(self.transformation_config.transformed_test_dir, TRANSFORMED_MODEL_TEST_FILE_NAME)
        )

        train_data_input_features = model_train_data.drop(target, axis=1)
        logging.info(f"Train data features extracted from {TRANSFORMED_MODEL_TRAIN_FILE_NAME} having shape : {train_data_input_features.shape} ")

        test_data_input_features = model_test_data.drop(target, axis=1)
        logging.info(f"Test data features extracted from {TRANSFORMED_MODEL_TEST_FILE_NAME} having shape : {test_data_input_features.shape} ")

        train_data_output_features = model_train_data[target]
        logging.info(f"Output feature extracted from {TRANSFORMED_MODEL_TRAIN_FILE_NAME} having shape : {train_data_output_features.shape} ")

        test_data_output_features = model_test_data[target]
        logging.info(f"Output feature extracted from {TRANSFORMED_MODEL_TEST_FILE_NAME} having shape : {test_data_output_features.shape} ")

        return train_data_input_features, train_data_output_features, test_data_input_features, test_data_output_features

    def model_evaluate(self,model,X,y):
        """Compute accuracy, false-positive rate and recall of ``model`` on ``(X, y)``.

        Args:
            model: Fitted estimator exposing ``predict``.
            X: Input features.
            y: True labels; the confusion matrix is built for labels 0 and 1.

        Returns:
            Tuple ``(accuracy, FPR, RECALL)``. A rate whose denominator is zero
            (no true negatives+false positives, or no positives) is NaN.
        """
        y_pred = model.predict(X)
        accuracy = accuracy_score(y, y_pred)
        tn, fp, fn, tp = confusion_matrix(y, y_pred, labels=[0, 1]).ravel()

        # Guard the divisions explicitly: the result is still NaN when a class
        # is absent, but without relying on a runtime division warning.
        actual_negatives = tn + fp
        actual_positives = tp + fn
        FPR = fp / actual_negatives if actual_negatives else float("nan")
        RECALL = tp / actual_positives if actual_positives else float("nan")
        return accuracy, FPR, RECALL

    def get_model_evaluation_result(self):
        """Evaluate every saved model and write the result table to CSV.

        A model gets status 1 only when its train accuracy exceeds the base
        accuracy, its train accuracy exceeds its test accuracy, its test FPR
        is below the configured FPR and its test recall is above the
        configured recall; otherwise status 0. When the YAML file lists no
        models a header-only CSV is written.
        """
        X_train, y_train, X_test, y_test = self.get_data_for_evaluation()  ## X = input features , y = output features
        saved_model_config = read_yaml(self.modeltrainer_config.trained_model_path_yaml_file)
        # Logged rather than printed, consistent with the rest of the class.
        logging.info(f" Saved model paths : {saved_model_config} ")

        thresholds = self.modeltrainer_config
        rows = []
        for model_path_name in saved_model_config[SAVED_MODEL_ARTIFACTS_KEY].values():
            # SECURITY: unpickling executes arbitrary code; only load model
            # files produced by this project's own trainer.
            model = pd.read_pickle(model_path_name)
            train_accuracy, train_fpr, train_recall = self.model_evaluate(model, X_train, y_train)
            test_accuracy, test_fpr, test_recall = self.model_evaluate(model, X_test, y_test)

            # NOTE(review): train accuracy must be strictly greater than test
            # accuracy to pass, and trained_model_overfit_value is never
            # consulted - confirm this is the intended overfit check.
            passed = (
                train_accuracy > thresholds.trained_model_base_accuracy
                and train_accuracy > test_accuracy
                and test_fpr < thresholds.trained_model_FPR
                and test_recall > thresholds.trained_model_RECALL
            )
            if passed:
                logging.info(f" All evaluation cases passed ")
            else:
                logging.info(f" Evaluation cases failed ")

            rows.append([
                model_path_name,
                train_accuracy,
                test_accuracy,
                train_fpr,
                test_fpr,
                train_recall,
                test_recall,
                1 if passed else 0,
            ])

        # Build the frame once (no per-row concat); this also yields a valid
        # header-only table when no models were listed.
        result_columns = list(self.modelevaluation_config.evaluated_model_result_file_column_name)
        df_result = pd.DataFrame(rows, columns=result_columns)
        df_result.to_csv(self.modelevaluation_config.evaluated_model_result_file_name, index=False)
        logging.info(f" Model result saved in : {self.modelevaluation_config.evaluated_model_result_file_name} ")

In [25]:
# Run the evaluation stage end to end: build the stage configurations, then
# score every saved model and write the result CSV. Exceptions propagate
# unchanged (the former try/except only re-raised them).
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation_configuration()
model_trainer_config = config.get_model_trainer_configuration()
model_evaluation_config = config.get_model_evaluation_configuration()
model_evaluator = ModelEvaluation(
    transformation_config=data_transformation_config,
    modeltrainer_config=model_trainer_config,
    modelevaluation_config=model_evaluation_config,
)
model_evaluator.get_model_evaluation_result()

[03-09-2023 19:48:23: INFO: common:  yaml file from this path g:\success_analytics_courses\internship_project\pulsar_project\config\config.yaml read succesfully]
[03-09-2023 19:48:23: INFO: common:  Directory already present: artifacts ]
[03-09-2023 19:48:23: INFO: 3903250859:  Artifacts directory created at : artifacts ]
[03-09-2023 19:48:23: INFO: common:  Directory already present: artifacts\data_transformation ]
[03-09-2023 19:48:23: INFO: common:  Directory already present: artifacts\data_transformation\training_data_for_model ]
[03-09-2023 19:48:23: INFO: common:  Directory already present: artifacts\data_transformation\test_data_for_model ]
[03-09-2023 19:48:23: INFO: common:  Directory already present: artifacts\data_transformation\industrial_test_data ]
[03-09-2023 19:48:23: INFO: common:  Directory already present: artifacts\data_transformation\preprocessed_pickle_file ]
[03-09-2023 19:48:23: INFO: 3903250859:  Data transformation configuration: DataTransformationConfiguratio

[03-09-2023 19:48:24: INFO: 460011214: Train data features extracted from pulsar_train_data.csv having shape : (105807, 8) ]
[03-09-2023 19:48:24: INFO: 460011214: Test data features extracted from pulsar_test_data.csv having shape : (11757, 8) ]
[03-09-2023 19:48:24: INFO: 460011214: Output feature extracted from pulsar_train_data.csv having shape : (105807,) ]
[03-09-2023 19:48:24: INFO: 460011214: Output feature extracted from pulsar_test_data.csv having shape : (11757,) ]
[03-09-2023 19:48:24: INFO: common:  yaml file from this path artifacts\trained_model\trained_model_path.yaml read succesfully]
{'all_trained_model_paths': {'model_0_path_03092023': 'artifacts\\trained_model\\model_03092023\\LGBMClassifier\\model.pkl', 'model_1_path_03092023': 'artifacts\\trained_model\\model_03092023\\XGBClassifier\\model.pkl'}}
[03-09-2023 19:48:24: INFO: 460011214:  All evaluation cases passed ]
[03-09-2023 19:48:25: INFO: 460011214:  All evaluation cases passed ]
[03-09-2023 19:48:25: INFO: 46