In [2]:
import os
from datetime import datetime
from pathlib import Path


def get_current_time_stamp():
    """Return the current local time formatted as a ``YYYYMMDDHHMMSS`` string."""
    moment = datetime.now()
    return moment.strftime("%Y%m%d%H%M%S")


# Project root is taken from the process's current working directory.
ROOT_DIR = os.getcwd()
# Evaluated once, when this cell/module runs.
CURRENT_TIME_STAMP = get_current_time_stamp()

# --- configuration file locations (all under <ROOT_DIR>/configs) ---
CONFIG_DIR = os.path.join(ROOT_DIR, "configs")
CONFIG_FILE_NAME = "config.yaml"
CONFIG_FILE_PATH = os.path.join(CONFIG_DIR, CONFIG_FILE_NAME)

# Unlike CONFIG_FILE_PATH (a str), the two constants below are pathlib.Path objects.
DATABASE_FILE_NAME = "database.yaml"
DATABASE_FILE = Path(CONFIG_DIR) / DATABASE_FILE_NAME

FEATURE_GENERATOR_FILE_NAME = "feature_generator.yaml"
FEATURE_GENERATOR_FILE_PATH = Path(CONFIG_DIR) / FEATURE_GENERATOR_FILE_NAME

In [8]:
from pydantic import BaseModel, DirectoryPath, FilePath


class DataIngestionArtifact(BaseModel):
    """Paths of the train and test files recorded for the data-ingestion step."""
    # pydantic's FilePath validates that each path points at an existing file.
    train_file_path: FilePath
    test_file_path: FilePath


class DataValidationArtifact(BaseModel):
    """Result record for the data-validation step."""
    # Must point at an existing file (pydantic FilePath).
    schema_file_path: FilePath
    # Must point at an existing directory (pydantic DirectoryPath).
    report_file_dir: DirectoryPath
    # Validation outcome flag.
    is_validated: bool


class DataTransformationArtifact(BaseModel):
    """Result record for the data-transformation step."""
    # Location of the saved preprocessing object; must already exist (pydantic FilePath).
    preprocessed_object_path: FilePath
    
class ModelTrainerArtifact(BaseModel):
    """Artifact for the model-training step; currently declares no fields."""
    # NOTE(review): ModelTrainer.initiate_model_trainer builds this artifact with
    # keyword arguments (is_trained, message, trained_model_file_path, ...), but no
    # fields are declared here, so nothing is stored as a typed field — confirm the
    # intended schema and declare it.
    pass

In [14]:

from pathlib import Path

from pydantic import BaseModel, DirectoryPath, FilePath


class DataIngestionConfig(BaseModel):
    """Settings for the data-ingestion step (built by ConfigurationManager)."""
    # Identifier of the dataset to download.
    dataset_download_id: str
    # Plain Path fields: the files are not required to exist at construction time.
    raw_data_file_path: Path
    ingested_train_file_path: Path
    ingested_test_data_path: Path
    # Seed taken from the training-pipeline config.
    random_state: int


class TrainingPipelineConfig(BaseModel):
    """Pipeline-wide settings shared by every stage configuration."""
    # Root directory for stage artifacts; must exist (pydantic DirectoryPath).
    artifact_dir: DirectoryPath
    pipeline_name: str
    # Used as a sub-directory name under each stage's artifact directory.
    experiment_code: str
    training_random_state: int


class DataValidationConfig(BaseModel):
    """Settings for the data-validation step (built by ConfigurationManager)."""
    experiment_code: str
    # Must be an existing directory (pydantic DirectoryPath).
    data_validated_artifact_dir: DirectoryPath
    # Must be an existing file (pydantic FilePath).
    schema_file_path: FilePath
    report_file_dir: Path
    # Collection names for the validated test/train data.
    data_validated_test_collection: str
    data_validated_train_collection: str
    # FilePath: both ingested files must already exist when this config is built.
    train_data_file: FilePath
    test_data_file: FilePath


class DataTransformationConfig(BaseModel):
    """Settings for the data-transformation step (built by ConfigurationManager)."""
    data_validated_train_collection: str
    # Must be an existing file (pydantic FilePath).
    schema_file_path: FilePath
    random_state: int
    # Plain Path: the preprocessing object need not exist yet.
    preprocessed_object_file_path: Path
    # Collection names for the transformed train/test data.
    to_train_collection: str
    to_test_collection: str
    
class ModelTrainerConfig(BaseModel):
    """Settings consumed by ModelTrainer.

    ``base_accuracy`` and ``eval_difference`` are floats: they are compared against
    fractional metric values and are passed to functions whose parameters are
    annotated ``float``.
    """
    # Path of the model-factory configuration file.
    model_config_file_path: Path
    # Minimum accuracy a model must reach to be considered.
    base_accuracy: float
    # Destination of the final trained predictor.
    trained_model_file_path: Path
    # Root directory for per-model reports; must exist (pydantic DirectoryPath).
    model_report_dir: DirectoryPath
    # Saved preprocessing object; must exist (pydantic FilePath).
    preprocessed_object_file_path: FilePath
    # Collection names holding the transformed train/test data.
    to_train_collection: str
    to_test_collection: str
    # Schema file; must exist (pydantic FilePath).
    schema_file_path: FilePath
    # Upper bound on the train-minus-test gap of the evaluation metric.
    eval_difference: float
    # Name of the metric used to compare models (e.g. "accuracy").
    eval_param: str
    experiment_id: str
    # Collection name holding the validated data.
    validated_collection: str
    validated_collection : str


In [10]:
import os
import sys
from pathlib import Path

from CreditCard.constants import (CONFIG_DIR, CONFIG_FILE_PATH,
                                  CURRENT_TIME_STAMP, ROOT_DIR)
from CreditCard.entity import (DataIngestionConfig, DataTransformationConfig,
                               DataValidationConfig, TrainingPipelineConfig)
from CreditCard.exception import AppException
from CreditCard.logging import logger
from CreditCard.utils import create_directories, read_yaml


class ConfigurationManager:
    """Builds the typed configuration object for each pipeline stage from the YAML config.

    Each ``get_*_config`` method reads its section from ``self.config_info``, creates
    the directories that stage writes into, and wraps any failure in ``AppException``
    (chained to the original error).
    """

    def __init__(self,
                 config_file_path: Path = CONFIG_FILE_PATH) -> None:
        """Load the YAML configuration and build the training-pipeline config.

        Params:
            config_file_path: location of the YAML configuration file.
        Raises:
            AppException: if reading the file or building the pipeline config fails.
        """
        try:
            self.config_info = read_yaml(path_to_yaml=Path(config_file_path))
            self.pipeline_config = self.get_training_pipeline_config()
            self.time_stamp = CURRENT_TIME_STAMP

        except Exception as e:
            raise AppException(e, sys) from e

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data-ingestion config and create its raw/ingested directories.

        Layout: ``<artifact_dir>/<ingestion_dir>/<experiment_code>/`` containing the
        raw-data directory and the ingested directory.
        Raises:
            AppException: on any failure.
        """
        try:
            data_ingestion_info = self.config_info.data_ingestion_config
            pipeline_config = self.pipeline_config
            artifact_dir = pipeline_config.artifact_dir
            experiment_code = pipeline_config.experiment_code
            random_state = pipeline_config.training_random_state
            dataset_download_id = data_ingestion_info.dataset_download_id
            data_ingestion_dir_name = data_ingestion_info.ingestion_dir
            raw_data_dir = data_ingestion_info.raw_data_dir
            raw_file_name = data_ingestion_info.dataset_download_file_name
            data_ingestion_dir = os.path.join(artifact_dir, data_ingestion_dir_name, experiment_code)
            raw_data_file_path = os.path.join(data_ingestion_dir, raw_data_dir, raw_file_name)
            ingested_dir_name = data_ingestion_info.ingested_dir
            ingested_dir_path = os.path.join(data_ingestion_dir, ingested_dir_name)

            ingested_train_file_path = os.path.join(ingested_dir_path, data_ingestion_info.ingested_train_file)
            ingested_test_file_path = os.path.join(ingested_dir_path, data_ingestion_info.ingested_test_file)
            # Train and test files share one directory, so creating the train file's parent is enough.
            create_directories([os.path.dirname(raw_data_file_path), os.path.dirname(ingested_train_file_path)])

            data_ingestion_config = DataIngestionConfig(dataset_download_id=dataset_download_id,
                                                        raw_data_file_path=raw_data_file_path,
                                                        ingested_train_file_path=ingested_train_file_path,
                                                        ingested_test_data_path=ingested_test_file_path,
                                                        random_state=random_state)

            return data_ingestion_config
        except Exception as e:
            raise AppException(e, sys) from e

    def get_training_pipeline_config(self) -> TrainingPipelineConfig:
        """Build the pipeline-wide config and create the artifact root under ``ROOT_DIR``.

        Raises:
            AppException: on any failure.
        """
        try:
            training_config = self.config_info.training_pipeline_config
            training_pipeline_name = training_config.pipeline_name
            training_experiment_code = training_config.experiment_code
            training_random_state = training_config.random_state
            training_artifacts = os.path.join(ROOT_DIR, training_config.artifact_dir)
            # The directory must exist before validation: artifact_dir is a DirectoryPath.
            create_directories(path_to_directories=[training_artifacts])
            training_pipeline_config = TrainingPipelineConfig(artifact_dir=training_artifacts,
                                                              experiment_code=training_experiment_code,
                                                              pipeline_name=training_pipeline_name,
                                                              training_random_state=training_random_state)
            logger.info(f"Training pipeline config: {training_pipeline_config}")
            return training_pipeline_config
        except Exception as e:
            raise AppException(e, sys) from e

    def get_data_validation_config(self) -> DataValidationConfig:
        """Build the data-validation config and create its report directory.

        Calls ``get_data_ingestion_config`` to obtain the ingested train/test file
        paths, so the ingestion directories are created as a side effect.
        Raises:
            AppException: on any failure (including pydantic validation of the
            FilePath/DirectoryPath fields).
        """
        try:
            pipeline_config = self.pipeline_config
            artifact_dir = pipeline_config.artifact_dir
            data_ingestion_config = self.get_data_ingestion_config()
            train_data_file = data_ingestion_config.ingested_train_file_path
            test_data_file = data_ingestion_config.ingested_test_data_path
            experiment_code = pipeline_config.experiment_code
            data_validation_config_info = self.config_info.data_validation_config
            data_validated_artifact_dir = Path(
                os.path.join(artifact_dir, data_validation_config_info.data_validation_dir, experiment_code))
            # The schema file lives next to the other configuration files.
            schema_file_path = os.path.join(CONFIG_DIR, data_validation_config_info.schema_file_name)
            report_file_dir = os.path.join(data_validated_artifact_dir, data_validation_config_info.report_dir)
            validated_test_collection = data_validation_config_info.data_validated_test_collection_name
            validated_train_collection = data_validation_config_info.data_validated_train_collection_name
            # Creating the report directory also creates data_validated_artifact_dir (its parent).
            create_directories([report_file_dir])

            data_validation_config = DataValidationConfig(experiment_code=experiment_code,
                                                          data_validated_artifact_dir=data_validated_artifact_dir,
                                                          schema_file_path=schema_file_path,
                                                          report_file_dir=report_file_dir,
                                                          data_validated_test_collection=validated_test_collection,
                                                          data_validated_train_collection=validated_train_collection,
                                                          train_data_file=train_data_file,
                                                          test_data_file=test_data_file)
            return data_validation_config

        except Exception as e:
            # Chain the original error, consistent with the other methods of this class.
            raise AppException(e, sys) from e

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the data-transformation config and create the preprocessing-object directory.

        Calls ``get_data_validation_config`` to reuse the validated train collection
        name and the schema file path.
        Raises:
            AppException: on any failure.
        """
        try:
            pipeline_config = self.pipeline_config
            artifact_dir = pipeline_config.artifact_dir
            experiment_code = pipeline_config.experiment_code
            random_state = pipeline_config.training_random_state
            data_validation_config_info = self.get_data_validation_config()
            data_validated_train_collection = data_validation_config_info.data_validated_train_collection
            schema_file_path = data_validation_config_info.schema_file_path
            data_transformation_config_info = self.config_info.data_transformation_config
            data_transformation_dir_name = data_transformation_config_info.data_transformation_dir
            data_transformation_dir = os.path.join(artifact_dir, data_transformation_dir_name, experiment_code)
            preprocessed_object_dir = data_transformation_config_info.preprocessing_object_dir
            preprocessed_object_name = data_transformation_config_info.preprocessing_object_file_name
            preprocessed_object_file_path = os.path.join(data_transformation_dir, preprocessed_object_dir,
                                                         preprocessed_object_name)
            to_train_collection = data_transformation_config_info.to_train_collection
            to_test_collection = data_transformation_config_info.to_test_collection

            create_directories([os.path.dirname(preprocessed_object_file_path)])
            data_transformation_config = DataTransformationConfig(
                data_validated_train_collection=data_validated_train_collection,
                schema_file_path=schema_file_path,
                random_state=random_state,
                preprocessed_object_file_path=preprocessed_object_file_path,
                to_train_collection=to_train_collection,
                to_test_collection=to_test_collection)
            return data_transformation_config

        except Exception as e:
            # Chain the original error, consistent with the other methods of this class.
            raise AppException(e, sys) from e


In [13]:
from CreditCard.exception import AppException
from CreditCard.logging import logger
# from CreditCard.entity import *
import pandas as pd 
from CreditCard.utils import save_bin , load_bin
from CreditCard.Database import MongoDB
import os
import sys
from typing import List
import numpy as np


class BaseModel:
    """Single-model predictor: preprocess the input, then delegate to one trained model.

    NOTE(review): this class shares its name with ``pydantic.BaseModel``, which other
    cells of this file import; whichever binding runs last wins — confirm the intended
    one is in scope wherever this wrapper is instantiated.
    NOTE(review): this class calls ``transform_data`` on the preprocessing object while
    EstimatorModel calls ``transform`` — confirm which method the preprocessor exposes.
    """

    def __init__(self, preprocessing_object, trained_model_object):
        """
        preprocessing_object: object exposing ``transform_data(X)`` that returns a DataFrame
        trained_model_object: fitted model exposing ``predict`` and ``predict_proba``
        """
        self.preprocessing_object = preprocessing_object
        self.trained_model_object = trained_model_object
        # Columns produced by preprocessing that the single model does not consume.
        self.columns_to_drop = ["cluster"]

    def preprocess_data(self, data_to_preprocess):
        """Transform the input and return a new frame without ``columns_to_drop``.

        The drop is not done in place, so the frame returned by the preprocessing
        object (which may be the caller's own frame) is left untouched.
        Raises KeyError if a column in ``columns_to_drop`` is missing.
        """
        transformed = self.preprocessing_object.transform_data(data_to_preprocess)
        return transformed.drop(columns=self.columns_to_drop)

    def predict(self, X):
        """Preprocess ``X`` and return the wrapped model's ``predict`` output."""
        data_to_predict = self.preprocess_data(data_to_preprocess=X)
        return self.trained_model_object.predict(data_to_predict)

    def predict_proba(self, X):
        """Preprocess ``X`` and return the wrapped model's ``predict_proba`` output."""
        data_to_predict = self.preprocess_data(data_to_preprocess=X)
        return self.trained_model_object.predict_proba(data_to_predict)

    def __repr__(self):
        # Reports the wrapped model's class name, e.g. "RandomForestClassifier()".
        return f"{type(self.trained_model_object).__name__}()"

    def __str__(self):
        return repr(self)

class EstimatorModel:
    """Cluster-routed predictor: every row is scored by the model registered for its cluster."""

    def __init__(self, preprocessing_object, trained_model_dict):
        """
        preprocessing_object: object exposing ``transform(X)``; its output must be a
                              DataFrame containing a "cluster" column
        trained_model_dict:  {cluster : model saved path}
        """
        self.preprocessing_object = preprocessing_object
        self.trained_model_dict = trained_model_dict
        # NOTE(review): load_bin presumably deserialises a binary/pickled object —
        # only load model files from trusted locations.
        self.trained_model_object = {
            cluster: load_bin(path=model_path) for cluster, model_path in trained_model_dict.items()
        }

    def preprocess_data(self, data_to_preprocess):
        """Return the preprocessing object's ``transform`` output for the given data."""
        return self.preprocessing_object.transform(data_to_preprocess)

    def _predict_by_cluster(self, X, method_name, columns):
        """Call ``method_name`` on each cluster's model once and reassemble rows in input order.

        Returns a DataFrame with the given ``columns`` and the transformed frame's index.
        Raises KeyError if a row's cluster has no registered model.
        """
        transformed = self.preprocess_data(data_to_preprocess=X)
        cluster_labels = transformed["cluster"]
        features = transformed.drop(columns=["cluster"])

        chunks = []
        positions = []
        for cluster in cluster_labels.unique():
            model = self.trained_model_object[cluster]
            # Positional row numbers, so any index (non-range, duplicated) is handled.
            row_positions = np.flatnonzero((cluster_labels == cluster).to_numpy())
            output = getattr(model, method_name)(features.iloc[row_positions])
            # One row of output per input row; 1-D predictions become a single column.
            chunks.append(np.asarray(output).reshape(len(row_positions), -1))
            positions.append(row_positions)

        if not chunks:
            # Empty input: keep the expected columns.
            return pd.DataFrame(columns=columns)

        # Undo the per-cluster grouping so output rows line up with input rows.
        restore_order = np.argsort(np.concatenate(positions), kind="stable")
        values = np.concatenate(chunks)[restore_order]
        return pd.DataFrame(values, index=transformed.index, columns=columns)

    def predict(self, X):
        """
        Preprocess the raw input, route each row to the model of its cluster and
        return a DataFrame with a single "prediction" column, one row per input row.
        """
        return self._predict_by_cluster(X, "predict", ["prediction"])

    def predict_proba(self, X):
        """
        Preprocess the raw input, route each row to the model of its cluster and
        return a DataFrame with the class probabilities in columns "0" and "1".
        """
        return self._predict_by_cluster(X, "predict_proba", ["0", "1"])

    def __repr__(self):
        return f"{type(self).__name__}(clusters={list(self.trained_model_object)})"

    def __str__(self):
        return repr(self)

In [10]:
from pathlib import Path

from pydantic import BaseModel, DirectoryPath, FilePath

class MetricReportArtifact(BaseModel):
    """One evaluated model together with the report produced for it."""
    experiment_id: str
    # Class name of the estimator.
    model_name: str
    # The estimator object itself.
    model_obj: object
    # The classification report computed for this estimator.
    report: object

class MetricEvalArtifact(BaseModel):
    """The model selected by ``get_best_model`` and the metric values that selected it.

    Metric fields are floats: evaluation metrics such as accuracy are fractional, and
    an ``int`` field would truncate or reject them.
    """
    best_model: object
    # Value of the evaluation metric on the training (reference) data.
    best_train_eval_param: float
    # Value of the evaluation metric on the test (current) data.
    best_test_eval_param: float
    # Train value minus test value.
    best_eval_param_difference: float
    best_model_name: str
    best_model_report: object

In [11]:
import pandas as pd 
from CreditCard.logging import logger
from CreditCard.exception import AppException
from ensure import ensure_annotations
from evidently.metrics import ClassificationQualityMetric
from evidently.metrics import ClassificationClassBalance
from evidently.metrics import ClassificationConfusionMatrix
from evidently.metrics import ClassificationQualityByClass
from evidently.metrics import ClassificationClassSeparationPlot
from evidently.metrics import ClassificationProbDistribution
from evidently.metrics import ClassificationRocCurve
from evidently.metrics import ClassificationPRCurve
from evidently.metrics import ClassificationPRTable
from evidently.metrics import ClassificationQualityByFeatureTable
from evidently.metrics import ConflictTargetMetric
from evidently.metrics import ConflictPredictionMetric
from evidently.report import Report
from box import ConfigBox

@ensure_annotations
def get_best_model(reports_artifacts: list, eval_param: str, eval_difference: float, base_accuracy: float):
    """
    Description:
    Pick, among the evaluated models, the one with the smallest train-minus-test gap
    on ``eval_param``.

    A model qualifies only if its gap is strictly below ``eval_difference`` (and below
    the gap of every previously accepted model) and its test accuracy is at least
    ``base_accuracy``.

    Params:
    reports_artifacts: list of MetricReportArtifact; each ``report`` must expose
                       ``as_dict()`` whose first metric holds ``result.current`` (test)
                       and ``result.reference`` (train)
    eval_param: key of the metric to compare, e.g. "accuracy"
    eval_difference: maximum allowed train-minus-test gap
    base_accuracy: minimum test accuracy

    return
    MetricEvalArtifact for the selected model, or None when no model qualifies.
    """
    best_artifact = None
    best_train_eval_param = None
    best_test_eval_param = None
    # Start from the configured threshold so the first comparison is numeric.
    best_eval_param_difference = eval_difference

    for report_artifact in reports_artifacts:
        data = ConfigBox(report_artifact.report.as_dict())
        quality_result = data.metrics[0].result
        test_data_result = quality_result.current
        train_data_result = quality_result.reference
        test_eval_param = test_data_result[eval_param]
        train_eval_param = train_data_result[eval_param]
        model_eval_difference = train_eval_param - test_eval_param
        if model_eval_difference < best_eval_param_difference and test_data_result["accuracy"] >= base_accuracy:
            best_artifact = report_artifact
            best_train_eval_param = train_eval_param
            best_test_eval_param = test_eval_param
            best_eval_param_difference = model_eval_difference

    # Compare against None explicitly: an estimator object may define its own truthiness.
    if best_artifact is None:
        return None

    return MetricEvalArtifact(best_model=best_artifact.model_obj,
                              best_train_eval_param=best_train_eval_param,
                              best_test_eval_param=best_test_eval_param,
                              best_eval_param_difference=best_eval_param_difference,
                              best_model_name=best_artifact.model_name,
                              best_model_report=best_artifact.report)


@ensure_annotations
def evaluate_classification_model(X_train: pd.DataFrame, y_train: pd.DataFrame,
                                  X_test: pd.DataFrame, y_test: pd.DataFrame, base_accuracy : float  ,report_dir : str,
                                  eval_difference : float , estimators: list,  eval_param : str = "accuracy" ,
                                  experiment_id: str = None ) -> MetricEvalArtifact:
    """
      Description:
      Build a classification report for every estimator (train data as reference, test
      data as current), save each report as HTML and return the best acceptable model.

      Params:
      X_train: Training dataset input feature
      y_train: Training dataset target feature
      X_test: Testing dataset input feature
      y_test: Testing dataset target feature
      base_accuracy: minimum test accuracy a model must reach
      report_dir: directory under which "model_report/<experiment_id>/<model name>/" is written
      eval_difference: maximum allowed train-minus-test gap on eval_param
      estimators: list of fitted models exposing predict_proba
      eval_param: metric used to compare models
      experiment_id: the experiment id; defaults to a timestamp

      return
      MetricEvalArtifact describing the selected model, or None when no model qualifies.
      NOTE(review): the return annotation does not mention None and the arguments are
      checked by ensure_annotations — confirm the decorator tolerates a None return and
      that callers pass DataFrames (not Series) for y_train / y_test.

      Raises:
      AppException: wraps any error raised while evaluating the estimators.
    """
    # Function-scope imports: this cell's import block does not provide these names.
    import os
    import sys
    from datetime import datetime

    if experiment_id is None:
        experiment_id = f"{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}"

    model_dir = os.path.join(report_dir, "model_report")
    experiment_dir = os.path.join(model_dir, experiment_id)
    model_report_list = list()
    best_model = None
    try:
        for estimator in estimators:
            model_name = estimator.__class__.__name__

            # Fresh frames per estimator so one model's predictions never leak into another report.
            current_df = X_test.copy()
            current_df["target"] = y_test.copy()
            reference_df = X_train.copy()
            reference_df["target"] = y_train.copy()

            # predict_proba may return an ndarray or a DataFrame; wrap it so the second
            # column (positive class) can be taken positionally in both cases.
            current_df["prediction"] = pd.DataFrame(estimator.predict_proba(X_test)).iloc[:, 1].to_numpy()
            reference_df["prediction"] = pd.DataFrame(estimator.predict_proba(X_train)).iloc[:, 1].to_numpy()

            classification_report = Report(metrics=[ClassificationQualityMetric(),
                                                    ClassificationClassBalance(),
                                                    ConflictTargetMetric(),
                                                    ConflictPredictionMetric(),
                                                    ClassificationConfusionMatrix(),
                                                    ClassificationQualityByClass(),
                                                    ClassificationClassSeparationPlot(),
                                                    ClassificationProbDistribution(),
                                                    ClassificationRocCurve(),
                                                    ClassificationPRCurve(),
                                                    ClassificationPRTable(),
                                                    ClassificationQualityByFeatureTable(),])
            # The report has to be calculated before it can be saved or read back with as_dict().
            classification_report.run(reference_data=reference_df, current_data=current_df)

            model_report_path = os.path.join(experiment_dir, model_name, "evidently_classification_report.html")
            os.makedirs(os.path.dirname(model_report_path), exist_ok=True)
            classification_report.save_html(filename=model_report_path)

            model_report_artifact = MetricReportArtifact(experiment_id=experiment_id,
                                                         model_name=model_name,
                                                         model_obj=estimator,
                                                         report=classification_report)
            model_report_list.append(model_report_artifact)

        best_model = get_best_model(reports_artifacts=model_report_list, eval_param=eval_param,
                                    eval_difference=eval_difference, base_accuracy=base_accuracy)

        if best_model is None:
            logger.info("No acceptable model found")
        else:
            logger.info(f"Acceptable model  name {best_model.best_model_name}. ")
            logger.info(f"Acceptable model test {eval_param} score {best_model.best_test_eval_param}. ")
            logger.info(f"best model artifact {best_model}")
    except Exception as e:
        raise AppException(e, sys) from e
    return best_model

In [15]:
import importlib
from datetime import datetime
import pandas as pd 
from dataclasses import dataclass
import matplotlib.pyplot as plt
import seaborn as sns 
import yaml
from CreditCard.exception import AppException 
import os
import sys
from collections import namedtuple
from typing import List
from CreditCard.logging import logger
from CreditCard.utils import read_yaml
from neuro_mf import ModelFactory , GridSearchedBestModel
from sklearn.model_selection import train_test_split
class ModelTrainer:
    """Trains one base model plus one model per cluster and compares the resulting predictors.

    The constructor loads the train, test and validated collections, the saved
    preprocessing object and the schema. ``initiate_model_trainer`` runs the training.
    """

    def __init__(self, model_trainer_config: ModelTrainerConfig):
        """Load every input the trainer needs.

        Raises:
            AppException: if a collection, the preprocessing object or the schema cannot be loaded.
        """
        try:
            logger.info(f"{'>>' * 30}Model trainer log started.{'<<' * 30} ")
            self.model_trainer_config_info = model_trainer_config
            to_train_collection = self.model_trainer_config_info.to_train_collection
            to_test_collection = self.model_trainer_config_info.to_test_collection
            validated_collection = self.model_trainer_config_info.validated_collection
            to_train_connection = MongoDB(to_train_collection, drop_collection=False)
            to_test_connection = MongoDB(to_test_collection, drop_collection=False)
            validated_connection = MongoDB(validated_collection, drop_collection=False)
            self.validated_df = validated_connection.find_many_as_df()
            self.train_df = to_train_connection.find_many_as_df()
            self.test_df = to_test_connection.find_many_as_df()
            preprocessing_obj_path = self.model_trainer_config_info.preprocessed_object_file_path
            self.preprocessing_obj = load_bin(path=preprocessing_obj_path)
            self.schema = read_yaml(path_to_yaml=self.model_trainer_config_info.schema_file_path)

        except Exception as e:
            raise AppException(e, sys) from e

    def get_validated_data_to_test(self, test_size=0.2, random_state=1961):
        """Split the validated data into train/test features and targets.

        Params:
            test_size: fraction of rows held out for testing.
            random_state: seed for the split.
        Returns:
            X_train, X_test, y_train, y_test — as returned by ``train_test_split``.
        """
        data = self.validated_df.copy()
        master_x = data[self.schema.columns_to_test]
        master_y = data[self.schema.target_test]
        X_train, X_test, y_train, y_test = train_test_split(master_x, master_y, test_size=test_size,
                                                            random_state=random_state)
        return X_train, X_test, y_train, y_test

    @staticmethod
    def _require_metric_info(metric_info, stage: str):
        """Return ``metric_info`` unchanged, or raise a clear error when no model was accepted."""
        if metric_info is None:
            raise ValueError(f"No acceptable model found for {stage}")
        return metric_info

    def initiate_model_trainer(self) -> ModelTrainerArtifact:
        """Train the base model and the per-cluster models, then evaluate both predictors.

        Steps:
        1. Grid-search a base model on the whole training set, pick the best acceptable
           one and save it wrapped in ``BaseModel``.
        2. For every cluster, grid-search models on that cluster's rows, pick the best
           acceptable one and save it.
        3. Wrap the per-cluster models in ``EstimatorModel``, save it, and evaluate it
           together with the base predictor on a split of the validated data.

        Raises:
            AppException: wraps any failure, including "no acceptable model" at any step.
        """
        try:
            logger.info("Loading transformed training dataset")
            logger.info(f"{'>>' * 30}Base Model.{'<<' * 30} ")
            model_trainer_config_info = self.model_trainer_config_info
            trained_model_file_path = model_trainer_config_info.trained_model_file_path
            model_report_dir = model_trainer_config_info.model_report_dir
            model_config_file_path = model_trainer_config_info.model_config_file_path
            # Cast to float: the evaluation helpers annotate these parameters as float
            # and are wrapped in ensure_annotations, while the config may supply ints.
            base_accuracy = float(model_trainer_config_info.base_accuracy)
            eval_difference = float(model_trainer_config_info.eval_difference)
            eval_param = model_trainer_config_info.eval_param
            experiment_id = model_trainer_config_info.experiment_id
            target_column = self.schema.target_column
            cluster_column = self.schema.cluster_column

            base_features_to_drop = self.schema.base_model_features_to_drop
            base_x_train = self.train_df.drop(base_features_to_drop, axis=1)
            base_y_train = self.train_df[target_column]
            base_x_test = self.test_df.drop(base_features_to_drop, axis=1)
            base_y_test = self.test_df[target_column]
            logger.info(f"Expected accuracy: {base_accuracy}")
            logger.info("Extracting model config file path")
            base_model_factory = ModelFactory(model_config_path=model_config_file_path)
            # Called for its side effect of populating grid_searched_best_model_list.
            base_model_factory.get_best_model(X=base_x_train, y=base_y_train, base_accuracy=base_accuracy)
            base_grid_searched_best_model_list: List[GridSearchedBestModel] = base_model_factory.grid_searched_best_model_list
            base_model_list = [model.best_model for model in base_grid_searched_best_model_list]
            base_report_dir = os.path.join(model_report_dir, "Base_model")

            base_metric_info: MetricEvalArtifact = self._require_metric_info(
                evaluate_classification_model(estimators=base_model_list, X_train=base_x_train,
                                              y_train=base_y_train, X_test=base_x_test, y_test=base_y_test,
                                              base_accuracy=base_accuracy, report_dir=base_report_dir,
                                              eval_difference=eval_difference, eval_param=eval_param,
                                              experiment_id=experiment_id),
                stage="the base model")

            logger.info(f"{base_metric_info.__dict__}")
            base_model_file_name = f"{base_metric_info.best_model_name}.pkl"
            base_model_file_path = os.path.join(base_report_dir, "Model", base_model_file_name)
            base_model_object = base_metric_info.best_model
            # NOTE(review): `BaseModel` must resolve to the predictor wrapper defined in this
            # file, not to pydantic.BaseModel, which other cells import under the same name.
            base_predictor = BaseModel(preprocessing_object=self.preprocessing_obj, trained_model_object=base_model_object)
            save_bin(data=base_predictor, path=base_model_file_path)

            logger.info(f"{'>>' * 30}Base model done{'<<' * 30} ")
            cluster_model_dict = dict()
            clusters = self.train_df[cluster_column].unique()
            for cluster in clusters:
                to_train_df = self.train_df[self.train_df[cluster_column] == cluster]
                to_test_df = self.test_df[self.test_df[cluster_column] == cluster]
                to_train_features = to_train_df.drop(self.schema.to_drop_cluster_data, axis=1)
                # Training target comes from the training rows of this cluster.
                to_train_target = to_train_df[target_column]
                to_test_features = to_test_df.drop(self.schema.to_drop_cluster_data, axis=1)
                to_test_target = to_test_df[target_column]
                model_factory = ModelFactory(model_config_path=model_config_file_path)
                best_model = model_factory.get_best_model(X=to_train_features, y=to_train_target, base_accuracy=base_accuracy)
                logger.info(f"Best model found on training dataset: {best_model}")

                logger.info("Extracting trained model list.")
                grid_searched_best_model_list: List[GridSearchedBestModel] = model_factory.grid_searched_best_model_list

                model_list = [model.best_model for model in grid_searched_best_model_list]
                logger.info(f"Model list: {model_list} , {len(model_list)}")
                cluster_name = f"cluster_{cluster}"
                report_dir = os.path.join(model_report_dir, cluster_name)
                logger.info("Evaluation all trained model on training and testing dataset both")
                metric_info: MetricEvalArtifact = self._require_metric_info(
                    evaluate_classification_model(estimators=model_list, X_train=to_train_features,
                                                  y_train=to_train_target, X_test=to_test_features,
                                                  y_test=to_test_target,
                                                  base_accuracy=base_accuracy, report_dir=report_dir,
                                                  eval_difference=eval_difference, eval_param=eval_param,
                                                  experiment_id=experiment_id),
                    stage=cluster_name)
                logger.info(f"Metric info: {metric_info}")

                model_file_name = f"{metric_info.best_model_name}.pkl"
                model_file_path = os.path.join(report_dir, "Model", model_file_name)
                model_object = metric_info.best_model
                cluster_model_dict[cluster] = model_file_path
                # Must be saved before EstimatorModel is built: it loads each model from its path.
                save_bin(data=model_object, path=model_file_path)

            logger.info("Best found model on both training and testing dataset.")

            prediction_model = EstimatorModel(preprocessing_object=self.preprocessing_obj, trained_model_dict=cluster_model_dict)
            cluster_report_dir = os.path.join(model_report_dir, "cluster_custom_model")
            cluster_model_path = os.path.join(cluster_report_dir, "cluster_custom_model.pkl")
            save_bin(data=prediction_model, path=cluster_model_path)

            clustered_model_list = [prediction_model, base_predictor]

            X_train, X_test, y_train, y_test = self.get_validated_data_to_test()

            metric_info = self._require_metric_info(
                evaluate_classification_model(estimators=clustered_model_list, X_train=X_train,
                                              y_train=y_train, X_test=X_test, y_test=y_test,
                                              report_dir=cluster_report_dir,
                                              base_accuracy=base_accuracy, eval_difference=eval_difference,
                                              eval_param=eval_param,
                                              experiment_id=experiment_id),
                stage="the cluster/base comparison")
            logger.info(f"Saving model at path: {trained_model_file_path}")
            save_bin(data=prediction_model, path=trained_model_file_path)

            # Only attributes that MetricEvalArtifact actually defines are read here.
            # NOTE(review): ModelTrainerArtifact declares no fields in this file, so these
            # keyword arguments are not stored as typed fields — confirm its schema.
            model_trainer_artifact = ModelTrainerArtifact(is_trained=True, message="Model Trained successfully",
                                                          trained_model_file_path=trained_model_file_path,
                                                          best_model_name=metric_info.best_model_name,
                                                          train_eval_param=metric_info.best_train_eval_param,
                                                          test_eval_param=metric_info.best_test_eval_param,
                                                          eval_param_difference=metric_info.best_eval_param_difference)

            logger.info(f"Model Trainer Artifact: {model_trainer_artifact}")
            return model_trainer_artifact
        except Exception as e:
            logger.error(e)
            raise AppException(e, sys) from e

    def __del__(self):
        logger.info(f"{'>>' * 30}Model trainer log completed.{'<<' * 30} ")
