In [17]:
# Show the kernel's current working directory before it is changed below.
%pwd

'C:\\Users\\kural\\Desktop\\Projects\\End_To_End_MLops'

In [18]:
import os
from pathlib import Path
# Switch the kernel to the project root. The default below is the author's
# local checkout and only exists on that machine; set the SDP_PROJECT_ROOT
# environment variable to point at your own checkout instead of editing the cell.
project_root = Path(os.environ.get("SDP_PROJECT_ROOT", "C:\\Users\\kural\\Desktop\\Projects\\End_To_End_MLops\\"))
os.chdir(project_root)

In [19]:
# Confirm the working directory after the os.chdir call above.
%pwd

'C:\\Users\\kural\\Desktop\\Projects\\End_To_End_MLops'

In [20]:
from software_defect_prediction.constants import *
from software_defect_prediction.utils.common import *
from software_defect_prediction.entity.config_entity import ModelEvaluationConfig
from software_defect_prediction.config.configuration import ConfigurationManager

import shutil

In [21]:
from sys import exception
import pandas as pd
# from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,
    confusion_matrix, matthews_corrcoef, cohen_kappa_score,
    balanced_accuracy_score, hamming_loss, jaccard_score
)
import json
from mlflow.models import infer_signature
import mlflow

class Model_Evaulation():
    """Evaluate a previously trained classifier and record the run in MLflow.

    Intended call order: ``prepare_and_load_files()`` -> ``setup_mlflow()`` ->
    ``log_into_mlflow()``.
    """

    def __init__(self,model_evaluation_config : ModelEvaluationConfig, model_parameters : ConfigBox ,predictor_col : str,mlflow_tracking : ConfigBox) -> None:
        """Store configuration; no files are touched until ``prepare_and_load_files``.

        Args:
            model_evaluation_config: Paths and file names for this stage. The
                attributes read are ``root_dir``, ``source_file_path``,
                ``train_file``, ``test_file``, ``model_file`` and
                ``perf_metrics_file``.
            model_parameters: Object exposing ``MODEL_PARAMETERS.to_dict()``;
                the resulting dict is logged as the MLflow run parameters.
            predictor_col: Name of the target column in the train/test CSVs.
            mlflow_tracking: Object exposing ``MLFLOW_TRACKING_URI``,
                ``MLFLOW_TRACKING_USERNAME``, ``MLFLOW_TRACKING_PASSWORD`` and
                ``MLFLOW_TRACKING_EXPERIMENT``.
        """
        self.config = model_evaluation_config
        self.predictor_col = predictor_col
        self.model_parameters = model_parameters.MODEL_PARAMETERS.to_dict()
        self.__mlflow_tracking = mlflow_tracking
        self.model = None
        self.train_df = None
        self.test_df = None

    def prepare_and_load_files(self) -> None:
        """Copy the train/test/model files into ``root_dir`` and load them.

        Populates ``self.train_df``, ``self.test_df`` and ``self.model``.

        Raises:
            Exception: Any copy/read/deserialisation error is logged and
                re-raised unchanged.
        """
        try:
            root_dir = Path(self.config.root_dir)
            source_dir = Path(self.config.source_file_path)

            for file_name in (self.config.train_file, self.config.test_file, self.config.model_file):
                destination_file_path = root_dir / Path(file_name)
                # Remove a stale copy from an earlier run before copying afresh.
                if os.path.exists(destination_file_path):
                    os.remove(destination_file_path)

                shutil.copy(source_dir / Path(file_name), self.config.root_dir)

            train_path = root_dir / Path(self.config.train_file)
            logger.info(train_path)
            self.train_df = pd.read_csv(train_path)
            self.test_df = pd.read_csv(root_dir / Path(self.config.test_file))
            # Load the model file that was just copied (config.model_file) rather
            # than a hard-coded file name that may not match the configuration.
            # NOTE: joblib.load unpickles arbitrary code; only load trusted artifacts.
            self.model = joblib.load(root_dir / Path(self.config.model_file))
            logger.info("model, train and test data loaded successfully")

        except Exception:
            # `Exception` (the builtin class) is required here: the lowercase
            # `sys.exception` is a function, and using it in an `except` clause
            # raises TypeError as soon as any error occurs.
            logger.error("model, train and test data loding failed")
            raise

    def setup_mlflow(self):
        """Export the MLflow tracking URI and credentials as environment variables."""
        os.environ['MLFLOW_TRACKING_URI'] = self.__mlflow_tracking.MLFLOW_TRACKING_URI
        os.environ['MLFLOW_TRACKING_USERNAME'] =  self.__mlflow_tracking.MLFLOW_TRACKING_USERNAME
        os.environ['MLFLOW_TRACKING_PASSWORD'] = self.__mlflow_tracking.MLFLOW_TRACKING_PASSWORD

    def eval_metrics_classification(self, actual, pred, pred_proba=None):
        """Compute classification metrics for a set of predictions.

        Args:
            actual: True labels.
            pred: Predicted (hard) labels.
            pred_proba: Optional probability scores for ROC AUC: a 1-D array of
                positive-class scores for a binary problem, or an
                ``(n_samples, n_classes)`` matrix for a multiclass one. When
                omitted, ROC AUC is computed from ``pred`` as before; for a
                binary problem that value is identical to balanced accuracy,
                so passing scores is strongly preferred.

        Returns:
            dict mapping metric name to a plain ``float``.
        """
        roc_input = pred if pred_proba is None else pred_proba

        metrics = {
            'accuracy': accuracy_score(actual, pred),
            'precision': precision_score(actual, pred, average='weighted'),
            'recall': recall_score(actual, pred, average='weighted'),
            'f1': f1_score(actual, pred, average='weighted'),
            'roc_auc': roc_auc_score(actual, roc_input, average='weighted', multi_class='ovr'),
            'mcc': matthews_corrcoef(actual, pred),
            'cohen_kappa': cohen_kappa_score(actual, pred),
            'balanced_accuracy': balanced_accuracy_score(actual, pred),
            'hamming_loss': hamming_loss(actual, pred),
            'jaccard_score': jaccard_score(actual, pred, average='weighted'),
        }
        # Plain floats keep the dict JSON-serialisable regardless of the
        # numeric type the metric functions return.
        return {name: float(value) for name, value in metrics.items()}

    def _prediction_scores(self, features):
        """Return probability scores usable by ``roc_auc_score``, or ``None``.

        ``None`` is returned when the model has no ``predict_proba`` or when it
        reports fewer than two classes.
        """
        if not hasattr(self.model, "predict_proba"):
            return None
        proba = self.model.predict_proba(features)
        if proba.ndim != 2 or proba.shape[1] < 2:
            return None
        # Binary: 1-D score of the class with the greater label (second column).
        # Multiclass: the full probability matrix.
        return proba[:, 1] if proba.shape[1] == 2 else proba

    def log_into_mlflow(self) -> None:
        """Score the model on the test split and log params, metrics and model.

        Also writes the metrics as JSON to ``root_dir / perf_metrics_file``.

        Raises:
            RuntimeError: If ``prepare_and_load_files`` has not been run.
        """
        if self.model is None or self.train_df is None or self.test_df is None:
            raise RuntimeError("prepare_and_load_files() must be called before log_into_mlflow()")

        non_feature_cols = ['id', self.predictor_col]
        X_train = self.train_df.drop(columns=non_feature_cols)

        X_test = self.test_df.drop(columns=non_feature_cols)
        y_test = self.test_df[self.predictor_col].astype(int)

        mlflow.set_tracking_uri(uri=self.__mlflow_tracking.MLFLOW_TRACKING_URI)
        mlflow.set_experiment(self.__mlflow_tracking.MLFLOW_TRACKING_EXPERIMENT)

        with mlflow.start_run():

            y_pred = self.model.predict(X_test)

            model_perf_metrics = self.eval_metrics_classification(
                actual=y_test,
                pred=y_pred,
                pred_proba=self._prediction_scores(X_test),
            )
            with open(Path(Path(self.config.root_dir) / Path(self.config.perf_metrics_file)), "w") as outfile:
                json.dump(model_perf_metrics, outfile)

            mlflow.log_params(self.model_parameters)
            # One batched call instead of a request per metric.
            mlflow.log_metrics(model_perf_metrics)

            # A handful of rows is enough for the schema and the stored example;
            # passing the whole training frame would serialise the entire
            # dataset into the model artifact.
            input_example = X_train.head(5)
            signature = infer_signature(input_example, self.model.predict(input_example))

            mlflow.sklearn.log_model(
                sk_model=self.model,
                signature=signature,
                input_example=input_example,
                artifact_path="software_defect_prediction",
                registered_model_name="RandomForestClassifier",
            )


In [22]:
# Gather every piece of configuration the evaluator needs.
config_manager = ConfigurationManager()
model_evaluation_config = config_manager.get_model_evaluation_config()
model_params = config_manager.get_model_params()
predictor = config_manager.get_data_schema().TARGET_COLUMN.name

# Build the evaluator, then stage the files, export the MLflow credentials
# and log the evaluation run.
model_eval = Model_Evaulation(
    model_evaluation_config=model_evaluation_config,
    model_parameters=model_params,
    predictor_col=predictor,
    mlflow_tracking=config_manager.get_mlflow_credentials(),
)
model_eval.prepare_and_load_files()
model_eval.setup_mlflow()
model_eval.log_into_mlflow()

[32m2024-06-01 21:37:32.029[0m | [1mINFO    [0m | [36msoftware_defect_prediction.utils.common[0m:[36mread_yaml[0m:[36m31[0m - [1myaml file: config\config.yaml loaded successfully[0m
[32m2024-06-01 21:37:32.036[0m | [1mINFO    [0m | [36msoftware_defect_prediction.utils.common[0m:[36mread_yaml[0m:[36m31[0m - [1myaml file: params.yaml loaded successfully[0m
[32m2024-06-01 21:37:32.037[0m | [1mINFO    [0m | [36msoftware_defect_prediction.utils.common[0m:[36mread_yaml[0m:[36m31[0m - [1myaml file: schema.yaml loaded successfully[0m
[32m2024-06-01 21:37:32.042[0m | [1mINFO    [0m | [36msoftware_defect_prediction.utils.common[0m:[36mread_yaml[0m:[36m31[0m - [1myaml file: credentials.yaml loaded successfully[0m
[32m2024-06-01 21:37:32.044[0m | [1mINFO    [0m | [36msoftware_defect_prediction.utils.common[0m:[36mcreate_directories[0m:[36m51[0m - [1mcreated directory at: artifacts[0m
[32m2024-06-01 21:37:32.047[0m | [1mINFO    [0m | 

TypeError: log_model() got an unexpected keyword argument 'artifacts'