In [1]:
import os

In [2]:
# Show the kernel's working directory before any chdir (the notebook is started from research/).
%pwd

'/Users/harshbhatt/Desktop/Projects/walmart-sales-forecast-mlops/research'

In [3]:
os.chdir("../")
%pwd

'/Users/harshbhatt/Desktop/Projects/walmart-sales-forecast-mlops'

In [6]:
# Alternative setup (disabled): let the dagshub client configure MLflow tracking for this repo.
# import dagshub
# dagshub.init(repo_owner='harshvbhatt', repo_name='walmart-sales-forecast-mlops', mlflow=True)

In [None]:
# Point MLflow at the DagsHub-hosted tracking server for this project.
os.environ["MLFLOW_TRACKING_URI"] = "https://dagshub.com/harshvbhatt/walmart-sales-forecast-mlops.mlflow"
os.environ["MLFLOW_TRACKING_USERNAME"] = "harshvbhatt"

# Never store the access token in the notebook: reuse one already exported in
# the environment, otherwise prompt for it so it stays out of the source and
# the saved outputs.
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    from getpass import getpass

    os.environ["MLFLOW_TRACKING_PASSWORD"] = getpass("MLflow tracking password / DagsHub token: ")

In [15]:
from dataclasses import dataclass, field
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable settings for the model-evaluation stage."""

    # Directory that holds the stage's outputs.
    root_dir: Path
    # CSV file containing the test data (features plus the target column).
    test_data_path: Path
    # Pickle file of the pipeline whose predictions are evaluated.
    pipeline_path: Path 
    # Pickle file of the standalone model instance.
    model_instance_path: Path
    # all_params: dict  -- disabled: params are currently not forwarded to this stage
    # Name of the column in the test data that holds the target values.
    target_column: str
    # JSON file the computed metrics are written to.
    metrics_file_name: Path
    # MLflow server URI (used as the model-registry URI by the evaluation code).
    mlflow_uri: str

In [9]:
from projectFiles.constants import *
from projectFiles.utils.common import read_yaml, create_directories, save_json

In [19]:
class ConfigurationManager:
    """Load the project's YAML files and build per-stage configuration objects."""

    # Fallback MLflow server, used when MLFLOW_TRACKING_URI is not set in the environment.
    DEFAULT_MLFLOW_URI = "https://dagshub.com/harshvbhatt/walmart-sales-forecast-mlops.mlflow"

    def __init__(self, config_filepath = CONFIG_FILE_PATH, params_filepath = PARAMS_FILE_PATH, schema_filepath = SCHEMA_FILE_PATH):
        """Read the config, params and schema YAML files and create the artifacts root.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the configuration for the model-evaluation stage.

        Creates the stage's root directory as a side effect.

        Returns:
            ModelEvaluationConfig: Paths taken from the ``model_evaluation``
            section of the config file (converted to ``Path`` so they match the
            dataclass annotations), the target column name from the schema, and
            the MLflow URI.
        """
        section = self.config.model_evaluation

        create_directories([section.root_dir])

        # Prefer the URI exported in the environment so the registry and the
        # tracking client point at the same server; fall back to the project
        # default when the variable is unset or empty.
        mlflow_uri = os.environ.get("MLFLOW_TRACKING_URI") or self.DEFAULT_MLFLOW_URI

        return ModelEvaluationConfig(
            root_dir=Path(section.root_dir),
            test_data_path=Path(section.test_data_path),
            pipeline_path=Path(section.pipeline_path),
            model_instance_path=Path(section.model_instance_path),
            metrics_file_name=Path(section.metrics_file_name),
            target_column=self.schema.TARGET_COLUMN.name,
            mlflow_uri=mlflow_uri,
        )

In [12]:
import os
import pandas as pd
from sklearn.metrics import root_mean_squared_error, r2_score
from sklearn.model_selection import cross_val_predict
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn
import numpy as np
import pickle

In [22]:
class ModelEvaluation:
    """Score the pickled pipeline on the test data and record the results in MLflow."""

    # NOTE(review): the default value is the ModelEvaluationConfig class itself,
    # which looks like a typo for a type annotation. It is kept so existing
    # call sites keep working; always pass a real config instance.
    def __init__(self, config: ModelEvaluationConfig = ModelEvaluationConfig):
        self.config = config

    def eval_metrics(self, actual, pred):
        """Return ``(rmse, r2)`` for the true values ``actual`` and predictions ``pred``."""
        rmse = root_mean_squared_error(actual, pred)
        r2 = r2_score(actual, pred)
        return rmse, r2

    def log_in_mlflow(self):
        """Evaluate the pipeline on the test set, save the metrics and log to MLflow.

        Steps:
            1. Read the test CSV and unpickle the pipeline.
            2. Drop the ``Date`` column and split features from the target.
            3. Inside one MLflow run: predict, compute RMSE and R2, write them
               to the metrics JSON file, log them as MLflow metrics and log the
               pipeline as a model.

        The model is also registered as ``LGBMRegressorPipeline`` unless the
        tracking URI uses the ``file`` scheme.
        """
        target_col = self.config.target_column

        # Chained drop instead of inplace=True: the frame is never mutated after creation.
        # presumably the pipeline was fitted without the Date column; confirm against training.
        test_df = pd.read_csv(self.config.test_data_path).drop(columns=["Date"])

        # SECURITY: pickle.load can execute arbitrary code embedded in the file;
        # only load artifacts produced by this project's own training stage.
        # Only the pipeline is needed for scoring, so the standalone model
        # instance is not loaded here.
        with open(self.config.pipeline_path, "rb") as file:
            regressor_pipeline = pickle.load(file)

        test_x = test_df.drop(columns=target_col)
        test_y = test_df[target_col]

        # NOTE(review): only the registry URI is set from the config; the
        # tracking URI is whatever MLflow resolves on its own (for example from
        # the MLFLOW_TRACKING_URI environment variable).
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run():
            prediction = regressor_pipeline.predict(test_x)
            rmse, r2 = self.eval_metrics(test_y, prediction)

            scores = {"rmse": rmse, "r2": r2}
            save_json(path=Path(self.config.metrics_file_name), data=scores)

            for metric_name, metric_value in scores.items():
                mlflow.log_metric(metric_name, metric_value)

            # Register the model only when tracking is not a local file store.
            log_kwargs = {}
            if tracking_url_type_store != "file":
                log_kwargs["registered_model_name"] = "LGBMRegressorPipeline"
            mlflow.sklearn.log_model(regressor_pipeline, "model", **log_kwargs)

In [23]:
# Run the evaluation stage end to end: load the configuration, build the stage
# config, then evaluate and log to MLflow. Exceptions are left to propagate
# with their original traceback.
config = ConfigurationManager()
model_evaluation_config = config.get_model_evaluation_config()
model_evaluation = ModelEvaluation(config = model_evaluation_config)
model_evaluation.log_in_mlflow()

[2025-03-02 23:59:46,508: INFO: common: yaml file: config/config.yaml loaded successfully]
[2025-03-02 23:59:46,510: INFO: common: yaml file: params.yaml loaded successfully]
[2025-03-02 23:59:46,512: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-03-02 23:59:46,513: INFO: common: created directory at: artifacts]
[2025-03-02 23:59:46,514: INFO: common: created directory at: artifacts/model_evaluation]
[2025-03-02 23:59:46,725: INFO: common: json file saved at: artifacts/model_evaluation/metrics.json]


Successfully registered model 'LGBMRegressorPipeline'.
2025/03/02 23:59:52 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: LGBMRegressorPipeline, version 1
Created version '1' of model 'LGBMRegressorPipeline'.


🏃 View run colorful-dolphin-759 at: https://dagshub.com/harshvbhatt/walmart-sales-forecast-mlops.mlflow/#/experiments/0/runs/34e711e5f2f3418aa40e818b9ed2ad75
🧪 View experiment at: https://dagshub.com/harshvbhatt/walmart-sales-forecast-mlops.mlflow/#/experiments/0
