In [1]:
import os

In [2]:
# import dagshub
# dagshub.init(repo_owner='mann-lean', repo_name='data-science-project', mlflow=True)

# import mlflow
# with mlflow.start_run():
#   mlflow.log_param('parameter name', 'value')
#   mlflow.log_metric('metric name', 1)

In [3]:
# MLflow tracking configuration.
#
# SECURITY: an access token used to be hardcoded in this cell. Anything that was
# committed here must be treated as leaked and revoked in the DagsHub account.
# Secrets are now taken from the environment only; export
# MLFLOW_TRACKING_PASSWORD before starting the kernel (or load it from a
# git-ignored .env file) instead of pasting it into the notebook.
import warnings

# Non-secret values: keep whatever the environment already provides and fall
# back to this project's defaults otherwise.
os.environ.setdefault(
    "MLFLOW_TRACKING_URI",
    "https://dagshub.com/mann-lean/data-science-project.mlflow",
)
os.environ.setdefault("MLFLOW_TRACKING_USERNAME", "mann-lean")

# Never write the token from source code; only report when it is missing.
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    warnings.warn(
        "MLFLOW_TRACKING_PASSWORD is not set; authenticated MLflow requests "
        "may fail. Export it in the environment before running this notebook."
    )

In [4]:
%pwd

'e:\\dsProject\\nycTaxiProject\\research'

In [5]:
# Step up from research/ to the project root, but only while we are still
# inside research/. An unconditional chdir('../') climbs one more level every
# time this cell is re-run, which breaks the relative paths used further down.
if os.path.basename(os.getcwd()) == "research":
    os.chdir('../')

In [6]:
%pwd

'e:\\dsProject\\nycTaxiProject'

In [7]:
from dataclasses import dataclass
from pathlib import Path

In [8]:
# Entity
@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Read-only bundle of settings consumed by the model-evaluation stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    Field order is part of the positional constructor and must not change.
    """

    # Working directory of the stage.
    root_dir: Path

    # Feature / target inputs for the training and testing splits.
    x_train_dir: Path
    y_train_dir: Path
    x_test_dir: Path
    y_test_dir: Path

    # Location of the serialized model to evaluate.
    model_dir: Path

    # Parameters that get logged alongside the evaluation run.
    all_params: dict

    # MLflow tracking server address.
    mlflow_uri: str

    # Destination for the locally saved evaluation metrics.
    model_evaluation: Path

In [9]:
from nycTaxiProject.util.common import read_yaml,create_directories,save_evaluation
from nycTaxiProject.constants import CONFIG_FILE_PATH,PARAMS_FILE_PATH

In [10]:
class ConfigurationManger:
    """Load the project's YAML files and build stage-specific config objects."""

    # Used only when MLFLOW_TRACKING_URI is absent (or empty) in the environment.
    DEFAULT_MLFLOW_URI = "https://dagshub.com/mann-lean/data-science-project.mlflow"

    def __init__(
            self,
            config_file_path=CONFIG_FILE_PATH,
            params_file_path=PARAMS_FILE_PATH
    ):
        """Read both YAML files.

        Args:
            config_file_path: Path of the main configuration YAML.
            params_file_path: Path of the parameters YAML.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)

    def get_model_evaluation(self) -> ModelEvaluationConfig:
        """Build the model-evaluation settings.

        Creates the stage's root directory, then copies the paths from the
        ``model_evaluation`` section of the config file into a
        ``ModelEvaluationConfig``. The tracking URI is taken from the
        ``MLFLOW_TRACKING_URI`` environment variable so it is defined in one
        place; the project's default URI is used when the variable is unset.

        Returns:
            ModelEvaluationConfig: Settings for the evaluation stage.
        """
        config = self.config.model_evaluation
        create_directories([config.root_dir])

        # `or` (instead of a .get default) also covers an empty-string value.
        mlflow_uri = os.environ.get("MLFLOW_TRACKING_URI") or self.DEFAULT_MLFLOW_URI

        model_evaluation_config = ModelEvaluationConfig(
            root_dir=config.root_dir,
            x_train_dir=config.x_train_dir,
            y_train_dir=config.y_train_dir,
            x_test_dir=config.x_test_dir,
            y_test_dir=config.y_test_dir,
            model_dir=config.model_dir,
            all_params=self.params,
            mlflow_uri=mlflow_uri,
            model_evaluation=config.model_evaluation
        )
        return model_evaluation_config

In [11]:
import logging
logger=logging.getLogger(__name__)
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.linear_model import SGDRegressor
import numpy as np
import joblib
import pandas as pd
import mlflow
import dagshub

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
class Model_Evaluation:
    """Score the persisted model on a dataset and record the results.

    Metrics are printed, written locally through ``save_evaluation`` and
    logged to MLflow together with the parameters and the model itself.
    """

    def __init__(self, config: ModelEvaluationConfig):
        """Store the settings and load the model once.

        Args:
            config: Evaluation settings. ``config.model_dir`` is handed to
                ``joblib.load``. (This used to be written as a default value,
                ``config=ModelEvaluationConfig``, which made the class itself
                the fallback argument; it is a type annotation now.)
        """
        self.config = config
        # NOTE: joblib.load unpickles the file, which can run arbitrary code.
        # Only load model artifacts produced by this project.
        self.model = joblib.load(self.config.model_dir)

    def evaluate_model(self, X, y_true, name: str):
        """Evaluate the already-loaded model on one dataset.

        The model is loaded in ``__init__``, so it is not a parameter here.

        Args:
            X: Features passed to ``model.predict``.
            y_true: Ground-truth targets compared against the predictions.
            name: Label for this evaluation (e.g. "training"); used in the
                printed header, the saved dictionary and the metric names.

        Raises:
            Exception: Any error raised during evaluation or logging is
                logged and re-raised unchanged.
        """
        try:
            # 1. Initialise the DagsHub integration for this repository.
            dagshub.init(repo_owner='mann-lean', repo_name='data-science-project', mlflow=True)
            # 2. Point MLflow at the configured tracking server.
            mlflow.set_tracking_uri(self.config.mlflow_uri)
            model = self.model

            with mlflow.start_run():
                # 3. Predict and compute the regression metrics.
                y_pred = model.predict(X)
                r2 = r2_score(y_true, y_pred)
                mse = mean_squared_error(y_true, y_pred)
                rmse = np.sqrt(mse)  # derived from mse instead of recomputing it
                mae = mean_absolute_error(y_true, y_pred)

                print(f"-----------{name} Evaluation-------------")
                print(f"R2 Score:   {r2:.4f}")
                print(f"RMSE:       {rmse:.4f}")
                print(f"MSE:        {mse:.4f}")
                print(f"MAE:        {mae:.4f}")

                # Collect the metrics and save them locally.
                evaluation_dict = {"name": name,
                                   "R2_Score": r2,
                                   "RMSE": rmse,
                                   "MSE": mse,
                                   "MAE": mae}
                # NOTE(review): every call writes to the same configured path,
                # so a later evaluation presumably overwrites an earlier one —
                # confirm against save_evaluation.
                save_evaluation(evaluation_dict, Path(self.config.model_evaluation))

                # 4. Log the parameters, when there are any.
                if self.config.all_params:
                    mlflow.log_params(self.config.all_params)

                # 5. Log the metrics, prefixed with the evaluation label.
                mlflow.log_metric(f"{name} R2_Score", r2)
                mlflow.log_metric(f"{name} RMSE", rmse)
                mlflow.log_metric(f"{name} MSE", mse)
                mlflow.log_metric(f"{name} MAE", mae)

                # 6. Log and register the model.
                # NOTE(review): this runs on every call, so evaluating two
                # datasets registers the same model twice — consider doing it once.
                mlflow.sklearn.log_model(model, "model", registered_model_name="SGDRegressor")

        except Exception as e:
            logger.exception(e)
            raise


In [16]:
# Run the evaluation stage end to end: build the config, then score the model
# on the training split followed by the testing split.
try:
    logger.info(">>>>>>>STAGE: MODEL EVALUATION Started<<<<<<<<< ")

    # Configuration
    config = ConfigurationManger()
    model_evaluation_config = config.get_model_evaluation()

    # Training split
    x_train = pd.read_csv(model_evaluation_config.x_train_dir)
    y_train = pd.read_csv(model_evaluation_config.y_train_dir)
    model_evaluation = Model_Evaluation(model_evaluation_config)
    model_evaluation.evaluate_model(X=x_train, y_true=y_train, name="training")

    # Testing split
    x_test = pd.read_csv(model_evaluation_config.x_test_dir)
    y_test = pd.read_csv(model_evaluation_config.y_test_dir)
    model_evaluation.evaluate_model(X=x_test, y_true=y_test, name="testing")

    logger.info(">>>>>>>STAGE: MODEL EVALUATION ENDED<<<<<<<<< ")

except Exception as e:
    # Record the full traceback, then let the failure surface in the notebook.
    logger.exception(e)
    raise e

[2026-02-25 19:49:19,809 : INFO : 2382430873 : >>>>>>>STAGE: MODEL EVALUATION Started<<<<<<<<< ]
[2026-02-25 19:49:19,831 : INFO : common : yaml file: config\config.yaml LOADED successfully]
[2026-02-25 19:49:19,840 : INFO : common : yaml file: params.yaml LOADED successfully]
[2026-02-25 19:49:19,844 : INFO : common : created directory at: artifacts/modelEvaluatioin]
[2026-02-25 19:49:25,384 : INFO : _client : HTTP Request: GET https://dagshub.com/api/v1/repos/mann-lean/data-science-project "HTTP/1.1 200 OK"]


[2026-02-25 19:49:25,403 : INFO : helpers : Initialized MLflow to track repo "mann-lean/data-science-project"]


[2026-02-25 19:49:25,414 : INFO : helpers : Repository mann-lean/data-science-project initialized!]
-----------training Evaluation-------------
R2 Score:   0.6906
RMSE:       2.2370
MSE:        5.0043
MAE:        1.1405
[2026-02-25 19:49:27,094 : INFO : common : Evalution Metrices saved at artifacts\modelEvaluatioin\evaluation.json]


Registered model 'SGDRegressor' already exists. Creating a new version of this model...
2026/02/25 19:50:26 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: SGDRegressor, version 3
Created version '3' of model 'SGDRegressor'.


üèÉ View run upset-rat-681 at: https://dagshub.com/mann-lean/data-science-project.mlflow/#/experiments/0/runs/75a5ac4237e64a7385c2e82c339a9c06
üß™ View experiment at: https://dagshub.com/mann-lean/data-science-project.mlflow/#/experiments/0
[2026-02-25 19:50:32,023 : INFO : _client : HTTP Request: GET https://dagshub.com/api/v1/repos/mann-lean/data-science-project "HTTP/1.1 200 OK"]


[2026-02-25 19:50:32,040 : INFO : helpers : Initialized MLflow to track repo "mann-lean/data-science-project"]


[2026-02-25 19:50:32,050 : INFO : helpers : Repository mann-lean/data-science-project initialized!]
-----------testing Evaluation-------------
R2 Score:   0.7187
RMSE:       2.0790
MSE:        4.3223
MAE:        1.1287
[2026-02-25 19:50:32,863 : INFO : common : Evalution Metrices saved at artifacts\modelEvaluatioin\evaluation.json]


Registered model 'SGDRegressor' already exists. Creating a new version of this model...
2026/02/25 19:51:01 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: SGDRegressor, version 4
Created version '4' of model 'SGDRegressor'.


üèÉ View run brawny-whale-513 at: https://dagshub.com/mann-lean/data-science-project.mlflow/#/experiments/0/runs/c3ea21a98e1a421b9fe3b50c4dd08ac8
üß™ View experiment at: https://dagshub.com/mann-lean/data-science-project.mlflow/#/experiments/0
[2026-02-25 19:51:03,865 : INFO : 2382430873 : >>>>>>>STAGE: MODEL EVALUATION ENDED<<<<<<<<< ]


##### -----------training Evaluation-------------<br>
R2 Score:   0.6906<br>
RMSE:       2.2370<br>
MSE:        5.0043<br>
MAE:        1.1405
<br>

##### -----------testing Evaluation-------------<br>
R2 Score:   0.7187<br>
RMSE:       2.0790<br>
MSE:        4.3223<br>
MAE:        1.1287