In [13]:
import os

In [14]:
%pwd

'e:\\dsProject\\nycTaxiProject'

In [15]:
# NOTE(review): left disabled — the %pwd output is the same before and after this cell
# (the kernel already sits in '...\\nycTaxiProject', presumably the project root).
# Confirm the working directory before re-enabling.
# os.chdir("../")

In [16]:
%pwd

'e:\\dsProject\\nycTaxiProject'

In [17]:
from dataclasses import dataclass
from pathlib import Path

In [18]:
# Entity
@dataclass(frozen=True)
class ModelTrainingConfig:
    """Immutable bundle of settings consumed by the model-training stage.

    The first four fields are filesystem locations; the remaining fields are
    hyperparameters that Model_Trainer forwards as keyword arguments to
    SGDRegressor. Instances are frozen, so fields cannot be reassigned after
    construction.
    """

    # --- locations ---
    root_dir: Path          # directory created for this stage's artifacts
    xtraining_data: Path    # CSV read as the training features
    ytraining_data: Path    # CSV read as the training target
    model_dir: Path         # path handed to save_model for the fitted model

    # --- params for model training ---
    loss: str
    penalty: str
    alpha: float
    max_iter: int
    tol: float
    random_state: int
    learning_rate: str
    eta0: float

In [19]:
from nycTaxiProject.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from nycTaxiProject.util.common import read_yaml,create_directories,save_model

In [20]:
class ConfigurationManager:
    """Loads the project's YAML configuration and builds per-stage config objects."""

    def __init__(
        self,
        config_file_path=CONFIG_FILE_PATH,
        params_file_path=PARAMS_FILE_PATH,
    ):
        """Read the config and params YAML files.

        Args:
            config_file_path: location of the main config file
                (defaults to CONFIG_FILE_PATH).
            params_file_path: location of the hyperparameter file
                (defaults to PARAMS_FILE_PATH).
        """
        # read_yaml results are accessed with attribute syntax below, so they are
        # presumably attribute-accessible mappings — verify against read_yaml.
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)

    def get_model_training_config(self) -> ModelTrainingConfig:
        """Build the ModelTrainingConfig for the training stage.

        Creates the stage's root directory as a side effect, then combines the
        `model_training` section of the config file with the top-level entries
        of the params file.

        Returns:
            ModelTrainingConfig whose four location fields are `Path` objects,
            matching the dataclass annotations.
        """
        config = self.config.model_training
        params = self.params

        # The raw config value is passed here, unchanged from the original behaviour.
        create_directories([config.root_dir])

        return ModelTrainingConfig(
            # Wrap locations in Path so the entity really holds what it annotates
            # (previously the raw YAML values were stored as-is).
            root_dir=Path(config.root_dir),
            xtraining_data=Path(config.xtraining_data),
            ytraining_data=Path(config.ytraining_data),
            model_dir=Path(config.model_dir),
            # Hyperparameters are forwarded untouched.
            loss=params.loss,
            penalty=params.penalty,
            alpha=params.alpha,
            max_iter=params.max_iter,
            tol=params.tol,
            random_state=params.random_state,
            learning_rate=params.learning_rate,
            eta0=params.eta0,
        )

In [21]:
import logging
# Module-scoped logger. __name__ is "__main__" when the code is run directly and the
# module's own name when it is imported, so naming the logger after it makes it easy
# to tell which module a log message came from in a multi-module application.
logger=logging.getLogger(__name__)
from sklearn.linear_model import SGDRegressor
import pandas as pd


In [22]:
class Model_Trainer:
    """Fits an SGDRegressor on the configured training data and saves the result.

    Args:
        config: ModelTrainingConfig providing the training CSV paths, the model
            output path and the SGDRegressor hyperparameters.
    """

    def __init__(self, config: ModelTrainingConfig):
        # The original signature read `config=ModelTrainingConfig`, which made the
        # *class itself* the default value instead of annotating the parameter.
        # No working caller could rely on that default (train_model would fail on
        # the first attribute access), so the argument is now required and typed.
        self.config = config

    @staticmethod
    def _fit_sgd_model(param_kwargs, x_train, y_train):
        """Create an SGDRegressor from `param_kwargs` and fit it.

        Args:
            param_kwargs: keyword arguments passed straight to SGDRegressor.
            x_train: feature DataFrame.
            y_train: target DataFrame; flattened with `.values.ravel()` so the
                regressor receives a 1-D target array.

        Returns:
            The fitted SGDRegressor.
        """
        model = SGDRegressor(**param_kwargs)
        model.fit(x_train, y_train.values.ravel())
        return model

    @staticmethod
    def save_trained_model(model, model_dir: Path):
        """Persist `model` at `model_dir` using the project's `save_model` helper.

        This used to be a `@staticmethod`-decorated function nested inside
        `train_model`; a bare staticmethod object is only callable on
        Python 3.10+, so it is now a real static method of the class.
        """
        save_model(model=model, path=model_dir)

    def train_model(self):
        """Read the training CSVs, fit the regressor and save it.

        Raises:
            Exception: any failure is logged with its traceback and re-raised.
        """
        try:
            # Load features and target from the CSV paths given in the config.
            x_train = pd.read_csv(self.config.xtraining_data)
            y_train = pd.read_csv(self.config.ytraining_data)
            # NOTE(review): despite its name, the recorded run output shows this
            # resolving to the model file itself (…\sgdModel.pkl) — confirm.
            model_dir = Path(self.config.model_dir)

            param_kwargs = dict(
                loss=self.config.loss,
                penalty=self.config.penalty,
                alpha=self.config.alpha,
                max_iter=self.config.max_iter,
                tol=self.config.tol,
                random_state=self.config.random_state,
                learning_rate=self.config.learning_rate,
                eta0=self.config.eta0,
            )

            model = self._fit_sgd_model(param_kwargs, x_train, y_train)
            self.save_trained_model(model=model, model_dir=model_dir)

        except Exception as e:
            logger.exception(e)
            # Bare `raise` re-raises the active exception as-is.
            raise

In [None]:
# Stage driver: load the configuration, build the trainer and run training,
# logging a banner before and after so the stage is easy to find in the logs.
try:
    logger.info(">>>>>>>STAGE: MODEL TRAINING STARTED<<<<<<<<< ")
    config=ConfigurationManager()
    model_training_config=config.get_model_training_config()
    model_trainer=Model_Trainer(config=model_training_config)
    model_trainer.train_model()
    logger.info(">>>>>>>STAGE: MODEL TRAINING ENDED<<<<<<<<< ")

except Exception as e:
    # Log with traceback, then let the failure surface in the notebook.
    logger.exception(e)
    # Bare `raise` re-raises the active exception unchanged (idiomatic over `raise e`).
    raise

[2026-02-24 20:02:54,803 : INFO : 3416358301 : >>>>>>>STAGE: MODEL TRAINING STARTED<<<<<<<<< ]
[2026-02-24 20:02:54,822 : INFO : common : yaml file: config\config.yaml LOADED successfully]
[2026-02-24 20:02:54,833 : INFO : common : yaml file: params.yaml LOADED successfully]
[2026-02-24 20:02:54,838 : INFO : common : created directory at: artifacts/model_training]
[2026-02-24 20:08:00,941 : INFO : common : model saved at: artifacts\model_training\sgdModel.pkl and size of the model is: ~ 2KB]
[2026-02-24 20:08:00,958 : INFO : 3416358301 : >>>>>>>STAGE: MODEL TRAINING ENDED<<<<<<<<< ]


