In [4]:
import os
os.chdir("../")


In [5]:
pwd

'd:\\projects\\MLflow_DL\\Machine-learning-project-with-MLflow'

# here we will try the model training part of the pipeline and then add it the main pipeline

lets prepare the entity first

In [6]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir:Path
    train_data_path:Path
    test_data_path: Path
    model_name: str
    alpha: float
    l1_ratio: float
    target_column: str




In [28]:
# now we need to create configuration manager function for data model_trainer task and for that we need the constants that store paths to config yaml schema and params and also read yaml and create directories functions to read them from utils

from mlProject.utils.common import read_yaml, create_directories
from mlProject.constants import *





class ConfigurationManager:
    def __init__(
            self,
            config_filepath = CONFIG_FILE_PATH,
            params_filepath = PARAMS_FILE_PATH,
            schema_filepath = SCHEMA_FILE_PATH):

            self.config = read_yaml(config_filepath)
            self.schema = read_yaml(schema_filepath)
            self.params = read_yaml(params_filepath)

            create_directories([self.config.artifacts_root])
    
    def get_model_trainer_config(self) -> ModelTrainerConfig:
          config = self.config.model_trainer
          params = self.params.ElasticNet
          schema = self.schema.TARGET_COLUMN
          print(schema.name)

          create_directories([config.root_dir])

          model_trainer_config = ModelTrainerConfig(
                  root_dir = config.root_dir,
                  train_data_path = config.train_data_path,
                  test_data_path = config.test_data_path,
                  model_name = config.model_name,
                  alpha = params.alpha,
                  l1_ratio = params.l1_ratio,
                  target_column = schema.name
          )
          return model_trainer_config
          

In [29]:
import pandas as pd
import os
from mlProject import logger
from sklearn.linear_model import ElasticNet
import joblib 

In [30]:
class ModelTrainer:
    def __init__(self, config:ModelTrainerConfig):
        self.config = config


    def train(self):
        train_data = pd.read_csv(self.config.train_data_path)
        test_data = pd.read_csv(self.config.test_data_path)

        # seperate label from data and turn into x and y
        train_x = train_data.drop([self.config.target_column], axis=1)
        test_x = test_data.drop([self.config.target_column], axis=1)

        train_y = train_data[[self.config.target_column]]
        test_y = test_data[[self.config.target_column]]


        lr = ElasticNet(alpha=self.config.alpha, l1_ratio=self.config.l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        joblib.dump(lr, os.path.join(self.config.root_dir, self.config.model_name))
        

In [31]:
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer  = ModelTrainer(config=model_trainer_config)
    model_trainer.train()
except Exception as e:
    raise e

[2025-01-23 13:21:10,066: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-01-23 13:21:10,069: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-01-23 13:21:10,071: INFO: common: yaml file: params.yaml loaded successfully]
[2025-01-23 13:21:10,072: INFO: common: created directory at: artifacts]
quality
[2025-01-23 13:21:10,074: INFO: common: created directory at: artifacts/model_trainer]
