In [1]:
import os

In [2]:
# Step up one directory so that relative paths resolve from the parent folder.
# NOTE: this is relative to the *current* working directory, so re-running the
# cell moves up one more level each time; run it once per kernel session.
os.chdir(os.pardir)

In [3]:
# Show the current working directory to confirm where the kernel is running from
# (`pwd` is an IPython line magic, called here without the leading `%`).
pwd

'/home/cloudcraftz/End-To-End-Wine-Quality'

In [4]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    """Read-only settings describing how one model is trained and saved.

    Instances are frozen: attributes cannot be reassigned after construction.
    """

    # Directory belonging to the model-trainer stage.
    root_dir: Path
    # Name identifying the model; also used when building output file names.
    model_name: str
    # Hyper-parameter search settings for this model.
    # NOTE(review): the trainer reads `.params_grid` off this value, so it is
    # presumably an attribute-access mapping rather than a plain dict - confirm.
    params_grid: dict
    # Location of the CSV file holding the training data.
    train_data_path: Path
    # Base directory under which trained models are written.
    model_save_dir: Path
    # Name of the column in the training data that holds the target.
    target_column: str

In [5]:
from WineQuality.constants import *
from WineQuality.utils.common import read_yaml, create_directories

In [6]:
class ConfigurationManager:
    """Reads the config, params and schema YAML files and builds stage configs."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Load the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_configs(self) -> list[ModelTrainerConfig]:
        """Build one ``ModelTrainerConfig`` per top-level entry of the params file.

        The trainer's root directory is created as a side effect.

        Returns:
            A list of configs, in the iteration order of the params mapping.
            Each entry's key becomes ``model_name`` and its value ``params_grid``.
        """
        trainer_section = self.config.model_trainer
        all_params = self.params
        target_spec = self.schema.TARGET_COLUMN

        create_directories([trainer_section.root_dir])

        # Every params entry shares the same paths and target column; only the
        # model name and its parameter grid differ.
        return [
            ModelTrainerConfig(
                root_dir=trainer_section.root_dir,
                model_name=model_name,
                params_grid=params_grid,
                train_data_path=trainer_section.train_data_path,
                model_save_dir=trainer_section.model_save_dir,
                target_column=target_spec.name,
            )
            for model_name, params_grid in all_params.items()
        ]



In [7]:
import os
import pandas as pd
from WineQuality import logger
from sklearn.linear_model import Ridge, Lasso
from sklearn.model_selection import GridSearchCV
import json

import pickle


# Lookup table from model name to an untuned estimator instance; ModelTrainer
# picks its estimator from here by name. Seeds are fixed for reproducibility.
model_dict = {
    'Lasso': Lasso(random_state=42),
    'Ridge': Ridge(random_state=42),
}


class ModelTrainer:
    """Grid-searches one configured model and saves the best estimator to disk."""

    def __init__(self, config: ModelTrainerConfig):
        """Keep the configuration that drives ``train_model``."""
        self.config = config

    def train_model(self):
        """Fit a grid search on the training CSV and persist the results.

        Reads the training data, separates the target column from the
        features, looks the estimator up in ``model_dict`` by model name and
        runs ``GridSearchCV`` (``cv=5``, ``n_jobs=-1``) over the configured
        grid. The best estimator is pickled and the best parameters are
        written as JSON under ``<model_save_dir>/<model_name>/``.

        Returns:
            None. Results are written to disk and reported through the logger.

        Raises:
            KeyError: If the configured model name is not in ``model_dict``.
        """
        cfg = self.config

        frame = pd.read_csv(cfg.train_data_path)
        features = frame.drop(columns=[cfg.target_column])
        target = frame[cfg.target_column]

        estimator = model_dict[cfg.model_name]

        logger.info(f"Training {cfg.model_name} model")

        search = GridSearchCV(
            estimator=estimator,
            param_grid=cfg.params_grid.params_grid,
            cv=5,
            n_jobs=-1,
        )
        search.fit(features, target)

        logger.info(f"Best Score: {search.best_score_}")
        logger.info(f"Best Parameters: {search.best_params_}")

        # Both artefacts live side by side in a folder named after the model.
        model_save_path = os.path.join(
            cfg.model_save_dir, cfg.model_name, f"{cfg.model_name}.pkl"
        )
        best_parameters_save_path = os.path.join(
            cfg.model_save_dir, cfg.model_name, f"{cfg.model_name}_best_parameters.json"
        )

        # Create the parent folder of the output files, not a folder named
        # after the file itself.
        os.makedirs(os.path.dirname(model_save_path), exist_ok=True)

        with open(model_save_path, 'wb') as model_file:
            pickle.dump(search.best_estimator_, model_file)

        with open(best_parameters_save_path, 'w') as params_file:
            json.dump(search.best_params_, params_file)

        logger.info(f"Model saved at: {model_save_path}")


In [8]:
# Driver cell: build one trainer config per model listed in the params file
# and train each model in turn.
try:
    config = ConfigurationManager()
    model_trainer_configs = config.get_model_trainer_configs()
    for model_trainer_config in model_trainer_configs:
        model_trainer = ModelTrainer(model_trainer_config)
        model_trainer.train_model()

except Exception:
    # Record the failure (with traceback) through the project logger, then
    # re-raise with a bare `raise` so the original traceback is left intact.
    logger.exception("Model training failed")
    raise

[2025-03-22 20:29:14,411: INFO: common: yaml file: config/config.yaml loaded successfully]
[2025-03-22 20:29:14,413: INFO: common: yaml file: params.yaml loaded successfully]
[2025-03-22 20:29:14,415: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-03-22 20:29:14,415: INFO: common: created directory at: artifacts]
[2025-03-22 20:29:14,416: INFO: common: created directory at: artifacts/model_trainer]
[2025-03-22 20:29:14,420: INFO: 2999260193: Training Ridge model]
[2025-03-22 20:29:16,017: INFO: 2999260193: Best Score: 0.3231663170787522]
[2025-03-22 20:29:16,020: INFO: 2999260193: Best Parameters: {'alpha': 0.5}]
[2025-03-22 20:29:16,025: INFO: 2999260193: Model saved at: artifacts/model_trainer/models/Ridge/Ridge.pkl]
[2025-03-22 20:29:16,034: INFO: 2999260193: Training Lasso model]
[2025-03-22 20:29:16,518: INFO: 2999260193: Best Score: 0.2211569723366204]
[2025-03-22 20:29:16,519: INFO: 2999260193: Best Parameters: {'alpha': 0.1}]
[2025-03-22 20:29:16,521: INFO: 299