In [1]:
import os
os.chdir("../")
%pwd

'd:\\Machine_Learning\\Consignment-Pricing-Prediction'

In [5]:
# Entity for the model trainer
from dataclasses import dataclass 
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path 
    train_data_path: Path
    model_path: Path
    scaler_path: Path
    target_column: str

In [6]:
from ConsignmentPricingPrediction.constants import *
from ConsignmentPricingPrediction.utils.common import read_yaml, create_directories

In [8]:
# Creating the configuration Manager for model trainer
class ConfigurationManager:
    def __init__(
            self,
            config_file_path= CONFIG_FILE_PATH,
            params_file_path= PARAMS_FILE_PATH
        ):
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)

        create_directories([self.config.artifacts_root])
    
    def get_model_trainer_config(self) -> ModelTrainerConfig:
        config = self.config.model_trainer 
        params = self.params

        create_directories([config.root_dir])

        model_trainer_config = ModelTrainerConfig(
            root_dir= config.root_dir,
            train_data_path= config.train_data_path,
            model_path= config.model_path,
            scaler_path= config.scaler_path,
            target_column= params.TARGET_COLUMN
        )
        return model_trainer_config

In [51]:
import pandas as pd
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.preprocessing import StandardScaler
from ConsignmentPricingPrediction.utils.common import save_object
from sklearn.metrics import r2_score
from ConsignmentPricingPrediction.logging import Logger

class ModelTrainer:
    def __init__(self, model_trainer_config: ModelTrainerConfig):
        self.config = model_trainer_config

    def fetch_train_data(self)-> pd.DataFrame:
        testDF = pd.read_csv(self.config.train_data_path)
        Logger.info('Training Dataset has been fetched successfully')
        return testDF
    
    def separating_train_data(self, testDF: pd.DataFrame):
        """
        Separating the training data into dependent and independent features
        
        Args:
            testDF: pd.DataFrame
        
        Returns:
            x: pd.DataFrame
            y: pd.DataFrame
        """
        x = testDF.drop(self.config.target_column, axis=1)
        y = testDF[self.config.target_column]
        Logger.info("Dataset has been seperated into dependent and independent features")

        return x, y
    
    def scal_dataset(self, testDF: pd.DataFrame):
        """
        Scale the dataset and then save the scaler

        Args:
            testDF: pd.DataFrame

        Returns:
            scaled_data: ndarray
        """
        scal = StandardScaler()
        scaled_data = scal.fit_transform(testDF)
        Logger.info("Dataset has been scaled successfully")

        # Saving the scaler
        save_object(path=Path(self.config.scaler_path),obj=scal)    
        Logger.info("Scaler has been saved successfully")

        return scaled_data

        
    def training_model(self, x, y):
        models = {
            'LinearRegression': LinearRegression(),
            'Lasso': Lasso(),
            'Ridge': Ridge(),
        }

        model_accuracy_dict = {}

        for model in models.items():
            # Fitting the model
            model[1].fit(x, y) 
            
            # Testing the accuracy on the training data
            y_pred = model[1].predict(x)
            model_accuracy_dict[model[0]] = r2_score(y,y_pred)

        max_accuracy_score = (max(model_accuracy_dict.values()))
        max_accuracy_model = list(model_accuracy_dict.items())[list(model_accuracy_dict.values()).index(max_accuracy_score)][0]
        Logger.info("Model: {} | Model Accuracy: {}".format(max_accuracy_model, max_accuracy_score))

        final_model = models[max_accuracy_model]
        Logger.info("Model has been trained successfully")
        return final_model

    def save_model(self, model):
        save_object(path=Path(self.config.model_path),obj=model)
        Logger.info("Model has been saved successfully")

    def trainer(self):
        # Fetching the dataset
        data = self.fetch_train_data()

        # Separating the dataset
        x, y = self.separating_train_data(data)

        # Scaling the dataset
        x_scaled = self.scal_dataset(x)

        # Training the model
        model = self.training_model(x_scaled, y)
        
        # Saving the model
        self.save_model(model)

In [52]:
from ConsignmentPricingPrediction.logging import Logger

try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer = ModelTrainer(model_trainer_config)
    model_trainer.trainer()

except Exception as e:
    raise e

[2024-03-21 19:36:58,442: INFO: common: yaml file config\config.yaml loaded successfully]
[2024-03-21 19:36:58,448: INFO: common: yaml file params.yaml loaded successfully]
[2024-03-21 19:36:58,450: INFO: common: created directory at: artifacts]
[2024-03-21 19:36:58,452: INFO: common: created directory at: artifacts/model_trainer]
[2024-03-21 19:36:58,462: INFO: 3631226736: Training Dataset has been fetched successfully]
[2024-03-21 19:36:58,466: INFO: 3631226736: Dataset has been seperated into dependent and independent features]
[2024-03-21 19:36:58,473: INFO: 3631226736: Dataset has been scaled successfully]
[2024-03-21 19:36:58,473: INFO: 3631226736: Scaler has been saved successfully]
[2024-03-21 19:36:58,562: INFO: 3631226736: Model: LinearRegression | Model Accuracy: 0.3077374722825449]
[2024-03-21 19:36:58,562: INFO: 3631226736: Model has been trained successfully]
[2024-03-21 19:36:58,562: INFO: 3631226736: Model has been saved successfully]


  model = cd_fast.enet_coordinate_descent(
