In [1]:
import os

In [2]:
%pwd

'e:\\projects\\Delivery-time-prediction-for-food-devlivery-industry\\research'

In [3]:
# Move from the notebook's `research/` folder up to the project root so that
# relative paths such as `artifacts/...` resolve from there.
# Guarded on the folder name: an unconditional chdir climbs one more level
# every time the cell is re-run without restarting the kernel.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'e:\\projects\\Delivery-time-prediction-for-food-devlivery-industry'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of paths and hyper-parameters for model training.

    Instances are built by ``ConfigurationManager.get_model_gbm_config`` from
    the ``model_trainer`` section of the config YAML and from the ``LightGBM``
    and ``Random_Forest`` sections of the params YAML.

    ``frozen=True`` makes instances read-only after construction. The field
    annotations are documentation only; the dataclass does not validate or
    convert the values it is given.
    """

    # --- locations (from config.model_trainer) ---
    root_dir: Path
    data_input_dir: Path
    train_data_path: Path
    test_data_path: Path
    model_name: str
    # --- LightGBM hyper-parameters (from params.LightGBM) ---
    n_estimators: int
    max_depth: int
    learning_rate: float
    subsample: float
    min_child_weight: int
    min_split_gain: float
    reg_lambda: float
    n_jobs: int
    # --- random-forest hyper-parameters (from params.Random_Forest); the
    # `_rf` suffix keeps them apart from the LightGBM fields of the same name ---
    n_estimators_rf: int
    max_depth_rf: int
    criterion_rf: str
    max_features_rf: int
    min_samples_split_rf: int
    min_samples_leaf_rf: int
    max_samples_rf: float
    verbose_rf: int
    n_jobs_rf: int

In [6]:
from pathlib import Path

# Raw strings: a Windows path in a normal literal relies on `\p`, `\D`, `\c`
# and `\s` being *invalid* escapes that Python happens to keep verbatim (with a
# warning on recent versions). A segment starting with `t` or `n` would be
# silently turned into a tab/newline.
# NOTE(review): these are machine-specific absolute paths; consider deriving
# them from the project root instead.
CONFIG_FILE_PATH = Path(r"E:\projects\Delivery-time-prediction-for-food-devlivery-industry\config\config.yaml")
PARAMS_FILE_PATH = Path(r"E:\projects\Delivery-time-prediction-for-food-devlivery-industry\params.yaml")
SCHEMA_FILE_PATH = Path(r"E:\projects\Delivery-time-prediction-for-food-devlivery-industry\schema.yaml")

In [7]:
from Deliveryprediction.constants import *
from Deliveryprediction.utils.common import read_yaml, create_directories

class ConfigurationManager:
    """Load the project's YAML files and build per-stage config objects.

    The three YAML files (config, params, schema) are read once in
    ``__init__`` via the project helper ``read_yaml`` and kept on the instance.
    The result is accessed attribute-style (e.g. ``self.config.artifacts_root``).
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Read the YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Path of the main config YAML.
            params_filepath: Path of the hyper-parameter YAML.
            schema_filepath: Path of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_gbm_config(self) -> ModelTrainerConfig:
        """Build the model-trainer config (LightGBM *and* random-forest params).

        Creates the trainer's ``root_dir`` as a side effect.

        Returns:
            A ``ModelTrainerConfig`` whose path fields are ``pathlib.Path``
            objects, matching the dataclass annotations.
        """
        config = self.config.model_trainer
        lgbm_params = self.params.LightGBM
        rf_params = self.params.Random_Forest

        create_directories([config.root_dir])

        return ModelTrainerConfig(
            # The YAML values are wrapped so the fields really hold the `Path`
            # type the dataclass declares.
            root_dir = Path(config.root_dir),
            data_input_dir = Path(config.data_input_dir),
            train_data_path = Path(config.train_data_path),
            test_data_path = Path(config.test_data_path),
            model_name = config.model_name,
            # LightGBM
            n_estimators = lgbm_params.n_estimators,
            max_depth = lgbm_params.max_depth,
            learning_rate = lgbm_params.learning_rate,
            subsample = lgbm_params.subsample,
            min_child_weight = lgbm_params.min_child_weight,
            min_split_gain = lgbm_params.min_split_gain,
            reg_lambda = lgbm_params.reg_lambda,
            n_jobs = lgbm_params.n_jobs,
            # Random forest
            criterion_rf = rf_params.criterion,
            max_depth_rf = rf_params.max_depth,
            n_estimators_rf = rf_params.n_estimators,
            max_features_rf = rf_params.max_features,
            min_samples_split_rf = rf_params.min_samples_split,
            min_samples_leaf_rf = rf_params.min_samples_leaf,
            max_samples_rf = rf_params.max_samples,
            verbose_rf = rf_params.verbose,
            n_jobs_rf = rf_params.n_jobs,
        )
    
    # def get_model_rf_config(self) -> ModelTrainerConfig:
    #     config = self.config.model_trainer
    #     params = self.params.Random_Forest
    #     #schema = self.schema.TARGATE_COLUMN

    #     #create_directories([config.root_dir])

    #     model_trainer_config_rf = ModelTrainerConfig(
    #         root_dir = config.root_dir,
    #         data_input_dir = config.data_input_dir,
    #         train_data_path = config.train_data_path,
    #         test_data_path= config.test_data_path,
    #         model_name = config.model_name,
    #         n_estimators = params.n_estimators,
    #         criterion = params.criterion,
    #         max_depth = params.max_depth,
    #         max_features = params.max_features,
    #         min_samples_split = params.min_samples_split,
    #         min_samples_leaf = params.min_samples_leaf,
    #         max_samples = params.max_samples,
    #         verbose = params.verbose,
    #         n_jobs = params.n_jobs
    #         )

    #     return model_trainer_config_rf


In [10]:
# Instantiating the manager reads the config, params and schema YAML files and
# creates the artifacts root directory (see the log lines below).
config = ConfigurationManager()

[2025-02-12 15:26:58,565: INFO: common: yaml file: E:\projects\Delivery-time-prediction-for-food-devlivery-industry\config\config.yaml loaded successfully]
[2025-02-12 15:26:58,572: INFO: common: yaml file: E:\projects\Delivery-time-prediction-for-food-devlivery-industry\params.yaml loaded successfully]
[2025-02-12 15:26:58,579: INFO: common: yaml file: E:\projects\Delivery-time-prediction-for-food-devlivery-industry\schema.yaml loaded successfully]
[2025-02-12 15:26:58,582: INFO: common: created directory at: artifacts]


In [12]:
# Smoke check: build the trainer config once (this also creates the trainer's
# root directory). `abc` is a throwaway name and is not used again below.
abc = config.get_model_gbm_config()

[2025-02-12 15:27:56,935: INFO: common: created directory at: artifacts/model_trainer/]


In [None]:
import pandas as pd
import yaml
import joblib
import logging
from pathlib import Path
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PowerTransformer
from sklearn.compose import TransformedTargetRegressor
from lightgbm import LGBMRegressor


class ModelTrainer:
    """Train and persist the stacked delivery-time regressor.

    The model is a ``StackingRegressor`` (random forest + LightGBM base
    learners, linear-regression meta learner) wrapped in a
    ``TransformedTargetRegressor`` that power-transforms the target.

    NOTE(review): `logger` is used throughout but is not defined or imported in
    the visible cells; presumably it comes from the project package — confirm.
    """

    TARGET_COLUMN = "time_taken"
    # Seed handed to the stochastic base learners so repeated runs are reproducible.
    RANDOM_STATE = 42

    def __init__(self, config: "ModelTrainerConfig"):
        """Derive input/output locations from ``config`` and create output dirs.

        Args:
            config: Trainer configuration; ``data_input_dir`` and ``root_dir``
                are read here, the hyper-parameter fields in ``run``.
        """
        self.config = config
        self.root_path = Path(self.config.data_input_dir)
        # File names are fixed here; config.train_data_path / test_data_path
        # are not consulted.
        self.train_data_path = self.root_path / "train_trans.csv"
        self.test_data_path = self.root_path / "test_trans.csv"

        self.save_data_dir = Path(self.config.root_dir)
        self.save_data_dir.mkdir(exist_ok=True, parents=True)

        self.model_save_dir = self.save_data_dir / "models"
        self.model_save_dir.mkdir(exist_ok=True)

        # Populated by run().
        self.training_data = None
        self.model = None
        self.stacking_model = None
        self.transformer = None

    def load_data(self) -> pd.DataFrame:
        """Read the training CSV.

        Returns:
            The training frame, or ``None`` (after logging an error) when the
            file does not exist.
        """
        # Logged instead of printed so the path goes through the same handlers
        # as the rest of the pipeline's messages.
        logger.info("Reading training data from %s", self.train_data_path)
        try:
            # NOTE(review): if the CSV was written with its index, read_csv
            # surfaces it as an extra `Unnamed: 0` column that would be used as
            # a feature — confirm the upstream step writes index=False.
            df = pd.read_csv(self.train_data_path)
        except FileNotFoundError:
            logger.error("The file to load does not exist")
            return None
        logger.info("Training Data read successfully")
        return df

    @staticmethod
    def read_params(file_path):
        """Parse a YAML file with ``yaml.safe_load`` and return the result."""
        with open(file_path, "r") as f:
            return yaml.safe_load(f)

    @staticmethod
    def save_model(model, save_dir: Path, model_name: str):
        """Serialize ``model`` with joblib to ``save_dir / model_name``."""
        joblib.dump(value=model, filename=save_dir / model_name)

    @staticmethod
    def save_transformer(transformer, save_dir: Path, transformer_name: str):
        """Serialize ``transformer`` with joblib to ``save_dir / transformer_name``."""
        joblib.dump(transformer, save_dir / transformer_name)

    @staticmethod
    def train_model(model, X_train: pd.DataFrame, y_train):
        """Fit ``model`` in place on the given data and return it."""
        model.fit(X_train, y_train)
        return model

    @staticmethod
    def make_X_and_y(data: pd.DataFrame, target_column: str):
        """Split ``data`` into features (all other columns) and the target column."""
        X = data.drop(columns=[target_column])
        y = data[target_column]
        return X, y

    def _build_model(self):
        """Assemble the unfitted target-transformed stacking regressor from config."""
        rf = RandomForestRegressor(
            n_estimators=self.config.n_estimators_rf,
            max_depth=self.config.max_depth_rf,
            criterion=self.config.criterion_rf,
            max_features=self.config.max_features_rf,
            min_samples_split=self.config.min_samples_split_rf,
            min_samples_leaf=self.config.min_samples_leaf_rf,
            max_samples=self.config.max_samples_rf,
            verbose=self.config.verbose_rf,
            n_jobs=self.config.n_jobs_rf,
            random_state=self.RANDOM_STATE,
        )

        # NOTE(review): per the LightGBM docs, `subsample` (bagging) only takes
        # effect when `subsample_freq` > 0, which is left at its default here —
        # confirm whether bagging is intended.
        lgbm = LGBMRegressor(
            n_estimators=self.config.n_estimators,
            max_depth=self.config.max_depth,
            learning_rate=self.config.learning_rate,
            subsample=self.config.subsample,
            min_child_weight=self.config.min_child_weight,
            min_split_gain=self.config.min_split_gain,
            reg_lambda=self.config.reg_lambda,
            n_jobs=self.config.n_jobs,
            random_state=self.RANDOM_STATE,
        )

        stacking_reg = StackingRegressor(
            estimators=[("rf_model", rf), ("lgbm_model", lgbm)],
            final_estimator=LinearRegression(),
            cv=5,
            n_jobs=-1,
        )

        return TransformedTargetRegressor(
            regressor=stacking_reg,
            transformer=PowerTransformer(),
        )

    def run(self):
        """Load the data, fit the stacked model and save the fitted artifacts.

        Returns early (``None``) when the training data could not be loaded.
        On success three joblib files are written to ``self.model_save_dir``:
        the full model, the fitted stacking regressor and the fitted target
        transformer.
        """
        self.training_data = self.load_data()
        if self.training_data is None:
            return

        X_train, y_train = self.make_X_and_y(self.training_data, self.TARGET_COLUMN)
        logger.info("Dataset splitting completed")

        self.model = self._build_model()
        self.train_model(self.model, X_train, y_train)
        logger.info("Model training completed")

        # Fitted sub-estimators only exist on the wrapper after fit().
        self.stacking_model = self.model.regressor_
        self.transformer = self.model.transformer_

        self.save_model(self.model, self.model_save_dir, "model.joblib")
        self.save_model(self.stacking_model, self.model_save_dir, "stacking_regressor.joblib")
        self.save_transformer(self.transformer, self.model_save_dir, "power_transformer.joblib")
        logger.info("All models and transformers saved successfully")


In [None]:

# End-to-end training run: load configuration, build the trainer config,
# then fit and save the stacked model.
config = ConfigurationManager()
# NOTE(review): despite its name, `model_trainer` holds the ModelTrainerConfig;
# the actual trainer object is `model_final`.
model_trainer = config.get_model_gbm_config()
model_final = ModelTrainer(model_trainer)
model_final.run()
# model_gbm=ModelTrainer(config.get_model_gbm_config(config=ModelTrainerConfig))
# model_rf=ModelTrainer(config.get_model_rf_config(config=ModelTrainerConfig))


# config = ConfigurationManager()
# data_preparation_config = config.get_data_preparation_config()
# data_preparation = DataPreparation(config=data_preparation_config)
# data=data_preparation.load_data()
# data_preparation.split_data(data)

[2025-02-12 15:55:30,800: INFO: common: yaml file: E:\projects\Delivery-time-prediction-for-food-devlivery-industry\config\config.yaml loaded successfully]
[2025-02-12 15:55:30,805: INFO: common: yaml file: E:\projects\Delivery-time-prediction-for-food-devlivery-industry\params.yaml loaded successfully]
[2025-02-12 15:55:30,813: INFO: common: yaml file: E:\projects\Delivery-time-prediction-for-food-devlivery-industry\schema.yaml loaded successfully]
[2025-02-12 15:55:30,815: INFO: common: created directory at: artifacts]
[2025-02-12 15:55:30,817: INFO: common: created directory at: artifacts/model_trainer/]


artifacts\data_trans\train_trans.csv
[2025-02-12 15:55:33,464: INFO: 4171640766: Training Data read successfully]
[2025-02-12 15:55:33,472: INFO: 4171640766: Dataset splitting completed]
[2025-02-12 15:56:12,414: INFO: 4171640766: Model training completed]
[2025-02-12 15:56:13,785: INFO: 4171640766: All models and transformers saved successfully]


In [33]:

class ModelTrainer:
    """Minimal data-loading variant of the trainer, used to debug path resolution.

    NOTE(review): this redefines the `ModelTrainer` class declared earlier in
    the notebook, so whichever cell ran last wins. Unlike that version, this
    one expects the whole ``ConfigurationManager`` and reads
    ``config.config.model_trainer`` itself.
    """

    TARGET_COLUMN = "time_taken"

    def __init__(self, config: "ConfigurationManager"):
        """Resolve the train/test CSV paths.

        Args:
            config: A configuration manager whose ``config.model_trainer``
                section provides ``data_input_dir``. (The annotation used to
                say ``ModelTrainerConfig``, which has no ``config`` attribute.)
        """
        self.config = config
        self.root_path = Path(self.config.config.model_trainer.data_input_dir)
        self.train_data_path = self.root_path / "train_trans.csv"
        self.test_data_path = self.root_path / "test_trans.csv"

    def load_data(self) -> pd.DataFrame:
        """Read the training CSV.

        Returns:
            The training frame, or ``None`` (after logging an error) when the
            file does not exist.
        """
        # Logged instead of printed so the path shows up next to the
        # success/error message in the same log stream.
        logger.info("Reading training data from %s", self.train_data_path)
        try:
            df = pd.read_csv(self.train_data_path)
        except FileNotFoundError:
            logger.error("The file to load does not exist")
            return None
        logger.info("Training Data read successfully")
        return df

In [34]:
# Scratch check of the data-loading path. Here the ConfigurationManager itself
# (not a ModelTrainerConfig) is handed to the trainer, which matches the
# `config.config.model_trainer` lookup in the class defined just above.
config = ConfigurationManager()
model=ModelTrainer(config)


[2025-02-12 14:01:50,575: INFO: common: yaml file: E:\projects\Delivery-time-prediction-for-food-devlivery-industry\config\config.yaml loaded successfully]
[2025-02-12 14:01:50,582: INFO: common: yaml file: E:\projects\Delivery-time-prediction-for-food-devlivery-industry\params.yaml loaded successfully]
[2025-02-12 14:01:50,590: INFO: common: yaml file: E:\projects\Delivery-time-prediction-for-food-devlivery-industry\schema.yaml loaded successfully]
[2025-02-12 14:01:50,592: INFO: common: created directory at: artifacts]


In [35]:
# Try to read the training CSV; in the recorded run below the relative path
# did not resolve to an existing file, so the error branch was logged.
model.load_data()

artifacts\data_trans\train_trans.csv
[2025-02-12 14:01:53,375: ERROR: 2981318131: The file to load does not exist]


In [36]:

# Rebuild the training-file path by hand (same expression the trainer uses)
# to inspect what it resolves to.
root_path = Path(config.config.model_trainer.data_input_dir)
train_data_path = root_path / "train_trans.csv"

In [37]:
train_data_path 

WindowsPath('artifacts/data_trans/train_trans.csv')

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "e:\projects\Delivery-time-prediction-for-food-devlivery-industry\.venv\lib\site-packages\IPython\core\interactiveshell.py", line 3579, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\Admin\AppData\Local\Temp\ipykernel_19348\1512572791.py", line 1, in <module>
    df= pd.read_csv("\\artifacts\\data_trans\\train_trans.csv")
  File "e:\projects\Delivery-time-prediction-for-food-devlivery-industry\.venv\lib\site-packages\pandas\io\parsers\readers.py", line 1026, in read_csv
    return _read(filepath_or_buffer, kwds)
  File "e:\projects\Delivery-time-prediction-for-food-devlivery-industry\.venv\lib\site-packages\pandas\io\parsers\readers.py", line 620, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
  File "e:\projects\Delivery-time-prediction-for-food-devlivery-industry\.venv\lib\site-packages\pandas\io\parsers\readers.py", line 1620, in __init__
    self._engine = self._make_engine(f, self.engin

In [14]:
# NOTE(review): `df` is not assigned in any successfully executed cell shown
# here (hidden kernel state), and this displays the whole frame — prefer
# `df.head()` once the load cell is restored.
df

Unnamed: 0,age,ratings,pickup_time_minutes,distance,weather_fog,weather_sandstorms,weather_stormy,weather_sunny,weather_windy,type_of_order_drinks,...,city_type_urban,is_weekend_1,order_time_of_day_evening,order_time_of_day_morning,order_time_of_day_night,traffic,distance_type,vehicle_condition,multiple_deliveries,time_taken
0,1.000000,1.00,0.5,0.390817,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,3.0,1.0,1,0.0,17
1,0.947368,0.76,0.5,0.953779,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,1.0,3.0,2,0.0,43
2,0.578947,0.80,1.0,0.234434,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2,1.0,28
3,1.000000,0.84,1.0,0.240033,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1,1.0,16
4,1.000000,0.88,1.0,0.466571,0.0,0.0,0.0,0.0,1.0,0.0,...,1.0,1.0,1.0,0.0,0.0,1.0,2.0,1,0.0,26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28266,0.578947,0.92,0.5,0.451895,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,1.0,1.0,0.0,0.0,3.0,2.0,0,0.0,30
28267,0.052632,1.00,1.0,0.612270,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,0.0,0.0,1.0,2.0,1,1.0,24
28268,0.526316,0.92,0.0,0.322877,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1,0.0,15
28269,0.947368,0.96,0.5,0.004486,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0,1.0,26
