In [1]:
import os

In [2]:
%pwd

'c:\\Users\\Omar\\Desktop\\Omar_Files\\Python_Analysis\\EndToEndMLProjectDemandSales\\research'

In [3]:
# Work from the project root so relative paths such as config/ and artifacts/ resolve.
# Guarded on the notebook folder name so that re-running this cell does not
# climb one more directory level each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\Omar\\Desktop\\Omar_Files\\Python_Analysis\\EndToEndMLProjectDemandSales'

In [5]:
from dataclasses import dataclass
from pathlib import Path
import pickle


@dataclass(frozen=True)
class PrepareBaseModelConfig:
    """Read-only settings consumed by the prepare-base-model stage.

    Attributes:
        root_dir: Directory belonging to this stage.
        base_model_path: Path configured for the base model file.
        train_data_path: CSV path the transformed training data is written to.
        test_data_path: CSV path the transformed test data is written to.
    """

    root_dir: Path
    base_model_path: Path
    # updated_base_model_path: Path   (field currently disabled)
    train_data_path: Path
    test_data_path: Path

In [6]:
from DemandSalesRegression.constants import *
from DemandSalesRegression.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    """Loads the project YAML files and builds per-stage configuration objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: YAML file holding the pipeline configuration.
            params_filepath: YAML file holding the parameters.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:
        """Build the configuration for the prepare-base-model stage.

        Creates the stage's root directory as a side effect.

        Returns:
            A PrepareBaseModelConfig whose fields are all ``Path`` objects,
            matching the types declared on the dataclass.
        """
        config = self.config.prepare_base_model

        create_directories([config.root_dir])

        return PrepareBaseModelConfig(
            root_dir=Path(config.root_dir),
            base_model_path=Path(config.base_model_path),
            # updated_base_model_path=Path(config.updated_base_model_path),
            # Wrapped in Path like the other fields so the declared types hold.
            train_data_path=Path(config.train_data_path),
            test_data_path=Path(config.test_data_path),
        )

In [8]:
import sys
import numpy as np 
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder,StandardScaler,OneHotEncoder
from sklearn.ensemble import GradientBoostingRegressor

from DemandSalesRegression import logger
from DemandSalesRegression.utils.common import get_size
import os

In [15]:
class PrepareBaseModel:
    """Builds the feature preprocessor and writes transformed train/test CSVs.

    The column lists live on the class so that the preprocessor and the
    feature selection can never drift apart.
    """

    # Column to predict. It must not appear among the input features,
    # otherwise the model is handed the answer (target leakage).
    TARGET_COLUMN = 'total_sales'
    NUMERICAL_COLUMNS = ['quantity', 'price_per_case']
    CATEGORICAL_COLUMNS = ['company_region', 'product', 'unit']

    def __init__(self, config: "PrepareBaseModelConfig"):
        """Store the stage configuration.

        Args:
            config: Provides ``train_data_path`` and ``test_data_path``, the
                CSV files the transformed data is written to.
        """
        self.config = config

    def get_data_transformer_object(self):
        """Create the (unfitted) preprocessing ColumnTransformer.

        Numerical columns are standardised; categorical columns are one-hot
        encoded (unknown categories are ignored at transform time) and then
        scaled without centering.

        Returns:
            A new, unfitted ``ColumnTransformer``.
        """
        num_pipeline = Pipeline(steps=[
            ("scaler", StandardScaler()),
        ])

        cat_pipeline = Pipeline(steps=[
            ("OneHotEncoder", OneHotEncoder(handle_unknown="ignore")),
            # with_mean=False: the encoder output may be sparse, and a sparse
            # matrix cannot be centered.
            ("scaler", StandardScaler(with_mean=False)),
        ])

        return ColumnTransformer([
            ("num_pipeline", num_pipeline, list(self.NUMERICAL_COLUMNS)),
            ("cat_pipelines", cat_pipeline, list(self.CATEGORICAL_COLUMNS)),
        ])

    @staticmethod
    def _to_dense(matrix):
        """Return ``matrix`` as a dense array, converting sparse output if needed."""
        return matrix.toarray() if hasattr(matrix, "toarray") else matrix

    @staticmethod
    def _write_if_missing(array, csv_path):
        """Write ``array`` to ``csv_path`` as CSV unless that file already exists."""
        if os.path.exists(csv_path):
            logger.info(f"{csv_path} already exists; keeping the existing file")
            return
        pd.DataFrame(array).to_csv(csv_path, index=False)

    def initiate_data_transformation(
        self,
        raw_train_path=r"./artifacts/data_ingestion/train_data.csv",
        raw_test_path=r"./artifacts/data_ingestion/test_data.csv",
    ):
        """Fit the preprocessor on the training data and persist both splits.

        The preprocessor is fitted on the training features only and then
        applied to the test features. The untouched target is appended as the
        last column of each output array.

        Args:
            raw_train_path: CSV file with the raw training data.
            raw_test_path: CSV file with the raw test data.

        Returns:
            Tuple ``(train_data_path, test_data_path)`` taken from the config.
            An output file that already exists is left as it is.
        """
        train_df = pd.read_csv(raw_train_path)
        test_df = pd.read_csv(raw_test_path)

        logger.info("Read train and test data completed")

        preprocessing_obj = self.get_data_transformer_object()

        # Inputs exclude TARGET_COLUMN on purpose (see class-level note).
        feature_columns = [*self.NUMERICAL_COLUMNS, *self.CATEGORICAL_COLUMNS]

        # ColumnTransformer returns a SciPy sparse matrix when the stacked
        # output is sparse enough; np.c_ needs dense input.
        train_features = self._to_dense(
            preprocessing_obj.fit_transform(train_df[feature_columns])
        )
        test_features = self._to_dense(
            preprocessing_obj.transform(test_df[feature_columns])
        )

        train_arr = np.c_[train_features, np.array(train_df[self.TARGET_COLUMN])]
        test_arr = np.c_[test_features, np.array(test_df[self.TARGET_COLUMN])]

        self._write_if_missing(train_arr, self.config.train_data_path)
        self._write_if_missing(test_arr, self.config.test_data_path)

        return (
            self.config.train_data_path,
            self.config.test_data_path,
        )

In [16]:
# Run the prepare-base-model stage end to end.
try:
    config = ConfigurationManager()
    prepare_base_model_config = config.get_prepare_base_model_config()
    prepare_base_model = PrepareBaseModel(prepare_base_model_config)
    # initiate_data_transformation() builds its own preprocessor, so a separate
    # get_data_transformer_object() call whose result is discarded is not needed.
    prepare_base_model.initiate_data_transformation()
except Exception:
    # Record the failure in the project log, then re-raise with the original
    # traceback intact (bare raise instead of `raise e`).
    logger.exception("Prepare base model stage failed")
    raise

[2024-07-06 21:21:11,620: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-07-06 21:21:11,622: INFO: common: yaml file: params.yaml loaded successfully]
[2024-07-06 21:21:11,624: INFO: common: created directory at: artifacts]
[2024-07-06 21:21:11,627: INFO: common: created directory at: artifacts/prepare_base_model]
[2024-07-06 21:21:11,667: INFO: 3280885768: Read train and test data completed]
