In [2]:
import os

In [3]:
%pwd

'c:\\Users\\Omar\\Desktop\\Omar_Files\\Python_Analysis\\EndToEndMachineLearningProjectAutoMobilePrice\\research'

In [4]:
# Step up from research/ to the project root so the `src...` imports and the
# relative config/artifact paths used below resolve.
# Guarded so that re-running this cell does not keep climbing one directory
# per execution.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [32]:
%pwd

'c:\\Users\\Omar\\Desktop\\Omar_Files\\Python_Analysis\\EndToEndMachineLearningProjectAutoMobilePrice'

In [33]:
from dataclasses import dataclass
from pathlib import Path
import pickle


@dataclass(frozen=True)
class PrepareBaseModelConfig:
    """Immutable settings for the prepare-base-model stage.

    Every field except ``source_URL`` is a filesystem location.
    """
    # Directory that holds this stage's outputs.
    root_dir: Path
    base_model_path:Path
    #updated_base_model_path:Path
    # Remote location of the raw CSV. Kept as ``str``: pathlib normalises
    # separators, which would corrupt the ``//`` in a URL.
    source_URL: str
    # Local copy of the downloaded raw CSV.
    local_data_file_path:Path
    # Train / test split files.
    train_data_path: Path
    test_data_path: Path

In [34]:
from src.AutoMobilePriceRegression.constants import *
from src.AutoMobilePriceRegression.utils.common import read_yaml, create_directories

In [35]:
class ConfigurationManager:
    """Load the project YAML files and build per-stage configuration objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path handed to ``read_yaml`` for the main config.
            params_filepath: Path handed to ``read_yaml`` for the parameters.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])


    def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:
        """Build the ``PrepareBaseModelConfig`` from the ``prepare_base_model`` section.

        Calls ``create_directories`` on the section's ``root_dir`` before
        building the object.

        Returns:
            PrepareBaseModelConfig: All filesystem fields wrapped in ``Path``;
            ``source_URL`` passed through unchanged.
        """
        config = self.config.prepare_base_model

        create_directories([config.root_dir])

        prepare_base_model_config = PrepareBaseModelConfig(
            root_dir=Path(config.root_dir),
            base_model_path=Path(config.base_model_path),
            #updated_base_model_path=Path(config.updated_base_model_path),
            # Deliberately not wrapped in Path: it is a URL, not a file path.
            source_URL=config.source_URL,
            # Wrapped like root_dir/base_model_path so every path field
            # matches the ``Path`` annotation on the dataclass.
            local_data_file_path=Path(config.local_data_file_path),
            train_data_path=Path(config.train_data_path),
            test_data_path=Path(config.test_data_path),
        )

        return prepare_base_model_config

In [36]:
import sys
import numpy as np 
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder,StandardScaler,OneHotEncoder
from src.AutoMobilePriceRegression import logger
from src.AutoMobilePriceRegression.utils.common import get_size
#from src.AutoMobilePriceRegression.components.data_ingestion import DataIngestion
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import OrdinalEncoder

In [37]:
class PrepareBaseModel:
    """Fetch the raw data set, write encoded train/test splits and scale features.

    File locations come from the ``PrepareBaseModelConfig`` passed in, except
    the pickled label encoder, which is written to a fixed path under
    ``./artifacts``.
    """

    def __init__(self,config:PrepareBaseModelConfig):
        self.config= config

    def download_file(self):
        """Download the source CSV and write label-encoded train/test splits.

        Nothing is downloaded, encoded or split when
        ``config.local_data_file_path`` already exists. Each split file is
        written only if it is not already on disk.

        Returns:
            tuple: ``(config.train_data_path, config.test_data_path)``.
            Returned on every call, including when the raw file was already
            present and the download was skipped.
        """
        if not os.path.exists(self.config.local_data_file_path):
            df = pd.read_csv(self.config.source_URL)
            # Keep an untouched copy of the download before any encoding.
            df.to_csv(self.config.local_data_file_path, index=False, header=True)

            categorical_columns = [
                'num_of_doors', 'body_style', 'drive_wheels',
                'engine_location', 'num_of_cylinders', 'fuel_system',
            ]

            # NOTE(review): a single LabelEncoder is re-fitted for every
            # column, so the object pickled below only holds the classes of
            # the last column encoded. Left unchanged because the consumers of
            # the pickle are not visible here; reusing the mapping for all
            # columns needs one encoder per column.
            CatEncod = LabelEncoder()
            for col in df:
                if col in categorical_columns:
                    df[col] = CatEncod.fit_transform(df[col])

            train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)

            # Never overwrite a split that is already on disk.
            if not os.path.exists(self.config.train_data_path):
                train_set.to_csv(self.config.train_data_path, index=False, header=True)

            if not os.path.exists(self.config.test_data_path):
                test_set.to_csv(self.config.test_data_path, index=False, header=True)

            with open("./artifacts/model_CatEncod.pkl", "wb") as f:
                pickle.dump(CatEncod, f)

        # Outside the ``if`` so callers get the paths even when the raw file
        # already existed (previously this case returned None).
        return (
            self.config.train_data_path,
            self.config.test_data_path,
        )

    def initiate_data_transformation(self):
        """Standard-scale the features of the train/test splits.

        Reads the splits from ``config.train_data_path`` and
        ``config.test_data_path``, fits a ``StandardScaler`` on the training
        features (every column except ``price``) and applies it to both
        splits. The unscaled target is appended as the last column of each
        array. The fitted scaler is a local and is not saved.

        Returns:
            tuple: ``(config.train_data_path, config.test_data_path)``.
        """
        # Read from the configured locations -- the ones download_file writes
        # to and this method returns -- rather than hard-coded copies of them.
        train_df = pd.read_csv(self.config.train_data_path)
        test_df = pd.read_csv(self.config.test_data_path)

        logger.info("Read train and test data completed")

        target_column_name = "price"

        input_feature_train_df = train_df.drop(target_column_name, axis=1)
        target_feature_train_df = train_df[target_column_name]

        input_feature_test_df = test_df.drop(target_column_name, axis=1)
        target_feature_test_df = test_df[target_column_name]

        # Fit on the training features only; the test features reuse the same
        # statistics.
        sc = StandardScaler()
        input_feature_train_arr = sc.fit_transform(input_feature_train_df)
        input_feature_test_arr = sc.transform(input_feature_test_df)

        train_arr = np.c_[input_feature_train_arr, np.array(target_feature_train_df)]
        test_arr = np.c_[input_feature_test_arr, np.array(target_feature_test_df)]

        # NOTE(review): both guards are False whenever the reads above
        # succeeded, so the scaled arrays are never persisted. Kept as-is
        # because writing unconditionally would overwrite the unscaled splits
        # this method reads (and drop their column names); a separate output
        # path is needed to store the transformed data.
        if not os.path.exists(self.config.train_data_path):
            pd.DataFrame(train_arr).to_csv(self.config.train_data_path, index=False)
        if not os.path.exists(self.config.test_data_path):
            pd.DataFrame(test_arr).to_csv(self.config.test_data_path, index=False)

        return (
            self.config.train_data_path,
            self.config.test_data_path,
        )

In [38]:
# Run the stage end to end: build the config, fetch/split the data, then scale it.
# No try/except wrapper: the previous handler only re-raised the same
# exception, so errors propagate exactly as before with a shorter traceback.
config = ConfigurationManager()
prepare_base_model_config = config.get_prepare_base_model_config()
prepare_base_model = PrepareBaseModel(prepare_base_model_config)
prepare_base_model.download_file()
prepare_base_model.initiate_data_transformation()

[2024-07-14 15:43:11,997: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-07-14 15:43:12,000: INFO: common: yaml file: params.yaml loaded successfully]
[2024-07-14 15:43:12,002: INFO: common: created directory at: artifacts]
[2024-07-14 15:43:12,005: INFO: common: created directory at: artifacts/prepare_base_model]
[2024-07-14 15:43:12,049: INFO: 636616641: Read train and test data completed]
