In [1]:
import os

In [2]:
# Show the kernel's current working directory (IPython magic).
%pwd

'c:\\Users\\Omar\\Desktop\\Omar_Files\\Python_Analysis\\EndToEndMachineLearningProjectAutoMobilePrice\\research'

In [3]:
# Step up from the notebook folder to its parent so the `src.*` imports and the
# relative YAML/artifact paths used below resolve. The guard makes this cell
# idempotent: once the working directory already contains `src`, re-running
# the cell no longer climbs one level higher each time.
if not os.path.isdir("src"):
    os.chdir("../")

In [4]:
# Show the working directory again to check the result of the directory change.
%pwd

'c:\\Users\\Omar\\Desktop\\Omar_Files\\Python_Analysis\\EndToEndMachineLearningProjectAutoMobilePrice'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataTransfornmationConfig:
    """Immutable settings for the data-transformation stage.

    Attributes:
        root_dir: Directory the stage writes its output files into.
        data_path: Location of the CSV file the stage reads.
    """

    root_dir: Path
    data_path: Path

In [6]:
from src.AutoMobilePriceRegression.constants import *
from src.AutoMobilePriceRegression.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    """Load the project's YAML files and build per-stage configuration objects."""

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH,
                 schema_filepath=SCHEMA_FILE_PATH) -> None:
        """Read the config, params and schema files and create the artifacts root.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransfornmationConfig:
        """Build the data-transformation settings from the loaded configuration.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            DataTransfornmationConfig: settings whose ``root_dir`` and
            ``data_path`` are ``Path`` objects, matching the declared field types.
        """
        stage = self.config.data_transformation

        create_directories([stage.root_dir])

        # The dataclass declares both fields as Path, so convert the raw
        # configuration values instead of passing them through unchanged.
        return DataTransfornmationConfig(
            root_dir=Path(stage.root_dir),
            data_path=Path(stage.data_path),
        )

In [8]:
import sys
import numpy as np 
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder,StandardScaler,OneHotEncoder
from AutoMobilePriceRegression import logger
from AutoMobilePriceRegression.utils.common import get_size
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import OrdinalEncoder


In [9]:
class DataTransfornmation:
    """Label-encode the text columns of a CSV and split it into train/test files."""

    def __init__(self, config: "DataTransfornmationConfig"):
        """Store the stage settings; ``data_path`` and ``root_dir`` are read later."""
        self.config = config

    def data_LabelEncoder(self):
        """Load the CSV at ``config.data_path`` and integer-encode its text columns.

        Returns:
            pandas.DataFrame: the loaded data in which every object-dtype column
            has been replaced by its ``LabelEncoder`` codes; all other columns
            are left as read.
        """
        df = pd.read_csv(self.config.data_path)

        # Work out the object-dtype columns once, up front, rather than
        # re-running select_dtypes for every column of the frame.
        object_columns = df.select_dtypes(include="object").columns
        for col in object_columns:
            # A fresh encoder per column, fitted on the whole (unsplit) column.
            df[col] = LabelEncoder().fit_transform(df[col])
        return df

    def train_test_splitting(self, test_size=0.2, random_state=42):
        """Encode the data, split it, and write ``train.csv`` / ``test.csv``.

        Both files are written into ``config.root_dir`` without the index, and
        the shapes of the two parts are logged.

        Args:
            test_size: Passed to ``train_test_split``; defaults to 0.2.
            random_state: Seed passed to ``train_test_split``; defaults to 42.
        """
        df = self.data_LabelEncoder()

        train_set, test_set = train_test_split(
            df, test_size=test_size, random_state=random_state
        )

        train_set.to_csv(os.path.join(self.config.root_dir, "train.csv"), index=False)
        test_set.to_csv(os.path.join(self.config.root_dir, "test.csv"), index=False)

        logger.info("Data Splitting is completed")
        logger.info(train_set.shape)
        logger.info(test_set.shape)

In [10]:
# Run the stage end to end: load the configuration, build the transformer,
# then encode and split the data. Errors are left to propagate unchanged so
# the notebook shows the original traceback.
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation_config()
data_transformation = DataTransfornmation(data_transformation_config)
data_transformation.train_test_splitting()

[2024-09-07 00:17:17,429: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-09-07 00:17:17,432: INFO: common: yaml file: params.yaml loaded successfully]
[2024-09-07 00:17:17,436: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-09-07 00:17:17,438: INFO: common: created directory at: artifacts]
[2024-09-07 00:17:17,440: INFO: common: created directory at: artifacts/data_transformation]
[2024-09-07 00:17:17,460: INFO: 3694206378: Data Splitting is completed]
[2024-09-07 00:17:17,461: INFO: 3694206378: (164, 15)]
[2024-09-07 00:17:17,462: INFO: 3694206378: (41, 15)]
