In [128]:
import os

In [129]:
%pwd

'c:\\Users\\quamr\\OneDrive\\Desktop\\project\\gemstonePricePrediction'

In [130]:
from pathlib import Path

# Switch to the project root so that relative paths (the `src` package imports
# and the "artifacts/..." entries read from the YAML config) resolve.
# The root is located by walking up from the current directory until a
# params.yaml file is found, instead of hard-coding a machine-specific
# absolute path. Re-running this cell is safe: once the kernel is already at
# the root, the first candidate matches and the chdir is a no-op.
_start_dir = Path.cwd()
PROJECT_ROOT = next(
    (d for d in (_start_dir, *_start_dir.parents) if (d / "params.yaml").is_file()),
    None,
)
if PROJECT_ROOT is None:
    raise FileNotFoundError(
        f"No params.yaml found in {_start_dir} or any parent directory; "
        "start the notebook from inside the project."
    )
os.chdir(PROJECT_ROOT)

In [131]:
from dataclasses import dataclass


@dataclass
class DataTransformationConfig:
    """Settings consumed by the data-transformation stage.

    Instances are built by ``ConfigurationManager.get_data_transformation_config``
    from the ``data_transformation`` section of the loaded config.
    """

    # Directory that is created for this stage before it runs.
    root_dir: str
    # CSV file read as the training set.
    train_data_path: str
    # CSV file read as the test set.
    test_data_path: str
    # Destination file for the fitted preprocessing object.
    preprocessor_obj_file_path: str


In [132]:
from src.gemstonePricePrediction.constants import *
from src.gemstonePricePrediction.utils.common import read_yaml, create_directories


In [133]:
class ConfigurationManager:
    """Reads the config, params and schema YAML files and builds stage configs."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH,
    ):
        """Load the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: YAML file stored on ``self.config``.
            params_filepath: YAML file stored on ``self.params``.
            schema_filepath: YAML file stored on ``self.schema``.
        """
        # Loaded in the order config -> params -> schema, one attribute at a time.
        yaml_sources = (
            ("config", config_filepath),
            ("params", params_filepath),
            ("schema", schema_filepath),
        )
        for attribute, yaml_path in yaml_sources:
            setattr(self, attribute, read_yaml(yaml_path))

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Create the stage directory and return the data-transformation settings.

        Returns:
            A ``DataTransformationConfig`` filled from the ``data_transformation``
            section of the loaded config.
        """
        section = self.config.data_transformation

        # The stage's root directory is created before the config is handed out.
        create_directories([section.root_dir])

        field_names = (
            "root_dir",
            "train_data_path",
            "test_data_path",
            "preprocessor_obj_file_path",
        )
        return DataTransformationConfig(
            **{field: getattr(section, field) for field in field_names}
        )


In [134]:
import sys

import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OrdinalEncoder, StandardScaler

from src.gemstonePricePrediction.exception import CustomException
from src.gemstonePricePrediction.logger import logging

In [135]:
class DataTransformation:
    """Builds the feature preprocessor and applies it to the train and test CSVs.

    The methods rely on names that must be in scope in the notebook: ``os``,
    ``sys``, ``pd``, ``np``, the scikit-learn classes ``Pipeline``,
    ``SimpleImputer``, ``StandardScaler``, ``OrdinalEncoder`` and
    ``ColumnTransformer``, and the project's ``logging``, ``CustomException``
    and ``save_object``.
    """

    def __init__(self, config: DataTransformationConfig):
        """Store the stage settings (input CSV paths and preprocessor output path)."""
        self.config = config

    # Creating preprocessing object
    def get_data_transformation_object(self):
        """Return an unfitted ``ColumnTransformer`` for the gemstone features.

        Numerical columns are median-imputed and standardized. Categorical
        columns are imputed with the most frequent value, ordinal-encoded using
        the explicit category lists below, and then standardized.

        Raises:
            CustomException: wraps any error raised while building the object.
        """
        try:
            categorical_cols = ['cut', 'color', 'clarity']
            numerical_cols = ['carat', 'depth', 'table', 'x', 'y', 'z']

            # The position of each label in its list is the integer code the
            # OrdinalEncoder assigns; the lists are passed in the same order
            # as categorical_cols.
            cut_categories = ['Fair', 'Good', 'Very Good', 'Premium', 'Ideal']
            color_categories = ['D', 'E', 'F', 'G', 'H', 'I', 'J']
            clarity_categories = ['I1','SI2','SI1','VS2','VS1','VVS2','VVS1','IF']

            num_pipeline = Pipeline(
                steps=[
                    ('imputer', SimpleImputer(strategy='median')),
                    ('scaler', StandardScaler())
                ]
            )

            cat_pipeline = Pipeline(
                steps=[
                    ('imputer', SimpleImputer(strategy='most_frequent')),
                    ('ordinal_encoder', OrdinalEncoder(
                        categories=[cut_categories, color_categories, clarity_categories])),
                    ('scaler', StandardScaler())
                ]
            )

            logging.info(f"Categorical Columns : {categorical_cols}")
            logging.info(f"Numerical Columns   : {numerical_cols}")

            preprocessor = ColumnTransformer(
                [
                    ('num_pipeline', num_pipeline, numerical_cols),
                    ('cat_pipeline', cat_pipeline, categorical_cols)
                ]
            )

            return preprocessor

        except Exception as e:
            logging.error("Error in get_data_transformation_object")
            # `from e` keeps the original error attached as the explicit cause.
            raise CustomException(e, sys) from e

    # Full Transformation Process
    def initiate_data_transformation(self):
        """Read the train/test CSVs, preprocess them and save the preprocessor.

        The preprocessor is fitted on the training features only and then
        applied to the test features. The "price" column is appended as the
        last column of each returned array.

        Returns:
            tuple: ``(train_arr, test_arr, preprocessor_obj_file_path)``.

        Raises:
            CustomException: wraps any error raised during the process, e.g. a
                missing input CSV.
        """
        try:
            logging.info("Starting data transformation process")

            train_path = self.config.train_data_path
            test_path = self.config.test_data_path

            # Absolute paths make a "file not found" message unambiguous.
            train_df = pd.read_csv(os.path.abspath(train_path))
            test_df = pd.read_csv(os.path.abspath(test_path))

            logging.info(f"Train dataset shape: {train_df.shape}")
            logging.info(f"Test dataset shape: {test_df.shape}")

            preprocessing_obj = self.get_data_transformation_object()

            target_column_name = "price"
            drop_columns = [target_column_name, "id"]

            # `columns=` already selects the axis, so no `axis` argument is passed.
            input_feature_train_df = train_df.drop(columns=drop_columns)
            target_feature_train_df = train_df[target_column_name]

            input_feature_test_df = test_df.drop(columns=drop_columns)
            target_feature_test_df = test_df[target_column_name]

            logging.info("Applying preprocessing on train and test datasets")

            # Fit on train only; the test set is transformed with the same
            # fitted statistics.
            input_feature_train_arr = preprocessing_obj.fit_transform(input_feature_train_df)
            input_feature_test_arr = preprocessing_obj.transform(input_feature_test_df)

            train_arr = np.c_[input_feature_train_arr, np.array(target_feature_train_df)]
            test_arr = np.c_[input_feature_test_arr, np.array(target_feature_test_df)]

            # A bare file name has an empty dirname, and os.makedirs('') raises,
            # so the parent directory is only created when there is one.
            preprocessor_dir = os.path.dirname(self.config.preprocessor_obj_file_path)
            if preprocessor_dir:
                os.makedirs(preprocessor_dir, exist_ok=True)

            # NOTE(review): save_object is not imported in any visible cell.
            # Presumably it lives in the project's utils module; confirm and
            # import it before running on a fresh kernel.
            save_object(
                file_path=self.config.preprocessor_obj_file_path,
                obj=preprocessing_obj
            )

            logging.info("Preprocessor saved successfully")

            return (
                train_arr,
                test_arr,
                self.config.preprocessor_obj_file_path
            )

        except Exception as e:
            logging.error("Error in initiate_data_transformation")
            raise CustomException(e, sys) from e


In [136]:
# Run the data-transformation stage end to end.
# There is deliberately no try/except here: a handler that only re-raises the
# same exception adds nothing, so errors are left to propagate unchanged.
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation_config()

data_transformation = DataTransformation(config=data_transformation_config)

# Keep the results instead of discarding them so later cells can inspect the
# transformed arrays and the saved preprocessor location.
train_arr, test_arr, preprocessor_path = data_transformation.initiate_data_transformation()


[2026-02-11 01:37:43,562: INFO: common: yaml file loaded successfully from: C:\Users\quamr\OneDrive\Desktop\project\gemstonePricePrediction\config\config.yaml]
[2026-02-11 01:37:43,567: INFO: common: yaml file loaded successfully from: C:\Users\quamr\OneDrive\Desktop\project\gemstonePricePrediction\params.yaml]
[2026-02-11 01:37:43,570: INFO: common: yaml file loaded successfully from: C:\Users\quamr\OneDrive\Desktop\project\gemstonePricePrediction\schema.yaml]
[2026-02-11 01:37:43,575: INFO: common: created directory at: artifacts]
[2026-02-11 01:37:43,579: INFO: common: created directory at: artifacts/data_transformation]
[2026-02-11 01:37:43,580: INFO: 3089024718: Starting data transformation process]
[2026-02-11 01:37:43,586: ERROR: 3089024718: Error in initiate_data_transformation]


CustomException: Error occurred in script: C:\Users\quamr\AppData\Local\Temp\ipykernel_25272\3089024718.py at line number: 57 with message: [Errno 2] No such file or directory: 'c:\\Users\\quamr\\OneDrive\\Desktop\\project\\gemstonePricePrediction\\artifacts\\train.csv'