In [1]:
import os
%pwd


'c:\\Users\\DELL\\Desktop\\MLflow-deployment\\research'

In [2]:
# Move up to the project root so the `src` package and the relative config
# paths used by later cells resolve. The guard makes this cell idempotent:
# once `src` is visible from the working directory, re-running the cell no
# longer climbs one more directory each time.
if not os.path.isdir("src"):
    os.chdir("../")

In [3]:
%pwd

'c:\\Users\\DELL\\Desktop\\MLflow-deployment'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable settings for the data transformation stage.

    Attributes:
        root_dir: Directory the stage writes its output files into.
        data_path: Location of the CSV file the stage reads as input.
    """

    root_dir: Path
    data_path: Path

In [5]:
from src.mlproject.constants import *
from src.mlproject.utils.common import read_yaml, create_directories

In [6]:
# configuration
class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage config objects."""

    def __init__(
        self,
        config_file_path: Path = CONFIG_FILE_PATH,
        params_file_path: Path = PARAMS_FILE_PATH,
        schema_file_path: Path = SCHEMA_FILE_PATH,
    ) -> None:
        """Read the config, params and schema files and prepare the artifacts root.

        Args:
            config_file_path: YAML file holding the stage settings.
            params_file_path: YAML file stored on ``self.params``.
            schema_file_path: YAML file stored on ``self.schema``.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)
        self.schema = read_yaml(schema_file_path)

        # The artifacts root is handed to create_directories up front, before
        # any stage-specific directory is requested.
        artifacts_root = Path(self.config["artifacts_root"])
        create_directories([artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the data transformation settings from the loaded config.

        Passes the stage's ``root_dir`` to ``create_directories`` as a side
        effect before the config object is assembled.

        Returns:
            A ``DataTransformationConfig`` whose fields are ``Path`` objects
            taken from the ``data_transformation`` section of the config.
        """
        stage_section = self.config["data_transformation"]

        stage_root = Path(stage_section["root_dir"])
        create_directories([stage_root])

        return DataTransformationConfig(
            root_dir=stage_root,
            data_path=Path(stage_section["data_path"]),
        )

    
    


In [7]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from src.mlproject import logger

In [8]:
class DataTransformation:
    """Splits the configured CSV file into train and test CSV files."""

    def __init__(self, config: DataTransformationConfig):
        """Keep the stage configuration for later use.

        Args:
            config: Supplies ``data_path`` (input CSV) and ``root_dir``
                (directory that receives ``train.csv`` and ``test.csv``).
        """
        self.config = config

    def initiate_data_transformation(self, test_size: float = 0.2, random_state: int = 42):
        """Log the start of the stage, then run :meth:`transform_data`.

        Args:
            test_size: Forwarded unchanged to :meth:`transform_data`.
            random_state: Forwarded unchanged to :meth:`transform_data`.
        """
        logger.info("Initiating data transformation...")
        self.transform_data(test_size=test_size, random_state=random_state)

    def transform_data(self, test_size: float = 0.2, random_state: int = 42):
        """Read the input CSV, split it, and write the two parts to disk.

        The defaults reproduce the previously hard-coded split, so existing
        calls without arguments behave as before.

        Args:
            test_size: Passed to ``train_test_split`` as ``test_size``.
            random_state: Passed to ``train_test_split`` as ``random_state``
                so the split is repeatable.
        """
        df = pd.read_csv(self.config.data_path)
        logger.info(f"Original Data Shape: {df.shape}")

        # Example transformation: train-test split
        train_df, test_df = train_test_split(
            df, test_size=test_size, random_state=random_state
        )
        logger.info(f"Train Data Shape: {train_df.shape}")
        logger.info(f"Test Data Shape: {test_df.shape}")

        # Make sure the destination exists instead of relying on an earlier
        # step having created it; a missing directory would make to_csv fail.
        output_dir = Path(self.config.root_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Save transformed data
        train_df.to_csv(output_dir / "train.csv", index=False)
        test_df.to_csv(output_dir / "test.csv", index=False)
        logger.info("Data transformation completed.")


In [9]:
# Run the stage end to end: load configuration, build the stage config,
# then split the data and write the train/test files.
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    data_transformation.transform_data()
except Exception:
    # Record the failure (with traceback) in the project log, then re-raise
    # with a bare `raise` so the original exception propagates untouched.
    logger.exception("Data transformation stage failed")
    raise

2025-10-04 22:13:10,640 - INFO - yaml file: config\config.yaml loaded successfully
2025-10-04 22:13:10,644 - INFO - yaml file: params.yaml loaded successfully
2025-10-04 22:13:10,648 - INFO - yaml file: schema.yaml loaded successfully
2025-10-04 22:13:10,651 - INFO - Directory created at: artifacts
2025-10-04 22:13:10,655 - INFO - Directory created at: artifacts\data_transformation
2025-10-04 22:13:10,703 - INFO - Original Data Shape: (1599, 12)
2025-10-04 22:13:10,721 - INFO - Train Data Shape: (1279, 12)
2025-10-04 22:13:10,724 - INFO - Test Data Shape: (320, 12)
2025-10-04 22:13:10,760 - INFO - Data transformation completed.
