## Data Transformation trial-runs 

In [1]:
import os

In [2]:
# NOTE(review): hard-coded, machine-specific absolute Windows path. Presumably this is
# the project root, so that relative paths used by later cells resolve -- confirm, and
# adjust it when running on another machine.
os.chdir(r"C:\Users\USER\Desktop\MLDefaults\Heart-Attack-Prediction-Model")

## Trial-runs for entity_config file

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable settings for the data-transformation step.

    The dataclass is frozen, so fields cannot be reassigned after construction.
    """
    # Directory of the transformation step; it is created before use.
    root_dir: Path
    # CSV file that the transformation step loads as its input.
    selected_data_file: Path
    # Presumably the destination of the processed CSV -- TODO confirm against the YAML config.
    processed_data_file: Path

## Trial-runs for configuration file

In [4]:
#importing all project paths and modules necessary for project configurations 
from heartAttack.constants import  *
from heartAttack.utils.common import read_yaml, create_directories

In [None]:
#Updating the configuration file 
class ConfigurationManager:
    """Read the project's YAML settings and build per-step config objects.

    Constructing an instance reads three YAML files with ``read_yaml`` and
    creates the directory named by ``artifacts_root`` in the main config.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        selected_schema_filepath=SELECTED_SCHEMA_FILE_PATH,
    ):
        """Load the YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            selected_schema_filepath: Path of the selected-schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.selected_schema = read_yaml(selected_schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the configuration for the data-transformation step.

        The step's ``root_dir`` is created as a side effect. The input file
        comes from the ``data_processing`` section because the transformation
        step consumes that section's ``selected_data_file``.

        Returns:
            A ``DataTransformationConfig`` whose three fields are ``Path``
            objects, matching the dataclass annotations.
        """
        transformation_section = self.config.data_transformation
        processing_section = self.config.data_processing

        create_directories([transformation_section.root_dir])

        # Coerce every value to Path: the YAML loader yields plain values, and
        # previously only processed_data_file was converted.
        return DataTransformationConfig(
            root_dir=Path(transformation_section.root_dir),
            selected_data_file=Path(processing_section.selected_data_file),
            processed_data_file=Path(transformation_section.processed_data_file),
        )

## Trial-runs for data_transformation component

In [5]:
import os 
import pandas as pd 
import numpy as np
from pathlib import Path 
from heartAttack import logger

In [6]:
class DataTransformation:
    """Load the selected dataset, impute missing values and persist the result."""

    def __init__(self, config: "DataTransformationConfig"):
        """Store the configuration and read the input CSV.

        Args:
            config: Settings object providing ``selected_data_file`` (the CSV
                to load) and ``root_dir`` (the output directory).
        """
        self.config = config
        self.df = pd.read_csv(self.config.selected_data_file)
        self.processed_df = None

    def fill_missing_with_mode(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with missing values replaced by column modes.

        For each column that contains missing values, every missing entry is
        replaced with that column's most frequent non-missing value. When
        several values tie, the first one returned by ``Series.mode`` is used.
        A column with no non-missing values has no mode and is left unchanged.

        Args:
            df: Frame to impute. It is not modified.

        Returns:
            A new DataFrame with the same columns and index as ``df``.
        """
        filled = df.copy()
        for column in filled.columns:
            series = filled[column]
            if not series.isna().any():
                continue
            modes = series.mode(dropna=True)
            if modes.empty:
                # Entirely missing column: there is nothing to impute with.
                continue
            filled[column] = series.fillna(modes.iloc[0])
        return filled

    def process_and_store_data(self) -> None:
        """Run the transformation steps and write the result to disk.

        The loaded frame is imputed with ``fill_missing_with_mode``, kept on
        ``self.processed_df`` and written without the index to
        ``<root_dir>/processed_df.csv``. ``root_dir`` is created if missing.
        """
        # fill_missing_with_mode works on a copy, so self.df stays untouched.
        # Further transformation steps can be chained here.
        processed = self.fill_missing_with_mode(self.df)
        self.processed_df = processed

        # NOTE(review): the file name is fixed here and config.processed_data_file
        # is not consulted -- confirm which of the two should be authoritative.
        output_dir = Path(self.config.root_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        save_path = output_dir / "processed_df.csv"
        processed.to_csv(save_path, index=False)
        logger.info(f"Processed data stored at: {save_path}")

## Trial-runs for data_transformation_pipeline

In [None]:
# Run the data-transformation step end to end: load the configuration, build
# the step's config object, then process the data and write it to disk.
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    data_transformation.process_and_store_data()
except Exception:
    # Record the failure with its traceback, then propagate the original
    # exception unchanged (a bare raise keeps the traceback intact).
    logger.exception("Data transformation stage failed")
    raise