## Data Transformation trial-runs 

In [17]:
import os

In [18]:
# Presumably switches to the project root so that the relative paths used below
# (config, artifacts) resolve — TODO confirm.
# NOTE(review): this absolute path is machine-specific; adjust it when running elsewhere.
os.chdir(r"C:\Users\Junior\OneDrive\Desktop\Heart-Attack-Prediction-Model")

## Trial-runs for entity_config file

In [19]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable (frozen) settings for the data transformation stage."""
    # Directory the transformation stage creates and writes its output into.
    root_dir: Path
    # CSV file the transformation stage loads as its input.
    selected_data_file: Path
    # Processed-data file location as given in the configuration.
    processed_data_file: Path

## Trial-runs for configuration file

In [20]:
#importing all project paths and modules necessary for project configurations 
from heartAttack.constants import  *
from heartAttack.utils.common import read_yaml, create_directories

In [21]:
#Updating the configuration file 
class ConfigurationManager:
    """Read the project's YAML settings and build per-stage configuration objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        selected_schema_filepath=SELECTED_SCHEMA_FILE_PATH,
    ):
        """Load the three YAML files and ensure the artifacts root exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
            selected_schema_filepath: Location of the selected-schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.selected_schema = read_yaml(selected_schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the configuration for the data transformation stage.

        The stage's root directory is handed to ``create_directories`` before
        the configuration object is returned.

        Returns:
            A ``DataTransformationConfig`` whose fields are all ``Path`` objects.
        """
        transformation = self.config.data_transformation
        # The transformation input is the file produced by the data_processing section.
        processing = self.config.data_processing

        create_directories([transformation.root_dir])

        # Wrap every entry in Path so the values match the dataclass annotations;
        # previously only processed_data_file was converted.
        return DataTransformationConfig(
            root_dir=Path(transformation.root_dir),
            selected_data_file=Path(processing.selected_data_file),
            processed_data_file=Path(transformation.processed_data_file),
        )

## Trial-runs for data_transformation component

In [22]:
import os 
import pandas as pd 
import numpy as np
from pathlib import Path 
from heartAttack import logger

In [23]:
class DataTransformation:
    """Fill missing values in the selected dataset and persist the result.

    The CSV named by ``config.selected_data_file`` is loaded when the object is
    created; ``process_and_store_data`` runs the cleaning steps and writes the
    output under ``config.root_dir``.
    """

    def __init__(self, config: DataTransformationConfig):
        """Store *config* and load the selected data file into ``self.df``."""
        self.config = config
        self.df = pd.read_csv(self.config.selected_data_file)
        # Set by process_and_store_data(); stays None until it has run.
        self.processed_df = None

    def process_and_store_data(self) -> None:
        """Run the processing steps on a copy of ``self.df`` and save the result.

        The processed frame is kept on ``self.processed_df`` and written,
        without the index, to ``processed_df.csv`` inside ``config.root_dir``.
        """
        df = self.df.copy()  # leave the raw frame untouched
        # Further transformation steps can be chained here.
        df = self.fill_missing_with_mode(df)
        self.processed_df = df

        # NOTE(review): the output file name is fixed here and
        # config.processed_data_file is not consulted — confirm the two agree.
        os.makedirs(self.config.root_dir, exist_ok=True)
        save_path = Path(self.config.root_dir) / "processed_df.csv"
        df.to_csv(save_path, index=False)
        logger.info(f"Processed data stored at: {save_path}")

    def fill_missing_with_mode(self, df: pd.DataFrame) -> pd.DataFrame:
        """Fill missing values in each column with that column's mode.

        Columns are updated on *df* itself, which is also returned. When a
        column has several modes, the first one reported by ``Series.mode`` is
        used. A column that cannot be filled is left unchanged and a warning
        is logged, so one bad column does not abort the whole run.

        Args:
            df: Frame whose missing values should be filled.

        Returns:
            The same frame, with missing values replaced where possible.
        """
        for col in df.columns:
            column = df[col]
            if not column.isnull().any():
                continue
            try:
                modes = column.mode()
                if modes.empty:
                    # Every value is missing, so there is nothing to fill with.
                    logger.warning(
                        f"Could not fill missing values in column '{col}': "
                        "no mode available (all values are missing)"
                    )
                    continue
                # Assign the filled column back instead of calling
                # fillna(inplace=True) on df[col]: the chained in-place form
                # may act on a temporary and leave df unchanged.
                df[col] = column.fillna(modes.iloc[0])
            except Exception as e:
                # Deliberate best-effort: report the problem and move on.
                logger.warning(f"Could not fill missing values in column '{col}': {e}")
        return df

## Trial-runs for data_transformation_pipeline

In [24]:
# Run the data transformation stage end to end: load the configuration,
# build the stage settings, then process and store the data.
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    data_transformation.process_and_store_data()
except Exception:
    # Record the failure in the project log, then re-raise with a bare `raise`
    # so the original traceback is kept intact.
    logger.exception("Data transformation trial run failed")
    raise

[2025-07-29 22:07:14,861: INFO: common: yaml_file: config\config.yaml loaded successfully]
[2025-07-29 22:07:14,865: INFO: common: yaml_file: params.yaml loaded successfully]
[2025-07-29 22:07:14,872: INFO: common: yaml_file: selected_schema.yaml loaded successfully]
[2025-07-29 22:07:14,873: INFO: common: Created directory at: artifacts]
[2025-07-29 22:07:14,875: INFO: common: Created directory at: artifacts/data_transformation]
[2025-07-29 22:07:14,894: INFO: 374889826: Processed data stored at: artifacts\data_transformation\processed_df.csv]
