In [1]:
import os 
from pathlib import Path

In [2]:
%pwd

'c:\\code\\ML\\e-commerce\\research'

In [3]:
# The notebook is run from the project's `research` folder, while the config
# and artifact paths used further down are relative to the project root, so
# step up one level. The guard keeps this cell idempotent: re-running it (or
# starting the kernel at the project root) no longer climbs a further level.
if Path.cwd().name == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\code\\ML\\e-commerce'

In [None]:
#config.yaml

In [6]:
#config entity
from dataclasses import dataclass

@dataclass
class FeatureEngineeringconfig:
    """Filesystem locations used by the feature-engineering stage.

    Attributes:
        data_path: CSV file that the stage reads its input rows from.
        output_path: Directory that the stage writes its result file into.
    """

    data_path: Path
    output_path: Path
  

In [7]:
#configuration

In [8]:
from e_commerce.utils.common import read_yaml , create_directories 
from e_commerce.constants import *
from e_commerce.entity.config_entity import DataIngestionconfig,PreProcessing

class ConfigurationManager:
    """Loads the project's YAML settings and builds per-stage config objects."""

    def __init__(self, config_file_path=CONFIG_FILE_PATH,
                 params_file_path=PARAMS_FILE_PATH):
        """Read both YAML files and make sure the artifacts root exists.

        Args:
            config_file_path: Location of the main config YAML.
            params_file_path: Location of the params YAML.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionconfig:
        """Build the data-ingestion settings from the `data_ingestion` section."""
        section = self.config.data_ingestion
        return DataIngestionconfig(
            root_dir=section.root_dir,
            source_url=section.source_url,
            local_data_file=section.local_data_file,
        )

    def get_preprocessing_config(self) -> PreProcessing:
        """Build the pre-processing settings from the `pre_processing` section.

        Both output locations are passed to `create_directories` before the
        config object is built.
        """
        section = self.config.pre_processing
        create_directories([section.cleaned_data_save_path])
        create_directories([section.data_report])
        return PreProcessing(
            data_path=section.data_path,
            cleaned_data_save_path=section.cleaned_data_save_path,
            data_report=section.data_report,
        )

    def get_feature_engg_config(self) -> FeatureEngineeringconfig:
        """Build the feature-engineering settings from the `feature_engg` section.

        The output directory is passed to `create_directories` first.
        """
        section = self.config.feature_engg
        create_directories([section.output_path])
        return FeatureEngineeringconfig(
            data_path=section.data_path,
            output_path=section.output_path,
        )

In [9]:
#component

In [15]:
import pandas as pd
import datetime
class FeatureEnggComponent:
    """Derives per-customer recency / frequency / monetary features.

    Reads the transaction CSV at ``config.data_path`` and writes the aggregated
    table to ``rmf.csv`` inside ``config.output_path``.
    """

    # Columns the input CSV must provide for the aggregation in `feature_engg`.
    REQUIRED_COLUMNS = ("CustomerID", "InvoiceDate", "Quantity", "UnitPrice")

    def __init__(self, config: "FeatureEngineeringconfig"):
        """Store the stage config (needs `data_path` and `output_path`)."""
        self.config = config

    def load_data(self):
        """Load the input CSV into a DataFrame.

        Returns:
            pandas.DataFrame: contents of ``config.data_path``.

        Raises:
            FileNotFoundError: if ``config.data_path`` does not exist.
        """
        if not os.path.exists(self.config.data_path):
            raise FileNotFoundError(f"Input file not found: {self.config.data_path}")
        return pd.read_csv(self.config.data_path)

    def feature_engg(self):
        """Compute the per-customer feature table, save it, and return it.

        For every ``CustomerID``:
          * ``recency``   - whole days between the customer's latest
            ``InvoiceDate`` and a reference date one day after the newest
            ``InvoiceDate`` in the data;
          * ``frequency`` - number of distinct ``InvoiceDate`` values;
          * ``monetary``  - sum of ``Quantity * UnitPrice``.

        The table is written to ``<output_path>/rmf.csv`` without the
        positional index, so reloading it does not yield an ``Unnamed: 0``
        column.

        Returns:
            pandas.DataFrame: columns ``CustomerID``, ``recency``,
            ``frequency``, ``monetary`` (one row per customer).

        Raises:
            FileNotFoundError: if the input file does not exist.
            KeyError: if a required input column is missing.
        """
        df = self.load_data()

        missing = [col for col in self.REQUIRED_COLUMNS if col not in df.columns]
        if missing:
            raise KeyError(f"Input data is missing required column(s): {missing}")

        invoice_date = pd.to_datetime(df["InvoiceDate"])
        # Reference point is one day past the newest invoice, so the most
        # recent purchase still gets a recency of at least one day.
        ref_date = invoice_date.max() + datetime.timedelta(days=1)

        # `assign` returns a new frame; the loaded data is left untouched.
        transactions = df.assign(
            InvoiceDate=invoice_date,
            recency=(ref_date - invoice_date).dt.days,
            monetary=df["Quantity"] * df["UnitPrice"],
        )

        # One grouped pass replaces three groupbys plus two merges; the
        # resulting column order is unchanged.
        rmf_df = (
            transactions.groupby("CustomerID")
            .agg(
                recency=("recency", "min"),
                frequency=("InvoiceDate", "nunique"),
                monetary=("monetary", "sum"),
            )
            .reset_index()
        )

        rmf_df.to_csv(os.path.join(self.config.output_path, "rmf.csv"), index=False)
        return rmf_df

In [11]:
#pipeline


In [16]:
# Feature-engineering stage: resolve the stage settings from the YAML config,
# then build the component and run it.
try:
    config = ConfigurationManager()
    feature_engg_config = config.get_feature_engg_config()
    feature_engg_comp = FeatureEnggComponent(config=feature_engg_config)
    feature_engg_comp.feature_engg()
except Exception:
    # Bare `raise` re-raises the active exception as-is, without adding a
    # redundant traceback entry for this line.
    raise

[2025-05-28 15:46:38,148: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-05-28 15:46:38,158: INFO: common: yaml file: params.yaml loaded successfully]
[2025-05-28 15:46:38,166: INFO: common: created directory at: artifacts]
[2025-05-28 15:46:38,168: INFO: common: created directory at: artifacts/feature_engg]
