In [2]:
import os

In [1]:
%pwd

'c:\\code\\ML\\e-commerce\\research'

In [3]:

os.chdir("../")

In [4]:
%pwd

'c:\\code\\ML\\e-commerce'

In [5]:
#config yaml

In [6]:
#config entity

In [7]:
#configuration

In [8]:
from e_commerce.utils.common import read_yaml , create_directories 
from e_commerce.constants import *
from e_commerce.entity.config_entity import DataIngestionconfig,PreProcessing,FeatureEngineeringconfig,EDA,outlier

class ConfigurationManager:
    def __init__(self,config_file_path=CONFIG_FILE_PATH,
                 params_file_path = PARAMS_FILE_PATH):
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self)->DataIngestionconfig:
        config = self.config.data_ingestion
        data_ingestion_config = DataIngestionconfig(root_dir=config.root_dir,
                                                    source_url=config.source_url,
                                                    local_data_file=config.local_data_file)
        return data_ingestion_config
    
    def get_preprocessing_config(self)->PreProcessing:
        config = self.config.pre_processing
        create_directories([config.cleaned_data_save_path])
        create_directories([config.data_report])
        pre_processing_config = PreProcessing(data_path=config.data_path,cleaned_data_save_path=config.cleaned_data_save_path,data_report=config.data_report
                                              )
        return pre_processing_config
    
    def get_feature_engg_config(self)->FeatureEngineeringconfig:
        config = self.config.feature_engg
        create_directories([config.output_path])
        feature_engg_config = FeatureEngineeringconfig(data_path=config.data_path,output_path=config.output_path)
        return feature_engg_config 
    
    def get_eda_config(self)->EDA:
        config = self.config.EDA
        create_directories([config.report])
        eda_config = EDA(data_path=config.data_path,report=config.report)

        return eda_config
    
    def outlier_config(self)->outlier:
        config = self.config.outlier
        create_directories([config.output_path])
        create_directories([config.report])
        outlier_config = outlier(data_path=config.data_path,output_path=config.output_path,report=config.report)
        return outlier_config
    

In [9]:
#component

In [14]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
class OutlierComponent:
    def __init__(self,config:outlier):
        self.config = config
    
    def load_data(self):
        if not os.path.exists(self.config.data_path):
            raise FileNotFoundError(f"Input file not found: {self.config.data_path}")
        return pd.read_csv(self.config.data_path)

    def outlier_removal(self):
        df = self.load_data()
        rmf = df.drop(["CustomerID"],axis=1)
        for col in rmf.columns:
            q1 = rmf[col].quantile(0.25)
            q3 = rmf[col].quantile(0.75)
            IQR = q3-q1
            lower = q1-(1.5*IQR)
            upper = q3+(1.5*IQR)
            rmf = rmf[(rmf[col] >= lower) & (rmf[col] <= upper)]
        rmf.to_csv(os.path.join(self.config.output_path,"rmf_final.csv"),index=False)

        # lets do some eda with this data 
        # EDA - Histogram
        plt.figure(figsize=(12, 10))
        rmf.hist(bins=10, figsize=(12, 6))
        plt.suptitle('Distributions of Recency, Frequency, and Monetary', fontsize=16)
        plt.savefig(os.path.join(self.config.report, "histograms.png"))
        plt.clf()

        # EDA - Boxplots
        plt.figure(figsize=(12, 5))
        for i, col in enumerate(rmf.columns):
            plt.subplot(1, len(rmf.columns), i+1)
            sns.boxplot(y=rmf[col])
            plt.title(f'Boxplot of {col}')
        plt.tight_layout()
        plt.savefig(os.path.join(self.config.report, "boxplots.png"))
        plt.clf()

        # EDA - Correlation Heatmap
        plt.figure(figsize=(6, 4))
        sns.heatmap(rmf.corr(), annot=True, cmap='coolwarm')
        plt.title('RFM Feature Correlation')
        plt.savefig(os.path.join(self.config.report, "correlation.png"))
        plt.clf()

        # EDA - Pairplot
        sns.pairplot(rmf)
        plt.suptitle('Pairplot of RFM Features', y=1.02)
        plt.savefig(os.path.join(self.config.report, "pairplot.png"))
        plt.clf()

In [12]:
#pipeline

In [15]:
try :
    config = ConfigurationManager()
    outlier_Config = config.outlier_config()
    outlier_comp = OutlierComponent(config=outlier_Config)
    outlier_comp.outlier_removal()

except Exception as e:
    raise e

[2025-05-30 20:36:57,480: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-05-30 20:36:57,484: INFO: common: yaml file: params.yaml loaded successfully]
[2025-05-30 20:36:57,486: INFO: common: created directory at: artifacts]
[2025-05-30 20:36:57,491: INFO: common: created directory at: artifacts/outlier]
[2025-05-30 20:36:57,493: INFO: common: created directory at: artifacts/outlier/report]


<Figure size 1200x1000 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x500 with 0 Axes>

<Figure size 600x400 with 0 Axes>

<Figure size 1000x1000 with 0 Axes>