In [1]:
import os

In [2]:
# Move the working directory one level up so the relative paths used below
# resolve from the parent folder (presumably the project root — confirm).
# NOTE: the target is relative to the *current* directory, so each re-run of this
# cell climbs one more level; restart the kernel before running it again.
os.chdir("../")

In [3]:
%pwd

'c:\\code\\ML\\e-commerce'

In [4]:
#config.yaml
#config entity

In [5]:
#configuration

In [6]:
from e_commerce.utils.common import read_yaml , create_directories 
from e_commerce.constants import *
from e_commerce.entity.config_entity import DataIngestionconfig,PreProcessing,FeatureEngineeringconfig,EDA,outlier,cluster,model

class ConfigurationManager:
    """Hands out the typed configuration object for each pipeline stage.

    The config and params YAML files are read once in the constructor. Each
    accessor then picks its own section out of them, creates the output
    directories that section names (where it has any), and returns the
    matching entity from ``e_commerce.entity.config_entity``.
    """

    def __init__(self, config_file_path=CONFIG_FILE_PATH,
                 params_file_path=PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory."""
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionconfig:
        """Return the data-ingestion settings; creates no directories."""
        section = self.config.data_ingestion
        return DataIngestionconfig(
            root_dir=section.root_dir,
            source_url=section.source_url,
            local_data_file=section.local_data_file,
        )

    def get_preprocessing_config(self) -> PreProcessing:
        """Create the cleaned-data and data-report directories, then return the settings."""
        section = self.config.pre_processing
        create_directories([section.cleaned_data_save_path])
        create_directories([section.data_report])
        return PreProcessing(
            data_path=section.data_path,
            cleaned_data_save_path=section.cleaned_data_save_path,
            data_report=section.data_report,
        )

    def get_feature_engg_config(self) -> FeatureEngineeringconfig:
        """Create the feature-engineering output directory, then return the settings."""
        section = self.config.feature_engg
        create_directories([section.output_path])
        return FeatureEngineeringconfig(
            data_path=section.data_path,
            output_path=section.output_path,
        )

    def get_eda_config(self) -> EDA:
        """Create the EDA report directory, then return the settings."""
        section = self.config.EDA
        create_directories([section.report])
        return EDA(data_path=section.data_path, report=section.report)

    def outlier_config(self) -> outlier:
        """Create the outlier output and report directories, then return the settings."""
        section = self.config.outlier
        create_directories([section.output_path])
        create_directories([section.report])
        return outlier(
            data_path=section.data_path,
            output_path=section.output_path,
            report=section.report,
        )

    def cluster_config(self) -> cluster:
        """Create the cluster and report directories, then return the settings.

        ``random_state`` is taken from the ``cluster`` section of the params file.
        """
        section = self.config.cluster
        hyper_params = self.params.cluster
        create_directories([section.cluster])
        create_directories([section.report])
        return cluster(
            data_path=section.data_path,
            cluster=section.cluster,
            report=section.report,
            random_state=hyper_params.random_state,
        )

    def get_model_config(self) -> model:
        """Create the models and report directories, then return the settings.

        ``random_state`` is taken from the ``model`` section of the params file.
        """
        section = self.config.model
        hyper_params = self.params.model
        create_directories([section.models])
        create_directories([section.report])
        return model(
            data_path=section.data_path,
            kl_path=section.kl_path,
            models=section.models,
            report=section.report,
            random_state=hyper_params.random_state,
        )

In [7]:
#component

In [11]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
from sklearn.cluster import KMeans
from kneed import KneeLocator


class ModelBuildingComponent:
    """Fit the K-Means segmentation model and write its artifacts.

    Reads the CSV at ``config.data_path``, clusters it with the cluster count
    stored in the object pickled at ``config.kl_path``, and writes:

    * ``km_model.pkl`` and ``segmentation_output.csv`` under ``config.models``
    * three plots and ``cluster_profile_report.txt`` under ``config.report``
    """

    # Columns the plots and the cluster profile are built from.
    RFM_COLUMNS = ['recency', 'frequency', 'monetary']

    # Business names attached to cluster ids.
    # NOTE(review): a cluster id carries no inherent meaning, so this fixed
    # mapping should be checked against cluster_profile_report.txt after a refit.
    SEGMENT_MAP = {
        0: 'Loyal Customers',
        1: 'Churned Customers',
        2: 'Potential Loyalists'
    }

    def __init__(self,config:model):
        """Keep the model-stage configuration for later use."""
        self.config = config

    def load_data(self):
        """Return the CSV at ``config.data_path`` as a DataFrame.

        Raises:
            FileNotFoundError: if the path does not exist.
        """
        if not os.path.exists(self.config.data_path):
            raise FileNotFoundError(f"file not found at {self.config.data_path}")
        return pd.read_csv(self.config.data_path)

    def _save_figure(self, fig, filename):
        """Write ``fig`` into the report directory, then close it.

        Closing (rather than clearing) releases the figure so that repeated
        runs do not accumulate open, empty figures.
        """
        fig.savefig(os.path.join(self.config.report, filename))
        plt.close(fig)

    def build_model(self):
        """Fit K-Means on the loaded data and write the model, plots and reports.

        The model is fitted on every column of the loaded CSV; the plots and
        the cluster profile use only the columns in ``RFM_COLUMNS``.

        Raises:
            FileNotFoundError: if ``config.data_path`` does not exist.
            KeyError: if any of the ``RFM_COLUMNS`` is missing from the data.
            ValueError: if the loaded knee locator holds no elbow value.
        """
        rmf_scaled = self.load_data()

        # Fail before fitting/saving anything if the columns needed later are absent.
        missing = [col for col in self.RFM_COLUMNS if col not in rmf_scaled.columns]
        if missing:
            raise KeyError(f"columns missing from {self.config.data_path}: {missing}")

        # model building
        # joblib.load unpickles the file, so kl_path must point to a trusted artifact.
        kl = joblib.load(self.config.kl_path)
        n_clusters = kl.elbow
        if n_clusters is None:
            raise ValueError(
                f"no elbow stored in {self.config.kl_path}; cannot choose the number of clusters"
            )

        km_model = KMeans(n_clusters=n_clusters,random_state=self.config.random_state)
        y_predicted = km_model.fit_predict(rmf_scaled)

        # Persist only after fitting: dumping first would store an unfitted
        # estimator that cannot predict when loaded back.
        joblib.dump(km_model,os.path.join(self.config.models,'km_model.pkl'))

        final_op_rmf = rmf_scaled.assign(cluster=y_predicted)

        # segmentation graphs
        fig, ax = plt.subplots(figsize=(10,6))
        sns.scatterplot(
            data=final_op_rmf,
            x='recency',
            y='monetary',
            hue='cluster',
            palette='viridis',
            alpha=0.7,
            ax=ax
        )
        ax.set_title('Customer Segments by Recency and Monetary Value')
        ax.set_xlabel('Recency (days)')
        ax.set_ylabel('Monetary Value')
        ax.legend(title='Cluster')
        # File name spelling (sic) is kept so existing consumers still find the file.
        self._save_figure(fig, 'segmentaion.png')

        # pairplots (seaborn builds its own figure for the grid)
        pair_grid = sns.pairplot(final_op_rmf, vars=self.RFM_COLUMNS, hue='cluster', palette='viridis')
        self._save_figure(pair_grid.figure, 'pairplot.png')

        # Compute the average RFM values for each cluster
        cluster_profile = final_op_rmf.groupby('cluster')[self.RFM_COLUMNS].mean().round(1)
        # Written next to the other report artifacts instead of the working directory.
        profile_path = os.path.join(self.config.report, "cluster_profile_report.txt")
        with open(profile_path, "w") as f:
            f.write("Cluster Profile Report (Mean RFM Values):\n\n")
            f.write(cluster_profile.to_string())

        # The cluster count comes from the elbow, so it may exceed the named
        # segments; unnamed clusters get a generic label instead of NaN.
        final_op_rmf['segment'] = final_op_rmf['cluster'].map(
            lambda cluster_id: self.SEGMENT_MAP.get(cluster_id, f"Cluster {cluster_id}")
        )
        final_op_rmf.to_csv(os.path.join(self.config.models, "segmentation_output.csv"), index=False)

        # cluster with segmentation
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.scatterplot(
            data=final_op_rmf,
            x='recency',
            y='monetary',
            hue='segment',
            palette='Set1',
            s=100,
            ax=ax
        )
        ax.set_title("Customer Segments based on Recency and Monetary")
        ax.set_xlabel("Recency (days since last purchase)")
        ax.set_ylabel("Monetary (total spending)")
        ax.legend(title='Segment')
        ax.grid(True)
        self._save_figure(fig, 'segmentation_Cluster.png')
        
        

In [12]:
#pipeline

In [13]:
# Run the model-building stage end to end: load the configuration, build the
# component, then fit the model and write its artifacts.
# No try/except wrapper: catching an exception only to re-raise it adds nothing,
# and any error still propagates with its full traceback.
config = ConfigurationManager()
model_config = config.get_model_config()
model_build_comp = ModelBuildingComponent(config=model_config)
model_build_comp.build_model()

[2025-06-01 17:05:37,471: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-06-01 17:05:37,476: INFO: common: yaml file: params.yaml loaded successfully]
[2025-06-01 17:05:37,478: INFO: common: created directory at: artifacts]
[2025-06-01 17:05:37,481: INFO: common: created directory at: artifacts/models]
[2025-06-01 17:05:37,485: INFO: common: created directory at: artifacts/models/report]


<Figure size 1000x600 with 0 Axes>

<Figure size 808.875x750 with 0 Axes>

<Figure size 1000x600 with 0 Axes>