In [5]:
import pandas as pd  # For working with tabular data
from collections import Counter  # For counting scheme occurrences
import ast  # For safely converting string representations of Python objects
import logging  # For debugging and tracking execution

In [6]:
# ------------------------------
# Logging setup
# ------------------------------
# Root logger runs at INFO, so logging.debug(...) calls are filtered out.
# Each record is written twice: to a log file (path relative to the current
# working directory) and to the console stream.
logging.basicConfig(
    handlers=[
        logging.FileHandler("scheme_mapping_debug.log"),
        logging.StreamHandler(),
    ],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)


In [7]:
def load_data(file_path):
    """
    Read a CSV file into a pandas DataFrame.

    Args:
        file_path: Location of the CSV, passed straight to ``pd.read_csv``.

    Returns:
        The parsed DataFrame. Its shape is logged at INFO level.

    Raises:
        Exception: Whatever ``pd.read_csv`` raises is logged at CRITICAL level
            (with traceback) and then re-raised unchanged.
    """
    try:
        frame = pd.read_csv(file_path)
    except Exception:
        # Record the failure with its traceback, then let the caller see it.
        logging.critical("Failed to load data.", exc_info=True)
        raise
    else:
        logging.info(f"Data loaded from {file_path} with shape: {frame.shape}")
        return frame


In [8]:
def get_existing_product_columns(df, candidate_columns):
    """
    Keep only the candidate column names that are present in ``df``.

    Args:
        df: DataFrame whose columns are checked.
        candidate_columns: Iterable of column names to look for.

    Returns:
        List of the candidates found in ``df.columns``, in the same order as
        they appear in ``candidate_columns``. The count is logged at INFO level.
    """
    available = df.columns
    matched = [name for name in candidate_columns if name in available]
    match_count = len(matched)
    logging.info(f"Found {match_count} valid product columns.")
    return matched


In [9]:
def _split_scheme_names(raw_value):
    """
    Split one ``Scheme_Type`` cell into individual scheme names.

    Names are separated by ``', '``. Surrounding whitespace is stripped and
    empty names are discarded, so values such as ``"A, "`` or ``"B,  C "`` do
    not create blank or whitespace-padded scheme entries. Non-string values
    yield an empty list.
    """
    if not isinstance(raw_value, str):
        return []
    stripped = (name.strip() for name in raw_value.split(', '))
    return [name for name in stripped if name]


def process_scheme_recommendations(df, product_columns):
    """
    For each product, compute top 3 effective schemes based on sales quantity.

    A row counts towards a product when that product's column equals 1. Every
    scheme listed in the row's ``Scheme_Type`` is credited with the row's
    ``Sales_Quantity_Last_Period``; the three schemes with the highest totals
    are reported, padded with "No Scheme Available" when fewer exist.

    Args:
        df: DataFrame containing the product flag columns plus ``Partner_id``,
            ``Scheme_Type`` and ``Sales_Quantity_Last_Period``.
        product_columns: Names of the product flag columns to evaluate.

    Returns:
        DataFrame with columns ``Product_id``, ``Partner_id`` (unique partner
        ids joined with ``', '``), ``Scheme_1``, ``Scheme_2`` and ``Scheme_3``;
        one row per product that has at least one matching row in ``df``.
        The columns are present even when no product produced a row.

    Raises:
        KeyError: If a required column is missing from ``df``.
    """
    output_columns = ['Product_id', 'Partner_id', 'Scheme_1', 'Scheme_2', 'Scheme_3']
    placeholder = "No Scheme Available"
    product_scheme_data = []

    for product in product_columns:
        logging.info(f"Processing product: {product}")

        # Keep only the rows flagged as having sold this product
        product_df = df[df[product] == 1]

        if product_df.empty:
            logging.warning(f"No sales data found for product: {product}")
            continue

        # Unique partners that sold the product, as strings
        partner_ids = product_df['Partner_id'].dropna().astype(str).unique()

        # Rows missing either the scheme or the quantity cannot be scored
        scheme_data = product_df[['Scheme_Type', 'Sales_Quantity_Last_Period']].dropna()

        scheme_sales = Counter()
        invalid_rows = 0

        # Iterate the two columns directly; avoids building a Series per row
        for raw_schemes, raw_quantity in zip(
            scheme_data['Scheme_Type'], scheme_data['Sales_Quantity_Last_Period']
        ):
            schemes = _split_scheme_names(raw_schemes)
            if not schemes:
                continue

            # Convert once per row rather than once per scheme
            try:
                quantity = float(raw_quantity)
            except (TypeError, ValueError):
                invalid_rows += 1
                logging.debug(f"Invalid sales quantity found: {raw_quantity}")
                continue

            for scheme in schemes:
                scheme_sales[scheme] += quantity

        if invalid_rows:
            # Surface skipped rows at a level that is not filtered by an INFO config
            logging.warning(
                f"Skipped {invalid_rows} row(s) with non-numeric sales quantity "
                f"for product: {product}"
            )

        # Highest totals first; pad so the three scheme columns are always filled
        top_schemes = [name for name, _ in scheme_sales.most_common(3)]
        top_schemes += [placeholder] * (3 - len(top_schemes))

        product_scheme_data.append({
            'Product_id': product,
            'Partner_id': ', '.join(partner_ids),
            'Scheme_1': top_schemes[0],
            'Scheme_2': top_schemes[1],
            'Scheme_3': top_schemes[2]
        })

        logging.debug(f"Top schemes for {product}: {top_schemes}")

    # Explicit columns keep the schema stable when no product produced a row
    return pd.DataFrame(product_scheme_data, columns=output_columns)

In [10]:
def save_scheme_mapping(final_df, output_file="Optimized_Product_Partner_Scheme_Mapping.csv"):
    """
    Write the scheme recommendations to a CSV file without the index column.

    Args:
        final_df: DataFrame to persist.
        output_file: Destination path passed to ``DataFrame.to_csv``.

    Raises:
        Exception: Whatever ``to_csv`` raises is logged at CRITICAL level
            (with traceback) and then re-raised unchanged.
    """
    try:
        final_df.to_csv(output_file, index=False)
    except Exception:
        # Record the failure with its traceback, then let the caller see it.
        logging.critical("Failed to save the scheme mapping.", exc_info=True)
        raise
    else:
        logging.info(f"Final scheme mapping saved to {output_file}")


In [11]:
def run_scheme_mapping_pipeline(file_path):
    """
    Run the scheme recommendation workflow end to end.

    Steps: load the CSV at ``file_path``, keep the candidate product columns
    that exist in it, compute the per-product scheme recommendations, log the
    first rows of the result, and save it via ``save_scheme_mapping`` using
    that function's default output file.

    Args:
        file_path: Path of the input CSV, forwarded to ``load_data``.
    """
    logging.info("Starting scheme mapping pipeline...")

    # Product flag columns the pipeline knows about; absent ones are ignored
    candidate_products = [
        'AIS(Air Insulated Switchgear)',
        'RMU(Ring Main Unit)',
        'PSS(Compact Sub-Stations)',
        'VCU(Vacuum Contactor Units)',
        'E-House',
        'VCB(Vacuum Circuit Breaker)',
        'ACB(Air Circuit Breaker)',
        'MCCB(Moduled Case Circuit Breaker)',
        'SDF(Switch Disconnectors)',
        'BBT(Busbar Trunking)',
        'Modular Switches',
        'Starter',
        'Controller',
        'Solar Solutions',
        'Pump Starter and Controller',
    ]

    # Load, narrow down to the columns that exist, then score the schemes
    stockist_df = load_data(file_path)
    present_products = get_existing_product_columns(stockist_df, candidate_products)
    recommendations = process_scheme_recommendations(stockist_df, present_products)

    logging.info("Sample scheme recommendations:")
    logging.info(f"\n{recommendations.head()}")

    save_scheme_mapping(recommendations)

    logging.info("Scheme recommendation pipeline completed successfully.")

# -----------------------------------
# Run pipeline
# -----------------------------------
# Entry point: run the whole pipeline on the stockist CSV. The relative path
# is resolved against the current working directory. `data_file` is left as
# a module-level name after this runs.
if __name__ == "__main__":
    data_file = "generated_stockist_data.csv"
    run_scheme_mapping_pipeline(data_file)


2025-04-02 09:08:45,317 - INFO - Starting scheme mapping pipeline...
2025-04-02 09:08:45,324 - INFO - Data loaded from generated_stockist_data.csv with shape: (1000, 26)
2025-04-02 09:08:45,325 - INFO - Found 15 valid product columns.
2025-04-02 09:08:45,326 - INFO - Processing product: AIS(Air Insulated Switchgear)
2025-04-02 09:08:45,386 - INFO - Processing product: RMU(Ring Main Unit)
2025-04-02 09:08:45,493 - INFO - Processing product: PSS(Compact Sub-Stations)
2025-04-02 09:08:45,672 - INFO - Processing product: VCU(Vacuum Contactor Units)
2025-04-02 09:08:45,777 - INFO - Processing product: E-House
2025-04-02 09:08:45,887 - INFO - Processing product: VCB(Vacuum Circuit Breaker)
2025-04-02 09:08:45,992 - INFO - Processing product: ACB(Air Circuit Breaker)
2025-04-02 09:08:46,181 - INFO - Processing product: MCCB(Moduled Case Circuit Breaker)
2025-04-02 09:08:46,378 - INFO - Processing product: SDF(Switch Disconnectors)
2025-04-02 09:08:46,569 - INFO - Processing product: BBT(Busba