In [5]:
import pandas as pd
import numpy as np
from scipy.optimize import linprog
import logging

In [6]:
# Configure the root logger once for the notebook: INFO and above,
# each record rendered as "<LEVEL> - <message>".
logging.basicConfig(
    format='%(levelname)s - %(message)s',
    level=logging.INFO,
)

def load_data(filepath):
    """Read the CSV at ``filepath`` into a DataFrame.

    Args:
        filepath: Path (or any source accepted by ``pd.read_csv``) of the CSV.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        Exception: Whatever ``pd.read_csv`` raised; it is logged at ERROR
            level and then re-raised unchanged.
    """
    try:
        frame = pd.read_csv(filepath)
        dims = frame.shape
        logging.info(f"Data loaded successfully from {filepath}. Shape: {dims}")
        return frame
    except Exception as err:
        # Record the failure, then let the caller decide how to handle it.
        logging.error(f"Error loading data: {err}")
        raise

In [7]:
def melt_data(df, metadata_cols):
    """Reshape wide product-flag columns into one row per (record, product).

    Every column of ``df`` that is not listed in ``metadata_cols`` is treated
    as a product column. The frame is melted so that the product column name
    lands in ``Product_id``; only rows whose flag equals 1 are kept, and the
    flag column itself is dropped.

    Args:
        df: Wide DataFrame containing the metadata and product columns.
        metadata_cols: Column names to carry through unchanged as identifiers.

    Returns:
        DataFrame with the metadata columns plus ``Product_id``.

    Raises:
        Exception: Any error from the reshape is logged at ERROR level and
            re-raised unchanged.
    """
    try:
        product_cols = []
        for column in df.columns:
            if column not in metadata_cols:
                product_cols.append(column)

        long_form = df.melt(
            id_vars=metadata_cols,
            value_vars=product_cols,
            var_name='Product_id',
            value_name='Has_Product',
        )

        # Keep only the (record, product) pairs that are flagged with 1.
        flagged = long_form.loc[long_form['Has_Product'] == 1]
        flagged = flagged.drop(columns='Has_Product')

        logging.info(f"Data melted successfully. Shape after melting: {flagged.shape}")
        return flagged
    except Exception as err:
        logging.error(f"Error in melting data: {err}")
        raise

In [8]:
def aggregate_sales(df_melted):
    """Aggregates sales by Partner, Product, and Scheme."""
    try:
        grouped = df_melted.groupby(["Partner_id", "Product_id", "Scheme_Type"]).agg({
            "Sales_Value_Last_Period": "sum",
            "Sales_Quantity_Last_Period": "sum"
        }).reset_index()
        logging.info("Sales aggregation complete.")
        return grouped
    except Exception as e:
        logging.error(f"Error aggregating sales: {e}")
        raise

In [9]:
def optimize_schemes(product_group):
    """Selects top 3 schemes using Linear Programming for each product.

    For a product with more than three distinct schemes, the total
    ``Sales_Value_Last_Period`` per scheme is computed and the following LP
    is solved with HiGHS::

        maximise   sum_i value_i * x_i
        subject to sum_i x_i == 3,  0 <= x_i <= 1

    The schemes whose ``x_i`` is 1 are returned, ordered by descending total
    sales value. If the solver fails or does not return a clean 0/1
    selection, the function falls back to ranking the totals directly, so a
    solver problem never silently erases the answer.

    Args:
        product_group: DataFrame for a single product containing at least
            ``Scheme_Type`` and ``Sales_Value_Last_Period`` columns.

    Returns:
        A list of exactly three entries: the selected scheme labels, padded
        with ``None`` when fewer than three schemes are available. With three
        or fewer distinct schemes they are returned in order of appearance.
        ``[None, None, None]`` is returned when there are no schemes or when
        the per-scheme totals cannot be computed.
    """
    schemes = product_group["Scheme_Type"].unique()
    num_schemes = len(schemes)

    if num_schemes == 0:
        return [None, None, None]
    if num_schemes <= 3:
        return list(schemes) + [None] * (3 - num_schemes)

    try:
        totals = product_group.groupby("Scheme_Type")["Sales_Value_Last_Period"].sum()
        # Take labels from the grouped result itself so they stay aligned with
        # the cost vector (groupby sorts by label, unique() does not).
        labels = totals.index.tolist()
        values = totals.to_numpy(dtype=float)
    except Exception as e:
        logging.warning(f"Optimization failed for a group: {e}")
        return [None, None, None]

    num_vars = len(labels)
    # groupby drops NaN labels, so there may be fewer variables than `schemes`.
    n_pick = min(3, num_vars)

    try:
        res = linprog(
            -values,  # linprog minimises, so negate to maximise sales value
            A_eq=np.ones((1, num_vars)),
            b_eq=[n_pick],
            bounds=[(0, 1)] * num_vars,
            method='highs',
        )
        if not res.success:
            raise RuntimeError(res.message)
        chosen = np.flatnonzero(res.x > 0.5)
        # Guard against a fractional solution (possible only with tied values).
        if len(chosen) != n_pick:
            raise RuntimeError("LP solution is not a 0/1 selection")
    except Exception as e:
        logging.warning(f"Optimization failed for a group: {e}")
        # Deterministic fallback: take the highest totals directly.
        chosen = np.argsort(-values, kind="stable")[:n_pick]

    ranked = sorted(chosen, key=lambda i: -values[i])
    top = [labels[i] for i in ranked]
    return top + [None] * (3 - len(top))

In [10]:
def run_optimization_pipeline(filepath, output_path):
    """Run the full load -> melt -> aggregate -> optimise -> export pipeline.

    Args:
        filepath: Path of the input CSV passed to ``load_data``.
        output_path: Path of the CSV written at the end of the run.

    The output has one row per ``Product_id`` with a ``Partner_id`` column
    holding the list of partner ids seen for that product, followed by
    ``Scheme_1``, ``Scheme_2`` and ``Scheme_3`` as returned by
    ``optimize_schemes``.
    """
    # Columns that describe the record; every other column is a product flag.
    metadata_cols = [
        'Partner_id', 'Geography', 'Stockist_Type', 'Scheme_Type', 'Sales_Value_Last_Period',
        'Sales_Quantity_Last_Period', 'MRP', 'Growth_Percentage', 'Discount_Applied',
        'Bulk_Purchase_Tendency', 'New_Stockist', 'Feedback_Score'
    ]

    raw_df = load_data(filepath)
    long_df = melt_data(raw_df, metadata_cols)
    sales_by_scheme = aggregate_sales(long_df)

    lp_input = sales_by_scheme[["Product_id", "Scheme_Type", "Sales_Value_Last_Period"]]

    logging.info("Starting optimization per product...")
    # apply() yields one 3-element list per product; reset_index() exposes it
    # as a column named 0 next to Product_id.
    per_product = lp_input.groupby("Product_id").apply(optimize_schemes).reset_index()
    scheme_frame = pd.DataFrame(per_product[0].tolist(), index=per_product.index)
    per_product[["Scheme_1", "Scheme_2", "Scheme_3"]] = scheme_frame
    per_product = per_product.drop(columns=[0])

    partner_lists = long_df.groupby("Product_id")["Partner_id"].apply(list).reset_index()
    report = partner_lists.merge(per_product, on="Product_id", how="left")

    report.to_csv(output_path, index=False)
    logging.info(f"Optimization pipeline complete. Output saved to {output_path}")

# Entry point: run the pipeline on the stockist CSV and write the per-product result
if __name__ == "__main__":
    run_optimization_pipeline(
        filepath="Stockist_Data.csv",
        output_path="Top_Optimized_Schemes_with_LP.csv",
    )

INFO - Data loaded successfully from Stockist_Data.csv. Shape: (1000, 27)
INFO - Data melted successfully. Shape after melting: (7643, 13)
INFO - Sales aggregation complete.
INFO - Starting optimization per product...
INFO - Optimization pipeline complete. Output saved to Top_Optimized_Schemes_with_LP.csv
