In [8]:
# ----------------------- Imports -----------------------
import logging
from pathlib import Path
from typing import List, Union, Optional

import numpy as np
import pandas as pd
from scipy.optimize import linprog


In [9]:
# ----------------------- Logging Setup -----------------------
# Configure the root logger to emit INFO and above as
# "<timestamp> — <LEVEL> — <message>". Note that basicConfig is a no-op
# when the root logger already has handlers attached.
logging.basicConfig(
    format="%(asctime)s — %(levelname)s — %(message)s",
    level=logging.INFO,
)

# Module-level logger used by the functions in this notebook.
logger = logging.getLogger(__name__)


In [10]:
# ------------------ Load and Melt Data ------------------
def load_and_melt_data(filepath: str, metadata_cols: Optional[List[str]] = None) -> pd.DataFrame:
    """
    Load a wide CSV and melt it into long format, one row per held product.

    Every column that is not a metadata column is treated as a product
    indicator. After melting, only rows whose indicator equals 1 are kept and
    the indicator column itself is dropped.

    Args:
        filepath (str): Path to the CSV file.
        metadata_cols (List[str], optional): Columns carried through unchanged
            as identifiers. If None, they are inferred as every column that is
            NOT a 0/1 indicator (missing values are ignored when checking).

    Returns:
        pd.DataFrame: The metadata columns plus ``Product_id``, with one row
        for each input row and product column whose value is 1.

    Raises:
        Exception: Any error raised by ``pd.read_csv`` is logged and re-raised.
    """
    logger.info(f"Reading input CSV from {filepath}")

    try:
        df = pd.read_csv(filepath)
    except Exception as e:
        logger.error(f"Failed to read CSV: {e}")
        raise
    logger.info(f"File loaded with shape {df.shape}")

    if metadata_cols is None:
        # Products are assumed to be binary indicator columns, so anything
        # holding a value other than 0/1 is metadata. A cardinality threshold
        # would misclassify low-cardinality metadata (e.g. a scheme type with
        # a handful of values) as a product column.
        metadata_cols = [
            col for col in df.columns
            if not df[col].dropna().isin([0, 1]).all()
        ]
        logger.info(f"Inferred metadata columns: {metadata_cols}")
    else:
        metadata_cols = list(metadata_cols)

    product_cols = [col for col in df.columns if col not in metadata_cols]

    df_melted = df.melt(id_vars=metadata_cols, value_vars=product_cols,
                        var_name='Product_id', value_name='Has_Product')
    # Keep only the (row, product) pairs where the product is actually held.
    df_melted = df_melted[df_melted['Has_Product'] == 1].drop(columns=['Has_Product'])

    logger.info(f"Melted dataframe shape: {df_melted.shape}")
    return df_melted


In [11]:
# ------------------ Aggregate Sales Data ------------------
def aggregate_sales_data(df_melted: pd.DataFrame) -> pd.DataFrame:
    """
    Sum sales value and quantity for every Partner / Product / Scheme combination.

    Args:
        df_melted (pd.DataFrame): Long-format frame containing the columns
            ``Partner_id``, ``Product_id``, ``Scheme_Type``,
            ``Sales_Value_Last_Period`` and ``Sales_Quantity_Last_Period``.

    Returns:
        pd.DataFrame: One row per (Partner_id, Product_id, Scheme_Type) with
        the summed ``Sales_Value_Last_Period`` and
        ``Sales_Quantity_Last_Period``; the group keys are regular columns.
    """
    group_keys = ["Partner_id", "Product_id", "Scheme_Type"]
    # Both measures are aggregated with the same reducer.
    reducers = dict.fromkeys(
        ("Sales_Value_Last_Period", "Sales_Quantity_Last_Period"), "sum"
    )

    logger.info("Aggregating sales by Partner, Product, and Scheme_Type.")
    totals = df_melted.groupby(group_keys).agg(reducers)
    result = totals.reset_index()

    logger.info(f"Aggregated dataframe shape: {result.shape}")
    return result


In [12]:
# ------------------ Scheme Optimization ------------------
def optimize_schemes(product_group: pd.DataFrame) -> List[Union[str, None]]:
    """
    Select the top 3 schemes for one product by total sales value.

    With more than 3 schemes available, a linear program chooses selection
    weights x_i in [0, 1] that maximize the summed ``Sales_Value_Last_Period``
    subject to ``sum(x_i) <= 3``. Schemes are then ranked by (selected by the
    LP, total sales) in descending order and the first 3 are returned, so
    exactly 3 schemes come back even when fewer than 3 have positive sales.
    Ties in sales value are resolved by the solver / label order.

    Args:
        product_group (pd.DataFrame): Rows for a single product; must contain
            ``Scheme_Type`` and ``Sales_Value_Last_Period``.

    Returns:
        List[Union[str, None]]: Always 3 entries. With 3 or fewer distinct
        schemes they are returned in order of appearance and padded with
        None. ``[None, None, None]`` is returned if the LP fails or raises.
    """
    max_schemes = 3
    no_selection = [None] * max_schemes

    # Missing labels are ignored: groupby() drops them as well, so counting
    # them here would make the LP bounds and objective disagree in length.
    schemes = product_group["Scheme_Type"].dropna().unique()
    num_schemes = len(schemes)

    logger.debug(f"Optimizing for {num_schemes} scheme(s): {list(schemes)}")

    if num_schemes <= max_schemes:
        return list(schemes) + [None] * (max_schemes - num_schemes)

    try:
        grouped_sales = product_group.groupby("Scheme_Type")["Sales_Value_Last_Period"].sum()
        sales = grouped_sales.to_numpy(dtype=float)
        count = len(sales)

        # linprog minimizes, so negate the sales to maximize revenue. The
        # single inequality row caps the number of chosen schemes; without it
        # every scheme with positive sales gets x = 1 and the "top 3" would be
        # decided by arbitrary tie-breaking rather than by sales.
        res = linprog(
            -sales,
            A_ub=np.ones((1, count)),
            b_ub=[max_schemes],
            bounds=[(0, 1)] * count,
            method='highs',
        )

        if not res.success:
            logger.warning("LP optimization failed.")
            return no_selection

        # LP-selected schemes first, best-selling first within each group.
        ranking = sorted(
            range(count),
            key=lambda i: (-round(float(res.x[i])), -sales[i]),
        )[:max_schemes]
        selected = grouped_sales.index[ranking].tolist()
        logger.info(f"Top 3 schemes: {selected}")
        return selected + [None] * (max_schemes - len(selected))
    except Exception as e:
        # Best effort: one failing product must not abort the whole run.
        logger.error(f"Exception during LP: {e}")
        return no_selection


In [13]:
# ------------------ End-to-End Scheme Optimization ------------------
def generate_optimized_schemes(input_path: str, output_path: str, metadata_cols: Optional[List[str]] = None):
    """
    Run the full pipeline and write the optimized schemes per product to CSV.

    Steps: load and melt the input, aggregate sales per
    Partner/Product/Scheme, pick up to 3 schemes per product, attach the list
    of partners holding each product, and save the result.

    Args:
        input_path (str): Input CSV path.
        output_path (str): Output CSV path; missing parent directories are
            created.
        metadata_cols (List[str], optional): Metadata columns forwarded to
            ``load_and_melt_data``.

    Output columns: ``Product_id``, ``Partner_id`` (list of partners),
    ``Scheme_1``, ``Scheme_2``, ``Scheme_3``.
    """
    logger.info("Starting full scheme optimization pipeline.")

    df_melted = load_and_melt_data(input_path, metadata_cols)
    product_schemes = aggregate_sales_data(df_melted)

    optimization_data = product_schemes[["Product_id", "Scheme_Type", "Sales_Value_Last_Period"]]

    logger.info("Running optimization for each product group.")
    # Build the scheme table explicitly rather than unpacking the positional
    # `0` column that groupby.apply(...).reset_index() produces; this keeps
    # the column names fixed and avoids applying over the grouping column.
    scheme_rows = [
        [product_id, *optimize_schemes(group)]
        for product_id, group in optimization_data.groupby("Product_id")
    ]
    optimized = pd.DataFrame(
        scheme_rows, columns=["Product_id", "Scheme_1", "Scheme_2", "Scheme_3"]
    )

    partners_per_product = df_melted.groupby("Product_id")["Partner_id"].apply(list).reset_index()
    final_output = partners_per_product.merge(optimized, on="Product_id", how="left")

    # to_csv does not create directories, so make sure the target exists.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    final_output.to_csv(output_path, index=False)
    logger.info(f"Output saved to {output_path}")


In [14]:
# Define your CSV paths.
# Both are relative paths, so they resolve against the notebook's current
# working directory.
# NOTE(review): presumably passed to generate_optimized_schemes(input_csv,
# output_csv) in a later cell — confirm against the rest of the notebook.
input_csv = "input/stockist_data.csv"
output_csv = "output/optimized_schemes.csv"