In [2]:
import pandas as pd
import warnings
import logging
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import pairwise_distances

# Suppress every warning for the rest of the session. Logging is NOT configured
# here; that happens in the separate logging.basicConfig(...) call below.
# NOTE(review): a blanket 'ignore' also hides warnings raised by pandas and
# scikit-learn — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [3]:
# Root-logger setup: records at INFO level and above go both to a log file
# and to the console, each prefixed with a timestamp and the level name.
logging.basicConfig(
    handlers=[
        logging.FileHandler("product_recommendation.log"),  # persistent copy on disk
        logging.StreamHandler(),  # live echo in the console
    ],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,  # switch to logging.DEBUG to see the debug messages too
)

In [4]:
def load_and_split_data(file_path, product_cols):
    """
    Load stockist product data and split it into training and testing sets.

    The CSV is read with pandas and split 80/20 with a fixed seed
    (``random_state=42``) so the split is reproducible. Both parts are also
    written to ``train_data.csv`` and ``test_data.csv`` in the current
    working directory.

    Parameters
    ----------
    file_path : str or path-like
        Location of the source CSV file.
    product_cols : list of str or None
        Product columns the caller intends to use. They are checked against
        the loaded columns so that a schema mismatch fails here, before any
        output file is written. ``None`` or an empty list skips the check.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(train_df, test_df)``.

    Raises
    ------
    KeyError
        If any name in ``product_cols`` is not a column of the loaded file.
    """
    logging.info("Loading data from CSV...")
    df = pd.read_csv(file_path)
    logging.debug(f"Data loaded with shape: {df.shape}")

    # Fail fast: without this check a wrong column name only surfaces later
    # as a KeyError, after the train/test files have already been written.
    missing_cols = [col for col in (product_cols or []) if col not in df.columns]
    if missing_cols:
        raise KeyError(f"Product columns missing from '{file_path}': {missing_cols}")

    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
    train_df.to_csv("train_data.csv", index=False)
    test_df.to_csv("test_data.csv", index=False)

    logging.info("Train and test data saved successfully.")
    return train_df, test_df

In [5]:
def compute_jaccard_similarity(train_df, product_cols):
    """
    Build the product-by-product Jaccard similarity table from training rows.

    The ``product_cols`` columns of ``train_df`` are cast to boolean, the
    pairwise Jaccard distance between those columns is computed, and the
    similarity is taken as ``1 - distance``.

    Returns a square DataFrame labelled with ``product_cols`` on both axes.
    """
    logging.info("Converting product data to boolean for Jaccard similarity...")
    purchase_flags = train_df[product_cols].astype(bool).values

    logging.info("Calculating Jaccard similarity matrix...")
    # Transposed so that each product column (not each row) is one vector.
    distances = pairwise_distances(purchase_flags.T, metric="jaccard")
    product_similarity_df = pd.DataFrame(
        1 - distances,
        index=product_cols,
        columns=product_cols,
    )

    logging.debug("Jaccard similarity matrix created successfully.")
    logging.info("Sample similarity matrix:")
    logging.info(f"\n{product_similarity_df.head()}")

    return product_similarity_df

In [6]:
def get_top3_products(product_name, similarity_df):
    """
    Return the (up to) three products most similar to ``product_name``.

    Parameters
    ----------
    product_name : str
        Label to look up; must be present in ``similarity_df.index``.
    similarity_df : pandas.DataFrame
        Square similarity table whose column ``product_name`` holds the
        similarity of every product to ``product_name``.

    Returns
    -------
    list
        Product labels ordered from most to least similar, never containing
        ``product_name`` itself. Fewer than three labels are returned when
        the table has fewer than four products. If ``product_name`` is not in
        the index, the single-element list ``["Product not found"]`` is
        returned and a warning is logged.
    """
    if product_name not in similarity_df.index:
        logging.warning(f"Product '{product_name}' not found in similarity matrix.")
        return ["Product not found"]

    # Remove the product's own entry by label instead of assuming it sorts
    # first: another product with the same (maximal) similarity can tie with
    # it, in which case a positional skip would drop the wrong row and return
    # the product as its own recommendation.
    scores = similarity_df[product_name].drop(labels=product_name)

    # A stable sort keeps tied products in their original index order, so the
    # result is deterministic.
    ranked = scores.sort_values(ascending=False, kind="mergesort")
    top_products = ranked.head(3).index.tolist()
    logging.debug(f"Top 3 similar products for '{product_name}': {top_products}")
    return top_products


In [7]:
def generate_recommendations(test_df, product_cols, similarity_df):
    """
    Generate up to 3 product recommendations for each partner in test data.

    For every row of ``test_df`` the products whose column equals ``1`` are
    treated as purchased. Each purchased product contributes its most similar
    products (via ``get_top3_products``) as candidates, and a candidate keeps
    the highest similarity it has to any purchased product. Candidates the
    partner already purchased are discarded, the remainder is ranked by that
    score, and the best three are kept.

    Parameters
    ----------
    test_df : pandas.DataFrame
        One row per partner; must contain ``product_cols`` and, for the log
        preview of the first rows, a ``Partner_id`` column.
    product_cols : list of str
        Names of the product indicator columns.
    similarity_df : pandas.DataFrame
        Product-by-product similarity table.

    Returns
    -------
    tuple of (list of list, list of list)
        ``recommendations[i]`` holds the recommended product names for the
        i-th row, best first, and ``similarity_scores[i][j]`` is the score of
        ``recommendations[i][j]``. Both inner lists are empty when the
        partner purchased nothing or no candidate remains.
    """
    recommendations = []
    similarity_scores = []

    logging.info("Generating recommendations for test partners...")
    # Track the loop position separately: the frame's index labels come from
    # the shuffled split, so they do not identify "the first five rows".
    for position, (_label, row) in enumerate(test_df.iterrows()):
        purchased_products = [product for product in product_cols if row[product] == 1]

        if not purchased_products:
            recommendations.append([])
            similarity_scores.append([])
            continue

        already_bought = set(purchased_products)
        best_scores = {}  # candidate name -> highest similarity seen so far

        for product in purchased_products:
            for candidate in get_top3_products(product, similarity_df):
                # Skip products the partner already has, and anything that is
                # not a real column label (e.g. the "not found" sentinel
                # returned by get_top3_products).
                if candidate in already_bought or candidate not in similarity_df.columns:
                    continue
                score = float(similarity_df.loc[product, candidate])
                # NaN never compares greater, so NaN scores are ignored here.
                if score > best_scores.get(candidate, float("-inf")):
                    best_scores[candidate] = score

        # Rank names and scores together so they stay aligned; the sort is
        # stable, so ties keep first-seen order and the output is deterministic.
        ranked = sorted(best_scores.items(), key=lambda item: item[1], reverse=True)[:3]
        recommendations.append([name for name, _score in ranked])
        similarity_scores.append([score for _name, score in ranked])

        if position < 5:
            logging.info(f"\nRecommendations for Partner {row['Partner_id']}:")
            logging.info(f"Purchased Products: {purchased_products}")
            logging.info(f"Recommended Products: {recommendations[-1]}")
            logging.info(f"Similarity Scores: {similarity_scores[-1]}")

    return recommendations, similarity_scores

In [8]:
def save_recommendations(test_df, recommendations, similarity_scores, output_file="Partner_Product_Recommendations.csv"):
    """
    Save partner recommendations and similarity scores to a CSV file.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Frame containing a ``Partner_id`` column; it is not modified.
    recommendations : list
        One entry per row of ``test_df`` (written to ``Recommended_Products``).
    similarity_scores : list
        One entry per row of ``test_df`` (written to ``Similarity_Scores``).
    output_file : str or path-like, optional
        Destination CSV path.

    The written file has the columns ``Partner_id``, ``Recommended_Products``
    and ``Similarity_Scores`` and no index column.
    """
    # Build the output on a fresh frame instead of adding columns to the
    # caller's test_df, so the input keeps its original schema and repeated
    # calls behave the same.
    final_df = test_df[["Partner_id"]].assign(
        Recommended_Products=recommendations,
        Similarity_Scores=similarity_scores,
    )
    logging.info("\nFinal Partner Product Recommendations:")
    logging.info(f"\n{final_df.head()}")

    final_df.to_csv(output_file, index=False)
    logging.info(f"Product recommendations saved to {output_file}")


In [9]:
# Product indicator columns, one per product line. The names are used verbatim
# as DataFrame column labels, so they must match the CSV headers exactly.
product_cols = [
    "AIS(Air Insulated Switchgear)",
    "RMU(Ring Main Unit)",
    "PSS(Compact Sub-Stations)",
    "VCU(Vacuum Contactor Units)",
    "E-House",
    "VCB(Vacuum Circuit Breaker)",
    "ACB(Air Circuit Breaker)",
    "MCCB(Moduled Case Circuit Breaker)",
    "SDF(Switch Disconnectors)",
    "BBT(Busbar Trunking)",
    "Modular Switches",
]

# Main reusable pipeline: load and split the data, build the product
# similarity table from the training split, generate recommendations for the
# test split, and write them to CSV.
if __name__ == "__main__":
    try:
        # Input CSV, resolved relative to the current working directory.
        file_path = "stockist_data (1).csv"
        logging.info("Starting product recommendation pipeline...")

        train_df, test_df = load_and_split_data(file_path, product_cols)
        product_similarity_df = compute_jaccard_similarity(train_df, product_cols)
        recommendations, similarity_scores = generate_recommendations(test_df, product_cols, product_similarity_df)
        save_recommendations(test_df, recommendations, similarity_scores)

        logging.info("Pipeline completed successfully.")

    # Any failure is logged at CRITICAL level with its traceback
    # (exc_info=True) and then swallowed, so this cell finishes without
    # raising. The bound name `e` is not used.
    # NOTE(review): because the error is not re-raised, a failed run can look
    # like a successful cell execution — check the log output.
    except Exception as e:
        logging.critical("An error occurred during execution.", exc_info=True)

2025-04-07 06:07:43,427 - INFO - Starting product recommendation pipeline...
2025-04-07 06:07:43,428 - INFO - Loading data from CSV...
2025-04-07 06:07:43,461 - INFO - Train and test data saved successfully.
2025-04-07 06:07:43,462 - INFO - Converting product data to boolean for Jaccard similarity...
2025-04-07 06:07:43,464 - INFO - Calculating Jaccard similarity matrix...
2025-04-07 06:07:43,465 - INFO - Sample similarity matrix:
2025-04-07 06:07:43,471 - INFO - 
                               AIS(Air Insulated Switchgear)  \
AIS(Air Insulated Switchgear)                       1.000000   
RMU(Ring Main Unit)                                 0.350498   
PSS(Compact Sub-Stations)                           0.351396   
VCU(Vacuum Contactor Units)                         0.333861   
E-House                                             0.384615   

                               RMU(Ring Main Unit)  PSS(Compact Sub-Stations)  \
AIS(Air Insulated Switchgear)             0.350498               