In [1]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
import numpy as np
import logging
from typing import List, Tuple

In [2]:
# Logging configuration for the notebook: INFO and above, each record rendered
# as "<timestamp> — <level> — <message>". `logger` is the module-level logger
# used by the functions defined in the cells below.
logging.basicConfig(format="%(asctime)s — %(levelname)s — %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)


# Column labels selected from the input frame to build the partner x product
# matrix. They are used verbatim as DataFrame column keys, so every character
# (including spelling and punctuation) must match the CSV header exactly.
PRODUCT_COLUMNS = [
    "AIS(Air Insulated Switchgear)",
    "RMU(Ring Main Unit)",
    "PSS(Compact Sub-Stations)",
    "VCU(Vacuum Contactor Units)",
    "E-House",
    "VCB(Vacuum Circuit Breaker)",
    "ACB(Air Circuit Breaker)",
    "MCCB(Moduled Case Circuit Breaker)",
    "SDF(Switch Disconnectors)",
    "BBT(Busbar Trunking)",
    "Modular Switches",
    "Starter",
    "Controller",
    "Solar Solutions",
    "Pump Starter and Controller",
]


In [3]:
def load_data(file_path: str) -> pd.DataFrame:
    """Read the stockist CSV at ``file_path`` into a DataFrame.

    Args:
        file_path: Path handed straight to ``pandas.read_csv``.

    Returns:
        The parsed DataFrame.

    Raises:
        Exception: Any error raised while reading is logged at ERROR level
            and then re-raised unchanged.
    """
    logger.info(f"Loading data from {file_path}")
    try:
        frame = pd.read_csv(file_path)
        row_count, col_count = frame.shape
        logger.info(f"Loaded {row_count} rows and {col_count} columns.")
    except Exception as exc:
        # Record the failure for the log, but let the caller decide what to do.
        logger.error(f"Error loading data: {exc}")
        raise
    return frame

In [4]:
def prepare_user_product_matrix(df: pd.DataFrame) -> pd.DataFrame:
    """Build the partner x product matrix used for neighbour search.

    Args:
        df: Input frame containing a ``Partner_id`` column and every column
            named in ``PRODUCT_COLUMNS``.

    Returns:
        A frame indexed by ``Partner_id`` holding only the product columns,
        cast to ``int``.
    """
    logger.info("Preparing user-product matrix.")
    by_partner = df.set_index("Partner_id")
    product_values = by_partner[PRODUCT_COLUMNS]
    matrix = product_values.astype(int)
    logger.info(f"User-product matrix shape: {matrix.shape}")
    return matrix


In [5]:
def train_knn_model(train_data: pd.DataFrame) -> NearestNeighbors:
    """Fit a brute-force, cosine-metric nearest-neighbour index.

    Args:
        train_data: Rows to index; one row per partner.

    Returns:
        The fitted ``NearestNeighbors`` estimator.
    """
    logger.info("Training KNN model with cosine similarity.")
    # fit() returns the estimator itself, so construction and fitting chain.
    return NearestNeighbors(algorithm='brute', metric='cosine').fit(train_data)


In [6]:
def recommend_products_with_scores(partner_id: str, matrix: pd.DataFrame,
                                   knn: NearestNeighbors, n_neighbors: int = 5,
                                   top_n: int = 3
) -> Tuple[List[str], List[float]]:
    """Recommend the highest-scoring products a partner does not already hold.

    The partner's row is used as a nearest-neighbour query. Every product is
    then scored as the sum, over the retained neighbours, of
    ``neighbour value * (1 - distance)``. A returned score is therefore a
    similarity-weighted vote, not a single similarity value.

    Args:
        partner_id: Label of the partner in ``matrix.index``.
        matrix: Partner x product matrix. Neighbour positions returned by
            ``knn.kneighbors`` are resolved with ``matrix.iloc``, so ``knn``
            must have been fitted on these same rows in this same order.
        knn: Fitted nearest-neighbour model.
        n_neighbors: Number of neighbours to score with, after the closest
            hit has been dropped.
        top_n: Maximum number of products to return (default 3, the value
            that used to be hard-coded).

    Returns:
        ``(products, scores)`` as two parallel lists ordered by descending
        score. Both are empty when ``partner_id`` is not in ``matrix``.
        Scores are plain Python floats. Products that no neighbour holds can
        still appear, with a score of 0.0.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    if partner_id not in matrix.index:
        logger.warning(f"Partner ID {partner_id} not found in matrix.")
        return [], []

    # One extra neighbour is requested because the closest hit is assumed to
    # be the query row itself. The request is capped at the number of fitted
    # samples so that a very small dataset does not make kneighbors raise.
    k = min(n_neighbors + 1, knn.n_samples_fit_)
    distances, indices = knn.kneighbors(matrix.loc[[partner_id]], n_neighbors=k)
    similar_users = matrix.iloc[indices[0][1:]]
    similarity_scores = 1 - distances[0][1:]

    # Similarity-weighted sum of each product column across the neighbours.
    product_scores = similar_users.T.dot(similarity_scores).sort_values(ascending=False)

    # Drop products the partner already holds; the boolean mask is aligned to
    # the sorted scores by product label.
    not_owned = ~matrix.loc[partner_id].astype(bool)
    top = product_scores[not_owned].head(top_n)

    # tolist() yields native Python str/float values. list(series.values)
    # would yield numpy scalars, which render as "np.float64(...)" when a list
    # is written into a CSV cell under NumPy 2.
    top_recommendations = top.index.tolist()
    top_scores = top.tolist()

    logger.debug(f"Partner {partner_id} recommendations: {top_recommendations}")
    return top_recommendations, top_scores


In [7]:
def generate_recommendations(input_csv: str, output_csv: str) -> None:
    """Generate recommendations for every partner and write them to a CSV.

    Steps: load the input file, build the partner x product matrix, fit the
    nearest-neighbour model, score each partner, and save one row per partner.

    Args:
        input_csv: Path of the stockist CSV to read.
        output_csv: Path of the CSV to write. It has the columns
            ``Partner_ID``, ``Recommended_Products`` and ``Similarity_Scores``;
            the last two hold lists, which pandas writes out as text.
    """
    df = load_data(input_csv)
    matrix = prepare_user_product_matrix(df)

    # Fit on the full matrix. kneighbors() returns row positions within the
    # fitted data, and recommend_products_with_scores resolves those positions
    # with matrix.iloc. Fitting on a shuffled train split made the positions
    # point at unrelated partners, and the held-out part was never used.
    knn = train_knn_model(matrix)

    logger.info("Generating recommendations for each partner.")
    recommendations = [
        [partner_id, *recommend_products_with_scores(partner_id, matrix, knn)]
        for partner_id in df["Partner_id"]
    ]

    result_df = pd.DataFrame(
        recommendations,
        columns=["Partner_ID", "Recommended_Products", "Similarity_Scores"],
    )
    result_df.to_csv(output_csv, index=False)
    logger.info(f"Recommendations saved to {output_csv}")

if __name__ == "__main__":
    # Entry point: read the stockist export and write the per-partner
    # recommendations next to it.
    input_path, output_path = (
        "generated_stockist_data.csv",
        "User_Based_Recommendations.csv",
    )
    generate_recommendations(input_csv=input_path, output_csv=output_path)

2025-04-02 09:22:11,896 — INFO — Loading data from generated_stockist_data.csv
2025-04-02 09:22:11,903 — INFO — Loaded 1000 rows and 26 columns.
2025-04-02 09:22:11,904 — INFO — Preparing user-product matrix.
2025-04-02 09:22:11,906 — INFO — User-product matrix shape: (1000, 15)
2025-04-02 09:22:11,908 — INFO — Training KNN model with cosine similarity.
2025-04-02 09:22:11,910 — INFO — Generating recommendations for each partner.
2025-04-02 09:22:14,938 — INFO — Recommendations saved to User_Based_Recommendations.csv
