In [1]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import numpy as np
from sklearn.model_selection import train_test_split


In [46]:
# Load the stockist data
stockist_data_path = (r"C:\Users\291688\Downloads\Indirect_Scheme_Recommendation_Explainable_Ai\generated_stockist_data.csv")
stockist_df = pd.read_csv(stockist_data_path)


In [3]:
# Define product columns
product_columns = [
    "AIS(Air Insulated Switchgear)", "RMU(Ring Main Unit)", "PSS(Compact Sub-Stations)",
    "VCU(Vacuum Contactor Units)", "E-House", "VCB(Vacuum Circuit Breaker)",
    "ACB(Air Circuit Breaker)", "MCCB(Moduled Case Circuit Breaker)", "SDF(Switch Disconnectors)",
    "BBT(Busbar Trunking)", "Modular Switches", "Starter", "Controller",
    "Solar Solutions", "Pump Starter and Controller"
]

In [4]:
# Extract the user-product matrix
user_product_matrix = stockist_df.set_index("Partner_id")[product_columns]
user_product_matrix = user_product_matrix.astype(int)  # Convert to numeric


In [5]:
# Split data into train and test sets (80% train, 20% test)
train_data, test_data = train_test_split(user_product_matrix, test_size=0.2, random_state=42)


In [6]:
# Fit KNN Model for User-Based Collaborative Filtering on training data
knn = NearestNeighbors(metric='cosine', algorithm='brute')
knn.fit(train_data)


In [7]:
# Function to recommend products with similarity scores
def recommend_products_with_scores(partner_id, n_neighbors=5):
    if partner_id not in user_product_matrix.index:
        return [], []


    # Find similar users
    #distances, indices = knn.kneighbors([user_product_matrix.loc[partner_id]], n_neighbors=n_neighbors+1)
    distances, indices = knn.kneighbors(user_product_matrix.loc[[partner_id]], n_neighbors=n_neighbors+1)

    # Exclude self (first index)
    similar_users = user_product_matrix.iloc[indices[0][1:]]
    similarity_scores = 1 - distances[0][1:]  # Convert distance to similarity

    # Compute weighted sum of product purchases
    recommended_products = similar_users.T.dot(similarity_scores).sort_values(ascending=False)

    # Filter out already purchased products
    already_purchased = user_product_matrix.loc[partner_id]
    recommended_products = recommended_products[~already_purchased.astype(bool)]

    # Select top 3 recommendations
    top_recommendations = list(recommended_products.head(3).index)
    top_similarity_scores = list(recommended_products.head(3).values)

    return top_recommendations, top_similarity_scores


In [8]:
# Generate recommendations with similarity scores
recommendations_with_scores = []
for partner_id in stockist_df["Partner_id"]:
    recommended_products, similarity_scores = recommend_products_with_scores(partner_id)
    recommendations_with_scores.append([partner_id, recommended_products, similarity_scores])


In [9]:
# Convert to DataFrame
user_based_recommendations_df = pd.DataFrame(recommendations_with_scores, columns=["Partner_ID", "Recommended_Products", "Similarity_Scores"])


In [10]:
# Save recommendations to CSV
output_file = "User_Based_Recommendations.csv"
user_based_recommendations_df.to_csv(output_file, index=False)


In [11]:
print(f"User-based collaborative filtering recommendations saved to {output_file}")

User-based collaborative filtering recommendations saved to User_Based_Recommendations.csv


In [12]:
user_based_recommendations_df

Unnamed: 0,Partner_ID,Recommended_Products,Similarity_Scores
0,001_3,"[SDF(Switch Disconnectors), ACB(Air Circuit Br...","[3.774506972053015, 1.8888888888888888, 1.8888..."
1,002_80,"[VCU(Vacuum Contactor Units), SDF(Switch Disco...","[3.369257357734473, 1.677760776806747, 1.67776..."
2,003_12,"[VCB(Vacuum Circuit Breaker), ACB(Air Circuit ...","[4.187781667034116, 3.3859979412968433, 2.5408..."
3,004_30,"[ACB(Air Circuit Breaker), BBT(Busbar Trunking...","[3.871332216326744, 3.164225435140196, 2.38962..."
4,005_49,"[SDF(Switch Disconnectors), Solar Solutions, V...","[3.199023252744026, 3.1571233410577837, 2.3825..."
...,...,...,...
995,996_99,"[ACB(Air Circuit Breaker), BBT(Busbar Trunking...","[4.078671230279971, 3.2768875045426973, 2.4053..."
996,997_83,"[VCB(Vacuum Circuit Breaker), RMU(Ring Main Un...","[4.165685232306558, 3.349188651378832, 3.34918..."
997,998_1,"[Pump Starter and Controller, Modular Switches...","[4.412969292599227, 3.459506703353634, 2.69673..."
998,999_85,"[PSS(Compact Sub-Stations), ACB(Air Circuit Br...","[1.7162326606420664, 1.7162326606420664, 1.716..."


In [35]:
def permutation_feature_importance(model, user_product_matrix, product_columns, test_users, n_neighbors=5, n_repeats=5):
    baseline_recommendations = {}
    feature_importance = {col: [] for col in product_columns}

    # Baseline recommendations
    model.fit(user_product_matrix)
    for partner_id in test_users:
        recs, _ = recommend_products_with_scores(partner_id, n_neighbors)
        baseline_recommendations[partner_id] = set(recs)

    # For each feature
    for col in product_columns:
        for _ in range(n_repeats):
            shuffled_matrix = user_product_matrix.copy()
            shuffled_matrix[col] = np.random.permutation(shuffled_matrix[col].values)

            model.fit(shuffled_matrix)

            for partner_id in test_users:
                recs, _ = recommend_products_with_scores(partner_id, n_neighbors)
                recs = set(recs)
                original_recs = baseline_recommendations[partner_id]
                
                # Score = fraction of changed recommendations
                if len(original_recs) > 0:
                    change_score = len(original_recs - recs) / len(original_recs)
                else:
                    change_score = 0.0
                
                feature_importance[col].append(change_score)

    # Average importance per feature
    feature_importance_scores = {col: np.mean(scores) for col, scores in feature_importance.items()}
    return feature_importance_scores


In [41]:
# Compute importance
test_users = test_data.index.tolist()
importance_scores = permutation_feature_importance(knn, user_product_matrix, product_columns, test_users)

# Convert to DataFrame
importance_df = pd.DataFrame(list(importance_scores.items()), columns=["Product", "Importance_Score"])

# Sort by importance (descending)
importance_df = importance_df.sort_values(by="Importance_Score", ascending=False).reset_index(drop=True)

# Display
print(importance_df)


                               Product  Importance_Score
0                              Starter          0.369667
1            SDF(Switch Disconnectors)          0.362667
2                 BBT(Busbar Trunking)          0.361667
3          Pump Starter and Controller          0.361667
4                      Solar Solutions          0.358333
5   MCCB(Moduled Case Circuit Breaker)          0.357000
6        AIS(Air Insulated Switchgear)          0.352667
7                     Modular Switches          0.352333
8          VCU(Vacuum Contactor Units)          0.351667
9            PSS(Compact Sub-Stations)          0.351333
10         VCB(Vacuum Circuit Breaker)          0.342667
11                          Controller          0.338333
12                             E-House          0.337333
13                 RMU(Ring Main Unit)          0.336333
14            ACB(Air Circuit Breaker)          0.331333
