In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from google.colab import files

In [2]:
uploaded = files.upload()
product_df = pd.read_csv("product_recommendation_data.csv")
customer_df = pd.read_csv("customer_data_collection.csv")

Saving customer_data_collection.csv to customer_data_collection.csv
Saving product_recommendation_data.csv to product_recommendation_data.csv


In [3]:
product_df = product_df.loc[:, ~product_df.columns.str.contains('^Unnamed')]
customer_df = customer_df.loc[:, ~customer_df.columns.str.contains('^Unnamed')]

In [4]:
def preprocess_content_data(df):
    df = df.copy()
    df.fillna("", inplace=True)

    df['combined_text'] = df[['Category', 'Subcategory', 'Brand', 'Season', 'Geographical_Location']].astype(str).agg(' '.join, axis=1)

    tfidf = TfidfVectorizer()
    tfidf_matrix = tfidf.fit_transform(df['combined_text'])

    num_cols = ['Price', 'Average_Rating_of_Similar_Products', 'Product_Rating', 'Customer_Review_Sentiment_Score']
    scaler = MinMaxScaler()
    scaled_numerical = scaler.fit_transform(df[num_cols])

    full_matrix = np.hstack((scaled_numerical, tfidf_matrix.toarray()))
    return full_matrix, tfidf, scaler

product_cb_matrix, tfidf_vectorizer, cb_scaler = preprocess_content_data(product_df)

In [5]:
def content_based_recommendations(product_df, last_product_name, top_n=5):
    idx_list = product_df[product_df['Subcategory'] == last_product_name].index.tolist()
    if not idx_list:
        return [" Product not found in dataset"]

    idx = idx_list[0]
    last_encoded = product_cb_matrix[idx]
    sim_scores = cosine_similarity([last_encoded], product_cb_matrix).flatten()
    top_indices = sim_scores.argsort()[::-1]

    recommendations = []
    for i in top_indices:
        prod_name = product_df.iloc[i]['Subcategory']
        if prod_name != last_product_name and prod_name not in recommendations:
            recommendations.append(prod_name)
        if len(recommendations) == top_n:
            break
    return recommendations

In [6]:
def collaborative_recommendations(customer_df, product_df, top_n=5):
    interaction_matrix = pd.DataFrame(0, index=customer_df['Customer_ID'], columns=product_df['Subcategory'].unique())

    for _, row in customer_df.iterrows():
        cid = row['Customer_ID']
        history = eval(row['Purchase_History']) if isinstance(row['Purchase_History'], str) else row['Purchase_History']
        for item in history:
            if item in interaction_matrix.columns:
                interaction_matrix.loc[cid, item] = 1

    svd = TruncatedSVD(n_components=10, random_state=42)
    matrix_reduced = svd.fit_transform(interaction_matrix)
    reconstructed = pd.DataFrame(svd.inverse_transform(matrix_reduced), index=interaction_matrix.index, columns=interaction_matrix.columns)

    recommendations = {}
    for cid in reconstructed.index:
        scores = reconstructed.loc[cid]
        already_bought = eval(customer_df[customer_df['Customer_ID'] == cid]['Purchase_History'].values[0])
        sorted_products = scores.sort_values(ascending=False)
        recommended = [prod for prod in sorted_products.index if prod not in already_bought][:top_n]
        recommendations[cid] = recommended
    return recommendations

In [7]:
def run_recommender():
    customer_id = input(" Enter Customer ID (e.g., C1002): ").strip()
    if customer_id not in customer_df['Customer_ID'].values:
        print(" Invalid Customer ID.")
        return

    customer_row = customer_df[customer_df['Customer_ID'] == customer_id].iloc[0]
    history = eval(customer_row['Purchase_History']) if isinstance(customer_row['Purchase_History'], str) else customer_row['Purchase_History']
    last_product = history[-1] if history else "Unknown"

    print(f"\n Customer ID: {customer_id}")
    print(f" Last Purchased Product: {last_product}")

    print("\n Content-Based Recommendations (based on last product):")
    cb_recs = content_based_recommendations(product_df, last_product)
    for rec in cb_recs:
        print(f"• {rec}")

    print("\n Collaborative Filtering Recommendations:")
    cf_recs = collaborative_recommendations(customer_df, product_df)
    if customer_id in cf_recs:
        for rec in cf_recs[customer_id]:
            print(f"• {rec}")
    else:
        print(" No collaborative recommendations available.")

run_recommender()

 Enter Customer ID (e.g., C1002): C1005

 Customer ID: C1005
 Last Purchased Product: Curtains

 Content-Based Recommendations (based on last product):
• Cushions
• Lamp
• Wall Art
• Biography
• Dumbbells

 Collaborative Filtering Recommendations:
• Smartphone
• Smartwatch
• Fiction
• Perfume
• Resistance Bands


In [8]:
def evaluate_recommendations(customer_df, product_df, top_k=5):
    correct_preds = 0
    total_preds = 0
    total_relevant = 0

    interaction_matrix = pd.DataFrame(0, index=customer_df['Customer_ID'], columns=product_df['Subcategory'].unique())
    for _, row in customer_df.iterrows():
        cid = row['Customer_ID']
        history = eval(row['Purchase_History']) if isinstance(row['Purchase_History'], str) else row['Purchase_History']
        for item in history:
            if item in interaction_matrix.columns:
                interaction_matrix.loc[cid, item] = 1

    svd = TruncatedSVD(n_components=10, random_state=42)
    reduced = svd.fit_transform(interaction_matrix)
    reconstructed = pd.DataFrame(svd.inverse_transform(reduced), index=interaction_matrix.index, columns=interaction_matrix.columns)

    for _, row in customer_df.iterrows():
        cid = row['Customer_ID']
        history = eval(row['Purchase_History']) if isinstance(row['Purchase_History'], str) else row['Purchase_History']
        if len(history) < 2:
            continue

        test_item = history[-1]
        train_items = history[:-1]

        scores = reconstructed.loc[cid]
        recommended = [prod for prod in scores.sort_values(ascending=False).index if prod not in train_items][:top_k]

        if test_item in recommended:
            correct_preds += 1
        total_preds += 1
        total_relevant += 1

    precision_at_k = correct_preds / total_preds if total_preds else 0
    recall_at_k = correct_preds / total_relevant if total_relevant else 0

    print(f" Evaluation Metrics (Top-{top_k}):")
    print(f" Precision@{top_k}: {precision_at_k:.2%}")
    print(f" Recall@{top_k}: {recall_at_k:.2%}")

In [9]:
evaluate_recommendations(customer_df, product_df, top_k=5)

 Evaluation Metrics (Top-5):
 Precision@5: 93.64%
 Recall@5: 93.64%
