In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Load the datasets
# Read the three input CSVs (from the current working directory) in a fixed order
# and bind each resulting frame to its own name.
customers, products, transactions = map(
    pd.read_csv,
    ("Customers.csv", "Products.csv", "Transactions.csv"),
)

In [2]:
# Step 2: Data Preprocessing
# Attach product attributes, then customer attributes, to every transaction.
# Left joins keep every transaction row even when a lookup key has no match.
#
# The raw `transactions` frame is deliberately NOT rebound here, so this cell
# can be re-run safely: merging `products` into an already-merged frame would
# make pandas suffix the overlapping columns (`_x` / `_y`) and break later
# column lookups.
data = (
    transactions
    .merge(products, on="ProductID", how="left")
    .merge(customers, on="CustomerID", how="left")
)

In [3]:
# Step 3: Feature Engineering
# One group-by object is shared by the per-customer aggregates below.
per_customer = data.groupby("CustomerID")

# Sum of TotalValue across all of a customer's rows
total_spending = per_customer["TotalValue"].sum().rename("TotalSpending")

# Number of distinct transactions per customer
purchase_frequency = per_customer["TransactionID"].nunique().rename("PurchaseFrequency")

# Spending per (customer, category), pivoted to one column per category,
# then converted to each category's share of that customer's row total.
category_totals = (
    data.groupby(["CustomerID", "Category"])["TotalValue"]
    .sum()
    .unstack(fill_value=0)
)
row_totals = category_totals.sum(axis=1)
category_spending = category_totals.div(row_totals, axis=0)

# Side-by-side feature table indexed by CustomerID; any gaps become 0
customer_features = pd.concat(
    [total_spending, purchase_frequency, category_spending],
    axis=1,
).fillna(0)

In [4]:
# Step 4: Compute Similarity Matrix
# Normalize features
def normalize(df):
    """Min-max scale ``df`` to the [0, 1] range, column by column.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Numeric data to scale.

    Returns
    -------
    Same type as ``df``
        ``(df - min) / (max - min)`` per column. A column whose values are
        all identical (``max == min``) comes back as all zeros instead of NaN.
    """
    lower = df.min()
    span = df.max() - lower
    # A constant column has a zero range, and 0 / 0 would yield NaN for every
    # row. Dividing by 1 instead keeps the (all-zero) numerator as the result.
    if isinstance(span, pd.Series):
        span = span.where(span != 0, 1)
    elif span == 0:
        span = 1
    return (df - lower) / span

normalized_features = normalize(customer_features)

# Pairwise cosine similarity between customer feature rows, wrapped in a
# DataFrame that carries the feature table's index on both axes.
similarity_matrix = cosine_similarity(normalized_features)
customer_ids = customer_features.index
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)

In [5]:
# Step 5: Lookalike Recommendation
# Function to get top 3 lookalikes for a customer
def get_top_lookalikes(customer_id, similarity_df, top_n=3):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Parameters
    ----------
    customer_id
        Column label to look up in ``similarity_df``.
    similarity_df : pandas.DataFrame
        Table of similarity scores; column ``customer_id`` holds that
        customer's score against every row label.
    top_n : int, default 3
        Maximum number of lookalikes to return.

    Returns
    -------
    list of tuple
        ``(other_customer, score)`` pairs ordered from highest to lowest
        score, with each score rounded to 4 decimal places. The queried
        customer itself is never included.

    Raises
    ------
    KeyError
        If ``customer_id`` is not a column of ``similarity_df``.
    """
    scores = similarity_df[customer_id]
    # Remove the customer's own entry by label. Skipping "the first row after
    # sorting" is not reliable: another customer can tie with the top score
    # and sort ahead of the customer's own entry.
    others = scores.drop(labels=[customer_id], errors="ignore")
    # Stable sort so tied scores keep a deterministic order.
    ranked = others.sort_values(ascending=False, kind="mergesort")
    return [(cust, round(score, 4)) for cust, score in ranked.head(top_n).items()]

# Generate lookalikes for the first 20 customers
lookalike_results = {}
for customer_id in customers["CustomerID"].head(20):
    # The similarity table is built from transaction data only, so a customer
    # with no transactions has no column in it. Record an empty result for
    # such customers instead of failing with KeyError.
    if customer_id not in similarity_df.columns:
        lookalike_results[customer_id] = []
        continue
    lookalike_results[customer_id] = get_top_lookalikes(customer_id, similarity_df)

In [6]:
# Step 6: Save Results to CSV
# Flatten the {customer: [(lookalike, score), ...]} mapping into one record
# per (customer, lookalike) pair.
lookalike_output = [
    {"CustomerID": customer_id, "SimilarCustomerID": similar_customer, "Score": score}
    for customer_id, lookalikes in lookalike_results.items()
    for similar_customer, score in lookalikes
]

# Write the records without the positional index column
lookalike_df = pd.DataFrame(lookalike_output)
lookalike_df.to_csv("Lookalike.csv", index=False)

print("Lookalike recommendations saved to 'Lookalike.csv'")

Lookalike recommendations saved to 'Lookalike.csv'
