In [1]:
## Import Libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load Datasets
# Read the three input tables from the working directory in one pass; the
# unpacking order matches the order of the file names below.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)

In [3]:
# Merge Datasets for Comprehensive Analysis
# Attach customer attributes to each transaction (key: CustomerID), then product
# attributes (key: ProductID). Both joins use the pandas default (inner), so
# transactions without a matching customer or product row are dropped.
merged_data = pd.merge(
    pd.merge(transactions, customers, on="CustomerID"),
    products,
    on="ProductID",
)

In [4]:
# Create User Profiles
# One row per CustomerID holding the mean 'Quantity' and mean 'TotalValue' over
# that customer's merged transaction rows. Category preferences are NOT part of
# this frame; they are built separately.
customer_profiles = (
    merged_data[["CustomerID", "Quantity", "TotalValue"]]
    .groupby("CustomerID")
    .mean()
)

In [5]:
# Build per-customer product category preferences
# get_dummies expands 'Category' into one indicator column per category value
# (prefixed 'Category_'); summing the indicators per CustomerID turns them into
# the number of merged transaction rows the customer has in each category.
product_preferences = (
    pd.get_dummies(merged_data[["CustomerID", "Category"]], columns=["Category"])
    .groupby("CustomerID")
    .sum()
)

In [6]:
# Assemble the final per-customer feature table
# Place the mean-based transaction features and the category counts side by side,
# aligned on the shared CustomerID index.
final_profiles = pd.concat(
    [customer_profiles, product_preferences],
    axis="columns",
)

In [7]:
# Compute Cosine Similarity for Customer Profiles
# Cosine similarity is scale-sensitive: the profile mixes columns with different
# units (mean quantity, mean monetary value, per-category counts), so on raw values
# whichever column has the largest magnitude dominates the angle between two
# customers. Standardise every column (z-score) first so each feature contributes
# on a comparable scale. final_profiles itself is left untouched.
feature_center = final_profiles.mean()
# Population standard deviation; a constant column has std 0, so use a divisor of 1
# there to turn it into all zeros instead of NaN (cosine_similarity rejects NaN).
feature_spread = final_profiles.std(ddof=0).replace(0, 1)
scaled_profiles = (final_profiles - feature_center) / feature_spread

# Square customer-by-customer matrix, labelled with the profile index on both axes
similarity_matrix = cosine_similarity(scaled_profiles)
similarity_df = pd.DataFrame(similarity_matrix, index=final_profiles.index, columns=final_profiles.index)

In [8]:
# Generate Lookalike Recommendations
# For the first 20 customers in the similarity index, find the top-3 most similar
# other customers. NOTE(review): this relies on the first 20 index labels being
# C0001 - C0020, which only holds if each of those customers has at least one
# merged transaction row -- confirm against the data.
lookalike_results = {}
for customer in similarity_df.index[:20]:
    # Remove the customer's own entry by label instead of assuming it sorts first:
    # another customer can tie with (or, through float round-off, edge past) the
    # self-similarity, in which case positional slicing would keep the customer in
    # their own recommendations and drop a genuine match.
    other_scores = similarity_df.loc[customer].drop(labels=customer)
    top_matches = other_scores.nlargest(3)
    # Cast to built-in float so the stored tuples (and their text form in the CSV)
    # contain plain numbers rather than NumPy scalar reprs such as np.float64(...).
    lookalike_results[customer] = [
        (other_id, round(float(score), 4)) for other_id, score in top_matches.items()
    ]

In [9]:
# Prepare Lookalike Results for CSV export
# One record per customer: the customer ID under 'cust_id' and that customer's
# list of (similar_customer_id, score) tuples under 'recommendations'.
lookalike_csv_data = [
    {"cust_id": cust_id, "recommendations": matches}
    for cust_id, matches in lookalike_results.items()
]
lookalike_df = pd.DataFrame(lookalike_csv_data)

In [10]:
# Write the recommendation table to 'Lookalike.csv' in the working directory,
# leaving out the DataFrame's positional index column.
lookalike_df.to_csv(path_or_buf="Lookalike.csv", index=False)

# Confirmation message once the file has been written
print("Lookalike Model completed. Results saved to Lookalike.csv.")

Lookalike Model completed. Results saved to Lookalike.csv.
