In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [2]:
# Raw transaction table, read from the working directory. Later cells use it
# as the left side of joins keyed on "ProductID" and "CustomerID".
transactions_df = pd.read_csv(filepath_or_buffer="Transactions.csv")

In [3]:
# Product lookup table, read from the working directory. It is joined onto
# the transactions via its "ProductID" column in a later cell.
products_df = pd.read_csv(filepath_or_buffer="Products.csv")

In [4]:
# Customer lookup table, read from the working directory. It is joined onto
# the transactions via its "CustomerID" column in a later cell.
customers_df = pd.read_csv(filepath_or_buffer="Customers.csv")

In [5]:
# Attach product and customer attributes to every transaction row.
# Both joins are left joins, so a transaction is kept even when its
# ProductID or CustomerID has no match in the lookup table (the added
# columns are then NaN for that row).
merged_df = (
    transactions_df
    .merge(right=products_df, how="left", on="ProductID")
    .merge(right=customers_df, how="left", on="CustomerID")
)

In [6]:
# Collapse the transaction-level frame into one row of behavioural features
# per customer. Only customers that appear in merged_df get a row.
customer_profile = (
    merged_df
    .groupby("CustomerID")
    .agg(
        # Spend: lifetime total and average value per transaction.
        total_spent=pd.NamedAgg(column="TotalValue", aggfunc="sum"),
        avg_spent=pd.NamedAgg(column="TotalValue", aggfunc="mean"),
        # Activity: number of non-null TransactionID entries.
        total_transactions=pd.NamedAgg(column="TransactionID", aggfunc="count"),
        # Breadth: distinct products / categories bought (NaN is not counted).
        unique_products=pd.NamedAgg(column="ProductID", aggfunc="nunique"),
        unique_categories=pd.NamedAgg(column="Category", aggfunc="nunique"),
    )
    # Turn the CustomerID group key back into an ordinary column.
    .reset_index()
)

In [7]:
# Keep only the numeric feature columns; the identifier must not take part
# in the similarity computation.
customer_features = customer_profile.drop(columns="CustomerID")

# Standardise every feature so that columns on a large scale (e.g. total
# spend) do not dominate the small-scale ones (e.g. category counts).
scaler = StandardScaler()
customer_features_normalized = scaler.fit(customer_features).transform(customer_features)

In [8]:
# Pairwise cosine similarity between all customers' standardised feature
# vectors: entry (i, j) compares the i-th and j-th rows of customer_profile.
similarity_matrix = cosine_similarity(customer_features_normalized)

# Label both axes with CustomerID so scores can be looked up by id.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_profile["CustomerID"],
    columns=customer_profile["CustomerID"],
)

In [9]:
# Recommendations are produced only for the first 20 rows of the profile
# table (a positional slice, in the order the groupby emitted the ids).
target_customers = customer_profile["CustomerID"].iloc[:20]

# Filled by the next cell: CustomerID -> list of (lookalike id, score) pairs.
lookalike_map = dict()

In [10]:
# For every target customer, rank all other customers by cosine similarity
# and keep the three highest-scoring ones.
for customer_id in target_customers:
    similar_customers = (
        similarity_df[customer_id]
        .sort_values(ascending=False)
        .drop(customer_id)  # a customer must not be recommended to itself
    )
    top_3 = similar_customers.iloc[:3]
    # Store as a list of (CustomerID, similarity score) tuples, best first.
    lookalike_map[customer_id] = list(top_3.items())

In [11]:
# Flatten the lookalike map into one record per target customer. The list of
# (lookalike id, score) tuples is stored as its Python string representation
# so that it fits into a single CSV cell.
lookalike_rows = [
    {"CustomerID": cust_id, "Lookalikes": str(recommendations)}
    for cust_id, recommendations in lookalike_map.items()
]

# Name the columns explicitly: a DataFrame built from an empty record list
# has no columns at all, so without this the CSV would lose its header
# whenever lookalike_map is empty. For non-empty input the result is the same.
lookalike_df = pd.DataFrame(lookalike_rows, columns=["CustomerID", "Lookalikes"])

# Persist the recommendations (without the positional index) and report where.
output_file = "Lookalike.csv"
lookalike_df.to_csv(output_file, index=False)
print(f"Lookalike recommendations saved to {output_file}")

Lookalike recommendations saved to Lookalike.csv
