In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity


In [3]:
# Load the three input tables, in order: customers, products, transactions.
# NOTE(review): "Customers-checkpoint.csv" looks like an editor checkpoint copy
# rather than the canonical customers file — confirm this is the intended input.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers-checkpoint.csv", "Products.csv", "Transactions.csv")
)


In [4]:
# Attach the customer record, then the product record, to every transaction.
# No `how` is given, so pandas' default join type applies to both merges.
merged_data = pd.merge(
    pd.merge(transactions, customers, on="CustomerID"),
    products,
    on="ProductID",
)


In [5]:
# One row per CustomerID with three behavioural features, named directly via
# named aggregation (output column = (source column, aggregation)).
customer_features = merged_data.groupby("CustomerID").agg(
    TotalValue=("TotalValue", "sum"),             # total spend
    NumTransactions=("TransactionID", "count"),   # number of transactions
    UniqueProducts=("ProductID", "nunique"),      # distinct products purchased
)


In [6]:
# Standardise every feature column so no single feature dominates the
# similarity computation purely because of its scale.
scaler = StandardScaler().fit(customer_features)
scaled_features = scaler.transform(customer_features)


In [7]:
# Pairwise cosine similarity between all customers' scaled feature vectors.
similarity_matrix = cosine_similarity(scaled_features)

# Label both axes with the customer IDs so rows/columns can be looked up by ID.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_features.index,
    columns=customer_features.index,
)

In [8]:
# Target customers: the first 20 rows of the customers table, restricted to
# IDs that actually appear in the similarity matrix.
# NOTE(review): presumably these are C0001-C0020, which only holds if the
# customers file is ordered by CustomerID — confirm.
target_customers = [
    cid
    for cid in customers["CustomerID"].iloc[:20]
    if cid in similarity_df.index
]

# Accumulator for [customer, look-alike, score] rows.
lookalike_results = []

In [9]:
# Collect the 3 most similar customers for every target customer.
# The list is (re)initialised in this cell so that re-running the cell on its
# own rebuilds the results instead of appending a duplicate copy of every row.
lookalike_results = []
for cust_id in target_customers:
    # This customer's similarity row, without the customer's own entry
    # (self-similarity), reduced to the 3 highest scores.
    similar_customers = similarity_df.loc[cust_id].drop(cust_id).nlargest(3)
    for sim_cust, score in similar_customers.items():
        # Row layout: [customer ID, look-alike ID, similarity rounded to 4 decimals]
        lookalike_results.append([cust_id, sim_cust, round(score, 4)])



In [10]:
# Convert the collected rows to a DataFrame and save them as CSV.
# The file name lives in one variable so the confirmation message always
# reports exactly the file that was written (the previous message used a
# different capitalisation than the actual file name).
output_path = "Sanjana_tandekar_Lookalike.csv"
lookalike_df = pd.DataFrame(lookalike_results, columns=["CustomerID", "Lookalike", "Similarity_Score"])
lookalike_df.to_csv(output_path, index=False)  # index=False: do not write the row index column

print(f"✅ Lookalike Model completed. Results saved in {output_path}")

✅ Lookalike Model completed. Results saved in sanjana_tandekar_Lookalike.csv
