# In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
# ---------------------------------------------------------------------------
# Build a customer-by-customer cosine-similarity table from three CSV inputs.
# ---------------------------------------------------------------------------

# Raw input tables.
customers = pd.read_csv("/content/Customers.csv")
products = pd.read_csv("/content/Products.csv")
transactions = pd.read_csv("/content/Transactions.csv")

# Attach customer and product columns to every transaction. Both joins use the
# default inner join, so transactions without a matching customer or product
# row are dropped.
merged_data = transactions.merge(customers, on="CustomerID").merge(
    products, on="ProductID"
)

# Per-customer aggregates: summed TotalValue and number of distinct products.
purchases_by_customer = merged_data.groupby("CustomerID")
customer_total_value = purchases_by_customer["TotalValue"].sum()
customer_product_count = purchases_by_customer["ProductID"].nunique()

# Integer code for each region (adds a "RegionEncoded" column to `customers`).
region_as_category = customers["Region"].astype("category")
customers["RegionEncoded"] = region_as_category.cat.codes

# One row per customer, indexed by CustomerID. The aggregates are aligned on
# that index; customers with no transactions get NaN, which becomes 0.
customer_features = (
    customers.set_index("CustomerID")[["RegionEncoded"]]
    .assign(
        TotalValue=customer_total_value,
        ProductCount=customer_product_count,
    )
    .fillna(0)
)

# NOTE(review): the three columns are used unscaled and the region code is
# treated as a number; confirm this is intended before relying on the scores.
feature_matrix = customer_features.to_numpy()
similarity_matrix = cosine_similarity(feature_matrix)

# Label rows and columns with CustomerID so scores can be looked up by ID.
customer_ids = customer_features.index
similarity_df = pd.DataFrame(
    similarity_matrix, index=customer_ids, columns=customer_ids
)
# Interactive lookup: show the three most similar customers for an ID that the
# user types on standard input.
print("\nEnter a CustomerID to view similar customers (e.g., C0001):")
customer_id = input().strip()

if customer_id in similarity_df.index:
    print(f"\nSimilarity scores for Customer {customer_id}:")
    # Remove the customer's own entry by label instead of assuming it sorts
    # first: a tie at the top (or an all-zero feature row, whose
    # self-similarity is 0) would otherwise drop a real neighbour and may
    # list the customer as their own lookalike.
    scores = similarity_df[customer_id].drop(labels=customer_id).nlargest(3)
    print(scores)
else:
    print(f"\nCustomerID {customer_id} not found in the dataset!")
# Batch export: for each of the first 20 customers (in file order) record the
# three most similar other customers and write the result to Lookalike.csv.
print("\nGenerating lookalike recommendations...")
lookalike_results = {}
# A dedicated loop variable keeps the `customer_id` entered by the user above
# from being overwritten.
for source_id in customers["CustomerID"].iloc[:20]:
    # Exclude the customer by label rather than by position; the self entry
    # is not guaranteed to sort first when similarities tie.
    top_matches = similarity_df[source_id].drop(labels=source_id).nlargest(3)
    # Convert to built-in floats so the stringified list contains plain
    # numbers; NumPy >= 2 would otherwise render each as "np.float64(...)".
    lookalike_results[source_id] = [
        (match_id, float(score)) for match_id, score in top_matches.items()
    ]

# One row per source customer; the matches are stored as the string form of a
# list of (CustomerID, score) tuples.
lookalike_df = pd.DataFrame({
    "CustomerID": list(lookalike_results),
    "Lookalikes": [str(matches) for matches in lookalike_results.values()],
})
lookalike_df.to_csv("Lookalike.csv", index=False)

print("\nLookalike recommendations saved to Lookalike.csv")
print("\nFirst few recommendations:")
print(lookalike_df.head())