# In [5]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Load the customer master data and the transaction log.
# Forward slashes are used as the path separator because they are accepted on
# Windows as well as on POSIX systems, whereas a backslash is only treated as
# a separator on Windows. Both paths are relative to the current working
# directory.
customers = pd.read_csv("Downloads/Customers.csv")
transactions = pd.read_csv("Downloads/Transactions.csv")

# Attach the customer columns to every transaction row. pd.merge defaults to
# an inner join, so rows whose CustomerID appears in only one of the two
# files are dropped.
data = pd.merge(transactions, customers, on="CustomerID")

# Build one row per customer summarising their purchasing behaviour.
# Each entry maps an output column to (source column, aggregation).
feature_spec = {
    'total_spent': ('TotalValue', 'sum'),
    'total_transactions': ('TransactionID', 'count'),
    'unique_products': ('ProductID', 'nunique'),
}
transactions_by_customer = data.groupby('CustomerID')
# reset_index() turns the CustomerID group key back into an ordinary first
# column and gives the frame a default 0..n-1 index.
customer_features = transactions_by_customer.agg(**feature_spec).reset_index()

# Standardise the numeric features. Column 0 is skipped so that only the
# columns after it (everything except the leading CustomerID column) reach
# the scaler.
feature_values = customer_features.iloc[:, 1:]
scaler = StandardScaler()
scaled_features = scaler.fit_transform(feature_values)

# Pairwise cosine similarity between customers: entry [i, j] compares row i
# with row j of the scaled features.
similarity_matrix = cosine_similarity(scaled_features)

# Find the top 3 lookalikes for every customer.
#
# The customer's own row is skipped by index rather than by dropping the
# first entry of the sorted list. The sort is stable, so another customer
# whose similarity ties with the self-similarity and who sits earlier in the
# frame would otherwise be the one discarded, and the customer would end up
# listed as its own lookalike.
customer_ids = customer_features['CustomerID']
lookalikes = {}
for idx, customer_id in enumerate(customer_ids):
    candidates = [
        (other_idx, score)
        for other_idx, score in enumerate(similarity_matrix[idx])
        if other_idx != idx
    ]
    # Highest similarity first; ties keep their original row order.
    candidates.sort(key=lambda pair: -pair[1])
    # .iloc looks rows up by position, matching the positions used to index
    # similarity_matrix regardless of the frame's index labels.
    lookalikes[customer_id] = [
        (customer_ids.iloc[other_idx], score)
        for other_idx, score in candidates[:3]
    ]

# Flatten the {customer: [(similar customer, score), ...]} mapping into one
# row per (customer, lookalike) pair.
lookalike_results = [
    [customer_id, sim_cust_id, score]
    for customer_id, similar_customers in lookalikes.items()
    for sim_cust_id, score in similar_customers
]

lookalike_df = pd.DataFrame(
    lookalike_results,
    columns=['CustomerID', 'SimilarCustomerID', 'SimilarityScore'],
)

# The result is written twice: once to the current working directory and once
# to the Downloads folder. A forward slash is used as the separator because
# it is accepted on Windows as well as on POSIX systems, whereas a backslash
# is only treated as a separator on Windows.
lookalike_df.to_csv("Lookalike.csv", index=False)
lookalike_df.to_csv("Downloads/Lookalike.csv", index=False)


