# In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

# Load the pre-processed table; it must provide the CustomerID, TotalValue
# and TransactionID columns used below.
merged_data = pd.read_csv('Processed_Data.csv')

# Collapse the table to one row per CustomerID with three behavioural
# features: summed TotalValue, mean TotalValue and the number of
# TransactionID entries. reset_index() turns CustomerID back into the first
# column of the result.
customer_features = (
    merged_data
    .groupby('CustomerID')
    .agg(
        TotalSpending=pd.NamedAgg(column='TotalValue', aggfunc='sum'),
        AvgTransactionValue=pd.NamedAgg(column='TotalValue', aggfunc='mean'),
        TransactionFrequency=pd.NamedAgg(column='TransactionID', aggfunc='count'),
    )
    .reset_index()
)

# Every column after the first (CustomerID) is a numeric feature.
feature_names = customer_features.columns[1:]
feature_frame = customer_features[feature_names]

# Standardise the features so that no single one dominates the comparison,
# then compute the pairwise cosine similarity between all customers.
# Row/column k of the matrix corresponds to row k of customer_features.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(feature_frame)
similarity_matrix = cosine_similarity(scaled_features)

# Map each customer to its three most similar *other* customers as a list of
# (CustomerID, similarity score) tuples, best match first.
#
# IDs are read once from the column: a row-wise `.iloc[j]['CustomerID']`
# lookup builds a mixed-dtype row Series on every access, which is slow and
# can coerce numeric IDs to float.
customer_ids = customer_features['CustomerID'].tolist()

lookalike_map = {}
for i, customer in enumerate(customer_ids):
    # Drop the customer's own entry by position rather than assuming it is
    # ranked first: customers with identical feature vectors tie with the
    # self-similarity (and a stable sort may place them ahead of it), and a
    # row that scales to the zero vector has a self-similarity of 0.
    candidates = [
        (j, score)
        for j, score in enumerate(similarity_matrix[i])
        if j != i
    ]
    # Highest similarity first; ties keep their original row order.
    candidates.sort(key=lambda pair: -pair[1])
    # Store plain Python floats so the scores print as ordinary numbers when
    # the list is later stringified (e.g. when written to CSV).
    lookalike_map[customer] = [
        (customer_ids[j], float(score)) for j, score in candidates[:3]
    ]

# One record per customer: the ID plus its list of lookalikes.
lookalike_rows = [
    dict(CustomerID=cust_id, Lookalikes=lookalikes)
    for cust_id, lookalikes in lookalike_map.items()
]
lookalike_df = pd.DataFrame(lookalike_rows)

# Persist the mapping without the positional index; the Lookalikes column is
# written as the string form of each list.
lookalike_df.to_csv('Lookalike.csv', index=False)
