In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from collections import defaultdict

# Load the three raw tables from the working directory.
customers = pd.read_csv("Customers.csv")
products = pd.read_csv("Products.csv")
transactions = pd.read_csv("Transactions.csv")

# Enrich every transaction row with its customer and product attributes.
# Left joins keep each transaction even when the customer or product lookup
# finds no match (the joined columns are then NaN for that row).
merged_data = (
    transactions
    .merge(customers, on='CustomerID', how='left')
    .merge(products, on='ProductID', how='left')
)


# Per-customer aggregates, all derived from one grouping of the merged rows.
by_customer = merged_data.groupby('CustomerID')

# Sum of TotalValue over all of a customer's rows.
customer_spending = by_customer['TotalValue'].sum().reset_index(name='TotalSpending')

# Number of merged rows recorded for each customer.
customer_transactions_count = by_customer.size().reset_index(name='TotalTransactions')

# Mean TotalValue per row for each customer.
customer_avg_transaction_value = (
    by_customer['TotalValue'].mean().reset_index(name='AvgTransactionValue')
)

# Wide table of row counts: one row per customer, one column per Category,
# with 0 where a customer has no rows in that category.
category_interactions = (
    merged_data.groupby(['CustomerID', 'Category']).size().unstack(fill_value=0)
)

# Assemble one feature row per customer by joining every aggregate on
# CustomerID; any value still missing after the joins is replaced with 0.
customer_features = (
    customer_spending
    .merge(customer_transactions_count, on='CustomerID', how='left')
    .merge(customer_avg_transaction_value, on='CustomerID', how='left')
    .merge(category_interactions, on='CustomerID', how='left')
    .fillna(0)
)

# Standardise every feature column (CustomerID is an identifier, not a
# feature, so it is left out) before comparing customers.
feature_matrix = customer_features.drop(columns='CustomerID')
scaler = StandardScaler()
features_scaled = scaler.fit_transform(feature_matrix)

# Pairwise cosine similarity between customers: entry [i, j] compares
# row i of customer_features with row j.
cosine_sim = cosine_similarity(features_scaled)

# Build the lookalike table: for each of the first NUM_TARGET_CUSTOMERS rows
# of customer_features, record the TOP_N most similar *other* customers and
# their cosine-similarity scores, then export the table to Lookalike.csv.
NUM_TARGET_CUSTOMERS = 20  # customers (from the top of customer_features) to score
TOP_N = 3                  # lookalikes reported per customer

lookalike_recommendations = defaultdict(list)

# idx is the row position in customer_features, which is also the row of
# cosine_sim belonging to cust_id.
for idx, cust_id in enumerate(customer_features['CustomerID'].head(NUM_TARGET_CUSTOMERS)):
    similarities = cosine_sim[idx]
    ranked_indices = similarities.argsort()[::-1]  # most similar first
    # Remove the customer's own row by index instead of assuming it is ranked
    # first: with tied scores (e.g. another customer with an identical feature
    # vector) or floating-point rounding, self is not guaranteed to sort to
    # position 0, and slicing [1:] could then keep self and drop a real match.
    similar_customer_indices = ranked_indices[ranked_indices != idx][:TOP_N]
    lookalike_recommendations[cust_id] = [
        (customer_features['CustomerID'].iloc[similar_idx], similarities[similar_idx])
        for similar_idx in similar_customer_indices
    ]

# Column layout: CustomerID, Lookalike_1, Score_1, ..., Lookalike_N, Score_N.
column_names = ["CustomerID"]
for rank in range(1, TOP_N + 1):
    column_names += [f"Lookalike_{rank}", f"Score_{rank}"]

rows = []
for cust_id, matches in lookalike_recommendations.items():
    # Pad with empty entries so a dataset with fewer than TOP_N other
    # customers yields blank cells instead of raising IndexError.
    padded = matches + [(None, np.nan)] * (TOP_N - len(matches))
    rows.append([cust_id] + [field for match in padded for field in match])

lookalike_df = pd.DataFrame(rows, columns=column_names)

lookalike_df.to_csv("Lookalike.csv", index=False)

print("Lookalike model created and saved as Lookalike.csv")
