# Lookalike Model

# In [None]:

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Build a per-customer feature table from the transaction history, score every
# pair of customers by cosine similarity, and write the closest matches for
# the first few customers to 'Lookalike.csv'.

# How many customers (in feature-table order) get a lookalike list, and how
# many lookalikes are reported for each of them.
NUM_TARGET_CUSTOMERS = 20
TOP_N_LOOKALIKES = 3


def _top_lookalikes(scores, own_index, ids, top_n=TOP_N_LOOKALIKES):
    """Return the ``top_n`` most similar customers for one similarity row.

    Args:
        scores: Similarity scores of one customer against every customer,
            positionally aligned with ``ids``.
        own_index: Position of the customer itself within ``scores``.
        ids: Customer identifiers, aligned with ``scores``.
        top_n: Maximum number of lookalikes to return.

    Returns:
        A list of ``(customer_id, score)`` tuples, highest score first, with
        scores rounded to 4 decimals as builtin floats.
    """
    # Exclude the customer by position instead of dropping the first sorted
    # entry: another customer with an identical feature vector also scores
    # ~1.0 and may sort ahead of the customer itself.
    candidates = [
        (j, score) for j, score in enumerate(scores) if j != own_index
    ]
    # sorted() is stable, so equal scores keep their original relative order.
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
    # float() keeps NumPy scalar reprs (e.g. "np.float64(0.9)") out of the
    # stringified list that ends up in the CSV.
    return [(ids[j], round(float(score), 4)) for j, score in ranked[:top_n]]


customers_df = pd.read_csv('Customers.csv')
transactions_df = pd.read_csv('Transactions.csv')

# Inner join: only transactions whose CustomerID exists in Customers.csv are
# kept, so customers without any transaction get no feature row.
merged_df = pd.merge(transactions_df, customers_df, on='CustomerID')

# One row per customer with aggregate purchase behaviour.
customer_features = merged_df.groupby('CustomerID').agg(
    total_spending=('TotalValue', 'sum'),
    avg_spending=('TotalValue', 'mean'),
    purchase_count=('TransactionID', 'count'),
    product_variety=('ProductID', 'nunique')
).reset_index()

# One-hot encode the region and attach it to the behavioural features.
region_dummies = pd.get_dummies(customers_df.set_index('CustomerID')['Region'], prefix='region')
customer_features = customer_features.merge(region_dummies, left_on='CustomerID', right_index=True)

# Standardise every feature column so no single scale dominates the
# similarity computation.
scaler = StandardScaler()
normalized_features = scaler.fit_transform(customer_features.drop(columns='CustomerID'))

# Square matrix: entry [i][j] is the similarity between customers i and j.
similarity_matrix = cosine_similarity(normalized_features)

# Row i of the similarity matrix corresponds to customer_ids[i].
customer_ids = customer_features['CustomerID'].tolist()
lookalikes = {}

for i, cust_id in enumerate(customer_ids[:NUM_TARGET_CUSTOMERS]):
    lookalikes[cust_id] = _top_lookalikes(similarity_matrix[i], i, customer_ids)


# The 'lookalikes' column holds a list of (customer_id, score) tuples, which
# to_csv writes out as its string representation.
lookalike_df = pd.DataFrame([{'cust_id': k, 'lookalikes': v} for k, v in lookalikes.items()])
lookalike_df.to_csv('Lookalike.csv', index=False)
    