# In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load and prepare customer features
# Parse the signup dates, then express tenure as the whole number of days
# between each signup and the moment this cell runs.
customers['SignupDate'] = pd.to_datetime(customers['SignupDate'])
current_date = pd.to_datetime('today')
time_since_signup = current_date - customers['SignupDate']
customers['Tenure'] = time_since_signup.dt.days

# Transaction History
# One row per customer: total spend, number of distinct transactions, and
# mean transaction value. The (column, aggfunc) tuples are the shorthand
# form of named aggregation.
transactions_by_customer = transactions.groupby('CustomerID')
transaction_features = transactions_by_customer.agg(
    TotalSpend=('TotalValue', 'sum'),
    PurchaseFrequency=('TransactionID', 'nunique'),
    AvgOrderValue=('TotalValue', 'mean'),
)
# Bring CustomerID back as an ordinary column so it can be used as a merge key.
transaction_features = transaction_features.reset_index()

# Merge with Customer Data
# The left join keeps every customer; those with no matching row in
# transaction_features get NaN in the aggregate columns.
customer_features = customers.merge(transaction_features, on='CustomerID', how='left')
# One-hot encode Region so it can take part in the numeric feature matrix.
customer_features = pd.get_dummies(customer_features, columns=['Region'])
# Zero-fill numeric columns only. A blanket fillna(0) would also write the
# integer 0 into any missing CustomerName / SignupDate value, mixing types in
# text and datetime columns.
numeric_columns = customer_features.select_dtypes(include='number').columns
customer_features = customer_features.fillna({column: 0 for column in numeric_columns})

# Scale features and compute similarity
# Exclude the identifier, name, and signup-date columns; every remaining
# column is used as a feature.
non_feature_columns = ['CustomerID', 'CustomerName', 'SignupDate']
features = customer_features.drop(non_feature_columns, axis=1)

# Standardize each feature column, then compare all customers pairwise.
# Row/column k of similarity_matrix corresponds to row k of customer_features.
scaler = StandardScaler().fit(features)
scaled_features = scaler.transform(features)
similarity_matrix = cosine_similarity(scaled_features)

# Generate lookalike customers
# For each target ID (C0001 .. C0020) keep the three *other* customers with
# the highest cosine similarity, as (CustomerID, score) pairs.
lookalike_map = {}
target_customers = [f'C{i:04d}' for i in range(1, 21)]

# similarity_matrix is addressed by row position, so resolve each CustomerID
# to its position in customer_features rather than to its index label.
# setdefault keeps the first occurrence if an ID appears more than once.
customer_ids = customer_features['CustomerID'].tolist()
position_by_id = {}
for position, customer_id in enumerate(customer_ids):
    position_by_id.setdefault(customer_id, position)

for cust_id in target_customers:
    try:
        idx = position_by_id[cust_id]
    except KeyError:
        raise KeyError(
            f'Target customer {cust_id!r} is missing from customer_features'
        ) from None

    # Drop the customer's own entry explicitly. Relying on "self sorts first"
    # breaks when another customer ties with it (identical features, float
    # rounding) or when its scaled vector is all zeros.
    # float() stores plain Python floats, so the stringified list written to
    # CSV does not depend on NumPy's scalar repr.
    scores = [
        (other, float(score))
        for other, score in enumerate(similarity_matrix[idx])
        if other != idx
    ]
    # sorted() is stable, so ties keep their original row order.
    sorted_scores = sorted(scores, key=lambda pair: pair[1], reverse=True)[:3]
    lookalike_map[cust_id] = [(customer_ids[other], score) for other, score in sorted_scores]

# Save results
# One row per target customer. The Lookalikes cell holds the whole list of
# (CustomerID, score) tuples, which to_csv writes out in its string form.
lookalike_rows = list(lookalike_map.items())
lookalike_df = pd.DataFrame(lookalike_rows, columns=['CustomerID', 'Lookalikes'])
lookalike_df.to_csv('belo_abhigyan_Lookalike.csv', index=False)