In [None]:
### *Task 2: Lookalike Model*
#%%

pip install pandas numpy matplotlib seaborn scikit-learn plotly

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity

# Create customer features
def _most_common_category(categories):
    """Return the most frequent category, or NaN when none is recorded.

    ``Series.mode`` ignores missing values, so a customer whose categories are
    all missing produces an empty result; indexing that with ``[0]`` would
    raise. When several categories tie, the first entry of ``mode``'s output
    is used.
    """
    modes = categories.mode()
    return modes.iloc[0] if not modes.empty else np.nan


# One row per CustomerID:
#   total_spent       - sum of TotalValue
#   avg_transaction   - mean of TotalValue over the customer's rows
#   transaction_count - number of distinct TransactionID values
#   unique_products   - number of distinct ProductID values
#   favorite_category - most frequent Category (NaN if none recorded)
customer_features = merged_df.groupby('CustomerID').agg(
    total_spent=('TotalValue', 'sum'),
    avg_transaction=('TotalValue', 'mean'),
    transaction_count=('TransactionID', 'nunique'),
    unique_products=('ProductID', 'nunique'),
    favorite_category=('Category', _most_common_category)
).reset_index()

# Attach the aggregated purchase features to each customer's profile record.
# The default (inner) join keeps only CustomerIDs present in both frames.
customer_profile = customers.merge(customer_features, on='CustomerID')

# One-hot encode the categorical columns; every other column passes through.
categorical_columns = ['Region', 'favorite_category']
profile_encoded = pd.get_dummies(customer_profile, columns=categorical_columns)

# Compute similarity
# The identifier, name and signup-date columns are left out of the model
# inputs; everything remaining is standardised before comparison.
non_feature_columns = ['CustomerID', 'CustomerName', 'SignupDate']
scaler = StandardScaler()
scaled_features = scaler.fit_transform(
    profile_encoded.drop(columns=non_feature_columns)
)

# Square matrix of cosine similarities between the rows of scaled_features.
similarity_matrix = cosine_similarity(scaled_features)

# Generate recommendations
# Rows of similarity_matrix are addressed by position, so customers are looked
# up by their position in customer_profile rather than by index label (the two
# only coincide while the frame has a default RangeIndex).
customer_ids = customer_profile['CustomerID'].tolist()
lookalike_mapping = {}
target_customers = customer_ids[:20]  # first 20 customers in profile order

for cust_id in target_customers:
    idx = customer_ids.index(cust_id)
    # Remove the customer's own entry explicitly. Skipping "the first sorted
    # result" is not reliable: another customer with a tied score can sort
    # ahead of it, which would list the customer as their own lookalike.
    scores = [
        (i, score)
        for i, score in enumerate(similarity_matrix[idx])
        if i != idx
    ]
    # Three most similar other customers, highest score first.
    sorted_scores = sorted(scores, key=lambda x: x[1], reverse=True)[:3]
    lookalike_mapping[cust_id] = [
        (customer_ids[i], round(score, 3))
        for i, score in sorted_scores
    ]

# Save to CSV
# Flatten the mapping into one row per (source customer, match). Rank is the
# 0-based position of the match within the source customer's list. Building
# the rows directly also works when the mapping is empty or when a customer
# has fewer matches than the others.
lookalike_rows = [
    (source_id, rank, match_id, score)
    for source_id, matches in lookalike_mapping.items()
    for rank, (match_id, score) in enumerate(matches)
]
lookalike_df = pd.DataFrame(
    lookalike_rows,
    columns=['Source_Customer', 'Rank', 'Lookalike_Customer', 'Score'],
)
lookalike_df.to_csv('Lookalike.csv', index=False)
