In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three raw tables from CSV files in the working directory
transactions_df = pd.read_csv('Transactions.csv')
products_df = pd.read_csv('Products.csv')
customers_df = pd.read_csv('Customers.csv')

# Attach product columns to each transaction, then attach customer columns.
# Both joins use pandas' default inner join, so rows whose ProductID or
# CustomerID has no match in the other table are dropped.
transactions_products = transactions_df.merge(products_df, on='ProductID')
customer_transactions = transactions_products.merge(customers_df, on='CustomerID')

def _most_frequent(values):
    """Return the most common non-null entry of *values*, or None if there is none.

    ``Series.mode()`` ignores missing values and returns its result sorted,
    so ties resolve to the smallest value. It returns an empty Series when
    every entry is missing; indexing that with ``[0]`` would raise, so that
    case is mapped to ``None`` instead.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else None


# Aggregate features for each customer (one row per CustomerID)
customer_profile = customer_transactions.groupby('CustomerID').agg({
    'TotalValue': 'sum',  # Total spending
    'ProductID': 'nunique',  # Unique products purchased
    'Category': _most_frequent,  # Most frequent category purchased
    'Region': 'first',  # Region (assume it does not change)
}).reset_index()

# Encode categorical features.
# Keep every level (drop_first=False): dropping one level turns it into an
# implicit reference category, which breaks the symmetry of the encoding and
# makes customers in the dropped level look closer to everyone else when the
# vectors are compared with a similarity metric.
customer_profile = pd.get_dummies(
    customer_profile,
    columns=['Category', 'Region'],
    drop_first=False,
)

# Scale every feature column (everything except the CustomerID key) so that
# large-valued columns such as TotalValue do not dominate the similarity.
scaler = StandardScaler()
feature_frame = customer_profile.drop(columns='CustomerID')
scaled_features = scaler.fit_transform(feature_frame)

# Compute pairwise customer similarity; row i / column j compares the
# customers at positions i and j of customer_profile.
similarity_matrix = cosine_similarity(scaled_features)

# Recommend top 3 similar customers for the first 20 customers
lookalike_results = {}
customer_ids = customer_profile['CustomerID'].tolist()

for idx, customer_id in enumerate(customer_ids[:20]):
    # Exclude the customer itself by position instead of assuming it sorts
    # first: a tie at the top (e.g. another customer with an identical
    # feature vector) or an all-zero feature vector would otherwise leave
    # the customer in its own lookalike list and push out a real neighbour.
    candidates = [
        (other_idx, score)
        for other_idx, score in enumerate(similarity_matrix[idx])
        if other_idx != idx
    ]
    # list.sort is stable, so equally similar customers keep their original
    # order in customer_ids.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # Convert scores to built-in floats: NumPy scalars would be written as
    # "np.float64(0.1234)" by str() below on NumPy 2 and later.
    lookalike_results[customer_id] = [
        (customer_ids[other_idx], round(float(score), 4))
        for other_idx, score in candidates[:3]
    ]

# Save results to Lookalike.csv: one row per customer, with the list of
# (lookalike id, similarity score) pairs stored as its string form.
lookalike_df = pd.DataFrame([
    {'cust_id': cust_id, 'lookalikes': str(lookalikes)}
    for cust_id, lookalikes in lookalike_results.items()
])
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike recommendations saved to Lookalike.csv")


Lookalike recommendations saved to Lookalike.csv
