# In [None]:
# Mrigank Raj Dubey - Lookalike Model for Data Science Assignment

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load the three input tables from the working directory.
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

# Build one row per transaction enriched with customer and product attributes.
# The aggregation further down needs a column literally named 'Price'; the
# plain merge left none (the run failed with KeyError: "Column(s) ['Price'] do
# not exist"), presumably because both Transactions.csv and Products.csv carry
# a 'Price' column and pandas renamed them to 'Price_x' / 'Price_y'
# -- TODO confirm against the CSV headers.
# Explicit suffixes keep every transaction-side column under its original name
# and tag only the overlapping product-side columns with '_product'.
data = (
    transactions
    .merge(customers, on='CustomerID')
    .merge(products, on='ProductID', suffixes=('', '_product'))
)

# Feature Engineering
# Collapse the transaction-level rows into one row per customer. Named
# aggregation gives each output column its final name directly, so no separate
# rename step is needed.
customer_features = (
    data.groupby('CustomerID')
    .agg(
        TotalSpending=('TotalValue', 'sum'),      # Total spending
        TotalQuantity=('Quantity', 'sum'),        # Total quantity purchased
        AvgPrice=('Price', 'mean'),               # Average price of purchased items
        UniqueProducts=('ProductID', 'nunique'),  # Number of unique products purchased
    )
    .reset_index()
)

# Standardise the numeric features (every column after the leading CustomerID)
# so that each one contributes on a comparable scale.
feature_frame = customer_features[customer_features.columns[1:]]
scaler = StandardScaler()
scaler.fit(feature_frame)
normalized_features = scaler.transform(feature_frame)

# Pairwise cosine similarity between all customers; row i / column j compares
# the i-th and j-th rows of customer_features.
similarity_matrix = cosine_similarity(normalized_features)

# Generate Recommendations for the First 20 Customers
# `recommendations` maps each of the first 20 customer IDs to a list of up to
# three (other_customer_id, similarity_score) tuples, best match first.
customer_ids = customer_features['CustomerID']
recommendations = {}

for idx, customer_id in enumerate(customer_ids[:20]):
    # Pair every *other* customer's row position with its similarity score.
    # The customer itself is dropped by position rather than by assuming it
    # sorts first: another customer can tie at 1.0, and an all-zero feature
    # row has similarity 0 with itself, so slicing [1:4] after sorting could
    # recommend a customer to itself or drop a genuine match.
    # Scores are converted to plain floats so that str() of the tuples later
    # renders '0.97' rather than 'np.float64(0.97)' under NumPy >= 2.
    similarity_scores = [
        (other_idx, float(score))
        for other_idx, score in enumerate(similarity_matrix[idx])
        if other_idx != idx
    ]

    # Highest similarity first; the sort is stable, so ties keep row order.
    similarity_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Translate row positions back to customer IDs. `.iloc` is positional and
    # therefore matches the similarity matrix regardless of the index labels.
    top_3 = [(customer_ids.iloc[i], score) for i, score in similarity_scores[:3]]
    recommendations[customer_id] = top_3

# Persist the lookalike map: one CSV row per customer, with the list of
# (customer_id, score) tuples stored as its string representation.
recommendation_rows = [
    (customer, str(lookalikes))
    for customer, lookalikes in recommendations.items()
]
recommendations_df = pd.DataFrame(
    recommendation_rows,
    columns=['CustomerID', 'Recommendations'],
)
recommendations_df.to_csv('Mrigank_Raj_Dubey_Lookalike.csv', index=False)

# Tell the user where the result went.
print("Lookalike recommendations saved to 'Mrigank_Raj_Dubey_Lookalike.csv'")


# Output: KeyError: "Column(s) ['Price'] do not exist"