In [10]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def standardize_columns(values):
    """Return *values* with each column rescaled to zero mean and unit variance.

    Cosine similarity is sensitive to column magnitude, so raw monetary totals
    would otherwise swamp the 0/1 one-hot columns they are stacked with.

    Args:
        values: 2-D array-like of numeric features, one row per customer.

    Returns:
        A float ``numpy.ndarray`` of the same shape. Columns with zero spread
        are mapped to all zeros rather than NaN.
    """
    values = np.asarray(values, dtype=float)
    if values.size == 0:
        return values
    spread = values.std(axis=0)
    # A constant column has std 0; divide by 1 instead so it becomes zeros, not NaN.
    spread[spread == 0] = 1.0
    return (values - values.mean(axis=0)) / spread


def top_lookalikes(similarity_matrix, customer_ids, n_targets=20, top_k=3):
    """Pick the most similar other customers for the first *n_targets* customers.

    Args:
        similarity_matrix: Square array where entry ``[i][j]`` is the
            similarity between ``customer_ids[i]`` and ``customer_ids[j]``.
        customer_ids: Sequence of IDs in the same order as the matrix rows.
        n_targets: How many leading customers to produce lookalikes for.
        top_k: Maximum number of lookalikes per customer.

    Returns:
        Dict mapping each target ID to a list of ``(other_id, score)`` tuples,
        best match first, with ``score`` as a plain Python ``float``.
    """
    scores = np.asarray(similarity_matrix, dtype=float)
    lookalikes = {}
    for row, cust_id in enumerate(customer_ids[:n_targets]):
        # Stable sort on negated scores: descending similarity, ties broken by
        # the lower row index so the result is deterministic.
        order = np.argsort(-scores[row], kind="stable")
        # Drop the customer itself by index. Slicing off position 0 is wrong
        # when another customer ties with the self-similarity.
        neighbours = [idx for idx in order if idx != row][:top_k]
        lookalikes[cust_id] = [
            # float() keeps the CSV cell free of NumPy scalar reprs.
            (customer_ids[idx], float(scores[row][idx])) for idx in neighbours
        ]
    return lookalikes


# Guarded so the helpers above can be imported without touching the CSV files.
# Run as a script or notebook cell, the pipeline executes exactly as before and
# every variable below remains available at module level.
if __name__ == "__main__":
    # Load datasets
    customers = pd.read_csv("/content/Customers.csv")
    products = pd.read_csv("/content/Products.csv")
    transactions = pd.read_csv("/content/Transactions.csv")

    # Merge datasets (default inner joins: only rows with a matching customer
    # and product survive, so customers without transactions get no features).
    merged = transactions.merge(customers, on='CustomerID').merge(products, on='ProductID')

    # Feature engineering: one row per customer.
    customer_features = merged.groupby('CustomerID').agg({
        'TotalValue': 'sum',
        'Quantity': 'sum',
        'Region': 'first',
        # Most frequent category; mode() is sorted, so ties pick the first value.
        'Category': lambda x: x.mode().iloc[0],
    }).reset_index()

    # One-hot encoding for categorical variables
    encoder = OneHotEncoder()
    categorical_features = encoder.fit_transform(customer_features[['Region', 'Category']]).toarray()

    # Standardize the numeric columns so they are comparable in scale to the
    # one-hot columns instead of dominating the cosine similarity.
    numeric_features = standardize_columns(customer_features[['TotalValue', 'Quantity']].values)

    # Combine features
    final_features = np.hstack((numeric_features, categorical_features))

    # Compute similarity
    similarity_matrix = cosine_similarity(final_features)

    # Generate lookalikes: top 3 other customers for each of the first 20.
    customer_ids = customer_features['CustomerID'].tolist()
    lookalike_dict = top_lookalikes(similarity_matrix, customer_ids, n_targets=20, top_k=3)

    # Save results to CSV; each cell holds an (id, score) tuple.
    lookalike_df = pd.DataFrame.from_dict(lookalike_dict, orient='index', columns=['Lookalike1', 'Lookalike2', 'Lookalike3'])
    lookalike_df.to_csv("/content/Lookalike.csv")

    print("Lookalike model execution complete. Results saved to Lookalike.csv.")


Lookalike model execution complete. Results saved to Lookalike.csv.


Description of the Lookalike Model:
"""
Lookalike Model for eCommerce Transactions

This script processes customer transaction data to build a lookalike model that finds similar customers.
It uses customer demographics, transaction history, and product preferences to compute customer similarity.
The model applies cosine similarity to numerical and categorical features to recommend the top 3 most similar customers for each of the first 20 customers.

### Steps:
1. Load the datasets (Customers, Products, and Transactions).
2. Merge transaction data with customer and product details.
3. Perform feature engineering by aggregating transaction data per customer.
4. Apply one-hot encoding to categorical variables (Region, Product Category).
5. Compute cosine similarity between customers based on numerical and categorical features.
6. Generate a lookalike list for the first 20 customers, recommending the top 3 most similar customers for each.
7. Save the results as 'Lookalike.csv'.

### Dependencies:
- pandas
- scikit-learn
- numpy

"""