In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Read the three input tables from CSV.
customers = pd.read_csv('path_to_customers.csv')
products = pd.read_csv('path_to_products.csv')
transactions = pd.read_csv('path_to_transactions.csv')

# Attach customer attributes, then product attributes, to every transaction.
# Both joins use the default inner merge, so a transaction whose customer_id
# or product_id has no match in the lookup table is dropped.
merged_data = pd.merge(transactions, customers, on='customer_id')
merged_data = pd.merge(merged_data, products, on='product_id')

# Feature Engineering
# Collapse the transaction-level rows into one feature row per customer.
customer_features = merged_data.groupby('customer_id').agg({
    'age': 'first',  # Assuming age is constant for each customer
    'gender': 'first',  # Assuming gender is constant for each customer
    'transaction_amount': 'sum',  # Total spending
    'product_id': 'nunique'  # Number of unique products purchased
}).reset_index()

# One-hot encode gender (first level dropped to avoid a redundant column).
# dtype=float keeps the indicator columns numeric so they can be scaled
# together with the other features without producing an object-typed frame.
customer_features = pd.get_dummies(
    customer_features, columns=['gender'], drop_first=True, dtype=float
)

# Assemble the full feature list. The gender indicator columns were
# previously created but left out of the scaled matrix, so gender never
# influenced the similarity scores; they are included here.
numeric_columns = ['age', 'transaction_amount', 'product_id']
gender_columns = [
    column for column in customer_features.columns
    if column.startswith('gender_')
]
feature_columns = numeric_columns + gender_columns

# Standardize every feature to zero mean / unit variance so that no single
# feature dominates the cosine similarity because of its scale.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(customer_features[feature_columns])

# Pairwise cosine similarity between all rows of the scaled feature matrix.
similarity_matrix = cosine_similarity(features_scaled)

# Label both axes with customer_id so scores can be looked up by customer.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_features['customer_id'],
    columns=customer_features['customer_id'],
)

# For each of the first 20 customers, record the 3 most similar *other*
# customers together with their similarity scores.
lookalike_results = {}

for customer_id in customer_features['customer_id'].head(20):
    scores = similarity_df[customer_id]
    # Remove the customer's own entry first, then keep the 3 highest scores.
    similar_customers = scores[scores.index != customer_id].nlargest(3)
    lookalike_results[customer_id] = (
        similar_customers.index.tolist(),
        similar_customers.values.tolist(),
    )

# Turn the mapping into a table: one row per customer, with the list of
# look-alike IDs and the list of matching scores as two columns.
lookalike_df = (
    pd.DataFrame.from_dict(
        lookalike_results,
        orient='index',
        columns=['similar_customers', 'similarity_scores'],
    )
    .rename_axis('customer_id')
    .reset_index()
)

# Write the table to disk without the positional row index.
lookalike_df.to_csv('Lookalike.csv', index=False)