In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler



In [2]:
# Locations of the three input CSV files (update paths as needed).
customers_path = "Customers.csv"
products_path = "Products.csv"
transactions_path = "Transactions.csv"

# Read each file into its own DataFrame, in the same order as the paths above.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in (customers_path, products_path, transactions_path)
)

# Build one transaction-level table for analysis. Both joins are left joins,
# so every transaction row is kept even when the customer or product lookup
# finds no match (the looked-up columns are then NaN).
data = (
    transactions
    .merge(customers, on='CustomerID', how='left')
    .merge(products, on='ProductID', how='left')
)



In [3]:
# Preprocess data for lookalike modeling.

# Parse the transaction timestamps into datetime values.
data['TransactionDate'] = pd.to_datetime(data['TransactionDate'])

# Collapse the transaction-level table to one row per customer.
# Named aggregation produces the final column names directly.
# NOTE(review): 'Price_x' is the suffix pandas gives the left-hand 'Price'
# column when both merged frames carry one — presumably the price recorded on
# the transaction; confirm against the input schemas.
customer_summary = data.groupby('CustomerID').agg(
    TotalSpent=('TotalValue', 'sum'),
    TotalQuantity=('Quantity', 'sum'),
    TransactionCount=('TransactionID', 'count'),
    AveragePrice=('Price_x', 'mean'),
)

# Attach the behavioural metrics to every customer profile. The left join
# keeps customers that have no transactions at all.
customer_features = customers.merge(customer_summary, on='CustomerID', how='left')

# Customers without transactions get NaN in the aggregated columns; treat that
# as zero activity. Only the aggregated columns are filled, so missing profile
# fields are not silently replaced by the integer 0.
customer_features = customer_features.fillna(
    dict.fromkeys(customer_summary.columns, 0)
)



In [4]:
# Standardise the behavioural metrics so that no single feature dominates the
# similarity purely because of its scale.
numeric_features = ['TotalSpent', 'TotalQuantity', 'TransactionCount', 'AveragePrice']
scaler = StandardScaler()
customer_features_scaled = scaler.fit_transform(customer_features[numeric_features])

# Pairwise cosine similarity between all customers, wrapped in a DataFrame
# whose rows and columns are both labelled by CustomerID so that scores can be
# looked up by id.
similarity_matrix = cosine_similarity(customer_features_scaled)
customer_ids = customer_features['CustomerID']
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)

# Get the top 3 lookalike customers for the first 20 customers.
N_TARGET_CUSTOMERS = 20  # how many customers (in file order) to score
N_LOOKALIKES = 3         # how many lookalikes to keep per customer

# Maps each target CustomerID to a list of [lookalike_id, similarity] pairs,
# ordered from most to least similar.
lookalike_results = {}
for customer_id in customer_features['CustomerID'].head(N_TARGET_CUSTOMERS):
    # Remove the customer's own entry by label instead of assuming it sorts
    # first: another customer can tie at similarity 1.0 (or edge past it through
    # floating-point rounding), in which case slicing off position 0 would drop
    # a real match and leave the customer listed as their own lookalike.
    candidate_scores = similarity_df[customer_id].drop(labels=customer_id)
    top_matches = candidate_scores.nlargest(N_LOOKALIKES)
    # Build the pairs straight from the Series so each id keeps its own type
    # and each score is a plain Python float.
    lookalike_results[customer_id] = [
        [cust_id, float(score)] for cust_id, score in top_matches.items()
    ]



In [5]:
# Write the lookalike map to a two-column CSV: the customer id, followed by
# that customer's matches encoded as "id:score" pairs joined with ';'.
lookalike_path = "Lookalike.csv"

# Assemble every output line first (header included), then write them in one go.
csv_lines = ["CustomerID,Lookalikes\n"]
for customer_id, lookalikes in lookalike_results.items():
    # Scores are rendered with four decimal places; ids are formatted as-is.
    lookalike_str = ";".join(f"{cust_id}:{score:.4f}" for cust_id, score in lookalikes)
    csv_lines.append(f"{customer_id},{lookalike_str}\n")

with open(lookalike_path, 'w') as f:
    f.writelines(csv_lines)

print(f"Lookalike results saved to {lookalike_path}")


Lookalike results saved to Lookalike.csv
