In [4]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three input tables from CSV files in the working directory
customers = pd.read_csv("Customers.csv")
products = pd.read_csv("Products.csv")
transactions = pd.read_csv("Transactions.csv")

# Convert the date columns from text to datetime values
customers['SignupDate'] = pd.to_datetime(customers['SignupDate'])
transactions['TransactionDate'] = pd.to_datetime(transactions['TransactionDate'])

# Attach customer and product attributes to every transaction row.
# Both joins are inner joins, so transactions whose CustomerID or ProductID
# has no match in the other table are dropped.
merged_data = (
    transactions
    .merge(customers, on="CustomerID", how="inner")
    .merge(products, on="ProductID", how="inner")
)

# Customer x product table holding the total Quantity per pair;
# pairs that never occur are filled with 0
customer_product_matrix = pd.pivot_table(
    merged_data,
    index='CustomerID',
    columns='ProductID',
    values='Quantity',
    aggfunc='sum',
    fill_value=0,
)

# Standardize each product column before measuring similarity
scaler = StandardScaler()
customer_product_normalized = scaler.fit_transform(customer_product_matrix)

# Pairwise cosine similarity between customer rows, labelled on both axes
# with the CustomerID values from the pivot table
similarity_matrix = cosine_similarity(customer_product_normalized)
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_product_matrix.index,
    columns=customer_product_matrix.index,
)

# Generate lookalike recommendations
def get_top_n_similar_customers(customer_id, n=3):
    """Return the ``n`` customers most similar to ``customer_id``.

    Scores are read from the ``customer_id`` column of the module-level
    ``similarity_df``.

    Args:
        customer_id: Column label in ``similarity_df`` identifying the
            customer to find lookalikes for.
        n: Maximum number of lookalikes to return. Values below 1 yield an
            empty list.

    Returns:
        A list of ``(customer_id, score)`` tuples ordered by descending
        score, with each score converted to a built-in ``float``. The list is
        shorter than ``n`` when fewer other customers exist.

    Raises:
        KeyError: If ``customer_id`` has no column in ``similarity_df``.
    """
    if n <= 0:
        return []

    scores = similarity_df[customer_id]
    # Remove the customer's own entry by label instead of assuming it sorts
    # first: another customer can tie with the self-similarity score, in
    # which case a positional slice may return the customer as their own
    # lookalike and drop a genuine match.
    others = scores.drop(labels=[customer_id], errors="ignore")
    top_matches = others.sort_values(ascending=False).head(n)
    # Built-in floats keep the textual form of the tuples independent of the
    # installed NumPy version's scalar repr.
    return [(cust_id, float(score)) for cust_id, score in top_matches.items()]

# Generate lookalikes for the first 20 rows of Customers.csv
# (C0001 - C0020 when the file is ordered by CustomerID)
lookalike_results = {}
for customer_id in customers['CustomerID'].head(20):
    # A customer without any transaction never enters the pivot table and so
    # has no column in similarity_df; record an empty list for them instead
    # of aborting the whole run with a KeyError.
    if customer_id in similarity_df.columns:
        lookalike_results[customer_id] = get_top_n_similar_customers(customer_id)
    else:
        lookalike_results[customer_id] = []

# Save the results to a CSV file
lookalike_df = pd.DataFrame({
    'CustomerID': list(lookalike_results),
    # Cast each score to a built-in float before stringifying so the column
    # reads "0.40..." rather than "np.float64(0.40...)" under NumPy >= 2.
    'Lookalikes': [
        str([(cust_id, float(score)) for cust_id, score in matches])
        for matches in lookalike_results.values()
    ],
})
lookalike_df.to_csv("Lookalike.csv", index=False)

# Display sample output
print(lookalike_df.head())

  CustomerID                                         Lookalikes
0      C0001  [('C0194', 0.4049275311893231), ('C0104', 0.37...
1      C0002  [('C0030', 0.40461685378594076), ('C0091', 0.3...
2      C0003  [('C0181', 0.4775717980039305), ('C0134', 0.47...
3      C0004  [('C0070', 0.3519014889798192), ('C0175', 0.31...
4      C0005  [('C0096', 0.48745613929263704), ('C0023', 0.4...
