In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read the three raw input tables from the working directory.
customers, products, transactions = (
    pd.read_csv('Customers.csv'),
    pd.read_csv('Products.csv'),
    pd.read_csv('Transactions.csv'),
)

In [3]:
# Enrich each transaction row with its customer and product attributes.
# Left joins keep every transaction even when a lookup key has no match.
transactions = (
    transactions
    .merge(customers, on='CustomerID', how='left')
    .merge(products, on='ProductID', how='left')
)

In [4]:
# Feature engineering: one row per customer, one column per product, each cell
# holding the total quantity purchased (0 where the pair never occurs).
customer_product_matrix = pd.pivot_table(
    transactions,
    values='Quantity',
    index='CustomerID',
    columns='ProductID',
    aggfunc='sum',
    fill_value=0,
)

In [5]:
# Standardize each product column before computing similarity.
scaler = StandardScaler()
scaler.fit(customer_product_matrix)
customer_product_matrix_scaled = scaler.transform(customer_product_matrix)

In [6]:
# Pairwise cosine similarity between every pair of rows of the scaled matrix.
similarity_matrix = cosine_similarity(
    X=customer_product_matrix_scaled,
)

In [7]:
# Label both axes of the similarity matrix with the interaction matrix's index
# so that scores can be looked up by CustomerID.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    columns=customer_product_matrix.index,
    index=customer_product_matrix.index,
)

In [8]:
# Get top 3 lookalikes for the first 20 customers listed in Customers.csv
# (expected to be CustomerID C0001 - C0020; this relies on the file's row order).
# Result: {customer_id: [(lookalike_id, score rounded to 2 decimals), ...]}.
lookalike_map = {}
for customer in customers['CustomerID'][:20]:
    # Customers that are absent from the similarity matrix index are skipped.
    if customer not in similarity_df.index:
        continue
    # Remove the customer's own entry by label before ranking. Assuming the
    # self-match always sorts into position 0 is unsafe: another customer can
    # tie with (or, through floating-point error, edge past) the self-score,
    # and the relative order of tied values after sorting is not guaranteed.
    # That would list the customer as its own lookalike and drop a real match.
    candidate_scores = similarity_df.loc[customer].drop(labels=customer)
    top_3 = candidate_scores.sort_values(ascending=False).head(3)
    lookalike_map[customer] = [(cust_id, round(score, 2)) for cust_id, score in top_3.items()]

In [9]:
# Flatten the per-customer lookalike lists into one record per
# (customer, lookalike) pair, matching the Lookalike.csv layout.
lookalike_csv = [
    {'cust_id': source_id, 'lookalike_id': match_id, 'score': match_score}
    for source_id, matches in lookalike_map.items()
    for match_id, match_score in matches
]

# Write the records out without the DataFrame's positional index.
lookalike_df = pd.DataFrame(lookalike_csv)
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike recommendations saved to 'Lookalike.csv'")

Lookalike recommendations saved to 'Lookalike.csv'
