In [24]:
# import libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

In [25]:
# Read the three source tables from CSV files in the working directory.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)


In [26]:
# Rename the transactions table's 'Price' column before joining
# (presumably so it cannot clash with a 'Price' column coming from another
# table -- confirm against the CSV schemas).
transactions = transactions.rename(columns={'Price': 'TransactionPrice'})

# Attach customer attributes, then product attributes, to every transaction row.
# Both joins use pandas' default join type.
customer_product_matrix = (
    transactions
    .merge(customers, on='CustomerID')
    .merge(products, on='ProductID')
)

In [27]:
# Per-row spend: quantity multiplied by the 'Price' column of the merged table.
customer_product_matrix['TotalPrice'] = (
    customer_product_matrix['Quantity'].mul(customer_product_matrix['Price'])
)

# Collapse to one row per customer; each entry maps
# output column -> (source column, aggregation).
customer_features = (
    customer_product_matrix
    .groupby('CustomerID')
    .agg(**{
        'TotalSpent': ('TotalPrice', 'sum'),
        'AvgTransactionValue': ('TotalPrice', 'mean'),
        'TotalTransactions': ('TransactionID', 'count'),
        'UniqueProducts': ('ProductID', 'nunique'),
    })
)


In [28]:
# Fit a StandardScaler on the per-customer features, then apply it to the
# same data; `scaler` is kept so the fitted transformer stays available.
scaler = StandardScaler()
scaler.fit(customer_features)
scaled_features = scaler.transform(customer_features)


In [29]:
# Pairwise cosine similarity between every pair of rows in scaled_features.
similarity_matrix = cosine_similarity(X=scaled_features)


In [30]:
# Build the Lookalike Model
def get_lookalikes(customer_id, top_n=3, features=None, similarity=None):
    """Return the customers most similar to ``customer_id``.

    Parameters
    ----------
    customer_id : hashable
        Label of the query customer in ``features.index``.
    top_n : int, default 3
        Maximum number of lookalikes to return. Values <= 0 yield two
        empty lists; fewer than ``top_n`` results are returned when there
        are not enough other customers.
    features : pandas.DataFrame, optional
        Frame whose index labels the rows of ``similarity`` (same order).
        Defaults to the notebook-level ``customer_features``.
    similarity : numpy.ndarray, optional
        Square matrix of pairwise similarity scores, positionally aligned
        with ``features.index``. Defaults to the notebook-level
        ``similarity_matrix``.

    Returns
    -------
    tuple[list, list]
        ``(lookalike_ids, scores)``, best match first. The query customer
        is never included in the result.

    Raises
    ------
    KeyError
        If ``customer_id`` is not present in ``features.index``.
    """
    if features is None:
        features = customer_features
    if similarity is None:
        similarity = similarity_matrix

    customer_index = features.index.get_loc(customer_id)
    similarities = similarity[customer_index]

    if top_n <= 0:
        return [], []

    # Stable sort on the negated scores: descending similarity, with ties
    # resolved by row position so the output is deterministic.
    ranked = (-similarities).argsort(kind="stable")

    # Remove the query row explicitly instead of assuming it sorts first:
    # another customer can tie at the maximum score, and a zero-norm feature
    # row has self-similarity 0, so the query is not guaranteed to rank first.
    ranked = ranked[ranked != customer_index]
    top_indices = ranked[:top_n]

    lookalikes = features.index[top_indices].tolist()
    scores = similarities[top_indices].tolist()
    return lookalikes, scores


In [31]:
# Generate lookalikes for the first 20 customers
lookalike_results = {}
for customer_id in customers['CustomerID'][:20]:
    # customer_features is aggregated from the merged transaction table, so a
    # customer with no matching transactions has no feature row. Record an
    # empty result for them instead of letting get_lookalikes raise KeyError
    # and abort the whole cell.
    if customer_id not in customer_features.index:
        lookalike_results[customer_id] = []
        continue
    lookalikes, scores = get_lookalikes(customer_id)
    # Pair each lookalike ID with its similarity score, best match first.
    lookalike_results[customer_id] = list(zip(lookalikes, scores))


In [32]:
# Write the results to Lookalike.csv: one row per query customer (the dict
# keys become the index), one column per ranked (lookalike, score) pair.
lookalike_df = pd.DataFrame.from_dict(data=lookalike_results, orient='index')
lookalike_df.to_csv(path_or_buf='Lookalike.csv')