In [18]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [19]:
# Input CSV locations, given as bare file names (resolved against the
# current working directory when they are read).
customers_file, products_file, transactions_file = (
    'Customers.csv',
    'Products.csv',
    'Transactions.csv',
)

In [20]:
# Load the three raw tables in one pass; the unpacking order matches the
# order of the paths in the tuple.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in (customers_file, products_file, transactions_file)
)

In [21]:
# Parse the date columns in place. errors='coerce' turns any value that
# cannot be parsed into NaT instead of raising.
for frame, date_column in (
    (customers, 'SignupDate'),
    (transactions, 'TransactionDate'),
):
    frame[date_column] = pd.to_datetime(frame[date_column], errors='coerce')


In [22]:
# One row per transaction, enriched with the matching customer and product
# columns. Both joins are left joins, so every transaction row is kept even
# when no customer/product record matches.
merged_data = pd.merge(
    pd.merge(transactions, customers, on='CustomerID', how='left'),
    products,
    on='ProductID',
    how='left',
)

In [23]:
# Feature Engineering
# Collapse the transaction-level rows into one row per CustomerID.
# Each entry maps an output column to (source column, aggregation).
feature_spec = {
    'total_spending': ('TotalValue', 'sum'),
    'avg_spending': ('TotalValue', 'mean'),
    'num_transactions': ('TransactionID', 'count'),
    'unique_products': ('ProductID', 'nunique'),
    # 'first' keeps the first Region value seen for the customer;
    # presumably Region is constant per customer - verify against the data.
    'region': ('Region', 'first'),
}

per_customer = merged_data.groupby('CustomerID')
# reset_index() turns CustomerID back into an ordinary column.
customer_features = per_customer.agg(**feature_spec).reset_index()

In [24]:
# One-hot encode the categorical column(s). drop_first=True omits the
# indicator for the first category level, which is then represented by all
# remaining indicators being off.
categorical_columns = ['region']
customer_features = pd.get_dummies(
    customer_features,
    columns=categorical_columns,
    drop_first=True,
)

In [25]:
# Standardise the numeric features so that each one has zero mean and unit
# variance across customers; the scaled values overwrite the raw columns.
numeric_features = ['total_spending', 'avg_spending', 'num_transactions', 'unique_products']

scaler = StandardScaler()
scaler.fit(customer_features[numeric_features])
customer_features[numeric_features] = scaler.transform(customer_features[numeric_features])


In [26]:
# Pairwise cosine similarity between customers: entry [a, b] compares
# row a and row b of customer_features.
# NOTE(review): only the columns listed in numeric_features are compared;
# any other columns of customer_features (e.g. region indicators) do not
# influence the scores - confirm that this is intended.
scaled_profile = customer_features[numeric_features]
similarity_matrix = cosine_similarity(scaled_profile)

In [27]:
# Generate Lookalike Recommendations
#
# For each of the first 20 rows of customer_features, record the three most
# similar *other* customers as (CustomerID, similarity score) pairs.
lookalike_map = {}

# Positional list of IDs: row/column k of similarity_matrix belongs to the
# k-th row of customer_features, regardless of the frame's index labels.
all_customer_ids = customer_features['CustomerID'].tolist()
customer_ids = customer_features['CustomerID'][:20]

for i, cust_id in enumerate(customer_ids):

    # Exclude the customer's own entry by position. Skipping "the first item
    # after sorting" is not safe: another customer can tie with (or, through
    # floating-point rounding, slightly exceed) the self-similarity, which
    # would list the customer as its own lookalike and drop a real match.
    customer_similarities = [
        (j, score) for j, score in enumerate(similarity_matrix[i]) if j != i
    ]

    # Highest similarity first; keep the top three.
    customer_similarities = sorted(customer_similarities, key=lambda x: x[1], reverse=True)[:3]

    # float() converts the NumPy scalar to a plain Python float, so that
    # rendering the list as text yields 0.9123 rather than np.float64(0.9123)
    # under NumPy >= 2.
    lookalike_map[cust_id] = [
        (all_customer_ids[j], round(float(score), 4)) for j, score in customer_similarities
    ]


In [28]:
# Flatten the map into one CSV row per customer. The list of
# (CustomerID, score) pairs is stored as its string representation in a
# single 'lookalikes' column.
lookalike_csv_path = 'Lookalike.csv'

lookalike_rows = [
    {'cust_id': cust, 'lookalikes': str(matches)}
    for cust, matches in lookalike_map.items()
]
lookalike_df = pd.DataFrame(lookalike_rows)

# index=False keeps the DataFrame's row index out of the file.
lookalike_df.to_csv(lookalike_csv_path, index=False)

In [29]:
# Report the path the recommendations were written to.
save_message = f"Lookalike recommendations saved to {lookalike_csv_path}"
print(save_message)


Lookalike recommendations saved to Lookalike.csv
