In [None]:
# 1. Import Libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# 2. Load Datasets
# Read the three input tables in one statement. The absolute /content/
# paths suggest a Colab session -- adjust them when running elsewhere.
customers, products, transactions = (
    pd.read_csv("/content/Customers.csv"),
    pd.read_csv("/content/Products.csv"),
    pd.read_csv("/content/Transactions.csv"),
)

In [None]:
# 3. Data Preprocessing
# Merge datasets: attach customer attributes, then product attributes, to
# every transaction row. Both joins are left joins, so a transaction is kept
# even when its CustomerID or ProductID has no match in the lookup table.
#
# Features planned on top of the merged table:
#   TotalSpend: sum of TotalValue over all of a customer's transactions.
#   AverageOrderValue: TotalSpend / number of transactions.
#   ProductCategoryPreferences: frequency of purchased categories.
merged_data = pd.merge(
    pd.merge(transactions, customers, on="CustomerID", how="left"),
    products,
    on="ProductID",
    how="left",
)

In [None]:
# 3. Data Preprocessing (continued): feature engineering
# `merged_data` is produced by the merge cell directly above. It is not
# rebuilt here: repeating the identical two joins was copy-paste duplication
# that only added work and a second place to keep in sync.
#
# One row per customer:
#   TotalSpend      - sum of TotalValue across the customer's rows
#   AvgOrderValue   - mean TotalValue per row
#   NumTransactions - count of TransactionID values
#   Region          - first Region value recorded for the customer
# Region is then one-hot encoded so every feature column is numeric.
customer_features = (
    merged_data
    .groupby('CustomerID')
    .agg(
        TotalSpend=('TotalValue', 'sum'),
        AvgOrderValue=('TotalValue', 'mean'),
        NumTransactions=('TransactionID', 'count'),
        Region=('Region', 'first'),
    )
    .reset_index()
    .pipe(pd.get_dummies, columns=['Region'])
)


In [None]:
# Normalize numerical features
# CustomerID is an identifier rather than a feature, so it is removed before
# the scaler is fitted; every remaining column is min-max rescaled.
scaler = MinMaxScaler()
customer_features_scaled = scaler.fit_transform(
    customer_features.drop('CustomerID', axis=1)
)

In [None]:

# ======================
# 4. Similarity Calculation
# ======================
# Pairwise cosine similarity between the scaled feature rows.
similarity_matrix = cosine_similarity(customer_features_scaled)

# Label both axes with CustomerID so scores can be looked up by customer.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_features['CustomerID'],
    columns=customer_features['CustomerID'],
)

In [None]:
# 5. Generate Recommendations
# ======================
def get_top_similar(customer_id, top_n=3, similarity=None):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Parameters
    ----------
    customer_id : hashable
        Row label to look up in the similarity table.
    top_n : int, default 3
        Maximum number of lookalikes to return.
    similarity : pandas.DataFrame, optional
        Table of pairwise scores whose rows and columns are labelled by
        customer ID. When omitted, the notebook-level ``similarity_df`` is
        used, so existing ``get_top_similar(customer_id)`` calls still work.

    Returns
    -------
    list of tuple
        ``(other_customer_id, score)`` pairs, highest score first. The
        queried customer is never part of the result.

    Raises
    ------
    KeyError
        If ``customer_id`` is not a row label of the similarity table.
    """
    table = similarity_df if similarity is None else similarity

    # Remove the customer's own entry by label. Skipping "the first sorted
    # element" is only correct when the self-score is the strict maximum,
    # which fails on ties, on rounding error, and for all-zero feature rows.
    scores = table.loc[customer_id].drop(labels=customer_id, errors="ignore")

    # A stable sort makes the order among equal scores reproducible.
    scores = scores.sort_values(ascending=False, kind="mergesort").head(top_n)
    return list(zip(scores.index, scores.values))


In [None]:
# Generate recommendations for the first 20 customers in customer_features
# (C0001 to C0020, assuming each of those IDs has at least one transaction
# and therefore a row in customer_features).
lookalike_results = {
    customer_id: get_top_similar(customer_id)
    for customer_id in customer_features['CustomerID'][:20]
}

# Save recommendations to Lookalike.csv
# Scores are cast to built-in floats before str(): under NumPy >= 2 the repr
# of a NumPy scalar is "np.float64(...)", which would otherwise end up in the
# CSV text instead of a plain number.
lookalike_data = [
    {
        "cust_id": cust_id,
        "recommendations": str(
            [(match_id, float(score)) for match_id, score in recommendations]
        ),
    }
    for cust_id, recommendations in lookalike_results.items()
]
lookalike_df = pd.DataFrame(lookalike_data)
lookalike_df.to_csv("Lookalike.csv", index=False)

In [None]:
# 6. Output Example
# ======================
# Plain-text preview of the first five rows that were written to the CSV.
print(lookalike_df.head(n=5))

  cust_id                                    recommendations
0   C0001  [('C0137', 0.9999955257915722), ('C0152', 0.99...
1   C0002  [('C0142', 0.9975354915923277), ('C0088', 0.99...
2   C0003  [('C0133', 0.9996544462069554), ('C0052', 0.99...
3   C0004  [('C0113', 0.9988541163061573), ('C0102', 0.99...
4   C0005  [('C0159', 0.9999642002884194), ('C0186', 0.99...
