In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

In [4]:
# Load datasets

In [6]:
# Read the three source tables from the working directory in one pass.
# The file names are listed in the same order as the target variables.
customer_data, product_data, transac_data = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)

In [10]:
# Data Cleaning and Merging

In [25]:
# Remove the columns that are not used as lookalike features: the customer's
# name and signup date, and the transaction's own id and date.
customer_data_cleaned = customer_data.drop(columns=['CustomerName', 'SignupDate'])
transaction_data_cleaned = transac_data.drop(columns=['TransactionID', 'TransactionDate'])

In [14]:
# Merge customer and transaction data

In [27]:
# Attach each transaction to its customer record. The join is an inner join
# (pandas' default), so customers with no transactions do not appear in
# merged_data.
merged_data = pd.merge(
    left=customer_data_cleaned,
    right=transaction_data_cleaned,
    on='CustomerID',
    how='inner',
)

In [18]:
# Feature Engineering: Group by CustomerID and aggregate features

In [34]:
# Collapse the merged rows to one row per customer.
per_customer = merged_data.groupby('CustomerID')
customer_features = per_customer.agg(
    Region=('Region', 'first'),          # categorical; encoded in a later cell
    Quantity=('Quantity', 'sum'),        # total quantity purchased
    TotalValue=('TotalValue', 'sum'),    # total value of all transactions
    Price=('Price', 'mean'),             # mean Price over the customer's rows
).reset_index()  # bring CustomerID back as the first column

In [36]:
# Encoding categorical data (Region)

In [38]:
# Replace each Region label with its integer category code.
# NOTE(review): these codes impose an arbitrary ordering on what looks like a
# nominal feature, and that ordering feeds into the similarity computation;
# consider one-hot encoding if downstream cells allow it.
region_as_category = customer_features['Region'].astype('category')
customer_features['Region'] = region_as_category.cat.codes

In [40]:
# Scaling numerical features

In [42]:
# Standardise every column except the first one (CustomerID), which is an
# identifier rather than a feature.
feature_block = customer_features.iloc[:, 1:]
scaler = StandardScaler()
scaled_features = scaler.fit_transform(feature_block)


In [44]:
# Calculate Cosine Similarity

In [46]:
# Pairwise cosine similarity between the rows of scaled_features. With a single
# argument the input is compared against itself, so entry [i, j] is the
# similarity between row i and row j of scaled_features.
similarity_matrix = cosine_similarity(scaled_features)

In [48]:
# Create a Lookalike Map for the first 20 customers (C0001 - C0020)

In [50]:
# Result container: maps a customer id to its list of (lookalike id, score).
lookalike_map = dict()
# Customer ids in the row order of customer_features, so that position i here
# lines up with row/column i of the similarity matrix.
customer_ids = customer_features.loc[:, 'CustomerID'].values

In [52]:
# For each of the first 20 customers (in customer_ids order), record the three
# most similar *other* customers together with their similarity scores.
# NOTE(review): this takes the first 20 rows, which only equals C0001-C0020 if
# all of those ids are present in customer_features - confirm.
for idx, cust_id in enumerate(customer_ids[:20]):
    # Pair every other customer's position with its similarity to this one.
    # The customer itself is excluded explicitly: relying on it sorting first
    # breaks when another customer ties with it (identical feature vector) or
    # when floating-point rounding puts its self-similarity just below a tie.
    similarity_scores = [
        (other_idx, score)
        for other_idx, score in enumerate(similarity_matrix[idx])
        if other_idx != idx
    ]
    # Highest similarity first; the sort is stable, so ties keep index order.
    top_3_similar = sorted(similarity_scores, key=lambda x: x[1], reverse=True)[:3]
    # Convert scores to built-in floats so that stringifying the list later
    # yields plain numbers (NumPy >= 2 prints scalars as "np.float64(...)").
    lookalike_map[cust_id] = [
        (customer_ids[sim_idx], round(float(score), 3))
        for sim_idx, score in top_3_similar
    ]


In [57]:

# Flatten the lookalike map into one record per customer; the list of
# (lookalike id, score) pairs is stored as its string representation.
lookalike_data = [
    {"CustomerID": cust_id, "Lookalikes": str(recommendations)}
    for cust_id, recommendations in lookalike_map.items()
]

# Write the records to disk without the DataFrame index column.
lookalike_df = pd.DataFrame(lookalike_data)
lookalike_df.to_csv("Lookalike.csv", index=False)
print("Lookalike.csv file has been created.")

Lookalike.csv file has been created.
