In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [4]:
# Read the customer and product tables from CSV files in the working directory.
customers_df, products_df = (
    pd.read_csv(csv_name) for csv_name in ("Customers.csv", "Products.csv")
)


In [5]:
# Assemble the per-customer feature table that the similarity step consumes.
# NOTE(review): the preference scores are uniform random draws from NumPy, not
# values computed from purchase history — confirm this placeholder is intended.
np.random.seed(42)  # seed the global NumPy RNG so the matrix below is reproducible
product_ids = products_df['ProductID'].unique()
n_customers, n_products = len(customers_df), len(product_ids)
customer_preferences = pd.DataFrame(
    data=np.random.rand(n_customers, n_products),
    index=customers_df['CustomerID'],
    columns=product_ids,
)

# One-hot encode each customer's Region, keyed by CustomerID.
region_by_customer = customers_df.set_index('CustomerID')['Region']
customer_features = pd.get_dummies(region_by_customer)

# Join region indicators and preference scores column-wise.
customer_data = pd.concat([customer_features, customer_preferences], axis=1)

In [6]:
# Cosine similarity between every pair of rows in customer_data, wrapped in a
# DataFrame labelled on both axes by customer_data's index.
customer_index = customer_data.index
similarity_matrix = cosine_similarity(customer_data)
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_index,
    columns=customer_index,
)


In [11]:
# For each of the first 20 CustomerID values in customers_df, keep the 3 other
# customers with the highest similarity score.
lookalike_map = {}
for customer_id in customers_df['CustomerID'][:20]:
    # Remove the customer's own entry so it is never listed as its own lookalike.
    scores_vs_others = similarity_df.loc[customer_id].drop(customer_id)
    ranked_scores = scores_vs_others.sort_values(ascending=False)
    top_matches = ranked_scores.head(3).to_dict()
    lookalike_map[customer_id] = [
        {"cust_id": other_id, "score": score}
        for other_id, score in top_matches.items()
    ]

In [8]:
# Flatten the lookalike map into a two-column table: the customer id and the
# string form of that customer's list of {"cust_id", "score"} entries.
lookalike_rows = [
    (cust_id, str(matches)) for cust_id, matches in lookalike_map.items()
]
lookalike_df = pd.DataFrame(lookalike_rows, columns=["cust_id", "lookalikes"])

# Write the table to disk without the positional row index.
lookalike_df.to_csv("Lookalike.csv", index=False)


# Preview the first rows of the saved table under a heading line.
print("Lookalike Map for the first 20 customers:", lookalike_df.head(), sep="\n")


Lookalike Map for the first 20 customers:
  cust_id                                         lookalikes
0   C0001  [{'cust_id': 'C0151', 'score': 0.8297433031394...
1   C0002  [{'cust_id': 'C0161', 'score': 0.8038080606351...
2   C0003  [{'cust_id': 'C0148', 'score': 0.8366096745345...
3   C0004  [{'cust_id': 'C0012', 'score': 0.8226788659845...
4   C0005  [{'cust_id': 'C0007', 'score': 0.8001205105497...
