In [39]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [40]:
customers_df = pd.read_csv('/content/Customers.csv')
products_df = pd.read_csv('/content/Products.csv')
transactions_df = pd.read_csv('/content/Transactions.csv')

In [41]:
data = transactions_df.merge(customers_df, on="CustomerID").merge(products_df, on="ProductID")

In [42]:
customer_features = data.groupby('CustomerID').agg({
    'TotalValue': ['mean', 'sum'],
    'TransactionID': 'count',
    'Category': lambda x: x.mode()[0]
}).reset_index()

In [43]:
customer_features.columns = ['CustomerID', 'AvgSpending', 'TotalSpending', 'TotalTransactions', 'PreferredCategory']

In [44]:
region_encoded = pd.get_dummies(customers_df.set_index('CustomerID')['Region'], prefix='Region')
category_encoded = pd.get_dummies(customer_features.set_index('CustomerID')['PreferredCategory'], prefix='Category')

In [45]:
customer_profiles = customer_features.set_index('CustomerID').drop('PreferredCategory', axis=1)
customer_profiles = customer_profiles.merge(region_encoded, left_index=True, right_index=True).merge(category_encoded, left_index=True, right_index=True)

In [46]:
scaler = StandardScaler()
normalized_profiles = scaler.fit_transform(customer_profiles)

In [47]:
similarity_matrix = cosine_similarity(customer_profiles)
similarity_df = pd.DataFrame(similarity_matrix, index=customer_profiles.index, columns=customer_profiles.index)

In [48]:
lookalike_data = {}
for customer_id in customer_profiles.index[:20]:
    similar_customers = similarity_df[customer_id].sort_values(ascending=False).iloc[1:4]
    lookalike_data[customer_id] = list(zip(similar_customers.index, similar_customers.values))

In [49]:
lookalike_df = pd.DataFrame({
    "CustomerID": list(lookalike_data.keys()),
    "Lookalikes": [str(v) for v in lookalike_data.values()]
})
lookalike_df.to_csv("Lookalike.csv", index=False)

In [50]:
lookalike_df.head(20)

Unnamed: 0,CustomerID,Lookalikes
0,C0001,"[('C0190', 0.999999982108667), ('C0048', 0.999..."
1,C0002,"[('C0029', 0.9999997125550519), ('C0142', 0.99..."
2,C0003,"[('C0052', 0.9999999896198248), ('C0158', 0.99..."
3,C0004,"[('C0021', 0.9999999659115772), ('C0101', 0.99..."
4,C0005,"[('C0186', 0.9999999713571794), ('C0007', 0.99..."
5,C0006,"[('C0168', 0.9999999873315506), ('C0117', 0.99..."
6,C0007,"[('C0115', 0.9999999765243283), ('C0005', 0.99..."
7,C0008,"[('C0175', 0.999999655630328), ('C0065', 0.999..."
8,C0009,"[('C0077', 0.9999987702506024), ('C0062', 0.99..."
9,C0010,"[('C0029', 0.9999996889595073), ('C0002', 0.99..."
