In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.metrics import davies_bouldin_score



In [2]:
# Load the three raw CSV files: customers, products and transactions.
# DATA_DIR is the single place to edit when the files live somewhere else;
# its default is the directory the original cell read from, so the resolved
# paths are unchanged.
DATA_DIR = "C:/Users/santh/Downloads"

customers = pd.read_csv(f"{DATA_DIR}/Customers.csv")
products = pd.read_csv(f"{DATA_DIR}/Products.csv")
transactions = pd.read_csv(f"{DATA_DIR}/Transactions.csv")


In [3]:
# Attach the customer record, then the product record, to every transaction.
# Both joins rely on pandas' default join type (inner), so a transaction whose
# CustomerID or ProductID has no match is dropped from merged_data.
# NOTE(review): column names that occur in more than one input frame come out
# with pandas' default _x/_y suffixes -- verify which ones against the CSVs.
merged_data = pd.merge(
    pd.merge(transactions, customers, on='CustomerID'),
    products,
    on='ProductID',
)


In [4]:

# Collapse merged_data to one row per CustomerID:
#   TotalValue -> sum, Quantity -> sum, Price_x -> mean,
#   Category   -> all of the customer's category values joined with commas,
#   Region     -> value from the customer's first row.
# reset_index() turns CustomerID back into an ordinary column and gives the
# frame a 0..n-1 index.
customer_features = (
    merged_data
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        Price_x=('Price_x', 'mean'),
        Category=('Category', lambda categories: ','.join(categories)),
        # Assumes Region is constant per customer -- TODO confirm against the data.
        Region=('Region', 'first'),
    )
    .reset_index()
)

# Numeric columns: standardise each one with StandardScaler.
numerical_features = ['TotalValue', 'Quantity', 'Price_x']
numeric_frame = customer_features[numerical_features]
scaler = StandardScaler()
scaled_nums = scaler.fit_transform(numeric_frame)

# Categorical columns: one-hot encode, then convert the encoder's output to a
# dense array so it can be stacked next to the scaled numerics.
categorical_features = ['Region']
region_frame = customer_features[categorical_features]
encoder = OneHotEncoder()
encoded_cats = encoder.fit_transform(region_frame).toarray()

# Final feature matrix: scaled numeric columns first, one-hot columns after.
feature_matrix = np.hstack((scaled_nums, encoded_cats))

# Cosine similarity between every pair of rows; row/column order follows the
# row order of customer_features.
similarity_matrix = cosine_similarity(feature_matrix)

# Build {customer_id: [(lookalike_id, score), ...]} for the first
# N_TARGET_CUSTOMERS rows of customer_features, keeping the TOP_K most similar
# other customers for each, highest similarity first.
N_TARGET_CUSTOMERS = 20
TOP_K = 3

# Positional list of IDs: position j here corresponds to row/column j of
# similarity_matrix, independent of whatever index customer_features carries.
customer_ids = list(customer_features['CustomerID'])

lookalikes = {}
for i, customer_id in enumerate(customer_ids[:N_TARGET_CUSTOMERS]):
    # Exclude the customer's own entry by index, not by sorted position.
    # Skipping "the first item after sorting" is only correct when the
    # self-similarity is the strict maximum; a customer with an identical
    # feature vector, floating-point rounding, or an all-zero row can put
    # another customer first and leave the customer listed as their own
    # lookalike.
    candidates = [
        (j, score) for j, score in enumerate(similarity_matrix[i]) if j != i
    ]
    # sorted() is stable, so equal scores keep their original row order.
    top_matches = sorted(candidates, key=lambda pair: -pair[1])[:TOP_K]
    # float() converts the NumPy scalar to a plain Python float; otherwise the
    # text form of this list shows np.float64(0.98) instead of 0.98 on
    # NumPy >= 2.
    lookalikes[customer_id] = [
        (customer_ids[j], round(float(score), 2)) for j, score in top_matches
    ]


# Flatten the lookalikes mapping into two columns: the customer ID and the
# text form (str) of that customer's list of (lookalike_id, score) pairs.
lookalike_rows = [
    (customer_id, str(matches)) for customer_id, matches in lookalikes.items()
]
lookalike_df = pd.DataFrame(lookalike_rows, columns=['CustomerID', 'Lookalikes'])

# Write the table without the DataFrame index, then report completion.
lookalike_df.to_csv('C:/Users/Santh/Downloads/Lookalike.csv', index=False)
print("Lookalike.csv created.")





Lookalike.csv created.
