In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# Load the three raw tables.
customers = pd.read_csv('Customers.csv')
transactions = pd.read_csv('Transactions.csv')
products = pd.read_csv('Products.csv')

# Attach product attributes (e.g. Category) to every transaction. A left join
# keeps transactions whose ProductID has no match; their product columns are NaN.
transactions = transactions.merge(products, on='ProductID', how='left')


def _most_frequent(values):
    """Return the most frequent non-null value of ``values``, or None if there is none.

    ``Series.mode()`` ignores NaN, so a group whose values are all missing
    yields an empty result; indexing it with ``[0]`` would raise.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else None


# One row per customer: total spend, total quantity, number of distinct
# products bought, and the most frequently purchased category.
customer_transactions = (
    transactions.groupby('CustomerID')
    .agg(
        TotalSpent=('TotalValue', 'sum'),
        TotalQuantity=('Quantity', 'sum'),
        UniqueProducts=('ProductID', 'nunique'),
        FavoriteCategory=('Category', _most_frequent),
    )
    .reset_index()
)



In [4]:
# Left join so customers without any transaction are kept (their aggregate
# columns come back as NaN and are filled below).
customer_data = customers.merge(customer_transactions, on='CustomerID', how='left')

# Fill all aggregate columns in one call and reassign the result. Calling
# `customer_data[col].fillna(..., inplace=True)` operates on a temporary
# Series under pandas Copy-on-Write and would leave the frame unchanged.
customer_data = customer_data.fillna({
    'TotalSpent': 0,
    'TotalQuantity': 0,
    'UniqueProducts': 0,
    'FavoriteCategory': 'Unknown',
})

# One-hot encode the categorical columns; drop_first removes one level per
# column to avoid perfectly collinear dummies.
customer_data = pd.get_dummies(customer_data, columns=['Region', 'FavoriteCategory'], drop_first=True)

# Feature matrix: every column except identifiers / raw date columns.
features = customer_data.drop(columns=['CustomerID', 'CustomerName', 'TransactionDate'], errors='ignore')

# If a TransactionDate column is present, expose it as "days since the Unix
# epoch". Unparseable dates are coerced to NaT, which becomes NaN here.
if 'TransactionDate' in customer_data.columns:
    customer_data['TransactionDate'] = pd.to_datetime(customer_data['TransactionDate'], errors='coerce')
    features['TransactionDateNumeric'] = (customer_data['TransactionDate'] - pd.Timestamp('1970-01-01')).dt.days

# Catch-all encoding for any remaining non-numeric columns.
# NOTE(review): a leftover free-text or date-as-string column would be expanded
# into one dummy per distinct value here - confirm which columns reach this point.
features = pd.get_dummies(features, drop_first=True)

# Scale every feature to [0, 1] so no single column dominates the similarity.
# (The scaler is fitted once; the original fitted it twice with identical input.)
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(features)

# Pairwise cosine similarity between all customers (row i vs. row j).
similarity_matrix = cosine_similarity(scaled_features)

N_TARGET_CUSTOMERS = 20  # lookalikes are produced for the first 20 customers only
TOP_K = 3                # number of lookalikes kept per customer

# Maps each target CustomerID to a list of (similar CustomerID, score) tuples,
# ordered from most to least similar.
lookalike_map = {}
customer_ids = customer_data['CustomerID'].values
for i, customer_id in enumerate(customer_ids[:N_TARGET_CUSTOMERS]):
    similarity_scores = similarity_matrix[i]
    ranked = similarity_scores.argsort()[::-1]
    # Drop the customer's own row explicitly. Relying on it being first in the
    # ranking breaks when another customer ties at the top score or when
    # floating-point error puts the self-similarity slightly below a neighbour.
    similar_indices = ranked[ranked != i][:TOP_K]
    similar_customers = [(customer_ids[j], similarity_scores[j]) for j in similar_indices]
    lookalike_map[customer_id] = similar_customers

def _to_builtin(value):
    """Convert a NumPy scalar to the equivalent builtin Python value; pass others through."""
    return value.item() if isinstance(value, np.generic) else value


# One output row per target customer: [CustomerID, (id1, score1), (id2, score2), (id3, score3)].
# The tuples are written to the CSV via their text form, so their members must
# be builtin Python values: NumPy >= 2 renders scalars as e.g.
# "np.float64(0.9876)", which would otherwise leak into the file.
lookalike_output = [
    [cust_id, *((_to_builtin(similar_cust), round(float(score), 4)) for similar_cust, score in similar_list)]
    for cust_id, similar_list in lookalike_map.items()
]

lookalike_df = pd.DataFrame(lookalike_output, columns=['CustomerID', 'Lookalike1', 'Lookalike2', 'Lookalike3'])
lookalike_df.to_csv('Manjushree_Magesh_Lookalike.csv', index=False)
print("Manjushree_Magesh_Lookalike.csv has been generated successfully!")


Manjushree_Magesh_Lookalike.csv has been generated successfully!
