In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
import csv

In [2]:
# Read the three input tables, in this order, from the working directory.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [3]:
# Attach the customer columns to every transaction row, then the product
# columns. Both joins use pandas' default join type.
transactions_with_customers = pd.merge(transactions, customers, on='CustomerID')
data = pd.merge(transactions_with_customers, products, on='ProductID')

In [5]:
# One row per customer. Named aggregation labels each feature where it is
# computed, so the result has flat (non-MultiIndex) columns in a fixed order:
# CustomerID, TotalSpend, AvgSpend, UniqueProducts, UniqueCategories.
customer_features = (
    data
    .groupby('CustomerID')
    .agg(
        TotalSpend=('TotalValue', 'sum'),          # sum of TotalValue over the customer's rows
        AvgSpend=('TotalValue', 'mean'),           # mean TotalValue per row of `data`
        UniqueProducts=('ProductID', 'nunique'),   # number of distinct ProductID values
        UniqueCategories=('Category', 'nunique'),  # number of distinct Category values
    )
    .reset_index()
)

In [6]:
# Flat labels for the aggregated frame, in column order: the customer key,
# the sum and mean of TotalValue, then the distinct ProductID and Category counts.
feature_column_names = [
    'CustomerID',
    'TotalSpend',
    'AvgSpend',
    'UniqueProducts',
    'UniqueCategories',
]
customer_features.columns = feature_column_names

In [7]:
# Everything after the first column (the customer key) is a numeric feature.
numeric_features = customer_features.iloc[:, 1:]

# Rescale each feature column with min-max scaling so that no single feature
# dominates the similarity computation purely because of its units.
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(numeric_features)

In [9]:
# Pairwise cosine similarity between every pair of rows of scaled_features;
# entry [i, j] compares row i with row j.
similarity_matrix = cosine_similarity(X=scaled_features)

In [10]:
def get_top_similar_customers(customer_idx, similarity_matrix, top_n=3, customer_ids=None):
    """
    Get top N similar customers for a given customer index.

    The target customer is excluded by its position, not by assuming it is
    the first entry after sorting. Another customer can tie with the target's
    self-similarity, and a target with an all-zero feature row has a
    self-similarity of 0, so the target is not guaranteed to sort first.

    Args:
        customer_idx (int): Row index of the target customer.
        similarity_matrix (array): Precomputed cosine similarity matrix.
        top_n (int): Number of similar customers to return. Values <= 0
            return an empty list; fewer than top_n results are returned when
            there are not enough other customers.
        customer_ids (sequence, optional): Customer IDs aligned with the rows
            of similarity_matrix. Defaults to the module-level
            customer_features['CustomerID'].
    Returns:
        List[Tuple]: List of (CustomerID, similarity_score) tuples, ordered
        by descending similarity; ties keep the lower row index first.
    """
    similarities = np.asarray(similarity_matrix[customer_idx])

    if customer_ids is None:
        # Backward-compatible default: use the notebook's feature table.
        customer_ids = customer_features['CustomerID']
    customer_ids = np.asarray(customer_ids)

    # Resolve a possibly negative index to the row's actual position so the
    # comparison below matches it.
    self_position = range(len(similarities))[customer_idx]

    # Stable sort on the negated scores gives descending similarity with
    # deterministic tie-breaking (lower index first).
    ranked = np.argsort(-similarities, kind='stable')
    ranked = ranked[ranked != self_position]

    similar_indices = ranked[:max(top_n, 0)]
    return [(customer_ids[i], similarities[i]) for i in similar_indices]


In [11]:
# Build the lookalike list for the first 20 rows of customer_features.
# The range is bounded by the frame length so a smaller table yields fewer
# entries instead of raising IndexError from .iloc.
recommendations = {}
for idx in range(min(20, len(customer_features))):
    customer_id = customer_features['CustomerID'].iloc[idx]
    recommendations[customer_id] = get_top_similar_customers(idx, similarity_matrix)


In [12]:
# Destination of the lookalike table.
output_file = 'Sneha_Gupta_Lookalike.csv'

# One row per customer: the customer ID, then its lookalikes rendered as
# "(id, score)" pairs with the score fixed to four decimals, joined by "; ".
with open(output_file, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['cust_id', 'lookalikes'])
    for cust_id, lookalikes in recommendations.items():
        formatted_pairs = [
            f"({neighbor_id}, {score:.4f})"
            for neighbor_id, score in lookalikes
        ]
        lookalikes_str = "; ".join(formatted_pairs)
        writer.writerow([cust_id, lookalikes_str])

print(f"Lookalike.csv created: {output_file}")

Lookalike.csv created: Sneha_Gupta_Lookalike.csv


In [14]:
        # Spot check: cust_id and lookalikes_str are not assigned in this cell.
        # They hold whatever the last iteration of the CSV export loop left
        # behind, so this prints only the final customer processed and needs
        # that loop to have run first.
        print(f"Customer: {cust_id} - Lookalikes: {lookalikes_str}")


Customer: C0020 - Lookalikes: (C0097, 1.0000); (C0110, 1.0000); (C0078, 1.0000)
