In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the three input tables in one pass; the tuple order matches the
# order of the file names below.
customers, transactions, products = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Transactions.csv', 'Products.csv')
)

In [8]:
def create_customer_profile(customer_id, transactions_df=None, products_df=None):
    """Summarise one customer's spending and category mix as a flat dict.

    Parameters
    ----------
    customer_id
        Value matched against the ``CustomerID`` column of the transactions.
    transactions_df : pandas.DataFrame, optional
        Frame providing the ``CustomerID``, ``ProductID`` and ``TotalValue``
        columns. Defaults to the notebook-level ``transactions`` frame.
    products_df : pandas.DataFrame, optional
        Frame joined on ``ProductID`` to obtain ``Category``. Defaults to the
        notebook-level ``products`` frame.

    Returns
    -------
    dict
        ``total_purchases`` (sum of ``TotalValue``), ``avg_purchase_value``
        (mean of ``TotalValue``, or ``0.0`` when the customer has no
        transactions), ``purchase_frequency`` (number of transaction rows),
        followed by one key per category holding that category's share of
        the customer's joined transaction rows.
    """
    # Fall back to the notebook globals so existing one-argument calls keep working.
    if transactions_df is None:
        transactions_df = transactions
    if products_df is None:
        products_df = products

    customer_transactions = transactions_df[transactions_df['CustomerID'] == customer_id]
    purchase_frequency = len(customer_transactions)
    total_purchases = customer_transactions['TotalValue'].sum()
    # mean() of an empty selection is NaN; report 0.0 so a customer without
    # transactions yields a well-defined numeric profile.
    if purchase_frequency:
        avg_purchase_value = customer_transactions['TotalValue'].mean()
    else:
        avg_purchase_value = 0.0

    # Inner join: transactions whose ProductID is absent from the product
    # table do not contribute to the category shares.
    product_details = customer_transactions.merge(products_df, on='ProductID')
    category_breakdown = product_details['Category'].value_counts(normalize=True)

    return {
        'total_purchases': total_purchases,
        'avg_purchase_value': avg_purchase_value,
        'purchase_frequency': purchase_frequency,
        # Category keys are unpacked last, so a category literally named like
        # one of the metric keys above would overwrite it.
        **category_breakdown.to_dict()
    }

In [9]:
# Build one profile dict per customer, keyed by CustomerID.
# NOTE(review): only the first 20 customers are profiled, so every later
# lookalike can only be drawn from these same 20 — confirm this is intended.
customer_profiles = {
    customer_id: create_customer_profile(customer_id)
    for customer_id in customers['CustomerID'][:20]
}

In [10]:
# One row per customer, one column per profile key; a key missing from a
# customer's profile (e.g. a category they never bought) becomes 0.
profiles_df = (
    pd.DataFrame
    .from_dict(customer_profiles, orient='index')
    .fillna(0)
)

In [11]:
# Standardise every profile column so no single feature dominates the
# similarity computation purely because of its scale.
scaler = StandardScaler().fit(profiles_df)
normalized_profiles = scaler.transform(profiles_df)

In [12]:
# Pairwise cosine similarity between all rows of normalized_profiles:
# entry [i, j] compares the i-th and j-th customer profiles.
similarity_matrix = cosine_similarity(normalized_profiles)

In [13]:
def find_top_lookalikes(target_index, similarity_matrix, top_n=3, customer_ids=None):
    """Return the ``top_n`` customers most similar to the one at ``target_index``.

    Parameters
    ----------
    target_index : int
        Row of ``similarity_matrix`` belonging to the target customer.
    similarity_matrix : numpy.ndarray
        Square matrix whose row ``i`` holds customer ``i``'s similarity to
        every customer, itself included.
    top_n : int, default 3
        Maximum number of lookalikes to return.
    customer_ids : sequence, optional
        Customer identifiers in the same order as the matrix rows. Defaults
        to the keys of the notebook-level ``customer_profiles`` dict.

    Returns
    -------
    list of tuple
        ``(customer_id, similarity)`` pairs ordered from most to least
        similar; the target customer itself is never included.
    """
    similarities = similarity_matrix[target_index]
    n_customers = len(similarities)
    if n_customers == 0:
        return []

    # Normalise a negative index so it can be compared with argsort positions.
    self_position = target_index % n_customers

    ranked = similarities.argsort()[::-1]
    # Drop the target explicitly rather than assuming it sorts first: another
    # customer tied at the maximum similarity could otherwise push the target
    # into the results and a genuine lookalike out of them.
    ranked = ranked[ranked != self_position][:top_n]

    # Resolve the id list once instead of rebuilding it for every result.
    if customer_ids is None:
        customer_ids = list(customer_profiles.keys())

    return [(customer_ids[idx], similarities[idx]) for idx in ranked]

In [14]:
# Map each of the first 20 profiled customers to its ranked lookalikes.
# The enumerate position doubles as the customer's row in similarity_matrix.
lookalikes_dict = {
    customer_id: find_top_lookalikes(row_position, similarity_matrix)
    for row_position, customer_id in enumerate(list(customer_profiles)[:20])
}

In [15]:
# Flatten the per-customer lookalike lists into one record per
# (source customer, similar customer) pair.
lookalike_data = [
    {
        'SourceCustomer': source_id,
        'SimilarCustomer': match_id,
        'SimilarityScore': similarity,
    }
    for source_id, matches in lookalikes_dict.items()
    for match_id, similarity in matches
]

In [16]:
# Tabulate the lookalike records and write them out without the row index.
lookalike_df = pd.DataFrame(data=lookalike_data)
lookalike_df.to_csv(path_or_buf='Poornima M_Nerale_Lookalike.csv', index=False)

print("Lookalike Model Complete!")

Lookalike Model Complete!
