 <h1 style=" font-family:calibri; color:black; font-size:250%; text-align:center; ">Lookalike Model</h1> 

<div style="border-radius:10px; padding: 5px; background-color: grey; font-size:120%; text-align:left">

<h3 align="left"><font color=black>Problem Statement:</font></h3>
Build a lookalike model that takes a customer's information as input and recommends
the 3 most similar customers based on their profile and transaction history.

</div>

In [14]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

In [22]:
# Load datasets
# Read the three source tables in a fixed order: customers, products, transactions.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [26]:
# Preview the first five rows of the customers table
customers.head()

Unnamed: 0,CustomerID,CustomerName,Region,SignupDate
0,C0001,Lawrence Carroll,South America,2022-07-10
1,C0002,Elizabeth Lutz,Asia,2022-02-13
2,C0003,Michael Rivera,South America,2024-03-07
3,C0004,Kathleen Rodriguez,South America,2022-10-09
4,C0005,Laura Weber,Asia,2022-08-15


In [28]:
# Preview the first five rows of the products table
products.head()

Unnamed: 0,ProductID,ProductName,Category,Price
0,P001,ActiveWear Biography,Books,169.3
1,P002,ActiveWear Smartwatch,Electronics,346.3
2,P003,ComfortLiving Biography,Books,44.12
3,P004,BookWorld Rug,Home Decor,95.69
4,P005,TechPro T-Shirt,Clothing,429.31


In [30]:
# Preview the first five rows of the transactions table
transactions.head()

Unnamed: 0,TransactionID,CustomerID,ProductID,TransactionDate,Quantity,TotalValue,Price
0,T00001,C0199,P067,2024-08-25 12:38:23,1,300.68,300.68
1,T00112,C0146,P067,2024-05-27 22:23:54,1,300.68,300.68
2,T00166,C0127,P067,2024-04-25 07:38:55,1,300.68,300.68
3,T00272,C0087,P067,2024-03-26 22:55:37,2,601.36,300.68
4,T00363,C0070,P067,2024-03-21 15:10:10,3,902.04,300.68


In [32]:
# Merge datasets
# Attach product attributes to each transaction, then customer attributes.
# Both joins use pandas' default inner join, so only rows whose key exists on
# both sides are kept. The previews show a Price column in both transactions
# and products, so pandas will suffix the overlapping column names.
transactions_products = pd.merge(transactions, products, on='ProductID')
full_data = pd.merge(transactions_products, customers, on='CustomerID')

In [34]:
# Feature Engineering
# Read the clock once so every customer's tenure is measured against the same
# instant, instead of re-evaluating "today" separately for each group.
reference_date = pd.Timestamp('today')


def _tenure_days(signup_dates):
    """Return whole days between the group's first SignupDate and reference_date."""
    return (reference_date - pd.to_datetime(signup_dates.iloc[0])).days


# One row per CustomerID; column names are still the source column names here.
customer_features = full_data.groupby('CustomerID').agg({
    'Quantity': 'sum',  # Total purchases
    'Category': 'nunique',  # Unique product categories
    'TotalValue': 'mean',  # Average spending per transaction
    'SignupDate': _tenure_days  # Tenure
}).reset_index()

In [36]:
# Give the aggregated columns descriptive feature names.
feature_names = {
    'Quantity': 'TotalPurchases',
    'Category': 'UniqueCategories',
    'TotalValue': 'AverageSpending',
    'SignupDate': 'TenureDays',
}
customer_features = customer_features.rename(columns=feature_names)

In [38]:
# Standardize numerical features
# The same four columns are read for fitting and then overwritten with their
# standardized values, so the column list is named once.
feature_columns = ['TotalPurchases', 'UniqueCategories', 'AverageSpending', 'TenureDays']
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_features[feature_columns])
customer_features[feature_columns] = scaled_features

In [40]:
# Compute similarity matrix
# Pairwise cosine similarity between the rows of scaled_features: entry [i, j]
# compares row i with row j, so positions follow the row order of the frame
# the features were taken from.
similarity_matrix = cosine_similarity(scaled_features)

In [42]:
# Function to get top 3 lookalikes for a customer
def get_top_lookalikes(similarity_matrix, customer_index, n=3, customer_ids=None):
    """Return the ``n`` customers most similar to the one at ``customer_index``.

    Parameters
    ----------
    similarity_matrix : 2-D array-like
        Square matrix where entry ``[i, j]`` is the similarity of row ``i`` to row ``j``.
    customer_index : int
        Row position of the customer to find lookalikes for.
    n : int, default 3
        Maximum number of lookalikes to return; values below 1 yield an empty list.
    customer_ids : sequence, optional
        Identifier for each row position. Defaults to the ``CustomerID`` column
        of the module-level ``customer_features`` frame.

    Returns
    -------
    list of (customer_id, score) tuples, highest score first. The queried
    customer is never included, and ties are broken by ascending row position.
    """
    if customer_ids is None:
        customer_ids = customer_features['CustomerID']
    # Object dtype keeps the identifiers as plain Python objects (e.g. str).
    ids = np.asarray(customer_ids, dtype=object)

    scores = np.asarray(similarity_matrix[customer_index])
    # Map a negative index onto its positive position so the self-filter matches.
    self_position = customer_index % scores.shape[0]

    # Stable sort on the negated scores gives a deterministic descending order.
    ranked = np.argsort(-scores, kind='stable')
    # Drop the customer explicitly: with tied scores the self-match is not
    # guaranteed to sort first, so skipping "position 0" could return the
    # customer as their own lookalike.
    ranked = ranked[ranked != self_position]

    top_indices = ranked[:max(n, 0)]
    return [(ids[idx], scores[idx]) for idx in top_indices]

In [44]:
# Generate recommendations for the first 20 customers
# Keys are the CustomerID values of the first 20 rows of customer_features;
# each value is a list of {'cust_id', 'score'} dicts with the score rounded
# to four decimal places.
lookalike_map = {}
for row_position in range(20):
    matches = get_top_lookalikes(similarity_matrix, row_position, 3)
    source_id = customer_features.iloc[row_position]['CustomerID']
    lookalike_map[source_id] = [
        {'cust_id': match_id, 'score': round(match_score, 4)}
        for match_id, match_score in matches
    ]

In [46]:
# Save lookalike recommendations to a separate DataFrame
# Flatten the map into one record per (customer, lookalike) pair.
lookalike_list = [
    {'CustomerID': source_id, 'LookalikeID': match['cust_id'], 'Score': match['score']}
    for source_id, matches in lookalike_map.items()
    for match in matches
]

lookalike_df = pd.DataFrame(lookalike_list)


In [48]:
# Add Lookalikes to the original Customers dataset
def _lookalikes_for(customer_id):
    """Return fresh copies of the lookalike records for one customer ([] if none)."""
    return [
        dict(cust_id=match['cust_id'], score=match['score'])
        for match in lookalike_map.get(customer_id, [])
    ]


# Customers missing from lookalike_map get an empty list.
customers['Lookalikes'] = customers['CustomerID'].map(_lookalikes_for)

In [50]:
# Write the flat lookalike table to CSV, leaving out the DataFrame index column
lookalike_df.to_csv('Sreeyuktha_Ramesh_Lookalike.csv', index=False)