# Lookalike Model for eCommerce Customers

This notebook builds a Lookalike Model that recommends similar customers based on their profile and transaction history.

In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the two input tables from the working directory.
customers = pd.read_csv('Customers.csv')
transactions = pd.read_csv('Transactions.csv')

# Attach customer attributes to every transaction row. merge() defaults to an
# inner join, so only CustomerIDs present in both tables are kept.
customer_transactions = transactions.merge(customers, on='CustomerID')

# Collapse to one row per customer:
#   TotalTransactions       - number of transaction rows for the customer
#   AverageTransactionValue - mean of TotalValue over those rows
#   Region                  - first Region value seen for the customer
# Region is carried along in the frame but is NOT part of the columns that
# are scaled and compared below; similarity uses the two numeric columns only.
customer_features = (
    customer_transactions
    .groupby('CustomerID')
    .agg(
        TotalTransactions=('TransactionID', 'count'),
        AverageTransactionValue=('TotalValue', 'mean'),
        Region=('Region', 'first'),
    )
    .reset_index()
)

# Put both numeric columns on a comparable scale (zero mean, unit variance).
scaler = StandardScaler()
scaled_features = scaler.fit_transform(
    customer_features[['TotalTransactions', 'AverageTransactionValue']]
)

# Pairwise cosine similarity between all customers' scaled feature vectors.
similarity_matrix = cosine_similarity(scaled_features)

# Label rows and columns with CustomerID so scores can be looked up by ID.
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_features['CustomerID'],
    columns=customer_features['CustomerID'],
)

# Function to get lookalikes for a given customer
def get_lookalikes(customer_id, top_n=3, similarity=None):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Parameters
    ----------
    customer_id
        Label of the customer to look up; must be a column of the
        similarity table.
    top_n : int, default 3
        Number of lookalikes to return.
    similarity : pandas.DataFrame, optional
        Square similarity table labelled by customer ID on both axes.
        Defaults to the notebook-level ``similarity_df``.

    Returns
    -------
    tuple[list, list]
        ``(ids, scores)``: the lookalike customer IDs and their similarity
        scores, both ordered from most to least similar.

    Raises
    ------
    KeyError
        If ``customer_id`` is not a column of the similarity table.
    """
    if similarity is None:
        similarity = similarity_df

    scores = similarity[customer_id]
    # Remove the customer's own entry by label rather than assuming it ranks
    # first: another customer can tie with (or, through floating-point
    # rounding, exceed) the self-similarity score, in which case slicing off
    # the top row would discard a real lookalike and keep the customer itself.
    candidates = scores.drop(labels=customer_id, errors='ignore')
    similar_customers = candidates.nlargest(top_n)
    return similar_customers.index.tolist(), similar_customers.values.tolist()

# Map each of the first 20 customer IDs to its list of (lookalike_id, score)
# pairs. get_lookalikes returns (ids, scores); zip(*...) pairs them up.
lookalike_results = {
    cid: list(zip(*get_lookalikes(cid)))
    for cid in customer_features['CustomerID'].iloc[:20]
}

# One row per customer, one column per lookalike rank; each cell holds an
# (id, score) tuple. Written to Lookalike.csv without a header row.
lookalike_df = pd.DataFrame.from_dict(lookalike_results, orient='index')
lookalike_df.to_csv('Lookalike.csv', header=False)
lookalike_df.head()

Unnamed: 0,0,1,2
C0001,"(C0137, 0.9994457904895733)","(C0121, 0.9965362177760971)","(C0152, 0.995593121405886)"
C0002,"(C0032, 0.9997639757352742)","(C0077, 0.9995846525879665)","(C0083, 0.9995505871471825)"
C0003,"(C0005, 0.9999178039528952)","(C0178, 0.9993126788222454)","(C0144, 0.9986957495852484)"
C0004,"(C0067, 0.99999826278984)","(C0021, 0.9996825062957394)","(C0075, 0.9995236451520084)"
C0005,"(C0003, 0.9999178039528952)","(C0073, 0.9992013516230509)","(C0063, 0.9988524757169495)"
