Task 2: Lookalike Model

Step 1: Data Preprocessing

In [3]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load the raw datasets. `products` is read here but not used in this cell.
customers = pd.read_csv("Customers.csv")
products = pd.read_csv("Products.csv")
transactions = pd.read_csv("Transactions.csv")

# Per-customer spending profile: one row per CustomerID present in the
# transactions, with the summed TotalValue and the number of TransactionIDs.
spend_summary = transactions.groupby('CustomerID').agg(
    TotalSpend=('TotalValue', 'sum'),
    TransactionCount=('TransactionID', 'count'),
).reset_index()

# merge() defaults to an inner join, so customers without any transaction
# do not appear in `customer_transactions`.
customer_transactions = customers.merge(spend_summary, on='CustomerID')

# Standardize the two features so neither dominates the similarity purely
# because of its scale.
feature_columns = ['TotalSpend', 'TransactionCount']
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_transactions[feature_columns])

# Pairwise similarity matrix: entry [i][j] compares row i and row j of
# `customer_transactions`.
cosine_sim = cosine_similarity(scaled_features)

# How many leading customers (rows of `customer_transactions`) get a
# lookalike list, and how many lookalikes are kept for each of them.
NUM_TARGET_CUSTOMERS = 20
TOP_N_LOOKALIKES = 3

# Read the ID column once; positions match the rows/columns of `cosine_sim`.
customer_ids = customer_transactions['CustomerID'].tolist()

# Maps each target CustomerID to a list of (lookalike CustomerID, score)
# tuples, ordered from most to least similar.
lookalike_dict = {}

# Clamp to the number of available rows so a dataset with fewer than
# NUM_TARGET_CUSTOMERS customers does not raise an IndexError.
for i in range(min(NUM_TARGET_CUSTOMERS, len(customer_ids))):
    # Drop the customer's own entry by position rather than assuming it
    # sorts first: another customer whose scaled features point in the same
    # direction also scores ~1.0, and rounding or tie order could otherwise
    # leave the customer listed as their own lookalike.
    candidates = [
        (j, score) for j, score in enumerate(cosine_sim[i]) if j != i
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    lookalike_dict[customer_ids[i]] = [
        (customer_ids[j], score) for j, score in candidates[:TOP_N_LOOKALIKES]
    ]

# Flatten to one row per (customer, lookalike) pair for the CSV export.
lookalike_data = [
    [customer_id, lookalike_id, score]
    for customer_id, similar_customers in lookalike_dict.items()
    for lookalike_id, score in similar_customers
]

lookalike_df = pd.DataFrame(lookalike_data, columns=['CustomerID', 'Lookalike_CustomerID', 'Similarity_Score'])

lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike Model Created Successfully!")


Lookalike Model Created Successfully!
