In [20]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [21]:

# Load the customer, product and transaction tables.
# The data directory can be overridden with the LOOKALIKE_DATA_DIR environment
# variable; when it is unset, the original author's local folder is used, so
# existing runs read exactly the same files as before.
DATA_DIR = Path(os.environ.get(
    "LOOKALIKE_DATA_DIR",
    r"C:/Users/ramiu/OneDrive/GenAi/ZeoTap Internship task",
))

# Customer profiles
customers = pd.read_csv(DATA_DIR / "Customers_EDA" / "Customers.csv")

# Product catalogue
products = pd.read_csv(DATA_DIR / "Products_EDA" / "Products.csv")

# Transaction history
transactions = pd.read_csv(DATA_DIR / "Transactions_EDA" / "Transactions.csv")

In [24]:
# Preview the first five customer rows.
customers.head(n=5)

Unnamed: 0,CustomerID,CustomerName,Region,SignupDate
0,C0001,Lawrence Carroll,South America,2022-07-10
1,C0002,Elizabeth Lutz,Asia,2022-02-13
2,C0003,Michael Rivera,South America,2024-03-07
3,C0004,Kathleen Rodriguez,South America,2022-10-09
4,C0005,Laura Weber,Asia,2022-08-15


In [26]:
# Preview the first five product rows.
products.head(n=5)

Unnamed: 0,ProductID,ProductName,Category,Price
0,P001,ActiveWear Biography,Books,169.3
1,P002,ActiveWear Smartwatch,Electronics,346.3
2,P003,ComfortLiving Biography,Books,44.12
3,P004,BookWorld Rug,Home Decor,95.69
4,P005,TechPro T-Shirt,Clothing,429.31


In [27]:

# Preview the first five transaction rows.
transactions.head(n=5)

Unnamed: 0,TransactionID,CustomerID,ProductID,TransactionDate,Quantity,TotalValue,Price_x,ProductName,Category,Price_y
0,T00001,C0199,P067,2024-08-25 12:38:23,1,300.68,300.68,ComfortLiving Bluetooth Speaker,Electronics,300.68
1,T00112,C0146,P067,2024-05-27 22:23:54,1,300.68,300.68,ComfortLiving Bluetooth Speaker,Electronics,300.68
2,T00166,C0127,P067,2024-04-25 07:38:55,1,300.68,300.68,ComfortLiving Bluetooth Speaker,Electronics,300.68
3,T00272,C0087,P067,2024-03-26 22:55:37,2,601.36,300.68,ComfortLiving Bluetooth Speaker,Electronics,300.68
4,T00363,C0070,P067,2024-03-21 15:10:10,3,902.04,300.68,ComfortLiving Bluetooth Speaker,Electronics,300.68


In [22]:
# Step 1: Data Preparation
# Attach product details to every transaction via an inner join on ProductID.
# NOTE: the result is written back to `transactions`, so running this cell a
# second time joins the product columns again; reload the data before re-running.
transactions = pd.merge(transactions, products, how='inner', on='ProductID')

In [28]:
# Step 2: Feature Engineering
# Build one row per customer summarising their transaction history:
#   total_spend     - sum of TotalValue over the customer's transactions
#   purchase_count  - number of transactions
#   unique_products - number of distinct products bought
# total_spend is taken from TotalValue rather than the unit price column: in
# the previewed rows TotalValue equals Quantity times the unit price, so
# summing the unit price would under-count every multi-quantity purchase.
customer_summary = (
    transactions
    .groupby('CustomerID')
    .agg(
        total_spend=('TotalValue', 'sum'),
        purchase_count=('TransactionID', 'count'),
        unique_products=('ProductID', 'nunique'),
    )
    .reset_index()
)

In [29]:
# Merge customer profiles with transaction summaries.
# The left join keeps customers that have no transactions; their summary
# columns come back as NaN and are set to 0. Only the aggregated columns are
# filled, so a missing profile field (e.g. Region) is not overwritten with 0.
summary_columns = [col for col in customer_summary.columns if col != 'CustomerID']

customer_data = (
    customers
    .merge(customer_summary, on='CustomerID', how='left')
    .fillna({col: 0 for col in summary_columns})
)

In [30]:
# Step 3: Normalize Features
# Standardise the three behavioural columns so that no single column dominates
# the similarity computation purely because of its scale.
features = customer_data.loc[:, ['total_spend', 'purchase_count', 'unique_products']]
scaler = StandardScaler().fit(features)
features_scaled = scaler.transform(features)

In [31]:
# Step 4: Calculate Similarity Scores
# Pairwise cosine similarity between every pair of scaled customer feature
# rows; entry [i, j] compares row i with row j of features_scaled.
similarity_matrix = cosine_similarity(features_scaled, features_scaled)

In [32]:
# Step 5: Generate Recommendations
# Maps each customer ID to a DataFrame (columns: CustomerID, SimilarityScore)
# holding the three most similar *other* customers, best match first.
lookalike_recommendations = {}

# Hoisted out of the loop: the ID column is the same for every customer.
customer_ids = customer_data['CustomerID']

# similarity_matrix is a plain NumPy array, so its rows must be addressed by
# position. Enumerating the IDs gives that position directly instead of
# relying on the DataFrame index labels happening to equal 0..n-1.
for position, cust_id in enumerate(customer_ids):
    # Pair every customer ID with its similarity to the current customer
    similar_customers = pd.DataFrame({
        'CustomerID': customer_ids,
        'SimilarityScore': similarity_matrix[position],
    })

    # Exclude the current customer and keep the top 3 lookalikes
    is_other_customer = similar_customers['CustomerID'] != cust_id
    top_lookalikes = similar_customers[is_other_customer].nlargest(3, 'SimilarityScore')

    # Store the results
    lookalike_recommendations[cust_id] = top_lookalikes

In [34]:
# Step 6: Prepare Lookalike.csv
# Flatten each customer's lookalike table into one wide row:
# cust_id, lookalike_1, score_1, lookalike_2, score_2, lookalike_3, score_3.
lookalike_records = []

for cust_id, lookalikes in lookalike_recommendations.items():
    # Customers without exactly three lookalikes are left out of the file.
    if len(lookalikes) != 3:
        continue

    record = {'cust_id': cust_id}
    for rank in range(3):
        hit = lookalikes.iloc[rank]
        record[f'lookalike_{rank + 1}'] = hit['CustomerID']
        record[f'score_{rank + 1}'] = hit['SimilarityScore']
    lookalike_records.append(record)

# Build the output table from the collected rows
lookalike_df = pd.DataFrame(lookalike_records)

# Write it to Lookalike.csv in the current working directory, without the index
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike recommendations have been saved to Lookalike.csv.")

Lookalike recommendations have been saved to Lookalike.csv.
