In [None]:
import pandas as pd

# Read the three input tables from CSV files in the working directory.
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

# Preview each table: a heading line followed by its first five rows.
print("Customers Data:", customers.head(), sep="\n")

print("\nProducts Data:", products.head(), sep="\n")

print("\nTransactions Data:", transactions.head(), sep="\n")




In [None]:
# Attach customer attributes and then product attributes to every transaction.
# Both joins are inner joins (the pandas default), so a transaction whose
# CustomerID or ProductID has no match in the lookup table is dropped.
combined_data = (
    transactions
    .merge(customers, how='inner', on='CustomerID')
    .merge(products, how='inner', on='ProductID')
)

# Preview the joined table.
print(combined_data.head())


In [24]:
# Total spending: sum of TotalValue over each customer's transactions.
customer_spending = combined_data.groupby('CustomerID')['TotalValue'].sum().reset_index(name='TotalSpending')

# Average transaction value: mean of TotalValue over each customer's transactions.
avg_transaction_value = combined_data.groupby('CustomerID')['TotalValue'].mean().reset_index(name='AvgTransactionValue')

# Most frequent product category: count rows per (customer, category) pair ...
category_counts = combined_data.groupby(['CustomerID', 'Category']).size().reset_index(name='Count')

# ... then keep each customer's highest-count category. Category is an explicit
# secondary sort key so that a tie in Count is always resolved the same way
# (alphabetically first category wins) instead of depending on the order an
# unstable single-key sort happens to leave tied rows in.
most_freq_category = (
    category_counts
    .sort_values(['Count', 'Category'], ascending=[False, True])
    .drop_duplicates('CustomerID')[['CustomerID', 'Category']]
)


In [None]:
# Assemble one feature row per customer by joining the three per-customer
# tables onto the customer records. The joins are inner joins (the pandas
# default), so only customers present in every feature table are kept.
customer_features = (
    customers
    .merge(customer_spending, how='inner', on='CustomerID')
    .merge(avg_transaction_value, how='inner', on='CustomerID')
    .merge(most_freq_category, how='inner', on='CustomerID')
)

print(customer_features.head())


In [26]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Rescale the two numeric features with a min-max scaler, overwriting the
# original columns of customer_features in place.
numeric_feature_columns = ['TotalSpending', 'AvgTransactionValue']
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(customer_features[numeric_feature_columns])
customer_features[numeric_feature_columns] = scaled_values

# Define similarity function
def calculate_similarity(row1, row2, weights=(0.4, 0.4, 0.1, 0.1)):
    """Return a weighted similarity score between two customer rows.

    Parameters
    ----------
    row1, row2 : mapping-like (e.g. a pandas Series or a dict)
        Must provide the keys 'TotalSpending', 'AvgTransactionValue',
        'Category' and 'Region'.
    weights : sequence of four numbers, optional
        Weights applied, in order, to the spending, average-transaction,
        category and region components. The default (0.4, 0.4, 0.1, 0.1)
        reproduces the original fixed weighting.

    Returns
    -------
    float
        The weighted sum of the four component similarities. Each numeric
        component is ``1 - |a - b|``, which lies in [0, 1] when both values
        lie in [0, 1]; each categorical component is 1 on an exact match and
        0 otherwise.
    """
    spending_weight, transaction_weight, category_weight, region_weight = weights

    # Numerical features: closeness of spending and of average transaction value
    spending_similarity = 1 - abs(row1['TotalSpending'] - row2['TotalSpending'])
    transaction_similarity = 1 - abs(row1['AvgTransactionValue'] - row2['AvgTransactionValue'])

    # Categorical features: exact match on favourite category and on region
    category_similarity = 1 if row1['Category'] == row2['Category'] else 0
    region_similarity = 1 if row1['Region'] == row2['Region'] else 0

    # Combine with weights (same summation order as the original expression)
    similarity_score = (
        spending_weight * spending_similarity +
        transaction_weight * transaction_similarity +
        category_weight * category_similarity +
        region_weight * region_similarity
    )
    return similarity_score


In [27]:
# Create recommendations: for each of the first 20 customers (by row position),
# score every other customer and keep the 3 highest-scoring ones.
lookalike_recommendations = {}

# Build the row objects once. Calling iterrows() inside the outer loop would
# reconstruct every row Series again for each of the 20 target customers.
all_customers = [row for _, row in customer_features.iterrows()]

# Limit to the first 20 customers by position. Slicing avoids comparing the
# index label with 20, which is only correct while the frame has a default
# 0..n-1 index.
for customer1 in all_customers[:20]:
    similarities = [
        (customer2['CustomerID'], calculate_similarity(customer1, customer2))
        for customer2 in all_customers
        if customer1['CustomerID'] != customer2['CustomerID']  # Exclude self-comparison
    ]

    # Sort by similarity score (highest first) and keep the top 3
    top_3 = sorted(similarities, key=lambda x: x[1], reverse=True)[:3]
    lookalike_recommendations[customer1['CustomerID']] = top_3

# Print recommendations
print(lookalike_recommendations)
# 

{'C0001': [('C0107', 0.972456000941437), ('C0118', 0.9652334714915587), ('C0190', 0.9620170911274826)], 'C0002': [('C0043', 0.9438212740292493), ('C0128', 0.9333549465303792), ('C0106', 0.9199558034498942)], 'C0003': [('C0133', 0.9811865197861422), ('C0152', 0.9737109578473857), ('C0052', 0.9719046505293275)], 'C0004': [('C0126', 0.9406710796635684), ('C0153', 0.9378100412789967), ('C0165', 0.9243983273164496)], 'C0005': [('C0146', 0.9683304327541126), ('C0186', 0.9604114029485457), ('C0007', 0.920767620031796)], 'C0006': [('C0171', 0.9563696248017928), ('C0153', 0.894683290341701), ('C0126', 0.891822251957129)], 'C0007': [('C0146', 0.9296256074027667), ('C0005', 0.920767620031796), ('C0115', 0.9190568582076355)], 'C0008': [('C0024', 0.9463247361728941), ('C0194', 0.9443177655528474), ('C0047', 0.9415795668051192)], 'C0009': [('C0111', 0.928469532280304), ('C0010', 0.9269041467134419), ('C0062', 0.8920913017127197)], 'C0010': [('C0111', 0.979138404544274), ('C0062', 0.959294888457491),

In [28]:
# Write the recommendations to a two-column CSV: the customer ID, and the
# string form of that customer's list of (lookalike ID, score) pairs.
import csv

with open('Pranav_Pandey_Lookalike.csv', mode='w', newline='') as out_file:
    csv_out = csv.writer(out_file)
    csv_out.writerow(['CustomerID', 'Lookalikes'])

    # One row per customer; similarity scores are rounded to two decimals.
    csv_out.writerows(
        [customer_id, [(match_id, round(score, 2)) for match_id, score in matches]]
        for customer_id, matches in lookalike_recommendations.items()
    )
