In [7]:
# Importing required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
import csv

# Load the data
customers = pd.read_csv('data/Customers.csv')
products = pd.read_csv('data/Products.csv')
transactions = pd.read_csv('data/Transactions.csv')

# Merge the data to create customer profiles
merged_df = transactions.merge(customers, on='CustomerID', how='left')
merged_df = merged_df.merge(products, on='ProductID', how='left')

# Create features for each customer
customer_features = merged_df.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'TransactionID': 'count',
    'Category': lambda x: x.mode()[0]  # Most frequent category
}).rename(columns={
    'TotalValue': 'TotalSpending',
    'TransactionID': 'NumTransactions',
    'Category': 'PreferredCategory'
}).reset_index()

# Print the customer features
print(customer_features.head())

# Encode the PreferredCategory column using one-hot encoding
customer_features = pd.get_dummies(customer_features, columns=['PreferredCategory'])

# Calculate cosine similarity between customers
similarity_matrix = cosine_similarity(customer_features.iloc[:, 1:])  # Exclude CustomerID
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_features['CustomerID'],
    columns=customer_features['CustomerID']
)

# Print similarity matrix (for first few rows)
print(similarity_df.head())

# Find the top 3 similar customers for each of the first 20 customers
lookalike_map = {}
for customer in customer_features['CustomerID'][:20]:
    similar_customers = similarity_df[customer].sort_values(ascending=False).iloc[1:4]
    lookalike_map[customer] = list(zip(similar_customers.index, similar_customers.values))

# Print lookalike map
print(lookalike_map)

# Save the Lookalike Map results to a CSV file
with open('FirstName_LastName_Lookalike.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['CustomerID', 'Lookalikes'])
    for key, value in lookalike_map.items():
        writer.writerow([key, value])


  CustomerID  TotalSpending  NumTransactions PreferredCategory
0      C0001        3354.52                5       Electronics
1      C0002        1862.74                4          Clothing
2      C0003        2725.38                4        Home Decor
3      C0004        5354.88                8             Books
4      C0005        2034.24                3       Electronics
CustomerID  C0001  C0002  C0003  C0004  C0005     C0006     C0007     C0008  \
CustomerID                                                                    
C0001         1.0    1.0    1.0    1.0    1.0  1.000000  1.000000  1.000000   
C0002         1.0    1.0    1.0    1.0    1.0  0.999999  0.999999  1.000000   
C0003         1.0    1.0    1.0    1.0    1.0  1.000000  1.000000  1.000000   
C0004         1.0    1.0    1.0    1.0    1.0  1.000000  1.000000  1.000000   
C0005         1.0    1.0    1.0    1.0    1.0  1.000000  1.000000  0.999999   

CustomerID     C0009     C0010  ...  C0191  C0192     C0193  C0194  