# In [1]:
import csv

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Read the customer and transaction tables from CSV files in the working
# directory.
customers, transactions = (
    pd.read_csv(csv_name) for csv_name in ('Customers.csv', 'Transactions.csv')
)

# Build per-customer features from the transaction table: each row's spend
# (Quantity times Price) is stored as a new 'TotalSpend' column, then spend
# and quantity are totalled for every CustomerID.
transactions['TotalSpend'] = transactions['Quantity'].mul(transactions['Price'])
data = (
    transactions
    .groupby('CustomerID', as_index=False)[['TotalSpend', 'Quantity']]
    .sum()
)

# Calculate the pairwise cosine similarity between customers' feature vectors
# (every column of `data` except the ID).
# NOTE(review): the features are raw, unscaled totals, so cosine similarity
# only compares the direction of each (TotalSpend, Quantity) vector, not its
# magnitude -- consider standardising the columns first; confirm intent.
similarity_matrix = cosine_similarity(data.drop(columns='CustomerID'))

# Get the top lookalikes for the first customers, in `data` row order.
NUM_CUSTOMERS = 20  # how many customers receive recommendations
TOP_N = 3           # how many lookalikes to keep per customer

# Plain Python values keep the saved text free of NumPy scalar wrappers.
customer_ids = data['CustomerID'].tolist()

lookalikes = {}
# Clamp the range so a dataset with fewer customers does not raise.
for i in range(min(NUM_CUSTOMERS, len(customer_ids))):
    # Drop the customer's own entry by position instead of assuming it sorts
    # first: another customer can tie at similarity 1.0 (or beat the
    # self-score through floating-point rounding), which would otherwise
    # leave the customer in their own recommendations.
    scores = [
        (j, float(score))
        for j, score in enumerate(similarity_matrix[i])
        if j != i
    ]
    # The sort is stable, so tied scores keep their original row order.
    scores.sort(key=lambda pair: pair[1], reverse=True)
    lookalikes[customer_ids[i]] = [
        (customer_ids[j], score) for j, score in scores[:TOP_N]
    ]

# Save recommendations to a CSV: a header row, then one row per customer with
# the customer's ID and their recommendation list rendered as text (the csv
# writer stringifies non-string cells with str()).
# The encoding is set explicitly so the output does not depend on the
# platform's default text encoding.
with open('Lookalike.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['CustomerID', 'Recommendations'])
    writer.writerows([customer, recs] for customer, recs in lookalikes.items())
