In [1]:
import pandas as pd
import csv
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Step 1: Load the datasets
# All three CSV files are read from the same folder. Keeping that folder in a
# single constant means the notebook can be pointed at another location by
# editing one line instead of three.
DATA_DIR = "C:/Users/NELAKURTHI MANOJ/Downloads"

customers = pd.read_csv(f"{DATA_DIR}/Customers.csv")
products = pd.read_csv(f"{DATA_DIR}/Products.csv")
transactions = pd.read_csv(f"{DATA_DIR}/Transactions.csv")

In [3]:
# Renaming columns
# Give the 'Price' column of each frame a source-specific name so the two
# stay distinguishable once the frames are merged.
# Reassign instead of using inplace=True: the frames produced by the load cell
# are not mutated behind the reader's back, and re-running this cell is a no-op
# (rename silently ignores a label that is no longer present).
products = products.rename(columns={'Price': 'Products_Price'})
transactions = transactions.rename(columns={'Price': 'Transactions_Price'})

In [4]:
# Clean and preprocess data
# Parse the date columns into pandas datetimes.
customers['SignupDate'] = pd.to_datetime(customers['SignupDate'])
transactions['TransactionDate'] = pd.to_datetime(transactions['TransactionDate'])

# Merge data for analysis
# Attach customer attributes and then product attributes to every transaction.
# Both joins use pandas' default (inner) join on the shared key column.
merged_data = (
    transactions
    .merge(customers, on='CustomerID')
    .merge(products, on='ProductID')
)

In [5]:
# Aggregate features for similarity calculation
# One row per customer: total value, total quantity, and the mean transaction
# price. reset_index() turns CustomerID back into the first regular column.
customer_features = (
    merged_data
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        Transactions_Price=('Transactions_Price', 'mean'),
    )
    .reset_index()
)

In [6]:
# Normalize features
from sklearn.preprocessing import StandardScaler

# Column 0 is CustomerID; every column after it is a numeric feature.
feature_columns = customer_features.columns[1:]

# Fit the scaler on the feature columns, then apply it to those same columns.
scaler = StandardScaler()
scaled_features = scaler.fit(customer_features[feature_columns]).transform(
    customer_features[feature_columns]
)

In [7]:
# Calculate similarity
# Pairwise cosine similarity between the rows of scaled_features:
# entry [i, j] compares row i with row j.
similarity_matrix = cosine_similarity(X=scaled_features)

In [8]:
# Recommend top 3 similar customers
customer_ids = customer_features['CustomerID'].values
recommendations = {}

for row, customer_id in enumerate(customer_ids):
    scores = similarity_matrix[row]
    # Negating the scores makes argsort rank from most to least similar.
    # Take four candidates because the customer's own row is normally among them.
    ranked = np.argsort(-scores)[:4]

    neighbours = []
    for col in ranked:
        if col == row:
            continue  # never recommend a customer to themselves
        neighbours.append((customer_ids[col], scores[col]))

    # Cap at three in case the customer's own row was not among the four.
    recommendations[customer_id] = neighbours[:3]

In [9]:
# Save recommendations to CSV
# Filter recommendations to include only CustomerID C0001 - C0020
# Build the whitelist once, as a set, instead of rebuilding a list for every
# entry of `recommendations`.
target_ids = {f'C{i:04d}' for i in range(1, 21)}
filtered_recommendations = {k: v for k, v in recommendations.items() if k in target_ids}

# newline='' is required by the csv module; without it the writer's own '\r\n'
# line endings are translated again on Windows, leaving a blank line after
# every row.
with open('Lookalike.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['CustomerID', 'Recommendations'])
    for customer_id, recs in filtered_recommendations.items():
        # The list is written via str(), which uses each element's repr. Convert
        # NumPy scalars to built-in types so the cell reads
        # "[('C0002', 0.98), ...]" rather than "np.float64(0.98)" on NumPy >= 2.
        plain_recs = [(str(other_id), float(score)) for other_id, score in recs]
        writer.writerow([customer_id, plain_recs])