In [16]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Read the raw customer and transaction tables from disk
customers = pd.read_csv('Customers.csv')
transactions = pd.read_csv('Transactions.csv')

# Step 2: Attach the customer columns to every transaction row.
# merge() defaults to an inner join, so only CustomerIDs present in both tables survive.
customer_transactions = pd.merge(transactions, customers, on='CustomerID')

# Step 3: Collapse the transactions into one profile row per customer.
# Named aggregation assigns the final column names directly:
#   TotalSpent       - sum of TotalValue
#   TransactionCount - number of non-null TransactionID values
#   UniqueProducts   - number of distinct ProductID values
customer_profiles = (
    customer_transactions
    .groupby('CustomerID')
    .agg(
        TotalSpent=('TotalValue', 'sum'),
        TransactionCount=('TransactionID', 'count'),
        UniqueProducts=('ProductID', 'nunique'),
    )
    .reset_index()
)

# Bring each customer's Region back in from the customer table
region_lookup = customers[['CustomerID', 'Region']]
customer_profiles = customer_profiles.merge(region_lookup, on='CustomerID')

# Step 4: Assemble the feature table used for the similarity calculation.
# Numeric behaviour columns come first, followed by the one-hot encoded Region
# (drop_first=True omits the first Region category's indicator column).
numeric_part = customer_profiles[['TotalSpent', 'TransactionCount', 'UniqueProducts']]
region_part = pd.get_dummies(customer_profiles[['Region']], drop_first=True)
features = pd.concat([numeric_part, region_part], axis=1)

# Step 5: Standardise every feature column with a StandardScaler fitted on this table
scaler = StandardScaler()
scaler.fit(features)
scaled_features = scaler.transform(features)

# Step 6: Pairwise cosine similarity between all customer rows;
# entry [i][j] compares the customers at positions i and j of customer_profiles
similarity_matrix = cosine_similarity(scaled_features)

# Step 7: Mapping of CustomerID -> list of (lookalike CustomerID, similarity score)
recommendations = {}

# Step 8: Get the top 3 lookalikes for the first 20 customers
# (fewer if customer_profiles holds fewer than 20 rows, instead of raising IndexError).
# Position -> CustomerID lookup, extracted once rather than via .iloc on every access.
customer_ids = customer_profiles['CustomerID'].tolist()
for i in range(min(20, len(customer_ids))):
    # Exclude the customer's own position explicitly. Slicing off the first
    # sorted entry is not reliable: the sort is stable, so another customer
    # with a tied score and a lower position would be ranked ahead of the
    # customer itself, and the customer would then be listed as its own lookalike.
    candidates = [
        (j, score)
        for j, score in enumerate(similarity_matrix[i])
        if j != i
    ]
    top_matches = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:3]
    recommendations[customer_ids[i]] = [
        (customer_ids[j], score) for j, score in top_matches
    ]

# Step 9: Flatten the recommendations mapping into one record per
# (customer, lookalike) pair, in the mapping's iteration order
lookalike_data = [
    {'CustomerID': cust_id, 'LookalikeID': lookalike_id, 'SimilarityScore': score}
    for cust_id, lookalikes in recommendations.items()
    for lookalike_id, score in lookalikes
]

# Step 10: Build the output table and write it to Lookalike.csv (row index omitted)
lookalike_df = pd.DataFrame(lookalike_data)
lookalike_df.to_csv('Lookalike.csv', index=False)

# Step 11: Offer the file as a browser download when running inside Google Colab.
# The google.colab package is only importable in the Colab runtime, so the import
# is guarded: elsewhere the CSV has already been written and the script should
# finish cleanly instead of failing on the import.
try:
    from google.colab import files
except ImportError:
    print('google.colab is not available; Lookalike.csv was saved to the working directory.')
else:
    files.download('Lookalike.csv')



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Lookalike recommendations saved to Lookalike.csv and ready for download.
