# In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the two input tables
customers = pd.read_csv('Customers.csv')
transactions = pd.read_csv('Transactions.csv')

# Join each transaction row with its customer's attributes
merged_data = transactions.merge(customers, on='CustomerID')

# Aggregate to one row per (CustomerID, Region) with summed value and quantity
profiles = (
    merged_data
    .groupby(['CustomerID', 'Region'])[['TotalValue', 'Quantity']]
    .sum()
    .reset_index()
)

# Expand 'Region' into indicator columns
profiles = pd.get_dummies(profiles, columns=['Region'])

# Standardize every feature column (everything except the ID)
scaler = StandardScaler()
scaled_profiles = scaler.fit_transform(profiles.drop(columns='CustomerID'))

# Pairwise cosine similarity, labelled on both axes by CustomerID
similarity = cosine_similarity(scaled_profiles)
similarity_df = pd.DataFrame(
    similarity,
    index=profiles['CustomerID'],
    columns=profiles['CustomerID'],
)

# Function to get top similar customers
def get_top_similar(customer_id, similarity_df, top_n=3):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Args:
        customer_id: Column label in ``similarity_df`` to look up.
        similarity_df: DataFrame of similarity scores whose index and
            columns are labelled by customer ID.
        top_n: Maximum number of lookalikes to return. Values below 1
            yield an empty list.

    Returns:
        A list of ``(other_customer_id, score)`` tuples ordered by
        descending score. Scores are plain Python floats so the list
        serializes cleanly (e.g. when written to CSV).

    Raises:
        KeyError: If ``customer_id`` is not a column of ``similarity_df``.
    """
    scores = similarity_df[customer_id]
    # Remove the customer's own entry by label rather than assuming it sorts
    # first: another customer can tie at 1.0 (or edge ahead through float
    # rounding), which would otherwise put the customer in its own results.
    others = scores.drop(labels=customer_id, errors='ignore')
    # A stable sort keeps tied scores in their original index order.
    ranked = others.sort_values(ascending=False, kind='mergesort')
    ranked = ranked.iloc[:max(top_n, 0)]
    return list(zip(ranked.index, ranked.tolist()))

# Map each of the first 20 customer IDs to its list of (lookalike, score) pairs
lookalike_map = {
    cid: get_top_similar(cid, similarity_df)
    for cid in profiles['CustomerID'].iloc[:20]
}

# Write one row per customer: its ID and its lookalike list
lookalike_df = pd.DataFrame(
    list(lookalike_map.items()),
    columns=['CustomerID', 'Lookalikes'],
)
lookalike_df.to_csv('Shreesha_Bhat_Lookalike.csv', index=False)

print("Lookalike model completed and saved to 'Shreesha_Bhat_Lookalike.csv'.")

# Output: Lookalike model completed and saved to 'Shreesha_Bhat_Lookalike.csv'.
