In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


In [2]:
# Load both input tables from CSV files in the current working directory.
customers, transactions = (
    pd.read_csv(csv_name) for csv_name in ('Customers.csv', 'Transactions.csv')
)

In [3]:
# Attach customer attributes to every transaction row. A left join keeps all
# transactions; rows whose CustomerID has no match in `customers` get NaN in
# the customer-side columns.
merged = pd.merge(transactions, customers, how='left', on='CustomerID')

In [4]:
def _most_frequent_region(regions):
    """Return the most frequent non-null value in ``regions``.

    ``Series.mode()`` drops missing values, so it returns an empty Series when
    every value in the group is NaN (e.g. a transaction whose CustomerID has no
    row in the customers table after the left merge). Indexing ``[0]`` on that
    empty result raises, so fall back to NaN instead of aborting the whole
    aggregation.

    Parameters
    ----------
    regions : pandas.Series
        Region values for a single customer group.

    Returns
    -------
    object
        The first mode of the group, or NaN when the group has no non-null
        values.
    """
    modes = regions.mode()
    if modes.empty:
        return float('nan')
    return modes.iloc[0]


# One row per customer: total spend, total quantity, and most frequent region.
customer_profiles = merged.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'Region': _most_frequent_region,
}).reset_index()


In [5]:
# One-hot encode Region. drop_first=True omits the indicator column for the
# first category level, so that level is represented by all-zero indicators.
# NOTE: this rebinds `customer_profiles`; re-running the cell without first
# re-running the aggregation cell fails because 'Region' is no longer a column.
customer_profiles = pd.get_dummies(
    data=customer_profiles,
    columns=['Region'],
    drop_first=True,
)

In [6]:
# Standardize every feature column (everything except the CustomerID key)
# so that no single column dominates the similarity computation by scale.
scaler = StandardScaler()
feature_columns = customer_profiles.drop(columns='CustomerID')
customer_features = scaler.fit_transform(feature_columns)


In [7]:
# Pairwise cosine similarity between all customer feature vectors, wrapped in
# a DataFrame labelled by CustomerID on both axes for lookup by ID.
similarity_matrix = cosine_similarity(customer_features)
customer_ids = customer_profiles['CustomerID']
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)


In [8]:
# Map each of the first 20 customers (in customer_profiles row order) to a
# pair: (list of the 3 most similar customer IDs, list of their scores).
lookalike_recommendations = {}
first_20_customers = customer_profiles['CustomerID'].head(20)

for cust_id in first_20_customers:
    # Remove the customer's own entry by label before ranking. Skipping the
    # first sorted position is not reliable: another customer with an
    # identical feature vector also scores 1.0 and may sort ahead of the
    # customer itself, which would then be returned as its own lookalike.
    similar_customers = similarity_df[cust_id].drop(labels=cust_id).nlargest(3)
    lookalike_recommendations[cust_id] = list(similar_customers.index), list(similar_customers.values)

In [9]:
# Flatten the {customer: (ids, scores)} mapping into one record per
# (customer, lookalike) pair, then build the output table in a single call.
recommendations_list = [
    {'cust_id': source_id, 'similar_cust_id': match_id, 'score': match_score}
    for source_id, (match_ids, match_scores) in lookalike_recommendations.items()
    for match_id, match_score in zip(match_ids, match_scores)
]

recommendations_df = pd.DataFrame(recommendations_list)

In [10]:
# Write the lookalike table to CSV without the DataFrame index column,
# then print a confirmation message.
output_file = 'Firstname_Lastname_Lookalike.csv'
recommendations_df.to_csv(path_or_buf=output_file, index=False)
print("Lookalike recommendations saved to Firstname_Lastname_Lookalike.csv.")


Lookalike recommendations saved to Firstname_Lastname_Lookalike.csv.
