# Task 2: Lookalike Model

### 1. Import the libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

### 2. Load the datasets

In [2]:
# Read the three input tables. The file names are relative paths, so they are
# resolved against the notebook's current working directory.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

### 3. Merge datasets for analysis

In [3]:
# Attach customer attributes, then product attributes, to every transaction row.
# Both joins use pandas' default how='inner', so a transaction whose CustomerID
# or ProductID has no match in the lookup table is dropped.
transactions_with_customers = pd.merge(transactions, customers, on='CustomerID')
merged_data = pd.merge(transactions_with_customers, products, on='ProductID')

### 4. Prepare customer profiles

In [4]:
# One profile row per CustomerID, aggregated over that customer's merged rows:
#   Region     -> first value in the group
#   ProductID  -> Python list of the group's ProductID values (one per row)
#   Quantity   -> sum over the group
#   TotalValue -> sum over the group
profile_aggregations = {
    'Region': 'first',
    'ProductID': lambda product_ids: list(product_ids),
    'Quantity': 'sum',
    'TotalValue': 'sum',
}
customer_features = (
    merged_data
    .groupby('CustomerID')
    .agg(profile_aggregations)
    .reset_index()  # bring CustomerID back as a regular column
)

### 5. Encode categorical data (Region)

In [5]:
# One-hot encode Region into Region_<value> indicator columns.
# This cell overwrites its own input and get_dummies consumes the 'Region'
# column, so a second run would raise KeyError. The guard turns a re-run into
# a no-op instead.
if 'Region' in customer_features.columns:
    customer_features = pd.get_dummies(customer_features, columns=['Region'])


### 6. Transform product purchases into per-product purchase-count features

In [6]:
# Expand ProductID into one indicator column per product (ProductID_<id>),
# keeping CustomerID alongside so rows can be rolled up per customer.
purchase_pairs = merged_data[['CustomerID', 'ProductID']]
product_matrix = pd.get_dummies(purchase_pairs, columns=['ProductID'])

# Summing the indicators per customer gives, for each product, the number of
# merged rows in which that customer has the product (a count, not a 0/1 flag).
product_features = (
    product_matrix
    .groupby('CustomerID')
    .sum()
    .reset_index()
)

### 7. Combine customer features and product features

In [7]:
# Join the per-customer profile with the per-customer product columns.
# Inner join: only CustomerIDs present in both frames are kept.
combined_features = customer_features.merge(
    product_features,
    how='inner',
    on='CustomerID',
)

### 8. Scale numerical features

In [8]:
# Standardise the two aggregate columns. The Region and ProductID_* columns
# are left as they are.
numerical_cols = ['Quantity', 'TotalValue']
scaler = StandardScaler()
scaler.fit(combined_features[numerical_cols])
combined_features[numerical_cols] = scaler.transform(combined_features[numerical_cols])

### 9. Compute similarity matrix

In [9]:
# Feature matrix: one row per customer, indexed by CustomerID. The 'ProductID'
# column holds the per-customer list of product IDs built during profiling and
# is not numeric, so it is removed; the ProductID_* columns stay.
feature_matrix = (
    combined_features
    .set_index('CustomerID')
    .drop(columns=['ProductID'])
)

# Pairwise cosine similarity between all customers, labelled on both axes by
# CustomerID so a customer's scores can be looked up by ID.
similarity_matrix = cosine_similarity(feature_matrix)
customer_ids = feature_matrix.index
similarity_df = pd.DataFrame(similarity_matrix, index=customer_ids, columns=customer_ids)

### 10. Generate lookalike recommendations

In [10]:
# Map each customer to their three most similar *other* customers as a list of
# (customer_id, similarity_score) tuples, highest score first.
lookalike_map = {}
for customer_id in feature_matrix.index:
    similar_customers = (
        similarity_df[customer_id]
        # Remove the customer's own entry by label. Slicing off position 0
        # after sorting is only correct when the self-score is the strict
        # maximum. A customer with an identical feature vector, or float
        # rounding of the self-score, can tie or outrank it; the customer would
        # then appear in their own lookalike list and a real match would be lost.
        .drop(labels=customer_id)
        # A stable sort keeps the order of equal scores deterministic across runs.
        .sort_values(ascending=False, kind='mergesort')
        .head(3)
    )
    # Cast scores to built-in floats. NumPy scalars stringify as
    # 'np.float64(...)' under NumPy >= 2, which would leak into any text
    # serialisation of these tuples.
    lookalike_map[customer_id] = [
        (other_id, float(score)) for other_id, score in similar_customers.items()
    ]

### 11. Filter for the first 20 customers

In [11]:
# Keep the lookalike lists for the first 20 customers, in feature_matrix
# index order.
filtered_map = {}
for cust_id in feature_matrix.index[:20]:
    filtered_map[cust_id] = lookalike_map[cust_id]

### 12. Save to Lookalike.csv

In [12]:
# One output row per customer. The list of (customer_id, score) tuples is
# stored as its Python string representation in the 'lookalikes' column.
lookalike_records = []
for cust_id, lookalikes in filtered_map.items():
    lookalike_records.append({'cust_id': cust_id, 'lookalikes': str(lookalikes)})

lookalike_df = pd.DataFrame(lookalike_records)
# index=False: do not write the DataFrame's row index as an extra CSV column.
lookalike_df.to_csv('Lookalike.csv', index=False)

In [13]:
# Confirmation message only; it does not check that the file was written.
print("Lookalike recommendations saved to 'Lookalike.csv'.")

Lookalike recommendations saved to 'Lookalike.csv'.


In [14]:
# Read the saved file back to inspect what was written.
df = pd.read_csv("Lookalike.csv")

In [15]:
# Display the reloaded lookalike table as the cell output.
df

Unnamed: 0,cust_id,lookalikes
0,C0001,"[('C0190', 0.49322853409460554), ('C0120', 0.3..."
1,C0002,"[('C0110', 0.4787020566267993), ('C0097', 0.47..."
2,C0003,"[('C0181', 0.5087183333495168), ('C0031', 0.40..."
3,C0004,"[('C0065', 0.5542858466644812), ('C0082', 0.49..."
4,C0005,"[('C0078', 0.6013720771239098), ('C0097', 0.60..."
5,C0006,"[('C0171', 0.5546482293033211), ('C0040', 0.38..."
6,C0007,"[('C0140', 0.668452101791043), ('C0078', 0.521..."
7,C0008,"[('C0109', 0.4049782890119175), ('C0059', 0.40..."
8,C0009,"[('C0060', 0.7514750519052317), ('C0014', 0.75..."
9,C0010,"[('C0132', 0.595598247411566), ('C0121', 0.511..."
