# **Task 2: Lookalike Model**

In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np

In [3]:
# Load the three raw input tables from the working directory, in a fixed order.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in ("Customers.csv", "Products.csv", "Transactions.csv")
)

In [4]:
# Convert the date columns to pandas datetimes, overwriting each column in place.
for frame, date_column in ((customers, "SignupDate"), (transactions, "TransactionDate")):
    frame[date_column] = pd.to_datetime(frame[date_column])

## Aggregate transaction data of each customer

In [14]:
# Build one feature row per customer: total spend, total units bought and
# average unit price.
#
# The original cell grouped a frame named `df` that no cell in this notebook
# defines, so it failed with NameError on a fresh kernel. The aggregation now
# reads from `transactions`, which is loaded above.
# NOTE(review): assumes Transactions.csv carries CustomerID, TotalValue,
# Quantity and Price columns — confirm against the data file.
customer_data = (
    transactions
    .groupby("CustomerID")
    .agg({
        "TotalValue": "sum",
        "Quantity": "sum",
        "Price": "mean",
    })
    .reset_index()  # keep CustomerID as the first column; later cells rely on that
)

## Data Normalization

In [7]:
# Standardise every column after the first one (CustomerID) so that each
# feature contributes on a comparable scale to the similarity computation.
feature_frame = customer_data.iloc[:, 1:]
scaler = StandardScaler()
customer_data_scaled = scaler.fit_transform(feature_frame)

## Computing similarity scores

In [8]:
# Pairwise cosine similarity between all rows of the scaled feature matrix;
# entry [i, j] compares row i with row j.
similarity_matrix = cosine_similarity(X=customer_data_scaled)

## Creating a dictionary of the top 5 similar customers per customer

In [15]:
# Number of customers (taken from the top of customer_data) to find lookalikes
# for, and number of lookalikes kept per customer.
N_TARGET_CUSTOMERS = 20
TOP_N = 5

customer_ids = customer_data["CustomerID"]

# Maps each target CustomerID to a list of (lookalike CustomerID, score) pairs,
# ordered from most to least similar.
similar_customers = {}
for i, cust_id in enumerate(customer_ids.iloc[:N_TARGET_CUSTOMERS]):
    # Exclude the customer's own entry by position. Sorting first and slicing
    # off element 0 is only correct when self ranks first; another customer
    # with an equal score (identical features, float rounding) can take that
    # slot, which would drop a real lookalike and keep the customer itself.
    scores = [
        (idx, score)
        for idx, score in enumerate(similarity_matrix[i])
        if idx != i
    ]
    scores.sort(key=lambda pair: pair[1], reverse=True)
    # Row positions in similarity_matrix map to row positions in customer_data,
    # so look IDs up positionally rather than by index label.
    similar_customers[cust_id] = [
        (customer_ids.iloc[idx], round(score, 3))
        for idx, score in scores[:TOP_N]
    ]



## Saving as CSV file

In [16]:
# Flatten each customer's five (lookalike id, score) pairs into one wide row:
# CustomerID, Similar1, Score1, ..., Similar5, Score5.
lookalike_rows = []
for source_id, matches in similar_customers.items():
    flat_row = [source_id]
    for rank in range(5):
        flat_row.append(matches[rank][0])
        flat_row.append(matches[rank][1])
    lookalike_rows.append(tuple(flat_row))

output_columns = [
    "CustomerID",
    "Similar1", "Score1",
    "Similar2", "Score2",
    "Similar3", "Score3",
    "Similar4", "Score4",
    "Similar5", "Score5",
]
similar_df = pd.DataFrame(lookalike_rows, columns=output_columns)

# Write the result without the positional index column.
similar_df.to_csv("Navitha_Abhinaya_Lookalike.csv", index=False)
