<a href="https://colab.research.google.com/github/sarique2003/Zeo_Tap/blob/main/Mohd_Sarique_Lookalike.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import requests
from google.colab import drive

# The CSV data files are stored in Google Drive; mount it first so they can be
# read through the regular filesystem.
MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)
drive_path = f'{MOUNT_POINT}/MyDrive/Zeotap'

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


# Load the three source tables from the mounted Drive folder.
customers = pd.read_csv(f"{drive_path}/Customers.csv")
products = pd.read_csv(f"{drive_path}/Products.csv")
transactions = pd.read_csv(f"{drive_path}/Transactions.csv")

# Attach customer and product attributes to every transaction row
# (default inner joins, so unmatched rows are dropped).
merged_data = (
    transactions
    .merge(customers, on='CustomerID')
    .merge(products, on='ProductID')
)

# Collapse to one row per customer: totals for spend and quantity, plus the
# first Region / SignupDate value seen for that customer.
aggregations = {
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'Region': 'first',
    'SignupDate': 'first',
}
per_customer = merged_data.groupby('CustomerID').agg(aggregations).reset_index()

# Replace the raw signup date with the number of whole days elapsed since
# signup, then one-hot encode the Region column.
signup_dates = pd.to_datetime(per_customer['SignupDate'])
days_since_signup = (pd.to_datetime('today') - signup_dates).dt.days
customer_features = pd.get_dummies(
    per_customer.assign(SignupDuration=days_since_signup).drop(columns=['SignupDate']),
    columns=['Region'],
)

# Standardise every feature column (everything except the ID) and compute the
# pairwise cosine similarity between customers; row i of the matrix lines up
# with row i of customer_features.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_features.drop(columns=['CustomerID']))
similarity_matrix = cosine_similarity(scaled_features)

# Function to get top 3 lookalikes
def get_lookalikes(customer_id, similarity_matrix, top_n=3, features=None):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Parameters
    ----------
    customer_id
        Value to look up in the ``CustomerID`` column of ``features``.
    similarity_matrix : 2-D array-like
        Pairwise similarity scores. Row/column ``i`` must correspond to the
        ``i``-th row (by position) of ``features``.
    top_n : int, default 3
        Maximum number of lookalikes to return.
    features : pandas.DataFrame, optional
        Frame with a ``CustomerID`` column. When omitted, the module-level
        ``customer_features`` frame is used, which keeps existing
        three-argument calls working unchanged.

    Returns
    -------
    list of (CustomerID, float)
        Up to ``top_n`` pairs ordered by descending similarity. The queried
        customer itself is never included.

    Raises
    ------
    IndexError
        If ``customer_id`` does not occur in ``features``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")
    if features is None:
        features = customer_features

    ids = features['CustomerID'].to_numpy()
    # Locate the customer by *position*: the similarity matrix is positional,
    # so an index label would only be correct for a default RangeIndex.
    matches = np.flatnonzero(ids == customer_id)
    if matches.size == 0:
        raise IndexError(f"CustomerID {customer_id!r} not found in features")
    customer_index = int(matches[0])

    similarities = np.asarray(similarity_matrix[customer_index])
    # Rank everyone from most to least similar. A stable sort keeps the
    # ordering of tied scores deterministic.
    ranked = np.argsort(similarities, kind='stable')[::-1]
    # Drop the customer explicitly instead of assuming it sorts last: another
    # customer with an identical feature vector also scores 1.0 (and float
    # rounding can push either one ahead), which would otherwise return the
    # customer as its own lookalike.
    ranked = ranked[ranked != customer_index][:top_n]
    # Plain floats serialise cleanly (no numpy scalar repr) when written out.
    return [(ids[i], float(similarities[i])) for i in ranked]

# Compute the lookalike list for each of the first 20 customers in the
# feature table.
first_customer_ids = customer_features['CustomerID'].iloc[:20]
lookalike_map = {
    cid: get_lookalikes(cid, similarity_matrix) for cid in first_customer_ids
}

# One row per customer; the Lookalikes cell holds the whole list of
# (CustomerID, score) pairs. Save it to CSV and show it.
lookalike_df = pd.DataFrame(
    list(lookalike_map.items()),
    columns=['CustomerID', 'Lookalikes'],
)
lookalike_df.to_csv("Lookalike.csv", index=False)
print("Lookalike Recommendations:", lookalike_df, sep="\n")

Lookalike Recommendations:
   CustomerID                                         Lookalikes
0       C0001  [(C0184, 0.9953164048833796), (C0152, 0.986461...
1       C0002  [(C0159, 0.9762072067882096), (C0007, 0.976058...
2       C0003  [(C0190, 0.990316934348167), (C0181, 0.9672020...
3       C0004  [(C0113, 0.9848821475382803), (C0102, 0.981839...
4       C0005  [(C0159, 0.9981854734500303), (C0007, 0.989670...
5       C0006  [(C0126, 0.9908486592116308), (C0187, 0.957528...
6       C0007  [(C0159, 0.9927704645126596), (C0005, 0.989670...
7       C0008  [(C0018, 0.9455849910250068), (C0034, 0.936933...
8       C0009  [(C0119, 0.9831364766104133), (C0063, 0.973460...
9       C0010  [(C0019, 0.9913050237693206), (C0073, 0.976647...
10      C0011  [(C0107, 0.99212001597206), (C0001, 0.96995070...
11      C0012  [(C0013, 0.9804910159898064), (C0039, 0.972446...
12      C0013  [(C0012, 0.9804910159898064), (C0087, 0.976456...
13      C0014  [(C0063, 0.9805580562974943), (C0119, 0.964916..