<a href="https://colab.research.google.com/github/nisargaa07/zeotap/blob/main/Looklikemodel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three raw CSV exports
customers = pd.read_csv('/content/Customers.csv')
products = pd.read_csv('/content/Products.csv')
transactions = pd.read_csv('/content/Transactions.csv')

# Attach product attributes to every transaction row (left join keeps all transactions)
transactions = pd.merge(transactions, products, how='left', on='ProductID')

# Customer x product matrix: total quantity bought per (customer, product) pair,
# with 0 filled in for pairs that never occur
customer_product_matrix = pd.pivot_table(
    transactions,
    values='Quantity',
    index='CustomerID',
    columns='ProductID',
    aggfunc='sum',
    fill_value=0,
)

# Standardise the matrix with StandardScaler before measuring similarity
scaler = StandardScaler()
customer_product_matrix_scaled = scaler.fit_transform(customer_product_matrix)

# Pairwise cosine similarity between the scaled customer rows
similarity_matrix = cosine_similarity(customer_product_matrix_scaled)

# Label both axes of the similarity matrix with the customer ids from the pivot
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_product_matrix.index,
    columns=customer_product_matrix.index,
)

# Function to get the top-N lookalikes for the leading customers of the matrix
def get_top_lookalikes(similarity_df, top_n=3, n_customers=20):
    """Return the most similar customers for the first ``n_customers`` rows.

    Parameters
    ----------
    similarity_df : pandas.DataFrame
        Similarity matrix whose index and columns carry the same customer
        identifiers; ``similarity_df[a][b]`` is the similarity of ``a`` and ``b``.
    top_n : int, default 3
        Number of lookalikes to keep per customer.
    n_customers : int or None, default 20
        How many customers, taken in index order, to compute lookalikes for.
        ``None`` processes every customer in the index.

    Returns
    -------
    dict
        Maps each processed customer id to a list of ``(lookalike_id, score)``
        tuples ordered from most to least similar. A customer never appears
        in its own list.
    """
    lookalike_map = {}
    for customer_id in similarity_df.index[:n_customers]:
        # Exclude the customer by label rather than by skipping the first
        # sorted entry: a tie (identical purchase vectors) or floating-point
        # rounding can push another customer ahead of the self-similarity,
        # which would otherwise return the customer as its own lookalike.
        candidates = similarity_df[customer_id].drop(labels=customer_id, errors='ignore')
        # A stable sort keeps tied scores in index order, so results are reproducible.
        similar_customers = candidates.sort_values(ascending=False, kind='mergesort').iloc[:top_n]
        lookalike_map[customer_id] = list(zip(similar_customers.index, similar_customers.values))
    return lookalike_map

# Build the lookalike mapping for the selected customers
lookalike_map = get_top_lookalikes(similarity_df)

# Turn each customer's ranked matches into one wide record:
# CustomerID, LookalikeID_1, SimilarityScore_1, LookalikeID_2, ...
lookalike_list = []
for source_id, matches in lookalike_map.items():
    record = {'CustomerID': source_id}
    for rank, (match_id, match_score) in enumerate(matches, start=1):
        record.update({
            f'LookalikeID_{rank}': match_id,
            f'SimilarityScore_{rank}': match_score,
        })
    lookalike_list.append(record)

# One row per customer, columns in the order the record keys were inserted
lookalike_df = pd.DataFrame(data=lookalike_list)

# Persist the table without the positional index column
lookalike_df.to_csv('Lookalike.csv', index=False)

# Report success and preview up to the first 20 rows
print("Lookalike map saved to 'Lookalike.csv'")
print(lookalike_df.head(20))


Lookalike map saved to 'Lookalike.csv'
   CustomerID LookalikeID_1  SimilarityScore_1 LookalikeID_2  \
0       C0001         C0194           0.404928         C0104   
1       C0002         C0030           0.404617         C0091   
2       C0003         C0181           0.477572         C0134   
3       C0004         C0070           0.351901         C0175   
4       C0005         C0096           0.487456         C0023   
5       C0006         C0040           0.486909         C0178   
6       C0007         C0079           0.617442         C0118   
7       C0008         C0144           0.326751         C0169   
8       C0009         C0140           0.533441         C0083   
9       C0010         C0094           0.515064         C0092   
10      C0011         C0135           0.513283         C0120   
11      C0012         C0164           0.488257         C0158   
12      C0013         C0169           0.450756         C0092   
13      C0014         C0128           0.902943         C0159   
1

In [12]:
# Colab-specific helper module; NOTE(review): presumably this import only
# succeeds inside a Google Colab runtime — confirm before running elsewhere.
from google.colab import files

# Download the CSV file
# 'Lookalike.csv' is the relative path written by the earlier to_csv call,
# so that cell must have run in the same working directory first.
files.download('Lookalike.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>