<a href="https://colab.research.google.com/github/sanjaysanju673/demo2/blob/main/task2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Task 2: Lookalike Model**

Build a Lookalike Model that takes a user's information as input and recommends 3 similar
customers based on their profile and transaction history. The model should:
● Use both customer and product information.
● Assign a similarity score to each recommended customer.

In [44]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import numpy as np



In [4]:
customers = pd.read_csv('/Customers.csv')
products = pd.read_csv('/Products.csv')
transactions = pd.read_csv('/Transactions.csv')

In [5]:
customer_transactions = pd.merge(transactions, customers, on='CustomerID')
customer_products = pd.merge(customer_transactions, products, on='ProductID')

In [6]:
customer_transactions

Unnamed: 0,TransactionID,CustomerID,ProductID,TransactionDate,Quantity,TotalValue,Price,CustomerName,Region,SignupDate
0,T00001,C0199,P067,2024-08-25 12:38:23,1,300.68,300.68,Andrea Jenkins,Europe,2022-12-03
1,T00112,C0146,P067,2024-05-27 22:23:54,1,300.68,300.68,Brittany Harvey,Asia,2024-09-04
2,T00166,C0127,P067,2024-04-25 07:38:55,1,300.68,300.68,Kathryn Stevens,Europe,2024-04-04
3,T00272,C0087,P067,2024-03-26 22:55:37,2,601.36,300.68,Travis Campbell,South America,2024-04-11
4,T00363,C0070,P067,2024-03-21 15:10:10,3,902.04,300.68,Timothy Perez,Europe,2022-03-15
...,...,...,...,...,...,...,...,...,...,...
995,T00496,C0118,P037,2024-10-24 08:30:27,1,459.86,459.86,Jacob Holt,South America,2022-01-22
996,T00759,C0059,P037,2024-06-04 02:15:24,3,1379.58,459.86,Mrs. Kimberly Wright,North America,2024-04-07
997,T00922,C0018,P037,2024-04-05 13:05:32,4,1839.44,459.86,Tyler Haynes,North America,2024-09-21
998,T00959,C0115,P037,2024-09-29 10:16:02,2,919.72,459.86,Joshua Hamilton,Asia,2024-11-11


In [7]:
customer_products

Unnamed: 0,TransactionID,CustomerID,ProductID,TransactionDate,Quantity,TotalValue,Price_x,CustomerName,Region,SignupDate,ProductName,Category,Price_y
0,T00001,C0199,P067,2024-08-25 12:38:23,1,300.68,300.68,Andrea Jenkins,Europe,2022-12-03,ComfortLiving Bluetooth Speaker,Electronics,300.68
1,T00112,C0146,P067,2024-05-27 22:23:54,1,300.68,300.68,Brittany Harvey,Asia,2024-09-04,ComfortLiving Bluetooth Speaker,Electronics,300.68
2,T00166,C0127,P067,2024-04-25 07:38:55,1,300.68,300.68,Kathryn Stevens,Europe,2024-04-04,ComfortLiving Bluetooth Speaker,Electronics,300.68
3,T00272,C0087,P067,2024-03-26 22:55:37,2,601.36,300.68,Travis Campbell,South America,2024-04-11,ComfortLiving Bluetooth Speaker,Electronics,300.68
4,T00363,C0070,P067,2024-03-21 15:10:10,3,902.04,300.68,Timothy Perez,Europe,2022-03-15,ComfortLiving Bluetooth Speaker,Electronics,300.68
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,T00496,C0118,P037,2024-10-24 08:30:27,1,459.86,459.86,Jacob Holt,South America,2022-01-22,SoundWave Smartwatch,Electronics,459.86
996,T00759,C0059,P037,2024-06-04 02:15:24,3,1379.58,459.86,Mrs. Kimberly Wright,North America,2024-04-07,SoundWave Smartwatch,Electronics,459.86
997,T00922,C0018,P037,2024-04-05 13:05:32,4,1839.44,459.86,Tyler Haynes,North America,2024-09-21,SoundWave Smartwatch,Electronics,459.86
998,T00959,C0115,P037,2024-09-29 10:16:02,2,919.72,459.86,Joshua Hamilton,Asia,2024-11-11,SoundWave Smartwatch,Electronics,459.86


In [43]:
customer_profile = customer_products.groupby('CustomerID').agg(
    total_spend=('TotalValue', 'sum'),
    avg_spend=('TotalValue', 'mean'),
    num_transactions=('TransactionID', 'count'),
    most_bought_category=('Category', lambda x: x.mode()[0])
).reset_index()

In [9]:
customer_profile

Unnamed: 0,CustomerID,total_spend,avg_spend,num_transactions,most_bought_category
0,C0001,3354.52,670.904000,5,Electronics
1,C0002,1862.74,465.685000,4,Clothing
2,C0003,2725.38,681.345000,4,Home Decor
3,C0004,5354.88,669.360000,8,Books
4,C0005,2034.24,678.080000,3,Electronics
...,...,...,...,...,...
194,C0196,4982.88,1245.720000,4,Home Decor
195,C0197,1928.65,642.883333,3,Electronics
196,C0198,931.83,465.915000,2,Clothing
197,C0199,1979.28,494.820000,4,Electronics


In [15]:
scaler = StandardScaler()
customer_profile[['total_spend', 'avg_spend', 'num_transactions']] = scaler.fit_transform(
    customer_profile[['total_spend', 'avg_spend', 'num_transactions']])

In [24]:
profile_matrix = customer_profile.drop(['CustomerID', 'most_bought_category'], axis=1).values


In [31]:
print(profile_matrix.shape)

(199, 3)


In [33]:
similarity_matrix = cosine_similarity(profile_matrix)


In [34]:
print(similarity_matrix.shape)

(199, 199)


In [35]:
lookalike_recommendations = {}
for i in range(20):
    similarities = similarity_matrix[i]
    similar_customers = similarities.argsort()[-4:-1]
    lookalike_recommendations[customers.iloc[i]['CustomerID']] = [
        (customers.iloc[similar].CustomerID, similarities[similar]) for similar in similar_customers
    ]

In [38]:
lookalike_recommendations.keys()

dict_keys(['C0001', 'C0002', 'C0003', 'C0004', 'C0005', 'C0006', 'C0007', 'C0008', 'C0009', 'C0010', 'C0011', 'C0012', 'C0013', 'C0014', 'C0015', 'C0016', 'C0017', 'C0018', 'C0019', 'C0020'])

In [39]:
flattened_recommendations = []
for customer_id, lookalikes in lookalike_recommendations.items():
    # Ensure that each list contains 3 lookalikes, even if there are less than 3
    lookalikes = lookalikes[:3]  # Limit to top 3
    lookalike_data = [customer_id]
    for lookalike in lookalikes:
        lookalike_data.extend(lookalike)  # Add the lookalike ID and score
    # Pad with None if there are less than 3 lookalikes
    while len(lookalike_data) < 7:
        lookalike_data.append(None)
    flattened_recommendations.append(lookalike_data)


In [40]:
lookalike_df = pd.DataFrame(flattened_recommendations, columns=['CustomerID', 'Lookalike1', 'Score1', 'Lookalike2', 'Score2', 'Lookalike3', 'Score3'])


In [41]:
lookalike_df.to_csv('Lookalike.csv', index=False)


In [42]:
print(lookalike_df.head());

  CustomerID Lookalike1    Score1 Lookalike2    Score2 Lookalike3    Score3
0      C0001      C0121  0.993012      C0152  0.995658      C0137  0.999360
1      C0002      C0010  0.998831      C0198  0.998867      C0029  0.999638
2      C0003      C0144  0.999217      C0178  0.999565      C0005  0.999894
3      C0004      C0075  0.999288      C0021  0.999658      C0067  0.999991
4      C0005      C0063  0.999259      C0073  0.999495      C0003  0.999894


Save to the csv