In [10]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from google.colab import files
uploaded = files.upload()
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')
merged = transactions.merge(customers, on='CustomerID', how='inner').merge(products, on='ProductID', how='inner')
print(merged.columns)
if 'Price' in merged.columns:
    customer_features = merged.groupby('CustomerID').agg({
        'TotalValue': 'sum',
        'Quantity': 'sum',
        'Price': 'mean',
        'Region': 'first'
    }).reset_index()
else:
    print("Warning: 'Price' column not found. Adjusting aggregation.")
    customer_features = merged.groupby('CustomerID').agg({
        'TotalValue': 'sum',
        'Quantity': 'sum',
        'Region': 'first'
    }).reset_index()

customer_features = pd.get_dummies(customer_features, columns=['Region'], drop_first=True)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_features.iloc[:, 1:])
from sklearn.metrics.pairwise import cosine_similarity
similarity_matrix = cosine_similarity(scaled_features)

def recommend_lookalikes(customer_id, top_n=3):
    customer_index = customer_features[customer_features['CustomerID'] == customer_id].index[0]
    similarity_scores = list(enumerate(similarity_matrix[customer_index]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    top_similar = similarity_scores[1:top_n + 1]
    return [(customer_features.iloc[i[0]]['CustomerID'], i[1]) for i in top_similar]

lookalike_results = {}
for customer_id in customer_features['CustomerID'][:20]:
    lookalike_results[customer_id] = recommend_lookalikes(customer_id)
lookalike_df = pd.DataFrame([
    {'CustomerID': cust_id, 'Lookalikes': lookalikes}
    for cust_id, lookalikes in lookalike_results.items()
])
lookalike_df.to_csv('Keesari_Saiakhil_Lookalike.csv', index=False)
print(lookalike_df)
from google.colab import files
files.download('Keesari_Saiakhil_Lookalike.csv')




Saving Customers.csv to Customers (6).csv
Saving Products.csv to Products (6).csv
Saving Transactions.csv to Transactions (6).csv
Index(['TransactionID', 'CustomerID', 'ProductID', 'TransactionDate',
       'Quantity', 'TotalValue', 'Price_x', 'CustomerName', 'Region',
       'SignupDate', 'ProductName', 'Category', 'Price_y'],
      dtype='object')
   CustomerID                                         Lookalikes
0       C0001  [(C0107, 0.9964159076264131), (C0137, 0.995699...
1       C0002  [(C0088, 0.9961371521325482), (C0142, 0.988231...
2       C0003  [(C0147, 0.9977417997931421), (C0190, 0.996976...
3       C0004  [(C0113, 0.9943242911409664), (C0102, 0.979222...
4       C0005  [(C0186, 0.9969881420656154), (C0159, 0.996467...
5       C0006  [(C0048, 0.9934351014317927), (C0126, 0.991102...
6       C0007  [(C0146, 0.9999943827772282), (C0178, 0.994386...
7       C0008  [(C0018, 0.9837389614522309), (C0122, 0.961298...
8       C0009  [(C0198, 0.9999826095098449), (C0014, 0.996597..

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>