In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
# Load the three raw tables. The short names c / p / t are kept because the
# rest of the script refers to them.
c, p, t = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)

# Preview the first rows of customers, products and (un-merged) transactions.
print(c.head(), p.head(), t.head(), sep="\n")

# Enrich every transaction row with product columns, then customer columns.
# Left joins keep all transaction rows even when a lookup key has no match.
t = pd.merge(t, p, how='left', on='ProductID')
t = pd.merge(t, c, how='left', on='CustomerID')

# One row per customer: total spend, number of transactions, number of
# distinct products bought, and total quantity purchased.
customer_summary = (
    t.groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        TransactionID=('TransactionID', 'count'),
        ProductID=('ProductID', 'nunique'),
        Quantity=('Quantity', 'sum'),
    )
    .reset_index()
)

# Bring in each customer's Region and one-hot encode it; drop_first=True
# omits one dummy column so the encoding is not redundant.
customer_summary = pd.merge(
    customer_summary,
    c[['CustomerID', 'Region']],
    how='left',
    on='CustomerID',
)
customer_summary = pd.get_dummies(
    customer_summary, columns=['Region'], drop_first=True
)

# Standardise every feature column (everything except the ID), then compute
# the pairwise cosine similarity between customers.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(
    customer_summary.drop(columns='CustomerID')
)
similarity_matrix = cosine_similarity(scaled_features)
# Build the lookalike table: for every customer, the most similar *other*
# customers according to `similarity_matrix`, then export a subset to CSV.
TOP_N_LOOKALIKES = 3        # neighbours kept per customer
N_REPORTED_CUSTOMERS = 20   # leading rows of the customers table to report
LOOKALIKE_CSV = "ESWARI_LAKSHMI_SUSHMITHA_PULI_Lookalike.csv"

# Positional array of IDs: row/column k of similarity_matrix belongs to
# customer_ids[k]. Using positions avoids depending on the DataFrame's index
# labels and avoids n^2 pandas scalar lookups.
customer_ids = customer_summary['CustomerID'].to_numpy()

lookalike_results = {}
for i, customer_id in enumerate(customer_ids):
    similarity_scores = similarity_matrix[i]
    # Stable descending order, so tied scores keep their original customer
    # order (same tie-breaking as a stable sorted(..., reverse=True)).
    ranked = np.argsort(-similarity_scores, kind='stable')
    # A customer is never its own lookalike.
    ranked = ranked[ranked != i][:TOP_N_LOOKALIKES]
    lookalike_results[customer_id] = [
        (customer_ids[j], similarity_scores[j]) for j in ranked
    ]

# Flatten {customer: [(similar, score), ...]} into one row per pair.
lookalike_data = []
for customer_id, lookalikes in lookalike_results.items():
    lookalike_data.extend(
        [customer_id, similar_customer, score]
        for similar_customer, score in lookalikes
    )

lookalike_df = pd.DataFrame(
    lookalike_data,
    columns=['CustomerID', 'SimilarCustomerID', 'SimilarityScore'],
)

# Only the first N_REPORTED_CUSTOMERS customers (in customers-table order)
# are written to the deliverable file.
reported_ids = c['CustomerID'].head(N_REPORTED_CUSTOMERS)
lookalike_df_subset = lookalike_df[lookalike_df['CustomerID'].isin(reported_ids)]
lookalike_df_subset.to_csv(LOOKALIKE_CSV, index=False)
print(f"Lookalike results saved to '{LOOKALIKE_CSV}'.")
print(lookalike_df_subset.head())


  CustomerID        CustomerName         Region  SignupDate
0      C0001    Lawrence Carroll  South America  2022-07-10
1      C0002      Elizabeth Lutz           Asia  2022-02-13
2      C0003      Michael Rivera  South America  2024-03-07
3      C0004  Kathleen Rodriguez  South America  2022-10-09
4      C0005         Laura Weber           Asia  2022-08-15
  ProductID              ProductName     Category   Price
0      P001     ActiveWear Biography        Books  169.30
1      P002    ActiveWear Smartwatch  Electronics  346.30
2      P003  ComfortLiving Biography        Books   44.12
3      P004            BookWorld Rug   Home Decor   95.69
4      P005          TechPro T-Shirt     Clothing  429.31
  TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2        T00166      C0127      P067  2024-04-25 07:38:55         1   
3       