In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three raw input tables (customers, products, transactions) from CSV.
# NOTE(review): the paths are absolute and specific to one Windows machine;
# the cell only runs where these exact files exist.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in (
        r"C:\Users\Dell\Downloads\Customers.csv",
        r"C:\Users\Dell\Downloads\Products.csv",
        r"C:\Users\Dell\Downloads\Transactions.csv",
    )
)

# Attach customer and product attributes to every transaction row.
# Both joins are left joins keyed on the transaction table, so each
# transaction is kept even when its CustomerID or ProductID has no match.
merged_data = pd.merge(
    pd.merge(transactions, customers, on="CustomerID", how="left"),
    products,
    on="ProductID",
    how="left",
)

# Build one profile row per customer: the summed "TotalValue" of their
# transactions joined with that customer's attributes from the customers table.
# The aggregation starts from the transaction rows, so a customer with no
# transactions gets no profile row (and therefore no similarity scores).
customer_profile = (
    merged_data.groupby("CustomerID", as_index=False)["TotalValue"]
    .sum()
    .merge(customers, on="CustomerID", how="left")
)

# Identifier-like columns are excluded from the feature set; the remaining
# non-numeric columns (e.g. 'Region') are one-hot encoded by get_dummies.
non_feature_columns = ["CustomerID", "CustomerName", "SignupDate"]
customer_profile_encoded = pd.get_dummies(
    customer_profile.drop(columns=non_feature_columns)
)

# Standardize every feature column before comparing customers.
scaler = StandardScaler().fit(customer_profile_encoded)
scaled_features = scaler.transform(customer_profile_encoded)

# Pairwise cosine similarity between all customer rows, labelled on both axes
# with CustomerID so scores can be looked up by customer.
similarity_matrix = cosine_similarity(scaled_features)
customer_ids = customer_profile["CustomerID"]
similarity_df = pd.DataFrame(
    similarity_matrix, index=customer_ids, columns=customer_ids
)

# Generate top 3 lookalikes for the first 20 customers.
# Result: {customer_id: [(lookalike_id, similarity_score), ...]} with the
# highest-scoring lookalike first and scores rounded to 4 decimals.
lookalikes = {}
for customer_id in customer_profile["CustomerID"].head(20):
    # Remove the customer's own entry by label instead of skipping position 0:
    # when another customer has an identical profile the scores tie (and float
    # rounding can even put the other one ahead), so the first sorted entry is
    # not guaranteed to be the customer themselves.
    similar_customers = (
        similarity_df[customer_id]
        .drop(labels=customer_id)
        .sort_values(ascending=False)
        .head(3)
    )
    # Cast to a built-in float so the stored scores print as plain numbers
    # (NumPy 2 renders its scalars as "np.float64(...)" inside a list repr).
    lookalikes[customer_id] = [
        (sim_id, round(float(score), 4))
        for sim_id, score in similar_customers.items()
    ]

# Persist the results: one row per customer, with that customer's list of
# (lookalike_id, score) pairs stored in a single "Lookalikes" cell.
lookalike_df = pd.DataFrame(
    {
        "CustomerID": [customer_id for customer_id in lookalikes],
        "Lookalikes": [matches for matches in lookalikes.values()],
    }
)
lookalike_df.to_csv("Lookalike.csv", index=False)

print("Lookalike model completed. Results saved to 'Lookalike.csv'.")


Lookalike model completed. Results saved to 'Lookalike.csv'.
