In [41]:
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [42]:
# Enrich every transaction with its product attributes, then with the
# attributes of the purchasing customer. Left joins keep all transactions
# even when a product or customer record is missing.
transactions_products = pd.merge(transactions, products, on="ProductID", how="left")
customer_transactions = pd.merge(
    transactions_products, customers, on="CustomerID", how="left"
)

In [43]:
# Collapse the transaction-level table into one feature row per customer:
# total and average spend, number of transactions, and the region taken
# from the customer's first row.
customer_features = (
    customer_transactions
    .groupby("CustomerID")
    .agg(
        total_spend=("TotalValue", "sum"),
        transaction_count=("TransactionID", "count"),
        avg_transaction_value=("TotalValue", "mean"),
        region=("Region", "first"),
    )
    .reset_index()
)

In [44]:
# One-hot encode regions.
# The guard keeps this cell re-runnable: after the first run the "region"
# column has been replaced by region_* indicator columns, and calling
# get_dummies on it again would raise a KeyError.
# dtype=float keeps the indicators numeric so that, combined with the
# scaled features, the similarity input is a homogeneous float matrix.
if "region" in customer_features.columns:
    customer_features = pd.get_dummies(
        customer_features, columns=["region"], prefix="region", dtype=float
    )

In [45]:
# Standardize the numeric spend features so that no single feature dominates
# the similarity computation purely because of its scale.
# StandardScaler comes from sklearn.preprocessing and must be imported in the
# notebook's import cell; without that import this cell fails on a fresh kernel.
numerical_features = ["total_spend", "transaction_count", "avg_transaction_value"]
scaler = StandardScaler()
normalized_features = scaler.fit_transform(customer_features[numerical_features])

In [46]:
# Assemble the similarity input: scaled numeric columns side by side with
# the region indicator columns. Both parts carry a fresh 0..n-1 index so the
# column-wise concat lines rows up by position.
scaled_part = pd.DataFrame(normalized_features, columns=numerical_features)
region_part = customer_features.filter(like="region_").reset_index(drop=True)
X = pd.concat([scaled_part, region_part], axis=1)

In [47]:
# Pairwise cosine similarity between the rows of X: entry [i, j] compares
# row i with row j. X is built row-for-row from customer_features, so row
# positions follow that table's order.
similarity_matrix = cosine_similarity(X)

In [48]:
# Customer identifiers in feature-row order: position i here corresponds to
# row i of similarity_matrix.
# The look-alike map is built in a later cell, once
# get_top_n_similar_customers has been defined. Calling it here would raise
# NameError on a fresh kernel.
customer_ids = customer_features["CustomerID"].values

In [49]:
# Helper: return the top-n most similar customers for one row of the similarity matrix
def get_top_n_similar_customers(index, similarity_matrix, n=3, ids=None):
    """Return the ``n`` customers most similar to the customer at ``index``.

    Args:
        index: Row position of the reference customer in ``similarity_matrix``.
        similarity_matrix: Square array of pairwise similarity scores.
        n: Maximum number of look-alikes to return.
        ids: Optional sequence mapping row positions to customer identifiers.
            Defaults to the notebook-level ``customer_ids``.

    Returns:
        A list of ``(customer_id, similarity_score)`` tuples ordered from most
        to least similar. The reference customer is never included, and fewer
        than ``n`` tuples are returned when not enough other customers exist.
    """
    if ids is None:
        ids = customer_ids
    similarities = np.asarray(similarity_matrix[index])
    # Descending order; the stable sort makes ties resolve by row position so
    # results are deterministic.
    ranked = np.argsort(-similarities, kind="stable")
    # Exclude the reference row by position rather than assuming it is the
    # top-ranked entry: another customer can tie with the self-similarity,
    # in which case "drop the first element" may remove the wrong row.
    ranked = ranked[ranked != index][:max(n, 0)]
    return [(ids[idx], similarities[idx]) for idx in ranked]

In [50]:
# Build the look-alike map for the first 20 customers: each customer id maps
# to its three closest matches as (customer_id, score) pairs.
lookalike_map = {}
first_twenty = customer_ids[:20]
for i, customer_id in enumerate(first_twenty):
    top_matches = get_top_n_similar_customers(i, similarity_matrix, n=3)
    lookalike_map[customer_id] = top_matches

In [51]:
# Print one block per customer: a header line, one indented line per
# look-alike, then a blank separator line.
for cust_id, lookalikes in lookalike_map.items():
    header = f"CustomerID: {cust_id}"
    rows = []
    for similar_cust_id, score in lookalikes:
        rows.append(f"  SimilarCustomerID: {similar_cust_id}, SimilarityScore: {score:.2f}")
    print("\n".join([header, *rows]), end="\n\n")

CustomerID: C0001
  SimilarCustomerID: C0137, SimilarityScore: 1.00
  SimilarCustomerID: C0152, SimilarityScore: 1.00
  SimilarCustomerID: C0107, SimilarityScore: 0.96

CustomerID: C0002
  SimilarCustomerID: C0043, SimilarityScore: 0.98
  SimilarCustomerID: C0142, SimilarityScore: 0.98
  SimilarCustomerID: C0088, SimilarityScore: 0.95

CustomerID: C0003
  SimilarCustomerID: C0133, SimilarityScore: 0.99
  SimilarCustomerID: C0052, SimilarityScore: 0.98
  SimilarCustomerID: C0112, SimilarityScore: 0.94

CustomerID: C0004
  SimilarCustomerID: C0108, SimilarityScore: 0.98
  SimilarCustomerID: C0113, SimilarityScore: 0.98
  SimilarCustomerID: C0165, SimilarityScore: 0.97

CustomerID: C0005
  SimilarCustomerID: C0159, SimilarityScore: 1.00
  SimilarCustomerID: C0123, SimilarityScore: 0.98
  SimilarCustomerID: C0186, SimilarityScore: 0.97

CustomerID: C0006
  SimilarCustomerID: C0168, SimilarityScore: 0.98
  SimilarCustomerID: C0158, SimilarityScore: 0.97
  SimilarCustomerID: C0171, Similarit

In [52]:
# Flatten the look-alike map into one record per (customer, look-alike) pair,
# ready to be turned into a DataFrame.
output_data = []
for cust_id, lookalikes in lookalike_map.items():
    output_data.extend(
        {"CustomerID": cust_id, "SimilarCustomerID": similar_cust_id, "SimilarityScore": score}
        for similar_cust_id, score in lookalikes
    )

In [53]:
# Write the flattened recommendations to Lookalike.csv in the current
# working directory, without the DataFrame index column.
output_file = "Lookalike.csv"
output_df = pd.DataFrame(output_data)
output_df.to_csv(path_or_buf=output_file, index=False)

In [54]:
# Build the DataFrame that is written to the second CSV location.
# NOTE(review): this repeats the DataFrame construction already done in the
# cell that writes "Lookalike.csv"; output_data is not modified in between in
# the cells shown here, so the resulting frame is the same.
output_df = pd.DataFrame(output_data)

In [55]:
# Destination of the second copy of the look-alike CSV.
# NOTE(review): this is an absolute, machine-specific path; adjust it before
# running the notebook on another machine.
output_file_path = "C:/OUTPUTLA/Lookalike.csv"  # Replace with your desired path

In [56]:
# Write the recommendations to the configured path. The destination folder
# is created first (including missing parents) because to_csv does not
# create directories and would otherwise fail when the folder is absent.
Path(output_file_path).parent.mkdir(parents=True, exist_ok=True)
output_df.to_csv(output_file_path, index=False)

print(f"Lookalike recommendations saved to {output_file_path}.")

Lookalike recommendations saved to C:/OUTPUTLA/Lookalike.csv.
