In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Load datasets
# Read the three input tables, in this order, from CSV files given by
# relative path.
customers, transactions, products = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Transactions.csv", "Products.csv")
)

# Aggregate customer transaction data
# One row per CustomerID: summed TotalValue, summed Quantity, and the number
# of TransactionID entries (the count keeps the 'TransactionID' column name).
customer_aggregated = (
    transactions
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        TransactionID=('TransactionID', 'count'),
    )
    .reset_index()
)

# Attach product columns to every transaction row (left join on ProductID).
transactions = transactions.merge(products, how='left', on='ProductID')

# Left-join the aggregates onto the customer table so every customer row is
# kept, then replace every missing value in the result with 0 (this is what
# gives customers without any transactions zero-valued aggregates).
customer_aggregated = (
    customers
    .merge(customer_aggregated, how='left', on='CustomerID')
    .fillna(0)
)

# Preprocess data
# Everything except the identifier/descriptive columns is used as a feature.
non_feature_columns = ['CustomerID', 'CustomerName', 'Region', 'SignupDate']
numeric_features = customer_aggregated.drop(columns=non_feature_columns)

# Standardize each feature column; the fitted scaler is kept in `scaler`.
scaler = StandardScaler()
scaler.fit(numeric_features)
scaled_features = scaler.transform(numeric_features)

# Compute cosine similarity
# Pairwise similarity between every pair of customer rows; row i / column j
# corresponds to rows i and j of `customer_aggregated`.
similarity_matrix = cosine_similarity(scaled_features)

# Generate lookalike recommendations
# Maps each of the first 20 customers (in `customer_aggregated` row order) to
# a list of up to 3 (lookalike CustomerID, cosine similarity) pairs, ordered
# from most to least similar.
customer_ids = customer_aggregated['CustomerID'].tolist()  # positional lookup
lookalike_dict = {}
for idx, customer_id in enumerate(customer_ids[:20]):  # first 20 customers
    # Exclude the customer's own row explicitly by position. Relying on the
    # self-score sorting first is unsafe: customers with identical feature
    # vectors tie with it, and the stable sort then places lower-index ties
    # ahead of the customer itself.
    similarity_scores = [
        (other_idx, score)
        for other_idx, score in enumerate(similarity_matrix[idx])
        if other_idx != idx
    ]
    similarity_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Keep the 3 most similar other customers.
    top_lookalikes = [
        (customer_ids[other_idx], score)
        for other_idx, score in similarity_scores[:3]
    ]
    lookalike_dict[customer_id] = top_lookalikes

# Create Lookalike.csv
# Flatten the mapping into one record per (customer, lookalike) pair.
lookalike_rows = [
    {'CustomerID': source_id, 'LookalikeID': match_id, 'Score': match_score}
    for source_id, matches in lookalike_dict.items()
    for match_id, match_score in matches
]

# Write the records to CSV without the DataFrame index column.
output_path = "FirstName_LastName_Lookalike.csv"
lookalike_df = pd.DataFrame(lookalike_rows)
lookalike_df.to_csv(output_path, index=False)

print("Lookalike recommendations saved to 'FirstName_LastName_Lookalike.csv'.")
