In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors

In [None]:
# Read the two input CSV files into DataFrames.
# NOTE(review): both paths are absolute and machine-specific; this cell only
# runs where that directory exists.
customers_path = r"E:\assignment zeotap\all_tasks\Customers.csv"
transactions_path = r"E:\assignment zeotap\all_tasks\Transactions.csv"

customers = pd.read_csv(customers_path)
transactions = pd.read_csv(transactions_path)

In [None]:
# Feature engineering: collapse the transaction log to one row per customer.
# Each entry maps an output column to (source column, aggregation function).
spending_aggregations = {
    "TotalSpending": ("TotalValue", "sum"),          # total value of all transactions
    "AvgPurchaseValue": ("TotalValue", "mean"),      # mean value per transaction
    "PurchaseFrequency": ("TransactionID", "count"), # number of transactions
}

customer_spending = (
    transactions
    .groupby("CustomerID")
    .agg(**spending_aggregations)
    .reset_index()  # bring CustomerID back as a regular column for merging
)

In [None]:
# Merge customer profiles with their aggregated transaction history.
# A left join keeps every customer; those without any transaction get NaN in
# the spending columns, which is replaced by 0 ("no activity").
# Only the spending columns are filled: a blanket fillna(0) would also turn
# missing profile fields into the integer 0.
spending_defaults = {"TotalSpending": 0, "AvgPurchaseValue": 0, "PurchaseFrequency": 0}
customer_data = (
    customers
    .merge(customer_spending, on="CustomerID", how="left")
    .fillna(spending_defaults)
)

In [None]:
# Numerical columns used to measure similarity between customers.
features = [
    "TotalSpending",
    "AvgPurchaseValue",
    "PurchaseFrequency",
]

In [None]:
# Standardise the selected features so that no single column dominates the
# distance computation purely because of its scale.
feature_matrix = customer_data[features]
scaler = StandardScaler()
scaler.fit(feature_matrix)
customer_data_scaled = scaler.transform(feature_matrix)

In [None]:
# Fit a nearest-neighbour index on the scaled features for similarity search.
# One extra neighbour is requested because a query for a customer that is in
# the fitted data returns that customer as well.
n_lookalikes = 3
knn = NearestNeighbors(metric="euclidean", n_neighbors=n_lookalikes + 1)
knn.fit(customer_data_scaled)

In [None]:
# Find the top-3 lookalikes for the first 20 rows of customer_data
# (C0001 - C0020, assuming the customer file is ordered by ID).
TOP_N = 3
n_queries = min(20, len(customer_data))  # do not index past the end of a short file

# One batched query instead of one call per customer; the number of neighbours
# returned per row is the n_neighbors the model was constructed with.
distances, indices = knn.kneighbors(customer_data_scaled[:n_queries])

lookalike_results = []
for i in range(n_queries):
    customer_id = customer_data.iloc[i]["CustomerID"]

    # Exclude the query customer by row position instead of assuming it is the
    # first hit: customers with identical feature vectors tie at distance 0,
    # so the query row is not guaranteed to come back in slot 0.
    neighbours = [
        (idx, dist)
        for idx, dist in zip(indices[i], distances[i])
        if idx != i
    ][:TOP_N]

    # Convert distance to a similarity score in (0, 1]; identical customers
    # score 1. Cast to a plain float so the value serialises as a number
    # rather than as a NumPy scalar repr.
    lookalikes = [
        (customer_data.iloc[idx]["CustomerID"], float(1 / (1 + dist)))
        for idx, dist in neighbours
    ]

    # Store in results
    lookalike_results.append({
        "CustomerID": customer_id,
        "Lookalikes": lookalikes,
    })

In [None]:
# Persist the lookalike table (one row per queried customer) as Lookalike.csv.
# NOTE(review): the destination is an absolute, machine-specific path.
lookalike_output_path = "E:/assignment zeotap/all_tasks/Lookalike.csv"
lookalike_df = pd.DataFrame(lookalike_results)
lookalike_df.to_csv(lookalike_output_path, index=False)  # omit the row index column

In [None]:
# Report that the notebook reached the end.
completion_message = "Lookalike model completed! Results saved to Lookalike.csv."
print(completion_message)