<a href="https://colab.research.google.com/github/naveennekkanti1/Zeotap/blob/main/Nekkenti_DurgaNaveen_Lookalike.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three raw tables from the mounted Google Drive folder.
customers = pd.read_csv("/content/drive/MyDrive/Zetop/Customers.csv")
products = pd.read_csv("/content/drive/MyDrive/Zetop/Products.csv")
transactions = pd.read_csv("/content/drive/MyDrive/Zetop/Transactions.csv")

# Attach customer and product attributes to every transaction row.
# Both joins use pandas' default inner join, so a transaction whose
# CustomerID or ProductID has no match in the other table is dropped.
merged_data = pd.merge(transactions, customers, on="CustomerID")
merged_data = pd.merge(merged_data, products, on="ProductID")

# Collapse the transactions to one row per customer holding the summed
# spend (TotalValue) and summed units (Quantity).
customer_transactions = merged_data.groupby("CustomerID", as_index=False).agg(
    TotalValue=("TotalValue", "sum"),
    Quantity=("Quantity", "sum"),
)

# Left join so customers without any transaction are kept; their missing
# totals become 0. NOTE: fillna(0) is applied to the whole frame, so a
# missing value in any profile column is replaced by 0 as well.
customer_data = pd.merge(
    customers,
    customer_transactions,
    on="CustomerID",
    how="left",
)
customer_data = customer_data.fillna(0)

# One-hot encode Region. drop_first=True omits the first category level,
# so k distinct regions yield k-1 indicator columns.
customer_data_encoded = pd.get_dummies(
    customer_data,
    columns=["Region"],
    drop_first=True,
)

# Model inputs: everything except the identifier, name and signup-date
# columns, which are removed before scaling.
features = customer_data_encoded.drop(
    ["CustomerID", "CustomerName", "SignupDate"],
    axis="columns",
)

# Standardise each feature column so that no single column dominates the
# similarity purely because of its scale.
scaler = StandardScaler()
scaler.fit(features)
scaled_features = scaler.transform(features)

# Pairwise cosine similarity between all customers:
# similarity_matrix[i][j] compares row i with row j of scaled_features.
similarity_matrix = cosine_similarity(scaled_features)

# Customer IDs in the same row order as similarity_matrix, since both are
# derived from customer_data_encoded.
customer_ids = customer_data_encoded["CustomerID"].to_list()

# Generate top 3 lookalikes for the first 20 customers.
# lookalike_results maps each source CustomerID to a list of up to three
# {"CustomerID": ..., "Score": ...} dicts, ordered from most to least similar.
lookalike_results = {}

for i, customer_id in enumerate(customer_ids[:20]):  # First 20 customers
    # Drop the customer's own entry by index rather than by discarding the
    # top-ranked entry. Self is not guaranteed to sort first: another
    # customer with an identical feature vector ties with it (and wins the
    # tie if it has a lower index, because sorted() is stable), and an
    # all-zero scaled row has a self-similarity of 0.
    similarities = [
        (idx, score)
        for idx, score in enumerate(similarity_matrix[i])
        if idx != i
    ]
    sorted_similarities = sorted(
        similarities, key=lambda x: x[1], reverse=True
    )[:3]
    lookalike_results[customer_id] = [
        # float() converts the NumPy scalar to a builtin float so that a
        # later str()/repr() of this list shows 0.1234 rather than
        # np.float64(0.1234) under NumPy >= 2.
        {"CustomerID": customer_ids[idx], "Score": round(float(score), 4)}
        for idx, score in sorted_similarities
    ]

# Flatten the results to one row per source customer. The list of lookalike
# dicts is stored as its string representation in a single "Lookalikes" column.
lookalike_df = pd.DataFrame(
    [
        {"CustomerID": source_id, "Lookalikes": str(matches)}
        for source_id, matches in lookalike_results.items()
    ]
)

# Write the table to Drive without the DataFrame index column.
lookalike_df.to_csv(
    "/content/drive/MyDrive/Zetop/Nekkenti_DurgaNaveen_Lookalike.csv",
    index=False,
)

# NOTE(review): the message says 'Lookalike.csv', but the file written above
# is Nekkenti_DurgaNaveen_Lookalike.csv.
print("Lookalike recommendations saved to 'Lookalike.csv'.")


Lookalike recommendations saved to 'Lookalike.csv'.
