<a href="https://colab.research.google.com/github/priyankabansall/PRIYANKA_BANSAL/blob/main/PRIYANKA_BANSAL_Lookalike.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Load the three raw tables from CSV files in the working directory.
customers = pd.read_csv("Customers.csv")
transactions = pd.read_csv("Transactions.csv")
products = pd.read_csv("Products.csv")

# Build one row per transaction enriched with customer and product columns.
# Both joins are inner joins, so transactions whose CustomerID or ProductID
# has no match in the corresponding table are dropped.
merged = pd.merge(
    pd.merge(transactions, customers, on="CustomerID", how="inner"),
    products,
    on="ProductID",
    how="inner",
)

# Collapse the transaction-level table to one row per customer.
# Each output column is a named aggregation over the customer's transactions:
#   TotalSpending    - sum of TotalValue
#   AvgSpending      - mean of TotalValue
#   TransactionCount - number of TransactionID entries
#   UniqueProducts   - number of distinct ProductID values
#   UniqueCategories - number of distinct Category values
# as_index=False keeps CustomerID as a regular column with a 0..n-1 row index.
customer_features = merged.groupby("CustomerID", as_index=False).agg(
    TotalSpending=pd.NamedAgg(column="TotalValue", aggfunc="sum"),
    AvgSpending=pd.NamedAgg(column="TotalValue", aggfunc="mean"),
    TransactionCount=pd.NamedAgg(column="TransactionID", aggfunc="count"),
    UniqueProducts=pd.NamedAgg(column="ProductID", aggfunc="nunique"),
    UniqueCategories=pd.NamedAgg(column="Category", aggfunc="nunique"),
)

# SCALING NUMERICAL DATA
# Every aggregate except the identifier and UniqueCategories is standardised.
numerical_features = customer_features.drop(['CustomerID', 'UniqueCategories'], axis=1)
scaler = StandardScaler()
scaled_numerical_features = scaler.fit(numerical_features).transform(numerical_features)

# HANDLING CATEGORICAL DATA
# UniqueCategories is one-hot encoded, with the first level dropped.
# NOTE(review): UniqueCategories is a count (nunique of Category), so it is
# being treated as a categorical label here rather than as a number - confirm
# this is intended.
encoder = OneHotEncoder(drop='first')  # output is sparse, densified below
category_column = customer_features[['UniqueCategories']]
encoded_categories = encoder.fit(category_column).transform(category_column).toarray()
del category_column  # temporary; keep the module namespace unchanged

# Combine the features: scaled numeric columns first, then the one-hot columns.
features = np.concatenate((scaled_numerical_features, encoded_categories), axis=1)

# SIMILARITY
def get_similar_customers(user_id, top_n=3):
    """Return the customers most similar to ``user_id`` by cosine similarity.

    Similarity is computed between the row of the module-level ``features``
    matrix belonging to ``user_id`` and every row of that matrix. Rows of
    ``features`` are expected to line up positionally with the rows of the
    module-level ``customer_features`` frame.

    Args:
        user_id: Value to look up in ``customer_features['CustomerID']``.
        top_n: Maximum number of lookalikes to return.

    Returns:
        A list of ``(CustomerID, score)`` tuples ordered from most to least
        similar, with ``score`` a Python float rounded to 4 decimals. The
        queried customer is never included.

    Raises:
        IndexError: If ``user_id`` is not present in ``customer_features``.
    """
    customer_ids = customer_features['CustomerID'].to_numpy()

    # Locate the customer by row position so the lookup stays aligned with
    # `features` regardless of the frame's index labels.
    positions = np.flatnonzero(customer_ids == user_id)
    if positions.size == 0:
        # IndexError is kept (instead of KeyError/ValueError) so existing
        # callers that relied on the original failure mode keep working.
        raise IndexError(f"CustomerID {user_id!r} not found in customer_features")
    user_pos = int(positions[0])

    # Only this customer's row of the similarity matrix is needed; computing
    # the full n x n matrix on every call is wasted work.
    row_scores = cosine_similarity(features[user_pos:user_pos + 1], features)[0]

    # Stable descending sort: ties keep their original row order.
    ranking = np.argsort(-row_scores, kind="stable")

    # Exclude the customer explicitly by position. Dropping "the first
    # element" is wrong when another customer ties with (or, through rounding,
    # marginally exceeds) the self-similarity and sorts ahead of it.
    ranking = ranking[ranking != user_pos][:top_n]

    # float() yields plain Python floats, which print cleanly when stringified.
    return [(customer_ids[pos], round(float(row_scores[pos]), 4)) for pos in ranking]

# LOOKALIKES FOR FIRST 20 CUSTOMERS
# Maps each of the first 20 CustomerIDs (in customer_features row order) to a
# list of {'CustomerID', 'SimilarityScore'} dicts for their top lookalikes.
lookalike_map = {}
for customer_id in customer_features['CustomerID'][:20]:
    top_lookalikes = get_similar_customers(customer_id)

    # Map customer ID to their top 3 lookalikes
    lookalike_map[customer_id] = [
        {'CustomerID': lookalike[0], 'SimilarityScore': lookalike[1]}
        for lookalike in top_lookalikes
    ]

# CSV
# Convert the lookalike map to a DataFrame for saving as CSV. Each customer's
# list of lookalikes is stored as its string representation in one column.
lookalike_df = pd.DataFrame({
    'CustomerID': list(lookalike_map.keys()),
    'Lookalikes': [str(lookalikes) for lookalikes in lookalike_map.values()]
})

# Save to CSV. The file name carries the ".csv" extension so that it matches
# the name reported in the message below (it was previously written without
# any extension).
lookalike_df.to_csv("PRIYANKA_BANSAL_Lookalike.csv", index=False)

print("Lookalike results saved as 'PRIYANKA_BANSAL_Lookalike.csv'.")

# TOP 3 LOOKALIKES
# USER INPUT FOR CUSTOMER ID
# Surrounding whitespace is stripped so that a valid ID typed with a stray
# space is not rejected as invalid.
# NOTE(review): input() always returns a string, so this lookup assumes the
# CustomerID column holds strings - confirm against Customers.csv.
user_id = input("Enter the Customer ID for Lookalike Recommendation: ").strip()

# Ensure the entered customer ID exists
if user_id not in customer_features["CustomerID"].values:
    print("Invalid Customer ID. Please try again with a valid ID.")
else:
    # RECOMMEND TOP 3 LOOKALIKES (default top_n of get_similar_customers)
    top_lookalikes = get_similar_customers(user_id)

    # OUTPUT SIMILAR CUSTOMERS, numbered from 1
    print(f"Top 3 Lookalikes for Customer {user_id}:")
    for idx, (lookalike_id, score) in enumerate(top_lookalikes, 1):
        print(f"{idx}. Customer {lookalike_id} with Similarity Score: {score}")



Lookalike results saved as 'PRIYANKA_BANSAL_Lookalike.csv'.
