In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def build_lookalike_model(customers_df, transactions_df, target_customer_id, top_n=3):
    """
    Find the customers whose text profile is most similar to a target customer.

    Each customer is described by one text profile (region, purchased product
    names, purchased categories). Profiles are turned into TF-IDF vectors and
    ranked by cosine similarity to the target customer's vector.

    Args:
        customers_df (pd.DataFrame): Customer table; the 'CustomerID' and
            'Region' columns are read.
        transactions_df (pd.DataFrame): Transaction table; the 'CustomerID',
            'ProductName' and 'Category' columns are read.
        target_customer_id (str): ID of the customer for whom to find lookalikes.
        top_n (int): Maximum number of lookalikes to return. Defaults to 3.

    Returns:
        pd.DataFrame: Up to ``top_n`` rows with columns 'CustomerID' and
        'SimilarityScore', sorted by descending score. The target customer is
        never part of the result. Rows keep the index labels of ``customers_df``.

    Raises:
        ValueError: If ``top_n`` is negative.
        IndexError: If ``target_customer_id`` does not occur in ``customers_df``.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Locate the target by *position*, not by index label: the TF-IDF matrix
    # rows are positional, while customers_df may carry an arbitrary index
    # (for example after filtering).
    is_target = (customers_df['CustomerID'] == target_customer_id).to_numpy()
    target_positions = is_target.nonzero()[0]
    if len(target_positions) == 0:
        # Fail early with a readable message instead of an opaque
        # "index 0 is out of bounds" after all the expensive work.
        raise IndexError(
            f"Customer {target_customer_id!r} not found in customers_df"
        )
    target_position = int(target_positions[0])

    # One text profile per row of customers_df, in row order
    customer_profiles = _build_customer_profiles(customers_df, transactions_df)

    # Create TF-IDF vectors for customer profiles
    customer_profiles_tfidf = TfidfVectorizer().fit_transform(customer_profiles)

    # Cosine similarity of the target's vector against every customer's vector
    similarity_scores = cosine_similarity(
        customer_profiles_tfidf[target_position], customer_profiles_tfidf
    )[0]

    similar_customers = pd.DataFrame(
        {
            'CustomerID': customers_df['CustomerID'].to_numpy(),
            'SimilarityScore': similarity_scores,
        },
        index=customers_df.index,
    )

    # Remove the target explicitly. Relying on it sorting first breaks when
    # another customer ties with it (identical profile, float rounding).
    candidates = similar_customers[similar_customers['CustomerID'] != target_customer_id]

    # A stable sort keeps tied customers in customers_df order, so the result
    # is deterministic.
    ranked = candidates.sort_values(by='SimilarityScore', ascending=False, kind='stable')
    return ranked.head(top_n)


def _build_customer_profiles(customers_df, transactions_df):
    """Return one '<region> <product names> <categories>' string per customer row."""

    def _join_text(values):
        # Missing values become empty strings; astype(str) guards against
        # non-string entries that would make str.join raise.
        return ' '.join(values.fillna('').astype(str))

    # Aggregate every customer's purchases in one pass instead of rescanning
    # the transaction table once per customer.
    purchases = transactions_df.groupby('CustomerID', sort=False)
    product_text = purchases['ProductName'].agg(_join_text)
    category_text = purchases['Category'].agg(_join_text)

    customer_ids = customers_df['CustomerID'].reset_index(drop=True)
    # Customers without any transaction get empty product/category text
    products = customer_ids.map(product_text).fillna('')
    categories = customer_ids.map(category_text).fillna('')

    return [
        f"{region} {product_names} {category_names}"
        for region, product_names, category_names in zip(
            customers_df['Region'], products, categories
        )
    ]

# Read the three input tables from the working directory
customers_df = pd.read_csv("Customers.csv")
transactions_df = pd.read_csv("Transactions.csv")
products_df = pd.read_csv("Products.csv")

# Left-join the product table onto every transaction row via 'ProductID'
merged_df = pd.merge(transactions_df, products_df, on='ProductID', how='left')

# Collect [target, lookalike, score] rows for the first 20 customers
lookalike_list = []
for customer_id in customers_df["CustomerID"].iloc[:20]:
    top_lookalikes = build_lookalike_model(customers_df, merged_df, customer_id)
    lookalike_list.extend(
        [customer_id, match_id, score]
        for match_id, score in zip(
            top_lookalikes['CustomerID'], top_lookalikes['SimilarityScore']
        )
    )

# Write the collected rows to "Lookalike.csv" without the row index
lookalike_df = pd.DataFrame(
    lookalike_list,
    columns=['TargetCustomerID', 'LookalikeCustomerID', 'SimilarityScore'],
)
lookalike_df.to_csv("Lookalike.csv", index=False)

print("Lookalike customer recommendations generated for the first 20 customers.")

Lookalike customer recommendations generated for the first 20 customers.
