In [13]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three raw tables (transactions, customer profiles, product catalogue)
transactions_df, customers_df, products_df = (
    pd.read_csv(csv_path)
    for csv_path in ('Transactions.csv', 'Customers.csv', 'Products.csv')
)

# Parse the date columns so they can be used in date arithmetic below
transactions_df = transactions_df.assign(
    TransactionDate=lambda frame: pd.to_datetime(frame['TransactionDate'])
)
customers_df = customers_df.assign(
    SignupDate=lambda frame: pd.to_datetime(frame['SignupDate'])
)

# Step 1: Data Aggregation and Feature Engineering

# One row per customer: transaction count, units bought, total and mean spend
customer_transaction_summary = (
    transactions_df
    .groupby('CustomerID')
    .agg(
        TotalTransactions=('TransactionID', 'count'),
        TotalQuantity=('Quantity', 'sum'),
        TotalSpending=('TotalValue', 'sum'),
        AvgTransactionValue=('TotalValue', 'mean'),
    )
    .reset_index()
)

# Attach the summary to the customer profiles. The inner join keeps only
# customers that appear in both tables, i.e. customers with transactions.
# CustomerAge is the number of whole days between SignupDate and the moment
# this cell runs, so its raw values change from one day to the next.
customer_data = (
    customers_df
    .merge(customer_transaction_summary, how='inner', on='CustomerID')
    .assign(CustomerAge=lambda profile: (pd.Timestamp.now() - profile['SignupDate']).dt.days)
)

# Bring product attributes (including Category) onto every transaction row
transactions_with_products = transactions_df.merge(products_df, how='inner', on='ProductID')

# Product preference features: units bought per customer and category,
# with 0 for categories the customer never bought
product_preferences = pd.pivot_table(
    transactions_with_products,
    values='Quantity',
    index='CustomerID',
    columns='Category',
    aggfunc='sum',
    fill_value=0,
).reset_index()

# Final per-customer table: profile + transaction summary + category preferences
customer_features = customer_data.merge(product_preferences, how='inner', on='CustomerID')

# Everything except the identifier/descriptive columns feeds the similarity
# computation; each remaining column is standardised by StandardScaler.
features = customer_features.drop(
    columns=['CustomerID', 'CustomerName', 'SignupDate', 'Region']
)
scaler = StandardScaler()
features_scaled = scaler.fit(features).transform(features)

# Step 2: Lookalike Model Function

def recommend_similar_customers(input_customer_id, top_n=3):
    """Return the customers most similar to ``input_customer_id``.

    Similarity is the cosine similarity between rows of the module-level
    ``features_scaled`` matrix. Row ``i`` of that matrix is taken to describe
    row ``i`` of the module-level ``customer_features`` frame, because the
    matrix is built from that frame without reordering.

    Args:
        input_customer_id: Value to look up in ``customer_features['CustomerID']``.
        top_n: Maximum number of lookalikes to return. ``None`` returns every
            other customer.

    Returns:
        A list of at most ``top_n`` dicts with the keys ``'CustomerID'`` and
        ``'SimilarityScore'`` (a plain ``float``), ordered from most to least
        similar. The input customer itself is never included.

    Raises:
        IndexError: If ``input_customer_id`` is not present in
            ``customer_features``. The type is the one the previous bare
            ``.index[0]`` lookup raised; only the message is new.
        ValueError: If ``top_n`` is negative.
    """
    # A negative value would otherwise slice "all but the last n" silently.
    if top_n is not None and top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    id_column = customer_features['CustomerID']

    # Locate the customer by row *position* rather than index label, so the
    # lookup stays aligned with features_scaled whatever index the frame has.
    positions = np.flatnonzero((id_column == input_customer_id).to_numpy())
    if positions.size == 0:
        raise IndexError(f"CustomerID {input_customer_id!r} not found in customer_features")
    customer_index = positions[0]

    # Compute cosine similarity between the input customer and all customers
    similarities = cosine_similarity(
        features_scaled[customer_index].reshape(1, -1), features_scaled
    ).flatten()

    # Rank by descending similarity, drop the input customer, keep the top N
    ranked = np.argsort(similarities)[::-1]
    ranked = ranked[ranked != customer_index][:top_n]

    # float(...) turns the NumPy scalar into a built-in float so that printing
    # or serialising the result does not depend on NumPy's scalar repr.
    return [
        {
            'CustomerID': id_column.iloc[idx],
            'SimilarityScore': float(similarities[idx]),
        }
        for idx in ranked
    ]

# Step 3: Example Usage

# Look up the lookalikes of one sample customer and report them
input_customer_id = 'C0001'
recommendations = recommend_similar_customers(input_customer_id)

# Build the heading and one line per lookalike, then emit them in a single
# print call with one entry per output line.
header = f"Top {len(recommendations)} customers similar to Customer {input_customer_id}:"
rows = [
    f"CustomerID: {rec['CustomerID']}, Similarity Score: {rec['SimilarityScore']:.4f}"
    for rec in recommendations
]
print(header, *rows, sep="\n")

Top 3 customers similar to Customer C0001:
CustomerID: C0023, Similarity Score: 0.7979
CustomerID: C0120, Similarity Score: 0.7648
CustomerID: C0197, Similarity Score: 0.6974


In [14]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three raw tables (transactions, customer profiles, product catalogue)
transactions_df, customers_df, products_df = (
    pd.read_csv(csv_path)
    for csv_path in ('Transactions.csv', 'Customers.csv', 'Products.csv')
)

# Parse the date columns so they can be used in date arithmetic below
transactions_df = transactions_df.assign(
    TransactionDate=lambda frame: pd.to_datetime(frame['TransactionDate'])
)
customers_df = customers_df.assign(
    SignupDate=lambda frame: pd.to_datetime(frame['SignupDate'])
)

# Step 1: Data Aggregation and Feature Engineering

# One row per customer: transaction count, units bought, total and mean spend
customer_transaction_summary = (
    transactions_df
    .groupby('CustomerID')
    .agg(
        TotalTransactions=('TransactionID', 'count'),
        TotalQuantity=('Quantity', 'sum'),
        TotalSpending=('TotalValue', 'sum'),
        AvgTransactionValue=('TotalValue', 'mean'),
    )
    .reset_index()
)

# Attach the summary to the customer profiles. The inner join keeps only
# customers that appear in both tables, i.e. customers with transactions.
# CustomerAge is the number of whole days between SignupDate and the moment
# this cell runs, so its raw values change from one day to the next.
customer_data = (
    customers_df
    .merge(customer_transaction_summary, how='inner', on='CustomerID')
    .assign(CustomerAge=lambda profile: (pd.Timestamp.now() - profile['SignupDate']).dt.days)
)

# Bring product attributes (including Category) onto every transaction row
transactions_with_products = transactions_df.merge(products_df, how='inner', on='ProductID')

# Product preference features: units bought per customer and category,
# with 0 for categories the customer never bought
product_preferences = pd.pivot_table(
    transactions_with_products,
    values='Quantity',
    index='CustomerID',
    columns='Category',
    aggfunc='sum',
    fill_value=0,
).reset_index()

# Final per-customer table: profile + transaction summary + category preferences
customer_features = customer_data.merge(product_preferences, how='inner', on='CustomerID')

# Everything except the identifier/descriptive columns feeds the similarity
# computation; each remaining column is standardised by StandardScaler.
features = customer_features.drop(
    columns=['CustomerID', 'CustomerName', 'SignupDate', 'Region']
)
scaler = StandardScaler()
features_scaled = scaler.fit(features).transform(features)

# Step 2: Lookalike Model Function

def get_top_lookalikes(input_customer_id, top_n=3):
    """Return the customers most similar to ``input_customer_id``.

    Similarity is the cosine similarity between rows of the module-level
    ``features_scaled`` matrix. Row ``i`` of that matrix is taken to describe
    row ``i`` of the module-level ``customer_features`` frame, because the
    matrix is built from that frame without reordering.

    Args:
        input_customer_id: Value to look up in ``customer_features['CustomerID']``.
        top_n: Maximum number of lookalikes to return. ``None`` returns every
            other customer.

    Returns:
        A list of at most ``top_n`` dicts with the keys ``'CustomerID'`` and
        ``'SimilarityScore'`` (a plain ``float``), ordered from most to least
        similar. The input customer itself is never included.

    Raises:
        IndexError: If ``input_customer_id`` is not present in
            ``customer_features``. The type is the one the previous bare
            ``.index[0]`` lookup raised; only the message is new.
        ValueError: If ``top_n`` is negative.
    """
    # A negative value would otherwise slice "all but the last n" silently.
    if top_n is not None and top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    id_column = customer_features['CustomerID']

    # Locate the customer by row *position* rather than index label, so the
    # lookup stays aligned with features_scaled whatever index the frame has.
    positions = np.flatnonzero((id_column == input_customer_id).to_numpy())
    if positions.size == 0:
        raise IndexError(f"CustomerID {input_customer_id!r} not found in customer_features")
    customer_index = positions[0]

    # Compute cosine similarity between the input customer and all customers
    similarities = cosine_similarity(
        features_scaled[customer_index].reshape(1, -1), features_scaled
    ).flatten()

    # Rank by descending similarity, drop the input customer, keep the top N
    ranked = np.argsort(similarities)[::-1]
    ranked = ranked[ranked != customer_index][:top_n]

    # float(...) turns the NumPy scalar into a built-in float so that printing
    # or serialising the result does not depend on NumPy's scalar repr.
    return [
        {
            'CustomerID': id_column.iloc[idx],
            'SimilarityScore': float(similarities[idx]),
        }
        for idx in ranked
    ]

# Step 3: Generate Lookalike Data for Customers C0001 - C0020

lookalike_data = {}
# First 20 rows of Customers.csv. NOTE(review): this is C0001 - C0020 only if
# the file is ordered by CustomerID - confirm against the data.
target_customers = customers_df['CustomerID'][:20]

# customer_features is built with inner merges, so a customer with no
# transactions has no feature row and get_top_lookalikes would raise for it.
# Such customers are kept in the output with an empty lookalike list instead
# of aborting the whole export.
customers_with_features = set(customer_features['CustomerID'])

for cust_id in target_customers:
    if cust_id in customers_with_features:
        recommendations = get_top_lookalikes(cust_id)
    else:
        recommendations = []
    lookalike_data[cust_id] = recommendations

# Step 4: Save Lookalike Data to CSV

# One row per target customer; the Lookalikes cell holds a list of
# {'CustomerID', 'Score'} dicts. Scores are cast to built-in floats because
# the list is written to the CSV via its text representation, and NumPy >= 2
# renders NumPy scalars as "np.float64(...)" there.
lookalike_list = [
    {
        'CustomerID': cust_id,
        'Lookalikes': [
            {'CustomerID': rec['CustomerID'], 'Score': float(rec['SimilarityScore'])}
            for rec in recs
        ],
    }
    for cust_id, recs in lookalike_data.items()
]

# Explicit columns keep the CSV header present even when there are no rows
lookalike_df = pd.DataFrame(lookalike_list, columns=['CustomerID', 'Lookalikes'])

# Save to Lookalike.csv
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike.csv has been created successfully!")

Lookalike.csv has been created successfully!
