In [10]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime

In [27]:
class CustomerLookalike:
    """Find similar customers via cosine similarity over profile and purchase features.

    Call ``fit(customers_df, transactions_df, products_df)`` once to build the
    scaled feature matrix, then ``get_lookalikes(customer_id)`` to rank the
    other customers against one target customer.
    """

    def __init__(self):
        self.scaler = StandardScaler()
        # Scaled feature matrix with one row per customer; populated by fit().
        self.features = None
        # CustomerID Series whose positions match the rows of ``features``.
        self.customer_ids = None

    def _preprocess_customer_data(self, customers_df):
        """Build profile features from the customer table.

        Args:
            customers_df: DataFrame with 'CustomerID', 'SignupDate' and
                'Region' columns. It is not modified.

        Returns:
            DataFrame with 'CustomerID', 'days_since_signup' and one
            ``region_<value>`` indicator column per distinct region.
        """
        # Convert signup date to a numeric feature. The values are derived into
        # a new frame instead of being assigned back onto customers_df, so the
        # caller's data keeps its original columns and dtypes.
        signup_dates = pd.to_datetime(customers_df['SignupDate'])
        profile = pd.DataFrame({
            'CustomerID': customers_df['CustomerID'],
            'days_since_signup': (datetime.now() - signup_dates).dt.days,
        })

        # One-hot encode region
        region_features = pd.get_dummies(customers_df['Region'], prefix='region')

        return pd.concat([profile, region_features], axis=1)

    def _preprocess_transaction_data(self, transactions_df, products_df):
        """Aggregate transactions into one feature row per customer.

        Args:
            transactions_df: DataFrame with 'TransactionID', 'CustomerID',
                'ProductID', 'Quantity' and 'TotalValue' columns. It may
                already contain 'Category' and/or 'Price'.
            products_df: DataFrame with 'ProductID' plus the 'Category' and
                'Price' columns that transactions_df lacks.

        Returns:
            DataFrame with a 'CustomerID' column, eight spend/quantity/price
            metrics, a ``spend_<category>`` total per category and the
            matching ``pct_spend_<category>`` share of the customer's spend.
        """
        # Only pull in product attributes the transactions do not already
        # carry. Merging a second 'Price' column would produce
        # Price_x / Price_y and make the aggregation below fail.
        missing_attrs = [
            col for col in ('Category', 'Price')
            if col not in transactions_df.columns
        ]
        if missing_attrs:
            txn_data = transactions_df.merge(
                products_df[['ProductID'] + missing_attrs],
                on='ProductID',
                how='left'
            )
        else:
            txn_data = transactions_df

        # Calculate customer-level transaction metrics. Named aggregation
        # yields flat, self-describing column names directly.
        txn_metrics = txn_data.groupby('CustomerID').agg(
            transaction_count=('TransactionID', 'count'),
            total_spend=('TotalValue', 'sum'),
            avg_transaction_value=('TotalValue', 'mean'),
            std_transaction_value=('TotalValue', 'std'),
            total_quantity=('Quantity', 'sum'),
            avg_quantity=('Quantity', 'mean'),
            avg_price=('Price', 'mean'),
            max_price=('Price', 'max'),
        ).round(2)

        # Calculate category preferences
        category_spend = pd.pivot_table(
            txn_data,
            values='TotalValue',
            index='CustomerID',
            columns='Category',
            aggfunc='sum',
            fill_value=0
        )
        category_spend.columns = [
            f'spend_{str(col).lower().replace(" ", "_")}'
            for col in category_spend.columns
        ]

        # Calculate percentage split of spending across categories
        category_pct = category_spend.div(category_spend.sum(axis=1), axis=0)
        category_pct.columns = [f'pct_{col}' for col in category_pct.columns]

        # Combine all transaction features
        return pd.concat([
            txn_metrics,
            category_spend,
            category_pct
        ], axis=1).reset_index()

    def fit(self, customers_df, transactions_df, products_df):
        """Prepare the scaled feature matrix used for similarity calculations.

        Args:
            customers_df: Customer table (see _preprocess_customer_data).
            transactions_df: Transaction table (see _preprocess_transaction_data).
            products_df: Product table (see _preprocess_transaction_data).

        Returns:
            self, so calls can be chained.
        """
        # Process all data sources
        customer_features = self._preprocess_customer_data(customers_df)
        transaction_features = self._preprocess_transaction_data(transactions_df, products_df)

        # Left merge keeps customers that have no transactions at all.
        all_features = customer_features.merge(
            transaction_features,
            on='CustomerID',
            how='left'
        )

        # Store customer IDs on a fresh 0..n-1 index so a position in this
        # Series is always the matching row of the feature matrix.
        self.customer_ids = all_features['CustomerID'].reset_index(drop=True)

        feature_cols = [col for col in all_features.columns if col != 'CustomerID']
        feature_matrix = all_features[feature_cols].astype(float)

        # Fill missing values with column means. A column that is entirely
        # missing has no mean, so fall back to 0 to keep NaN out of the
        # similarity computation.
        feature_matrix = feature_matrix.fillna(feature_matrix.mean()).fillna(0.0)

        # Scale features
        self.features = self.scaler.fit_transform(feature_matrix)

        return self

    def get_lookalikes(self, customer_id, n_recommendations=3):
        """Find the top N customers most similar to ``customer_id``.

        Args:
            customer_id: CustomerID of the target customer.
            n_recommendations: Maximum number of lookalikes to return.

        Returns:
            DataFrame with 'similar_customer' and 'similarity_score' columns,
            ordered from most to least similar. The target customer is never
            included.

        Raises:
            RuntimeError: If called before fit().
            IndexError: If ``customer_id`` is not among the fitted customers.
        """
        if self.features is None or self.customer_ids is None:
            raise RuntimeError(
                "CustomerLookalike must be fitted before calling get_lookalikes()"
            )

        # Locate the target by position, not index label.
        matches = np.flatnonzero((self.customer_ids == customer_id).to_numpy())
        if matches.size == 0:
            raise IndexError(
                f"CustomerID {customer_id!r} not found among fitted customers"
            )
        customer_idx = int(matches[0])

        # Calculate similarity scores
        similarity_scores = cosine_similarity(
            self.features[customer_idx].reshape(1, -1),
            self.features
        )[0]

        # Drop the target explicitly. Relying on it sorting first breaks when
        # another customer has an identical feature vector (tied score).
        candidates = np.delete(np.arange(similarity_scores.shape[0]), customer_idx)
        ranking = np.argsort(-similarity_scores[candidates], kind='stable')
        similar_indices = candidates[ranking][:max(n_recommendations, 0)]

        return pd.DataFrame({
            'similar_customer': self.customer_ids.iloc[similar_indices],
            'similarity_score': similarity_scores[similar_indices]
        })

In [24]:
def generate_lookalike_recommendations(customers_path, transactions_path, products_path,
                                       first_customer_id='C0001',
                                       last_customer_id='C0020',
                                       n_recommendations=3):
    """Generate lookalike recommendations for a range of customers.

    With the default arguments this covers customers 'C0001' through 'C0020'
    and returns the top 3 lookalikes for each.

    Args:
        customers_path: Path to the customers CSV.
        transactions_path: Path to the transactions CSV.
        products_path: Path to the products CSV.
        first_customer_id: Lower bound (inclusive) of the target CustomerID range.
        last_customer_id: Upper bound (inclusive) of the target CustomerID range.
        n_recommendations: Number of lookalikes to request per customer.

    Returns:
        DataFrame with a 'customer_id' column and a 'recommendations' column
        holding the string form of a list of
        ``{'similar_customer', 'similarity_score'}`` dicts.
    """
    # Load data
    customers_df = pd.read_csv(customers_path)
    transactions_df = pd.read_csv(transactions_path)
    products_df = pd.read_csv(products_path)

    # Initialize and fit model
    model = CustomerLookalike()
    model.fit(customers_df, transactions_df, products_df)

    # Select target customers. NOTE(review): between() compares the IDs as
    # strings, so the range is only meaningful for fixed-width IDs such as
    # 'C0001' - confirm against the data.
    customer_ids = customers_df['CustomerID']
    target_customers = customer_ids[
        customer_ids.between(first_customer_id, last_customer_id)
    ]

    records = []
    for customer_id in target_customers:
        recs = model.get_lookalikes(customer_id, n_recommendations=n_recommendations)
        lookalikes = [
            {
                'similar_customer': similar_customer,
                # float() guarantees a builtin float, so the stringified list
                # below contains plain numbers.
                'similarity_score': round(float(score), 4)
            }
            for similar_customer, score in zip(
                recs['similar_customer'], recs['similarity_score']
            )
        ]
        records.append({
            'customer_id': customer_id,
            'recommendations': str(lookalikes)
        })

    # Create output dataframe; explicit columns keep the schema when no
    # customer falls inside the requested range.
    return pd.DataFrame(records, columns=['customer_id', 'recommendations'])

In [25]:
def main():
    """Build lookalike recommendations from the three CSV inputs and save them.

    Reads Customers.csv, Transactions.csv and Products.csv, writes the result
    to FirstName_LastName_Lookalike.csv and prints the first rows. Any
    exception is reported on stdout instead of being propagated.
    """
    # Input files, in the order the generator expects them:
    # customers, transactions, products.
    input_paths = ("Customers.csv", "Transactions.csv", "Products.csv")

    try:
        lookalikes = generate_lookalike_recommendations(*input_paths)

        # Persist the results without the DataFrame index.
        lookalikes.to_csv("FirstName_LastName_Lookalike.csv", index=False)

        print("Recommendations generated successfully!")
        print("\nSample of recommendations:")
        preview = lookalikes.head()
        print(preview.to_string())

    except Exception as err:
        # Best-effort reporting: show the message rather than a traceback.
        print(f"Error occurred: {err}")
        print("Please check your input data and file paths.")

In [33]:
# Entry point: run the full pipeline only when this code executes as the
# top-level module, not when it is imported.
if __name__ == "__main__":
    main()