# Generate Lookalike Model for first 20 customers.

In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

def create_customer_features(df):
    """
    Create customer-level features from transaction data.

    Parameters:
    df: Transaction-level DataFrame. The columns read here are
        'CustomerID', 'TransactionID', 'Quantity', 'TotalValue',
        'Category' and 'Region'.

    Returns:
    DataFrame with one row per customer containing, in order:
    'CustomerID', 'transaction_count', 'total_quantity', 'avg_quantity',
    'total_spent', 'avg_transaction_value', one 0/1 'buys_<category>'
    column per distinct category (lower-cased, in order of first
    appearance in df) and one 'region_<Region>' indicator column per
    region.
    """
    # Calculate customer-level transaction metrics
    customer_metrics = df.groupby('CustomerID').agg(
        transaction_count=('TransactionID', 'count'),  # Number of transactions
        total_quantity=('Quantity', 'sum'),            # Purchase quantity metrics
        avg_quantity=('Quantity', 'mean'),
        total_spent=('TotalValue', 'sum'),             # Spending metrics
        avg_transaction_value=('TotalValue', 'mean'),
    ).reset_index()

    # Create category preference features.
    # Membership is tested exactly (customer has at least one row with this
    # category) rather than by substring/regex search in a joined string, so
    # a category such as "Home" does not match a purchase of "Home Decor" and
    # regex metacharacters in category names are harmless.
    for cat in df['Category'].dropna().unique():
        buyers = df.loc[df['Category'] == cat, 'CustomerID'].unique()
        customer_metrics[f'buys_{str(cat).lower()}'] = (
            customer_metrics['CustomerID'].isin(buyers).astype(int)
        )

    # Add region as one-hot encoded features, using the first region
    # recorded for each customer.
    region_dummies = pd.get_dummies(
        df.groupby('CustomerID')['Region'].first(), prefix='region'
    ).reset_index()
    customer_metrics = customer_metrics.merge(region_dummies, on='CustomerID')

    return customer_metrics

def calculate_similarity_scores(customer_features, target_customer_id, top_n=3):
    """
    Calculate similarity scores between target customer and all other customers.

    Parameters:
    customer_features: DataFrame with a 'CustomerID' column; every other
        column is used as a numeric feature.
    target_customer_id: CustomerID value to find lookalikes for.
    top_n: Number of most similar customers to return (default 3).

    Returns:
    DataFrame with columns 'CustomerID' and 'similarity_score', sorted by
    descending score, excluding the target customer, at most top_n rows.

    Raises:
    IndexError: if target_customer_id is not present in customer_features.
    """
    customer_ids = customer_features['CustomerID']

    # Locate the target by *position*: the scaled feature matrix is a plain
    # array, so an index label is only a valid row number when the frame
    # happens to carry a default RangeIndex.
    positions = np.flatnonzero((customer_ids == target_customer_id).to_numpy())
    if positions.size == 0:
        raise IndexError(
            f"Customer {target_customer_id!r} not found in customer features"
        )
    target_pos = int(positions[0])

    # Scale features so every column contributes on a comparable scale
    features_scaled = StandardScaler().fit_transform(
        customer_features.drop('CustomerID', axis=1)
    )

    # Cosine similarity of the target row against all rows; only this one
    # row is needed, so the full N x N matrix is never built.
    similarity_scores = cosine_similarity(
        features_scaled[target_pos:target_pos + 1], features_scaled
    )[0]

    # Create recommendations dataframe (keeps the input frame's index)
    recommendations = pd.DataFrame({
        'CustomerID': customer_ids,
        'similarity_score': similarity_scores
    })

    # Remove target customer and sort by similarity
    others = recommendations[recommendations['CustomerID'] != target_customer_id]
    return others.sort_values('similarity_score', ascending=False).head(top_n)

def generate_lookalikes(df, target_customer_ids):
    """
    Generate lookalike recommendations for multiple target customers.

    Parameters:
    df: Transaction-level DataFrame passed to create_customer_features.
    target_customer_ids: Iterable of CustomerID values to find lookalikes for.

    Returns:
    Dict mapping each target customer ID to a list of
    (customer_id, similarity_score) tuples, scores rounded to 3 decimals,
    in the order returned by calculate_similarity_scores.
    """
    # Create customer features once; they are shared by every target
    customer_features = create_customer_features(df)

    all_recommendations = {}

    # Generate recommendations for each target customer
    for customer_id in target_customer_ids:
        top_matches = calculate_similarity_scores(customer_features, customer_id)

        # Build the tuples column-wise instead of DataFrame.apply(axis=1):
        # on an empty frame apply() returns a DataFrame, which has no
        # .tolist(), whereas this simply yields an empty list.
        all_recommendations[customer_id] = [
            (other_id, round(float(score), 3))
            for other_id, score in zip(
                top_matches['CustomerID'], top_matches['similarity_score']
            )
        ]

    return all_recommendations

# Read and process the data.
# Raw string: the backslashes in this Windows path must be taken literally
# rather than parsed as (invalid) escape sequences such as "\L" or "\z".
df = pd.read_csv(r'D:\LAB\zeotap\Data_Science_intern_assignment\dataset\merged.csv')

# Generate recommendations for first 20 customers (C0001 .. C0020)
target_customers = [f'C{i:04d}' for i in range(1, 21)]
lookalike_recommendations = generate_lookalikes(df, target_customers)

# Create output DataFrame: one row per target customer, with the
# recommendations rendered as "ID(score); ID(score); ..."
output_rows = [
    {
        'customer_id': customer_id,
        'recommendations': '; '.join(
            f"{rec_id}({score})" for rec_id, score in recommendations
        ),
    }
    for customer_id, recommendations in lookalike_recommendations.items()
]

output_df = pd.DataFrame(output_rows)

# Save to CSV
output_df.to_csv('Yogesh_Pahuja_Lookalike.csv', index=False)
output_df

Unnamed: 0,customer_id,recommendations
0,C0001,C0174(0.984); C0152(0.959); C0085(0.863)
1,C0002,C0159(0.951); C0134(0.888); C0128(0.76)
2,C0003,C0129(0.915); C0091(0.9); C0195(0.86)
3,C0004,C0012(0.972); C0113(0.791); C0102(0.779)
4,C0005,C0007(0.96); C0140(0.949); C0128(0.78)
5,C0006,C0187(0.865); C0108(0.668); C0153(0.632)
6,C0007,C0140(0.978); C0005(0.96); C0080(0.817)
7,C0008,C0098(0.917); C0024(0.903); C0194(0.902)
8,C0009,C0198(0.973); C0058(0.711); C0060(0.657)
9,C0010,C0132(0.933); C0061(0.77); C0063(0.696)


# Lookalike Model based on user information.

In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

class CustomerRecommender:
    """
    Recommend lookalike customers from transaction data.

    Customer-level features are built once at construction time; each call
    to find_similar_customers scales them and ranks all other customers by
    cosine similarity to a target customer.
    """

    def __init__(self, df):
        """Initialize with transaction data"""
        self.df = df
        self.customer_features = self._create_customer_features()

    def _create_customer_features(self):
        """
        Create customer-level features from transaction data.

        Reads the 'CustomerID', 'TransactionID', 'Quantity', 'TotalValue',
        'Category' and 'Region' columns of self.df and returns one row per
        customer: transaction metrics, one 0/1 'buys_<category>' column per
        distinct category and one 'region_<Region>' indicator per region.
        """
        # Calculate customer-level transaction metrics
        customer_metrics = self.df.groupby('CustomerID').agg(
            transaction_count=('TransactionID', 'count'),
            total_quantity=('Quantity', 'sum'),
            avg_quantity=('Quantity', 'mean'),
            total_spent=('TotalValue', 'sum'),
            avg_transaction_value=('TotalValue', 'mean'),
        ).reset_index()

        # Create category preference features.
        # Exact membership test instead of substring/regex search in a joined
        # string, so "Home" does not match a purchase of "Home Decor" and
        # regex metacharacters in category names are harmless.
        for cat in self.df['Category'].dropna().unique():
            buyers = self.df.loc[self.df['Category'] == cat, 'CustomerID'].unique()
            customer_metrics[f'buys_{str(cat).lower()}'] = (
                customer_metrics['CustomerID'].isin(buyers).astype(int)
            )

        # Add region as one-hot encoded features, using the first region
        # recorded for each customer.
        region_dummies = pd.get_dummies(
            self.df.groupby('CustomerID')['Region'].first(), prefix='region'
        ).reset_index()
        customer_metrics = customer_metrics.merge(region_dummies, on='CustomerID')

        return customer_metrics

    def find_similar_customers(self, input_value, input_type='customer_id', n_recommendations=3):
        """
        Find similar customers based on input type and value

        Parameters:
        input_value: The input value (customer_id, product_id, or product_name)
        input_type: Type of input ('customer_id', 'product_id', or 'product_name').
            Any value other than 'customer_id' or 'product_name' is treated
            as 'product_id'.
        n_recommendations: Number of recommendations to return

        Returns:
        A dict with keys 'input_value', 'input_type', 'target_customer' and
        'recommendations' (a list of records with 'CustomerID',
        'CustomerName', 'Region' and 'similarity_score'). If the product has
        no transactions or the customer is unknown, a message string is
        returned instead.
        """
        if input_type == 'customer_id':
            target_customer_id = input_value
        else:
            # If input is product-based, find the most representative customer
            product_column = 'ProductName' if input_type == 'product_name' else 'ProductID'
            product_transactions = self.df[self.df[product_column] == input_value]

            if product_transactions.empty:
                return "No transactions found for this product."

            # Customer with the most transaction rows for this product
            target_customer_id = (product_transactions.groupby('CustomerID')
                                  .size()
                                  .sort_values(ascending=False)
                                  .index[0])

        # Get target customer's features
        target_features = self.customer_features[
            self.customer_features['CustomerID'] == target_customer_id
        ]

        if target_features.empty:
            return "Customer not found."

        # Prepare features for similarity calculation
        customer_ids = self.customer_features['CustomerID']
        features = self.customer_features.drop('CustomerID', axis=1)

        # Scale features; the target row goes through the same fitted scaler
        scaler = StandardScaler()
        features_scaled = scaler.fit_transform(features)
        target_features_scaled = scaler.transform(
            target_features.drop('CustomerID', axis=1)
        )

        # Cosine similarity of the target against every customer
        similarity_scores = cosine_similarity(target_features_scaled, features_scaled)[0]

        # Create recommendations dataframe
        recommendations = pd.DataFrame({
            'CustomerID': customer_ids,
            'similarity_score': similarity_scores
        })

        # Remove target customer and sort by similarity
        recommendations = recommendations[
            recommendations['CustomerID'] != target_customer_id
        ].sort_values('similarity_score', ascending=False)

        # Add customer details (first recorded name and region per customer)
        customer_details = self.df.groupby('CustomerID').agg({
            'CustomerName': 'first',
            'Region': 'first'
        }).reset_index()

        recommendations = recommendations.merge(customer_details, on='CustomerID')

        # Format output
        top_recommendations = recommendations.head(n_recommendations)
        return {
            'input_value': input_value,
            'input_type': input_type,
            'target_customer': target_customer_id,
            'recommendations': top_recommendations[
                ['CustomerID', 'CustomerName', 'Region', 'similarity_score']
            ].to_dict('records')
        }

# Example usage.
# Raw string: the backslashes in this Windows path must be taken literally
# rather than parsed as (invalid) escape sequences such as "\L" or "\z".
df = pd.read_csv(r'D:\LAB\zeotap\Data_Science_intern_assignment\dataset\merged.csv')
recommender = CustomerRecommender(df)

# Example 1: Find similar customers based on customer ID
print("\nRecommendations based on customer ID:")
result1 = recommender.find_similar_customers('C0001', input_type='customer_id')
print(result1)

# Example 2: Find similar customers based on product name
print("\nRecommendations based on product name:")
result2 = recommender.find_similar_customers(
    'ComfortLiving Bluetooth Speaker',
    input_type='product_name'
)
print(result2)

# Example 3: Find similar customers based on product ID
print("\nRecommendations based on product ID:")
result3 = recommender.find_similar_customers('P067', input_type='product_id')

print(result3)



Recommendations based on customer ID:
{'input_value': 'C0001', 'input_type': 'customer_id', 'target_customer': 'C0001', 'recommendations': [{'CustomerID': 'C0174', 'CustomerName': 'Tracy Steele', 'Region': 'South America', 'similarity_score': 0.9843336045456204}, {'CustomerID': 'C0152', 'CustomerName': 'Justin Evans', 'Region': 'South America', 'similarity_score': 0.9593067012351622}, {'CustomerID': 'C0085', 'CustomerName': 'Richard Brown', 'Region': 'South America', 'similarity_score': 0.8625793679416225}]}

Recommendations based on product name:
{'input_value': 'ComfortLiving Bluetooth Speaker', 'input_type': 'product_name', 'target_customer': 'C0008', 'recommendations': [{'CustomerID': 'C0098', 'CustomerName': 'Laura Gilbert', 'Region': 'North America', 'similarity_score': 0.9173478039913854}, {'CustomerID': 'C0024', 'CustomerName': 'Michele Cooley', 'Region': 'North America', 'similarity_score': 0.9031461871600093}, {'CustomerID': 'C0194', 'CustomerName': 'Stacy Cook', 'Region': '