In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import roc_auc_score, classification_report
import warnings
warnings.filterwarnings('ignore')

print("📈 MARKETING ATTRIBUTION ANALYSIS")
print("=" * 60)

class MarketingAttributionAnalyzer:
    """
    Advanced marketing attribution modeling for e-commerce
    Multi-touch attribution, channel optimization, and ROI analysis
    """
    
    def __init__(self, transaction_data):
        self.transaction_data = transaction_data
        self.customer_journeys = {}
        self.attribution_models = {}
        self.channel_performance = {}
        
    def simulate_customer_journeys(self):
        """Simulate realistic customer journey data with multiple touchpoints"""
        
        print("🔍 Simulating customer journey data...")
        
        # Marketing channels with realistic properties
        channels = {
            'organic_search': {
                'cost_per_click': 0.0,
                'conversion_rate': 0.025,
                'typical_position': 'first_touch',
                'attribution_weight': 0.4
            },
            'paid_search': {
                'cost_per_click': 2.50,
                'conversion_rate': 0.045,
                'typical_position': 'last_touch',
                'attribution_weight': 0.25
            },
            'social_media': {
                'cost_per_click': 1.80,
                'conversion_rate': 0.018,
                'typical_position': 'middle_touch',
                'attribution_weight': 0.15
            },
            'email_marketing': {
                'cost_per_click': 0.20,
                'conversion_rate': 0.065,
                'typical_position': 'last_touch',
                'attribution_weight': 0.30
            },
            'display_ads': {
                'cost_per_click': 3.20,
                'conversion_rate': 0.012,
                'typical_position': 'first_touch',
                'attribution_weight': 0.10
            },
            'affiliate': {
                'cost_per_click': 1.50,
                'conversion_rate': 0.035,
                'typical_position': 'last_touch',
                'attribution_weight': 0.20
            },
            'direct': {
                'cost_per_click': 0.0,
                'conversion_rate': 0.080,
                'typical_position': 'last_touch',
                'attribution_weight': 0.35
            },
            'referral': {
                'cost_per_click': 0.0,
                'conversion_rate': 0.055,
                'typical_position': 'middle_touch',
                'attribution_weight': 0.25
            }
        }
        
        customer_journeys = []
        journey_id = 1
        
        for _, transaction in self.transaction_data.iterrows():
            customer_id = transaction['customer_id']
            purchase_date = pd.to_datetime(transaction['date'])
            purchase_value = transaction['total_amount']
            
            # Simulate journey length (1-7 touchpoints)
            journey_length = np.random.choice([1, 2, 3, 4, 5, 6, 7], 
                                            p=[0.25, 0.20, 0.18, 0.15, 0.10, 0.07, 0.05])
            
            # Generate touchpoints leading to purchase
            touchpoints = []
            journey_channels = []
            
            for touch_num in range(journey_length):
                # Select channel based on position in journey - FIXED PROBABILITIES
                if touch_num == 0:  # First touch
                    channel_probs = [0.30, 0.15, 0.20, 0.05, 0.25, 0.02, 0.02, 0.01]
                elif touch_num == journey_length - 1:  # Last touch
                    channel_probs = [0.10, 0.25, 0.10, 0.30, 0.05, 0.15, 0.03, 0.02]
                else:  # Middle touches
                    channel_probs = [0.15, 0.20, 0.25, 0.15, 0.10, 0.10, 0.03, 0.02]
                
                # Normalize probabilities to ensure they sum to 1.0
                channel_probs = np.array(channel_probs)
                channel_probs = channel_probs / channel_probs.sum()
                
                channel = np.random.choice(list(channels.keys()), p=channel_probs)
                journey_channels.append(channel)
                
                # Calculate touchpoint date (1-30 days before purchase)
                days_before = np.random.exponential(scale=5) + touch_num * 2
                days_before = min(days_before, 30)  # Cap at 30 days
                touchpoint_date = purchase_date - timedelta(days=days_before)
                
                touchpoint = {
                    'journey_id': journey_id,
                    'customer_id': customer_id,
                    'touchpoint_number': touch_num + 1,
                    'channel': channel,
                    'touchpoint_date': touchpoint_date,
                    'purchase_date': purchase_date,
                    'days_to_conversion': days_before,
                    'conversion_value': purchase_value,
                    'channel_cost': channels[channel]['cost_per_click'],
                    'converted': 1
                }
                
                touchpoints.append(touchpoint)
            
            # Add journey summary
            journey_summary = {
                'journey_id': journey_id,
                'customer_id': customer_id,
                'journey_length': journey_length,
                'first_touch_channel': journey_channels[0],
                'last_touch_channel': journey_channels[-1],
                'journey_duration': max([t['days_to_conversion'] for t in touchpoints]),
                'total_touchpoints': journey_length,
                'unique_channels': len(set(journey_channels)),
                'conversion_value': purchase_value,
                'converted': 1
            }
            
            customer_journeys.extend(touchpoints)
            journey_id += 1
        
        # Create non-converting journeys (important for attribution modeling)
        non_converting_journeys = []
        for _ in range(int(len(customer_journeys) * 0.8)):  # 80% non-converting traffic
            
            journey_length = np.random.choice([1, 2, 3, 4], p=[0.60, 0.25, 0.10, 0.05])
            customer_id = np.random.randint(1, 10000)  # New customer IDs
            journey_date = pd.to_datetime(np.random.choice(self.transaction_data['date']))
            
            for touch_num in range(journey_length):
                channel_probs = np.array([0.25, 0.15, 0.25, 0.10, 0.15, 0.05, 0.03, 0.02])
                channel_probs = channel_probs / channel_probs.sum()  # Normalize
                channel = np.random.choice(list(channels.keys()), p=channel_probs)
                
                days_before = np.random.exponential(scale=3) + touch_num
                touchpoint_date = journey_date - timedelta(days=days_before)
                
                touchpoint = {
                    'journey_id': journey_id,
                    'customer_id': customer_id,
                    'touchpoint_number': touch_num + 1,
                    'channel': channel,
                    'touchpoint_date': touchpoint_date,
                    'purchase_date': None,
                    'days_to_conversion': None,
                    'conversion_value': 0,
                    'channel_cost': channels[channel]['cost_per_click'],
                    'converted': 0
                }
                
                non_converting_journeys.append(touchpoint)
            
            journey_id += 1
        
        # Combine all touchpoints
        all_touchpoints = customer_journeys + non_converting_journeys
        touchpoints_df = pd.DataFrame(all_touchpoints)
        
        print(f"✅ Customer journey data simulated:")
        print(f"  🎯 {len(set([t['journey_id'] for t in customer_journeys]))} converting journeys")
        print(f"  📊 {len(set([t['journey_id'] for t in non_converting_journeys]))} non-converting journeys")
        print(f"  📈 {len(all_touchpoints)} total touchpoints")
        
        self.customer_journeys = touchpoints_df
        return touchpoints_df
    
    def build_attribution_models(self):
        """Build multiple attribution models for comparison"""
        
        print("\n🤖 Building attribution models...")
        
        journeys_df = self.customer_journeys
        
        # 1. Rule-based attribution models
        attribution_models = {}
        
        # First-touch attribution
        first_touch = journeys_df[journeys_df['touchpoint_number'] == 1].copy()
        first_touch_attr = first_touch.groupby('channel').agg({
            'conversion_value': 'sum',
            'converted': 'sum',
            'journey_id': 'nunique'
        }).round(2)
        first_touch_attr.columns = ['attributed_revenue', 'attributed_conversions', 'attributed_journeys']
        attribution_models['first_touch'] = first_touch_attr
        
        # Last-touch attribution
        last_touch = journeys_df.loc[journeys_df.groupby('journey_id')['touchpoint_number'].idxmax()]
        last_touch_attr = last_touch.groupby('channel').agg({
            'conversion_value': 'sum',
            'converted': 'sum',
            'journey_id': 'nunique'
        }).round(2)
        last_touch_attr.columns = ['attributed_revenue', 'attributed_conversions', 'attributed_journeys']
        attribution_models['last_touch'] = last_touch_attr
        
        # Linear attribution (equal credit to all touchpoints)
        converting_journeys = journeys_df[journeys_df['converted'] == 1].copy()
        journey_lengths = converting_journeys.groupby('journey_id')['touchpoint_number'].max()
        converting_journeys = converting_journeys.merge(
            journey_lengths.rename('total_touchpoints'), 
            on='journey_id'
        )
        converting_journeys['linear_attribution'] = (
            converting_journeys['conversion_value'] / converting_journeys['total_touchpoints']
        )
        
        linear_attr = converting_journeys.groupby('channel').agg({
            'linear_attribution': 'sum',
            'journey_id': 'nunique'
        }).round(2)
        linear_attr.columns = ['attributed_revenue', 'attributed_journeys']
        linear_attr['attributed_conversions'] = linear_attr['attributed_journeys']  # Approximation
        attribution_models['linear'] = linear_attr
        
        # Time-decay attribution (more recent touchpoints get more credit)
        converting_journeys['time_decay_weight'] = np.exp(-converting_journeys['days_to_conversion'] / 7)  # 7-day half-life
        # Normalize weights within each journey
        journey_weight_sums = converting_journeys.groupby('journey_id')['time_decay_weight'].sum()
        converting_journeys = converting_journeys.merge(
            journey_weight_sums.rename('total_weight'), 
            on='journey_id'
        )
        converting_journeys['time_decay_attribution'] = (
            converting_journeys['conversion_value'] * 
            converting_journeys['time_decay_weight'] / converting_journeys['total_weight']
        )
        
        time_decay_attr = converting_journeys.groupby('channel').agg({
            'time_decay_attribution': 'sum',
            'journey_id': 'nunique'
        }).round(2)
        time_decay_attr.columns = ['attributed_revenue', 'attributed_journeys']
        time_decay_attr['attributed_conversions'] = time_decay_attr['attributed_journeys']
        attribution_models['time_decay'] = time_decay_attr
        
        # 2. Machine Learning Attribution Model
        print("  🧠 Training ML attribution model...")
        
        # Prepare features for ML model
        ml_features = journeys_df.copy()
        
        # Channel encoding
        le_channel = LabelEncoder()
        ml_features['channel_encoded'] = le_channel.fit_transform(ml_features['channel'])
        
        # Feature engineering
        ml_features['is_first_touch'] = (ml_features['touchpoint_number'] == 1).astype(int)
        ml_features['touchpoint_number_log'] = np.log(ml_features['touchpoint_number'])
        ml_features['days_to_conversion_log'] = np.log(ml_features['days_to_conversion'].fillna(30) + 1)
        ml_features['channel_cost_log'] = np.log(ml_features['channel_cost'] + 0.1)
        
        # Journey-level features
        journey_stats = ml_features.groupby('journey_id').agg({
            'touchpoint_number': 'max',
            'channel': 'nunique',
            'channel_cost': 'sum'
        }).rename(columns={
            'touchpoint_number': 'journey_length',
            'channel': 'unique_channels',
            'channel_cost': 'total_journey_cost'
        })
        
        ml_features = ml_features.merge(journey_stats, on='journey_id')
        
        # Prepare ML dataset
        feature_columns = [
            'channel_encoded', 'touchpoint_number', 'is_first_touch',
            'touchpoint_number_log', 'days_to_conversion_log', 'channel_cost_log',
            'journey_length', 'unique_channels', 'total_journey_cost'
        ]
        
        X = ml_features[feature_columns].fillna(0)
        y = ml_features['converted']
        
        # Train model
        rf_attribution = RandomForestClassifier(n_estimators=100, random_state=42)
        rf_attribution.fit(X, y)
        
        # Predict attribution scores
        attribution_scores = rf_attribution.predict_proba(X)[:, 1]
        ml_features['ml_attribution_score'] = attribution_scores
        
        # Calculate ML-based attribution
        # Normalize scores within each journey for converting journeys
        converting_ml = ml_features[ml_features['converted'] == 1].copy()
        journey_score_sums = converting_ml.groupby('journey_id')['ml_attribution_score'].sum()
        converting_ml = converting_ml.merge(
            journey_score_sums.rename('total_score'), 
            on='journey_id'
        )
        converting_ml['ml_attribution'] = (
            converting_ml['conversion_value'] * 
            converting_ml['ml_attribution_score'] / converting_ml['total_score']
        )
        
        ml_attr = converting_ml.groupby('channel').agg({
            'ml_attribution': 'sum',
            'journey_id': 'nunique'
        }).round(2)
        ml_attr.columns = ['attributed_revenue', 'attributed_journeys']
        ml_attr['attributed_conversions'] = ml_attr['attributed_journeys']
        attribution_models['ml_based'] = ml_attr
        
        # Model performance
        auc_score = roc_auc_score(y, attribution_scores)
        print(f"  📈 ML model AUC: {auc_score:.3f}")
        
        self.attribution_models = attribution_models
        
        print(f"✅ Attribution models built:")
        for model_name in attribution_models.keys():
            total_attributed = attribution_models[model_name]['attributed_revenue'].sum()
            print(f"  📊 {model_name.replace('_', ' ').title()}: ${total_attributed:,.0f} attributed revenue")
        
        return attribution_models
    
    def calculate_channel_performance(self):
        """Calculate comprehensive channel performance metrics"""
        
        print("\n📊 Calculating channel performance metrics...")
        
        journeys_df = self.customer_journeys
        
        # Channel-level metrics
        channel_metrics = journeys_df.groupby('channel').agg({
            'journey_id': 'nunique',
            'conversion_value': 'sum',
            'converted': 'sum',
            'channel_cost': 'sum'
        }).round(2)
        
        channel_metrics.columns = ['total_touchpoints', 'total_revenue', 'total_conversions', 'total_cost']
        
        # Calculate performance metrics
        channel_metrics['conversion_rate'] = (
            channel_metrics['total_conversions'] / channel_metrics['total_touchpoints']
        ).round(4)
        
        channel_metrics['cost_per_click'] = (
            channel_metrics['total_cost'] / channel_metrics['total_touchpoints']
        ).round(2)
        
        channel_metrics['cost_per_conversion'] = (
            channel_metrics['total_cost'] / channel_metrics['total_conversions']
        ).replace([np.inf, -np.inf], 0).round(2)
        
        channel_metrics['revenue_per_conversion'] = (
            channel_metrics['total_revenue'] / channel_metrics['total_conversions']
        ).replace([np.inf, -np.inf], 0).round(2)
        
        channel_metrics['roas'] = (  # Return on Ad Spend
            channel_metrics['total_revenue'] / channel_metrics['total_cost']
        ).replace([np.inf, -np.inf], 0).round(2)
        
        # Set ROAS for free channels (organic, direct, referral) to indicate their value
        free_channels = ['organic_search', 'direct', 'referral']
        for channel in free_channels:
            if channel in channel_metrics.index:
                channel_metrics.loc[channel, 'roas'] = 999.99  # Indicate "infinite" ROAS
        
        # Channel attribution comparison across models
        attribution_comparison = pd.DataFrame()
        for model_name, model_results in self.attribution_models.items():
            model_results_copy = model_results.copy()
            model_results_copy['model'] = model_name
            attribution_comparison = pd.concat([attribution_comparison, model_results_copy.reset_index()])
        
        # Pivot for easy comparison
        attribution_pivot = attribution_comparison.pivot(
            index='channel', 
            columns='model', 
            values='attributed_revenue'
        ).fillna(0).round(0)
        
        self.channel_performance = {
            'metrics': channel_metrics,
            'attribution_comparison': attribution_pivot
        }
        
        print(f"✅ Channel performance calculated:")
        print(f"  🎯 {len(channel_metrics)} channels analyzed")
        print(f"  📊 Best ROAS: {channel_metrics['roas'].max():.2f}")
        print(f"  💰 Total marketing spend: ${channel_metrics['total_cost'].sum():,.0f}")
        
        return channel_metrics, attribution_pivot
    
    def optimize_budget_allocation(self):
        """Optimize marketing budget allocation using attribution insights"""
        
        print("\n💰 Optimizing budget allocation...")
        
        channel_metrics = self.channel_performance['metrics']
        attribution_comparison = self.channel_performance['attribution_comparison']
        
        # Current budget allocation (based on current spend)
        total_current_spend = channel_metrics['total_cost'].sum()
        current_allocation = (channel_metrics['total_cost'] / total_current_spend * 100).round(1)
        
        # Optimal allocation based on multiple factors
        optimization_factors = {}
        
        for channel in channel_metrics.index:
            # Factor 1: ROAS efficiency
            roas = channel_metrics.loc[channel, 'roas']
            roas_score = min(roas / 5.0, 1.0) if roas < 999 else 1.0  # Normalize, cap at 1.0
            
            # Factor 2: Attribution consistency (how consistent across models)
            channel_attributions = attribution_comparison.loc[channel].values
            attribution_std = np.std(channel_attributions) / np.mean(channel_attributions) if np.mean(channel_attributions) > 0 else 1
            consistency_score = max(0, 1 - attribution_std)  # Lower std = higher consistency
            
            # Factor 3: Conversion rate
            conv_rate = channel_metrics.loc[channel, 'conversion_rate']
            conv_score = min(conv_rate / 0.1, 1.0)  # Normalize with 10% as max
            
            # Factor 4: Scale potential (inverse of current cost per click)
            cpc = channel_metrics.loc[channel, 'cost_per_click']
            scale_score = 1 / (1 + cpc / 5.0) if cpc > 0 else 1.0  # Lower CPC = more scalable
            
            # Combined optimization score
            optimization_score = (
                roas_score * 0.40 +
                consistency_score * 0.25 +
                conv_score * 0.20 +
                scale_score * 0.15
            )
            
            optimization_factors[channel] = {
                'roas_score': roas_score,
                'consistency_score': consistency_score,
                'conversion_score': conv_score,
                'scale_score': scale_score,
                'optimization_score': optimization_score,
                'current_allocation': current_allocation.get(channel, 0),
                'current_spend': channel_metrics.loc[channel, 'total_cost']
            }
        
        # Calculate optimal allocation
        total_optimization_score = sum([factors['optimization_score'] for factors in optimization_factors.values()])
        
        # Proposed budget allocation
        proposed_budget = 100000  # Assume $100K total budget
        
        budget_recommendations = []
        for channel, factors in optimization_factors.items():
            optimal_allocation = factors['optimization_score'] / total_optimization_score * 100
            optimal_budget = proposed_budget * optimal_allocation / 100
            
            current_budget = factors['current_spend']
            budget_change = optimal_budget - current_budget
            budget_change_pct = (budget_change / current_budget * 100) if current_budget > 0 else 0
            
            # Expected impact
            current_roas = channel_metrics.loc[channel, 'roas']
            expected_revenue_impact = budget_change * (current_roas if current_roas < 999 else 5.0)
            
            budget_recommendations.append({
                'channel': channel,
                'current_allocation_pct': factors['current_allocation'],
                'current_budget': current_budget,
                'optimal_allocation_pct': optimal_allocation,
                'optimal_budget': optimal_budget,
                'budget_change': budget_change,
                'budget_change_pct': budget_change_pct,
                'optimization_score': factors['optimization_score'],
                'expected_revenue_impact': expected_revenue_impact,
                'recommendation': self._get_budget_recommendation(budget_change_pct, factors['optimization_score'])
            })
        
        budget_df = pd.DataFrame(budget_recommendations)
        budget_df = budget_df.sort_values('optimization_score', ascending=False)
        
        print(f"✅ Budget optimization completed:")
        print(f"  💰 Total budget optimized: ${proposed_budget:,}")
        
        # Display top recommendations
        print(f"\n🎯 TOP BUDGET RECOMMENDATIONS:")
        for i, (_, rec) in enumerate(budget_df.head(3).iterrows(), 1):
            print(f"  {i}. {rec['channel'].replace('_', ' ').title()}")
            print(f"     📊 Change: {rec['budget_change_pct']:+.0f}% (${rec['budget_change']:+,.0f})")
            print(f"     💰 Expected impact: ${rec['expected_revenue_impact']:+,.0f}")
            print(f"     💡 {rec['recommendation']}")
        
        return budget_df
    
    def _get_budget_recommendation(self, change_pct, optimization_score):
        """Generate budget recommendation based on metrics"""
        if change_pct > 50 and optimization_score > 0.7:
            return "Significantly increase investment - high ROI potential"
        elif change_pct > 20 and optimization_score > 0.5:
            return "Increase investment - good performance"
        elif abs(change_pct) < 20:
            return "Maintain current investment level"
        elif change_pct < -20 and optimization_score < 0.3:
            return "Reduce investment - poor performance"
        else:
            return "Monitor closely - mixed signals"
    
    def create_attribution_summary(self):
        """Create a comprehensive summary of attribution analysis"""
        
        print("\n📊 Creating attribution analysis summary...")
        
        attribution_comparison = self.channel_performance['attribution_comparison']
        channel_metrics = self.channel_performance['metrics']
        
        summary = {
            'total_revenue': self.transaction_data['total_amount'].sum(),
            'total_transactions': len(self.transaction_data),
            'total_touchpoints': len(self.customer_journeys),
            'avg_journey_length': self.customer_journeys.groupby('journey_id')['touchpoint_number'].max().mean(),
            'conversion_rate': self.customer_journeys['converted'].mean(),
            'top_performing_channel': channel_metrics['roas'].idxmax(),
            'best_roas': channel_metrics['roas'].max(),
            'total_marketing_spend': channel_metrics['total_cost'].sum(),
            'overall_roas': channel_metrics['total_revenue'].sum() / channel_metrics['total_cost'].sum()
        }
        
        return summary

# Create sample data for attribution analysis
def create_attribution_sample_data():
    """Create sample e-commerce data for marketing attribution"""
    np.random.seed(42)
    
    n_transactions = 5000
    
    # Create date range with proper datetime objects
    date_range = pd.date_range(start='2024-01-01', periods=n_transactions, freq='4H')
    
    data = {
        'transaction_id': range(1, n_transactions + 1),
        'customer_id': np.random.randint(1, 3500, n_transactions),
        'date': date_range,
        'category': np.random.choice(['Electronics', 'Clothing', 'Books', 'Home', 'Sports'], n_transactions),
        'total_amount': np.random.lognormal(3.5, 0.7, n_transactions),
        'rating': np.random.choice([1, 2, 3, 4, 5], n_transactions, p=[0.02, 0.08, 0.20, 0.45, 0.25]),
        'device': np.random.choice(['Mobile', 'Desktop', 'Tablet'], n_transactions, p=[0.68, 0.28, 0.04]),
        'region': np.random.choice(['North America', 'Europe', 'Asia', 'Others'], n_transactions)
    }
    
    return pd.DataFrame(data)

# Initialize attribution analysis
print("📊 Creating sample data for attribution analysis...")
transaction_data = create_attribution_sample_data()
print(f"✅ Transaction data: {len(transaction_data)} purchases, ${transaction_data['total_amount'].sum():,.0f} revenue")

# Initialize analyzer
analyzer = MarketingAttributionAnalyzer(transaction_data)

# Run complete attribution analysis
print("\n🚀 Starting marketing attribution analysis...")

try:
    # 1. Simulate customer journeys
    customer_journeys = analyzer.simulate_customer_journeys()

    # 2. Build attribution models
    attribution_models = analyzer.build_attribution_models()

    # 3. Calculate channel performance
    channel_performance, attribution_comparison = analyzer.calculate_channel_performance()

    # 4. Optimize budget allocation
    budget_optimization = analyzer.optimize_budget_allocation()

    # 5. Create summary
    attribution_summary = analyzer.create_attribution_summary()

    print(f"\n✅ Marketing attribution analysis completed!")
    print(f"🎯 {len(customer_journeys)} customer touchpoints analyzed")
    print(f"📊 {len(attribution_models)} attribution models compared")
    print(f"💰 Overall ROAS: {attribution_summary['overall_roas']:.2f}x")
    print(f"🏆 Best performing channel: {attribution_summary['top_performing_channel'].replace('_', ' ').title()}")

    # Display key metrics
    print(f"\n📈 ATTRIBUTION MODEL COMPARISON:")
    total_revenues = attribution_comparison.sum()
    for model, revenue in total_revenues.items():
        print(f"  📊 {model.replace('_', ' ').title()}: ${revenue:,.0f}")

    print(f"\n🎯 TOP CHANNEL PERFORMANCE:")
    channel_performance_sorted = channel_performance.sort_values('roas', ascending=False)
    for i, (channel, metrics) in enumerate(channel_performance_sorted.head(5).iterrows(), 1):
        roas_display = f"{metrics['roas']:.1f}x" if metrics['roas'] < 999 else "∞"
        print(f"  {i}. {channel.replace('_', ' ').title()}: {roas_display} ROAS, {metrics['conversion_rate']:.1%} conversion")
        
    # Display budget recommendations
    print(f"\n💰 BUDGET OPTIMIZATION SUMMARY:")
    for i, (_, rec) in enumerate(budget_optimization.head(3).iterrows(), 1):
        print(f"  {i}. {rec['channel'].replace('_', ' ').title()}: {rec['budget_change_pct']:+.0f}% change recommended")

except Exception as e:
    print(f"❌ Error in analysis: {str(e)}")
    print("Please check the data and try again.")

📈 MARKETING ATTRIBUTION ANALYSIS
📊 Creating sample data for attribution analysis...
✅ Transaction data: 5000 purchases, $209,787 revenue

🚀 Starting marketing attribution analysis...
🔍 Simulating customer journey data...
✅ Customer journey data simulated:
  🎯 5000 converting journeys
  📊 12332 non-converting journeys
  📈 35181 total touchpoints

🤖 Building attribution models...
  🧠 Training ML attribution model...
  📈 ML model AUC: 1.000
✅ Attribution models built:
  📊 First Touch: $209,787 attributed revenue
  📊 Last Touch: $209,787 attributed revenue
  📊 Linear: $209,787 attributed revenue
  📊 Time Decay: $209,787 attributed revenue
  📊 Ml Based: $209,787 attributed revenue

📊 Calculating channel performance metrics...
✅ Channel performance calculated:
  🎯 8 channels analyzed
  📊 Best ROAS: 999.99
  💰 Total marketing spend: $49,901

💰 Optimizing budget allocation...
✅ Budget optimization completed:
  💰 Total budget optimized: $100,000

🎯 TOP BUDGET RECOMMENDATIONS:
  1. Direct
     📊