In [3]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Set random seed for reproducibility (legacy global NumPy RNG).
np.random.seed(42)

# Generate one date per day for 2022-2023 (neither year is a leap year,
# so 365 * 2 days covers both calendar years exactly).
start_date = datetime(2022, 1, 1)
dates = [start_date + timedelta(days=x) for x in range(365 * 2)]

# Define channels and campaign types
channels = ['Facebook', 'Google Ads', 'TikTok', 'Instagram', 'YouTube', 'Twitter']
campaign_types = ['App_Install', 'Brand_Awareness', 'Lead_Generation', 'Retargeting', 'Promotional']
regions = ['US_West', 'US_East', 'EU', 'APAC', 'LATAM']

# Mean ('base') and standard deviation ('variance') of the revenue earned
# per conversion, by campaign type.
revenue_by_campaign = {
    'App_Install': {'base': 45, 'variance': 15},
    'Brand_Awareness': {'base': 30, 'variance': 10},
    'Lead_Generation': {'base': 120, 'variance': 40},
    'Retargeting': {'base': 85, 'variance': 25},
    'Promotional': {'base': 65, 'variance': 20},
}

# Regional revenue multipliers
region_multipliers = {
    'US_West': 1.2,
    'US_East': 1.1,
    'EU': 1.0,
    'APAC': 0.9,
    'LATAM': 0.8,
}

# (mean, standard deviation) of cost-per-click by channel; channels that are
# not listed fall back to ``default_cpc_params``.
cpc_params_by_channel = {
    'Google Ads': (0.5, 0.1),
    'Facebook': (0.35, 0.08),
    'Instagram': (0.35, 0.08),
}
default_cpc_params = (0.45, 0.09)

# Fraction of (date, channel, campaign type, region) combinations to skip.
# 730 days * 6 channels * 5 campaign types * 5 regions = 109,500 combinations,
# so keeping ~40% of them yields roughly 44K rows.
skip_probability = 0.6


def seasonal_factors(month):
    """Return the seasonal multipliers for a calendar month.

    Args:
        month: Calendar month number, 1-12.

    Returns:
        A tuple ``(impression_factor, conv_rate_factor, revenue_factor)``.
    """
    if month in (11, 12):  # Holiday season
        return 1.3, 1.2, 1.4
    if month in (1, 2):  # Post-holiday season
        return 1.0, 1.0, 0.8
    if month in (6, 7):  # Summer season
        return 0.9, 1.0, 0.9
    return 1.0, 1.0, 1.0


def safe_ratio(numerator, denominator, scale=1):
    """Return ``numerator / denominator * scale``, or 0 if the denominator is not positive."""
    return numerator / denominator * scale if denominator > 0 else 0


def build_campaign_row(date, channel, campaign_type, region, impressions, ctr,
                       conv_rate, install_rate, revenue_per_conv, cpc):
    """Turn one set of raw random draws into a finished dataset row.

    The draws are clamped to their valid ranges first, because an unbounded
    normal draw can be negative (or, for the install rate, above 1), which
    would otherwise produce negative clicks, costs or revenue.

    Seasonal factors are applied to impressions and conversion rate *before*
    clicks, conversions and installs are derived, so that the whole funnel
    (and the cost that depends on clicks) reflects the seasonal volume.

    Args:
        date: ``datetime`` of the row.
        channel: Advertising channel name.
        campaign_type: Campaign type name.
        region: Region name; must be a key of ``region_multipliers``.
        impressions: Raw (pre-season) impression count draw.
        ctr: Click-through rate draw, as a fraction of impressions.
        conv_rate: Raw (pre-season) conversion rate draw, as a fraction of clicks.
        install_rate: Install rate draw, as a fraction of conversions.
        revenue_per_conv: Revenue per conversion draw, before region and
            seasonal multipliers.
        cpc: Cost-per-click draw.

    Returns:
        A dict with one entry per output column.
    """
    # Clamp the draws to physically meaningful ranges.
    impressions = max(impressions, 0.0)
    ctr = max(ctr, 0.0)
    conv_rate = max(conv_rate, 0.0)
    install_rate = min(max(install_rate, 0.0), 1.0)
    revenue_per_conv = max(revenue_per_conv, 0.0)
    cpc = max(cpc, 0.0)

    impression_factor, conv_rate_factor, revenue_factor = seasonal_factors(date.month)
    impressions *= impression_factor
    conv_rate *= conv_rate_factor

    # Funnel metrics derived from the seasonally adjusted inputs.
    clicks = int(impressions * ctr)
    conversions = int(clicks * conv_rate)
    installs = int(conversions * install_rate)

    # NOTE: the seasonal revenue factor compounds with the seasonal change in
    # conversion volume above.
    revenue = conversions * revenue_per_conv * region_multipliers[region] * revenue_factor
    cost = clicks * cpc

    return {
        'Date': date.strftime('%Y-%m-%d'),
        'month_year': date.strftime('%Y-%m'),
        'Channel': channel,
        'Campaign_Type': campaign_type,
        'Region': region,
        'Campaign_Name': f"{region}_{campaign_type}_{date.strftime('%Y%m')}",
        'Impressions': int(impressions),
        'Clicks': clicks,
        'CTR': round(safe_ratio(clicks, impressions, 100), 2),
        'Conversions': conversions,
        'Installs': installs,
        'Cost': round(cost, 2),
        'Revenue': round(revenue, 2),
        'ROAS': round(safe_ratio(revenue, cost), 2),
        'Profit': round(revenue - cost, 2),
        'CPC': round(safe_ratio(cost, clicks), 2),
        'CPI': round(safe_ratio(cost, installs), 2),
        'CPM': round(safe_ratio(cost, impressions, 1000), 2),
        'Revenue_Per_Conversion': round(safe_ratio(revenue, conversions), 2),
    }


# Build the rows. The order of the random draws below is significant: it is
# what makes the output reproducible for a given seed, so do not reorder them.
data = []
for date in dates:
    for channel in channels:
        cpc_mean, cpc_sd = cpc_params_by_channel.get(channel, default_cpc_params)
        for campaign_type in campaign_types:
            revenue_params = revenue_by_campaign[campaign_type]
            for region in regions:
                # Random sampling to thin the full grid down to ~44K rows.
                if np.random.random() <= skip_probability:
                    continue

                impressions = np.random.normal(100000, 20000)
                ctr = np.random.normal(0.03, 0.01)
                conv_rate = np.random.normal(0.08, 0.02)
                install_rate = np.random.normal(0.80, 0.05)
                revenue_per_conv = np.random.normal(
                    revenue_params['base'], revenue_params['variance']
                )
                cpc = np.random.normal(cpc_mean, cpc_sd)

                data.append(
                    build_campaign_row(
                        date, channel, campaign_type, region, impressions, ctr,
                        conv_rate, install_rate, revenue_per_conv, cpc,
                    )
                )

# Create DataFrame
mktg = pd.DataFrame(data)

# Save to CSV
mktg.to_csv('marketing_performance_2022_2023.csv', index=False)

# Show a sample of the generated data
mktg.head(5)

Unnamed: 0,Date,month_year,Channel,Campaign_Type,Region,Campaign_Name,Impressions,Clicks,CTR,Conversions,Installs,Cost,Revenue,ROAS,Profit,CPC,CPI,CPM,Revenue_Per_Conversion
0,2022-01-01,2022-01,Facebook,App_Install,US_East,US_East_App_Install_202201,112953,5108,4.52,384,302,2101.4,23211.11,11.05,21109.71,0.41,6.96,18.6,60.45
1,2022-01-01,2022-01,Facebook,App_Install,APAC,APAC_App_Install_202201,90610,3209,3.54,226,175,631.97,7912.98,12.52,7281.01,0.2,3.61,6.97,35.01
2,2022-01-01,2022-01,Facebook,Brand_Awareness,US_East,US_East_Brand_Awareness_202201,91439,2064,2.26,136,94,821.1,2837.04,3.46,2015.95,0.4,8.74,8.98,20.86
3,2022-01-01,2022-01,Facebook,Lead_Generation,US_West,US_West_Lead_Generation_202201,87994,3473,3.95,298,228,1170.61,22639.77,19.34,21469.17,0.34,5.13,13.3,75.97
4,2022-01-01,2022-01,Facebook,Lead_Generation,APAC,APAC_Lead_Generation_202201,116450,2071,1.78,174,122,757.47,8377.79,11.06,7620.33,0.37,6.21,6.5,48.15
