# Real Campaign Data Analysis

This notebook analyzes the actual marketing campaign A/B test data from August 2019.
Unlike the simulation walkthrough, it works with real, messy data that includes:
- Missing values (Aug 5, 2019)
- Aggregated daily metrics
- Real business KPIs

In [None]:
# Cell 2 - Setup
import sys
sys.path.insert(0, '../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from real_data_analyzer import RealDataAnalyzer

# Plot defaults shared by every figure below: seaborn's whitegrid theme and a
# 12x6 default figure size (individual cells may still pass their own figsize).
sns.set_theme(style="whitegrid")
plt.rcParams.update({'figure.figsize': [12, 6]})

# One analyzer instance is created here and reused by all later cells.
analyzer = RealDataAnalyzer()
analyzer.load_data()
print("✅ Data loaded successfully")

In [None]:
# Cell 3 - Data Quality
# Print a short summary of the analyzer's data-quality report: the covered
# date range, then row and missing-value counts for each campaign.
quality = analyzer.get_data_quality_report()
print("Data Quality Report:")

date_format = '%Y-%m-%d'
first_day = quality['date_range']['start'].strftime(date_format)
last_day = quality['date_range']['end'].strftime(date_format)
print(f"  Date range: {first_day} to {last_day}")

# Same line layout for both arms; only the label and key prefix differ.
for label, key in (('Control', 'control'), ('Test', 'test')):
    row_count = quality[f'{key}_rows']
    missing_count = quality[f'{key}_missing_values']
    print(f"  {label}: {row_count} days, {missing_count} missing values")

# Only the control arm's missing dates are reported here.
missing_control_dates = quality['control_missing_dates']
if missing_control_dates:
    print(f"  ⚠️ Control missing dates: {missing_control_dates}")

In [None]:
# Cell 4 - Time Series Plot
# Four-panel overview of the daily metrics, split by campaign:
#   top-left     conversion rate per day
#   top-right    click-through rate per day
#   bottom-left  cumulative purchases
#   bottom-right daily spend (grouped bars)
daily = analyzer.get_daily_metrics()
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

campaigns = ['control', 'test']

# Sort once so every line is drawn in chronological order; plotting unsorted
# rows would make the line double back on itself.
daily_sorted = daily.sort_values('Date')
by_campaign = {
    name: daily_sorted[daily_sorted['campaign'] == name] for name in campaigns
}

# (axes, column, title, y-label, cumulative?, marker style) for each line panel.
line_panels = [
    (axes[0, 0], 'conversion_rate', 'Conversion Rate Over Time', 'Conversion Rate',
     False, {'marker': 'o', 'markersize': 4}),
    (axes[0, 1], 'ctr', 'Click-Through Rate Over Time', 'CTR',
     False, {'marker': 'o', 'markersize': 4}),
    (axes[1, 0], 'Purchase', 'Cumulative Purchases', 'Total Purchases',
     True, {}),
]
for ax, column, title, ylabel, cumulative, line_style in line_panels:
    for campaign, data in by_campaign.items():
        series = data[column].cumsum() if cumulative else data[column]
        ax.plot(data['Date'], series, label=campaign, **line_style)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.tick_params(axis='x', rotation=45)

# Daily Spend
# Align the two campaigns on Date instead of pairing rows by position: if one
# campaign lacks a day, positional pairing would misalign the bars or fail
# with a shape mismatch. min_count=1 keeps a missing spend as NaN (no bar)
# rather than turning it into 0.
width = 0.35
spend_by_day = (
    daily.groupby(['Date', 'campaign'])['Spend_USD']
    .sum(min_count=1)
    .unstack('campaign')
    .reindex(columns=campaigns)
    .sort_index()
)
x = np.arange(len(spend_by_day))
spend_ax = axes[1, 1]
spend_ax.bar(x - width/2, spend_by_day['control'].to_numpy(), width, alpha=0.7, label='control')
spend_ax.bar(x + width/2, spend_by_day['test'].to_numpy(), width, alpha=0.7, label='test')
spend_ax.set_title('Daily Spend')
spend_ax.set_ylabel('Spend (USD)')
spend_ax.legend()

# Label every 5th day. Format through a DatetimeIndex: iterating over
# `Series.values` yields numpy.datetime64 scalars (when Date is datetime64),
# and those have no `.strftime` method.
tick_labels = pd.to_datetime(spend_by_day.index[::5]).strftime('%m/%d')
spend_ax.set_xticks(x[::5])
spend_ax.set_xticklabels(tick_labels, rotation=45)

plt.tight_layout()
plt.show()

In [None]:
# Cell 5 - Statistical Comparison
# Run the control-vs-test comparison and print it. `result` is kept as a
# notebook-level variable because the funnel chart and the summary cells
# below read its metrics, lift, and significance fields.
result = analyzer.compare_campaigns()
print(result)

In [None]:
# Cell 6 - Funnel Visualization
# Grouped bar chart of the funnel stages (impressions -> clicks -> purchases)
# for control and test, taken from the comparison `result`.
fig, ax = plt.subplots(figsize=(10, 6))

# Display label -> attribute name on the per-campaign metrics object.
funnel_stages = {
    'Impressions': 'total_impressions',
    'Clicks': 'total_clicks',
    'Purchases': 'total_purchases',
}
metrics = list(funnel_stages)
control_values = [getattr(result.control_metrics, attr) for attr in funnel_stages.values()]
test_values = [getattr(result.test_metrics, attr) for attr in funnel_stages.values()]

width = 0.35
x = np.arange(len(metrics))

# Control bars sit half a bar-width left of each tick, test bars half right.
bars1 = ax.bar(x - width/2, control_values, width, label='Control', color='steelblue')
bars2 = ax.bar(x + width/2, test_values, width, label='Test', color='seagreen')

ax.set(ylabel='Count', title='Marketing Funnel: Control vs Test')
ax.set_xticks(x)
ax.set_xticklabels(metrics)
ax.legend()
# Log scale so all three stages share one axis.
ax.set_yscale('log')

plt.tight_layout()
plt.show()

In [None]:
# Cell 7 - Summary and Recommendation
# Print the headline numbers from `result` and a recommendation that depends
# on whether the conversion difference is significant and on the sign of the
# relative lift.
rule = "=" * 50

print(rule)
print("ANALYSIS SUMMARY")
print(rule)

control_metrics = result.control_metrics
test_metrics = result.test_metrics

print("\nConversion Rate:")
print(f"  Control: {control_metrics.conversion_rate:.2%}")
print(f"  Test: {test_metrics.conversion_rate:.2%}")
print(f"  Lift: {result.conversion_lift_relative:+.2%} (p={result.conversion_p_value:.4f})")

print("\nCost Per Purchase:")
print(f"  Control: ${control_metrics.cost_per_purchase:.2f}")
print(f"  Test: ${test_metrics.cost_per_purchase:.2f}")
print(f"  Change: {result.cost_per_purchase_change:+.2%}")

print("\n" + rule)
# Not significant -> keep testing; significant -> ship only on positive lift.
if not result.conversion_significant:
    recommendation = "⚠️ RECOMMENDATION: Continue testing (not significant)"
elif result.conversion_lift_relative > 0:
    recommendation = "✅ RECOMMENDATION: Ship the test variant"
else:
    recommendation = "❌ RECOMMENDATION: Revert to control"
print(recommendation)
print(rule)