# Data Generation and Extraction
This notebook contains the code used to obtain the daily click-to-purchase funnel data.

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

## SQL Query for Data Extraction
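This is the query that would be run against your data warehouse. It is only defined here for reference; the notebook does not execute it and instead loads a previously exported CSV below.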

In [None]:
# Extraction query: one row per calendar day with click and purchase aggregates.
#
# The date filters use a half-open interval (>= start, < day after end) rather
# than BETWEEN. When the *_at columns are timestamps, BETWEEN '...' AND
# '2025-09-30' compares against 2025-09-30 00:00:00 and silently drops almost
# all of the final day; the half-open form covers the whole of Sept 30 and
# gives the same result if the columns are plain dates.
#
# NOTE(review): `aov` is AVG(quantity * unit_price) over rows of `purchases`.
# If that table can hold several rows per purchase_id (the COUNT(DISTINCT ...)
# suggests it might), this is a per-row average, not revenue per order - confirm.
# NOTE(review): `unique_clickers` is computed in daily_clicks but not selected
# in the final result.
query = """
WITH daily_clicks AS (
    SELECT 
        DATE(occurred_at) as activity_date,
        COUNT(DISTINCT interaction_id) as total_clicks,
        COUNT(DISTINCT user_id) as unique_clickers
    FROM clicks
    WHERE occurred_at >= '2025-03-01' AND occurred_at < '2025-10-01'
    GROUP BY 1
),
daily_purchases AS (
    SELECT 
        DATE(purchased_at) as activity_date,
        COUNT(DISTINCT purchase_id) as total_purchases,
        SUM(quantity * unit_price) as total_revenue,
        AVG(quantity * unit_price) as aov
    FROM purchases
    WHERE purchased_at >= '2025-03-01' AND purchased_at < '2025-10-01'
    GROUP BY 1
)
SELECT 
    COALESCE(c.activity_date, p.activity_date) as activity_date,
    COALESCE(c.total_clicks, 0) as total_clicks,
    COALESCE(p.total_purchases, 0) as total_purchases,
    COALESCE(p.aov, 0) as aov,
    COALESCE(p.total_revenue, 0) as total_revenue
FROM daily_clicks c
FULL OUTER JOIN daily_purchases p ON c.activity_date = p.activity_date
ORDER BY 1
"""

print("SQL Query prepared for data extraction")

## Load Existing Data

In [None]:
# Read the previously exported daily funnel summary (path is relative to the
# notebook's working directory).
display_df = pd.read_csv('daily_click_to_purchase_funnel_summary.csv')

print("--- Daily Click-to-Purchase Funnel Summary ---")

# One row per day is assumed by the "days" wording below.
row_count = len(display_df)
print(f"Data loaded: {row_count} days")

# min/max are taken on the column exactly as read from the CSV.
activity_dates = display_df['activity_date']
print(f"Date range: {activity_dates.min()} to {activity_dates.max()}")

print("\nSample data:")
# Last expression of the cell: rendered by the notebook as a table.
display_df.head(10)

## Data Statistics

In [None]:
# Summary statistics for the core funnel metrics.
print("Summary Statistics:")
print(display_df[['total_clicks', 'total_purchases', 'aov', 'total_revenue']].describe())

# Daily conversion rate (%) = total_purchases / total_clicks * 100.
# The extraction query fills missing click counts with 0, so a day can have
# zero clicks. Dividing by 0 produces inf (or NaN for 0/0), and a single inf
# turns the average printed below into inf. Mask zero-click days to NaN so the
# rate is left undefined for them and .mean() (which skips NaN) ignores them.
clicks_nonzero = display_df['total_clicks'].where(display_df['total_clicks'] != 0)
display_df['conversion_rate_calc'] = (display_df['total_purchases'] / clicks_nonzero * 100).round(2)

# Both figures are unweighted means of the daily values, not pooled totals.
print(f"\nAverage conversion rate: {display_df['conversion_rate_calc'].mean():.2f}%")
print(f"Average AOV: ${display_df['aov'].mean():,.2f}")

## Export Data

In [None]:
# The CSV already exists on disk, so the export is intentionally disabled.
# Uncomment the two lines below to rewrite it from `display_df`.
# display_df.to_csv('daily_click_to_purchase_funnel_summary.csv', index=False)
# print("Data saved to: daily_click_to_purchase_funnel_summary.csv")

# Closing hand-off message pointing at the analysis entry point.
print(
    "Data is ready for ARDL analysis!",
    "\nTo run the analysis, use:",
    "  python3 main.py",
    sep="\n",
)