In [ ]:
# Overview of the webhook analytics data model: a banner, the Delta Lake
# tables that hold webhook events, and the analytics views listed for them.
print(
    "Webhook Analytics Data Models",
    "=" * 50,
    "Data models available in: data_models/11_webhook_events.sql",
    sep="\n",
)

# Table name -> one-line description. Insertion order is the display order.
webhook_tables = dict(
    webhook_raw_events="Raw audit trail of all webhook events",
    webhook_email_events="Email delivery and engagement events",
    webhook_customer_events="Customer subscription and preference events",
    webhook_sms_events="SMS delivery and engagement events",
    webhook_push_events="Push notification events",
    webhook_in_app_events="In-app message events",
    webhook_slack_events="Slack message events",
    webhook_webhook_events="Webhook delivery tracking events",
)

print("\nDelta Lake Tables:")
for table, description in webhook_tables.items():
    # Name on one line, description indented beneath it.
    print(f"   {table}\n      {description}")

# Each entry is rendered as "<view name> - <summary>".
analytics_views = [
    f"{view_name} - {summary}"
    for view_name, summary in (
        ("daily_email_engagement", "Daily email performance metrics"),
        ("campaign_performance", "Campaign-level analytics"),
        ("customer_engagement_summary", "Cross-channel customer engagement"),
        ("webhook_error_summary", "Error monitoring and alerts"),
        ("journey_performance", "Journey and flow analytics"),
    )
]

print("\nAnalytics Views Available:")
for view in analytics_views:
    print("   " + view)

print("\nExample Analytics Queries:")

# Email engagement rates: per-day delivered/opened/clicked counts for the last
# 7 days, plus open and click rates as a percentage of delivered emails.
#
# The rate denominators are wrapped in NULLIF(..., 0): a day that has opens or
# clicks recorded but no 'delivered' rows would otherwise divide by zero, which
# is an error when ANSI SQL mode is enabled. With NULLIF the rate is NULL for
# such a day instead of failing the whole query.
print("\nEmail Engagement Rates:")
email_query = '''
SELECT 
    event_date,
    SUM(CASE WHEN metric = 'delivered' THEN 1 ELSE 0 END) as emails_delivered,
    SUM(CASE WHEN metric = 'opened' THEN 1 ELSE 0 END) as emails_opened,
    SUM(CASE WHEN metric = 'clicked' THEN 1 ELSE 0 END) as emails_clicked,
    ROUND(SUM(CASE WHEN metric = 'opened' THEN 1 ELSE 0 END) * 100.0 / 
          NULLIF(SUM(CASE WHEN metric = 'delivered' THEN 1 ELSE 0 END), 0), 2) as open_rate,
    ROUND(SUM(CASE WHEN metric = 'clicked' THEN 1 ELSE 0 END) * 100.0 / 
          NULLIF(SUM(CASE WHEN metric = 'delivered' THEN 1 ELSE 0 END), 0), 2) as click_rate
FROM webhook_email_events
WHERE event_date >= CURRENT_DATE() - INTERVAL 7 DAYS
GROUP BY event_date
ORDER BY event_date DESC
'''
print(email_query)

# Customer engagement summary: the 10 customers with the most 'opened' or
# 'clicked' events across the email, SMS and push tables, with the number of
# distinct channels (object_type values) each one appears in.
print("\nCustomer Engagement Summary:")
customer_query = '''
SELECT 
    customer_id,
    COUNT(DISTINCT object_type) as channels_used,
    SUM(CASE WHEN metric IN ('opened', 'clicked') THEN 1 ELSE 0 END) as engagement_events,
    MAX(event_date) as last_interaction_date
FROM (
    SELECT customer_id, object_type, metric, event_date FROM webhook_email_events
    UNION ALL
    SELECT customer_id, object_type, metric, event_date FROM webhook_sms_events  
    UNION ALL
    SELECT customer_id, object_type, metric, event_date FROM webhook_push_events
) all_events
WHERE customer_id IS NOT NULL
GROUP BY customer_id
HAVING engagement_events > 0
ORDER BY engagement_events DESC
LIMIT 10
'''
print(customer_query)

# Closing summary for the data-model cell: the analytics use cases, followed
# by pointers to the data-model documentation and schema file.
print("\nKey Analytics Use Cases:")
for use_case in (
    "Real-time delivery monitoring",
    "Campaign performance optimization",
    "Customer engagement scoring",
    "Channel effectiveness analysis",
    "Journey and flow analytics",
    "Error monitoring and alerting",
    "Conversion attribution",
    "Customer lifecycle analysis",
):
    print(f"SUCCESS: {use_case}")

print(
    "\nFor complete data model documentation:",
    "   See: data_models/README.md",
    "   Schema: data_models/11_webhook_events.sql",
    sep="\n",
)

## Analytics and Reporting

The webhook events are stored in Delta Lake tables for comprehensive analytics.

In [ ]:
# Deployment guide for the Databricks App. This cell only prints instructions;
# none of the listed commands are executed here.
print("Databricks App Deployment Guide")
print("=" * 50)

# Directory listing of the app, printed verbatim line by line.
app_layout = (
    "databricks_app/",
    "   ├── app.py                    # Main Flask application",
    "   ├── config.py                 # Configuration and secrets",
    "   ├── requirements.txt          # Dependencies",
    "   ├── databricks.yml            # Deployment configuration",
    "   ├── test_webhook.py           # Testing utilities",
    "   └── README.md                 # Complete documentation",
)
print("The webhook processor is available as a complete Databricks App in:")
print("\n".join(app_layout))

# (step title, shell lines shown under that step), in the order they are numbered.
deployment_steps = (
    (
        "Set up webhook signing secret in Databricks:",
        (
            "databricks secrets create-scope --scope customerio",
            "databricks secrets put --scope customerio --key webhook_signing_key",
        ),
    ),
    (
        "Deploy the app:",
        (
            "cd databricks_app",
            "databricks apps deploy --config databricks.yml",
        ),
    ),
    (
        "Get the app URL:",
        (
            "databricks apps list",
            "# Use the URL for webhook configuration",
        ),
    ),
    (
        "Test the deployment:",
        (
            "export CUSTOMERIO_WEBHOOK_SECRET='your_signing_key'",
            "export WEBHOOK_BASE_URL='https://your-app-url'",
            "python test_webhook.py",
        ),
    ),
)

print("\nDeployment Steps:")
for step_number, (title, commands) in enumerate(deployment_steps, start=1):
    # Steps are separated by a blank line; the first follows the heading directly.
    if step_number > 1:
        print()
    print(f"{step_number}. {title}")
    for command in commands:
        print(f"   {command}")

print("\nApp Features:")
for feature in (
    "HMAC-SHA256 signature verification",
    "All 7 Customer.IO event types supported",
    "Delta Lake storage integration",
    "Health check endpoints",
    "Comprehensive error handling",
    "Production-ready logging",
):
    print(f"SUCCESS: {feature}")

print("\nMonitoring:")
print(
    "Health endpoint: /health",
    "Event processing logs in Databricks",
    "Raw events stored in: /mnt/customerio/webhook_landing/",
    "Processed events in: /mnt/customerio/processed_events/",
    sep="\n",
)

print("\nSecurity:")
for safeguard in (
    "Signature verification (prevents unauthorized access)",
    "Timestamp validation (prevents replay attacks)",
    "Secrets stored in Databricks secret scope",
    "No hardcoded credentials in code",
):
    print(f"SUCCESS: {safeguard}")

print("\nFor complete deployment instructions, see:")
print("   databricks_app/README.md")

## Databricks App Deployment

Deploy the webhook receiver as a Databricks App for production use.

In [ ]:
# Webhook Configuration Example
# Note: This requires actual Customer.IO App API credentials
#
# Builds a CustomerIOWebhookManager, lists the events the webhook would
# subscribe to, and prints an example of the resulting configuration.
# No webhook is created by this cell; the setup steps are only printed.

# Example configuration (replace with your actual values).
# The token below is a placeholder: avoid committing a real token in the
# notebook and keep secrets in Databricks secrets or environment variables.
CUSTOMERIO_APP_API_TOKEN = "your_app_api_token_here"
CUSTOMERIO_REGION = "us"  # or "eu"
DATABRICKS_WEBHOOK_URL = "https://your-databricks-app.cloud.databricks.com/webhook/customerio"

print("Webhook Configuration Manager")
print("=" * 50)

# Initialize webhook manager
webhook_manager = CustomerIOWebhookManager(
    api_token=CUSTOMERIO_APP_API_TOKEN,
    region=CUSTOMERIO_REGION,
)

print("SUCCESS: Webhook manager initialized")
# Report the region actually passed to the manager rather than a fixed label,
# so the output stays correct when CUSTOMERIO_REGION is switched to "eu".
print(f"Region: {CUSTOMERIO_REGION.upper()}")
print(f"Target URL: {DATABRICKS_WEBHOOK_URL}")

# Example webhook events to subscribe to
webhook_events = [
    # Email events (most common)
    "email_sent", "email_delivered", "email_opened", "email_clicked",
    "email_bounced", "email_unsubscribed",

    # Customer events
    "customer_subscribed", "customer_unsubscribed",

    # SMS events
    "sms_sent", "sms_delivered", "sms_clicked",

    # Push events
    "push_sent", "push_delivered", "push_opened",
]

print("\nWebhook Events Configuration:")
print(f"   Total events: {len(webhook_events)}")
# One count line per channel; an event belongs to a channel when its name
# starts with the channel prefix.
for channel_label, name_prefix in (
    ("Email", "email"),
    ("Customer", "customer"),
    ("SMS", "sms"),
    ("Push", "push"),
):
    channel_count = sum(1 for event in webhook_events if event.startswith(name_prefix))
    print(f"   {channel_label} events: {channel_count}")

# Simulated webhook creation (requires real API token)
print("\nWebhook Setup Instructions:")
print("1. Set your Customer.IO App API token")
print("2. Deploy the Databricks App")
print("3. Get the webhook URL from Databricks")
print("4. Run: setup_databricks_webhook(api_token, webhook_url)")
print("5. Save the webhook signing secret for authentication")

# Example of what the setup would return
webhook_config_example = {
    "id": "webhook_12345",
    "name": "Databricks Analytics Webhook",
    "endpoint": DATABRICKS_WEBHOOK_URL,
    "events": webhook_events,
    "disabled": False,
    "full_resolution": True,
    "with_content": True,
}

print("\nExample Webhook Configuration:")
for key, value in webhook_config_example.items():
    if key == "events":
        # The full event list was summarised above; show only its size here.
        print(f"   {key}: {len(value)} events subscribed")
    else:
        print(f"   {key}: {value}")

print("\nIMPORTANT:")
print("   Save the webhook signing secret for signature verification!")
print("   Store it securely in Databricks secrets or environment variables.")

## Webhook Configuration Management

Set up and manage Customer.IO webhooks using the configuration manager.

# Customer.IO Webhook Processing

This notebook demonstrates how to process Customer.IO webhooks using the complete webhook implementation, including:

- **Webhook Authentication**: HMAC-SHA256 signature verification per Customer.IO specification
- **Event Processing**: Handling events for all 7 Customer.IO object types (email, SMS, push, in-app, customer, Slack, webhook)
- **Databricks App Integration**: Setting up webhook endpoints for real-time event processing
- **Analytics Integration**: Processing events into Delta Lake for analytics and reporting

## Architecture Overview

```
Customer.IO → Webhook → Databricks App → Event Handlers → Delta Lake → Analytics
```

## Setup

First, let's import the complete webhook processing utilities.

In [ ]:
# Import webhook processing modules
from src.webhooks import (
    verify_signature,
    parse_event, 
    get_event_type,
    validate_webhook_headers,
    route_webhook_event,
    get_event_handler,
    CustomerIOWebhookManager,
    setup_databricks_webhook
)
import json
import time
import hmac
import hashlib

# Status lines shown once the imports above have completed without error.
for status_message in (
    "Customer.IO Webhook processing utilities imported successfully",
    "Event handlers for all 7 object types available",
    "Webhook configuration manager ready",
    "Ready for webhook processing!",
):
    print(f"SUCCESS: {status_message}")

## Webhook Signature Verification

Customer.IO authenticates webhooks with HMAC-SHA256: the signature is computed over the string `v0:timestamp:body` using the webhook signing secret, and is sent as `v0=<hex digest>`.

In [ ]:
# Example webhook payload from Customer.IO, assembled from its nested parts and
# then serialised to the JSON text that is signed below.
sample_identifiers = {
    "id": "12345",
    "email": "test@example.com",
    "cio_id": "cio_03000001",
}
sample_event_data = {
    "customer_id": "12345",
    "delivery_id": "RPILAgUBcRhIBqSfeiIwdIYJKxTY",
    "campaign_id": 123,
    "action_id": 456,
    "subject": "Test Email Subject",
    "recipient": "test@example.com",
    "identifiers": sample_identifiers,
}
sample_event = {
    "event_id": "01E4C4CT6YDC7Y5M7FE1GWWPQJ",
    "object_type": "email",
    "metric": "opened",
    "timestamp": int(time.time()),
    "data": sample_event_data,
}
webhook_payload = json.dumps(sample_event)

# Webhook configuration: placeholder secret and the request timestamp (as a
# string of epoch seconds) that goes into the signed message.
webhook_secret = "your_webhook_secret_here"
timestamp = str(int(time.time()))

# Generate Customer.IO signature (v0:timestamp:body format): HMAC-SHA256 of the
# joined string, hex-encoded and prefixed with "v0=".
signature_string = ":".join(("v0", timestamp, webhook_payload))
signature_hash = hmac.new(
    key=webhook_secret.encode("utf-8"),
    msg=signature_string.encode("utf-8"),
    digestmod=hashlib.sha256,
).hexdigest()
webhook_signature = "v0=" + signature_hash

print(
    f"Webhook payload: {len(webhook_payload)} characters",
    f"Generated signature: {webhook_signature}",
    f"Timestamp: {timestamp}",
    sep="\n",
)

# Check the generated signature with the project's verify_signature utility.
is_valid = verify_signature(
    payload=webhook_payload,
    signature=webhook_signature,
    timestamp=timestamp,
    secret=webhook_secret,
)
valid_status = "SUCCESS" if is_valid else "ERROR"
print(f"\n{valid_status}: Webhook signature valid: {is_valid}")

# Negative case: a signature that does not match must be rejected.
# Note that `is_invalid` holds the verification result for the bad signature,
# so the expected (passing) outcome is a falsy value.
invalid_signature = "v0=invalid_signature_hash"
is_invalid = verify_signature(webhook_payload, invalid_signature, timestamp, webhook_secret)
was_rejected = not is_invalid
rejected_status = "SUCCESS" if was_rejected else "ERROR"
print(f"{rejected_status}: Invalid signature correctly rejected: {was_rejected}")

## Event Parsing and Routing

Customer.IO uses `object_type` as the discriminator and `metric` for specific event types.

In [ ]:
# Parse the signed payload into an event and show its identifying fields.
event_data = parse_event(webhook_payload)
print("Parsed event data:")
for field_label, field_key in (
    ("Event ID", "event_id"),
    ("Object Type", "object_type"),
    ("Metric", "metric"),
):
    print(f"   {field_label}: {event_data.get(field_key)}")
# customer_id is nested under "data"; a missing "data" entry yields None here.
event_details = event_data.get("data", {})
print(f"   Customer ID: {event_details.get('customer_id')}")

# Get the event type using Customer.IO format: an (object_type, metric) pair.
object_type, metric = get_event_type(event_data)
print(f"\nEvent analysis:\n   Object Type: {object_type}\n   Metric: {metric}")

# Route the event and report which handler family it was routed to.
routed_event = route_webhook_event(event_data)
print("\nEvent routing:")
routed_type = routed_event["object_type"]
print(f"   Routed to: {routed_type} handler")
print(f"   Metric: {routed_event['metric']}")
# Object types this notebook treats as having a handler.
known_object_types = ("email", "customer", "sms", "push", "in_app", "slack", "webhook")
handler_status = "SUCCESS" if routed_type in known_object_types else "ERROR"
print(f"   Handler available: {handler_status}")

# Demonstrate header validation: build the headers a Customer.IO request would
# carry and pass them through validate_webhook_headers.
headers = {
    "X-CIO-Timestamp": timestamp,
    "X-CIO-Signature": webhook_signature,
    "Content-Type": "application/json",
}

# The heading is printed before validating so that the indented ERROR line
# below still appears under it when validation fails.
print("\nHeader validation:")
try:
    # Only the call that can raise is guarded; the success output is in `else`.
    validated_timestamp, validated_signature = validate_webhook_headers(headers)
except ValueError as e:
    print(f"   ERROR: Header validation failed: {e}")
else:
    print(f"   Timestamp: {validated_timestamp}")
    # Show only the first 20 characters of the signature.
    print(f"   Signature: {validated_signature[:20]}...")
    print("   SUCCESS: Headers valid")

## Event Processing with Handlers

Demonstrate processing different types of Customer.IO webhook events using our event handlers.

In [ ]:
# Example 1: Email Opened Event
# Runs the previously parsed `event_data` through the email handler and prints
# selected fields of the handler's result.
print("Processing Email Opened Event")
print("=" * 50)

email_handler = get_event_handler("email")
processed_email = email_handler.handle_event(event_data)

# (display label, key read from the processed event), in display order.
for label, field in (
    ("Event ID", "event_id"),
    ("Customer", "customer_id"),
    ("Email", "email_address"),
    ("Subject", "subject"),
    ("Campaign", "campaign_id"),
    ("Processed At", "processed_at"),
):
    print(f"{label}: {processed_email[field]}")

# Example 2: Customer Subscription Event
# Builds a sample "customer"/"subscribed" event, passes it to the customer
# handler and prints selected fields of the result.
print("\nProcessing Customer Subscription Event")
print("=" * 50)

customer_identifiers = {
    "id": "12345",
    "email": "test@example.com",
    "cio_id": "cio_03000001",
}
customer_event = {
    "event_id": "01E4C4CT6YDC7Y5M7FE1GWWPQJ",
    "object_type": "customer",
    "metric": "subscribed",
    "timestamp": int(time.time()),
    "data": {
        "customer_id": "12345",
        "email_address": "test@example.com",
        "identifiers": customer_identifiers,
    },
}

customer_handler = get_event_handler("customer")
processed_customer = customer_handler.handle_event(customer_event)

# (display label, key read from the processed event), in display order.
for label, field in (
    ("Event ID", "event_id"),
    ("Customer", "customer_id"),
    ("Email", "email_address"),
    ("Metric", "metric"),
):
    print(f"{label}: {processed_customer[field]}")

# Example 3: SMS Clicked Event
# Builds a sample "sms"/"clicked" event, passes it to the SMS handler and
# prints selected fields of the result.
print("\nProcessing SMS Clicked Event")
print("=" * 50)

sms_event_data = {
    "customer_id": "12345",
    "delivery_id": "sms_123",
    "recipient": "+1234567890",
    "href": "https://example.com/promo",
    "link_id": "link_456",
    "identifiers": {"id": "12345"},
}
sms_event = {
    "event_id": "01E4C4CT6YDC7Y5M7FE1GWWPQJ",
    "object_type": "sms",
    "metric": "clicked",
    "timestamp": int(time.time()),
    "data": sms_event_data,
}

sms_handler = get_event_handler("sms")
processed_sms = sms_handler.handle_event(sms_event)

# (display label, key read from the processed event), in display order.
for label, field in (
    ("Event ID", "event_id"),
    ("Customer", "customer_id"),
    ("Phone", "recipient"),
    ("Link", "href"),
    ("Link ID", "link_id"),
):
    print(f"{label}: {processed_sms[field]}")

# Example 4: Push Notification Event
# Builds a sample "push"/"opened" event addressed to two devices, passes it to
# the push handler and prints selected fields of the result.
print("\nProcessing Push Notification Event")
print("=" * 50)

push_recipients = [
    {"device_id": "device_abc", "device_platform": "ios"},
    {"device_id": "device_xyz", "device_platform": "android"},
]
push_event = {
    "event_id": "01E4C4CT6YDC7Y5M7FE1GWWPQJ",
    "object_type": "push",
    "metric": "opened",
    "timestamp": int(time.time()),
    "data": {
        "customer_id": "12345",
        "delivery_id": "push_123",
        "recipients": push_recipients,
        "identifiers": {"id": "12345"},
    },
}

push_handler = get_event_handler("push")
processed_push = push_handler.handle_event(push_event)

# (display label, key read from the processed event), in display order.
for label, field in (
    ("Event ID", "event_id"),
    ("Customer", "customer_id"),
    ("Primary Device", "primary_device_id"),
):
    print(f"{label}: {processed_push[field]}")
# The processed 'recipients' value is decoded with json.loads before counting,
# i.e. it is presumably a JSON-encoded string - confirm against the push handler.
recipient_count = len(json.loads(processed_push["recipients"]))
print(f"Total Recipients: {recipient_count}")

print("\nSUCCESS: All webhook events processed successfully!")
print("Events are ready for storage in Delta Lake tables")