# AB Testing Platform - Analysis Layer Setup

This notebook sets up the Iceberg table that stores raw experiment results, along with a summary view for analyzing them.

In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *

# Get the active Spark session, or start one named for this setup notebook.
spark = (
    SparkSession.builder
    .appName("AB Testing Platform Setup")
    .getOrCreate()
)

## Create Experiment Results Table

This table stores the raw experiment data for analysis. It is partitioned by the day of `timestamp` and by `experiment_id`.

In [None]:
# Make sure the target namespace exists before creating the table, so this cell
# also succeeds on a fresh catalog. IF NOT EXISTS makes it a no-op otherwise.
spark.sql("CREATE DATABASE IF NOT EXISTS experiments")

# Create the experiment results table (safe to re-run thanks to IF NOT EXISTS).
# It is an Iceberg table partitioned by the day of `timestamp` and by
# `experiment_id`.
spark.sql("""
CREATE TABLE IF NOT EXISTS experiments.results (
    experiment_id STRING,
    variant STRING,
    user_id STRING,
    timestamp TIMESTAMP,
    value DOUBLE,
    metadata MAP<STRING, STRING>
)
USING iceberg
PARTITIONED BY (days(timestamp), experiment_id)
""")

## Create Analysis View

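This view aggregates `experiments.results` per experiment and variant: distinct users, number of observations, and the mean, standard deviation, minimum, and maximum of `value`.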

In [None]:
# DDL for the per-experiment, per-variant summary view over experiments.results.
# CREATE OR REPLACE keeps this cell safe to re-run.
# Note: STDDEV in Spark SQL is the sample standard deviation (STDDEV_SAMP).
summary_view_ddl = """
CREATE OR REPLACE VIEW experiments.results_summary AS
SELECT 
    experiment_id,
    variant,
    COUNT(DISTINCT user_id) as users,
    COUNT(*) as observations,
    AVG(value) as mean,
    STDDEV(value) as std_dev,
    MIN(value) as min_value,
    MAX(value) as max_value
FROM experiments.results
GROUP BY experiment_id, variant
"""

# Execute the view definition.
spark.sql(summary_view_ddl)