In [None]:
import os
import shutil
import subprocess
import sys
from pathlib import Path


def _run(cmd):
    print(f">> {cmd}")
    subprocess.check_call(cmd, shell=True)


is_colab = "COLAB_RELEASE_TAG" in os.environ or "google.colab" in sys.modules
repo_root = Path.cwd()
is_repo = (repo_root / "pyproject.toml").exists() and (repo_root / "sidemantic").exists()

if is_repo:
    if shutil.which("uv"):
        _run('uv pip install -e ".[widget]"')
    else:
        _run('pip install -e ".[widget]"')
else:
    if shutil.which("uv"):
        _run('uv pip install "sidemantic[widget]"')
    else:
        _run('pip install "sidemantic[widget]"')

if is_colab:
    from google.colab import output

    output.enable_custom_widget_manager()

In [2]:
import duckdb

# Load real auction data (~1.4M rows)
conn = duckdb.connect(":memory:")
conn.execute("""
create table auctions as
select *
from 'https://sampledata.sidequery.dev/sidemantic-demo/auction_data.parquet'
--cross join range(2)
""")

# Preview
conn.execute("SELECT * FROM auctions LIMIT 5")

<duckdb.duckdb.DuckDBPyConnection at 0x10a363930>

In [3]:
from sidemantic.widget import MetricsExplorer

# Simple mode: just pass data (DuckDB relation)
max_cardinality = 50 if "COLAB_RELEASE_TAG" in os.environ else None
widget = MetricsExplorer(conn.table("auctions"), max_dimension_cardinality=max_cardinality)
widget

<sidemantic.widget._widget.MetricsExplorer object at 0x10a33df40>

In [4]:
# With cardinality threshold - skip dimensions with more than 50 unique values
widget2 = MetricsExplorer(conn.table("auctions"), max_dimension_cardinality=50)
widget2

<sidemantic.widget._widget.MetricsExplorer object at 0x115ba5580>

In [5]:
from sidemantic import Dimension, Metric, Model, SemanticLayer
from sidemantic.db.duckdb import DuckDBAdapter

# Define semantic model for auction analytics
auction_model = Model(
    name="auctions",
    table="auctions_with_id",
    primary_key="id",
    default_time_dimension="__time",  # Widget uses this for sparklines automatically!
    dimensions=[
        Dimension(name="__time", type="time", granularity="day"),
        Dimension(name="device_type", type="categorical"),
        Dimension(name="device_os", type="categorical"),
        Dimension(name="app_or_site", type="categorical"),
        Dimension(name="ad_position", type="categorical"),
        Dimension(name="placement_type", type="categorical"),
        Dimension(name="auction_type", type="categorical"),
        Dimension(name="bid_floor_bucket", type="categorical"),
        Dimension(name="platform_browser", type="categorical"),
        Dimension(name="video_max_duration_bucket", type="categorical"),
        Dimension(name="video_min_duration_bucket", type="categorical"),
        Dimension(name="ad_size", type="categorical"),
        Dimension(name="device_region", type="categorical"),
        Dimension(name="device_osv", type="categorical"),
        Dimension(name="app_site_cat", type="categorical"),
    ],
    metrics=[
        Metric(name="bid_requests", sql="sum(bid_request_cnt)"),
        Metric(name="bid_floor_requests", sql="sum(has_bid_floor_cnt)"),
        Metric(name="avg_bid_floor", sql="avg(bid_floor)"),
    ],
)

# Create semantic layer
adapter = DuckDBAdapter(":memory:")

# Copy data to new connection with id column
adapter.conn.execute("""
    CREATE TABLE auctions_with_id AS
    SELECT row_number() OVER () as id, *
    FROM 'https://sampledata.sidequery.dev/sidemantic-demo/auction_data.parquet'
""")

layer = SemanticLayer(connection=adapter)
layer.add_model(auction_model)

# Simple! Just pass the layer - time dimension and metrics auto-detected
widget3 = MetricsExplorer(layer)
widget3

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

<sidemantic.widget._widget.MetricsExplorer object at 0x1154b1e50>

In [12]:
from sidemantic import PreAggregation

# Define model with pre-aggregations for faster widget queries
# Same dimensions as Mode 3 - preaggregations cover the most common queries
auction_model_preagg = Model(
    name="auctions",
    table="auctions_with_id",
    primary_key="id",
    default_time_dimension="__time",
    dimensions=[
        Dimension(name="__time", type="time", granularity="day"),
        Dimension(name="device_type", type="categorical"),
        Dimension(name="device_os", type="categorical"),
        Dimension(name="app_or_site", type="categorical"),
        Dimension(name="ad_position", type="categorical"),
        Dimension(name="placement_type", type="categorical"),
        Dimension(name="auction_type", type="categorical"),
        Dimension(name="bid_floor_bucket", type="categorical"),
        Dimension(name="platform_browser", type="categorical"),
        Dimension(name="video_max_duration_bucket", type="categorical"),
        Dimension(name="video_min_duration_bucket", type="categorical"),
        Dimension(name="ad_size", type="categorical"),
        Dimension(name="device_region", type="categorical"),
        Dimension(name="device_osv", type="categorical"),
        Dimension(name="app_site_cat", type="categorical"),
    ],
    metrics=[
        Metric(name="bid_requests", sql="sum(bid_request_cnt)"),
        Metric(name="bid_floor_requests", sql="sum(has_bid_floor_cnt)"),
        Metric(name="avg_bid_floor", sql="avg(bid_floor)"),
    ],
    pre_aggregations=[
        # Daily rollup for time-series (sparklines)
        PreAggregation(
            name="daily_metrics",
            measures=["bid_requests", "bid_floor_requests", "avg_bid_floor"],
            time_dimension="__time",
            granularity="day",
        ),
        # Per-dimension rollups for leaderboards
        PreAggregation(
            name="by_device_type",
            measures=["bid_requests", "bid_floor_requests", "avg_bid_floor"],
            dimensions=["device_type"],
        ),
        PreAggregation(
            name="by_device_os",
            measures=["bid_requests", "bid_floor_requests", "avg_bid_floor"],
            dimensions=["device_os"],
        ),
        PreAggregation(
            name="by_app_or_site",
            measures=["bid_requests", "bid_floor_requests", "avg_bid_floor"],
            dimensions=["app_or_site"],
        ),
        PreAggregation(
            name="by_ad_position",
            measures=["bid_requests", "bid_floor_requests", "avg_bid_floor"],
            dimensions=["ad_position"],
        ),
        PreAggregation(
            name="by_placement_type",
            measures=["bid_requests", "bid_floor_requests", "avg_bid_floor"],
            dimensions=["placement_type"],
        ),
        PreAggregation(
            name="by_auction_type",
            measures=["bid_requests", "bid_floor_requests", "avg_bid_floor"],
            dimensions=["auction_type"],
        ),
        PreAggregation(
            name="by_bid_floor_bucket",
            measures=["bid_requests", "bid_floor_requests", "avg_bid_floor"],
            dimensions=["bid_floor_bucket"],
        ),
        PreAggregation(
            name="by_platform_browser",
            measures=["bid_requests", "bid_floor_requests", "avg_bid_floor"],
            dimensions=["platform_browser"],
        ),
        PreAggregation(
            name="by_video_max_duration_bucket",
            measures=["bid_requests", "bid_floor_requests", "avg_bid_floor"],
            dimensions=["video_max_duration_bucket"],
        ),
        PreAggregation(
            name="by_video_min_duration_bucket",
            measures=["bid_requests", "bid_floor_requests", "avg_bid_floor"],
            dimensions=["video_min_duration_bucket"],
        ),
        PreAggregation(
            name="by_ad_size",
            measures=["bid_requests", "bid_floor_requests", "avg_bid_floor"],
            dimensions=["ad_size"],
        ),
        PreAggregation(
            name="by_device_region",
            measures=["bid_requests", "bid_floor_requests", "avg_bid_floor"],
            dimensions=["device_region"],
        ),
        PreAggregation(
            name="by_device_osv",
            measures=["bid_requests", "bid_floor_requests", "avg_bid_floor"],
            dimensions=["device_osv"],
        ),
        PreAggregation(
            name="by_app_site_cat",
            measures=["bid_requests", "bid_floor_requests", "avg_bid_floor"],
            dimensions=["app_site_cat"],
        ),
    ],
)

# Create layer with preaggregations enabled
adapter_preagg = DuckDBAdapter(":memory:")
adapter_preagg.conn.execute("""
    CREATE TABLE auctions_with_id AS
    SELECT row_number() OVER () as id, *
    FROM 'https://sampledata.sidequery.dev/sidemantic-demo/auction_data.parquet'
""")

layer_preagg = SemanticLayer(connection=adapter_preagg, use_preaggregations=True)
layer_preagg.add_model(auction_model_preagg)

# Materialize the pre-aggregations (creates actual tables)
for preagg in auction_model_preagg.pre_aggregations:
    table_name = preagg.get_table_name("auctions")
    source_sql = preagg.generate_materialization_sql(auction_model_preagg)
    adapter_preagg.conn.execute(f"CREATE OR REPLACE TABLE {table_name} AS {source_sql}")
    print(f"Created pre-aggregation: {table_name}")

# Widget now uses pre-aggregated tables for compatible queries
widget4 = MetricsExplorer(layer_preagg)
widget4

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Created pre-aggregation: auctions_preagg_daily_metrics
Created pre-aggregation: auctions_preagg_by_device_type
Created pre-aggregation: auctions_preagg_by_device_os
Created pre-aggregation: auctions_preagg_by_app_or_site
Created pre-aggregation: auctions_preagg_by_ad_position
Created pre-aggregation: auctions_preagg_by_placement_type
Created pre-aggregation: auctions_preagg_by_auction_type
Created pre-aggregation: auctions_preagg_by_bid_floor_bucket
Created pre-aggregation: auctions_preagg_by_platform_browser
Created pre-aggregation: auctions_preagg_by_video_max_duration_bucket
Created pre-aggregation: auctions_preagg_by_video_min_duration_bucket
Created pre-aggregation: auctions_preagg_by_ad_size
Created pre-aggregation: auctions_preagg_by_device_region
Created pre-aggregation: auctions_preagg_by_device_osv
Created pre-aggregation: auctions_preagg_by_app_site_cat


<sidemantic.widget._widget.MetricsExplorer object at 0x113bba180>

In [13]:
# Load Foursquare Places dataset (106M rows)
# Join with categories to get human-readable category names
fsq_conn = duckdb.connect(":memory:")
fsq_conn.execute("""
    CREATE TABLE places AS
    SELECT
        p.country,
        p.region,
        p.locality,
        p.admin_region,
        c.level1_category_name as category_l1,
        c.level2_category_name as category_l2,
        c.level3_category_name as category_l3,
        CAST(p.date_created AS DATE) AS date_created,
        p.latitude,
        p.longitude
    FROM 'https://sampledata.sidequery.dev/sidemantic-demo/places.parquet' p
    LEFT JOIN 'https://sampledata.sidequery.dev/sidemantic-demo/categories.parquet' c
        ON c.category_id = p.fsq_category_ids[1]
""")

# Use cardinality threshold to skip high-cardinality dimensions like locality
widget5 = MetricsExplorer(
    fsq_conn.table("places"),
    max_dimension_cardinality=2500,  # Skip dimensions with >300 unique values
    time_dimension="date_created",
)
widget5

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

<sidemantic.widget._widget.MetricsExplorer object at 0x113bbba40>

In [10]:
from sidemantic import Dimension, Metric, Model, PreAggregation, SemanticLayer
from sidemantic.db.duckdb import DuckDBAdapter

places_model_preagg = Model(
    name="places",
    table="places",
    primary_key="rowid",
    default_time_dimension="date_created",
    dimensions=[
        Dimension(name="date_created", type="time", granularity="day"),
        Dimension(name="country", type="categorical"),
        Dimension(name="region", type="categorical"),
        Dimension(name="admin_region", type="categorical"),
        Dimension(name="category_l1", type="categorical"),
        Dimension(name="category_l2", type="categorical"),
        Dimension(name="category_l3", type="categorical"),
    ],
    metrics=[
        Metric(name="row_count", agg="count"),
        Metric(name="sum_latitude", sql="sum(latitude)"),
        Metric(name="sum_longitude", sql="sum(longitude)"),
    ],
    pre_aggregations=[
        PreAggregation(
            name="daily_metrics",
            measures=["row_count", "sum_latitude", "sum_longitude"],
            time_dimension="date_created",
            granularity="day",
        ),
        PreAggregation(
            name="by_country",
            measures=["row_count", "sum_latitude", "sum_longitude"],
            dimensions=["country"],
        ),
        PreAggregation(
            name="by_region",
            measures=["row_count", "sum_latitude", "sum_longitude"],
            dimensions=["region"],
        ),
        PreAggregation(
            name="by_admin_region",
            measures=["row_count", "sum_latitude", "sum_longitude"],
            dimensions=["admin_region"],
        ),
        PreAggregation(
            name="by_category_l1",
            measures=["row_count", "sum_latitude", "sum_longitude"],
            dimensions=["category_l1"],
        ),
        PreAggregation(
            name="by_category_l2",
            measures=["row_count", "sum_latitude", "sum_longitude"],
            dimensions=["category_l2"],
        ),
        PreAggregation(
            name="by_category_l3",
            measures=["row_count", "sum_latitude", "sum_longitude"],
            dimensions=["category_l3"],
        ),
    ],
)

adapter_fsq_preagg = DuckDBAdapter(":memory:")
adapter_fsq_preagg.conn = fsq_conn

layer_fsq_preagg = SemanticLayer(connection=adapter_fsq_preagg, use_preaggregations=True)
layer_fsq_preagg.add_model(places_model_preagg)

for preagg in places_model_preagg.pre_aggregations:
    table_name = preagg.get_table_name("places")
    source_sql = preagg.generate_materialization_sql(places_model_preagg)
    adapter_fsq_preagg.conn.execute(f"CREATE OR REPLACE TABLE {table_name} AS {source_sql}")
    print(f"Created pre-aggregation: {table_name}")

widget6 = MetricsExplorer(layer_fsq_preagg)
widget6

Created pre-aggregation: places_preagg_daily_metrics
Created pre-aggregation: places_preagg_by_country
Created pre-aggregation: places_preagg_by_region
Created pre-aggregation: places_preagg_by_admin_region
Created pre-aggregation: places_preagg_by_category_l1
Created pre-aggregation: places_preagg_by_category_l2
Created pre-aggregation: places_preagg_by_category_l3


<sidemantic.widget._widget.MetricsExplorer object at 0x113bba630>

In [11]:
# Auto-preaggregations (optional)
# Learns from widget usage and materializes rollups automatically
widget6_auto = MetricsExplorer(layer_fsq_preagg, auto_preaggregations=True)
widget6_auto

<sidemantic.widget._widget.MetricsExplorer object at 0x113bbe540>

In [None]:
# After interacting with the widget, check the filters
print("Current filters:", widget.filters)
print("Selected metric:", widget.selected_metric)
print("Date range:", widget.date_range)