In [4]:
import duckdb
import vegafusion as vf
import pandas as pd
import altair as alt

# Remote parquet file holding the movies dataset used by this example.
MOVIES_PARQUET_URL = (
    "https://vegafusion-datasets.s3.amazonaws.com/vega/movies_201k.parquet"
)

# Create a DuckDB connection and hand it to VegaFusion so that chart
# transforms are evaluated against tables registered on this connection.
conn = duckdb.connect()
vf.runtime.set_connection(conn)

# Enable the VegaFusion mime renderer for Altair charts.
vf.enable()

# Read the parquet file through the same DuckDB connection.
relation = conn.read_parquet(MOVIES_PARQUET_URL)

# Drop rows that are missing either rating and store the result as a table
# named "movies". Inside the query the relation is addressed by the alias
# "tbl". Column names are spelled exactly as in the chart encodings below
# (IMDB_Rating, Rotten_Tomatoes_Rating) so the filter and the chart agree.
relation.query("tbl", """
    SELECT * FROM tbl
    WHERE Rotten_Tomatoes_Rating IS NOT NULL AND IMDB_Rating IS NOT NULL
""").to_table("movies")

# Build a binned 2D heatmap: IMDB rating on x (up to 60 bins), Rotten
# Tomatoes rating on y (up to 40 bins), colour encoding the row count per
# cell. The "table://movies" URL refers to the DuckDB table registered above
# rather than to an in-memory DataFrame.
chart = alt.Chart("table://movies").mark_rect().encode(
    alt.X('IMDB_Rating:Q', bin=alt.Bin(maxbins=60)),
    alt.Y('Rotten_Tomatoes_Rating:Q', bin=alt.Bin(maxbins=40)),
    alt.Color('count():Q', scale=alt.Scale(scheme='greenblue'))
)

# Bare expression on the last line so the notebook displays the chart.
chart