# Diorama Demo

Interactive 2D/3D embedding visualization for JSON documents.

In [1]:
import numpy as np

import diorama


## Load data

In [8]:
# Cap on the number of records loaded; applied identically to documents and embeddings.
MAX_DOCS = 100_000

# NOTE(security): allow_pickle=True lets numpy unpickle object arrays from the
# archive, which can execute arbitrary code. Only load files from a trusted source.
# The context manager closes the underlying archive once both arrays are read.
with np.load("../electric_vehicles.npz", allow_pickle=True) as npz:
    # Slice first so only the first MAX_DOCS entries are converted to list items.
    documents = list(npz["data"][:MAX_DOCS])
    embeddings = npz["embs"][:MAX_DOCS]

# Documents and embedding rows are presumably paired by position in later cells,
# so a length mismatch here would silently mislabel points.
assert len(documents) == len(embeddings), "documents and embeddings differ in length"

print(f"{len(documents)} documents, {embeddings.shape[1]}-dim embeddings")
documents[0]

100000 documents, 64-dim embeddings


{'Record Type': 'VEH',
 'Registration Class': 'PAS',
 'City': 'POINT LOOKOUT',
 'State': 'NY',
 'Zip': '11569',
 'County': 'NASSAU',
 'Model Year': 2026,
 'Make': 'HYUND',
 'Body Type': 'SUBN',
 'Unladen Weight': 6008,
 'Maximum Gross Weight': None,
 'Passengers': None,
 'Color': 'BK',
 'Scofflaw Indicator': 'N',
 'Suspension Indicator': 'N',
 'Revocation Indicator': 'N'}

## Reduce once, explore fast

UMAP is the expensive step. Run it once and reuse the result.

In [None]:
# Settings shared by both projections; only the number of output components differs.
umap_options = {"method": "umap", "subsample": 10_000, "show_progress": True}

reduced_2d = diorama.reduce(embeddings, n_components=2, **umap_options)
reduced_3d = diorama.reduce(embeddings, n_components=3, **umap_options)

print(f"2D: {reduced_2d.shape}, 3D: {reduced_3d.shape}")

UMAP(angular_rp_forest=True, metric='cosine', verbose=True)
Thu Feb 12 20:24:05 2026 Construct fuzzy simplicial set
Thu Feb 12 20:24:05 2026 Finding Nearest Neighbors
Thu Feb 12 20:24:05 2026 Building RP forest with 21 trees
Thu Feb 12 20:24:05 2026 NN descent for 17 iterations
	 1  /  17
	 2  /  17
	 3  /  17
	Stopping threshold met -- exiting after 3 iterations
Thu Feb 12 20:24:06 2026 Finished Nearest Neighbor Search
Thu Feb 12 20:24:06 2026 Construct embedding


KeyboardInterrupt: 

## Color by a single field

In [None]:
# 2D projection with points colored by the single field "Make".
diorama.show(
    reduced_2d,
    documents,
    color_by="Make",
    height=1000,
)

## 3D view

In [None]:
# Passing the 3-component reduction instead of the 2-component one gives the 3D view.
diorama.show(
    reduced_3d,
    documents,
    color_by="Make",
    height=800,
)

## Dropdown with multiple coloring fields

In [None]:
# color_by takes a list here rather than a single field name; per the section
# heading, these become the choices in a coloring dropdown.
diorama.show(
    reduced_2d,
    documents,
    color_by=[
        "Make",
        "Body Type",
        "Color",
        "County",
        "Model Year",
    ],
    height=800,
)

## Continuous coloring

`Unladen Weight` is numeric with many unique values, so it is auto-detected as continuous.

In [None]:
# Color by the numeric "Unladen Weight" field.
diorama.show(
    reduced_2d,
    documents,
    color_by="Unladen Weight",
    height=800,
)

## Filtering with MongoDB syntax

Show only Tesla vehicles with a model year of 2024 or later.

In [None]:
# Both conditions are given in one filter document: an exact match on "Make"
# and a $gte comparison on "Model Year".
diorama.show(
    reduced_2d,
    documents,
    color_by="Body Type",
    filter={
        "Make": "TESLA",
        "Model Year": {"$gte": 2024},
    },
    height=800,
)

## Compare different filters

Points stay in stable positions across filters since we pre-computed the reduction.

In [None]:
# Keep only documents whose "County" is one of the listed values ($in match).
selected_counties = ["NASSAU", "SUFFOLK", "QUEENS", "KINGS"]
county_filter = {"County": {"$in": selected_counties}}

diorama.show(reduced_2d, documents, color_by="Make", filter=county_filter)

## Auto-discover fields

Pass `color_by=None` (default) to auto-discover the top fields by coverage.

In [None]:
# No color_by argument is passed, so the default (None) applies and the
# coloring fields are auto-discovered.
diorama.show(
    reduced_2d,
    documents,
)

## Dark mode

In [None]:
# Same "Make" coloring as before, rendered with the "dark" color scheme.
diorama.show(reduced_2d, documents, color_by="Make", color_scheme="dark")

## Dash app

Launch an interactive web UI with filter input, field dropdown, and 2D/3D toggle.
Runs at http://127.0.0.1:8050 — stop the cell to shut it down.

In [7]:
# NOTE(review): the saved output of this cell is
# "TypeError: 'module' object is not callable", i.e. `diorama.app` resolved to a
# module rather than a function when this was last run. The correct callable is
# not visible in this notebook — TODO confirm the entry point against the
# diorama package and re-run this cell before publishing.
# Unlike the cells above, this passes the raw `embeddings` rather than a
# pre-computed reduction.
diorama.app(embeddings, documents)

TypeError: 'module' object is not callable