# LanceQL - Lance Format Viewer

- **Virtual Scrolling** - Lazily loads large datasets
- **Vector Sparklines** - Mini charts for embeddings
- **Model Detection** - MiniLM (384d), CLIP (512d), BERT (768d)
- **Image Preview** - Hover over URLs to see thumbnails

In [None]:
import metal0.lanceql as lanceql
import metal0.lanceql.display
import lance
import pyarrow as pa
import numpy as np
import tempfile
import os

---
## 1. Lance Dataset with Embeddings

A MiniLM-L6 badge appears for 384-dimensional vectors.

In [None]:
# Size of the demo dataset: row count and vector width.
n_rows = 1000
dim = 384  # MiniLM dimension

# Gaussian samples cast to float32, then each row is scaled in place by its
# own L2 norm.
embeddings = np.random.randn(n_rows, dim).astype(np.float32)
embeddings /= np.linalg.norm(embeddings, axis=1, keepdims=True)

# Columns are supplied positionally; `names` assigns the column names in the
# same order.
table = pa.table(
    [
        pa.array(range(n_rows)),
        pa.array([f"Document {i}" for i in range(n_rows)]),
        # The flattened float32 values are grouped into lists of `dim` items.
        pa.FixedSizeListArray.from_arrays(
            pa.array(embeddings.flatten(), type=pa.float32()), dim
        ),
        pa.array(np.random.uniform(0.5, 1.0, n_rows).round(3)),
    ],
    names=["id", "text", "embedding", "score"],
)

# Write to the system temp directory, replacing any previous copy.
path = os.path.join(tempfile.gettempdir(), "embeddings.lance")
lance.write_dataset(table, path, mode="overwrite")

# Trailing expression is the cell output: display with virtual scrolling +
# sparklines.
lance.dataset(path).to_table()

---
## 2. Virtual Scroll - 2M Images

Scroll through 2 million rows. Hover over a URL to preview its image.

In [None]:
# Virtual scroll demo: two million image rows.
n = 2_000_000

# Columns are supplied positionally; `names` assigns the column names in the
# same order.
table = pa.table(
    [
        pa.array(range(n)),
        # The seed in the URL cycles through 0..999.
        pa.array(
            [f"https://picsum.photos/seed/{i % 1000}/200/150" for i in range(n)]
        ),
        pa.array([f"Photo {i}" for i in range(n)]),
        pa.array([200] * n),
        pa.array([150] * n),
        pa.array(
            np.random.uniform(0.5, 1.0, n).round(3).astype(np.float32)
        ),
    ],
    names=["id", "image_url", "caption", "width", "height", "score"],
)

# Write to the system temp directory, replacing any previous copy.
path = os.path.join(tempfile.gettempdir(), "images.lance")
lance.write_dataset(table, path, mode="overwrite")
print(f"Created {n:,} rows")

# Trailing expression is the cell output.
lance.dataset(path).to_table()

---
## 3. Multiple Embedding Models

Different dimensions show different model badges.

In [None]:
def make_embedding(n, dim):
    """Generate ``n`` random vectors of width ``dim``, each scaled by its norm.

    Samples come from NumPy's global standard-normal generator and are cast to
    float32; every row is then divided by its own L2 norm.

    Returns:
        A float32 array of shape ``(n, dim)``.
    """
    gaussian = np.random.standard_normal((n, dim)).astype(np.float32)
    row_lengths = np.linalg.norm(gaussian, axis=1, keepdims=True)
    return gaussian / row_lengths

n = 50

# One vector column per width; the notebook associates each width with a
# model name (384 -> MiniLM-L6, 512 -> CLIP, 768 -> BERT).
table = pa.table(
    [
        pa.array(range(n)),
        pa.array([f"Item {i}" for i in range(n)]),
        # 384d = MiniLM-L6
        pa.FixedSizeListArray.from_arrays(
            pa.array(make_embedding(n, 384).flatten(), type=pa.float32()), 384
        ),
        # 512d = CLIP
        pa.FixedSizeListArray.from_arrays(
            pa.array(make_embedding(n, 512).flatten(), type=pa.float32()), 512
        ),
        # 768d = BERT
        pa.FixedSizeListArray.from_arrays(
            pa.array(make_embedding(n, 768).flatten(), type=pa.float32()), 768
        ),
    ],
    names=["id", "text", "minilm", "clip", "bert"],
)

# Write to the system temp directory, replacing any previous copy.
path = os.path.join(tempfile.gettempdir(), "multi_embedding.lance")
lance.write_dataset(table, path, mode="overwrite")

# Trailing expression is the cell output.
lance.dataset(path).to_table()

---
## 4. Query Engine

Filters and vector searches - results are displayed as tables.

In [None]:
# Create product dataset
n = 500
embeddings = np.random.randn(n, 384).astype(np.float32)
embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)

categories = ["electronics", "clothing", "home", "sports"]
products = ["headphones", "shoes", "lamp", "racket", "jacket", "speaker"]

table = pa.table({
    "id": pa.array(range(n)),
    "product": pa.array([products[i % len(products)] for i in range(n)]),
    "category": pa.array([categories[i % len(categories)] for i in range(n)]),
    "price": pa.array(np.random.uniform(10, 500, n).round(2)),
    "rating": pa.array(np.random.uniform(3.0, 5.0, n).round(1)),
    "embedding": pa.FixedSizeListArray.from_arrays(
        pa.array(embeddings.flatten(), type=pa.float32()), 384
    ),
})

path = os.path.join(tempfile.gettempdir(), "products.lance")
lance.write_dataset(table, path, mode="overwrite")

In [None]:
# Connect to the dataset at `path`
db = lanceql.connect(path)

# Two chained filters, a three-column projection and a limit of 5; the
# Arrow result is the cell output (displayed as a table).
(
    db.table()
    .filter("category = 'electronics'")
    .filter("rating >= 4.5")
    .select("product", "price", "rating")
    .limit(5)
    .to_arrow()
)

In [None]:
# Price filter with an order_by on "price" and a limit of 5; the Arrow
# result is the cell output.
(
    db.table()
    .filter("price < 100")
    .select("product", "price", "rating")
    .order_by("price")
    .limit(5)
    .to_arrow()
)

In [None]:
# Vector search using the first generated embedding as the query vector;
# the Arrow result is the cell output (displayed as a table).
query = embeddings[0]
(
    db.table()
    .similar("embedding", query, k=5)
    .select("product", "category", "price")
    .to_arrow()
)