Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions docs/snippets/tables.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ export const PyCreateEmptyTable = "import pyarrow as pa\n\nschema = pa.schema(\n

export const PyCreateEmptyTablePydantic = "from lancedb.pydantic import LanceModel, Vector\n\nclass Item(LanceModel):\n vector: Vector(2)\n item: str\n price: float\n\ndb = tmp_db\ntbl = db.create_table(\n \"test_empty_table_new\", schema=Item.to_arrow_schema(), mode=\"overwrite\"\n)\n";

export const PyCreateTableConflictHandling = "# Idempotent open: reuse the existing table if it exists.\n# The provided data is ignored; the schema is validated against the\n# existing table and a mismatch raises an error.\ntbl = db.create_table(\"conflict_table\", data, exist_ok=True)\n\n# Overwrite: drop the existing table and create a new one with the\n# provided data. This permanently discards the old table's data.\ntbl = db.create_table(\"conflict_table\", data, mode=\"overwrite\")\n";

export const PyCreateTableCustomSchema = "import pyarrow as pa\n\ncustom_schema = pa.schema(\n [\n pa.field(\"vector\", pa.list_(pa.float32(), 4)),\n pa.field(\"lat\", pa.float32()),\n pa.field(\"long\", pa.float32()),\n ]\n)\n\ndata = [\n {\"vector\": [1.1, 1.2, 1.3, 1.4], \"lat\": 45.5, \"long\": -122.7},\n {\"vector\": [0.2, 1.8, 0.4, 3.6], \"lat\": 40.1, \"long\": -74.1},\n]\ndb = tmp_db\ntbl = db.create_table(\n \"my_table_custom_schema\", data, schema=custom_schema, mode=\"overwrite\"\n)\n";

export const PyCreateTableFromArrow = "import numpy as np\nimport pyarrow as pa\n\ndim = 16\ntotal = 2\nschema = pa.schema(\n [pa.field(\"vector\", pa.list_(pa.float16(), dim)), pa.field(\"text\", pa.string())]\n)\ndata = pa.Table.from_arrays(\n [\n pa.array(\n [np.random.randn(dim).astype(np.float16) for _ in range(total)],\n pa.list_(pa.float16(), dim),\n ),\n pa.array([\"foo\", \"bar\"]),\n ],\n [\"vector\", \"text\"],\n)\ndb = tmp_db\ntbl = db.create_table(\"f16_tbl\", data, schema=schema, mode=\"overwrite\")\n";
Expand Down Expand Up @@ -116,6 +118,8 @@ export const PyVersioningListAllVersions = "versions = table.list_versions()\nfo

export const PyVersioningRollback = "table.restore(version_after_mod)\nversions = table.list_versions()\nversion_count_after_rollback = len(versions)\nprint(f\"Total number of versions after rollback: {version_count_after_rollback}\")\n";

export const PyVersioningTags = "# Create a tag pointing at a specific version\ntable.tags.create(\"baseline\", 1)\ntable.tags.create(\"with-edits\", table.version)\n\n# List all tags on this table\nprint(table.tags.list())\n\n# Look up the version a tag points at\nprint(table.tags.get_version(\"baseline\"))\n\n# Move an existing tag to a different version\ntable.tags.update(\"baseline\", 2)\n\n# Check out a version by tag name\ntable.checkout(\"baseline\")\nprint(table.version)\n\n# Delete a tag (does not delete the underlying version)\ntable.tags.delete(\"with-edits\")\n\n# Return to the latest version\ntable.checkout_latest()\n";

export const PyVersioningUpdateData = "table.update(where=\"author='Richard'\", values={\"author\": \"Richard Daniel Sanchez\"})\nrows_after_update = table.count_rows(\"author = 'Richard Daniel Sanchez'\")\nprint(f\"Rows updated to Richard Daniel Sanchez: {rows_after_update}\")\n";

export const TsAddColumnsCalculated = "// Add a discounted price column (10% discount)\nawait schemaAddTable.addColumns([\n {\n name: \"discounted_price\",\n valueSql: \"cast((price * 0.9) as float)\",\n },\n]);\n";
Expand Down Expand Up @@ -144,6 +148,8 @@ export const TsConsistencyStrong = "const strongWriterDb = await lancedb.connect

export const TsCreateEmptyTable = "const emptySchema = new arrow.Schema([\n new arrow.Field(\n \"vector\",\n new arrow.FixedSizeList(\n 2,\n new arrow.Field(\"item\", new arrow.Float32(), true),\n ),\n ),\n new arrow.Field(\"item\", new arrow.Utf8()),\n new arrow.Field(\"price\", new arrow.Float32()),\n]);\nconst emptyTable = await db.createEmptyTable(\n \"test_empty_table\",\n emptySchema,\n {\n mode: \"overwrite\",\n },\n);\n";

export const TsCreateTableConflictHandling = "// Idempotent open: reuse the existing table if it exists.\n// The provided data is ignored; the schema is validated against the\n// existing table and a mismatch raises an error.\nlet conflictTable = await db.createTable(\"conflict_table\", data, {\n existOk: true,\n});\n\n// Overwrite: drop the existing table and create a new one with the\n// provided data. This permanently discards the old table's data.\nconflictTable = await db.createTable(\"conflict_table\", data, {\n mode: \"overwrite\",\n});\n";

export const TsCreateTableCustomSchema = "const customSchema = new arrow.Schema([\n new arrow.Field(\n \"vector\",\n new arrow.FixedSizeList(\n 4,\n new arrow.Field(\"item\", new arrow.Float32(), true),\n ),\n ),\n new arrow.Field(\"lat\", new arrow.Float32()),\n new arrow.Field(\"long\", new arrow.Float32()),\n]);\n\nconst customSchemaData = lancedb.makeArrowTable(\n [\n { vector: [1.1, 1.2, 1.3, 1.4], lat: 45.5, long: -122.7 },\n { vector: [0.2, 1.8, 0.4, 3.6], lat: 40.1, long: -74.1 },\n ],\n { schema: customSchema },\n);\nconst customSchemaTable = await db.createTable(\n \"my_table_custom_schema\",\n customSchemaData,\n { mode: \"overwrite\" },\n);\n";

export const TsCreateTableFromArrow = "const arrowSchema = new arrow.Schema([\n new arrow.Field(\n \"vector\",\n new arrow.FixedSizeList(\n 16,\n new arrow.Field(\"item\", new arrow.Float32(), true),\n ),\n ),\n new arrow.Field(\"text\", new arrow.Utf8()),\n]);\nconst arrowData = lancedb.makeArrowTable(\n [\n { vector: Array(16).fill(0.1), text: \"foo\" },\n { vector: Array(16).fill(0.2), text: \"bar\" },\n ],\n { schema: arrowSchema },\n);\nconst arrowTable = await db.createTable(\"f32_tbl\", arrowData, {\n mode: \"overwrite\",\n});\n";
Expand Down Expand Up @@ -206,6 +212,8 @@ export const TsVersioningListAllVersions = "const allVersions = await table.list

export const TsVersioningRollback = "await table.checkout(versionAfterMod);\nawait table.restore();\nconst versionsAfterRollback = await table.listVersions();\nconst versionCountAfterRollback = versionsAfterRollback.length;\nconsole.log(\n `Total number of versions after rollback: ${versionCountAfterRollback}`,\n);\n";

export const TsVersioningTags = "const tags = await tagsTable.tags();\n\n// Create a tag pointing at a specific version\nawait tags.create(\"baseline\", 1);\nawait tags.create(\"with-edits\", await tagsTable.version());\n\n// List all tags on this table\nconsole.log(await tags.list());\n\n// Look up the version a tag points at\nconsole.log(await tags.getVersion(\"baseline\"));\n\n// Move an existing tag to a different version\nawait tags.update(\"baseline\", 2);\n\n// Check out a version by tag name\nawait tagsTable.checkout(\"baseline\");\nconsole.log(await tagsTable.version());\n\n// Delete a tag (does not delete the underlying version)\nawait tags.delete(\"with-edits\");\n\n// Return to the latest version\nawait tagsTable.checkoutLatest();\n";

export const TsVersioningUpdateData = "await table.update({\n where: \"author = 'Richard'\",\n values: { author: \"Richard Daniel Sanchez\" },\n});\nconst rowsAfterUpdate = await table.countRows(\n \"author = 'Richard Daniel Sanchez'\",\n);\nconsole.log(`Rows updated to Richard Daniel Sanchez: ${rowsAfterUpdate}`);\n";

export const RsAddColumnsCalculated = "// Add a discounted price column (10% discount)\nschema_add_table\n .add_columns(\n NewColumnTransform::SqlExpressions(vec![(\n \"discounted_price\".to_string(),\n \"cast((price * 0.9) as float)\".to_string(),\n )]),\n None,\n )\n .await\n .unwrap();\n";
Expand Down Expand Up @@ -234,6 +242,8 @@ export const RsConsistencyStrong = "let strong_writer_db = connect(&db_uri).exec

export const RsCreateEmptyTable = "let empty_schema = Arc::new(Schema::new(vec![\n Field::new(\n \"vector\",\n DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 2),\n false,\n ),\n Field::new(\"item\", DataType::Utf8, false),\n Field::new(\"price\", DataType::Float32, false),\n]));\nlet empty_table = db\n .create_empty_table(\"test_empty_table\", empty_schema)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n";

export const RsCreateTableConflictHandling = "// Idempotent open: reuse the existing table if it exists.\n// The provided data is ignored; the schema is validated against the\n// existing table and a mismatch raises an error.\nlet _conflict_table = db\n .create_table(\"conflict_table\", exist_ok_reader)\n .mode(CreateTableMode::exist_ok(|req| req))\n .execute()\n .await\n .unwrap();\n\n// Overwrite: drop the existing table and create a new one with the\n// provided data. This permanently discards the old table's data.\nlet conflict_table = db\n .create_table(\"conflict_table\", overwrite_reader)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n";

export const RsCreateTableCustomSchema = "let custom_schema = Arc::new(Schema::new(vec![\n Field::new(\n \"vector\",\n DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 4),\n false,\n ),\n Field::new(\"lat\", DataType::Float32, false),\n Field::new(\"long\", DataType::Float32, false),\n]));\n\nlet custom_batch = RecordBatch::try_new(\n custom_schema.clone(),\n vec![\n Arc::new(\n FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n vec![\n Some(vec![Some(1.1), Some(1.2), Some(1.3), Some(1.4)]),\n Some(vec![Some(0.2), Some(1.8), Some(0.4), Some(3.6)]),\n ],\n 4,\n ),\n ),\n Arc::new(Float32Array::from(vec![45.5, 40.1])),\n Arc::new(Float32Array::from(vec![-122.7, -74.1])),\n ],\n)\n.unwrap();\nlet custom_reader =\n RecordBatchIterator::new(vec![Ok(custom_batch)].into_iter(), custom_schema.clone());\nlet custom_table = db\n .create_table(\"my_table_custom_schema\", custom_reader)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n";

export const RsCreateTableFromArrow = "let arrow_schema = Arc::new(Schema::new(vec![\n Field::new(\n \"vector\",\n DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 16),\n false,\n ),\n Field::new(\"text\", DataType::Utf8, false),\n]));\n\nlet arrow_batch = RecordBatch::try_new(\n arrow_schema.clone(),\n vec![\n Arc::new(\n FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n vec![Some(vec![Some(0.1); 16]), Some(vec![Some(0.2); 16])],\n 16,\n ),\n ),\n Arc::new(StringArray::from(vec![\"foo\", \"bar\"])),\n ],\n)\n.unwrap();\nlet arrow_reader =\n RecordBatchIterator::new(vec![Ok(arrow_batch)].into_iter(), arrow_schema.clone());\nlet arrow_table = db\n .create_table(\"arrow_table_example\", arrow_reader)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n";
Expand Down Expand Up @@ -300,5 +310,7 @@ export const RsVersioningMakeQuotesReader = "fn make_quotes_reader(rows: Vec<(i6

export const RsVersioningRollback = "table.checkout(version_after_mod).await.unwrap();\ntable.restore().await.unwrap();\nlet versions_after_rollback = table.list_versions().await.unwrap();\nlet version_count_after_rollback = versions_after_rollback.len();\nprintln!(\n \"Total number of versions after rollback: {}\",\n version_count_after_rollback\n);\n";

export const RsVersioningTags = "let mut tags = tags_table.tags().await.unwrap();\n\n// Create a tag pointing at a specific version\ntags.create(\"baseline\", 1).await.unwrap();\nlet current_version = tags_table.version().await.unwrap();\ntags.create(\"with-edits\", current_version).await.unwrap();\n\n// List all tags on this table\nlet all_tags = tags.list().await.unwrap();\nprintln!(\"Tags: {:?}\", all_tags);\n\n// Look up the version a tag points at\nlet baseline_version = tags.get_version(\"baseline\").await.unwrap();\nprintln!(\"baseline -> v{}\", baseline_version);\n\n// Move an existing tag to a different version\ntags.update(\"baseline\", 2).await.unwrap();\n\n// Check out a version by tag name (separate method in Rust)\ntags_table.checkout_tag(\"baseline\").await.unwrap();\nprintln!(\"Current version: {}\", tags_table.version().await.unwrap());\n\n// Delete a tag (does not delete the underlying version)\ntags.delete(\"with-edits\").await.unwrap();\n\n// Return to the latest version\ntags_table.checkout_latest().await.unwrap();\n";

export const RsVersioningUpdateData = "table\n .update()\n .only_if(\"author = 'Richard'\")\n .column(\"author\", \"'Richard Daniel Sanchez'\")\n .execute()\n .await\n .unwrap();\nlet rows_after_update = table\n .count_rows(Some(\"author = 'Richard Daniel Sanchez'\".to_string()))\n .await\n .unwrap();\nprintln!(\n \"Rows updated to Richard Daniel Sanchez: {}\",\n rows_after_update\n);\n";

36 changes: 36 additions & 0 deletions docs/tables/create.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ import {
PyCreateTableFromDicts as CreateTableFromDicts,
TsCreateTableFromDicts as TsCreateTableFromDicts,
RsCreateTableFromDicts as RsCreateTableFromDicts,
PyCreateTableConflictHandling as CreateTableConflictHandling,
TsCreateTableConflictHandling as TsCreateTableConflictHandling,
RsCreateTableConflictHandling as RsCreateTableConflictHandling,
PyCreateTableFromPandas as CreateTableFromPandas,
PyCreateTableCustomSchema as CreateTableCustomSchema,
TsCreateTableCustomSchema as TsCreateTableCustomSchema,
Expand Down Expand Up @@ -82,6 +85,39 @@ support lists/arrays of dictionaries, while the Rust SDK supports lists of struc
</CodeBlock>
</CodeGroup>

### Handle existing tables

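By default, `create_table` raises an error if a table with the same name already exists.
You can change this behavior with one of two parameters, each of which resolves the conflict
in a different way: `exist_ok` (`existOk` in TypeScript, `CreateTableMode::exist_ok` in Rust)
or `mode="overwrite"` (`CreateTableMode::Overwrite` in Rust):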

- **Idempotent open**: return the existing table without modifying it. Use this when your
code may run more than once (notebooks, retries, init scripts) and you want to reuse
the table on subsequent runs. The provided data is ignored, but the schema is
validated against the existing table, and a mismatch raises an error.
- **Overwrite**: drop the existing table and create a new one with the provided data.
Use this for test fixtures or when you intentionally want to replace prior contents.
This permanently discards the old table's data.

<CodeGroup>
<CodeBlock filename="Python" language="Python" icon="python">
{CreateTableConflictHandling}
</CodeBlock>

<CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
{TsCreateTableConflictHandling}
</CodeBlock>

<CodeBlock filename="Rust" language="Rust" icon="rust">
{RsCreateTableConflictHandling}
</CodeBlock>
</CodeGroup>

<Note>
`exist_ok` / `existOk` does not append the provided data to an existing table. Use
[`table.add()`](/tables/update) for that. If you need to ensure a table exists *and*
contains specific rows, prefer the [empty-table-then-add pattern](#create-empty-table).
</Note>

### From a custom schema

You can define a custom Arrow schema for the table. This is useful when you want to have more control over the column types and metadata.
Expand Down
38 changes: 38 additions & 0 deletions docs/tables/versioning.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ import {
PyVersioningDeleteData as VersioningDeleteData,
TsVersioningDeleteData as TsVersioningDeleteData,
RsVersioningDeleteData as RsVersioningDeleteData,
PyVersioningTags as VersioningTags,
TsVersioningTags as TsVersioningTags,
RsVersioningTags as RsVersioningTags,
RsVersioningMakeQuotesReader as RsVersioningMakeQuotesReader,
} from '/snippets/tables.mdx';

Expand Down Expand Up @@ -184,6 +187,37 @@ Now let's restore a captured version snapshot:
</CodeBlock>
</CodeGroup>

## Tag-Based Versioning

Numeric table versions like `v3` or `v17` are precise but hard to remember. Tags
let you attach human-readable labels (e.g., `"prod"`, `"baseline"`,
`"q3-evaluation"`) to specific versions and check those out by name. They are
conceptually similar to git tags, and unlike untagged versions, **tagged versions
are preserved when old versions are pruned** (see the cleanup note at the bottom
of this page).

The tags API supports the standard CRUD operations — create, list, update, delete —
plus checking out by tag name.

<CodeGroup>
<CodeBlock filename="Python" language="Python" icon="python">
{VersioningTags}
</CodeBlock>

<CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
{TsVersioningTags}
</CodeBlock>

<CodeBlock filename="Rust" language="Rust" icon="rust">
{RsVersioningTags}
</CodeBlock>
</CodeGroup>

<Note>
Deleting a tag only removes the label, not the version it points to. After
deletion, the underlying table version becomes eligible for cleanup again
(as long as no other tag still points at it).
</Note>

## Delete Data From the Table

Let's demonstrate how deletions also create new versions:
Expand Down Expand Up @@ -242,4 +276,8 @@ Read-only and checkout operations shown here (`list_versions`/`listVersions`, `v
System operations like `optimize()`, index updates, and table compaction also increment table version numbers.
In LanceDB OSS and Enterprise, `optimize()` can prune older versions based on its retention setting (`cleanup_older_than`, 7 days by default),
which is when old-version files are removed and disk space is reclaimed.

**Tagged versions are exempt from cleanup.** A version with a tag pointing at it is
retained regardless of age, and its files are not removed by `optimize()`. To make
a tagged version eligible for pruning, [delete the tag](#tag-based-versioning) first.
</Note>
Loading
Loading