9 changes: 9 additions & 0 deletions docs/.cursor/rules/.cursorrules
@@ -0,0 +1,9 @@
## MDX frontmatter
- Every page must include exactly these Mintlify frontmatter fields: `title`, `sidebarTitle`, and `description` (no extras unless specifically required).
- Labels like `weight` and `aliases` should not appear; they are leftovers from the older docs system.

## Code examples
- When using code groups, wrap them in `<CodeGroup>` with nested `<CodeBlock>` elements.
- Use `<CodeBlock filename="Python" language="python" icon="python">` for Python examples.
- Use `<CodeBlock filename="TypeScript" language="typescript" icon="square-js">` for TypeScript examples.
- Use the same pattern for Rust examples with appropriate filename/language/icon values.
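- Putting these rules together, a code group might look like the sketch below (the snippet names are hypothetical, and the Rust `icon` value is an assumption since the rules above don't specify it):

```mdx
<CodeGroup>
  <CodeBlock filename="Python" language="python" icon="python">
    {PyExampleSnippet}
  </CodeBlock>
  <CodeBlock filename="TypeScript" language="typescript" icon="square-js">
    {TsExampleSnippet}
  </CodeBlock>
  <CodeBlock filename="Rust" language="rust" icon="rust">
    {RsExampleSnippet}
  </CodeBlock>
</CodeGroup>
```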
1 change: 1 addition & 0 deletions docs/docs.json
@@ -92,6 +92,7 @@
"user-guides/indexing/fts-index",
"user-guides/indexing/scalar-index",
"user-guides/indexing/gpu-indexing",
"user-guides/indexing/quantization",
"user-guides/indexing/reindexing"
]
},
2 changes: 2 additions & 0 deletions docs/snippets/indexing.mdx
@@ -1,5 +1,7 @@
{/* Auto-generated by scripts/mdx_snippets_gen.py. Do not edit manually. */}

export const PyFtsIndexAsync = "import asyncio\nimport lancedb\nimport polars as pl\nfrom lancedb.index import FTS\n\ndata = pl.DataFrame(\n {\n \"id\": [1, 2],\n \"text\": [\n \"His first language is spanish\",\n \"Her first language is english\",\n ],\n }\n)\n\nasync def main(data: pl.DataFrame):\n uri = \"ex_lancedb\"\n db = await lancedb.connect_async(uri)\n tbl = await db.create_table(\"my_text\", data=data, mode=\"overwrite\")\n\n await tbl.create_index(\"text\", config=FTS(language=\"English\"))\n\n response = await tbl.search(\"spanish\", query_type=\"fts\")\n result = await response.limit(1).to_polars()\n print(result)\n\nif __name__ == \"__main__\":\n asyncio.run(main(data))\n";

export const PyFtsIndexCreate = "import lancedb\n\ndb = tmp_db\ntable_name = \"fts_index_create\"\ntable = db.open_table(table_name)\ntable.create_fts_index(\"text\")\n";

export const PyFtsIndexWait = "index_name = \"text_idx\"\ntable.wait_for_index([index_name])\n";
4 changes: 2 additions & 2 deletions docs/snippets/quickstart.mdx
@@ -14,6 +14,8 @@ export const PyQuickstartVectorSearch1 = "# Let's search for vectors similar to

export const PyQuickstartVectorSearch2 = "# Let's search for vectors similar to \"wizard\"\nquery_vector = [0.7, 0.3, 0.5]\n\nresults = table.search(query_vector).limit(2).to_polars()\nprint(results)\n";

export const TsQuickstartOutputPandas = "result = await table.search(queryVector).limit(2).toArray();\n";

export const TsQuickstartAddData = "const moreData = [\n { id: \"7\", text: \"mage\", vector: [0.6, 0.3, 0.4] },\n { id: \"8\", text: \"bard\", vector: [0.3, 0.8, 0.4] },\n];\n\n// Add data to table\nawait table.add(moreData);\n";

export const TsQuickstartCreateTable = "const data = [\n { id: \"1\", text: \"knight\", vector: [0.9, 0.4, 0.8] },\n { id: \"2\", text: \"ranger\", vector: [0.8, 0.4, 0.7] },\n { id: \"9\", text: \"priest\", vector: [0.6, 0.2, 0.6] },\n { id: \"4\", text: \"rogue\", vector: [0.7, 0.4, 0.7] },\n];\nlet table = await db.createTable(\"adventurers\", data, { mode: \"overwrite\" });\n";
@@ -24,8 +26,6 @@ export const TsQuickstartOpenTable = "table = await db.openTable(\"adventurers\"

export const TsQuickstartOutputArray = "result = await table.search(queryVector).limit(2).toArray();\n";

export const TsQuickstartOutputPandas = "result = await table.search(queryVector).limit(2).toArray();\n";

export const TsQuickstartVectorSearch1 = "// Let's search for vectors similar to \"warrior\"\nlet queryVector = [0.8, 0.3, 0.8];\n\nlet result = await table.search(queryVector).limit(2).toArray();\n";

export const TsQuickstartVectorSearch2 = "// Let's search for vectors similar to \"wizard\"\nqueryVector = [0.7, 0.3, 0.5];\n\nconst results = await table.search(queryVector).limit(2).toArray();\nconsole.log(results);\n";
4 changes: 2 additions & 2 deletions docs/snippets/search.mdx
@@ -8,10 +8,10 @@ export const PyBasicHybridSearch = "data = [\n    {\"text\": \"rebel spaceships

export const PyBasicHybridSearchAsync = "uri = \"data/sample-lancedb\"\nasync_db = await lancedb.connect_async(uri)\ndata = [\n {\"text\": \"rebel spaceships striking from a hidden base\"},\n {\"text\": \"have won their first victory against the evil Galactic Empire\"},\n {\"text\": \"during the battle rebel spies managed to steal secret plans\"},\n {\"text\": \"to the Empire's ultimate weapon the Death Star\"},\n]\nasync_tbl = await async_db.create_table(\"documents_async\", schema=Documents)\n# ingest docs with auto-vectorization\nawait async_tbl.add(data)\n# Create a fts index before the hybrid search\nawait async_tbl.create_index(\"text\", config=FTS())\ntext_query = \"flower moon\"\n# hybrid search with default re-ranker\nawait (await async_tbl.search(\"flower moon\", query_type=\"hybrid\")).to_pandas()\n";

export const PyClassDocuments = "class Documents(LanceModel):\n vector: Vector(embeddings.ndims()) = embeddings.VectorField()\n text: str = embeddings.SourceField()\n";

export const PyClassDefinition = "class Metadata(BaseModel):\n source: str\n timestamp: datetime\n\n\nclass Document(BaseModel):\n content: str\n meta: Metadata\n\n\nclass LanceSchema(LanceModel):\n id: str\n vector: Vector(1536)\n payload: Document\n";

export const PyClassDocuments = "class Documents(LanceModel):\n vector: Vector(embeddings.ndims()) = embeddings.VectorField()\n text: str = embeddings.SourceField()\n";

export const PyCreateTableAsyncWithNestedSchema = "# Let's add 100 sample rows to our dataset\ndata = [\n LanceSchema(\n id=f\"id{i}\",\n vector=np.random.randn(1536),\n payload=Document(\n content=f\"document{i}\",\n meta=Metadata(source=f\"source{i % 10}\", timestamp=datetime.now()),\n ),\n )\n for i in range(100)\n]\n\nasync_tbl = await async_db.create_table(\"documents_async\", data=data)\n";

export const PyCreateTableWithNestedSchema = "# Let's add 100 sample rows to our dataset\ndata = [\n LanceSchema(\n id=f\"id{i}\",\n vector=np.random.randn(1536),\n payload=Document(\n content=f\"document{i}\",\n meta=Metadata(source=f\"source{i % 10}\", timestamp=datetime.now()),\n ),\n )\n for i in range(100)\n]\n\n# Synchronous client\ntbl = db.create_table(\"documents\", data=data)\n";
23 changes: 17 additions & 6 deletions docs/user-guides/indexing/fts-index.mdx
@@ -2,14 +2,11 @@
title: "Full-Text Search (FTS) Index"
sidebarTitle: "Full-Text Index"
description: "Create and tune BM25-based full-text search indexes in LanceDB."
weight: 2
aliases:
- "/docs/concepts/indexing/fts-index/"
- "/docs/concepts/indexing/fts-index"
mode: "wide"
---
import { PyFtsIndexCreate as FtsIndexCreate, PyFtsIndexWait as FtsIndexWait } from '/snippets/indexing.mdx';
import { PyFtsIndexAsync as FtsIndexAsync, PyFtsIndexCreate as FtsIndexCreate, PyFtsIndexWait as FtsIndexWait } from '/snippets/indexing.mdx';

LanceDB Cloud and Enterprise provide performant full-text search based on BM25 so you can incorporate keyword-based search into retrieval solutions.
LanceDB Cloud and Enterprise provide performant full-text search based on BM25, allowing you to incorporate keyword-based search in your retrieval solutions.

<Note>
The `create_fts_index` API returns immediately, but index building happens asynchronously.
@@ -31,6 +28,20 @@ Check FTS index status using the API:
</CodeBlock>
</CodeGroup>

<Note>
In LanceDB OSS, `create_fts_index` is not supported with `AsyncTable`. When working with async connections, use `create_index` with the `FTS` configuration instead.

<Expandable title="Code Example">

<CodeGroup>
  <CodeBlock filename="Python" language="python" icon="python">
{FtsIndexAsync}
</CodeBlock>
</CodeGroup>

</Expandable>
</Note>

## Configuration Options

### FTS Parameters
28 changes: 14 additions & 14 deletions docs/user-guides/indexing/gpu-indexing.mdx
@@ -2,28 +2,28 @@
title: "GPU-Powered Vector Indexing in LanceDB"
sidebarTitle: "GPU Indexing"
description: "Accelerate IVF and HNSW index builds with GPU acceleration in LanceDB."
weight: 4
aliases:
- "/docs/concepts/indexing/gpu-indexing/"
- "/docs/concepts/indexing/gpu-indexing"
---
import { PyGpuIndexCuda as GpuIndexCuda, PyGpuIndexMps as GpuIndexMps } from '/snippets/indexing.mdx';

With LanceDB's GPU-powered indexing you can build vector indexes for billions of rows in just a few hours—dramatically improving ingestion speed.
With LanceDB's GPU-powered indexing you can create vector indexes for billions of rows in just a few hours. This can significantly accelerate your vector search operations.

> Internal tests show GPU indexing processing billions of vectors in under four hours.
> In our tests, LanceDB's GPU-powered indexing can process billions of vectors in under four hours, providing significant performance improvements over CPU-based indexing.

## Automatic GPU Indexing in LanceDB Enterprise

<Info>
Automatic GPU indexing is currently available only in LanceDB Enterprise. [Contact us](mailto:contact@lancedb.com) to enable the feature.
Automatic GPU Indexing is currently only available in LanceDB Enterprise. Please [contact us](mailto:contact@lancedb.com) to enable this feature for your deployment.
</Info>

Whenever you call `create_index`, Enterprise automatically selects GPU resources to build IVF or HNSW indexes. Indexing is asynchronous; call `wait_for_index()` to block until completion.
Whenever you call `create_index`, the backend will use GPU resources to build either the IVF or HNSW indexes. The system automatically selects the optimal GPU configuration based on your data size and available hardware.

This process is also asynchronous by default, but you can use `wait_for_index` to convert it into a synchronous process by waiting until the index is built.

## Manual GPU Indexing in LanceDB OSS

Use the Python SDK with [PyTorch ≥ 2.0](https://pytorch.org/) to manually create IVF_PQ indexes on GPUs. GPU indexing currently requires the synchronous SDK. Specify the device via the `accelerator` parameter (`"cuda"` on Linux/NVIDIA, `"mps"` on Apple Silicon).
You can use the Python SDK to manually create the IVF_PQ index. You will need [PyTorch>2.0](https://pytorch.org/). Please keep in mind that GPU based indexing is currently only supported by the synchronous SDK.

You can specify the GPU device to train IVF partitions via `accelerator`. Specify parameters `cuda` or `mps` (on Apple Silicon) to enable GPU training.

### GPU Indexing on Linux

@@ -43,12 +43,12 @@ Use the Python SDK with [PyTorch ≥ 2.0](https://pytorch.org/) to manually create

## Performance Considerations

- GPU memory usage scales with `num_partitions` and vector dimension.
- Ensure GPU memory comfortably exceeds the dataset you're indexing.
- Batch size is tuned automatically based on available GPU memory.
- Larger batches further improve throughput.
- GPU memory usage scales with `num_partitions` and vector dimensions
- For optimal performance, ensure GPU memory exceeds dataset size
- Batch size is automatically tuned based on available GPU memory
- Indexing speed improves with larger batch sizes
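To get a feel for the first bullet, here is a back-of-envelope sketch of how centroid memory scales with `num_partitions` and vector dimension. The formula and the float32 sizing are our own illustrative assumptions, not measured LanceDB numbers; training batches consume additional memory on top of this.

```python
# Rough estimate of GPU memory needed just for IVF centroids:
# each partition stores one float32 centroid of the vector's dimension.
def ivf_centroid_bytes(num_partitions: int, dim: int, bytes_per_float: int = 4) -> int:
    """Approximate memory for the IVF centroid table, in bytes."""
    return num_partitions * dim * bytes_per_float

# Example: 65,536 partitions of 1,536-dimensional float32 vectors.
mb = ivf_centroid_bytes(65536, 1536) / (1024 ** 2)
print(f"~{mb:.0f} MiB for centroids alone")  # -> ~384 MiB
```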

## Troubleshooting

If you encounter `AssertionError: Torch not compiled with CUDA enabled`, [install a PyTorch build that includes CUDA support](https://pytorch.org/get-started/locally/).
If you encounter the error `AssertionError: Torch not compiled with CUDA enabled`, you need to [install PyTorch with CUDA support](https://pytorch.org/get-started/locally/).
