From f6e167c8105853494a0a667cd51f694fa7373e72 Mon Sep 17 00:00:00 2001
From: Jack Ye
Date: Wed, 2 Jul 2025 22:47:07 -0700
Subject: [PATCH] feat: add docs setup

---
 .github/workflows/publish.yml | 98 +++++++++++++++++++++++++++++++++++
 .gitignore                    | 97 ++++++++++++++++++++++++++++++++++
 docs/Makefile                 | 20 +++++++
 docs/all-modules.rst          | 14 +++++
 docs/basic-io.rst             | 13 +++++
 docs/conf.py                  | 80 ++++++++++++++++++++++++++++
 docs/data-evolution.rst       | 10 ++++
 docs/dataset.rst              |  8 +++
 docs/index-and-search.rst     | 11 ++++
 docs/index.rst                | 17 ++++++
 docs/make.bat                 | 35 +++++++++++++
 docs/random-access.rst        |  9 ++++
 docs/requirements.txt         |  2 +
 13 files changed, 414 insertions(+)
 create mode 100644 .github/workflows/publish.yml
 create mode 100644 .gitignore
 create mode 100644 docs/Makefile
 create mode 100644 docs/all-modules.rst
 create mode 100644 docs/basic-io.rst
 create mode 100644 docs/conf.py
 create mode 100644 docs/data-evolution.rst
 create mode 100644 docs/dataset.rst
 create mode 100644 docs/index-and-search.rst
 create mode 100644 docs/index.rst
 create mode 100644 docs/make.bat
 create mode 100644 docs/random-access.rst
 create mode 100644 docs/requirements.txt

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..1da58e8
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,98 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Publish Doc
+
+on:
+  push:
+    branches: [main]
+  schedule:
+    - cron: '0 */2 * * *' # every 2 hours
+  workflow_dispatch:
+
+# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
+permissions:
+  contents: write
+
+env:
+  # This env var is used by Swatinem/rust-cache@v2 for the cache
+  # key, so we set it to make sure it is always consistent.
+  CARGO_TERM_COLOR: always
+  # Disable full debug symbol generation to speed up CI build and keep memory down
+  # "1" means line tables only, which is useful for panic tracebacks.
+  RUSTFLAGS: "-C debuginfo=1"
+  RUST_BACKTRACE: "1"
+  # according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html
+  # CI builds are faster with incremental disabled.
+  CARGO_INCREMENTAL: "0"
+  CARGO_BUILD_JOBS: "1"
+
+jobs:
+  build-and-deploy:
+    # scheduled run should only happen on main repo not forked ones.
+    if: |
+      github.event_name != 'schedule' || github.repository == 'lancedb/lance-python-doc'
+    runs-on: ubuntu-24.04
+    timeout-minutes: 60
+    strategy:
+      matrix:
+        python-version: [ "3.11" ] # Quoted so YAML keeps it a string. Ray does not support 3.12 yet.
+    steps:
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y protobuf-compiler libssl-dev
+      # use the stable Rust channel (tracks the latest stable release; not a pinned version)
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          toolchain: stable
+      - uses: rui314/setup-mold@v1
+      - name: Install cargo-llvm-cov
+        uses: taiki-e/install-action@cargo-llvm-cov
+      - name: Checkout lance-python-doc repo
+        uses: actions/checkout@v4
+      - name: Checkout lance repo
+        uses: actions/checkout@v4
+        with:
+          repository: lancedb/lance
+          path: lance
+      - uses: Swatinem/rust-cache@v2
+        with:
+          workspaces: lance/python
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'pip'
+      - name: Copy docs to Lance
+        run: |
+          mv docs lance/python/docs
+      - name: Set up Python virtual environment
+        run: |
+          python -m venv venv
+          source venv/bin/activate
+          pip install maturin
+          pip install -r lance/python/docs/requirements.txt
+      - name: Build Doc
+        working-directory: lance/python
+        run: |
+          source ../../venv/bin/activate
+          maturin develop
+          sphinx-build -b html docs docs/_build/html
+      - name: Add .nojekyll
+        run: |
+          touch lance/python/docs/_build/html/.nojekyll
+      - name: Deploy to GitHub Pages
+        uses: peaceiris/actions-gh-pages@v4
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: lance/python/docs/_build/html
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7caa32f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,97 @@
+# Prerequisites
+*.d
+
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.obj
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Compiled Dynamic libraries
+*.so
+*.dylib
+*.dll
+
+# Fortran module files
+*.mod
+*.smod
+
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*.lib
+
+# Executables
+*.exe
+*.out
+*.app
+
+# Tracing files
+trace-*.json
+
+**/*~
+**/__pycache__
+build/
+_build/
+dist/
+*.egg-info/
+.python-version
+
+.idea
+cmake-build-*
+.vscode
+.DS_Store
+
+python/lance/_*.cpp
+
+bin/
+
+
+*.parquet
+*.parq
+
+python/thirdparty/arrow/
+python/wheels
+python/benchmark_data
+
+logs
+*.ckpt
+
+docs/_build
+docs/api/python
+
+**/.ipynb_checkpoints/
+docs/notebooks
+
+notebooks/sift
+notebooks/image_data/data
+benchmarks/sift/sift
+benchmarks/sift/sift.lance
+benchmarks/sift/lance_ivf*.csv
+**/sift.tar.gz
+
+wheelhouse
+
+# pandas testing
+.hypothesis
+
+
+**/df.json
+
+# Rust
+target
+**/sccache.log
+
+# c++ lsp
+.ccls-cache/
+
+python/venv
+test_data/venv
+
+**/*.profraw
+*.lance
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..269cadc
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
diff --git a/docs/all-modules.rst b/docs/all-modules.rst
new file mode 100644
index 0000000..6250e56
--- /dev/null
+++ b/docs/all-modules.rst
@@ -0,0 +1,14 @@
+All Modules
+===========
+
+.. automodule:: lance
+   :members:
+   :undoc-members:
+
+.. automodule:: lance.dataset
+   :members:
+   :undoc-members:
+
+.. automodule:: lance.fragment
+   :members:
+   :undoc-members:
diff --git a/docs/basic-io.rst b/docs/basic-io.rst
new file mode 100644
index 0000000..d022def
--- /dev/null
+++ b/docs/basic-io.rst
@@ -0,0 +1,13 @@
+Basic IOs
+=========
+
+The following functions are used to read and write data in Lance format.
+
+.. automethod:: lance.dataset.LanceDataset.insert
+   :noindex:
+.. automethod:: lance.dataset.LanceDataset.scanner
+   :noindex:
+.. automethod:: lance.dataset.LanceDataset.to_batches
+   :noindex:
+.. automethod:: lance.dataset.LanceDataset.to_table
+   :noindex:
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..baa267e
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright The Lance Authors
+
+# Configuration file for the Sphinx documentation builder.
+
+import sys
+import os
+
+# -- Project information -----------------------------------------------------
+
+project = "pylance"
+copyright = "%Y, Lance Developer"
+author = "Lance Developer"
+
+sys.path.insert(0, os.path.abspath("../"))
+
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    'sphinx.ext.duration',
+    "sphinx.ext.autodoc",
+    "sphinx.ext.doctest",
+    "sphinx.ext.intersphinx",
+    'sphinx.ext.autosummary',
+    "sphinx.ext.napoleon",
+]
+
+napoleon_google_docstring = False
+napoleon_numpy_docstring = True
+napoleon_include_private_with_doc = False
+napoleon_include_special_with_doc = False
+
+autodoc_typehints = "signature"
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+
+intersphinx_mapping = {
+    "python": ("https://docs.python.org/3/", None),
+    "sphinx": ("https://www.sphinx-doc.org/en/master/", None),
+    "numpy": ("https://numpy.org/doc/stable/", None),
+    "pyarrow": ("https://arrow.apache.org/docs/", None),
+    "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
+    "ray": ("https://docs.ray.io/en/latest/", None),
+}
+intersphinx_disabled_domains = ['std']
+
+# -- Options for HTML output -------------------------------------------------
+
+html_theme = 'sphinx_rtd_theme'
+html_theme_options = {
+    "collapse_navigation": False,  # Show all entries expanded
+    "navigation_depth": 4,  # Show nested headings
+    "titles_only": False  # Show both page titles and section titles
+}
+
+# -- doctest configuration ---------------------------------------------------
+
+doctest_global_setup = """
+import os
+import shutil
+from typing import Iterator
+
+import lance
+import pyarrow as pa
+import numpy as np
+import pandas as pd
+"""
+
+# Only test code examples in rst files
+doctest_test_doctest_blocks = ""
diff --git a/docs/data-evolution.rst b/docs/data-evolution.rst
new file mode 100644
index 0000000..1c8081a
--- /dev/null
+++ b/docs/data-evolution.rst
@@ -0,0 +1,10 @@
+Data Evolution
+==============
+
+Lance supports zero-copy data evolution, which means that you can add new columns and
+backfill column data to the dataset cheaply.
+
+.. automethod:: lance.dataset.LanceDataset.add_columns
+   :noindex:
+.. automethod:: lance.dataset.LanceDataset.drop_columns
+   :noindex:
\ No newline at end of file
diff --git a/docs/dataset.rst b/docs/dataset.rst
new file mode 100644
index 0000000..16eeaa6
--- /dev/null
+++ b/docs/dataset.rst
@@ -0,0 +1,8 @@
+Lance Dataset
+=============
+
+The core of Lance is the ``LanceDataset`` class. Users can open a dataset by using
+:py:func:`lance.dataset`.
+
+.. autofunction:: lance.dataset
+   :noindex:
\ No newline at end of file
diff --git a/docs/index-and-search.rst b/docs/index-and-search.rst
new file mode 100644
index 0000000..83b31bb
--- /dev/null
+++ b/docs/index-and-search.rst
@@ -0,0 +1,11 @@
+Indexing and Searching
+======================
+
+.. automethod:: lance.dataset.LanceDataset.create_index
+   :noindex:
+.. automethod:: lance.dataset.LanceDataset.create_scalar_index
+   :noindex:
+.. automethod:: lance.dataset.LanceDataset.drop_index
+   :noindex:
+.. automethod:: lance.dataset.LanceDataset.scanner
+   :noindex:
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..93c65d5
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,17 @@
+pylance
+===========
+
+Lance is a modern columnar data format for ML and LLMs.
+This is the auto-generated Python documentation for its Python SDK ``pylance``.
+For a more detailed quickstart and user guide, please visit the `official Lance documentation website <https://lancedb.github.io/lance/>`_.
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents
+
+   dataset
+   basic-io
+   random-access
+   data-evolution
+   index-and-search
+   all-modules
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..5394189
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.https://www.sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
\ No newline at end of file
diff --git a/docs/random-access.rst b/docs/random-access.rst
new file mode 100644
index 0000000..577fc17
--- /dev/null
+++ b/docs/random-access.rst
@@ -0,0 +1,9 @@
+Random Access
+=============
+
+Lance stands out with its super fast random access, unlike other table or file formats.
+
+.. automethod:: lance.dataset.LanceDataset.take
+   :noindex:
+.. automethod:: lance.dataset.LanceDataset.take_blobs
+   :noindex:
\ No newline at end of file
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..825dc44
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,2 @@
+sphinx>=8.1
+sphinx-rtd-theme
\ No newline at end of file