Skip to content

Commit

Permalink
feat: Initial remote table implementation for rust (lancedb#1024)
Browse files Browse the repository at this point in the history
This will eventually replace the remote table implementations in python
and node.
  • Loading branch information
westonpace committed Feb 29, 2024
1 parent debad0f commit 1cc982e
Show file tree
Hide file tree
Showing 18 changed files with 376 additions and 21 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
python-version: "3.11"
- name: Install ruff
run: |
pip install ruff
pip install ruff==0.2.2
- name: Format check
run: ruff format --check .
- name: Lint
Expand Down
37 changes: 37 additions & 0 deletions .github/workflows/remote-integration.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
name: LanceDb Cloud Integration Test

on:
workflow_run:
workflows: [Rust]
types:
- completed

env:
LANCEDB_PROJECT: ${{ secrets.LANCEDB_PROJECT }}
LANCEDB_API_KEY: ${{ secrets.LANCEDB_API_KEY }}
LANCEDB_REGION: ${{ secrets.LANCEDB_REGION }}

jobs:
test:
timeout-minutes: 30
runs-on: ubuntu-22.04
defaults:
run:
shell: bash
working-directory: rust
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- name: Build
run: cargo build --all-features
- name: Run Integration test
run: cargo test --tests -- --ignored
1 change: 1 addition & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,4 @@ jobs:
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
cargo build
cargo test
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,6 @@ dist
## Rust
target

**/sccache.log

Cargo.lock
11 changes: 1 addition & 10 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,8 @@ repos:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 22.12.0
hooks:
- id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.0.277
rev: v0.2.2
hooks:
- id: ruff
- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
name: isort (python)
6 changes: 3 additions & 3 deletions python/python/lancedb/embeddings/instructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ class Schema(LanceModel):
# convert_to_numpy: bool = True # Hardcoding this as numpy can be ingested directly

source_instruction: str = "represent the document for retrieval"
query_instruction: (
str
) = "represent the document for retrieving the most similar documents"
query_instruction: str = (
"represent the document for retrieving the most similar documents"
)

@weak_lru(maxsize=1)
def ndims(self):
Expand Down
1 change: 1 addition & 0 deletions python/python/lancedb/fts.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# limitations under the License.

"""Full text search index using tantivy-py"""

import os
from typing import List, Tuple

Expand Down
1 change: 1 addition & 0 deletions python/python/lancedb/remote/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ def submit(name, q):
f = Future()
f.set_result(self._conn._client.query(name, q))
return f

else:

def submit(name, q):
Expand Down
1 change: 1 addition & 0 deletions python/python/lancedb/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# limitations under the License.

"""Schema related utilities."""

import pyarrow as pa


Expand Down
6 changes: 4 additions & 2 deletions python/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,16 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
match &self {
Ok(_) => Ok(self.unwrap()),
Err(err) => match err {
LanceError::InvalidInput { .. } => self.value_error(),
LanceError::InvalidTableName { .. } => self.value_error(),
LanceError::TableNotFound { .. } => self.value_error(),
LanceError::TableAlreadyExists { .. } => self.runtime_error(),
LanceError::Schema { .. } => self.value_error(),
LanceError::CreateDir { .. } => self.os_error(),
LanceError::TableAlreadyExists { .. } => self.runtime_error(),
LanceError::Store { .. } => self.runtime_error(),
LanceError::Lance { .. } => self.runtime_error(),
LanceError::Schema { .. } => self.value_error(),
LanceError::Runtime { .. } => self.runtime_error(),
LanceError::Http { .. } => self.runtime_error(),
},
}
}
Expand Down
10 changes: 9 additions & 1 deletion rust/lancedb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,19 @@ async-trait = "0"
bytes = "1"
futures.workspace = true
num-traits.workspace = true
url = { workspace = true }
url.workspace = true
serde = { version = "^1" }
serde_json = { version = "1" }

# For remote feature

reqwest = { version = "0.11.24", features = ["gzip", "json"], optional = true }

[dev-dependencies]
tempfile = "3.5.0"
rand = { version = "0.8.3", features = ["small_rng"] }
walkdir = "2"

[features]
default = ["remote"]
remote = ["dep:reqwest"]
40 changes: 36 additions & 4 deletions rust/lancedb/src/connection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ impl OpenTableBuilder {
}

#[async_trait::async_trait]
trait ConnectionInternal: Send + Sync + std::fmt::Debug + 'static {
pub(crate) trait ConnectionInternal: Send + Sync + std::fmt::Debug + 'static {
async fn table_names(&self) -> Result<Vec<String>>;
async fn do_create_table(&self, options: CreateTableBuilder<true>) -> Result<TableRef>;
async fn do_open_table(&self, options: OpenTableBuilder) -> Result<TableRef>;
Expand Down Expand Up @@ -365,14 +365,46 @@ impl ConnectBuilder {
self
}

/// Establishes a connection to the database
pub async fn execute(self) -> Result<Connection> {
let internal = Arc::new(Database::connect_with_options(&self).await?);
#[cfg(feature = "remote")]
fn execute_remote(self) -> Result<Connection> {
let region = self.region.ok_or_else(|| Error::InvalidInput {
message: "A region is required when connecting to LanceDb Cloud".to_string(),
})?;
let api_key = self.api_key.ok_or_else(|| Error::InvalidInput {
message: "An api_key is required when connecting to LanceDb Cloud".to_string(),
})?;
let internal = Arc::new(crate::remote::db::RemoteDatabase::try_new(
&self.uri,
&api_key,
&region,
self.host_override,
)?);
Ok(Connection {
internal,
uri: self.uri,
})
}

#[cfg(not(feature = "remote"))]
fn execute_remote(self) -> Result<Connection> {
Err(Error::Runtime {
message: "cannot connect to LanceDb Cloud unless the 'remote' feature is enabled"
.to_string(),
})
}

/// Establishes a connection to the database
pub async fn execute(self) -> Result<Connection> {
if self.uri.starts_with("db") {
self.execute_remote()
} else {
let internal = Arc::new(Database::connect_with_options(&self).await?);
Ok(Connection {
internal,
uri: self.uri,
})
}
}
}

/// Connect to a LanceDB database.
Expand Down
22 changes: 22 additions & 0 deletions rust/lancedb/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ use snafu::Snafu;
pub enum Error {
#[snafu(display("LanceDBError: Invalid table name: {name}"))]
InvalidTableName { name: String },
#[snafu(display("LanceDBError: Invalid input, {message}"))]
InvalidInput { message: String },
#[snafu(display("LanceDBError: Table '{name}' was not found"))]
TableNotFound { name: String },
#[snafu(display("LanceDBError: Table '{name}' already exists"))]
Expand All @@ -31,6 +33,8 @@ pub enum Error {
path: String,
source: std::io::Error,
},
#[snafu(display("LanceDBError: Http error: {message}"))]
Http { message: String },
#[snafu(display("LanceDBError: {message}"))]
Store { message: String },
#[snafu(display("LanceDBError: {message}"))]
Expand Down Expand Up @@ -82,3 +86,21 @@ impl<T> From<PoisonError<T>> for Error {
}
}
}

#[cfg(feature = "remote")]
impl From<reqwest::Error> for Error {
fn from(e: reqwest::Error) -> Self {
Self::Http {
message: e.to_string(),
}
}
}

#[cfg(feature = "remote")]
impl From<url::ParseError> for Error {
fn from(e: url::ParseError) -> Self {
Self::Http {
message: e.to_string(),
}
}
}
2 changes: 2 additions & 0 deletions rust/lancedb/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ pub mod index;
pub mod io;
pub mod ipc;
pub mod query;
#[cfg(feature = "remote")]
pub(crate) mod remote;
pub mod table;
pub mod utils;

Expand Down
21 changes: 21 additions & 0 deletions rust/lancedb/src/remote.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! This module contains a remote client for a LanceDB server. This is used
//! to communicate with LanceDB cloud. It can also serve as an example for
//! building client/server applications with LanceDB or as a client for some
//! other custom LanceDB service.

pub mod client;
pub mod db;
Loading

0 comments on commit 1cc982e

Please sign in to comment.