Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 110 additions & 79 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ geo-types = "0.7.16"
http = "1.1.0"
humantime = "2.2.0"
hyperloglogplus = { version = "0.4.1", features = ["const-loop"] }
io-uring = "0.7"
itertools = "0.13"
jieba-rs = { version = "0.8.1", default-features = false }
jsonb = { version = "0.5.3", default-features = false, features = ["databend"] }
Expand Down
14 changes: 14 additions & 0 deletions python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions rust/lance-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ deepsize.workspace = true
futures.workspace = true
http.workspace = true
log.workspace = true
moka.workspace = true
pin-project.workspace = true
prost.workspace = true
serde.workspace = true
Expand All @@ -49,6 +50,10 @@ path_abs.workspace = true
rand.workspace = true
tempfile.workspace = true

[target.'cfg(target_os = "linux")'.dependencies]
libc = { workspace = true }
io-uring = { workspace = true }

[dev-dependencies]
criterion.workspace = true
test-log.workspace = true
Expand Down
2 changes: 2 additions & 0 deletions rust/lance-io/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ pub mod stream;
#[cfg(test)]
pub mod testing;
pub mod traits;
#[cfg(target_os = "linux")]
pub mod uring;
pub mod utils;

pub use scheduler::{bytes_read_counter, iops_counter};
Expand Down
52 changes: 52 additions & 0 deletions rust/lance-io/src/object_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ use tokio::io::AsyncWriteExt;
use url::Url;

use super::local::LocalObjectReader;
#[cfg(target_os = "linux")]
use crate::uring::{UringCurrentThreadReader, UringReader};
mod list_retry;
pub mod providers;
pub mod storage_options;
Expand Down Expand Up @@ -597,6 +599,31 @@ impl ObjectStore {
)
.await
}
#[cfg(target_os = "linux")]
"file+uring" => {
// Check if current-thread mode enabled
let use_current_thread = std::env::var("LANCE_URING_CURRENT_THREAD")
.map(|v| str_is_truthy(&v))
.unwrap_or(false);

if use_current_thread {
UringCurrentThreadReader::open(
path,
self.block_size,
None,
Arc::new(self.io_tracker.clone()),
)
.await
} else {
UringReader::open(
path,
self.block_size,
None,
Arc::new(self.io_tracker.clone()),
)
.await
}
}
_ => Ok(Box::new(CloudObjectReader::new(
self.inner.clone(),
path.clone(),
Expand Down Expand Up @@ -634,6 +661,31 @@ impl ObjectStore {
)
.await
}
#[cfg(target_os = "linux")]
"file+uring" => {
// Check if current-thread mode enabled
let use_current_thread = std::env::var("LANCE_URING_CURRENT_THREAD")
.map(|v| str_is_truthy(&v))
.unwrap_or(false);
Comment thread
westonpace marked this conversation as resolved.

if use_current_thread {
UringCurrentThreadReader::open(
path,
self.block_size,
Some(known_size),
Arc::new(self.io_tracker.clone()),
)
.await
} else {
UringReader::open(
path,
self.block_size,
Some(known_size),
Arc::new(self.io_tracker.clone()),
)
.await
}
}
_ => Ok(Box::new(CloudObjectReader::new(
self.inner.clone(),
path.clone(),
Expand Down
3 changes: 3 additions & 0 deletions rust/lance-io/src/object_store/providers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ pub struct ObjectStoreRegistryStats {
/// - `file`: A local file object store, with optimized code paths.
/// - `file-object-store`: A local file object store that uses the ObjectStore API,
/// for all operations. Used for testing with ObjectStore wrappers.
/// - `file+uring`: A local file object store using io_uring (Linux only).
/// - `s3`: An S3 object store.
/// - `s3+ddb`: An S3 object store with DynamoDB for metadata.
/// - `az`: An Azure Blob Storage object store.
Expand Down Expand Up @@ -301,6 +302,8 @@ impl Default for ObjectStoreRegistry {
"file-object-store".into(),
Arc::new(local::FileStoreProvider),
);
#[cfg(target_os = "linux")]
providers.insert("file+uring".into(), Arc::new(local::FileStoreProvider));

#[cfg(feature = "aws")]
{
Expand Down
84 changes: 84 additions & 0 deletions rust/lance-io/src/uring.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

//! io_uring-based I/O for disks with high IOPS capacity (e.g. NVMe)
//!
//! This module provides two implementations of the [`Reader`](crate::traits::Reader) trait
//! using Linux's io_uring interface for asynchronous I/O.
//!
//! One of these uses a pool of dedicated background threads which each own an io_uring instance.
//! Read requests are submitted to a background thread's pool.
//!
//! The other implementation uses a thread-local io_uring instance. This only works if the future
//! is polled by the same thread that submitted the request. This means that the runtime must be
//! a single-threaded runtime.
//!
//! # Configuration
//!
//! The io_uring reader is enabled by using the `file+uring://` URI scheme instead of `file://`.
//! Additional tuning parameters are controlled by environment variables:
//!
//! - `LANCE_URING_CURRENT_THREAD` - Use thread-local io_uring (default: false)
//! - `LANCE_URING_BLOCK_SIZE` - Block size in bytes (default: 4KB)
//! - `LANCE_URING_IO_PARALLELISM` - Max concurrent operations (default: 128)
//! - `LANCE_URING_QUEUE_DEPTH` - io_uring queue depth (default: 16K)
//! - `LANCE_URING_THREAD_COUNT` - Number of io_uring threads to use (default: 2)
//! - `LANCE_URING_SUBMIT_BATCH_SIZE` - Number of requests to batch before submitting (default: 128)
//! - `LANCE_URING_POLL_TIMEOUT_MS` - Thread poll timeout in milliseconds (default: 10)
//!
//! Note: the block size and io parallelism are not actually used by the io_uring implementation. These
//! variables just control what the filesystem reports up to Lance.
//!
//! # Platform Support
//!
//! This module is only available on Linux and requires kernel 5.1 or newer.
//! On other platforms, the code falls back to [`LocalObjectReader`](crate::local::LocalObjectReader).
//!
//! # Example
//!
//! ```no_run
//! # use lance_io::object_store::ObjectStore;
//! # async fn example() -> lance_core::Result<()> {
//! // Enable io_uring by using the file+uring:// scheme
//! let uri = "file+uring:///path/to/file.dat";
//! let (store, path) = ObjectStore::from_uri(uri).await?;
//! let reader = store.open(&path).await?;
//!
//! // Reader will use io_uring
//! let data = reader.get_range(0..1024).await?;
//! # Ok(())
//! # }
//! ```

mod future;
mod reader;
mod requests;
mod thread;

// Thread-local io_uring implementation for current-thread runtimes
pub(crate) mod current_thread;
pub(crate) mod current_thread_future;

#[cfg(test)]
mod tests;

use std::sync::LazyLock;

pub(crate) use current_thread::UringCurrentThreadReader;
pub use reader::UringReader;

/// Default block size for io_uring reads (4KB)
pub const DEFAULT_URING_BLOCK_SIZE: usize = 4 * 1024;

/// Default I/O parallelism for io_uring (128 concurrent operations)
pub const DEFAULT_URING_IO_PARALLELISM: usize = 128;

/// Default io_uring queue depth (16K entries)
pub const DEFAULT_URING_QUEUE_DEPTH: usize = 16 * 1024;

/// Cached `LANCE_URING_BLOCK_SIZE` env var, read once at first access.
pub(crate) static URING_BLOCK_SIZE: LazyLock<Option<usize>> = LazyLock::new(|| {
std::env::var("LANCE_URING_BLOCK_SIZE")
.ok()
.and_then(|s| s.parse().ok())
});
Loading
Loading