diff --git a/Cargo.lock b/Cargo.lock index f8e7a0523..d319c82f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1384,9 +1384,9 @@ dependencies = [ [[package]] name = "cudarc" -version = "0.13.9" +version = "0.17.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "486c221362668c63a1636cfa51463b09574433b39029326cff40864b3ba12b6e" +checksum = "ff0da1a70ec91e66731c1752deb9fda3044f1154fe4ceb5873e3f96ed34cafa3" dependencies = [ "libloading", ] @@ -2565,7 +2565,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" dependencies = [ "cfg-if", - "windows-targets 0.52.6", + "windows-targets 0.53.4", ] [[package]] @@ -2972,7 +2972,7 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77e878c846a8abae00dd069496dbe8751b16ac1c3d6bd2a7283a938e8228f90d" dependencies = [ - "proc-macro-crate 1.3.1", + "proc-macro-crate 3.4.0", "proc-macro2", "quote", "syn 2.0.101", diff --git a/Cargo.toml b/Cargo.toml index 43f2aea9a..a0b824a62 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -82,6 +82,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] } uint = "0.8" ceno_gpu = { path = "utils/cuda_hal", package = "cuda_hal" } +cudarc = { version = "0.17.3", features = ["driver", "cuda-version-from-build-system"] } [profile.dev] lto = "thin" diff --git a/ceno_zkvm/Cargo.toml b/ceno_zkvm/Cargo.toml index 07d1394ac..728756197 100644 --- a/ceno_zkvm/Cargo.toml +++ b/ceno_zkvm/Cargo.toml @@ -15,7 +15,7 @@ ceno-examples = { path = "../examples-builder" } ceno_emul = { path = "../ceno_emul" } ceno_gpu = { workspace = true, optional = true } ceno_host = { path = "../ceno_host" } -cudarc = { version = "0.13.0", features = ["driver", "cuda-version-from-build-system"], optional = true } +cudarc = { workspace = true, optional = true } either.workspace = true ff_ext.workspace = true gkr_iop = { path = "../gkr_iop" } diff --git a/gkr_iop/Cargo.toml b/gkr_iop/Cargo.toml index 248e1e2ce..77bbe6716 100644 --- a/gkr_iop/Cargo.toml +++ b/gkr_iop/Cargo.toml @@ -12,7 +12,7 @@ version.workspace = true [dependencies] bincode.workspace = true ceno_gpu = { workspace = true, optional = true } -cudarc = { version = "0.13.0", features = ["driver", "cuda-version-from-build-system"], optional = true } +cudarc = { workspace = true, optional = true } either.workspace = true ff_ext.workspace = true itertools.workspace = true diff --git a/gkr_iop/src/gpu/mod.rs b/gkr_iop/src/gpu/mod.rs index 8c00e6a20..68fef16b5 100644 --- a/gkr_iop/src/gpu/mod.rs +++ b/gkr_iop/src/gpu/mod.rs @@ -27,44 +27,32 @@ pub mod gpu_prover { common::{ basefold::utils::convert_ceno_to_gpu_basefold_commitment, buffer::BufferImpl, + get_ceno_gpu_device_id, mle::{ build_mle_as_ceno, ordered_sparse32_selector_gpu, rotation_next_base_mle_gpu, rotation_selector_gpu, }, }, }; - use cudarc::driver::{CudaDevice, DriverError}; + use once_cell::sync::Lazy; use std::sync::{Arc, Mutex, MutexGuard}; pub type BB31Base = p3::babybear::BabyBear; pub type BB31Ext = ff_ext::BabyBearExt4; - pub static CUDA_DEVICE: Lazy, DriverError>> = - Lazy::new(|| CudaDevice::new(0)); - #[allow(clippy::type_complexity)] pub static CUDA_HAL: Lazy< Result>, Box>, > = Lazy::new(|| { - let device = CUDA_DEVICE - .as_ref() - .map_err(|e| format!("Device init failed: {:?}", e))?; - device.bind_to_thread()?; - - CudaHalBB31::new() + // can be overridden by env variable `CENO_GPU_DEVICE_ID` + let device_id: usize = get_ceno_gpu_device_id(0); + CudaHalBB31::new(device_id) .map(|hal| Arc::new(Mutex::new(hal))) .map_err(|e| Box::new(e) as Box) }); pub fn get_cuda_hal() -> Result, String> { - let device = CUDA_DEVICE - .as_ref() - .map_err(|e| format!("Device not available: {:?}", e))?; - device - .bind_to_thread() - .map_err(|e| format!("Failed to bind device to thread: {:?}", e))?; - let hal_arc = CUDA_HAL .as_ref() .map_err(|e| format!("HAL not available: {:?}", e))?;