Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions crates/cuda-common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,8 @@ ctor.workspace = true
[build-dependencies]
openvm-cuda-builder.workspace = true

[dev-dependencies]
tokio = { workspace = true, features = ["full"] }

[features]
touchemall = []
3 changes: 3 additions & 0 deletions crates/cuda-common/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ pub enum MemoryError {

#[error("Invalid pointer: pointer not found in allocation table")]
InvalidPointer,

#[error("Failed to reserve virtual address space (bytes: {size}, page size: {page_size})")]
ReserveFailed { size: usize, page_size: usize },
}

#[derive(Error, Debug)]
Expand Down
9 changes: 2 additions & 7 deletions crates/cuda-common/src/memory_manager/cuda.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,8 @@ extern "C" {
fn _vpmm_release(h: CUmemGenericAllocationHandle) -> i32;
}

pub(super) unsafe fn vpmm_check_support(device_ordinal: i32) -> Result<bool, CudaError> {
let status = _vpmm_check_support(device_ordinal);
if status == 0 {
Ok(true)
} else {
Err(CudaError::new(status))
}
pub(super) unsafe fn vpmm_check_support(device_ordinal: i32) -> Result<(), CudaError> {
CudaError::from_result(_vpmm_check_support(device_ordinal))
}

pub(super) unsafe fn vpmm_min_granularity(device_ordinal: i32) -> Result<usize, CudaError> {
Expand Down
46 changes: 30 additions & 16 deletions crates/cuda-common/src/memory_manager/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ mod cuda;
mod vm_pool;
use vm_pool::VirtualMemoryPool;

#[cfg(test)]
mod tests;

#[link(name = "cudart")]
extern "C" {
fn cudaMallocAsync(dev_ptr: *mut *mut c_void, size: usize, stream: cudaStream_t) -> i32;
Expand All @@ -37,6 +40,9 @@ pub struct MemoryManager {
max_used_size: usize,
}

/// # Safety
/// `MemoryManager` is not internally synchronized. These impls are safe because
/// the singleton instance is wrapped in `Mutex` via `MEMORY_MANAGER`.
unsafe impl Send for MemoryManager {}
unsafe impl Sync for MemoryManager {}

Expand All @@ -59,21 +65,28 @@ impl MemoryManager {
let mut tracked_size = size;
let ptr = if size < self.pool.page_size {
let mut ptr: *mut c_void = std::ptr::null_mut();
check(unsafe { cudaMallocAsync(&mut ptr, size, cudaStreamPerThread) })?;
self.allocated_ptrs
.insert(NonNull::new(ptr).expect("cudaMalloc returned null"), size);
Ok(ptr)
check(unsafe { cudaMallocAsync(&mut ptr, size, cudaStreamPerThread) }).map_err(
|e| {
tracing::error!("cudaMallocAsync failed: size={}: {:?}", size, e);
MemoryError::from(e)
},
)?;
self.allocated_ptrs.insert(
NonNull::new(ptr).expect("BUG: cudaMallocAsync returned null"),
size,
);
ptr
} else {
tracked_size = size.next_multiple_of(self.pool.page_size);
self.pool
.malloc_internal(tracked_size, current_stream_id()?)
let stream_id = current_stream_id()?;
self.pool.malloc_internal(tracked_size, stream_id)?
};

self.current_size += tracked_size;
if self.current_size > self.max_used_size {
self.max_used_size = self.current_size;
}
ptr
Ok(ptr)
}

/// # Safety
Expand All @@ -84,9 +97,14 @@ impl MemoryManager {

if let Some(size) = self.allocated_ptrs.remove(&nn) {
self.current_size -= size;
check(unsafe { cudaFreeAsync(ptr, cudaStreamPerThread) })?;
check(unsafe { cudaFreeAsync(ptr, cudaStreamPerThread) }).map_err(|e| {
tracing::error!("cudaFreeAsync failed: ptr={:p}: {:?}", ptr, e);
MemoryError::from(e)
})?;
} else {
self.current_size -= self.pool.free_internal(ptr, current_stream_id()?)?;
let stream_id = current_stream_id()?;
let freed_size = self.pool.free_internal(ptr, stream_id)?;
self.current_size -= freed_size;
}

Ok(())
Expand All @@ -97,13 +115,9 @@ impl Drop for MemoryManager {
fn drop(&mut self) {
let ptrs: Vec<*mut c_void> = self.allocated_ptrs.keys().map(|nn| nn.as_ptr()).collect();
for &ptr in &ptrs {
unsafe { self.d_free(ptr).unwrap() };
}
if !self.allocated_ptrs.is_empty() {
println!(
"Error: {} allocations were automatically freed on MemoryManager drop",
self.allocated_ptrs.len()
);
if let Err(e) = unsafe { self.d_free(ptr) } {
tracing::error!("MemoryManager drop: failed to free {:p}: {:?}", ptr, e);
}
}
}
}
Expand Down
Loading