Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions cuda-sys/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ fn main() {
.clang_arg("c++")
.clang_arg("-std=gnu++20")
.parse_callbacks(Box::new(bindgen::CargoCallbacks::new()))
// Allow the specified functions and types
.allowlist_function("cu.*")
.allowlist_function("CU.*")
.allowlist_type("cu.*")
.allowlist_type("CU.*")
// Allow the specified functions and types (CUDA Runtime API only)
.allowlist_function("cuda.*")
.allowlist_function("CUDA.*")
.allowlist_type("cuda.*")
.allowlist_type("CUDA.*")
// Use newtype enum style
.default_enum_style(bindgen::EnumVariation::NewType {
is_bitfield: false,
Expand Down Expand Up @@ -78,7 +78,6 @@ fn main() {
}
};
println!("cargo:rustc-link-search=native={}", cuda_lib_dir);
println!("cargo:rustc-link-lib=cuda");
println!("cargo:rustc-link-lib=cudart");

// Generate bindings - fail fast if this doesn't work
Expand Down
17 changes: 0 additions & 17 deletions cuda-sys/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,3 @@ mod inner {
}

pub use inner::*;

#[cfg(test)]
mod tests {
use std::mem::MaybeUninit;

use super::*;

#[test]
fn sanity() {
// SAFETY: testing bindings
unsafe {
let mut version = MaybeUninit::<i32>::uninit();
let result = cuDriverGetVersion(version.as_mut_ptr());
assert_eq!(result, cudaError_enum(0));
}
}
}
1 change: 0 additions & 1 deletion cuda-sys/src/wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,4 @@

#pragma once

#include <cuda.h>
#include <cuda_runtime.h>
91 changes: 51 additions & 40 deletions monarch_rdma/examples/cuda_ping_pong/src/cuda_ping_pong.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,45 +269,51 @@ impl Actor for CudaRdmaActor {
// For this example, we'll use a regular Rust allocation as a placeholder
// The actual CUDA allocation would be handled by the monarch_rdma library
unsafe {
cu_check!(cuda_sys::cuInit(0));
let mut dptr: cuda_sys::CUdeviceptr = std::mem::zeroed();
let mut handle: cuda_sys::CUmemGenericAllocationHandle = std::mem::zeroed();

let mut device: cuda_sys::CUdevice = std::mem::zeroed();
cu_check!(cuda_sys::cuDeviceGet(&mut device, device_id as i32));
cu_check!(rdmaxcel_sys::rdmaxcel_cuInit(0));
let mut dptr: rdmaxcel_sys::CUdeviceptr = std::mem::zeroed();
let mut handle: rdmaxcel_sys::CUmemGenericAllocationHandle = std::mem::zeroed();

let mut device: rdmaxcel_sys::CUdevice = std::mem::zeroed();
cu_check!(rdmaxcel_sys::rdmaxcel_cuDeviceGet(
&mut device,
device_id as i32
));

let mut context: cuda_sys::CUcontext = std::mem::zeroed();
cu_check!(cuda_sys::cuCtxCreate_v2(&mut context, 0, device_id as i32));
cu_check!(cuda_sys::cuCtxSetCurrent(context));
let mut context: rdmaxcel_sys::CUcontext = std::mem::zeroed();
cu_check!(rdmaxcel_sys::rdmaxcel_cuCtxCreate_v2(
&mut context,
0,
device_id as i32
));
cu_check!(rdmaxcel_sys::rdmaxcel_cuCtxSetCurrent(context));

let mut granularity: usize = 0;
let mut prop: cuda_sys::CUmemAllocationProp = std::mem::zeroed();
prop.type_ = cuda_sys::CUmemAllocationType::CU_MEM_ALLOCATION_TYPE_PINNED;
prop.location.type_ = cuda_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE;
let mut prop: rdmaxcel_sys::CUmemAllocationProp = std::mem::zeroed();
prop.type_ = rdmaxcel_sys::CU_MEM_ALLOCATION_TYPE_PINNED;
prop.location.type_ = rdmaxcel_sys::CU_MEM_LOCATION_TYPE_DEVICE;
prop.location.id = device;
prop.allocFlags.gpuDirectRDMACapable = 1;
prop.requestedHandleTypes =
cuda_sys::CUmemAllocationHandleType::CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR;
prop.requestedHandleTypes = rdmaxcel_sys::CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR;

cu_check!(cuda_sys::cuMemGetAllocationGranularity(
cu_check!(rdmaxcel_sys::rdmaxcel_cuMemGetAllocationGranularity(
&mut granularity as *mut usize,
&prop,
cuda_sys::CUmemAllocationGranularity_flags::CU_MEM_ALLOC_GRANULARITY_MINIMUM,
rdmaxcel_sys::CU_MEM_ALLOC_GRANULARITY_MINIMUM,
));

// ensure our size is aligned
let padded_size: usize = ((buffer_size - 1) / granularity + 1) * granularity;
assert!(padded_size == buffer_size);

cu_check!(cuda_sys::cuMemCreate(
&mut handle as *mut cuda_sys::CUmemGenericAllocationHandle,
cu_check!(rdmaxcel_sys::rdmaxcel_cuMemCreate(
&mut handle as *mut rdmaxcel_sys::CUmemGenericAllocationHandle,
padded_size,
&prop,
0
));
// reserve and map the memory
cu_check!(cuda_sys::cuMemAddressReserve(
&mut dptr as *mut cuda_sys::CUdeviceptr,
cu_check!(rdmaxcel_sys::rdmaxcel_cuMemAddressReserve(
&mut dptr as *mut rdmaxcel_sys::CUdeviceptr,
padded_size,
0,
0,
Expand All @@ -317,23 +323,28 @@ impl Actor for CudaRdmaActor {
assert!(padded_size % granularity == 0);

// fails if a add cu_check macro; but passes if we don't
let err = cuda_sys::cuMemMap(
dptr as cuda_sys::CUdeviceptr,
let err = rdmaxcel_sys::rdmaxcel_cuMemMap(
dptr as rdmaxcel_sys::CUdeviceptr,
padded_size,
0,
handle as cuda_sys::CUmemGenericAllocationHandle,
handle as rdmaxcel_sys::CUmemGenericAllocationHandle,
0,
);
if err != cuda_sys::CUresult::CUDA_SUCCESS {
if err != rdmaxcel_sys::CUDA_SUCCESS {
panic!("failed reserving and mapping memory {:?}", err);
}

// set access
let mut access_desc: cuda_sys::CUmemAccessDesc = std::mem::zeroed();
access_desc.location.type_ = cuda_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE;
let mut access_desc: rdmaxcel_sys::CUmemAccessDesc = std::mem::zeroed();
access_desc.location.type_ = rdmaxcel_sys::CU_MEM_LOCATION_TYPE_DEVICE;
access_desc.location.id = device;
access_desc.flags = cuda_sys::CUmemAccess_flags::CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
cu_check!(cuda_sys::cuMemSetAccess(dptr, padded_size, &access_desc, 1));
access_desc.flags = rdmaxcel_sys::CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
cu_check!(rdmaxcel_sys::rdmaxcel_cuMemSetAccess(
dptr,
padded_size,
&access_desc,
1
));
Ok(Self {
device_id,
cpu_buffer,
Expand Down Expand Up @@ -385,15 +396,15 @@ impl Handler<InitializeBuffer> for CudaRdmaActor {
self.cpu_buffer.fill(value);

unsafe {
let mut context: cuda_sys::CUcontext = std::mem::zeroed();
cu_check!(cuda_sys::cuCtxCreate_v2(
let mut context: rdmaxcel_sys::CUcontext = std::mem::zeroed();
cu_check!(rdmaxcel_sys::rdmaxcel_cuCtxCreate_v2(
&mut context,
0,
self.device_id as i32
));
cu_check!(cuda_sys::cuCtxSetCurrent(context));
cu_check!(rdmaxcel_sys::rdmaxcel_cuCtxSetCurrent(context));
cuda_sys::cudaDeviceSynchronize();
cu_check!(cuda_sys::cuMemcpyHtoD_v2(
cu_check!(rdmaxcel_sys::rdmaxcel_cuMemcpyHtoD_v2(
self.cu_ptr as u64,
self.cpu_buffer.as_ptr() as *const std::ffi::c_void,
self.cpu_buffer.len()
Expand Down Expand Up @@ -459,13 +470,13 @@ impl Handler<PerformPingPong> for CudaRdmaActor {

validate_execution_context().await?;
unsafe {
let mut context: cuda_sys::CUcontext = std::mem::zeroed();
cu_check!(cuda_sys::cuCtxCreate_v2(
let mut context: rdmaxcel_sys::CUcontext = std::mem::zeroed();
cu_check!(rdmaxcel_sys::rdmaxcel_cuCtxCreate_v2(
&mut context,
0,
self.device_id as i32
));
cu_check!(cuda_sys::cuCtxSetCurrent(context));
cu_check!(rdmaxcel_sys::rdmaxcel_cuCtxSetCurrent(context));
}
let qp = self
.rdma_manager
Expand Down Expand Up @@ -532,17 +543,17 @@ impl Handler<VerifyBuffer> for CudaRdmaActor {
VerifyBuffer(expected_values, reply): VerifyBuffer,
) -> Result<(), anyhow::Error> {
unsafe {
let mut context: cuda_sys::CUcontext = std::mem::zeroed();
cu_check!(cuda_sys::cuCtxCreate_v2(
let mut context: rdmaxcel_sys::CUcontext = std::mem::zeroed();
cu_check!(rdmaxcel_sys::rdmaxcel_cuCtxCreate_v2(
&mut context,
0,
self.device_id as i32
));
cu_check!(cuda_sys::cuCtxSetCurrent(context));
cu_check!(rdmaxcel_sys::rdmaxcel_cuCtxSetCurrent(context));
cuda_sys::cudaDeviceSynchronize();
cu_check!(cuda_sys::cuMemcpyDtoH_v2(
cu_check!(rdmaxcel_sys::rdmaxcel_cuMemcpyDtoH_v2(
self.cpu_buffer.as_mut_ptr() as *mut std::ffi::c_void,
self.cu_ptr as cuda_sys::CUdeviceptr,
self.cu_ptr as rdmaxcel_sys::CUdeviceptr,
self.cpu_buffer.len(),
));
}
Expand Down
4 changes: 2 additions & 2 deletions monarch_rdma/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
#[macro_export]
macro_rules! cu_check {
($result:expr) => {
if $result != cuda_sys::CUresult::CUDA_SUCCESS {
if $result != rdmaxcel_sys::CUDA_SUCCESS {
let mut error_string: *const std::os::raw::c_char = std::ptr::null();
cuda_sys::cuGetErrorString($result, &mut error_string);
rdmaxcel_sys::rdmaxcel_cuGetErrorString($result, &mut error_string);
panic!(
"cuda failure {}:{} {:?} '{}'",
file!(),
Expand Down
16 changes: 8 additions & 8 deletions monarch_rdma/src/rdma_manager_actor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -366,13 +366,13 @@ impl RdmaManagerActor {
) -> Result<(RdmaMemoryRegionView, String), anyhow::Error> {
unsafe {
let mut mem_type: i32 = 0;
let ptr = addr as cuda_sys::CUdeviceptr;
let err = cuda_sys::cuPointerGetAttribute(
let ptr = addr as rdmaxcel_sys::CUdeviceptr;
let err = rdmaxcel_sys::rdmaxcel_cuPointerGetAttribute(
&mut mem_type as *mut _ as *mut std::ffi::c_void,
cuda_sys::CUpointer_attribute_enum::CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
rdmaxcel_sys::CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
ptr,
);
let is_cuda = err == cuda_sys::CUresult::CUDA_SUCCESS;
let is_cuda = err == rdmaxcel_sys::CUDA_SUCCESS;

let mut selected_rdma_device = None;

Expand Down Expand Up @@ -457,11 +457,11 @@ impl RdmaManagerActor {
mrv = maybe_mrv.unwrap();
} else if is_cuda {
let mut fd: i32 = -1;
cuda_sys::cuMemGetHandleForAddressRange(
&mut fd as *mut i32 as *mut std::ffi::c_void,
addr as cuda_sys::CUdeviceptr,
rdmaxcel_sys::rdmaxcel_cuMemGetHandleForAddressRange(
&mut fd,
addr as rdmaxcel_sys::CUdeviceptr,
size,
cuda_sys::CUmemRangeHandleType::CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD,
rdmaxcel_sys::CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD,
0,
);
mr = rdmaxcel_sys::ibv_reg_dmabuf_mr(domain_pd, 0, size, 0, fd, access.0 as i32);
Expand Down
Loading