-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Numba patch: Support linking code from memory (#60)
Adds support for linking code from memory to Numba's `@cuda.jit` decorator (in addition to the already-supported linking files from disk). New classes are added to Numba's top-level `cuda` module: * `Archive`: An archive of objects * `CUSource`: A CUDA C/C++ source * `Cubin`: A cubin ELF * `Fatbin`: A fatbin ELF * `Object`: An object file * `PTXSource`: PTX assembly source code. These are all used by constructing them with a single argument, the code in memory to use. Once created, they can then be passed to the `link=` kwarg of the `@cuda.jit` decorator. An example showing a use case with a CUDA C/C++ source is added. This implementation is inspired by the approach outlined by @ed-o-saurus in numba/numba#9470. Notes on changes: * Various tests now run on the GPU, so test binaries need to be generated using the relevant compute capability - this change is applied in the `Makefile`, along with some refactoring to tidy it up a little. * Test for new functionality are added. In addition, existing tests that used the test binaries are all modified such that they use a relevant compute capability (usually the one in the test machine, for most of them) because the test binaries are now built for the CC of the current GPU - it's no longer sufficient to hard code CCs like 7.5 or 7.0 for tests. Since fixtures are needed across test files, I've started moving them into `conftest.py`. --------- Co-authored-by: Bradley Dice <bdice@bradleydice.com> Co-authored-by: jakirkham <jakirkham@gmail.com>
- Loading branch information
1 parent
2dae4c2
commit a92e60f
Showing
8 changed files
with
464 additions
and
81 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
# Demonstrates the use of a CUSource object to link code to a @cuda.jit | ||
# function where the linked source is supplied as a string in memory, rather | ||
# than on-disk. The CUSource object is passed in the `link` list, just as paths | ||
# to files are passed when linking source files from disk. | ||
# | ||
# In addition to CUSource files, PTXSource objects can be used to link PTX from | ||
# memory (not shown in this example). | ||
|
||
from numba import cuda | ||
from pynvjitlink import patch | ||
|
||
import numpy as np | ||
|
||
patch.patch_numba_linker() | ||
|
||
source = cuda.CUSource( | ||
""" | ||
typedef unsigned int uint32_t; | ||
extern "C" __device__ int cu_add(uint32_t* result, uint32_t a, uint32_t b) | ||
{ | ||
*result = a + b; | ||
return 0; | ||
} | ||
""" | ||
) | ||
|
||
|
||
cu_add = cuda.declare_device("cu_add", "uint32(uint32, uint32)") | ||
|
||
|
||
@cuda.jit(link=[source]) | ||
def kernel(result, a, b): | ||
result[0] = cu_add(a, b) | ||
|
||
|
||
a = 1 | ||
b = 2 | ||
result = np.zeros(1, dtype=np.uint32) | ||
kernel[1, 1](result, a, b) | ||
|
||
print(f"According to a CUDA kernel, {a} + {b} = {result[0]}") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
import os | ||
import pytest | ||
|
||
from numba import cuda | ||
from pynvjitlink.patch import Archive, Cubin, CUSource, Fatbin, Object, PTXSource | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def gpu_compute_capability(): | ||
"""Compute capability of the current GPU""" | ||
return cuda.get_current_device().compute_capability | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def alt_gpu_compute_capability(gpu_compute_capability): | ||
"""A compute capability that does not match the current GPU""" | ||
# For sufficient incompatibility for the test suites, the major number of | ||
# the compute capabilities must differ (for example one can load a 7.0 | ||
# cubin when linking for 7.5) | ||
if gpu_compute_capability[0] == 7: | ||
return (8, 0) | ||
else: | ||
return (7, 0) | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def absent_gpu_compute_capability(gpu_compute_capability, alt_gpu_compute_capability): | ||
"""A compute capability not used in any cubin or fatbin test binary""" | ||
cc_majors = {6, 7, 8} | ||
cc_majors.remove(gpu_compute_capability[0]) | ||
cc_majors.remove(alt_gpu_compute_capability[0]) | ||
return (cc_majors.pop(), 0) | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def gpu_arch_flag(gpu_compute_capability): | ||
"""nvJitLink arch flag to link for the current GPU""" | ||
major, minor = gpu_compute_capability | ||
return f"-arch=sm_{major}{minor}" | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def alt_gpu_arch_flag(alt_gpu_compute_capability): | ||
"""nvJitLink arch flag to link for a different kind of GPU to the current | ||
one""" | ||
major, minor = alt_gpu_compute_capability | ||
return f"-arch=sm_{major}{minor}" | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def absent_gpu_arch_flag(absent_gpu_compute_capability): | ||
"""nvJitLink arch flag to link for an architecture not in any cubin or | ||
fatbin""" | ||
major, minor = absent_gpu_compute_capability | ||
return f"-arch=sm_{major}{minor}" | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def device_functions_archive(): | ||
test_dir = os.path.dirname(os.path.abspath(__file__)) | ||
path = os.path.join(test_dir, "test_device_functions.a") | ||
with open(path, "rb") as f: | ||
return f.read() | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def device_functions_cubin(): | ||
test_dir = os.path.dirname(os.path.abspath(__file__)) | ||
path = os.path.join(test_dir, "test_device_functions.cubin") | ||
with open(path, "rb") as f: | ||
return f.read() | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def device_functions_cusource(): | ||
test_dir = os.path.dirname(os.path.abspath(__file__)) | ||
path = os.path.join(test_dir, "test_device_functions.cu") | ||
with open(path, "r") as f: | ||
return f.read() | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def device_functions_fatbin(): | ||
test_dir = os.path.dirname(os.path.abspath(__file__)) | ||
path = os.path.join(test_dir, "test_device_functions.fatbin") | ||
with open(path, "rb") as f: | ||
return f.read() | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def device_functions_object(): | ||
test_dir = os.path.dirname(os.path.abspath(__file__)) | ||
path = os.path.join(test_dir, "test_device_functions.o") | ||
with open(path, "rb") as f: | ||
return f.read() | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def device_functions_ptx(): | ||
test_dir = os.path.dirname(os.path.abspath(__file__)) | ||
path = os.path.join(test_dir, "test_device_functions.ptx") | ||
with open(path, "rb") as f: | ||
return f.read() | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def undefined_extern_cubin(): | ||
test_dir = os.path.dirname(os.path.abspath(__file__)) | ||
fatbin_path = os.path.join(test_dir, "undefined_extern.cubin") | ||
with open(fatbin_path, "rb") as f: | ||
return f.read() | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def linkable_code_archive(device_functions_archive): | ||
return Archive(device_functions_archive) | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def linkable_code_cubin(device_functions_cubin): | ||
return Cubin(device_functions_cubin) | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def linkable_code_cusource(device_functions_cusource): | ||
return CUSource(device_functions_cusource) | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def linkable_code_fatbin(device_functions_fatbin): | ||
return Fatbin(device_functions_fatbin) | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def linkable_code_object(device_functions_object): | ||
return Object(device_functions_object) | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def linkable_code_ptx(device_functions_ptx): | ||
return PTXSource(device_functions_ptx) |
Oops, something went wrong.