Numba patch: Support linking code from memory (#60)
Adds support for linking code from memory to Numba's `@cuda.jit`
decorator (in addition to the already-supported linking files from
disk). New classes are added to Numba's top-level `cuda` module:

* `Archive`: An archive of objects
* `CUSource`: A CUDA C/C++ source
* `Cubin`: A cubin ELF
* `Fatbin`: A fatbin ELF
* `Object`: An object file
* `PTXSource`: PTX assembly source code

Each of these is constructed with a single argument: the code in memory to
link. Once created, the objects can be passed in the `link=` kwarg of the
`@cuda.jit` decorator. An example showing a use case with CUDA C/C++ source
is added.
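
A minimal sketch of the intended usage with a precompiled cubin held in memory
(not taken from this commit; the file name `functions.cubin` and the device
function `mul` are hypothetical):

from numba import cuda
from pynvjitlink import patch

patch.patch_numba_linker()

# Read a cubin that was built elsewhere; the path is illustrative only.
with open("functions.cubin", "rb") as f:
    cubin = cuda.Cubin(f.read())

# Assumes the cubin defines a device function with this name and signature.
mul = cuda.declare_device("mul", "int32(int32, int32)")


@cuda.jit(link=[cubin])
def kernel(result, a, b):
    result[0] = mul(a, b)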

This implementation is inspired by the approach outlined by @ed-o-saurus
in numba/numba#9470.

Notes on changes:
* Various tests now run on the GPU, so test binaries need to be
generated for the relevant compute capability - this change is applied
in the `Makefile`, along with some refactoring to tidy it up a little
(a sketch of querying the compute capability follows these notes).
* Tests for the new functionality are added. In addition, existing tests
that used the test binaries are modified to use a relevant compute
capability (usually that of the test machine), because the test binaries
are now built for the CC of the current GPU - it is no longer sufficient
to hard-code CCs like 7.5 or 7.0 in tests. Since fixtures are needed
across test files, I've started moving them into `conftest.py`.
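
A minimal sketch of how the relevant compute capability can be queried
(mirroring the fixtures added in `conftest.py`); how the value reaches the
`Makefile` is an assumption here:

from numba import cuda

# Compute capability of the GPU in the test machine, e.g. (7, 5).
major, minor = cuda.get_current_device().compute_capability
print(f"sm_{major}{minor}")  # e.g. "sm_75", suitable for an -arch flag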

---------

Co-authored-by: Bradley Dice <bdice@bradleydice.com>
Co-authored-by: jakirkham <jakirkham@gmail.com>
3 people committed Mar 19, 2024
1 parent 2dae4c2 commit a92e60f
Showing 8 changed files with 464 additions and 81 deletions.
44 changes: 44 additions & 0 deletions examples/jit_link_data.py
@@ -0,0 +1,44 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

# Demonstrates the use of a CUSource object to link code to a @cuda.jit
# function where the linked source is supplied as a string in memory, rather
# than on-disk. The CUSource object is passed in the `link` list, just as paths
# to files are passed when linking source files from disk.
#
# In addition to CUSource files, PTXSource objects can be used to link PTX from
# memory (not shown in this example).

from numba import cuda
from pynvjitlink import patch

import numpy as np

patch.patch_numba_linker()

source = cuda.CUSource(
"""
typedef unsigned int uint32_t;
extern "C" __device__ int cu_add(uint32_t* result, uint32_t a, uint32_t b)
{
    *result = a + b;
    return 0;
}
"""
)


cu_add = cuda.declare_device("cu_add", "uint32(uint32, uint32)")


@cuda.jit(link=[source])
def kernel(result, a, b):
    result[0] = cu_add(a, b)


a = 1
b = 2
result = np.zeros(1, dtype=np.uint32)
kernel[1, 1](result, a, b)

print(f"According to a CUDA kernel, {a} + {b} = {result[0]}")
99 changes: 99 additions & 0 deletions pynvjitlink/patch.py
@@ -29,6 +29,7 @@

if _numba_version_ok:
    from numba.core import config
    from numba import cuda
    from numba.cuda.cudadrv import nvrtc
    from numba.cuda.cudadrv.driver import (
        driver,
@@ -42,6 +43,61 @@
    Linker = object


class LinkableCode:
    """An object that can be passed in the `link` list argument to `@cuda.jit`
    kernels to supply code to be linked from memory."""

    def __init__(self, data, name=None):
        self.data = data
        self._name = name

    @property
    def name(self):
        return self._name or self.default_name


class PTXSource(LinkableCode):
    """PTX Source code in memory"""

    kind = FILE_EXTENSION_MAP["ptx"]
    default_name = "<unnamed-ptx>"


class CUSource(LinkableCode):
    """CUDA C/C++ Source code in memory"""

    kind = "cu"
    default_name = "<unnamed-cu>"


class Fatbin(LinkableCode):
    """A fatbin ELF in memory"""

    kind = FILE_EXTENSION_MAP["fatbin"]
    default_name = "<unnamed-fatbin>"


class Cubin(LinkableCode):
    """A cubin ELF in memory"""

    kind = FILE_EXTENSION_MAP["cubin"]
    default_name = "<unnamed-cubin>"


class Archive(LinkableCode):
    """An archive of objects in memory"""

    kind = FILE_EXTENSION_MAP["a"]
    default_name = "<unnamed-archive>"


class Object(LinkableCode):
    """An object file in memory"""

    kind = FILE_EXTENSION_MAP["o"]
    default_name = "<unnamed-object>"


class PatchedLinker(Linker):
    def __init__(
        self,
@@ -91,6 +147,34 @@ def add_ltoir(self, ltoir, name="<external-ltoir>"):
    def add_object(self, obj, name="<external-object>"):
        self._linker.add_object(obj, name)

    def add_file_guess_ext(self, path_or_code):
        # Numba's add_file_guess_ext expects to always be passed a path to a
        # file that it will load from the filesystem to link. We augment it
        # here with the ability to provide a file from memory.

        # To maintain compatibility with the original interface, all strings
        # are treated as paths in the filesystem.
        if isinstance(path_or_code, str):
            # Upstream numba does not yet recognize LTOIR, so handle that
            # separately here.
            extension = pathlib.Path(path_or_code).suffix
            if extension == ".ltoir":
                self.add_file(path_or_code, "ltoir")
            else:
                # Use Numba's logic for non-LTOIR
                super().add_file_guess_ext(path_or_code)

            return

        # Otherwise, we should have been given a LinkableCode object
        if not isinstance(path_or_code, LinkableCode):
            raise TypeError("Expected path to file or a LinkableCode object")

        if path_or_code.kind == "cu":
            self.add_cu(path_or_code.data, path_or_code.name)
        else:
            self.add_data(path_or_code.data, path_or_code.kind, path_or_code.name)

    def add_file(self, path, kind):
        try:
            with open(path, "rb") as f:
@@ -99,6 +183,9 @@ def add_file(self, path, kind):
            raise LinkerError(f"{path} not found")

        name = pathlib.Path(path).name
        self.add_data(data, kind, name)

    def add_data(self, data, kind, name):
        if kind == FILE_EXTENSION_MAP["cubin"]:
            fn = self._linker.add_cubin
        elif kind == FILE_EXTENSION_MAP["fatbin"]:
@@ -109,6 +196,8 @@ def add_file(self, path, kind):
            return self.add_ptx(data, name)
        elif kind == FILE_EXTENSION_MAP["o"]:
            fn = self._linker.add_object
        elif kind == "ltoir":
            fn = self._linker.add_ltoir
        else:
            raise LinkerError(f"Don't know how to link {kind}")

@@ -159,4 +248,14 @@ def patch_numba_linker():
        msg = f"Cannot patch Numba: {_numba_error}"
        raise RuntimeError(msg)

    # Replace the built-in linker that uses the Driver API with our linker that
    # uses nvJitLink
    Linker.new = new_patched_linker

    # Add linkable code objects to Numba's top-level API
    cuda.Archive = Archive
    cuda.CUSource = CUSource
    cuda.Cubin = Cubin
    cuda.Fatbin = Fatbin
    cuda.Object = Object
    cuda.PTXSource = PTXSource
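
As a quick illustration of the additions above (not part of the diff), after
`patch_numba_linker()` runs, the linkable-code classes are reachable from
Numba's top-level `cuda` module; the PTX bytes below are placeholders:

from numba import cuda
from pynvjitlink import patch

patch.patch_numba_linker()

# Placeholder PTX bytes; real PTX would come from a compiler or be held in
# memory after being generated elsewhere.
ptx = cuda.PTXSource(b".version 8.0\n.target sm_75\n.address_size 64\n")
print(ptx.name)  # "<unnamed-ptx>", since no name argument was given
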
143 changes: 143 additions & 0 deletions pynvjitlink/tests/conftest.py
@@ -0,0 +1,143 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

import os
import pytest

from numba import cuda
from pynvjitlink.patch import Archive, Cubin, CUSource, Fatbin, Object, PTXSource


@pytest.fixture(scope="session")
def gpu_compute_capability():
    """Compute capability of the current GPU"""
    return cuda.get_current_device().compute_capability


@pytest.fixture(scope="session")
def alt_gpu_compute_capability(gpu_compute_capability):
    """A compute capability that does not match the current GPU"""
    # For sufficient incompatibility for the test suites, the major number of
    # the compute capabilities must differ (for example one can load a 7.0
    # cubin when linking for 7.5)
    if gpu_compute_capability[0] == 7:
        return (8, 0)
    else:
        return (7, 0)


@pytest.fixture(scope="session")
def absent_gpu_compute_capability(gpu_compute_capability, alt_gpu_compute_capability):
    """A compute capability not used in any cubin or fatbin test binary"""
    cc_majors = {6, 7, 8}
    cc_majors.remove(gpu_compute_capability[0])
    cc_majors.remove(alt_gpu_compute_capability[0])
    return (cc_majors.pop(), 0)


@pytest.fixture(scope="session")
def gpu_arch_flag(gpu_compute_capability):
    """nvJitLink arch flag to link for the current GPU"""
    major, minor = gpu_compute_capability
    return f"-arch=sm_{major}{minor}"


@pytest.fixture(scope="session")
def alt_gpu_arch_flag(alt_gpu_compute_capability):
    """nvJitLink arch flag to link for a different kind of GPU to the current
    one"""
    major, minor = alt_gpu_compute_capability
    return f"-arch=sm_{major}{minor}"


@pytest.fixture(scope="session")
def absent_gpu_arch_flag(absent_gpu_compute_capability):
    """nvJitLink arch flag to link for an architecture not in any cubin or
    fatbin"""
    major, minor = absent_gpu_compute_capability
    return f"-arch=sm_{major}{minor}"


@pytest.fixture(scope="session")
def device_functions_archive():
    test_dir = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(test_dir, "test_device_functions.a")
    with open(path, "rb") as f:
        return f.read()


@pytest.fixture(scope="session")
def device_functions_cubin():
    test_dir = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(test_dir, "test_device_functions.cubin")
    with open(path, "rb") as f:
        return f.read()


@pytest.fixture(scope="session")
def device_functions_cusource():
    test_dir = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(test_dir, "test_device_functions.cu")
    with open(path, "r") as f:
        return f.read()


@pytest.fixture(scope="session")
def device_functions_fatbin():
    test_dir = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(test_dir, "test_device_functions.fatbin")
    with open(path, "rb") as f:
        return f.read()


@pytest.fixture(scope="session")
def device_functions_object():
    test_dir = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(test_dir, "test_device_functions.o")
    with open(path, "rb") as f:
        return f.read()


@pytest.fixture(scope="session")
def device_functions_ptx():
    test_dir = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(test_dir, "test_device_functions.ptx")
    with open(path, "rb") as f:
        return f.read()


@pytest.fixture(scope="session")
def undefined_extern_cubin():
    test_dir = os.path.dirname(os.path.abspath(__file__))
    fatbin_path = os.path.join(test_dir, "undefined_extern.cubin")
    with open(fatbin_path, "rb") as f:
        return f.read()


@pytest.fixture(scope="session")
def linkable_code_archive(device_functions_archive):
    return Archive(device_functions_archive)


@pytest.fixture(scope="session")
def linkable_code_cubin(device_functions_cubin):
    return Cubin(device_functions_cubin)


@pytest.fixture(scope="session")
def linkable_code_cusource(device_functions_cusource):
    return CUSource(device_functions_cusource)


@pytest.fixture(scope="session")
def linkable_code_fatbin(device_functions_fatbin):
    return Fatbin(device_functions_fatbin)


@pytest.fixture(scope="session")
def linkable_code_object(device_functions_object):
    return Object(device_functions_object)


@pytest.fixture(scope="session")
def linkable_code_ptx(device_functions_ptx):
    return PTXSource(device_functions_ptx)
