Don't import allocators by default
This will make importing RMM faster if you don't want the hooks for
cupy, numba, or pytorch: those hooks are no longer pulled in (or
re-exported) by `import rmm` and must instead be imported from the
`rmm.allocators` submodules.
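
A minimal sketch of the new import pattern (import only the hooks you
actually use; the names are taken from the diff below):

    # The hooks previously re-exported as rmm.rmm_cupy_allocator,
    # rmm.RMMNumbaManager and rmm.rmm_torch_allocator now live in
    # dedicated submodules:
    from rmm.allocators.cupy import rmm_cupy_allocator
    from rmm.allocators.numba import RMMNumbaManager
    from rmm.allocators.torch import rmm_torch_allocator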

Before, a sampling trace of `import rmm` with pyinstrument shows:

    $ pyinstrument -i 0.01 importrmm.py

      _     ._   __/__   _ _  _  _ _/_   Recorded: 10:19:56  Samples:  67
     /_//_/// /_\ / //_// / //_'/ //     Duration: 0.839     CPU time: 0.837
    /   _/                      v4.4.0

    Program: importrmm.py

    0.839 <module>  importrmm.py:1
    └─ 0.839 <module>  rmm/__init__.py:1
       ├─ 0.315 <module>  rmm/allocators/torch.py:1
       │  └─ 0.315 <module>  torch/__init__.py:1
       │        [96 frames hidden]  torch, <built-in>, enum, inspect, tok...
       ├─ 0.297 <module>  rmm/mr.py:1
       │  └─ 0.297 <module>  rmm/_lib/__init__.py:1
       │     ├─ 0.216 <module>  numba/__init__.py:1
       │     │     [140 frames hidden]  numba, abc, <built-in>, importlib, em...
       │     ├─ 0.040 <module>  numba/cuda/__init__.py:1
       │     │     [34 frames hidden]  numba, asyncio, ssl, <built-in>, re, ...
       │     ├─ 0.030 __new__  enum.py:180
       │     │     [5 frames hidden]  enum, <built-in>
       │     └─ 0.011 [self]  None
       └─ 0.227 <module>  rmm/allocators/cupy.py:1
          └─ 0.227 <module>  cupy/__init__.py:1
                [123 frames hidden]  cupy, pytest, _pytest, attr, <built-i...

After:

    $ pyinstrument -i 0.01 importrmm.py

      _     ._   __/__   _ _  _  _ _/_   Recorded: 10:20:10  Samples:  28
     /_//_/// /_\ / //_// / //_'/ //     Duration: 0.297     CPU time: 0.297
    /   _/                      v4.4.0

    Program: importrmm.py

    0.296 <module>  importrmm.py:1
    └─ 0.296 <module>  rmm/__init__.py:1
       └─ 0.296 <module>  rmm/mr.py:1
          └─ 0.296 <module>  rmm/_lib/__init__.py:1
             ├─ 0.216 <module>  numba/__init__.py:1
             │     [141 frames hidden]  numba, <built-in>, importlib, email, ...
             ├─ 0.040 <module>  numba/cuda/__init__.py:1
             │     [19 frames hidden]  numba, asyncio, ssl, <built-in>, unit...
             ├─ 0.031 [self]  None
             └─ 0.010 __new__  enum.py:180
                   [4 frames hidden]  enum, <built-in>
wence- committed Feb 23, 2023
1 parent 6f204e0 commit 359fcc0
Showing 4 changed files with 13 additions and 17 deletions.
6 changes: 0 additions & 6 deletions python/rmm/__init__.py
@@ -14,9 +14,6 @@

 from rmm import mr
 from rmm._lib.device_buffer import DeviceBuffer
-from rmm.allocators.cupy import rmm_cupy_allocator
-from rmm.allocators.numba import RMMNumbaManager, _numba_memory_manager
-from rmm.allocators.torch import rmm_torch_allocator
 from rmm.mr import disable_logging, enable_logging, get_log_filenames
 from rmm.rmm import (
     RMMError,
@@ -29,16 +26,13 @@
 __all__ = [
     "DeviceBuffer",
     "RMMError",
-    "RMMNumbaManager",
     "disable_logging",
     "enable_logging",
     "get_log_filenames",
     "is_initialized",
     "mr",
     "register_reinitialize_hook",
     "reinitialize",
-    "rmm_cupy_allocator",
-    "rmm_torch_allocator",
     "unregister_reinitialize_hook",
 ]

2 changes: 1 addition & 1 deletion python/rmm/allocators/cupy.py
@@ -26,7 +26,7 @@ def rmm_cupy_allocator(nbytes):
     Examples
     --------
-    >>> import rmm
+    >>> from rmm.allocators.cupy import rmm_cupy_allocator
     >>> import cupy
     >>> cupy.cuda.set_allocator(rmm.rmm_cupy_allocator)
     """
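For context, a minimal end-to-end sketch of the new CuPy usage, mirroring
the updated docstring and the tests below (assumes cupy is installed and a
CUDA device is available):

    # Route CuPy allocations through RMM using the explicit import.
    import cupy
    import rmm
    from rmm.allocators.cupy import rmm_cupy_allocator

    rmm.reinitialize(pool_allocator=True)        # optional: back allocations with a pool
    cupy.cuda.set_allocator(rmm_cupy_allocator)  # install the RMM allocator for CuPy

    a = cupy.arange(10)
    assert isinstance(a.data.mem._owner, rmm.DeviceBuffer)  # memory owned by RMM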
18 changes: 10 additions & 8 deletions python/rmm/tests/test_rmm.py
@@ -24,6 +24,8 @@

 import rmm
 import rmm._cuda.stream
+from rmm.allocators.cupy import rmm_cupy_allocator
+from rmm.allocators.numba import RMMNumbaManager

 if sys.version_info < (3, 8):
     try:
@@ -33,7 +35,7 @@
 else:
     import pickle

-cuda.set_memory_manager(rmm.RMMNumbaManager)
+cuda.set_memory_manager(RMMNumbaManager)

 _driver_version = rmm._cuda.gpu.driverGetVersion()
 _runtime_version = rmm._cuda.gpu.runtimeGetVersion()
@@ -303,17 +305,17 @@ def test_rmm_pool_numba_stream(stream):
 def test_rmm_cupy_allocator():
     cupy = pytest.importorskip("cupy")

-    m = rmm.rmm_cupy_allocator(42)
+    m = rmm_cupy_allocator(42)
     assert m.mem.size == 42
     assert m.mem.ptr != 0
     assert isinstance(m.mem._owner, rmm.DeviceBuffer)

-    m = rmm.rmm_cupy_allocator(0)
+    m = rmm_cupy_allocator(0)
     assert m.mem.size == 0
     assert m.mem.ptr == 0
     assert isinstance(m.mem._owner, rmm.DeviceBuffer)

-    cupy.cuda.set_allocator(rmm.rmm_cupy_allocator)
+    cupy.cuda.set_allocator(rmm_cupy_allocator)
     a = cupy.arange(10)
     assert isinstance(a.data.mem._owner, rmm.DeviceBuffer)

@@ -323,20 +325,20 @@ def test_rmm_pool_cupy_allocator_with_stream(stream):
     cupy = pytest.importorskip("cupy")

     rmm.reinitialize(pool_allocator=True)
-    cupy.cuda.set_allocator(rmm.rmm_cupy_allocator)
+    cupy.cuda.set_allocator(rmm_cupy_allocator)

     if stream == "null":
         stream = cupy.cuda.stream.Stream.null
     else:
         stream = cupy.cuda.stream.Stream()

     with stream:
-        m = rmm.rmm_cupy_allocator(42)
+        m = rmm_cupy_allocator(42)
         assert m.mem.size == 42
         assert m.mem.ptr != 0
         assert isinstance(m.mem._owner, rmm.DeviceBuffer)

-        m = rmm.rmm_cupy_allocator(0)
+        m = rmm_cupy_allocator(0)
         assert m.mem.size == 0
         assert m.mem.ptr == 0
         assert isinstance(m.mem._owner, rmm.DeviceBuffer)
@@ -355,7 +357,7 @@ def test_rmm_pool_cupy_allocator_stream_lifetime():
     cupy = pytest.importorskip("cupy")

     rmm.reinitialize(pool_allocator=True)
-    cupy.cuda.set_allocator(rmm.rmm_cupy_allocator)
+    cupy.cuda.set_allocator(rmm_cupy_allocator)

     stream = cupy.cuda.stream.Stream()

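Similarly, a short sketch of the Numba hook under the new layout (assumes
numba is installed and a CUDA device is available); as in the test module
above, the memory manager is set before any CUDA work:

    # Make Numba's CUDA allocations go through RMM via the explicit import.
    import numpy as np
    from numba import cuda

    from rmm.allocators.numba import RMMNumbaManager

    cuda.set_memory_manager(RMMNumbaManager)  # set before the CUDA context is created

    d_arr = cuda.device_array(10, dtype=np.int32)  # device memory now comes from RMM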
4 changes: 2 additions & 2 deletions python/rmm/tests/test_rmm_pytorch.py
@@ -2,7 +2,7 @@

 import pytest

-import rmm
+from rmm.allocators.torch import rmm_torch_allocator

 torch = pytest.importorskip("torch")

@@ -13,7 +13,7 @@ def torch_allocator():
         from torch.cuda.memory import change_current_allocator
     except ImportError:
         pytest.skip("pytorch pluggable allocator not available")
-    change_current_allocator(rmm.rmm_torch_allocator)
+    change_current_allocator(rmm_torch_allocator)


 def test_rmm_torch_allocator(torch_allocator, stats_mr):
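And a short sketch of the PyTorch hook under the new layout (assumes a
PyTorch build that exposes the pluggable-allocator API and a CUDA device):

    # Register RMM as PyTorch's CUDA allocator via the explicit import.
    import torch
    from rmm.allocators.torch import rmm_torch_allocator

    try:
        from torch.cuda.memory import change_current_allocator
    except ImportError:
        raise SystemExit("pytorch pluggable allocator not available")

    change_current_allocator(rmm_torch_allocator)  # swap in before any CUDA allocations
    t = torch.empty(8, device="cuda")              # allocated through RMM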
