Commit

Merge pull request #7789 from esc/cherry_pick_for_0.55.1
Cherry pick for 0.55.1
esc committed Jan 27, 2022
2 parents d44b8f4 + f8d6978 commit 76720bf
Showing 45 changed files with 726 additions and 244 deletions.
1 change: 0 additions & 1 deletion .flake8
@@ -173,7 +173,6 @@ exclude =
numba/tests/test_profiler.py
numba/tests/test_numpyadapt.py
numba/tests/test_stencils.py
numba/tests/test_annotations.py
numba/tests/cache_usecases.py
numba/tests/true_div_usecase.py
numba/tests/test_dataflow.py
40 changes: 39 additions & 1 deletion CHANGE_LOG
@@ -1,4 +1,42 @@
Version 0.55.0 (13 January, 2021)
Version 0.55.1 (27 January, 2022)
---------------------------------

This is a bugfix release that closes all the remaining issues from the
accelerated release of 0.55.0 and also any release-critical regressions
discovered since then.

CUDA target deprecation notices:

* Support for CUDA toolkits < 10.2 is deprecated and will be removed in Numba
0.56.
* Support for devices with Compute Capability < 5.3 is deprecated and will be
removed in Numba 0.56.


Pull-Requests:

* PR `#7755 <https://github.com/numba/numba/pull/7755>`_: CUDA: Deprecate support for CC < 5.3 and CTK < 10.2 (`Graham Markall <https://github.com/gmarkall>`_)
* PR `#7749 <https://github.com/numba/numba/pull/7749>`_: Refactor threading layer priority tests to not use stdout/stderr (`stuartarchibald <https://github.com/stuartarchibald>`_)
* PR `#7744 <https://github.com/numba/numba/pull/7744>`_: Fix issues with locating/parsing source during DebugInfo emission. (`stuartarchibald <https://github.com/stuartarchibald>`_)
* PR `#7712 <https://github.com/numba/numba/pull/7712>`_: Fixing issue 7693 (`Graham Markall <https://github.com/gmarkall>`_ `luk-f-a <https://github.com/luk-f-a>`_ `stuartarchibald <https://github.com/stuartarchibald>`_)
* PR `#7729 <https://github.com/numba/numba/pull/7729>`_: Handle Omitted/OmittedArgDataModel in DI generation. (`stuartarchibald <https://github.com/stuartarchibald>`_)
* PR `#7788 <https://github.com/numba/numba/pull/7788>`_: Avoid issue with DI gen for arrayexprs. (`stuartarchibald <https://github.com/stuartarchibald>`_)
* PR `#7752 <https://github.com/numba/numba/pull/7752>`_: Fix #7751: Use original filename for array exprs (`Graham Markall <https://github.com/gmarkall>`_)
* PR `#7748 <https://github.com/numba/numba/pull/7748>`_: Fix #7713: Ensure _prng_random_hash return has correct bitwidth (`Graham Markall <https://github.com/gmarkall>`_)
* PR `#7745 <https://github.com/numba/numba/pull/7745>`_: Fix the release year for Numba 0.55 change log entry. (`stuartarchibald <https://github.com/stuartarchibald>`_)
* PR `#7740 <https://github.com/numba/numba/pull/7740>`_: CUDA Python 11.6 support (`Graham Markall <https://github.com/gmarkall>`_)
* PR `#7724 <https://github.com/numba/numba/pull/7724>`_: Update URLs in error messages to refer to RTD docs. (`stuartarchibald <https://github.com/stuartarchibald>`_)
* PR `#7709 <https://github.com/numba/numba/pull/7709>`_: CUDA: Fixes missing type annotation pass following #7704 (`stuartarchibald <https://github.com/stuartarchibald>`_)
* PR `#7704 <https://github.com/numba/numba/pull/7704>`_: Move the type annotation pass to post legalization. (`stuartarchibald <https://github.com/stuartarchibald>`_)
* PR `#7619 <https://github.com/numba/numba/pull/7619>`_: CUDA: Fix linking with PTX when compiling lazily (`Graham Markall <https://github.com/gmarkall>`_)

Authors:

* `Graham Markall <https://github.com/gmarkall>`_
* `luk-f-a <https://github.com/luk-f-a>`_
* `stuartarchibald <https://github.com/stuartarchibald>`_

Version 0.55.0 (13 January, 2022)
---------------------------------

This release includes a significant number of important dependency upgrades along
2 changes: 2 additions & 0 deletions buildscripts/condarecipe.local/meta.yaml
@@ -56,6 +56,8 @@ requirements:
- cudatoolkit >=9.2
# scipy 1.0 or later
- scipy >=1.0
# CUDA Python 11.6 or later
- cuda-python >=11.6

test:
requires:
2 changes: 1 addition & 1 deletion buildscripts/gpuci/build.sh
@@ -40,7 +40,7 @@ gpuci_mamba_retry create -n numba_ci -y \
"python=${PYTHON_VER}" \
"cudatoolkit=${CUDA_TOOLKIT_VER}" \
"numba/label/dev::llvmlite" \
"numpy" \
"numpy=1.21" \
"scipy" \
"cffi" \
"psutil" \
32 changes: 22 additions & 10 deletions docs/source/cuda/bindings.rst
@@ -4,28 +4,40 @@ CUDA Bindings
Numba supports two bindings to the CUDA Driver APIs: its own internal bindings
based on ctypes, and the official `NVIDIA CUDA Python bindings
<https://nvidia.github.io/cuda-python/>`_. Functionality is equivalent between
the two bindings, with two exceptions:

* the NVIDIA bindings presently do not support Per-Thread Default Streams
(PTDS), and an exception will be raised on import if PTDS is enabled along
with the NVIDIA bindings.
* The profiling APIs are not available with the NVIDIA bindings.
the two bindings.

The internal bindings are used by default. If the NVIDIA bindings are installed,
then they can be used by setting the environment variable
``NUMBA_CUDA_USE_NVIDIA_BINDING`` to ``1`` prior to the import of Numba. Once
Numba has been imported, the selected binding cannot be changed.
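
The selection mechanism described above can be sketched in a few lines. This is illustrative only (the environment-variable name is from the text; the reading logic is not Numba's actual code):

```python
import os

# The binding must be chosen before Numba is imported, so the variable is
# set first (here from within Python; normally it would be set in the shell).
os.environ["NUMBA_CUDA_USE_NVIDIA_BINDING"] = "1"

# Illustrative sketch of how a config layer might read the flag once, at
# import time -- after this point, changing the variable has no effect.
USE_NVIDIA_BINDING = os.environ.get("NUMBA_CUDA_USE_NVIDIA_BINDING", "0") == "1"

print(USE_NVIDIA_BINDING)  # True
```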


Per-Thread Default Streams
--------------------------

Responsibility for handling Per-Thread Default Streams (PTDS) is delegated to
the NVIDIA bindings when they are in use. To use PTDS with the NVIDIA bindings,
set the environment variable ``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` to
``1`` instead of Numba's environment variable
:envvar:`NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM`.
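
The dispatch described above — which PTDS variable is consulted depends on the binding in use — can be sketched as follows (a hypothetical helper, not Numba's actual implementation):

```python
# Hypothetical sketch of the PTDS dispatch: under the NVIDIA bindings,
# CUDA Python's variable applies; under the internal ctypes bindings,
# Numba's own variable applies.
def ptds_enabled(environ):
    if environ.get("NUMBA_CUDA_USE_NVIDIA_BINDING", "0") == "1":
        # NVIDIA bindings: CUDA Python owns PTDS handling.
        return environ.get(
            "CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM", "0") == "1"
    # Internal ctypes bindings: Numba's own variable applies.
    return environ.get("NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM", "0") == "1"

env = {"NUMBA_CUDA_USE_NVIDIA_BINDING": "1",
       "CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM": "1"}
print(ptds_enabled(env))  # True

# With the NVIDIA bindings active, Numba's own PTDS variable has no effect.
env = {"NUMBA_CUDA_USE_NVIDIA_BINDING": "1",
       "NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM": "1"}
print(ptds_enabled(env))  # False
```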

.. seealso::

The `Default Stream section
<https://nvidia.github.io/cuda-python/release/11.6.0-notes.html#default-stream>`_
in the NVIDIA Bindings documentation.


Roadmap
-------

In Numba 0.56, the NVIDIA Bindings will be used by default, if they are
installed.

In future versions of Numba:

- The NVIDIA Bindings will be used by default, if they are installed.
- The internal bindings will be deprecated.
- The internal bindings will be removed.

It is expected that the NVIDIA bindings will be the default in Numba 0.56; at
present, no specific release is planned for the deprecation or removal of the
internal bindings.
At present, no specific release is planned for the deprecation or removal of
the internal bindings.
39 changes: 28 additions & 11 deletions docs/source/cuda/overview.rst
@@ -37,24 +37,39 @@ Requirements
Supported GPUs
--------------

Numba supports CUDA-enabled GPUs with compute capability 3.0 or above with an
up-to-date NVIDIA driver.
Numba supports CUDA-enabled GPUs with Compute Capability 3.0 or greater.
Support for devices with Compute Capability less than 5.3 is deprecated, and
will be removed in the next Numba release (0.56).

Devices with Compute Capability 5.3 or greater include (but are not limited to):

- Embedded platforms: NVIDIA Jetson Nano, TX1, TX2, Xavier NX, AGX Xavier.
- Desktop / Server GPUs: All GPUs with Pascal microarchitecture or later. E.g.
GTX 10 / 16 series, RTX 20 / 30 series, Quadro P / V / RTX series, RTX A series.
- Laptop GPUs: All GPUs with Pascal microarchitecture or later. E.g. MX series,
Quadro P / T series (mobile), RTX 20 / 30 series (mobile), RTX A series (mobile).

Software
--------

Numba aims to support CUDA Toolkit versions released within the last 3 years. At
the present time, you will need the CUDA toolkit version 9.2 or later installed.
Numba aims to support CUDA Toolkit versions released within the last 3 years.
An NVIDIA driver sufficient for the toolkit version is also required.
Presently:

CUDA is supported on 64-bit Linux and Windows. 32-bit platforms, and macOS are
unsupported.
* 9.2 is the minimum required toolkit version.
* Support for versions less than 10.2 is deprecated, and will be removed in the
next Numba release (0.56).
* 11.2 or later is recommended, as it uses an NVVM version based on LLVM 7 (as
opposed to 3.4 in earlier releases).

CUDA is supported on 64-bit Linux and Windows.

If you are using Conda, you can install the CUDA toolkit with::

$ conda install cudatoolkit

If you are not using Conda or if you want to use a different version of CUDA
toolkit, the following describe how Numba searches for a CUDA toolkit
toolkit, the following describes how Numba searches for a CUDA toolkit
installation.

.. _cuda-bindings:
@@ -64,10 +79,12 @@ CUDA Bindings

Numba supports interacting with the CUDA Driver API via the `NVIDIA CUDA Python
bindings <https://nvidia.github.io/cuda-python/>`_ and its own ctypes-based
binding. The ctypes-based binding is presently the default as Per-Thread
Default Streams and the profiler APIs are not supported with the NVIDIA
bindings, but otherwise functionality is equivalent between the two. You can
install the NVIDIA bindings with::
bindings. Functionality is equivalent between the two bindings. The
ctypes-based bindings are presently the default, but the NVIDIA bindings will
be used by default (if they are available in the environment) in a future Numba
release.

You can install the NVIDIA bindings with::

$ conda install nvidia::cuda-python

24 changes: 24 additions & 0 deletions docs/source/reference/deprecation.rst
@@ -328,3 +328,27 @@ Schedule

- In Numba 0.55: ``add_user_function()`` will be deprecated.
- In Numba 0.56: ``add_user_function()`` will be removed.


Deprecation of CUDA Toolkits < 10.2 and devices with CC < 5.3
=============================================================

Support for:

- Devices with Compute Capability < 5.3, and
- CUDA toolkits less than 10.2

is deprecated and will be removed in future.

Recommendations
---------------

- For devices of Compute Capability 3.0 - 5.2, Numba 0.55.1 or earlier will be
required.
- CUDA toolkit 10.2 or later (ideally 11.2 or later) should be installed.

Schedule
--------

- In Numba 0.55.1: support for CC < 5.3 and CUDA toolkits < 10.2 are deprecated.
- In Numba 0.56: support for CC < 5.3 and CUDA toolkits < 10.2 will be removed.
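
The schedule above amounts to a simple threshold check; as a hypothetical illustration, tuple comparison handles both the compute capability and toolkit-version thresholds directly:

```python
# Hypothetical helper mirroring the deprecation rule documented above:
# support is deprecated when the device's compute capability is below 5.3
# or the CUDA toolkit is older than 10.2.
def is_deprecated(compute_capability, toolkit_version):
    return compute_capability < (5, 3) or toolkit_version < (10, 2)

print(is_deprecated((3, 0), (11, 2)))  # True: CC 3.0 is below 5.3
print(is_deprecated((7, 5), (10, 1)))  # True: toolkit 10.1 is below 10.2
print(is_deprecated((7, 5), (11, 2)))  # False: both within support
```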
13 changes: 11 additions & 2 deletions docs/source/reference/envvars.rst
@@ -485,11 +485,20 @@ GPU support

When set to 1, the default stream is the per-thread default stream. When set
to 0, the default stream is the legacy default stream. This defaults to 0,
for the legacy default stream. It may default to 1 in a future release of
Numba. See `Stream Synchronization Behavior
for the legacy default stream. See `Stream Synchronization Behavior
<https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html>`_
for an explanation of the legacy and per-thread default streams.

This variable only takes effect when using Numba's internal CUDA bindings;
when using the NVIDIA bindings, use the environment variable
``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` instead.

.. seealso::

The `Default Stream section
<https://nvidia.github.io/cuda-python/release/11.6.0-notes.html#default-stream>`_
in the NVIDIA Bindings documentation.

.. envvar:: NUMBA_CUDA_LOW_OCCUPANCY_WARNINGS

Enable warnings if the grid size is too small relative to the number of
56 changes: 31 additions & 25 deletions docs/source/user/installing.rst
@@ -15,7 +15,9 @@ Our supported platforms are:
* Windows 7 and later (32-bit and 64-bit)
* OS X 10.9 and later (64-bit and unofficial support on M1/Arm64)
* \*BSD (unofficial support only)
* NVIDIA GPUs of compute capability 3.0 and later
* NVIDIA GPUs of compute capability 5.3 and later

* Compute capabilities 3.0 - 5.2 are supported, but deprecated.
* ARMv7 (32-bit little-endian, such as Raspberry Pi 2 and 3)
* ARMv8 (64-bit little-endian, such as the NVIDIA Jetson)

@@ -248,7 +250,7 @@ vary with target operating system and hardware. The following lists them all
* ``typeguard`` - used by ``runtests.py`` for
:ref:`runtime type-checking <type_anno_check>`.
* ``cuda-python`` - The NVIDIA CUDA Python bindings. See :ref:`cuda-bindings`.
Numba is tested with Version 11.5 of the bindings.
Numba requires Version 11.6 or greater.

* To build the documentation:

@@ -264,12 +266,11 @@ Checking your installation
You should be able to import Numba from the Python prompt::

$ python
Python 3.8.1 (default, Jan 8 2020, 16:15:59)
[Clang 4.0.1 (tags/RELEASE_401/final)] :: Anaconda, Inc. on darwin
Python 3.10.2 | packaged by conda-forge | (main, Jan 14 2022, 08:02:09) [GCC 9.4.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import numba
>>> numba.__version__
'0.48.0'
'0.55.1'

You can also try executing the ``numba --sysinfo`` (or ``numba -s`` for short)
command to report information about your system capabilities. See :ref:`cli` for
@@ -281,36 +282,41 @@ further information.
System info:
--------------------------------------------------------------------------------
__Time Stamp__
2018-08-28 15:46:24.631054
Report started (local time) : 2022-01-18 10:35:08.981319

__Hardware Information__
Machine : x86_64
CPU Name : haswell
CPU Features :
aes avx avx2 bmi bmi2 cmov cx16 f16c fma fsgsbase lzcnt mmx movbe pclmul popcnt
rdrnd sse sse2 sse3 sse4.1 sse4.2 ssse3 xsave xsaveopt
Machine : x86_64
CPU Name : skylake-avx512
CPU Count : 12
CPU Features :
64bit adx aes avx avx2 avx512bw avx512cd avx512dq avx512f avx512vl bmi bmi2
clflushopt clwb cmov cx16 cx8 f16c fma fsgsbase fxsr invpcid lzcnt mmx
movbe pclmul pku popcnt prfchw rdrnd rdseed rtm sahf sse sse2 sse3 sse4.1
sse4.2 ssse3 xsave xsavec xsaveopt xsaves

__OS Information__
Platform : Darwin-17.6.0-x86_64-i386-64bit
Release : 17.6.0
System Name : Darwin
Version : Darwin Kernel Version 17.6.0: Tue May 8 15:22:16 PDT 2018; root:xnu-4570.61.1~1/RELEASE_X86_64
OS specific info : 10.13.5 x86_64
Platform Name : Linux-5.4.0-94-generic-x86_64-with-glibc2.31
Platform Release : 5.4.0-94-generic
OS Name : Linux
OS Version : #106-Ubuntu SMP Thu Jan 6 23:58:14 UTC 2022

__Python Information__
Python Compiler : GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)
Python Implementation : CPython
Python Version : 2.7.15
Python Locale : en_US UTF-8
Python Compiler : GCC 9.4.0
Python Implementation : CPython
Python Version : 3.10.2
Python Locale : en_GB.UTF-8

__LLVM information__
LLVM version : 6.0.0
LLVM Version : 11.1.0

__CUDA Information__
Found 1 CUDA devices
id 0 GeForce GT 750M [SUPPORTED]
compute capability: 3.0
pci device id: 0
pci bus id: 1
id 0 b'Quadro RTX 8000' [SUPPORTED]
Compute Capability: 7.5
PCI Device ID: 0
PCI Bus ID: 21
UUID: GPU-e6489c45-5b68-3b03-bab7-0e7c8e809643
Watchdog: Enabled
FP32/FP64 Performance Ratio: 32

(output truncated due to length)
2 changes: 1 addition & 1 deletion numba/core/codegen.py
@@ -1282,7 +1282,7 @@ def _check_llvm_bugs(self):
raise RuntimeError(
"LLVM will produce incorrect floating-point code "
"in the current locale %s.\nPlease read "
"https://numba.pydata.org/numba-doc/latest/user/faq.html#llvm-locale-bug "
"https://numba.readthedocs.io/en/stable/user/faq.html#llvm-locale-bug "
"for more information."
% (loc,))
raise AssertionError("Unexpected IR:\n%s\n" % (ir_out,))
6 changes: 3 additions & 3 deletions numba/core/compiler.py
@@ -558,7 +558,8 @@ def define_nopython_lowering_pipeline(state, name='nopython_lowering'):
"ensure features that are in use are in a valid form")
pm.add_pass(IRLegalization,
"ensure IR is legal prior to lowering")

# Annotate only once legalized
pm.add_pass(AnnotateTypes, "annotate types")
# lower
pm.add_pass(NativeLowering, "native lowering")
pm.add_pass(NoPythonBackend, "nopython mode backend")
@@ -572,7 +573,6 @@ def define_typed_pipeline(state, name="typed"):
pm = PassManager(name)
# typing
pm.add_pass(NopythonTypeInference, "nopython frontend")
pm.add_pass(AnnotateTypes, "annotate types")

# strip phis
pm.add_pass(PreLowerStripPhis, "remove phis nodes")
@@ -657,8 +657,8 @@ def define_objectmode_pipeline(state, name='object'):
# convert any remaining closures into functions
pm.add_pass(MakeFunctionToJitFunction,
"convert make_function into JIT functions")
pm.add_pass(AnnotateTypes, "annotate types")
pm.add_pass(IRLegalization, "ensure IR is legal prior to lowering")
pm.add_pass(AnnotateTypes, "annotate types")
pm.add_pass(ObjectModeBackEnd, "object mode backend")
pm.finalize()
return pm
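
The effect of the reordering in these hunks — ``AnnotateTypes`` now runs after ``IRLegalization`` rather than directly after type inference — can be shown with a minimal stand-in pass manager (a sketch, not Numba's actual ``PassManager`` API):

```python
# Minimal stand-in for a compiler pass manager, to show the ordering fix:
# type annotation now runs only after the IR has been legalized.
class TinyPassManager:
    def __init__(self, name):
        self.name = name
        self.passes = []

    def add_pass(self, pass_name, description):
        # Record pass names in registration order; descriptions are ignored.
        self.passes.append(pass_name)

pm = TinyPassManager("nopython_lowering")
pm.add_pass("IRLegalization", "ensure IR is legal prior to lowering")
pm.add_pass("AnnotateTypes", "annotate types")  # moved to post-legalization
pm.add_pass("NativeLowering", "native lowering")

# The annotation pass must come after legalization and before lowering.
assert pm.passes.index("AnnotateTypes") > pm.passes.index("IRLegalization")
assert pm.passes.index("AnnotateTypes") < pm.passes.index("NativeLowering")
print(pm.passes)
```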
6 changes: 5 additions & 1 deletion numba/core/config.py
@@ -135,7 +135,11 @@ def validate(self):
CUDA_USE_NVIDIA_BINDING = False

if CUDA_PER_THREAD_DEFAULT_STREAM: # noqa: F821
warnings.warn("PTDS is not supported with CUDA Python")
warnings.warn("PTDS support is handled by CUDA Python when "
"using the NVIDIA binding. Please set the "
"environment variable "
"CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM to 1 "
"instead.")

def process_environ(self, environ):
def _readenv(name, ctor, default):
5 changes: 4 additions & 1 deletion numba/core/datamodel/models.py
@@ -160,8 +160,11 @@ class OmittedArgDataModel(DataModel):
A data model for omitted arguments. Only the "argument" representation
is defined, other representations raise a NotImplementedError.
"""
# Omitted arguments don't produce any LLVM function argument.
# Omitted arguments use a dummy value type
def get_value_type(self):
return ir.LiteralStructType([])

# Omitted arguments don't produce any LLVM function argument.
def get_argument_type(self):
return ()

