RuntimeError: nms_impl: implementation for device cuda:0 not found. #6765

daydayup-bit · 2021-12-12T14:27:32Z

When I run /usr/src/app/demo/inference_demo.ipynb, the following error is reported:

/usr/src/app/mmdet/datasets/utils.py:65: UserWarning: "ImageToTensor" pipeline is replaced by "DefaultFormatBundle" for batch inference. It is recommended to manually replace it in the test data pipeline in your config file.
  warnings.warn(
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-4-4d78d4937daf> in <module>
      1 # test a single image
      2 img = 'demo.jpg'
----> 3 result = inference_detector(model, img)

/usr/src/app/mmdet/apis/inference.py in inference_detector(model, imgs)
    145     # forward the model
    146     with torch.no_grad():
--> 147         results = model(return_loss=False, rescale=True, **data)
    148 
    149     if not is_batch:

/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1013         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1014                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1015             return forward_call(*input, **kwargs)
   1016         # Do not call functions when jit is used
   1017         full_backward_hooks, non_full_backward_hooks = [], []

/opt/conda/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py in new_func(*args, **kwargs)
     96                                 'method of nn.Module')
     97             if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
---> 98                 return old_func(*args, **kwargs)
     99 
    100             # get the arg spec of the decorated method

/usr/src/app/mmdet/models/detectors/base.py in forward(self, img, img_metas, return_loss, **kwargs)
    172             return self.forward_train(img, img_metas, **kwargs)
    173         else:
--> 174             return self.forward_test(img, img_metas, **kwargs)
    175 
    176     def _parse_losses(self, losses):

/usr/src/app/mmdet/models/detectors/base.py in forward_test(self, imgs, img_metas, **kwargs)
    145             if 'proposals' in kwargs:
    146                 kwargs['proposals'] = kwargs['proposals'][0]
--> 147             return self.simple_test(imgs[0], img_metas[0], **kwargs)
    148         else:
    149             assert imgs[0].size(0) == 1, 'aug test does not support ' \

/usr/src/app/mmdet/models/detectors/two_stage.py in simple_test(self, img, img_metas, proposals, rescale)
    177         x = self.extract_feat(img)
    178         if proposals is None:
--> 179             proposal_list = self.rpn_head.simple_test_rpn(x, img_metas)
    180         else:
    181             proposal_list = proposals

/usr/src/app/mmdet/models/dense_heads/dense_test_mixins.py in simple_test_rpn(self, x, img_metas)
    128         """
    129         rpn_outs = self(x)
--> 130         proposal_list = self.get_bboxes(*rpn_outs, img_metas=img_metas)
    131         return proposal_list
    132 

/opt/conda/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py in new_func(*args, **kwargs)
    184                                 'method of nn.Module')
    185             if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
--> 186                 return old_func(*args, **kwargs)
    187             # get the arg spec of the decorated method
    188             args_info = getfullargspec(old_func)

/usr/src/app/mmdet/models/dense_heads/base_dense_head.py in get_bboxes(self, cls_scores, bbox_preds, score_factors, img_metas, cfg, rescale, with_nms, **kwargs)
     91                 score_factor_list = [None for _ in range(num_levels)]
     92 
---> 93             results = self._get_bboxes_single(cls_score_list, bbox_pred_list,
     94                                               score_factor_list, mlvl_priors,
     95                                               img_meta, cfg, rescale, with_nms,

/usr/src/app/mmdet/models/dense_heads/rpn_head.py in _get_bboxes_single(self, cls_score_list, bbox_pred_list, score_factor_list, mlvl_anchors, img_meta, cfg, rescale, with_nms, **kwargs)
    183                                 dtype=torch.long))
    184 
--> 185         return self._bbox_post_process(mlvl_scores, mlvl_bbox_preds,
    186                                        mlvl_valid_anchors, level_ids, cfg,
    187                                        img_shape)

/usr/src/app/mmdet/models/dense_heads/rpn_head.py in _bbox_post_process(self, mlvl_scores, mlvl_bboxes, mlvl_valid_anchors, level_ids, cfg, img_shape, **kwargs)
    230 
    231         if proposals.numel() > 0:
--> 232             dets, _ = batched_nms(proposals, scores, ids, cfg.nms)
    233         else:
    234             return proposals.new_zeros(0, 5)

/opt/conda/lib/python3.8/site-packages/mmcv/ops/nms.py in batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic)
    305     # Won't split to multiple nms nodes when exporting to onnx
    306     if boxes_for_nms.shape[0] < split_thr or torch.onnx.is_in_onnx_export():
--> 307         dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_)
    308         boxes = boxes[keep]
    309         # -1 indexing works abnormal in TensorRT

/opt/conda/lib/python3.8/site-packages/mmcv/utils/misc.py in new_func(*args, **kwargs)
    338 
    339             # apply converted arguments to the decorated method
--> 340             output = old_func(*args, **kwargs)
    341             return output
    342 

/opt/conda/lib/python3.8/site-packages/mmcv/ops/nms.py in nms(boxes, scores, iou_threshold, offset, score_threshold, max_num)
    169         inds = ext_module.nms(*indata_list, **indata_dict)
    170     else:
--> 171         inds = NMSop.apply(boxes, scores, iou_threshold, offset,
    172                            score_threshold, max_num)
    173     dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1)

/opt/conda/lib/python3.8/site-packages/mmcv/ops/nms.py in forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold, max_num)
     24                 valid_mask, as_tuple=False).squeeze(dim=1)
     25 
---> 26         inds = ext_module.nms(
     27             bboxes, scores, iou_threshold=float(iou_threshold), offset=offset)
     28 

RuntimeError: nms_impl: implementation for device cuda:0 not found.

My environment:

sys.platform: linux
Python: 3.8.8 (default, Feb 24 2021, 21:46:12) [GCC 7.3.0]
CUDA available: True
GPU 0: NVIDIA GeForce RTX 3090
CUDA_HOME: /usr/local/cuda
NVCC: Build cuda_11.3.r11.3/compiler.29745058_0
GCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
PyTorch: 1.9.0a0+2ecb2c7
PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2019.0.4 Product Build 20190411 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v1.8.0 (Git Hash N/A)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.3
  - NVCC architecture flags: -gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_86,code=compute_86
  - CuDNN 8.2
  - Magma 2.5.2
  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.2.0, CXX_COMPILER=/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=ON, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, 

TorchVision: 0.9.0a0
OpenCV: 3.4.11
MMCV: 1.4.0
MMCV Compiler: GCC 9.3
MMCV CUDA Compiler: not available
MMDetection: 2.19.0+f3817df

The text was updated successfully, but these errors were encountered:

jshilong · 2021-12-13T03:42:15Z

MMCV CUDA Compiler: not available
Please use mmcv-full

TangChiaHsin · 2021-12-15T02:14:28Z

How to output mmcv environment like yours?

My environment:

sys.platform: linux
Python: 3.8.8 (default, Feb 24 2021, 21:46:12) [GCC 7.3.0]
CUDA available: True
GPU 0: NVIDIA GeForce RTX 3090
CUDA_HOME: /usr/local/cuda
NVCC: Build cuda_11.3.r11.3/compiler.29745058_0
GCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
PyTorch: 1.9.0a0+2ecb2c7
PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2019.0.4 Product Build 20190411 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v1.8.0 (Git Hash N/A)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.3
  - NVCC architecture flags: -gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_86,code=compute_86
  - CuDNN 8.2
  - Magma 2.5.2
  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.2.0, CXX_COMPILER=/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=ON, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, 

TorchVision: 0.9.0a0
OpenCV: 3.4.11
MMCV: 1.4.0
MMCV Compiler: GCC 9.3
MMCV CUDA Compiler: not available
MMDetection: 2.19.0+f3817df

daydayup-bit · 2021-12-15T02:46:16Z

Run python mmdet/utils/collect_env.py

daydayup-bit · 2021-12-15T02:47:13Z

@EkAugust
Run python mmdet/utils/collect_env.py

TangChiaHsin · 2021-12-15T03:40:54Z

@daydayup-bit
Thanks for your timely reply.
I successfully compiled mmcv and mmdetection in a Docker container with CUDA and cuDNN. However, the host GPU driver version was too low to support CUDA 11.3, so I reinstalled the host GPU driver to support it. When I then ran a demo, this issue occurred. I guessed my environment was the same as yours, i.e., 'MMCV CUDA Compiler: not available', so I asked how to print the environment the way you did.

Then I tried to recompile mmcv directly, but I didn't know how to uninstall the existing mmcv that had been built from source, so the recompilation failed and I could not find the cause of the error. I deleted the Docker images and containers.

Finally, I solved it by recreating my Docker container from scratch, much like rebooting a PC when some unknown problem occurs.

gaojundong · 2022-02-21T11:46:32Z

Hello, did you solve this problem? I have the same problem, and I am sure I installed mmcv-full, but it still does not work.
This is my environment:
sys.platform: linux
Python: 3.7.12 | packaged by conda-forge | (default, Oct 26 2021, 06:08:21) [GCC 9.4.0]
CUDA available: True
GPU 0: NVIDIA GeForce RTX 2080 Ti
CUDA_HOME: None
GCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)
PyTorch: 1.10.2
PyTorch compiling details: PyTorch built with:

GCC 7.3
C++ Version: 201402
Intel(R) oneAPI Math Kernel Library Version 2022.0-Product Build 20211112 for Intel(R) 64 architecture applications
Intel(R) MKL-DNN v2.2.3 (Git Hash 7336ca9f055cf1bfa13efb658fe15dc9b41f0740)
OpenMP 201511 (a.k.a. OpenMP 4.5)
LAPACK is enabled (usually provided by MKL)
NNPACK is enabled
CPU capability usage: AVX512
CUDA Runtime 10.2
NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_37,code=compute_37
CuDNN 7.6.5
Magma 2.5.2
Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=10.2, CUDNN_VERSION=7.6.5, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.10.2, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON,

TorchVision: 0.11.3
OpenCV: 4.5.5
MMCV: 1.4.5
MMCV Compiler: GCC 7.3
MMCV CUDA Compiler: not available
MMDetection: 2.19.0+f08548b

dengandong · 2022-05-09T02:47:59Z

you guys all got 'MMCV CUDA Compiler: not available'

YFENG2 · 2022-05-25T02:05:38Z

I got this error too.
My solution was to reinstall the NVIDIA driver and then reinstall mmcv-full.
After that, the test passed.
Good luck.

xiuxiuing · 2023-04-06T16:40:12Z

Install mmcv-full with `pip3 install mmcv-full==1.7.0 -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.10/index.html`
instead of `pip3 install mmcv-full==1.7.0`.
For the specific compatible versions, see: https://mmcv.readthedocs.io/zh_CN/latest/get_started/installation.html#id1

jeethesh-pai · 2023-05-23T12:18:37Z

In docker containers, we need to set environment variables as follows before executing pip install:

ENV FORCE_CUDA="1"
ENV MMCV_WITH_OPS=1

This solved the problem for me

matcosta23 · 2023-05-25T14:34:27Z

In docker containers, we need to set environment variables as follows before executing pip install:
ENV FORCE_CUDA="1"
ENV MMCV_WITH_OPS=1
This solved the problem for me

Thanks for your help! Using those flags when installing MMCV from source also worked for me!

However, when doing it in a Dockerfile, the CUDA runtime was not recognized and the docker build fails. Which hardware are you using, @jeethesh-pai ? I'm using a Jetson AGX Xavier.

maxpain · 2023-10-18T13:51:06Z

I have the same problem when using mmcv==2.1.0 (installed via mim) with PyTorch 2.1.0 and CUDA 12.1 in a Dockerfile.
If I install the mmcv package from source (mim install "git+https://github.com/open-mmlab/mmcv.git@6299bc02bde35f96e0b57a6cc94ed0fda177c478") everything works fine.

Setting these ENV variables in Dockerfile doesn't help:

ENV FORCE_CUDA="1"
ENV MMCV_WITH_OPS=1

Example:

from mmcv.ops import batched_nms
import torch


def check_mmcv():
    """Smoke-test mmcv.ops.batched_nms on cuda:0 and print 'OK.' on success."""
    device = torch.device('cuda:0')
    # Two random boxes and two random scores on cuda:0, both assigned label 0.
    bboxes = torch.randn(2, 4, device=device)
    scores = torch.randn(2, device=device)
    labels = torch.zeros(2, dtype=torch.long, device=device)
    det_bboxes, keep_idxs = batched_nms(bboxes.to(torch.float32), scores.to(torch.float32), labels, {
        'type': 'nms',
        'iou_threshold': 0.6
    })
    # Only reached when batched_nms did not raise; its return values are unused.
    print('OK.')


if __name__ == '__main__':  # run the check only when executed as a script
    check_mmcv()

Error:

Traceback (most recent call last):
  File "/app/test.py", line 21, in <module>
    check_mmcv()
  File "/app/test.py", line 12, in check_mmcv
    det_bboxes, keep_idxs = batched_nms(bboxes.to(torch.float32), scores.to(torch.float32), labels, {
  File "/usr/local/lib/python3.10/dist-packages/mmcv/ops/nms.py", line 303, in batched_nms
    dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_)
  File "/usr/local/lib/python3.10/dist-packages/mmengine/utils/misc.py", line 395, in new_func
    output = old_func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/mmcv/ops/nms.py", line 127, in nms
    inds = NMSop.apply(boxes, scores, iou_threshold, offset, score_threshold,
  File "/usr/local/lib/python3.10/dist-packages/torch/autograd/function.py", line 539, in apply
    return super().apply(*args, **kwargs)  # type: ignore[misc]
  File "/usr/local/lib/python3.10/dist-packages/mmcv/ops/nms.py", line 27, in forward
    inds = ext_module.nms(
RuntimeError: nms_impl: implementation for device cuda:0 not found.

rayryeng · 2023-10-18T17:04:33Z

I have the same problem when using mmcv==2.1.0 (installed via mim) with PyTorch 2.1.0 and CUDA 12.1 in a Dockerfile. If I install the mmcv package from source (mim install "git+https://github.com/open-mmlab/mmcv.git@6299bc02bde35f96e0b57a6cc94ed0fda177c478") everything works fine.

Setting these ENV variables in Dockerfile doesn't help:

ENV FORCE_CUDA="1"
ENV MMCV_WITH_OPS=1

Example:

from mmcv.ops import batched_nms
import torch


def check_mmcv():
    """Smoke-test mmcv.ops.batched_nms on cuda:0 and print 'OK.' on success."""
    device = torch.device('cuda:0')
    # Two random boxes and two random scores on cuda:0, both assigned label 0.
    bboxes = torch.randn(2, 4, device=device)
    scores = torch.randn(2, device=device)
    labels = torch.zeros(2, dtype=torch.long, device=device)
    det_bboxes, keep_idxs = batched_nms(bboxes.to(torch.float32), scores.to(torch.float32), labels, {
        'type': 'nms',
        'iou_threshold': 0.6
    })
    # Only reached when batched_nms did not raise; its return values are unused.
    print('OK.')


if __name__ == '__main__':  # run the check only when executed as a script
    check_mmcv()

Error:

Traceback (most recent call last):
  File "/app/test.py", line 21, in <module>
    check_mmcv()
  File "/app/test.py", line 12, in check_mmcv
    det_bboxes, keep_idxs = batched_nms(bboxes.to(torch.float32), scores.to(torch.float32), labels, {
  File "/usr/local/lib/python3.10/dist-packages/mmcv/ops/nms.py", line 303, in batched_nms
    dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_)
  File "/usr/local/lib/python3.10/dist-packages/mmengine/utils/misc.py", line 395, in new_func
    output = old_func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/mmcv/ops/nms.py", line 127, in nms
    inds = NMSop.apply(boxes, scores, iou_threshold, offset, score_threshold,
  File "/usr/local/lib/python3.10/dist-packages/torch/autograd/function.py", line 539, in apply
    return super().apply(*args, **kwargs)  # type: ignore[misc]
  File "/usr/local/lib/python3.10/dist-packages/mmcv/ops/nms.py", line 27, in forward
    inds = ext_module.nms(
RuntimeError: nms_impl: implementation for device cuda:0 not found.

Same results for me. I am installing mmcv with the aforementioned fix and I still get this error. In my Dockerfile:

ENV FORCE_CUDA="1"
ENV MMCV_WITH_OPS=1
RUN python -m pip install -U openmim
RUN python -m pip install 'git+https://github.com/cocodataset/panopticapi.git'
RUN mim install mmengine
RUN mim install "mmcv>=2.0.0"
RUN mim install mmdet

kmisiarz · 2023-11-21T09:07:30Z

Any updates on this? I'm getting the same issue when running @rayryeng's snippet with this environment:

➜  mmdetection3d git:(main) ✗ python mmdet3d/utils/collect_env.py                                                                                                                                                 
sys.platform: linux
Python: 3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0]
CUDA available: True
numpy_random_seed: 2147483648
GPU 0: NVIDIA GeForce RTX 2080 Ti
CUDA_HOME: /usr/local/cuda-12.1
NVCC: Cuda compilation tools, release 12.1, V12.1.105
GCC: gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
PyTorch: 2.1.1
PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2023.1-Product Build 20230303 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.1.1 (Git Hash 64f6bcbcbab628e96f33a62c3e975f8535a7bde4)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 12.1
  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90
  - CuDNN 8.9.2
  - Magma 2.6.1
  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=12.1, CUDNN_VERSION=8.9.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_DISABLE_GPU_ASSERTS=ON, TORCH_VERSION=2.1.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, 

TorchVision: 0.16.1
OpenCV: 4.8.1
MMEngine: 0.10.0
MMDetection: 3.2.0
MMDetection3D: 1.3.0+5c0613b
spconv2.0: True

aRibra · 2024-01-09T23:13:05Z

mmcv-full

Thank you jshilong

I had installed mmcv with `mim install "mmcv>=2.0.0"`,
uninstalled it with `mim uninstall mmcv`,
installed mmcv-full with `mim install mmcv-full`,
and then installed mmcv again with `mim install mmcv`.

If you get the following error:

AssertionError: MMCV==1.7.2 is used but incompatible. Please install mmcv>=2.0.0rc4, <2.2.0.

Then uninstall both mmcv and mmcv-full, then install them both without specifying the mmcv version.

TangChiaHsin · 2024-01-09T23:34:30Z

mmcv-full

Thank you jshilong

I installed mmcv with mim install "mmcv>=2.0.0"

uninstalled mmcv mim uninstall mmcv

installed mmcv-full 'mim install mmcv-full'

and mmcv again with mim install mmcv

If you get the following error:
AssertionError: MMCV==1.7.2 is used but incompatible. Please install mmcv>=2.0.0rc4, <2.2.0.
Then uninstall both mmcv and mmcv-full, then install them both without specifying the mmcv version.

@aRibra MMCV v2.0.0 official version was released on April 6, 2023. In version 2.x, it removed components related to the training process and added a data transformation module. Also, starting from 2.x, it renamed the package names mmcv to mmcv-lite and mmcv-full to mmcv. For details, see Compatibility Documentation.

shani-sony · 2024-02-22T09:30:56Z

I installed mmcv with mim install mmcv==2.0.0 instead of pip install, and everything works now :)

rvandeghen · 2024-03-17T09:25:28Z

Hi,
I followed everything from here.
Here is the output of collect_env:

sys.platform: linux
Python: 3.11.8 | packaged by conda-forge | (main, Feb 16 2024, 20:53:32) [GCC 12.3.0]
CUDA available: True
MUSA available: False
numpy_random_seed: 2147483648
GPU 0: NVIDIA A100-SXM4-40GB
CUDA_HOME: None
GCC: gcc (GCC) 8.5.0 20210514 (Red Hat 8.5.0-10)
PyTorch: 2.2.1+cu121
PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.3.2 (Git Hash 2dc95a2ad0841e29db8b22fbccaf3e5da7992b01)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 12.1
  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90
  - CuDNN 8.9.2
  - Magma 2.6.1
  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=12.1, CUDNN_VERSION=8.9.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=2.2.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, 

TorchVision: 0.17.1+cu121
OpenCV: 4.9.0
MMEngine: 0.10.3
MMDetection: 3.3.0+cfd5d3a

I don't know how to solve the issue..

Bapapa1 · 2024-04-23T11:38:53Z

Install mmcv-full with `pip3 install mmcv-full==1.7.0 -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.10/index.html` instead of `pip3 install mmcv-full==1.7.0`. For the specific compatible versions, see: https://mmcv.readthedocs.io/zh_CN/latest/get_started/installation.html#id1

it works, thank you

jpiane21 · 2024-05-05T00:11:25Z

I was getting the same error on mmcv 2.1.0 and this fixed it for me.

<tldr;>

uninstall mmcv then reinstall with the command
mim install mmcv==2.1.0 -f https://download.openmmlab.com/mmcv/dist/cu118/torch2.30/index.html --no-cache-dir
Note: --no-cache-dir forces it to rebuild with wheel. This is necessary. Update the cu and torch versions based on your collect_env results

Details:
From collect_env:

sys.platform: linux
Python: 3.8.19 (default, Mar 20 2024, 19:58:24) [GCC 11.2.0]
CUDA available: True
MUSA available: False
numpy_random_seed: 2147483648
GPU 0: NVIDIA GeForce GTX 1650
CUDA_HOME: /usr/local/cuda-11.8
NVCC: Cuda compilation tools, release 11.8, V11.8.89
GCC: gcc (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0
PyTorch: 2.3.0
PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2023.1-Product Build 20230303 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.3.6 (Git Hash 86e6af5974177e513fd3fee58425e1063e7f1361)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.8
  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_90,code=sm_90;-gencode;arch=compute_37,code=compute_37
  - CuDNN 8.7
  - Magma 2.6.1
  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.8, CUDNN_VERSION=8.7.0, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=2.3.0, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, 

TorchVision: 0.18.0
OpenCV: 4.9.0
MMEngine: 0.10.4
MMDetection: 3.3.0+

Missing from the collect_env output is the MMCV CUDA Compiler entry. You can read it with:

from mmcv.ops import get_compiler_version, get_compiling_cuda_version

# Print whatever get_compiling_cuda_version() returns; per this thread the
# value is 'not available' on the installs that hit the nms_impl error.
print ('MCV CUDA Compiler: ' + get_compiling_cuda_version())

When I did this, I got: MCV CUDA Compiler: not available, which was mentioned earlier. The earlier fix needs to be updated to apply to mmcv >=2.0.0:

ENV FORCE_CUDA="1"
ENV MMCV_WITH_OPS=1

Adding these didn't seem to have an impact...

BUT

What actually fixed it was:
uninstall mmcv then reinstall with the command
mim install mmcv==2.1.0 -f https://download.openmmlab.com/mmcv/dist/cu118/torch2.30/index.html --no-cache-dir
Note: --no-cache-dir forces it to rebuild with wheel. This is necessary. Update the cu and torch versions based on your collect_env results

openmmlab-bot assigned jshilong Dec 12, 2021

jshilong closed this as completed Dec 15, 2021

BIGWangYuDong mentioned this issue Apr 22, 2022

RuntimeError: nms_impl: implementation for device cuda:0 not found. #7788

Closed

BIGWangYuDong mentioned this issue Apr 29, 2022

RuntimeError: nms_impl: implementation for device cuda:0 not found. #7874

Closed

ttppss mentioned this issue Mar 17, 2023

nsm_impl error during validation process with PyTorch 2.0.0 open-mmlab/mmyolo#674

Closed

bismillahkani mentioned this issue Jul 12, 2023

RuntimeError: nms_impl: implementation for device cuda:0 not found. openvinotoolkit/training_extensions#2343

Closed

wangxingjun778 mentioned this issue Aug 16, 2023

nms_impl: implementation for device cuda:0 not found modelscope/facechain#18

Closed

zkyseu mentioned this issue Sep 22, 2023

Size mismatch in the formward pass of the SegDecoder zkyseu/O2SFormer#17

Closed

HaoranZhuExplorer mentioned this issue Apr 3, 2024

MMCV, RuntimeError: modulated_deformable_im2col_impl: implementation for device cuda:0 not found. OpenDriveLab/ViDAR#22

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

RuntimeError: nms_impl: implementation for device cuda:0 not found. #6765

RuntimeError: nms_impl: implementation for device cuda:0 not found. #6765

daydayup-bit commented Dec 12, 2021

jshilong commented Dec 13, 2021

TangChiaHsin commented Dec 15, 2021

daydayup-bit commented Dec 15, 2021

daydayup-bit commented Dec 15, 2021

TangChiaHsin commented Dec 15, 2021

gaojundong commented Feb 21, 2022

dengandong commented May 9, 2022

YFENG2 commented May 25, 2022

xiuxiuing commented Apr 6, 2023

jeethesh-pai commented May 23, 2023

matcosta23 commented May 25, 2023

maxpain commented Oct 18, 2023 •

edited

Loading

rayryeng commented Oct 18, 2023

kmisiarz commented Nov 21, 2023 •

edited

Loading

aRibra commented Jan 9, 2024 •

edited

Loading

TangChiaHsin commented Jan 9, 2024

shani-sony commented Feb 22, 2024 •

edited

Loading

rvandeghen commented Mar 17, 2024

Bapapa1 commented Apr 23, 2024

jpiane21 commented May 5, 2024

RuntimeError: nms_impl: implementation for device cuda:0 not found. #6765

RuntimeError: nms_impl: implementation for device cuda:0 not found. #6765

Comments

daydayup-bit commented Dec 12, 2021

jshilong commented Dec 13, 2021

TangChiaHsin commented Dec 15, 2021

daydayup-bit commented Dec 15, 2021

daydayup-bit commented Dec 15, 2021

TangChiaHsin commented Dec 15, 2021

gaojundong commented Feb 21, 2022

dengandong commented May 9, 2022

YFENG2 commented May 25, 2022

xiuxiuing commented Apr 6, 2023

jeethesh-pai commented May 23, 2023

matcosta23 commented May 25, 2023

maxpain commented Oct 18, 2023 • edited Loading

rayryeng commented Oct 18, 2023

kmisiarz commented Nov 21, 2023 • edited Loading

aRibra commented Jan 9, 2024 • edited Loading

TangChiaHsin commented Jan 9, 2024

shani-sony commented Feb 22, 2024 • edited Loading

rvandeghen commented Mar 17, 2024

Bapapa1 commented Apr 23, 2024

jpiane21 commented May 5, 2024

maxpain commented Oct 18, 2023 •

edited

Loading

kmisiarz commented Nov 21, 2023 •

edited

Loading

aRibra commented Jan 9, 2024 •

edited

Loading

shani-sony commented Feb 22, 2024 •

edited

Loading