You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I am trying to use the GPU at "cuda:1" instead of "cuda:0", and whenever I do so the knn calculation fails with this error:
/usr/local/lib/python3.7/dist-packages/torch_cluster/knn.py in knn(x, y, k, batch_x, batch_y)
55
56 if x.is_cuda:
---> 57 return torch_cluster.knn_cuda.knn(x, y, k, batch_x, batch_y)
58
59 # Rescale x and y.
RuntimeError: CUDA error: an illegal memory access was encountered (free_blocks at /pytorch/aten/src/THC/THCCachingAllocator.cpp:439)
frame #0: std::function<std::string ()>::operator()() const + 0x11 (0x7f68e9bbb021 in /usr/local/lib/python3.7/dist-packages/torch/lib/libc10.so)
frame #1: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x2a (0x7f68e9bba8ea in /usr/local/lib/python3.7/dist-packages/torch/lib/libc10.so)
frame #2: + 0x13f295d (0x7f68f513b95d in /usr/local/lib/python3.7/dist-packages/torch/lib/libcaffe2_gpu.so)
frame #3: + 0x13f79df (0x7f68f51409df in /usr/local/lib/python3.7/dist-packages/torch/lib/libcaffe2_gpu.so)
frame #4: + 0x13f900a (0x7f68f514200a in /usr/local/lib/python3.7/dist-packages/torch/lib/libcaffe2_gpu.so)
frame #5: at::native::empty_cuda(c10::ArrayRef, at::TensorOptions const&) + 0x36b (0x7f68f64007fb in /usr/local/lib/python3.7/dist-packages/torch/lib/libcaffe2_gpu.so)
frame #6: at::CUDALongType::empty(c10::ArrayRef, at::TensorOptions const&) const + 0x4d (0x7f68f50fadad in /usr/local/lib/python3.7/dist-packages/torch/lib/libcaffe2_gpu.so)
frame #7: torch::autograd::VariableType::empty(c10::ArrayRef, at::TensorOptions const&) const + 0x179 (0x7f68e8e7c2b9 in /usr/local/lib/python3.7/dist-packages/torch/lib/libtorch.so.1)
frame #8: at::native::zeros(c10::ArrayRef, at::TensorOptions const&) + 0x40 (0x7f68ea74c470 in /usr/local/lib/python3.7/dist-packages/torch/lib/libcaffe2.so)
frame #9: + 0x1dc5e (0x7f68c1aadc5e in /usr/local/lib/python3.7/dist-packages/torch_cluster/knn_cuda.cpython-37m-x86_64-linux-gnu.so)
frame #10: degree(at::Tensor, long) + 0x63 (0x7f68c1aae405 in /usr/local/lib/python3.7/dist-packages/torch_cluster/knn_cuda.cpython-37m-x86_64-linux-gnu.so)
frame #11: knn_cuda(at::Tensor, at::Tensor, unsigned long, at::Tensor, at::Tensor) + 0xf7 (0x7f68c1aaf226 in /usr/local/lib/python3.7/dist-packages/torch_cluster/knn_cuda.cpython-37m-x86_64-linux-gnu.so)
frame #12: knn(at::Tensor, at::Tensor, unsigned long, at::Tensor, at::Tensor) + 0x134 (0x7f68c1aa1324 in /usr/local/lib/python3.7/dist-packages/torch_cluster/knn_cuda.cpython-37m-x86_64-linux-gnu.so)
frame #13: + 0x1cc7b (0x7f68c1aacc7b in /usr/local/lib/python3.7/dist-packages/torch_cluster/knn_cuda.cpython-37m-x86_64-linux-gnu.so)
frame #14: + 0x19ed7 (0x7f68c1aa9ed7 in /usr/local/lib/python3.7/dist-packages/torch_cluster/knn_cuda.cpython-37m-x86_64-linux-gnu.so)
frame #15: _PyMethodDef_RawFastCallKeywords + 0x24c (0x560d17abfc9c in /usr/bin/python3.7)
frame #16: _PyCFunction_FastCallKeywords + 0x20 (0x560d17abfa30 in /usr/bin/python3.7)
frame #17: _PyEval_EvalFrameDefault + 0x4ae9 (0x560d17b351f9 in /usr/bin/python3.7)
frame #18: _PyEval_EvalCodeWithName + 0x2df (0x560d17b2fccf in /usr/bin/python3.7)
frame #19: _PyFunction_FastCallKeywords + 0x1ea (0x560d17ac13ea in /usr/bin/python3.7)
frame #20: _PyEval_EvalFrameDefault + 0x43e (0x560d17b30b4e in /usr/bin/python3.7)
frame #21: _PyEval_EvalCodeWithName + 0x2df (0x560d17b2fccf in /usr/bin/python3.7)
frame #22: _PyFunction_FastCallKeywords + 0x1ea (0x560d17ac13ea in /usr/bin/python3.7)
frame #23: _PyEval_EvalFrameDefault + 0x12db (0x560d17b319eb in /usr/bin/python3.7)
frame #24: _PyEval_EvalCodeWithName + 0x475 (0x560d17b2fe65 in /usr/bin/python3.7)
frame #25: _PyObject_Call_Prepend + 0x347 (0x560d17ac1ab7 in /usr/bin/python3.7)
frame #26: PyObject_Call + 0xb8 (0x560d17ac1ea8 in /usr/bin/python3.7)
frame #27: _PyEval_EvalFrameDefault + 0x1bbf (0x560d17b322cf in /usr/bin/python3.7)
frame #28: _PyEval_EvalCodeWithName + 0x2df (0x560d17b2fccf in /usr/bin/python3.7)
frame #29: _PyObject_Call_Prepend + 0x347 (0x560d17ac1ab7 in /usr/bin/python3.7)
frame #30: + 0x17a999 (0x560d17b06999 in /usr/bin/python3.7)
frame #31: _PyObject_FastCallKeywords + 0x92 (0x560d17ac0442 in /usr/bin/python3.7)
frame #32: _PyEval_EvalFrameDefault + 0x4efd (0x560d17b3560d in /usr/bin/python3.7)
frame #33: _PyObject_Call_Prepend + 0x105 (0x560d17ac1875 in /usr/bin/python3.7)
frame #34: PyObject_Call + 0xb8 (0x560d17ac1ea8 in /usr/bin/python3.7)
frame #35: _PyEval_EvalFrameDefault + 0x1bbf (0x560d17b322cf in /usr/bin/python3.7)
frame #36: _PyEval_EvalCodeWithName + 0x2df (0x560d17b2fccf in /usr/bin/python3.7)
frame #37: _PyObject_Call_Prepend + 0x347 (0x560d17ac1ab7 in /usr/bin/python3.7)
frame #38: + 0x17a999 (0x560d17b06999 in /usr/bin/python3.7)
frame #39: _PyObject_FastCallKeywords + 0x92 (0x560d17ac0442 in /usr/bin/python3.7)
frame #40: _PyEval_EvalFrameDefault + 0x4695 (0x560d17b34da5 in /usr/bin/python3.7)
frame #41: _PyEval_EvalCodeWithName + 0x2df (0x560d17b2fccf in /usr/bin/python3.7)
frame #42: PyEval_EvalCode + 0x23 (0x560d17b2f9e3 in /usr/bin/python3.7)
frame #43: + 0x1a1d93 (0x560d17b2dd93 in /usr/bin/python3.7)
frame #44: _PyMethodDef_RawFastCallKeywords + 0x6d (0x560d17abfabd in /usr/bin/python3.7)
frame #45: _PyCFunction_FastCallKeywords + 0x20 (0x560d17abfa30 in /usr/bin/python3.7)
frame #46: _PyEval_EvalFrameDefault + 0x4383 (0x560d17b34a93 in /usr/bin/python3.7)
frame #47: _PyGen_Send + 0x295 (0x560d17bbc3c5 in /usr/bin/python3.7)
frame #48: _PyEval_EvalFrameDefault + 0x1652 (0x560d17b31d62 in /usr/bin/python3.7)
frame #49: _PyGen_Send + 0x295 (0x560d17bbc3c5 in /usr/bin/python3.7)
frame #50: _PyEval_EvalFrameDefault + 0x1652 (0x560d17b31d62 in /usr/bin/python3.7)
frame #51: _PyGen_Send + 0x295 (0x560d17bbc3c5 in /usr/bin/python3.7)
frame #52: _PyMethodDef_RawFastCallKeywords + 0xe9 (0x560d17abfb39 in /usr/bin/python3.7)
frame #53: _PyMethodDescr_FastCallKeywords + 0x4d (0x560d17bba4ad in /usr/bin/python3.7)
frame #54: _PyEval_EvalFrameDefault + 0x48d9 (0x560d17b34fe9 in /usr/bin/python3.7)
frame #55: _PyFunction_FastCallKeywords + 0x10a (0x560d17ac130a in /usr/bin/python3.7)
frame #56: _PyEval_EvalFrameDefault + 0x43e (0x560d17b30b4e in /usr/bin/python3.7)
frame #57: _PyFunction_FastCallKeywords + 0x10a (0x560d17ac130a in /usr/bin/python3.7)
frame #58: _PyEval_EvalFrameDefault + 0x697 (0x560d17b30da7 in /usr/bin/python3.7)
frame #59: _PyEval_EvalCodeWithName + 0x2df (0x560d17b2fccf in /usr/bin/python3.7)
frame #60: _PyObject_Call_Prepend + 0x533 (0x560d17ac1ca3 in /usr/bin/python3.7)
frame #61: PyObject_Call + 0xb8 (0x560d17ac1ea8 in /usr/bin/python3.7)
frame #62: _PyEval_EvalFrameDefault + 0x1bbf (0x560d17b322cf in /usr/bin/python3.7)
frame #63: _PyEval_EvalCodeWithName + 0x475 (0x560d17b2fe65 in /usr/bin/python3.7)
I really don't know CUDA, so sadly I can't be of any further help. It seems to me that the malloc may be getting called on the wrong GPU.
Thanks a lot for all your work!
The text was updated successfully, but these errors were encountered:
I played around with the PyTorch extension API again, but it seems that multi-GPU support still does not work (pytorch/extension-cpp#18). I will ask for a status update. In the meantime, you can use
Hey rusty,
I am trying to use the GPU at "cuda:1" instead of "cuda:0", and whenever I do so the knn calculation fails with this error:
/usr/local/lib/python3.7/dist-packages/torch_cluster/knn.py in knn(x, y, k, batch_x, batch_y)
55
56 if x.is_cuda:
---> 57 return torch_cluster.knn_cuda.knn(x, y, k, batch_x, batch_y)
58
59 # Rescale x and y.
RuntimeError: CUDA error: an illegal memory access was encountered (free_blocks at /pytorch/aten/src/THC/THCCachingAllocator.cpp:439)
frame #0: std::function<std::string ()>::operator()() const + 0x11 (0x7f68e9bbb021 in /usr/local/lib/python3.7/dist-packages/torch/lib/libc10.so)
frame #1: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x2a (0x7f68e9bba8ea in /usr/local/lib/python3.7/dist-packages/torch/lib/libc10.so)
frame #2: + 0x13f295d (0x7f68f513b95d in /usr/local/lib/python3.7/dist-packages/torch/lib/libcaffe2_gpu.so)
frame #3: + 0x13f79df (0x7f68f51409df in /usr/local/lib/python3.7/dist-packages/torch/lib/libcaffe2_gpu.so)
frame #4: + 0x13f900a (0x7f68f514200a in /usr/local/lib/python3.7/dist-packages/torch/lib/libcaffe2_gpu.so)
frame #5: at::native::empty_cuda(c10::ArrayRef, at::TensorOptions const&) + 0x36b (0x7f68f64007fb in /usr/local/lib/python3.7/dist-packages/torch/lib/libcaffe2_gpu.so)
frame #6: at::CUDALongType::empty(c10::ArrayRef, at::TensorOptions const&) const + 0x4d (0x7f68f50fadad in /usr/local/lib/python3.7/dist-packages/torch/lib/libcaffe2_gpu.so)
frame #7: torch::autograd::VariableType::empty(c10::ArrayRef, at::TensorOptions const&) const + 0x179 (0x7f68e8e7c2b9 in /usr/local/lib/python3.7/dist-packages/torch/lib/libtorch.so.1)
frame #8: at::native::zeros(c10::ArrayRef, at::TensorOptions const&) + 0x40 (0x7f68ea74c470 in /usr/local/lib/python3.7/dist-packages/torch/lib/libcaffe2.so)
frame #9: + 0x1dc5e (0x7f68c1aadc5e in /usr/local/lib/python3.7/dist-packages/torch_cluster/knn_cuda.cpython-37m-x86_64-linux-gnu.so)
frame #10: degree(at::Tensor, long) + 0x63 (0x7f68c1aae405 in /usr/local/lib/python3.7/dist-packages/torch_cluster/knn_cuda.cpython-37m-x86_64-linux-gnu.so)
frame #11: knn_cuda(at::Tensor, at::Tensor, unsigned long, at::Tensor, at::Tensor) + 0xf7 (0x7f68c1aaf226 in /usr/local/lib/python3.7/dist-packages/torch_cluster/knn_cuda.cpython-37m-x86_64-linux-gnu.so)
frame #12: knn(at::Tensor, at::Tensor, unsigned long, at::Tensor, at::Tensor) + 0x134 (0x7f68c1aa1324 in /usr/local/lib/python3.7/dist-packages/torch_cluster/knn_cuda.cpython-37m-x86_64-linux-gnu.so)
frame #13: + 0x1cc7b (0x7f68c1aacc7b in /usr/local/lib/python3.7/dist-packages/torch_cluster/knn_cuda.cpython-37m-x86_64-linux-gnu.so)
frame #14: + 0x19ed7 (0x7f68c1aa9ed7 in /usr/local/lib/python3.7/dist-packages/torch_cluster/knn_cuda.cpython-37m-x86_64-linux-gnu.so)
frame #15: _PyMethodDef_RawFastCallKeywords + 0x24c (0x560d17abfc9c in /usr/bin/python3.7)
frame #16: _PyCFunction_FastCallKeywords + 0x20 (0x560d17abfa30 in /usr/bin/python3.7)
frame #17: _PyEval_EvalFrameDefault + 0x4ae9 (0x560d17b351f9 in /usr/bin/python3.7)
frame #18: _PyEval_EvalCodeWithName + 0x2df (0x560d17b2fccf in /usr/bin/python3.7)
frame #19: _PyFunction_FastCallKeywords + 0x1ea (0x560d17ac13ea in /usr/bin/python3.7)
frame #20: _PyEval_EvalFrameDefault + 0x43e (0x560d17b30b4e in /usr/bin/python3.7)
frame #21: _PyEval_EvalCodeWithName + 0x2df (0x560d17b2fccf in /usr/bin/python3.7)
frame #22: _PyFunction_FastCallKeywords + 0x1ea (0x560d17ac13ea in /usr/bin/python3.7)
frame #23: _PyEval_EvalFrameDefault + 0x12db (0x560d17b319eb in /usr/bin/python3.7)
frame #24: _PyEval_EvalCodeWithName + 0x475 (0x560d17b2fe65 in /usr/bin/python3.7)
frame #25: _PyObject_Call_Prepend + 0x347 (0x560d17ac1ab7 in /usr/bin/python3.7)
frame #26: PyObject_Call + 0xb8 (0x560d17ac1ea8 in /usr/bin/python3.7)
frame #27: _PyEval_EvalFrameDefault + 0x1bbf (0x560d17b322cf in /usr/bin/python3.7)
frame #28: _PyEval_EvalCodeWithName + 0x2df (0x560d17b2fccf in /usr/bin/python3.7)
frame #29: _PyObject_Call_Prepend + 0x347 (0x560d17ac1ab7 in /usr/bin/python3.7)
frame #30: + 0x17a999 (0x560d17b06999 in /usr/bin/python3.7)
frame #31: _PyObject_FastCallKeywords + 0x92 (0x560d17ac0442 in /usr/bin/python3.7)
frame #32: _PyEval_EvalFrameDefault + 0x4efd (0x560d17b3560d in /usr/bin/python3.7)
frame #33: _PyObject_Call_Prepend + 0x105 (0x560d17ac1875 in /usr/bin/python3.7)
frame #34: PyObject_Call + 0xb8 (0x560d17ac1ea8 in /usr/bin/python3.7)
frame #35: _PyEval_EvalFrameDefault + 0x1bbf (0x560d17b322cf in /usr/bin/python3.7)
frame #36: _PyEval_EvalCodeWithName + 0x2df (0x560d17b2fccf in /usr/bin/python3.7)
frame #37: _PyObject_Call_Prepend + 0x347 (0x560d17ac1ab7 in /usr/bin/python3.7)
frame #38: + 0x17a999 (0x560d17b06999 in /usr/bin/python3.7)
frame #39: _PyObject_FastCallKeywords + 0x92 (0x560d17ac0442 in /usr/bin/python3.7)
frame #40: _PyEval_EvalFrameDefault + 0x4695 (0x560d17b34da5 in /usr/bin/python3.7)
frame #41: _PyEval_EvalCodeWithName + 0x2df (0x560d17b2fccf in /usr/bin/python3.7)
frame #42: PyEval_EvalCode + 0x23 (0x560d17b2f9e3 in /usr/bin/python3.7)
frame #43: + 0x1a1d93 (0x560d17b2dd93 in /usr/bin/python3.7)
frame #44: _PyMethodDef_RawFastCallKeywords + 0x6d (0x560d17abfabd in /usr/bin/python3.7)
frame #45: _PyCFunction_FastCallKeywords + 0x20 (0x560d17abfa30 in /usr/bin/python3.7)
frame #46: _PyEval_EvalFrameDefault + 0x4383 (0x560d17b34a93 in /usr/bin/python3.7)
frame #47: _PyGen_Send + 0x295 (0x560d17bbc3c5 in /usr/bin/python3.7)
frame #48: _PyEval_EvalFrameDefault + 0x1652 (0x560d17b31d62 in /usr/bin/python3.7)
frame #49: _PyGen_Send + 0x295 (0x560d17bbc3c5 in /usr/bin/python3.7)
frame #50: _PyEval_EvalFrameDefault + 0x1652 (0x560d17b31d62 in /usr/bin/python3.7)
frame #51: _PyGen_Send + 0x295 (0x560d17bbc3c5 in /usr/bin/python3.7)
frame #52: _PyMethodDef_RawFastCallKeywords + 0xe9 (0x560d17abfb39 in /usr/bin/python3.7)
frame #53: _PyMethodDescr_FastCallKeywords + 0x4d (0x560d17bba4ad in /usr/bin/python3.7)
frame #54: _PyEval_EvalFrameDefault + 0x48d9 (0x560d17b34fe9 in /usr/bin/python3.7)
frame #55: _PyFunction_FastCallKeywords + 0x10a (0x560d17ac130a in /usr/bin/python3.7)
frame #56: _PyEval_EvalFrameDefault + 0x43e (0x560d17b30b4e in /usr/bin/python3.7)
frame #57: _PyFunction_FastCallKeywords + 0x10a (0x560d17ac130a in /usr/bin/python3.7)
frame #58: _PyEval_EvalFrameDefault + 0x697 (0x560d17b30da7 in /usr/bin/python3.7)
frame #59: _PyEval_EvalCodeWithName + 0x2df (0x560d17b2fccf in /usr/bin/python3.7)
frame #60: _PyObject_Call_Prepend + 0x533 (0x560d17ac1ca3 in /usr/bin/python3.7)
frame #61: PyObject_Call + 0xb8 (0x560d17ac1ea8 in /usr/bin/python3.7)
frame #62: _PyEval_EvalFrameDefault + 0x1bbf (0x560d17b322cf in /usr/bin/python3.7)
frame #63: _PyEval_EvalCodeWithName + 0x475 (0x560d17b2fe65 in /usr/bin/python3.7)
I really don't know CUDA, so sadly I can't be of any further help. It seems to me that the malloc may be getting called on the wrong GPU.
Thanks a lot for all your work!
The text was updated successfully, but these errors were encountered: