Enable CPU-only builds on macOS #616

Open · wants to merge 3 commits into master

Changes from all commits
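
Summary of the change: spconv/build.py now gates the CUDA GEMM/conv generator classes behind cumm's CUMM_CPU_ONLY_BUILD flag, cpu_core.py stops passing OpenMP flags on macOS, ops.py only queries a CUDA stream for CUDA tensors, and test/test_conv.py gains CPU coverage. As a quick orientation, here is a minimal CPU-only usage sketch; it is not part of the PR, assumes a CPU-capable spconv build, and uses spconv's public SparseConvTensor / SubMConv3d API with ConvAlgo.Native (the only algorithm the CPU backend supports, per the tests below):

# Hypothetical usage sketch, not part of this PR: a submanifold conv on CPU.
import torch
import spconv.pytorch as spconv
from spconv.core import ConvAlgo

# Three distinct active voxels: [batch_idx, z, y, x] as int32, on CPU.
indices = torch.tensor([[0, 0, 0, 0], [0, 1, 2, 3], [0, 4, 5, 6]], dtype=torch.int32)
features = torch.randn(3, 3)  # [num_voxels, in_channels]
x = spconv.SparseConvTensor(features, indices, spatial_shape=[16, 16, 16], batch_size=1)

# CPU supports only ConvAlgo.Native (see the test changes below).
conv = spconv.SubMConv3d(3, 8, kernel_size=3, algo=ConvAlgo.Native)
out = conv(x)  # SparseConvTensor with 8-channel features at the same voxels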
spconv/build.py (32 additions, 18 deletions)
@@ -28,6 +28,7 @@
 from cumm.gemm.main import GemmMainUnitTest
 from cumm.conv.main import ConvMainUnitTest
 from cumm.common import CompileInfo
+from cumm.constants import CUMM_CPU_ONLY_BUILD

 from spconv.csrc.sparse.all import SpconvOps
 from spconv.csrc.sparse.alloc import ExternalAllocator
@@ -38,39 +39,52 @@
 from spconv.csrc.sparse.convops import SimpleExternalSpconvMatmul
 from spconv.csrc.sparse.inference import InferenceOps

-all_shuffle = SHUFFLE_SIMT_PARAMS + SHUFFLE_VOLTA_PARAMS + SHUFFLE_TURING_PARAMS + SHUFFLE_AMPERE_PARAMS
-# all_shuffle = list(filter(lambda x: not x.is_nvrtc, all_shuffle))
-cu = GemmMainUnitTest(all_shuffle)
-cu.namespace = "cumm.gemm.main"
-all_imp = (IMPLGEMM_SIMT_PARAMS + IMPLGEMM_VOLTA_PARAMS +
-           IMPLGEMM_TURING_PARAMS + IMPLGEMM_AMPERE_PARAMS)
-# all_imp = list(filter(lambda x: not x.is_nvrtc, all_imp))
-convcu = ConvMainUnitTest(all_imp)
-convcu.namespace = "cumm.conv.main"
-gemmtuner = GemmTunerSimple(cu)
-gemmtuner.namespace = "csrc.sparse.convops.gemmops"
-convtuner = ConvTunerSimple(convcu)
-convtuner.namespace = "csrc.sparse.convops.convops"
-convops = ConvGemmOps(gemmtuner, convtuner)
-convops.namespace = "csrc.sparse.convops.spops"
+if not CUMM_CPU_ONLY_BUILD:
+    all_shuffle = SHUFFLE_SIMT_PARAMS + SHUFFLE_VOLTA_PARAMS + SHUFFLE_TURING_PARAMS + SHUFFLE_AMPERE_PARAMS
+    # all_shuffle = list(filter(lambda x: not x.is_nvrtc, all_shuffle))
+    cu = GemmMainUnitTest(all_shuffle)
+    cu.namespace = "cumm.gemm.main"
+    all_imp = (IMPLGEMM_SIMT_PARAMS + IMPLGEMM_VOLTA_PARAMS +
+               IMPLGEMM_TURING_PARAMS + IMPLGEMM_AMPERE_PARAMS)
+    # all_imp = list(filter(lambda x: not x.is_nvrtc, all_imp))
+    convcu = ConvMainUnitTest(all_imp)
+    convcu.namespace = "cumm.conv.main"

-cus = [
-    cu, convcu, gemmtuner, convtuner,
+    gemmtuner = GemmTunerSimple(cu)
+    gemmtuner.namespace = "csrc.sparse.convops.gemmops"
+    convtuner = ConvTunerSimple(convcu)
+    convtuner.namespace = "csrc.sparse.convops.convops"
+    convops = ConvGemmOps(gemmtuner, convtuner)
+    convops.namespace = "csrc.sparse.convops.spops"
+else:
+    gemmtuner = GemmTunerSimple(None)
+    gemmtuner.namespace = "csrc.sparse.convops.gemmops"
+    convtuner = ConvTunerSimple(None)
+    convtuner.namespace = "csrc.sparse.convops.convops"
+    convops = ConvGemmOps(gemmtuner, convtuner)
+    convops.namespace = "csrc.sparse.convops.spops"
+cus = []
+if not CUMM_CPU_ONLY_BUILD:
+    cus += [cu, convcu]
+cus += [
+    gemmtuner, convtuner,
     convops,
     SpconvOps(),
     BoxOps(),
     HashTable(),
     CompileInfo(),
     ExternalAllocator(),
     ExternalSpconvMatmul(),
-    SimpleExternalSpconvMatmul(), # for debug, won't be included in release
+    # SimpleExternalSpconvMatmul(), # for debug, won't be included in release, also doesn't work on CPU
     InferenceOps(),
     PointCloudCompress(),
 ]

 pccm.builder.build_pybind(cus,
                           PACKAGE_ROOT / "core_cc",
                           namespace_root=PACKAGE_ROOT,
                           load_library=False,
                           # build_meta=build_meta,
                           verbose=True)

 # cus_dev: List[pccm.Class] = [
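
Note on the pattern above: the GEMM/conv generators (cu, convcu) are only constructed when CUDA codegen is available, while the tuners and convops are still built (with None inputs) so their namespaces exist in CPU-only builds; cus then registers the CUDA classes conditionally. For context, a rough sketch of how the flag itself might be derived; the environment-variable name is an assumption here, and cumm.constants should be checked for the real definition:

# Hypothetical sketch of cumm.constants.CUMM_CPU_ONLY_BUILD; the actual
# definition in cumm may differ.
import os

CUMM_CPU_ONLY_BUILD = os.getenv("CUMM_CPU_ONLY_BUILD", "0").lower() in ("1", "true", "on")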
spconv/csrc/sparse/cpu_core.py (3 additions, 0 deletions)
@@ -23,6 +23,9 @@ def __init__(self):
         self.add_include("tensorview/parallel/all.h")
         if compat.InWindows:
             self.build_meta.add_public_cflags("cl", "/openmp")
+        elif compat.InMacOS:
+            # OpenMP on macOS fails tests, so disable OpenMP for now
+            pass
         else:
             self.build_meta.add_public_cflags("g++", "-fopenmp")
             self.build_meta.add_public_cflags("clang++", "-fopenmp")
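
The macOS branch deliberately adds no flags: per the comment, OpenMP builds currently fail the test suite there, so the CPU kernels run without OpenMP parallelism. For anyone experimenting later, the usual route with Apple clang is Homebrew's libomp; a purely hypothetical sketch, assuming pccm's build metadata exposes a matching linker-flag method (verify before use):

# Hypothetical: re-enabling OpenMP on macOS once the test failures are fixed.
# Apple clang needs -Xpreprocessor -fopenmp at compile time and libomp at link time.
elif compat.InMacOS:
    self.build_meta.add_public_cflags("clang++", "-Xpreprocessor", "-fopenmp")
    self.build_meta.add_public_ldflags("clang++", "-lomp")  # assumed pccm API, verify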
spconv/pytorch/ops.py (4 additions, 1 deletion)
@@ -1197,7 +1197,10 @@ def indice_conv_backward(features: torch.Tensor,
     pair_in = indice_pairs_tv[int(inverse)]
     pair_out = indice_pairs_tv[int(not inverse)]

-    stream = get_current_stream()
+    if features.is_cuda:
+        stream = get_current_stream()
+    else:
+        stream = 0
     indice_pair_num_cpu = indice_pair_num.cpu().tolist()
     if subm and all(x == 0 for x in indice_pair_num_cpu):
         return (din, dfilters.reshape(filters_shape))
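
The fix avoids calling get_current_stream(), which needs a CUDA runtime, when the tensors live on CPU; the native ops receive 0 as a null stream handle that the CPU code path never touches. A behaviorally similar standalone sketch (current_stream_or_zero is a name invented here, not spconv API):

import torch

def current_stream_or_zero(t: torch.Tensor) -> int:
    """Return the raw CUDA stream handle for CUDA tensors, else 0."""
    if t.is_cuda:
        # comparable to spconv's get_current_stream() for the active device
        return torch.cuda.current_stream().cuda_stream
    return 0  # CPU path: the handle is never dereferenced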
test/test_conv.py (28 additions, 9 deletions)
@@ -248,7 +248,9 @@ def test_spconv3d():
     test_case = TestCase()
     np.random.seed(484)
     torch.manual_seed(48848)
-    devices = ["cuda:0"]
+    devices = ["cpu"]
+    if torch.has_cuda:
+        devices += ["cuda:0"]
     shapes = [[19, 18, 17]]
     batchsizes = [1, 2]

@@ -259,17 +261,19 @@
     paddings = [0, 1, 2]
     dilations = [1, 2, 3]
     algos = [
-        ConvAlgo.Native, ConvAlgo.MaskImplicitGemm,
+        ConvAlgo.Native,
+        ConvAlgo.MaskImplicitGemm,
         ConvAlgo.MaskSplitImplicitGemm
     ]
-    algos = [ConvAlgo.Native, ConvAlgo.MaskImplicitGemm, ConvAlgo.MaskSplitImplicitGemm]

     for dev, shape, bs, IC, OC, k, s, p, d, al in params_grid(
             devices, shapes, batchsizes, in_channels, out_channels, ksizes,
             strides, paddings, dilations, algos):
         if all([s > 1, d > 1]):
             continue # don't support this.
-        # print(dev, shape, bs, IC, OC, k, s, p, d)
+        if dev == "cpu" and al is not ConvAlgo.Native:
+            continue # CPU only supports ConvAlgo.Native
+        # print(dev, shape, bs, IC, OC, k, s, p, d, al)
         device = torch.device(dev)
         num_points = [1500] * bs
         dtype = torch.float32
@@ -360,7 +364,9 @@ def test_spdeconv3d():
     test_case = TestCase()

     np.random.seed(484)
-    devices = ["cuda:0"]
+    devices = ["cpu"]
+    if torch.has_cuda:
+        devices += ["cuda:0"]
     shapes = [[19, 18, 17]]
     batchsizes = [1, 2]

@@ -372,7 +378,8 @@
     dilations = [1, 2, 3]

     algos = [
-        ConvAlgo.Native, ConvAlgo.MaskImplicitGemm,
+        ConvAlgo.Native,
+        ConvAlgo.MaskImplicitGemm,
         ConvAlgo.MaskSplitImplicitGemm
     ]

@@ -381,6 +388,8 @@
             strides, paddings, dilations, algos):
         if all([s > 1, d > 1]):
             continue # don't support this.
+        if dev == "cpu" and al is not ConvAlgo.Native:
+            continue # CPU only supports ConvAlgo.Native
         device = torch.device(dev)
         num_points = [1000] * bs
         dtype = torch.float32
@@ -463,7 +472,9 @@ def test_spmaxpool3d():
     test_case = TestCase()

     np.random.seed(485)
-    devices = ["cuda:0"]
+    devices = ["cpu"]
+    if torch.has_cuda:
+        devices += ["cuda:0"]
     shapes = [[19, 18, 17]]
     batchsizes = [1, 2]

@@ -478,7 +489,8 @@
     # paddings = [0]
     # dilations = [1]
     algos = [
-        ConvAlgo.Native, ConvAlgo.MaskImplicitGemm,
+        ConvAlgo.Native,
+        ConvAlgo.MaskImplicitGemm,
         ConvAlgo.MaskSplitImplicitGemm
     ]

@@ -488,6 +500,8 @@
             strides, paddings, dilations, algos):
         if all([s > 1, d > 1]):
             continue # don't support this.
+        if dev == "cpu" and al is not ConvAlgo.Native:
+            continue # CPU only supports ConvAlgo.Native
         device = torch.device(dev)
         num_points = [1000] * bs

@@ -544,7 +558,9 @@ def test_spglobalmaxpool3d():
     test_case = TestCase()

     np.random.seed(485)
-    devices = ["cpu:0", "cuda:0"]
+    devices = [] # no support for globalpool for CPU yet
+    if torch.has_cuda:
+        devices += ["cuda:0"]
     shapes = [[19, 18, 17]]
     batchsizes = [1, 2]

@@ -599,4 +615,7 @@
         test_case.assertAllClose(din_np, din_sparse_np, atol=1e-4)

 if __name__ == "__main__":
     test_spconv3d()
+    test_spdeconv3d()
+    test_spmaxpool3d()
+    test_spglobalmaxpool3d()
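
Taken together, the test changes follow one pattern: start from CPU, append CUDA devices only when available, and skip device/algorithm combinations the CPU backend lacks. A standalone sketch of that filtering, using itertools.product as a stand-in for spconv's params_grid helper and torch.cuda.is_available() in place of torch.has_cuda:

import itertools
import torch
from spconv.core import ConvAlgo

devices = ["cpu"] + (["cuda:0"] if torch.cuda.is_available() else [])
algos = [ConvAlgo.Native, ConvAlgo.MaskImplicitGemm, ConvAlgo.MaskSplitImplicitGemm]

for dev, algo in itertools.product(devices, algos):
    if dev == "cpu" and algo is not ConvAlgo.Native:
        continue  # CPU kernels exist only for the native algorithm
    print(dev, algo)  # each surviving combination would get a full test pass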