Enable CPU-only builds on macOS #616

Open · wants to merge 3 commits into master

Changes from all commits
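
Summary of the change: spconv/build.py now gates the CUDA GEMM/conv generator classes behind cumm's CUMM_CPU_ONLY_BUILD flag, cpu_core.py stops passing OpenMP flags on macOS, ops.py only queries a CUDA stream for CUDA tensors, and test/test_conv.py gains CPU coverage. As a quick orientation, here is a minimal CPU-only usage sketch; it is not part of the PR, assumes a CPU-capable spconv build, and uses spconv's public SparseConvTensor / SubMConv3d API with ConvAlgo.Native (the only algorithm the CPU backend supports, per the tests below):

# Hypothetical usage sketch, not part of this PR: a submanifold conv on CPU.
import torch
import spconv.pytorch as spconv
from spconv.core import ConvAlgo

# Three distinct active voxels: [batch_idx, z, y, x] as int32, on CPU.
indices = torch.tensor([[0, 0, 0, 0], [0, 1, 2, 3], [0, 4, 5, 6]], dtype=torch.int32)
features = torch.randn(3, 3)  # [num_voxels, in_channels]
x = spconv.SparseConvTensor(features, indices, spatial_shape=[16, 16, 16], batch_size=1)

# CPU supports only ConvAlgo.Native (see the test changes below).
conv = spconv.SubMConv3d(3, 8, kernel_size=3, algo=ConvAlgo.Native)
out = conv(x)  # SparseConvTensor with 8-channel features at the same voxels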
spconv/build.py (32 additions, 18 deletions)
@@ -28,6 +28,7 @@
 from cumm.gemm.main import GemmMainUnitTest
 from cumm.conv.main import ConvMainUnitTest
 from cumm.common import CompileInfo
+from cumm.constants import CUMM_CPU_ONLY_BUILD

 from spconv.csrc.sparse.all import SpconvOps
 from spconv.csrc.sparse.alloc import ExternalAllocator
@@ -38,39 +39,52 @@
 from spconv.csrc.sparse.convops import SimpleExternalSpconvMatmul
 from spconv.csrc.sparse.inference import InferenceOps

-all_shuffle = SHUFFLE_SIMT_PARAMS + SHUFFLE_VOLTA_PARAMS + SHUFFLE_TURING_PARAMS + SHUFFLE_AMPERE_PARAMS
-# all_shuffle = list(filter(lambda x: not x.is_nvrtc, all_shuffle))
-cu = GemmMainUnitTest(all_shuffle)
-cu.namespace = "cumm.gemm.main"
-all_imp = (IMPLGEMM_SIMT_PARAMS + IMPLGEMM_VOLTA_PARAMS +
-           IMPLGEMM_TURING_PARAMS + IMPLGEMM_AMPERE_PARAMS)
-# all_imp = list(filter(lambda x: not x.is_nvrtc, all_imp))
-convcu = ConvMainUnitTest(all_imp)
-convcu.namespace = "cumm.conv.main"
-gemmtuner = GemmTunerSimple(cu)
-gemmtuner.namespace = "csrc.sparse.convops.gemmops"
-convtuner = ConvTunerSimple(convcu)
-convtuner.namespace = "csrc.sparse.convops.convops"
-convops = ConvGemmOps(gemmtuner, convtuner)
-convops.namespace = "csrc.sparse.convops.spops"
+if not CUMM_CPU_ONLY_BUILD:
+    all_shuffle = SHUFFLE_SIMT_PARAMS + SHUFFLE_VOLTA_PARAMS + SHUFFLE_TURING_PARAMS + SHUFFLE_AMPERE_PARAMS
+    # all_shuffle = list(filter(lambda x: not x.is_nvrtc, all_shuffle))
+    cu = GemmMainUnitTest(all_shuffle)
+    cu.namespace = "cumm.gemm.main"
+    all_imp = (IMPLGEMM_SIMT_PARAMS + IMPLGEMM_VOLTA_PARAMS +
+               IMPLGEMM_TURING_PARAMS + IMPLGEMM_AMPERE_PARAMS)
+    # all_imp = list(filter(lambda x: not x.is_nvrtc, all_imp))
+    convcu = ConvMainUnitTest(all_imp)
+    convcu.namespace = "cumm.conv.main"

-cus = [
-    cu, convcu, gemmtuner, convtuner,
+    gemmtuner = GemmTunerSimple(cu)
+    gemmtuner.namespace = "csrc.sparse.convops.gemmops"
+    convtuner = ConvTunerSimple(convcu)
+    convtuner.namespace = "csrc.sparse.convops.convops"
+    convops = ConvGemmOps(gemmtuner, convtuner)
+    convops.namespace = "csrc.sparse.convops.spops"
+else:
+    gemmtuner = GemmTunerSimple(None)
+    gemmtuner.namespace = "csrc.sparse.convops.gemmops"
+    convtuner = ConvTunerSimple(None)
+    convtuner.namespace = "csrc.sparse.convops.convops"
+    convops = ConvGemmOps(gemmtuner, convtuner)
+    convops.namespace = "csrc.sparse.convops.spops"
+cus = []
+if not CUMM_CPU_ONLY_BUILD:
+    cus += [cu, convcu]
+cus += [
+    gemmtuner, convtuner,
     convops,
     SpconvOps(),
     BoxOps(),
     HashTable(),
     CompileInfo(),
     ExternalAllocator(),
     ExternalSpconvMatmul(),
-    SimpleExternalSpconvMatmul(), # for debug, won't be included in release
+    # SimpleExternalSpconvMatmul(), # for debug, won't be included in release, also doesn't work on CPU
     InferenceOps(),
     PointCloudCompress(),
 ]

 pccm.builder.build_pybind(cus,
                           PACKAGE_ROOT / "core_cc",
                           namespace_root=PACKAGE_ROOT,
                           load_library=False,
                           # build_meta=build_meta,
                           verbose=True)

 # cus_dev: List[pccm.Class] = [
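
Note on the pattern above: the GEMM/conv generators (cu, convcu) are only constructed when CUDA codegen is available, while the tuners and convops are still built (with None inputs) so their namespaces exist in CPU-only builds; cus then registers the CUDA classes conditionally. For context, a rough sketch of how the flag itself might be derived; the environment-variable name is an assumption here, and cumm.constants should be checked for the real definition:

# Hypothetical sketch of cumm.constants.CUMM_CPU_ONLY_BUILD; the actual
# definition in cumm may differ.
import os

CUMM_CPU_ONLY_BUILD = os.getenv("CUMM_CPU_ONLY_BUILD", "0").lower() in ("1", "true", "on")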
spconv/csrc/sparse/cpu_core.py (3 additions, 0 deletions)
@@ -23,6 +23,9 @@ def __init__(self):
         self.add_include("tensorview/parallel/all.h")
         if compat.InWindows:
             self.build_meta.add_public_cflags("cl", "/openmp")
+        elif compat.InMacOS:
+            # OpenMP on macOS fails tests, so disable OpenMP for now
+            pass
         else:
             self.build_meta.add_public_cflags("g++", "-fopenmp")
             self.build_meta.add_public_cflags("clang++", "-fopenmp")
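
The macOS branch deliberately adds no flags: per the comment, OpenMP builds currently fail the test suite there, so the CPU kernels run without OpenMP parallelism. For anyone experimenting later, the usual route with Apple clang is Homebrew's libomp; a purely hypothetical sketch, assuming pccm's build metadata exposes a matching linker-flag method (verify before use):

# Hypothetical: re-enabling OpenMP on macOS once the test failures are fixed.
# Apple clang needs -Xpreprocessor -fopenmp at compile time and libomp at link time.
elif compat.InMacOS:
    self.build_meta.add_public_cflags("clang++", "-Xpreprocessor", "-fopenmp")
    self.build_meta.add_public_ldflags("clang++", "-lomp")  # assumed pccm API, verify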
spconv/pytorch/ops.py (4 additions, 1 deletion)
@@ -1197,7 +1197,10 @@ def indice_conv_backward(features: torch.Tensor,
     pair_in = indice_pairs_tv[int(inverse)]
     pair_out = indice_pairs_tv[int(not inverse)]

-    stream = get_current_stream()
+    if features.is_cuda:
+        stream = get_current_stream()
+    else:
+        stream = 0
     indice_pair_num_cpu = indice_pair_num.cpu().tolist()
     if subm and all(x == 0 for x in indice_pair_num_cpu):
         return (din, dfilters.reshape(filters_shape))
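
The fix avoids calling get_current_stream(), which needs a CUDA runtime, when the tensors live on CPU; the native ops receive 0 as a null stream handle that the CPU code path never touches. A behaviorally similar standalone sketch (current_stream_or_zero is a name invented here, not spconv API):

import torch

def current_stream_or_zero(t: torch.Tensor) -> int:
    """Return the raw CUDA stream handle for CUDA tensors, else 0."""
    if t.is_cuda:
        # comparable to spconv's get_current_stream() for the active device
        return torch.cuda.current_stream().cuda_stream
    return 0  # CPU path: the handle is never dereferenced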
test/test_conv.py (28 additions, 9 deletions)
@@ -248,7 +248,9 @@ def test_spconv3d():
     test_case = TestCase()
     np.random.seed(484)
     torch.manual_seed(48848)
-    devices = ["cuda:0"]
+    devices = ["cpu"]
+    if torch.has_cuda:
+        devices += ["cuda:0"]
     shapes = [[19, 18, 17]]
     batchsizes = [1, 2]

@@ -259,17 +261,19 @@
     paddings = [0, 1, 2]
     dilations = [1, 2, 3]
     algos = [
-        ConvAlgo.Native, ConvAlgo.MaskImplicitGemm,
+        ConvAlgo.Native,
+        ConvAlgo.MaskImplicitGemm,
         ConvAlgo.MaskSplitImplicitGemm
     ]
-    algos = [ConvAlgo.Native, ConvAlgo.MaskImplicitGemm, ConvAlgo.MaskSplitImplicitGemm]

     for dev, shape, bs, IC, OC, k, s, p, d, al in params_grid(
             devices, shapes, batchsizes, in_channels, out_channels, ksizes,
             strides, paddings, dilations, algos):
         if all([s > 1, d > 1]):
             continue # don't support this.
-        # print(dev, shape, bs, IC, OC, k, s, p, d)
+        if dev == "cpu" and al is not ConvAlgo.Native:
+            continue # CPU only supports ConvAlgo.Native
+        # print(dev, shape, bs, IC, OC, k, s, p, d, al)
         device = torch.device(dev)
         num_points = [1500] * bs
         dtype = torch.float32
@@ -360,7 +364,9 @@ def test_spdeconv3d():
     test_case = TestCase()

     np.random.seed(484)
-    devices = ["cuda:0"]
+    devices = ["cpu"]
+    if torch.has_cuda:
+        devices += ["cuda:0"]
     shapes = [[19, 18, 17]]
     batchsizes = [1, 2]

@@ -372,7 +378,8 @@
     dilations = [1, 2, 3]

     algos = [
-        ConvAlgo.Native, ConvAlgo.MaskImplicitGemm,
+        ConvAlgo.Native,
+        ConvAlgo.MaskImplicitGemm,
         ConvAlgo.MaskSplitImplicitGemm
     ]

@@ -381,6 +388,8 @@
             strides, paddings, dilations, algos):
         if all([s > 1, d > 1]):
             continue # don't support this.
+        if dev == "cpu" and al is not ConvAlgo.Native:
+            continue # CPU only supports ConvAlgo.Native
         device = torch.device(dev)
         num_points = [1000] * bs
         dtype = torch.float32
@@ -463,7 +472,9 @@ def test_spmaxpool3d():
     test_case = TestCase()

     np.random.seed(485)
-    devices = ["cuda:0"]
+    devices = ["cpu"]
+    if torch.has_cuda:
+        devices += ["cuda:0"]
     shapes = [[19, 18, 17]]
     batchsizes = [1, 2]

@@ -478,7 +489,8 @@
     # paddings = [0]
     # dilations = [1]
     algos = [
-        ConvAlgo.Native, ConvAlgo.MaskImplicitGemm,
+        ConvAlgo.Native,
+        ConvAlgo.MaskImplicitGemm,
         ConvAlgo.MaskSplitImplicitGemm
     ]

@@ -488,6 +500,8 @@
             strides, paddings, dilations, algos):
         if all([s > 1, d > 1]):
             continue # don't support this.
+        if dev == "cpu" and al is not ConvAlgo.Native:
+            continue # CPU only supports ConvAlgo.Native
         device = torch.device(dev)
         num_points = [1000] * bs

@@ -544,7 +558,9 @@ def test_spglobalmaxpool3d():
     test_case = TestCase()

     np.random.seed(485)
-    devices = ["cpu:0", "cuda:0"]
+    devices = [] # no support for globalpool for CPU yet
+    if torch.has_cuda:
+        devices += ["cuda:0"]
     shapes = [[19, 18, 17]]
     batchsizes = [1, 2]

@@ -599,4 +615,7 @@
         test_case.assertAllClose(din_np, din_sparse_np, atol=1e-4)

 if __name__ == "__main__":
     test_spconv3d()
+    test_spdeconv3d()
+    test_spmaxpool3d()
+    test_spglobalmaxpool3d()
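
Taken together, the test changes follow one pattern: start from CPU, append CUDA devices only when available, and skip device/algorithm combinations the CPU backend lacks. A standalone sketch of that filtering, using itertools.product as a stand-in for spconv's params_grid helper and torch.cuda.is_available() in place of torch.has_cuda:

import itertools
import torch
from spconv.core import ConvAlgo

devices = ["cpu"] + (["cuda:0"] if torch.cuda.is_available() else [])
algos = [ConvAlgo.Native, ConvAlgo.MaskImplicitGemm, ConvAlgo.MaskSplitImplicitGemm]

for dev, algo in itertools.product(devices, algos):
    if dev == "cpu" and algo is not ConvAlgo.Native:
        continue  # CPU kernels exist only for the native algorithm
    print(dev, algo)  # each surviving combination would get a full test pass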