Teach xla_test about the interpreter backend, fail on unknown backends
PiperOrigin-RevId: 632611687
ddunl authored and tensorflower-gardener committed May 10, 2024
1 parent 8683749 commit da08c58
Showing 6 changed files with 164 additions and 63 deletions.
2 changes: 1 addition & 1 deletion third_party/xla/.kokoro/linux/build.sh
@@ -54,7 +54,7 @@ RBE_FLAGS=""
 TARGET_FILTERS="-@local_tsl//tsl/platform:subprocess_test -@local_tsl//tsl/platform/cloud:google_auth_provider_test -@local_tsl//tsl/platform/cloud:oauth_client_test"

 if is_linux_gpu_job ; then
-  TAGS_FILTER="$TAGS_FILTER,gpu,requires-gpu-nvidia,-no_gpu"
+  TAGS_FILTER="$TAGS_FILTER,gpu_any,requires-gpu-nvidia,-no_gpu"

   # We are currently running XLA presubmits on machines with NVIDIA T4 GPUs,
   # which have a compute compatibility of 7.5. Se we filter out all the tests
70 changes: 38 additions & 32 deletions third_party/xla/xla/service/gpu/BUILD
@@ -1683,7 +1683,7 @@ xla_test(
     srcs = if_gpu_is_configured(["gemm_algorithm_picker_test.cc"]),
     backends = [
         "gpu_v100",
-    ] + if_oss(["gpu"]),
+    ] + if_oss(["gpu_any"]),
     deps = [
         ":autotuner_util",
         ":backend_configs_cc",
@@ -1976,7 +1976,7 @@ xla_test(
     srcs = if_gpu_is_configured(["conv_algorithm_picker_test.cc"]),
     backends = [
         "gpu_v100",
-    ] + if_oss(["gpu"]),
+    ] + if_oss(["gpu_any"]),
     tags = [
         "noasan",
         "nomsan",
@@ -4636,7 +4636,7 @@ xla_test(
     },
     backends = [
         "gpu_a100",
-    ] + if_oss(["gpu"]),
+    ] + if_oss(["gpu_any"]),
     local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
     shard_count = 10,
     deps = [
@@ -5299,19 +5299,22 @@ xla_test(
         "gpu_p100",
         "gpu_v100",
     ],
-    deps = if_cuda_is_configured([
-        ":dot_operand_converter",
-        "@com_google_googletest//:gtest",
-        "@com_google_absl//absl/strings",
-        "@com_google_absl//absl/strings:string_view",
-        "//xla:shape_util",
-        "//xla/hlo/ir:hlo",
-        "//xla/hlo/utils:hlo_matchers",
-        "//xla/service:pattern_matcher",
-        "//xla/tests:hlo_test_base",
-        "//xla/tests:xla_internal_test_main",
-        "@local_tsl//tsl/platform:statusor",
-    ]),
+    deps = if_cuda_is_configured(
+        [
+            ":dot_operand_converter",
+            "@com_google_googletest//:gtest",
+            "@com_google_absl//absl/strings",
+            "@com_google_absl//absl/strings:string_view",
+            "//xla:shape_util",
+            "//xla/hlo/ir:hlo",
+            "//xla/hlo/utils:hlo_matchers",
+            "//xla/service:pattern_matcher",
+            "//xla/tests:hlo_test_base",
+            "//xla/tests:xla_internal_test_main",
+            "@local_tsl//tsl/platform:statusor",
+        ],
+        ["@local_tsl//tsl/platform:test_main"],  # b/317293391
+    ),
 )

 cc_library(
@@ -5848,28 +5851,31 @@ xla_cc_test(

 xla_test(
     name = "determinism_test",
-    srcs = ["determinism_test.cc"],
+    srcs = if_gpu_is_configured(["determinism_test.cc"]),
     backends = [
         "gpu_a100",
     ],
     local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]) + if_rocm_is_configured([
         "TENSORFLOW_USE_ROCM=1",
     ]),
-    deps = [
-        ":autotuner_util",
-        "//xla:literal",
-        "//xla:xla_proto_cc",
-        "//xla/hlo/ir:hlo",
-        "//xla/service/gpu/tests:gpu_codegen_test",
-        "//xla/stream_executor:device_description",
-        "//xla/stream_executor/gpu:gpu_timer",
-        "//xla/tests:hlo_test_base",
-        "//xla/tests:literal_test_util",
-        "//xla/tests:test_utils",
-        "@com_google_absl//absl/strings",
-        "@com_google_googletest//:gtest_main",
-        "@local_tsl//tsl/platform:statusor",
-    ],
+    deps = if_gpu_is_configured(
+        [
+            ":autotuner_util",
+            "@com_google_googletest//:gtest_main",
+            "@com_google_absl//absl/strings",
+            "//xla:literal",
+            "//xla:xla_proto_cc",
+            "//xla/hlo/ir:hlo",
+            "//xla/service/gpu/tests:gpu_codegen_test",
+            "//xla/stream_executor:device_description",
+            "//xla/stream_executor/gpu:gpu_timer",
+            "//xla/tests:hlo_test_base",
+            "//xla/tests:literal_test_util",
+            "//xla/tests:test_utils",
+            "@local_tsl//tsl/platform:statusor",
+        ],
+        ["@local_tsl//tsl/platform:test_main"],  # b/317293391
+    ),
 )

 cc_library(
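Review note: dot_operand_converter_test and determinism_test above both move to the two-argument form of if_cuda_is_configured / if_gpu_is_configured. A minimal Starlark sketch of that pattern, assuming the @local_config_cuda convention that the macro yields its first argument when CUDA is configured and its second otherwise; _CUDA_IS_CONFIGURED below is an illustrative stand-in, not the real repository-rule check:

    _CUDA_IS_CONFIGURED = True  # stand-in for the @local_config_cuda repo-rule check

    def if_cuda_is_configured(if_true, if_false = []):
        # Selects the real deps under CUDA and a bare test_main otherwise,
        # so the srcs-less test target still builds and passes trivially
        # (b/317293391).
        return if_true if _CUDA_IS_CONFIGURED else if_false

    deps = if_cuda_is_configured(
        ["//xla/tests:hlo_test_base"],  # abbreviated real dependency list
        ["@local_tsl//tsl/platform:test_main"],
    )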
31 changes: 17 additions & 14 deletions third_party/xla/xla/service/gpu/tests/BUILD
@@ -530,7 +530,7 @@ xla_test(
     srcs = ["gpu_kernel_tiling_test.cc"],
     backends = [
         "gpu_p100",
-    ] + if_oss(["gpu"]),
+    ] + if_oss(["gpu_any"]),
     deps = [
         ":gpu_codegen_test",
         "//xla:error_spec",
@@ -819,7 +819,7 @@ lit_test_suite(
         "//xla/tools/hlo_opt:gpu_specs/p100.txtpb",
         "//xla/tools/hlo_opt:gpu_specs/v100.txtpb",
     ],
-    default_tags = tf_cuda_tests_tags(),
+    default_tags = tf_cuda_tests_tags() + ["gpu_any"],
     tags_override = {
         "reduction_vectorization_sm_all.hlo": ["no_rocm"],
         "element_wise_row_vectorization.hlo": ["no_rocm"],
@@ -908,7 +908,7 @@ xla_test(
     srcs = ["tensor_float_32_global_var_test.cc"],
     backends = [
         "gpu_a100",
-    ] + if_oss(["gpu"]),
+    ] + if_oss(["gpu_any"]),
     deps = [
         "//xla:error_spec",
         "//xla/tests:hlo_test_base",
@@ -919,21 +919,24 @@ xla_test(

 xla_test(
     name = "gpu_sparse_dot_test",
-    srcs = ["gpu_sparse_dot_test.cc"],
+    srcs = if_cuda_is_configured(["gpu_sparse_dot_test.cc"]),
     backends = [
         "gpu_a100",
         "gpu_h100",
     ],
-    deps = [
-        ":gpu_codegen_test",
-        "//third_party/half:includes",
-        "//xla:literal",
-        "//xla:literal_util",
-        "//xla/tests:xla_internal_test_main",
-        "@com_google_absl//absl/strings",
-        "@com_google_absl//absl/types:span",
-        "@com_google_googletest//:gtest",
-    ],
+    deps = if_cuda_is_configured(
+        [
+            ":gpu_codegen_test",
+            "@com_google_googletest//:gtest",
+            "@com_google_absl//absl/strings",
+            "@com_google_absl//absl/types:span",
+            "//third_party/half:includes",
+            "//xla:literal",
+            "//xla:literal_util",
+            "//xla/tests:xla_internal_test_main",
+        ],
+        ["@local_tsl//tsl/platform:test_main"],  # b/317293391
+    ),
 )

 xla_test(
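A note on the recurring + if_oss([...]) edits in this file and in service/gpu/BUILD: a hedged sketch of the helper's assumed behavior, per the TSL convention that if_oss returns its first argument in open-source builds and its second (default []) internally; _IS_OSS below is an illustrative stand-in for the copybara-managed switch:

    _IS_OSS = True  # illustrative stand-in for the OSS/internal switch

    def if_oss(oss_value, google_value = []):
        return oss_value if _IS_OSS else google_value

    # Reads as: always run on the pinned GPU, and additionally on whatever
    # GPU the OSS CI machine happens to provide ("gpu_any").
    backends = ["gpu_p100"] + if_oss(["gpu_any"])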
119 changes: 105 additions & 14 deletions third_party/xla/xla/tests/build_defs.bzl
@@ -11,7 +11,81 @@ load(
 )
 load("//xla/tests:plugin.bzl", "plugins")

-all_backends = ["cpu", "gpu"] + list(plugins.keys())
+# Possible backend values for the GPU family.
+GPU_BACKENDS = [
+    "gpu_any",
+    "gpu_p100",
+    "gpu_v100",
+    "gpu_a100",
+    "gpu_h100",
+]
+
+# The generic "gpu" backend includes the actual backends in this list.
+GPU_DEFAULT_BACKENDS = [
+    "gpu_any",
+    "gpu_a100",
+    "gpu_h100",
+]
+
+_DEFAULT_BACKENDS = ["cpu"] + GPU_DEFAULT_BACKENDS
+
+_ALL_BACKENDS = ["cpu", "interpreter"] + GPU_BACKENDS + list(plugins.keys())
+
+# buildifier: disable=function-docstring
+def prepare_gpu_backend_data(backends, disabled_backends, backend_tags, backend_args):
+    # Expand "gpu" backend name into device specific backend names.
+    new_backends = [name for name in backends if name != "gpu"]
+    if len(new_backends) < len(backends):
+        new_backends.extend(GPU_DEFAULT_BACKENDS)
+
+    new_disabled_backends = [name for name in disabled_backends if name != "gpu"]
+    if len(new_disabled_backends) < len(disabled_backends):
+        new_disabled_backends.extend(GPU_BACKENDS)
+
+    new_backend_tags = {key: value for key, value in backend_tags.items() if key != "gpu"}
+    gpu_backend_tags = backend_tags.get("gpu", [])
+    for key in GPU_BACKENDS:
+        new_backend_tags.setdefault(key, gpu_backend_tags[:])
+
+    new_backend_args = {key: value for key, value in backend_args.items() if key != "gpu"}
+    if "gpu" in backend_args:
+        for key in GPU_BACKENDS:
+            new_backend_args.setdefault(key, backend_args["gpu"])
+
+    # Disable backends that don't meet the device requirements.
+    sm_requirements = {
+        "gpu_any": (0, 0),
+        "gpu_p100": (6, 0),
+        "gpu_v100": (7, 0),
+        "gpu_a100": (8, 0),
+        "gpu_h100": (9, 0),
+    }
+    for gpu_backend in GPU_BACKENDS:
+        all_tags = new_backend_tags[gpu_backend]
+        requires_gpu = [t for t in all_tags if t.startswith("requires-gpu-")]
+        requires_sm, only = None, False
+        for tag in requires_gpu:
+            if tag.startswith("requires-gpu-sm"):
+                version = tag.split("-")[2][2:]
+                sm = (int(version[:-1]), int(version[-1]))
+                if not requires_sm or sm < requires_sm:
+                    requires_sm = sm
+                if tag.endswith("-only"):
+                    only = True
+        if only:
+            disable = requires_sm != sm_requirements[gpu_backend]
+        else:
+            disable = requires_sm and requires_sm > sm_requirements[gpu_backend]
+
+        if disable:
+            new_disabled_backends.append(gpu_backend)
+        else:
+            sm_major, sm_minor = sm_requirements[gpu_backend]
+            sm_tag = "requires-gpu-nvidia" if sm_major == 0 else "requires-gpu-sm%s%s-only" % (sm_major, sm_minor)
+            new_backend_tags[gpu_backend] = [t for t in all_tags if t not in requires_gpu]
+            new_backend_tags[gpu_backend].append(sm_tag)
+
+    return new_backends, new_disabled_backends, new_backend_tags, new_backend_args
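To make the expansion concrete, a worked example with hypothetical inputs (not part of this commit), traced against the function above:

    # Mirrors an xla_test(backends = ["cpu", "gpu"], ...) whose GPU variants
    # are pinned to SM 8.0 hardware via backend_tags:
    backends, disabled, tags, args = prepare_gpu_backend_data(
        backends = ["cpu", "gpu"],
        disabled_backends = [],
        backend_tags = {"gpu": ["requires-gpu-sm80"]},
        backend_args = {},
    )
    # backends -> ["cpu", "gpu_any", "gpu_a100", "gpu_h100"]  ("gpu" expands to GPU_DEFAULT_BACKENDS)
    # disabled -> ["gpu_any", "gpu_p100", "gpu_v100"]         (SM 8.0 exceeds their sm_requirements)
    # tags["gpu_a100"] -> ["requires-gpu-sm80-only"]          (requires-gpu-* tags rewritten per device)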

def xla_test(
        name,
@@ -94,7 +168,11 @@ def xla_test(

     test_names = []
     if not backends:
-        backends = all_backends
+        backends = _DEFAULT_BACKENDS
+
+    # Expand "gpu" backend name to specific GPU backends and update tags.
+    backends, disabled_backends, backend_tags, backend_args = \
+        prepare_gpu_backend_data(backends, disabled_backends, backend_tags, backend_args)

     backends = [
         backend
@@ -108,23 +186,32 @@ def xla_test(
         this_backend_copts = []
         this_backend_args = backend_args.get(backend, [])
         this_backend_data = []
+        backend_deps = []
         if backend == "cpu":
-            backend_deps = ["//xla/service:cpu_plugin"]
-            backend_deps += ["//xla/tests:test_macros_cpu"]  # buildifier: disable=list-append
-        elif backend == "gpu":
-            backend_deps = if_gpu_is_configured(["//xla/service:gpu_plugin"])
-            backend_deps += if_gpu_is_configured(["//xla/tests:test_macros_gpu"])  # buildifier: disable=list-append
+            backend_deps += [
+                "//xla/service:cpu_plugin",
+                "//xla/tests:test_macros_cpu",
+            ]
+        elif backend in GPU_BACKENDS:
+            backend_deps += if_gpu_is_configured([
+                "//xla/service:gpu_plugin",
+                "//xla/tests:test_macros_%s" % backend,
+            ])
             this_backend_tags += tf_gpu_tests_tags()
             this_backend_copts.append("-DXLA_TEST_BACKEND_GPU=1")
+        elif backend == "interpreter":
+            backend_deps += [
+                "//xla/service:interpreter_plugin",
+                "//xla/tests:test_macros_interpreter",
+            ]
         elif backend in plugins:
-            backend_deps = []
             backend_deps += plugins[backend]["deps"]
             this_backend_copts += plugins[backend]["copts"]
             this_backend_tags += plugins[backend]["tags"]
             this_backend_args += plugins[backend]["args"]
             this_backend_data += plugins[backend]["data"]
         else:
-            # Ignore unknown backends. TODO(b/289028518): Change back to fail.
-            continue
+            fail("Unknown backend %s" % backend)

     if xla_test_library_deps:
         for lib_dep in xla_test_library_deps:
@@ -197,12 +284,16 @@ def xla_test_library(
     """

     if not backends:
-        backends = all_backends
+        backends = _ALL_BACKENDS

     for backend in backends:
         this_backend_copts = []
-        if backend in ["cpu", "gpu"]:
+        if backend == "cpu":
+            backend_deps = ["//xla/tests:test_macros_cpu"]
+        elif backend in GPU_BACKENDS:
             backend_deps = ["//xla/tests:test_macros_%s" % backend]
+        elif backend == "interpreter":
+            backend_deps = ["//xla/tests:test_macros_interpreter"]
         elif backend in plugins:
             backend_deps = plugins[backend]["deps"]
             this_backend_copts += plugins[backend]["copts"]
@@ -230,7 +321,7 @@ def generate_backend_suites(backends = []):  # buildifier: disable=unnamed-macro
     """

     if not backends:
-        backends = all_backends
+        backends = _ALL_BACKENDS
     for backend in backends:
         native.test_suite(
             name = "%s_tests" % backend,
@@ -244,7 +335,7 @@ def generate_backend_test_macros(backends = []):  # buildifier: disable=unnamed-macro
         backends: The list of backends to generate libraries for.
     """
     if not backends:
-        backends = all_backends
+        backends = _ALL_BACKENDS
     for backend in backends:
         manifest = ""
         if backend in plugins:
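Taken together, a hypothetical BUILD stanza (illustrative only; the target and file names are made up) showing what the macro now accepts and what it rejects:

    xla_test(
        name = "example_hlo_test",  # hypothetical target
        srcs = ["example_hlo_test.cc"],
        # "gpu" expands to GPU_DEFAULT_BACKENDS (gpu_any, gpu_a100, gpu_h100);
        # "interpreter" now resolves to //xla/service:interpreter_plugin plus
        # //xla/tests:test_macros_interpreter.
        backends = ["cpu", "gpu", "interpreter"],
    )

    # A typo is now a loading-time error rather than a silently skipped backend:
    # backends = ["interperter"]  =>  fail("Unknown backend interperter")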
2 changes: 1 addition & 1 deletion third_party/xla/xla/tools/hlo_opt/BUILD
@@ -169,7 +169,7 @@ lit_test_suite(
     ]),
     cfg = "//xla:lit.cfg.py",
     data = [":test_utilities"],
-    default_tags = tf_cuda_tests_tags(),
+    default_tags = tf_cuda_tests_tags() + ["gpu_any"],
     tags_override = {
         "gpu_hlo_ptx.hlo": ["no_rocm"],
     },
3 changes: 2 additions & 1 deletion third_party/xla/xla/tools/multihost_hlo_runner/BUILD
@@ -16,7 +16,7 @@ build_test(
     name = "hlo_runner_main_build_test",
     tags = [
         "cpu",
-        "gpu",
+        "gpu_any",
     ],
     targets = [
         ":hlo_runner_main",
@@ -29,6 +29,7 @@ xla_cc_binary(
     srcs = ["hlo_runner_main.cc"],
     tags = [
         "gpu",
+        "gpu_any",
         "noasan",  # Exceeds linker limit.
         "nomac",
     ],