From 0dcaa941d6acee671e004e4fbe83befd127c4ed8 Mon Sep 17 00:00:00 2001
From: Logan Adams
Date: Fri, 29 Sep 2023 10:49:46 -0700
Subject: [PATCH] Revert "add CPU autotp UT (#4263)"

This reverts commit 388c84834fca87465aff8bb8f6d85be88fa82ba6.
---
 .github/workflows/cpu-inference.yml       |  3 +--
 tests/unit/hybrid_engine/test_he_all.py   |  4 ----
 tests/unit/hybrid_engine/test_he_llama.py |  4 ----
 tests/unit/hybrid_engine/test_he_lora.py  |  4 ----
 tests/unit/inference/test_inference.py    | 18 ++++--------------
 5 files changed, 5 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/cpu-inference.yml b/.github/workflows/cpu-inference.yml
index 8bba51dab6fd..2c555203e950 100644
--- a/.github/workflows/cpu-inference.yml
+++ b/.github/workflows/cpu-inference.yml
@@ -76,5 +76,4 @@ jobs:
           source oneCCL/build/_install/env/setvars.sh
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
           cd tests
-          TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' unit/
-          TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'inference_ops' -m 'inference' unit/
+          TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' -m 'inference_ops' -m 'inference' unit/
diff --git a/tests/unit/hybrid_engine/test_he_all.py b/tests/unit/hybrid_engine/test_he_all.py
index aa1f120645b1..86eabb1add0c 100644
--- a/tests/unit/hybrid_engine/test_he_all.py
+++ b/tests/unit/hybrid_engine/test_he_all.py
@@ -12,10 +12,6 @@
 from deepspeed.accelerator import get_accelerator
 
 from transformers import (AutoConfig, AutoTokenizer, AutoModelForCausalLM)
-from deepspeed.ops.op_builder import InferenceBuilder
-
-if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
-    pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
 
 rocm_version = OpBuilder.installed_rocm_version()
 if rocm_version != (0, 0):
diff --git a/tests/unit/hybrid_engine/test_he_llama.py b/tests/unit/hybrid_engine/test_he_llama.py
index fcf5b8ffb89b..5f992f69b402 100644
--- a/tests/unit/hybrid_engine/test_he_llama.py
+++ b/tests/unit/hybrid_engine/test_he_llama.py
@@ -12,10 +12,6 @@
 from deepspeed.accelerator import get_accelerator
 
 from transformers import (AutoConfig, AutoTokenizer, AutoModelForCausalLM)
-from deepspeed.ops.op_builder import InferenceBuilder
-
-if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
-    pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
 
 rocm_version = OpBuilder.installed_rocm_version()
 if rocm_version != (0, 0):
diff --git a/tests/unit/hybrid_engine/test_he_lora.py b/tests/unit/hybrid_engine/test_he_lora.py
index ea27239ed55e..f61fdeb3a9f9 100644
--- a/tests/unit/hybrid_engine/test_he_lora.py
+++ b/tests/unit/hybrid_engine/test_he_lora.py
@@ -14,10 +14,6 @@
 from deepspeed.utils import safe_get_full_grad
 import numpy.testing as npt
 from unit.common import DistributedTest
-from deepspeed.ops.op_builder import InferenceBuilder
-
-if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
-    pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
 
 from transformers import (AutoConfig, AutoTokenizer, AutoModelForCausalLM)
 
diff --git a/tests/unit/inference/test_inference.py b/tests/unit/inference/test_inference.py
index 894f040be207..4ee3cd73c045 100644
--- a/tests/unit/inference/test_inference.py
+++ b/tests/unit/inference/test_inference.py
@@ -22,6 +22,9 @@
 from deepspeed.accelerator import get_accelerator
 from deepspeed.ops.op_builder import InferenceBuilder
 
+if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
+    pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
+
 rocm_version = OpBuilder.installed_rocm_version()
 if rocm_version != (0, 0):
     pytest.skip("skip inference tests on rocm for now", allow_module_level=True)
@@ -362,9 +365,6 @@ def test(
         if invalid_test_msg:
             pytest.skip(invalid_test_msg)
 
-        if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
-            pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
-
         model, task = model_w_task
         local_rank = int(os.getenv("LOCAL_RANK", "0"))
 
@@ -401,9 +401,6 @@ def test(
     ):
         model, task = model_w_task
         dtype = torch.float16
-        if dtype not in get_accelerator().supported_dtypes():
-            pytest.skip(f"Acceleraor {get_accelerator().device_name()} does not support {dtype}.")
-
         local_rank = int(os.getenv("LOCAL_RANK", "0"))
 
         pipe = pipeline(task, model=model, model_kwargs={"low_cpu_mem_usage": True}, device=local_rank, framework="pt")
@@ -517,7 +514,7 @@ def test(
     [("Helsinki-NLP/opus-mt-en-de", "translation"), ("Salesforce/codegen-350M-mono", "text-generation")],
     ids=["marian", "codegen"],  #codegen has fusedqkv weight.
 )
-@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16], ids=["fp16", "bf16"])
+@pytest.mark.parametrize("dtype", [torch.float16], ids=["fp16"])
 class TestAutoTensorParallelism(DistributedTest):
     world_size = [2]
 
@@ -533,13 +530,6 @@ def test(
         if invalid_test_msg:
             pytest.skip(invalid_test_msg)
 
-        if dtype not in get_accelerator().supported_dtypes():
-            pytest.skip(f"Acceleraor {get_accelerator().device_name()} does not support {dtype}.")
-
-        # TODO: enable this test after torch 2.1 stable release
-        if dtype == torch.bfloat16 and model_w_task[0] == "Salesforce/codegen-350M-mono":
-            pytest.skip("Codegen model(bf16) need to use torch version > 2.0.")
-
         model, task = model_w_task
         local_rank = int(os.getenv("LOCAL_RANK", "0"))
         world_size = int(os.getenv("WORLD_SIZE", "2"))