Skip to content

Commit

Permalink
#8373: Add skip_for_wormhole_b0 and get rid of arch check in perf script for other models
Browse files Browse the repository at this point in the history
  • Loading branch information
tt-rkim committed May 14, 2024
1 parent b8994f7 commit b51d209
Show file tree
Hide file tree
Showing 7 changed files with 15 additions and 242 deletions.
69 changes: 0 additions & 69 deletions models/demos/falcon7b/tests/test_perf_falcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,72 +638,3 @@ def test_perf_t3000_bare_metal(
all_devices,
async_mode,
)


@pytest.mark.models_performance_virtual_machine
@pytest.mark.parametrize(
    "llm_mode, batch, seq_len, kv_cache_len, expected_inference_time",
    (
        ("prefill", 1, 128, 0, 0.4),
        ("decode", 32, 1, 128, 0.3),
        # Disabled configurations, kept for reference:
        # ("prefill", 1, 256, 0, 0.40),
        # ("decode", 32, 1, 1024, 0.36),
        # ("decode", 32, 1, 2047, 0.47),
    ),
    ids=["prefill_seq128", "decode_batch32"],
    # disabled ids: "prefill_seq256", "decode_batch32_1024", "decode_batch32_2047"
)
@pytest.mark.parametrize("num_layers, expected_pcc", ((32, 0.89),), ids=["layers_32"])
@pytest.mark.parametrize("model_version", ("tiiuae/falcon-7b-instruct",), ids=["falcon_7b"])
@pytest.mark.parametrize("model_config_str", ("BFLOAT16-L1",))
def test_perf_virtual_machine(
    model_version,
    llm_mode,
    batch,
    seq_len,
    kv_cache_len,
    expected_inference_time,
    num_layers,
    expected_pcc,
    request,
    model_config_str,
    model_location_generator,
    get_tt_cache_path,
    device,
    use_program_cache,
):
    """End-to-end Falcon-7B perf/PCC check for virtual-machine CI runs."""
    if is_e75(device) and batch == 32:
        pytest.skip("Falcon batch 32 is not supported on E75")

    cfg = get_model_config(model_config_str)
    cache_path = get_tt_cache_path(
        model_version, model_subdir="Falcon", default_dir=cfg["DEFAULT_CACHE_PATH"]
    )

    # Force fresh compilation so measured times reflect this run.
    disable_persistent_kernel_cache()
    disable_compilation_reports()

    # The same PCC threshold is applied to all three checked outputs.
    pcc_targets = [expected_pcc] * 3
    run_test_FalconCausalLM_end_to_end(
        [device],
        model_version,
        llm_mode,
        batch,
        seq_len,
        kv_cache_len,
        num_layers,
        pcc_targets,
        cfg,
        model_config_str,
        cache_path,
        model_location_generator,
        expected_inference_time,
    )
34 changes: 2 additions & 32 deletions models/demos/metal_BERT_large_11/tests/test_perf_bert11.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
disable_persistent_kernel_cache,
profiler,
is_e75,
skip_for_wormhole_b0,
)
from models.perf.perf_utils import prep_perf_report

Expand Down Expand Up @@ -144,38 +145,7 @@ def run_perf_bert11(
logger.info(f"bert11 compile time: {compile_time}")


@pytest.mark.models_performance_virtual_machine
@pytest.mark.parametrize(
    "batch_size, model_config_str, expected_inference_time, expected_compile_time, inference_iterations",
    (
        [7, "BFLOAT8_B-SHARDED", 0.05, 14.5, 10],
        [12, "BFLOAT8_B-SHARDED", 0.05, 14.5, 10],
    ),
)
def test_perf_virtual_machine(
    device,
    use_program_cache,
    batch_size,
    model_config_str,
    expected_inference_time,
    expected_compile_time,
    inference_iterations,
    model_location_generator,
):
    """BERT-large (metal, batch 11 variant) perf check for virtual-machine CI runs."""
    # This model cannot run on E75 boards at all.
    if is_e75(device):
        pytest.skip("Bert large 11 is not supported on E75")

    run_perf_bert11(
        batch_size, model_config_str,
        expected_inference_time, expected_compile_time,
        inference_iterations, model_location_generator, device,
    )


@skip_for_wormhole_b0(reason_str="Didn't test on WH yet")
@pytest.mark.models_performance_bare_metal
@pytest.mark.parametrize(
"batch_size, model_config_str, expected_inference_time, expected_compile_time, inference_iterations",
Expand Down
33 changes: 2 additions & 31 deletions models/demos/resnet/tests/test_perf_accuracy_resnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
disable_persistent_kernel_cache,
profiler,
is_e75,
skip_for_wormhole_b0,
)
from models.perf.perf_utils import prep_perf_report
from models.demos.resnet.tests.demo_utils import get_data
Expand Down Expand Up @@ -160,6 +161,7 @@ def run_perf_resnet(
logger.info(f"resnet50 inference for {batch_size}x{iterations} Samples: {third_iter_time}")


@skip_for_wormhole_b0(reason_str="Not tested on single WH")
@pytest.mark.parametrize("device_l1_small_size", [32768], indirect=True)
@pytest.mark.models_performance_bare_metal
@pytest.mark.parametrize(
Expand Down Expand Up @@ -189,34 +191,3 @@ def test_perf_bare_metal(
iterations,
device,
)


@pytest.mark.parametrize("device_l1_small_size", [32768], indirect=True)
@pytest.mark.models_performance_virtual_machine
@pytest.mark.parametrize(
    "batch_size, expected_inference_time, expected_compile_time, iterations",
    (
        (16, 0.015, 36, 50),
        (20, 0.016, 36, 50),
    ),
)
def test_perf_virtual_machine(
    device,
    use_program_cache,
    model_location_generator,
    batch_size,
    expected_inference_time,
    expected_compile_time,
    hf_cat_image_sample_input,
    iterations,
    function_level_defaults,
):
    """ResNet-50 perf + accuracy check for virtual-machine CI runs."""
    # This model cannot run on E75 boards at all.
    if is_e75(device):
        pytest.skip("Resnet is not supported on E75")

    run_perf_resnet(
        model_location_generator, batch_size,
        expected_inference_time, expected_compile_time,
        hf_cat_image_sample_input, iterations, device,
    )
34 changes: 2 additions & 32 deletions models/demos/resnet/tests/test_perf_resnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from models.utility_functions import is_e75
from models.utility_functions import profiler
from models.utility_functions import disable_persistent_kernel_cache
from models.utility_functions import disable_persistent_kernel_cache, skip_for_wormhole_b0
from models.perf.perf_utils import prep_perf_report

from loguru import logger
Expand Down Expand Up @@ -125,6 +125,7 @@ def run_perf_resnet(
assert compile_time < expected_compile_time, f"resnet50 {comments} compilation is too slow"


@skip_for_wormhole_b0(reason_str="Not tested on single WH")
@pytest.mark.parametrize("device_l1_small_size", [32768], indirect=True)
@pytest.mark.models_performance_bare_metal
@pytest.mark.parametrize(
Expand Down Expand Up @@ -154,34 +155,3 @@ def test_perf_bare_metal(
hf_cat_image_sample_input,
device,
)


@pytest.mark.parametrize("device_l1_small_size", [32768], indirect=True)
@pytest.mark.models_performance_virtual_machine
@pytest.mark.parametrize(
    "batch_size, expected_inference_time, expected_compile_time",
    (
        (1, 0.015, 30),
        (2, 0.02, 30),
        (16, 0.04, 30),
        (20, 0.04, 30),
    ),
)
def test_perf_virtual_machine(
    device,
    use_program_cache,
    batch_size,
    expected_inference_time,
    expected_compile_time,
    hf_cat_image_sample_input,
):
    """ResNet-50 perf check for virtual-machine CI runs."""
    # This model cannot run on E75 boards at all.
    if is_e75(device):
        pytest.skip("Resnet is not supported on E75")

    run_perf_resnet(
        batch_size, expected_inference_time, expected_compile_time,
        hf_cat_image_sample_input, device,
    )
68 changes: 2 additions & 66 deletions models/demos/ttnn_falcon7b/tests/test_perf_falcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
disable_compilation_reports,
is_e75,
is_wormhole_b0,
skip_for_wormhole_b0,
)
from models.perf.perf_utils import prep_perf_report
import ttnn
Expand Down Expand Up @@ -353,6 +354,7 @@ def convert_to_ttnn(model, name):
assert does_pass, f"PCC value is lower than {pcc}"


@skip_for_wormhole_b0(reason_str="Does not run on single WH")
@pytest.mark.models_performance_bare_metal
@pytest.mark.parametrize(
"llm_mode, batch, seq_len, kv_cache_len, expected_inference_time",
Expand Down Expand Up @@ -420,69 +422,3 @@ def test_perf_bare_metal(
model_location_generator,
expected_inference_time,
)


@pytest.mark.models_performance_virtual_machine
@pytest.mark.parametrize(
    "llm_mode, batch, seq_len, kv_cache_len, expected_inference_time",
    (
        ("prefill", 1, 128, 0, 0.4),
        ("decode", 32, 1, 128, 0.3),
        # Disabled configurations, kept for reference:
        # ("prefill", 1, 256, 0, 0.40),
        # ("decode", 32, 1, 1024, 0.36),
        # ("decode", 32, 1, 2047, 0.47),
    ),
    ids=["prefill_seq128", "decode_batch32"],
    # disabled ids: "prefill_seq256", "decode_batch32_1024", "decode_batch32_2047"
)
@pytest.mark.parametrize("num_layers, pcc", ((32, 0.85),), ids=["layers_32"])
@pytest.mark.parametrize("model_version", ("tiiuae/falcon-7b-instruct",), ids=["falcon_7b"])
@pytest.mark.parametrize("model_config_str", ("BFLOAT16-L1",))
def test_perf_virtual_machine(
    device,
    use_program_cache,
    model_version,
    llm_mode,
    batch,
    seq_len,
    kv_cache_len,
    expected_inference_time,
    num_layers,
    pcc,
    request,
    model_config_str,
    model_location_generator,
):
    """End-to-end ttnn Falcon-7B perf/PCC check for virtual-machine CI runs."""
    # Deterministic inputs so the PCC comparison is reproducible.
    torch.manual_seed(0)

    if is_e75(device) and batch == 32:
        pytest.skip("Falcon batch 32 is not supported on E75")

    cfg = get_model_config(model_config_str)
    cache_path = get_tt_cache_path(model_version)

    # Force fresh compilation so measured times reflect this run.
    disable_persistent_kernel_cache()
    disable_compilation_reports()

    run_test_FalconCausalLM_end_to_end(
        device,
        model_version,
        llm_mode,
        batch,
        seq_len,
        kv_cache_len,
        num_layers,
        pcc,
        cfg,
        cache_path,
        model_location_generator,
        expected_inference_time,
    )
17 changes: 6 additions & 11 deletions tests/scripts/run_performance.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,17 @@ run_perf_models_other() {
local tt_arch=$1
local test_marker=$2

if [ "$tt_arch" == "grayskull" ]; then
env pytest "tests/ttnn/integration_tests/resnet/test_performance.py" -m $test_marker
env pytest tests/ttnn/integration_tests/resnet/test_performance.py -m $test_marker

env pytest "tests/ttnn/integration_tests/bert/test_performance.py" -m $test_marker

env pytest models/demos/ttnn_falcon7b/tests -m $test_marker
env pytest tests/ttnn/integration_tests/bert/test_performance.py -m $test_marker

env pytest models/demos/resnet/tests -m $test_marker
env pytest models/demos/ttnn_falcon7b/tests -m $test_marker

env pytest models/demos/metal_BERT_large_11/tests -m $test_marker
env pytest models/demos/resnet/tests -m $test_marker

env pytest "tests/ttnn/integration_tests/whisper/test_performance.py::test_performance" -m $test_marker
env pytest tests/ttnn/integration_tests/whisper/test_performance.py -m $test_marker

else
echo "There are no other model perf tests for Javelin yet specified. Arch $tt_arch requested"
fi
env pytest models/demos/metal_BERT_large_11/tests -m $test_marker

## Merge all the generated reports
env python models/perf/merge_perf_results.py
Expand Down
2 changes: 1 addition & 1 deletion tests/ttnn/integration_tests/whisper/test_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def get_expected_times(functional_whisper):
}[functional_whisper]


@skip_for_wormhole_b0()
@skip_for_wormhole_b0(reason_str="Not tested on single WH")
@pytest.mark.models_performance_bare_metal
@pytest.mark.models_performance_virtual_machine
@pytest.mark.parametrize("model_name", ["openai/whisper-base"])
Expand Down

0 comments on commit b51d209

Please sign in to comment.