Skip to content

Commit

Permalink
#8373: Add skip_for_wormhole_b0 and get rid of arch check in perf script for other models
Browse files Browse the repository at this point in the history
  • Loading branch information
tt-rkim committed May 14, 2024
1 parent b8994f7 commit b51d209
Show file tree
Hide file tree
Showing 7 changed files with 15 additions and 242 deletions.
69 changes: 0 additions & 69 deletions models/demos/falcon7b/tests/test_perf_falcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,72 +638,3 @@ def test_perf_t3000_bare_metal(
all_devices,
async_mode,
)


@pytest.mark.models_performance_virtual_machine
@pytest.mark.parametrize(
    "llm_mode, batch, seq_len, kv_cache_len, expected_inference_time",
    (
        ("prefill", 1, 128, 0, 0.4),
        ("decode", 32, 1, 128, 0.3),
        # Disabled configurations, kept for reference:
        # ("prefill", 1, 256, 0, 0.40),
        # ("decode", 32, 1, 1024, 0.36),
        # ("decode", 32, 1, 2047, 0.47),
    ),
    ids=["prefill_seq128", "decode_batch32"],
    # disabled ids: "prefill_seq256", "decode_batch32_1024", "decode_batch32_2047"
)
@pytest.mark.parametrize("num_layers, expected_pcc", ((32, 0.89),), ids=["layers_32"])
@pytest.mark.parametrize("model_version", ("tiiuae/falcon-7b-instruct",), ids=["falcon_7b"])
@pytest.mark.parametrize("model_config_str", ("BFLOAT16-L1",))
def test_perf_virtual_machine(
    model_version,
    llm_mode,
    batch,
    seq_len,
    kv_cache_len,
    expected_inference_time,
    num_layers,
    expected_pcc,
    request,
    model_config_str,
    model_location_generator,
    get_tt_cache_path,
    device,
    use_program_cache,
):
    """End-to-end Falcon-7B perf/PCC check for virtual-machine CI runs."""
    if is_e75(device) and batch == 32:
        pytest.skip("Falcon batch 32 is not supported on E75")

    cfg = get_model_config(model_config_str)
    cache_path = get_tt_cache_path(
        model_version, model_subdir="Falcon", default_dir=cfg["DEFAULT_CACHE_PATH"]
    )

    # Force fresh compilation so measured times reflect this run.
    disable_persistent_kernel_cache()
    disable_compilation_reports()

    # The same PCC threshold is applied to all three checked outputs.
    pcc_targets = [expected_pcc] * 3
    run_test_FalconCausalLM_end_to_end(
        [device],
        model_version,
        llm_mode,
        batch,
        seq_len,
        kv_cache_len,
        num_layers,
        pcc_targets,
        cfg,
        model_config_str,
        cache_path,
        model_location_generator,
        expected_inference_time,
    )
34 changes: 2 additions & 32 deletions models/demos/metal_BERT_large_11/tests/test_perf_bert11.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
disable_persistent_kernel_cache,
profiler,
is_e75,
skip_for_wormhole_b0,
)
from models.perf.perf_utils import prep_perf_report

Expand Down Expand Up @@ -144,38 +145,7 @@ def run_perf_bert11(
logger.info(f"bert11 compile time: {compile_time}")


@pytest.mark.models_performance_virtual_machine
@pytest.mark.parametrize(
    "batch_size, model_config_str, expected_inference_time, expected_compile_time, inference_iterations",
    (
        [7, "BFLOAT8_B-SHARDED", 0.05, 14.5, 10],
        [12, "BFLOAT8_B-SHARDED", 0.05, 14.5, 10],
    ),
)
def test_perf_virtual_machine(
    device,
    use_program_cache,
    batch_size,
    model_config_str,
    expected_inference_time,
    expected_compile_time,
    inference_iterations,
    model_location_generator,
):
    """BERT-large (metal, batch 11 variant) perf check for virtual-machine CI runs."""
    # This model cannot run on E75 boards at all.
    if is_e75(device):
        pytest.skip("Bert large 11 is not supported on E75")

    run_perf_bert11(
        batch_size, model_config_str,
        expected_inference_time, expected_compile_time,
        inference_iterations, model_location_generator, device,
    )


@skip_for_wormhole_b0(reason_str="Didn't test on WH yet")
@pytest.mark.models_performance_bare_metal
@pytest.mark.parametrize(
"batch_size, model_config_str, expected_inference_time, expected_compile_time, inference_iterations",
Expand Down
33 changes: 2 additions & 31 deletions models/demos/resnet/tests/test_perf_accuracy_resnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
disable_persistent_kernel_cache,
profiler,
is_e75,
skip_for_wormhole_b0,
)
from models.perf.perf_utils import prep_perf_report
from models.demos.resnet.tests.demo_utils import get_data
Expand Down Expand Up @@ -160,6 +161,7 @@ def run_perf_resnet(
logger.info(f"resnet50 inference for {batch_size}x{iterations} Samples: {third_iter_time}")


@skip_for_wormhole_b0(reason_str="Not tested on single WH")
@pytest.mark.parametrize("device_l1_small_size", [32768], indirect=True)
@pytest.mark.models_performance_bare_metal
@pytest.mark.parametrize(
Expand Down Expand Up @@ -189,34 +191,3 @@ def test_perf_bare_metal(
iterations,
device,
)


@pytest.mark.parametrize("device_l1_small_size", [32768], indirect=True)
@pytest.mark.models_performance_virtual_machine
@pytest.mark.parametrize(
    "batch_size, expected_inference_time, expected_compile_time, iterations",
    (
        (16, 0.015, 36, 50),
        (20, 0.016, 36, 50),
    ),
)
def test_perf_virtual_machine(
    device,
    use_program_cache,
    model_location_generator,
    batch_size,
    expected_inference_time,
    expected_compile_time,
    hf_cat_image_sample_input,
    iterations,
    function_level_defaults,
):
    """ResNet-50 perf + accuracy check for virtual-machine CI runs."""
    # This model cannot run on E75 boards at all.
    if is_e75(device):
        pytest.skip("Resnet is not supported on E75")

    run_perf_resnet(
        model_location_generator, batch_size,
        expected_inference_time, expected_compile_time,
        hf_cat_image_sample_input, iterations, device,
    )
34 changes: 2 additions & 32 deletions models/demos/resnet/tests/test_perf_resnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from models.utility_functions import is_e75
from models.utility_functions import profiler
from models.utility_functions import disable_persistent_kernel_cache
from models.utility_functions import disable_persistent_kernel_cache, skip_for_wormhole_b0
from models.perf.perf_utils import prep_perf_report

from loguru import logger
Expand Down Expand Up @@ -125,6 +125,7 @@ def run_perf_resnet(
assert compile_time < expected_compile_time, f"resnet50 {comments} compilation is too slow"


@skip_for_wormhole_b0(reason_str="Not tested on single WH")
@pytest.mark.parametrize("device_l1_small_size", [32768], indirect=True)
@pytest.mark.models_performance_bare_metal
@pytest.mark.parametrize(
Expand Down Expand Up @@ -154,34 +155,3 @@ def test_perf_bare_metal(
hf_cat_image_sample_input,
device,
)


@pytest.mark.parametrize("device_l1_small_size", [32768], indirect=True)
@pytest.mark.models_performance_virtual_machine
@pytest.mark.parametrize(
    "batch_size, expected_inference_time, expected_compile_time",
    (
        (1, 0.015, 30),
        (2, 0.02, 30),
        (16, 0.04, 30),
        (20, 0.04, 30),
    ),
)
def test_perf_virtual_machine(
    device,
    use_program_cache,
    batch_size,
    expected_inference_time,
    expected_compile_time,
    hf_cat_image_sample_input,
):
    """ResNet-50 perf check for virtual-machine CI runs."""
    # This model cannot run on E75 boards at all.
    if is_e75(device):
        pytest.skip("Resnet is not supported on E75")

    run_perf_resnet(
        batch_size, expected_inference_time, expected_compile_time,
        hf_cat_image_sample_input, device,
    )
68 changes: 2 additions & 66 deletions models/demos/ttnn_falcon7b/tests/test_perf_falcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
disable_compilation_reports,
is_e75,
is_wormhole_b0,
skip_for_wormhole_b0,
)
from models.perf.perf_utils import prep_perf_report
import ttnn
Expand Down Expand Up @@ -353,6 +354,7 @@ def convert_to_ttnn(model, name):
assert does_pass, f"PCC value is lower than {pcc}"


@skip_for_wormhole_b0(reason_str="Does not run on single WH")
@pytest.mark.models_performance_bare_metal
@pytest.mark.parametrize(
"llm_mode, batch, seq_len, kv_cache_len, expected_inference_time",
Expand Down Expand Up @@ -420,69 +422,3 @@ def test_perf_bare_metal(
model_location_generator,
expected_inference_time,
)


@pytest.mark.models_performance_virtual_machine
@pytest.mark.parametrize(
    "llm_mode, batch, seq_len, kv_cache_len, expected_inference_time",
    (
        ("prefill", 1, 128, 0, 0.4),
        ("decode", 32, 1, 128, 0.3),
        # Disabled configurations, kept for reference:
        # ("prefill", 1, 256, 0, 0.40),
        # ("decode", 32, 1, 1024, 0.36),
        # ("decode", 32, 1, 2047, 0.47),
    ),
    ids=["prefill_seq128", "decode_batch32"],
    # disabled ids: "prefill_seq256", "decode_batch32_1024", "decode_batch32_2047"
)
@pytest.mark.parametrize("num_layers, pcc", ((32, 0.85),), ids=["layers_32"])
@pytest.mark.parametrize("model_version", ("tiiuae/falcon-7b-instruct",), ids=["falcon_7b"])
@pytest.mark.parametrize("model_config_str", ("BFLOAT16-L1",))
def test_perf_virtual_machine(
    device,
    use_program_cache,
    model_version,
    llm_mode,
    batch,
    seq_len,
    kv_cache_len,
    expected_inference_time,
    num_layers,
    pcc,
    request,
    model_config_str,
    model_location_generator,
):
    """End-to-end ttnn Falcon-7B perf/PCC check for virtual-machine CI runs."""
    # Deterministic inputs so the PCC comparison is reproducible.
    torch.manual_seed(0)

    if is_e75(device) and batch == 32:
        pytest.skip("Falcon batch 32 is not supported on E75")

    cfg = get_model_config(model_config_str)
    cache_path = get_tt_cache_path(model_version)

    # Force fresh compilation so measured times reflect this run.
    disable_persistent_kernel_cache()
    disable_compilation_reports()

    run_test_FalconCausalLM_end_to_end(
        device,
        model_version,
        llm_mode,
        batch,
        seq_len,
        kv_cache_len,
        num_layers,
        pcc,
        cfg,
        cache_path,
        model_location_generator,
        expected_inference_time,
    )
17 changes: 6 additions & 11 deletions tests/scripts/run_performance.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,17 @@ run_perf_models_other() {
local tt_arch=$1
local test_marker=$2

if [ "$tt_arch" == "grayskull" ]; then
env pytest "tests/ttnn/integration_tests/resnet/test_performance.py" -m $test_marker
env pytest tests/ttnn/integration_tests/resnet/test_performance.py -m $test_marker

env pytest "tests/ttnn/integration_tests/bert/test_performance.py" -m $test_marker

env pytest models/demos/ttnn_falcon7b/tests -m $test_marker
env pytest tests/ttnn/integration_tests/bert/test_performance.py -m $test_marker

env pytest models/demos/resnet/tests -m $test_marker
env pytest models/demos/ttnn_falcon7b/tests -m $test_marker

env pytest models/demos/metal_BERT_large_11/tests -m $test_marker
env pytest models/demos/resnet/tests -m $test_marker

env pytest "tests/ttnn/integration_tests/whisper/test_performance.py::test_performance" -m $test_marker
env pytest tests/ttnn/integration_tests/whisper/test_performance.py -m $test_marker

else
echo "There are no other model perf tests for Javelin yet specified. Arch $tt_arch requested"
fi
env pytest models/demos/metal_BERT_large_11/tests -m $test_marker

## Merge all the generated reports
env python models/perf/merge_perf_results.py
Expand Down
2 changes: 1 addition & 1 deletion tests/ttnn/integration_tests/whisper/test_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def get_expected_times(functional_whisper):
}[functional_whisper]


@skip_for_wormhole_b0()
@skip_for_wormhole_b0(reason_str="Not tested on single WH")
@pytest.mark.models_performance_bare_metal
@pytest.mark.models_performance_virtual_machine
@pytest.mark.parametrize("model_name", ["openai/whisper-base"])
Expand Down

0 comments on commit b51d209

Please sign in to comment.