#0: Added option to filter perf results to within start/stop signposts
AleksKnezevic committed May 1, 2024
1 parent e215c97 commit 1b000fb
Showing 5 changed files with 18 additions and 6 deletions.
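
In short, a device-perf test can now bracket the region it wants measured with tracy signposts and pass has_signposts=True through run_device_perf; the ops-log post-processing then drops every op recorded outside the start/stop pair. A minimal sketch of the test-side pattern, assuming a hypothetical measured_forward helper and model callable (neither is part of this commit):

from tracy import signpost  # same import the updated tests add

def measured_forward(model, model_input):
    """Bracket the region whose ops should be counted in the perf report."""
    signpost(header="start")     # ops recorded before this row are dropped
    output = model(model_input)  # only ops issued in here are measured
    signpost(header="stop")      # ops recorded after this row are dropped
    return output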
@@ -278,11 +278,11 @@ def test_stable_diffusion_perf(
 @pytest.mark.models_device_performance_bare_metal
 @pytest.mark.parametrize(
     "expected_perf",
-    ((0.9),),
+    ((6.60),),
 )
 def test_stable_diffusion_device_perf(expected_perf):
     subdir = "ttnn_stable_diffusion"
-    margin = 0.12
+    margin = 0.02
     batch = 1
     iterations = 1
     command = f"pytest tests/ttnn/integration_tests/stable_diffusion/test_unet_2d_condition_model.py::test_unet_2d_condition_model_512x512[batch_size=2-in_channels=4-input_height=64-input_width=64-device_l1_small_size=32768]"
@@ -297,7 +297,7 @@ def test_stable_diffusion_device_perf(expected_perf):
     wh_arch_yaml_backup = os.environ["WH_ARCH_YAML"]

     os.environ["WH_ARCH_YAML"] = "wormhole_b0_80_arch_eth_dispatch.yaml"
-    post_processed_results = run_device_perf(command, subdir, iterations, cols, batch)
+    post_processed_results = run_device_perf(command, subdir, iterations, cols, batch, has_signposts=True)
     expected_results = check_device_perf(post_processed_results, margin, expected_perf_cols)
     prep_device_perf_report(
         model_name=f"stable_diffusion_{batch}batch",
models/perf/device_perf_utils.py (4 changes: 2 additions & 2 deletions)
@@ -15,7 +15,7 @@
 from models.perf.perf_utils import today, process_perf_results


-def run_device_perf(command, subdir, num_iterations, cols, batch_size):
+def run_device_perf(command, subdir, num_iterations, cols, batch_size, has_signposts=False):
     duration_cols = [col + " DURATION [ns]" for col in cols]
     samples_cols = [col + " SAMPLES/S" for col in cols]

@@ -29,7 +29,7 @@ def run_device_perf(command, subdir, num_iterations, cols, batch_size):

     for _ in range(num_iterations):
         run_device_profiler(command, subdir)
-        r = post_process_ops_log(subdir, duration_cols)
+        r = post_process_ops_log(subdir, duration_cols, has_signposts=has_signposts)
         for d_col in duration_cols:
             results[f"AVG {d_col}"] += r[d_col]
             results[f"MIN {d_col}"] = min(results[f"MIN {d_col}"], r[d_col])
@@ -6,6 +6,7 @@
 import torch
 from diffusers import StableDiffusionPipeline
 import ttnn
+from tracy import signpost

 from models.experimental.functional_stable_diffusion.tt.ttnn_functional_cross_attention import (
     cross_attention as ttnn_cross_attention,
@@ -253,12 +254,14 @@ def test_cross_attention_512x512(device, model_name, N, C, H, W, index, has_enco
     ttnn_hidden_states = ttnn.to_device(ttnn_hidden_states, device)

     model = tt2_ttnn_cross_attention(device, parameters)
+    signpost(header="start")
     ttnn_output = model(
         ttnn_hidden_states,
         ttnn_encoder_hidden_states,
         attention_mask=None,
         dim_head=W // 8,
     )
+    signpost(header="stop")

     ttnn_output = ttnn.from_device(ttnn_output)
     ttnn_output = ttnn.to_torch(ttnn_output)
@@ -8,6 +8,7 @@
 import pytest
 from tqdm.auto import tqdm
 import time
+from tracy import signpost

 from tests.ttnn.utils_for_testing import assert_with_pcc
 from models.utility_functions import (
@@ -210,6 +211,7 @@ def test_unet_2d_condition_model_512x512(device, batch_size, in_channels, input_
     model = UNet2D(device, parameters, batch_size, input_height, input_width, reader_patterns_cache)

     first_iter = time.time()
+    signpost(header="start")
     ttnn_output = model(
         input,
         timestep=ttnn_timestep,
@@ -220,6 +222,7 @@ def test_unet_2d_condition_model_512x512(device, batch_size, in_channels, input_
         return_dict=return_dict,
         config=config,
     )
+    signpost(header="stop")
     first_iter = time.time() - first_iter
     print(f"First iteration took {first_iter} seconds")
     # times = []
tt_metal/tools/profiler/process_model_log.py (8 changes: 7 additions & 1 deletion)
@@ -10,10 +10,16 @@
 from tt_metal.tools.profiler.common import PROFILER_OUTPUT_DIR, PROFILER_SCRIPTS_ROOT


-def post_process_ops_log(output_logs_subdir, columns, sum_vals=True, op_name=""):
+def post_process_ops_log(output_logs_subdir, columns, sum_vals=True, op_name="", has_signposts=False):
     runDate = sorted(os.listdir(PROFILER_OUTPUT_DIR / output_logs_subdir))[-1]
     df = pd.read_csv(PROFILER_OUTPUT_DIR / output_logs_subdir / runDate / f"ops_perf_results_{runDate}.csv")

+    if has_signposts:
+        # The model emits explicit start/stop signposts; measure only the ops between them.
+        markers = df[df["OP TYPE"] == "signpost"]["OP CODE"]
+        start = markers[markers == "start"].index[0]
+        stop = markers[markers == "stop"].index[0]
+        df = df.iloc[start + 1 : stop]
     if op_name != "":
         df = df[df["OP CODE"] == op_name]
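
For reference, the slicing added above keeps only the rows strictly between the "start" and "stop" signpost rows, relying on the CSV's default RangeIndex so the label returned by .index[0] can be used positionally with .iloc. A self-contained sketch of the same logic on a toy frame (op names, types, and durations are invented; the real ops_perf_results_<date>.csv has many more columns):

import pandas as pd

# Toy ops log: two signpost rows bracket the ops we care about.
df = pd.DataFrame(
    {
        "OP TYPE": ["tt_dnn", "signpost", "tt_dnn", "tt_dnn", "signpost", "tt_dnn"],
        "OP CODE": ["Warmup", "start", "Matmul", "Softmax", "stop", "Teardown"],
        "DEVICE FW DURATION [ns]": [100, 0, 4200, 1300, 0, 90],
    }
)

# Same logic as post_process_ops_log with has_signposts=True.
markers = df[df["OP TYPE"] == "signpost"]["OP CODE"]
start = markers[markers == "start"].index[0]  # row index of the "start" signpost
stop = markers[markers == "stop"].index[0]    # row index of the "stop" signpost
measured = df.iloc[start + 1 : stop]          # rows strictly between the signposts

print(measured["OP CODE"].tolist())                    # ['Matmul', 'Softmax']
print(int(measured["DEVICE FW DURATION [ns]"].sum()))  # 5500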
