From b87d923c862c7c1f02b5d432a2102639be5c017c Mon Sep 17 00:00:00 2001 From: "Jiang, Yanbing" Date: Fri, 15 Nov 2024 03:41:55 -0500 Subject: [PATCH 1/3] Remove tokens per sec in aggregate_metrics when jit_compile --- torchchat/generate.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/torchchat/generate.py b/torchchat/generate.py index 4a67195fb..2605d35fd 100644 --- a/torchchat/generate.py +++ b/torchchat/generate.py @@ -1149,9 +1149,11 @@ def callback(x, *, done_generating=False): print( f"just-in-time compilation time (incl run time): {compilation_time:.2} seconds" ) - aggregate_metrics["tokens_per_sec"].append(tokens_sec) - aggregate_metrics["first_token_per_sec"].append(first_token_sec) - aggregate_metrics["next_tokens_per_sec"].append(next_tokens_sec) + else: + # Do not append the jit_compile sample to aggregate_metrics; including it would skew the averages. + aggregate_metrics["tokens_per_sec"].append(tokens_sec) + aggregate_metrics["first_token_per_sec"].append(first_token_sec) + aggregate_metrics["next_tokens_per_sec"].append(next_tokens_sec) logging.info( f"\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\ From 3bb28dcdbd3c1da5779e94fdf52fe79867a42ec3 Mon Sep 17 00:00:00 2001 From: "Jiang, Yanbing" Date: Sun, 17 Nov 2024 22:18:49 -0500 Subject: [PATCH 2/3] Add warning to user --- torchchat/generate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torchchat/generate.py b/torchchat/generate.py index 98d9fa7e5..4a63f4976 100644 --- a/torchchat/generate.py +++ b/torchchat/generate.py @@ -1207,7 +1207,8 @@ def callback(x, *, done_generating=False): or torch.isnan(torch.tensor(avg_next_tokens_sec)) ): print( - f"\n Average tokens/sec (total): {avg_tokens_sec:.2f} \ + f"\nWarning: The averages were calculated with the compile sample excluded. 
\ + \n Average tokens/sec (total): {avg_tokens_sec:.2f} \ \nAverage tokens/sec (first token): {avg_first_token_sec:.2f} \ \nAverage tokens/sec (next tokens): {avg_next_tokens_sec:.2f} \n\ " From fdc416ddb75bada7ec99645a69f4b6e46211638c Mon Sep 17 00:00:00 2001 From: "Jiang, Yanbing" Date: Tue, 19 Nov 2024 00:28:50 -0500 Subject: [PATCH 3/3] Update --- torchchat/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchchat/generate.py b/torchchat/generate.py index 4a63f4976..9b4c6430a 100644 --- a/torchchat/generate.py +++ b/torchchat/generate.py @@ -1207,7 +1207,7 @@ def callback(x, *, done_generating=False): or torch.isnan(torch.tensor(avg_next_tokens_sec)) ): print( - f"\nWarning: The averages were calculated with the compile sample excluded. \ + f"\nWarning: Excluding compile in calculations \ \n Average tokens/sec (total): {avg_tokens_sec:.2f} \ \nAverage tokens/sec (first token): {avg_first_token_sec:.2f} \ \nAverage tokens/sec (next tokens): {avg_next_tokens_sec:.2f} \n\