From 27c895db114bf8111398a85bf8bae9a97ab70bd4 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Thu, 12 Sep 2024 16:34:44 -0700 Subject: [PATCH 1/8] Define generic Android benchmark metric structure --- .../LlmBenchmarkRunner.java | 72 ++++++++++++++++--- .../pytorch/minibench/BenchmarkActivity.java | 19 ++--- .../pytorch/minibench/BenchmarkMetric.java | 37 ++++++++++ .../minibench/LlmBenchmarkActivity.java | 44 +++++++++--- 4 files changed, 145 insertions(+), 27 deletions(-) create mode 100644 extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java diff --git a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java index cee623507fd..e7e5299f7b5 100644 --- a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java +++ b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java @@ -18,7 +18,11 @@ import java.io.File; import java.io.FileWriter; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public class LlmBenchmarkRunner extends Activity implements ModelRunnerCallback { ModelRunner mModelRunner; @@ -50,6 +54,7 @@ protected void onCreate(Bundle savedInstanceState) { } mStatsDump = new StatsDump(); + mStatsDump.name = model.getName().replace(".pte", ""); mModelRunner = new ModelRunner(model.getPath(), tokenizerPath, temperature, this); mStatsDump.loadStart = System.currentTimeMillis(); } @@ -87,22 +92,70 @@ public void onGenerationStopped() { mTextView.append(mStatsDump.toString()); }); - // TODO (huydhn): Remove txt files here once the JSON format is ready - try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.txt")) { - writer.write(mStatsDump.toString()); - } catch (IOException e) { - e.printStackTrace(); - } + final List results = new ArrayList<>(); + // The list of metrics we have atm includes: + // Model load time + results.add( + new BenchmarkMetric( + mStatsDump.name, + "model_load_time(ms)", + mStatsDump.loadEnd - mStatsDump.loadStart, + 0.0f)); + // LLM generate time + results.add( + new BenchmarkMetric( + mStatsDump.name, + "generate_time(ms)", + mStatsDump.generateEnd - mStatsDump.generateStart, + 0.0f)); + // Token per second + results.add( + new BenchmarkMetric(mStatsDump.name, "token_per_sec", extractTPS(mStatsDump.tokens), 0.0f)); - // TODO (huydhn): Figure out on what the final JSON results looks like, we need something - // with the same number of fields as https://github.com/pytorch/pytorch/pull/135042 try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.json")) { Gson gson = new Gson(); - writer.write(gson.toJson(mStatsDump)); + writer.write(gson.toJson(results)); } catch (IOException e) { e.printStackTrace(); } } + + private double extractTPS(final String tokens) { + final Matcher m = Pattern.compile("\\d+\\.?\\d*").matcher(tokens); + if (m.find()) { + return Double.parseDouble(m.group()); + } else { + return 0.0f; + } + } +} + +class BenchmarkMetric { + // The model name, i.e. stories110M + String name; + + // The metric name, i.e. 
TPS + String metric; + + // The actual value and the option target value + double actual; + double target; + + // TODO (huydhn): Is there a way to get this information from the export model itself? + final String dtype = "float32"; + + // Let's see which information we want to include here + final String device = android.os.Build.BRAND; + // DEBUG DEBUG + final String arch = android.os.Build.DEVICE + " / " + android.os.Build.MODEL; + + public BenchmarkMetric( + final String name, final String metric, final double actual, final double target) { + this.name = name; + this.metric = metric; + this.actual = actual; + this.target = target; + } } class StatsDump { @@ -111,6 +164,7 @@ class StatsDump { long generateStart; long generateEnd; String tokens; + String name; @NonNull @Override diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java index 9ede7d69184..4a724068ac2 100644 --- a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java @@ -46,18 +46,19 @@ protected void onCreate(Bundle savedInstanceState) { stats.latency.add(forwardMs); } - // TODO (huydhn): Remove txt files here once the JSON format is ready - try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.txt")) { - writer.write(stats.toString()); - } catch (IOException e) { - e.printStackTrace(); - } + final List results = new ArrayList<>(); + // The list of metrics we have atm includes: + // Avg inference latency after N iterations + results.add( + new BenchmarkMetric( + model.getName().replace(".pte", ""), + "avg_inference_latency(ms)", + stats.latency.stream().mapToDouble(l -> l).average().orElse(0.0f), + 0.0f)); - // TODO (huydhn): Figure out on what the final JSON results looks like, we need something - // with the same number of fields as https://github.com/pytorch/pytorch/pull/135042 try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.json")) { Gson gson = new Gson(); - writer.write(gson.toJson(stats)); + writer.write(gson.toJson(results)); } catch (IOException e) { e.printStackTrace(); } diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java new file mode 100644 index 00000000000..511a3f4e978 --- /dev/null +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java @@ -0,0 +1,37 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +package org.pytorch.minibench; + +class BenchmarkMetric { + // The model name, i.e. stories110M + String name; + + // The metric name, i.e. TPS + String metric; + + // The actual value and the option target value + double actual; + double target; + + // TODO (huydhn): Is there a way to get this information from the export model itself? 
+ final String dtype = "float32"; + + // Let's see which information we want to include here + final String device = android.os.Build.BRAND; + // DEBUG DEBUG + final String arch = android.os.Build.DEVICE + " / " + android.os.Build.MODEL; + + public BenchmarkMetric( + final String name, final String metric, final double actual, final double target) { + this.name = name; + this.metric = metric; + this.actual = actual; + this.target = target; + } +} diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java index 496cbde53d6..1e0daba930f 100644 --- a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java @@ -16,7 +16,11 @@ import java.io.File; import java.io.FileWriter; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public class LlmBenchmarkActivity extends Activity implements ModelRunnerCallback { ModelRunner mModelRunner; @@ -45,6 +49,7 @@ protected void onCreate(Bundle savedInstanceState) { } mStatsInfo = new StatsInfo(); + mStatsInfo.name = model.getName().replace(".pte", ""); mModelRunner = new ModelRunner(model.getPath(), tokenizerPath, temperature, this); mStatsInfo.loadStart = System.currentTimeMillis(); } @@ -73,22 +78,42 @@ public void onStats(String stats) { public void onGenerationStopped() { mStatsInfo.generateEnd = System.currentTimeMillis(); - // TODO (huydhn): Remove txt files here once the JSON format is ready - try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.txt")) { - writer.write(mStatsInfo.toString()); - } catch (IOException e) { - e.printStackTrace(); - } + final List results = new ArrayList<>(); + // The list of metrics we have atm includes: + // Model load time + results.add( + new BenchmarkMetric( + mStatsInfo.name, + "model_load_time(ms)", + mStatsInfo.loadEnd - mStatsInfo.loadStart, + 0.0f)); + // LLM generate time + results.add( + new BenchmarkMetric( + mStatsInfo.name, + "generate_time(ms)", + mStatsInfo.generateEnd - mStatsInfo.generateStart, + 0.0f)); + // Token per second + results.add( + new BenchmarkMetric(mStatsInfo.name, "token_per_sec", extractTPS(mStatsInfo.tokens), 0.0f)); - // TODO (huydhn): Figure out on what the final JSON results looks like, we need something - // with the same number of fields as https://github.com/pytorch/pytorch/pull/135042 try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.json")) { Gson gson = new Gson(); - writer.write(gson.toJson(mStatsInfo)); + writer.write(gson.toJson(results)); } catch (IOException e) { e.printStackTrace(); } } + + private double extractTPS(final String tokens) { + final Matcher m = Pattern.compile("\\d+\\.?\\d*").matcher(tokens); + if (m.find()) { + return Double.parseDouble(m.group()); + } else { + return 0.0f; + } + } } class StatsInfo { @@ -97,6 +122,7 @@ class StatsInfo { long generateStart; long generateEnd; String tokens; + String name; @Override public String toString() { From 57c3c5e6d99aa4058acde4c47882c6a6054eb789 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Thu, 12 Sep 2024 18:31:00 -0700 Subject: [PATCH 2/8] Parse the model name for backend and quantization --- .../LlmBenchmarkRunner.java | 59 +++++++++++++++---- 
.../pytorch/minibench/BenchmarkActivity.java | 4 +- .../pytorch/minibench/BenchmarkMetric.java | 54 ++++++++++++++--- .../minibench/LlmBenchmarkActivity.java | 8 ++- 4 files changed, 100 insertions(+), 25 deletions(-) diff --git a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java index e7e5299f7b5..17ae4714353 100644 --- a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java +++ b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java @@ -10,6 +10,7 @@ import android.app.Activity; import android.content.Intent; +import android.os.Build; import android.os.Bundle; import android.util.Log; import android.widget.TextView; @@ -92,25 +93,27 @@ public void onGenerationStopped() { mTextView.append(mStatsDump.toString()); }); + final BenchmarkMetric.BenchmarkModel benchmarkModel = + BenchmarkMetric.extractBackendAndQuantization(mStatsDump.name); final List results = new ArrayList<>(); // The list of metrics we have atm includes: // Model load time results.add( new BenchmarkMetric( - mStatsDump.name, + benchmarkModel, "model_load_time(ms)", mStatsDump.loadEnd - mStatsDump.loadStart, 0.0f)); // LLM generate time results.add( new BenchmarkMetric( - mStatsDump.name, + benchmarkModel, "generate_time(ms)", mStatsDump.generateEnd - mStatsDump.generateStart, 0.0f)); // Token per second results.add( - new BenchmarkMetric(mStatsDump.name, "token_per_sec", extractTPS(mStatsDump.tokens), 0.0f)); + new BenchmarkMetric(benchmarkModel, "token_per_sec", extractTPS(mStatsDump.tokens), 0.0f)); try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.json")) { Gson gson = new Gson(); @@ -131,8 +134,20 @@ private double extractTPS(final String tokens) { } class BenchmarkMetric { - // The model name, i.e. stories110M - String name; + public static class BenchmarkModel { + // The model name, i.e. stories110M + String name; + String backend; + String quantization; + + public BenchmarkModel(final String name, final String backend, final String quantization) { + this.name = name; + this.backend = backend; + this.quantization = quantization; + } + } + + BenchmarkModel benchmarkModel; // The metric name, i.e. TPS String metric; @@ -141,21 +156,41 @@ class BenchmarkMetric { double actual; double target; - // TODO (huydhn): Is there a way to get this information from the export model itself? 
- final String dtype = "float32"; - // Let's see which information we want to include here - final String device = android.os.Build.BRAND; + final String device = Build.BRAND; // DEBUG DEBUG - final String arch = android.os.Build.DEVICE + " / " + android.os.Build.MODEL; + final String arch = + Build.PRODUCT + + " / " + + Build.MODEL + + " / " + + Build.DISPLAY + + " / " + + Build.VERSION.RELEASE + + " / " + + Build.VERSION.SDK_INT; public BenchmarkMetric( - final String name, final String metric, final double actual, final double target) { - this.name = name; + final BenchmarkModel benchmarkModel, + final String metric, + final double actual, + final double target) { + this.benchmarkModel = benchmarkModel; this.metric = metric; this.actual = actual; this.target = target; } + + public static BenchmarkMetric.BenchmarkModel extractBackendAndQuantization(final String model) { + final Matcher m = + Pattern.compile("(?\\w+)_(?\\w+)_(?\\w+)").matcher(model); + if (m.matches()) { + return new BenchmarkMetric.BenchmarkModel( + m.group("name"), m.group("backend"), m.group("quantization")); + } else { + return new BenchmarkMetric.BenchmarkModel(model, "", ""); + } + } } class StatsDump { diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java index 4a724068ac2..9bb0cd959f2 100644 --- a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java @@ -46,12 +46,14 @@ protected void onCreate(Bundle savedInstanceState) { stats.latency.add(forwardMs); } + final BenchmarkMetric.BenchmarkModel benchmarkModel = + BenchmarkMetric.extractBackendAndQuantization(model.getName().replace(".pte", "")); final List results = new ArrayList<>(); // The list of metrics we have atm includes: // Avg inference latency after N iterations results.add( new BenchmarkMetric( - model.getName().replace(".pte", ""), + benchmarkModel, "avg_inference_latency(ms)", stats.latency.stream().mapToDouble(l -> l).average().orElse(0.0f), 0.0f)); diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java index 511a3f4e978..f04f3267742 100644 --- a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java @@ -8,9 +8,25 @@ package org.pytorch.minibench; +import android.os.Build; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + class BenchmarkMetric { - // The model name, i.e. stories110M - String name; + public static class BenchmarkModel { + // The model name, i.e. stories110M + String name; + String backend; + String quantization; + + public BenchmarkModel(final String name, final String backend, final String quantization) { + this.name = name; + this.backend = backend; + this.quantization = quantization; + } + } + + BenchmarkModel benchmarkModel; // The metric name, i.e. TPS String metric; @@ -19,19 +35,39 @@ class BenchmarkMetric { double actual; double target; - // TODO (huydhn): Is there a way to get this information from the export model itself? 
- final String dtype = "float32"; - // Let's see which information we want to include here - final String device = android.os.Build.BRAND; + final String device = Build.BRAND; // DEBUG DEBUG - final String arch = android.os.Build.DEVICE + " / " + android.os.Build.MODEL; + final String arch = + Build.PRODUCT + + " / " + + Build.MODEL + + " / " + + Build.DISPLAY + + " / " + + Build.VERSION.RELEASE + + " / " + + Build.VERSION.SDK_INT; public BenchmarkMetric( - final String name, final String metric, final double actual, final double target) { - this.name = name; + final BenchmarkModel benchmarkModel, + final String metric, + final double actual, + final double target) { + this.benchmarkModel = benchmarkModel; this.metric = metric; this.actual = actual; this.target = target; } + + public static BenchmarkMetric.BenchmarkModel extractBackendAndQuantization(final String model) { + final Matcher m = + Pattern.compile("(?\\w+)_(?\\w+)_(?\\w+)").matcher(model); + if (m.matches()) { + return new BenchmarkMetric.BenchmarkModel( + m.group("name"), m.group("backend"), m.group("quantization")); + } else { + return new BenchmarkMetric.BenchmarkModel(model, "", ""); + } + } } diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java index 1e0daba930f..f69c729199b 100644 --- a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java @@ -78,25 +78,27 @@ public void onStats(String stats) { public void onGenerationStopped() { mStatsInfo.generateEnd = System.currentTimeMillis(); + final BenchmarkMetric.BenchmarkModel benchmarkModel = + BenchmarkMetric.extractBackendAndQuantization(mStatsInfo.name); final List results = new ArrayList<>(); // The list of metrics we have atm includes: // Model load time results.add( new BenchmarkMetric( - mStatsInfo.name, + benchmarkModel, "model_load_time(ms)", mStatsInfo.loadEnd - mStatsInfo.loadStart, 0.0f)); // LLM generate time results.add( new BenchmarkMetric( - mStatsInfo.name, + benchmarkModel, "generate_time(ms)", mStatsInfo.generateEnd - mStatsInfo.generateStart, 0.0f)); // Token per second results.add( - new BenchmarkMetric(mStatsInfo.name, "token_per_sec", extractTPS(mStatsInfo.tokens), 0.0f)); + new BenchmarkMetric(benchmarkModel, "token_per_sec", extractTPS(mStatsInfo.tokens), 0.0f)); try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.json")) { Gson gson = new Gson(); From 5c5499ad22750b5408c3107e8df12b46baf2819d Mon Sep 17 00:00:00 2001 From: Huy Do Date: Thu, 12 Sep 2024 22:28:53 -0700 Subject: [PATCH 3/8] Keep model and Android version --- .../executorchllamademo/LlmBenchmarkRunner.java | 15 ++++----------- .../org/pytorch/minibench/BenchmarkMetric.java | 15 ++++----------- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java index 17ae4714353..8bdcaf77684 100644 --- a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java +++ b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java @@ -158,17 +158,8 @@ public 
BenchmarkModel(final String name, final String backend, final String quan // Let's see which information we want to include here final String device = Build.BRAND; - // DEBUG DEBUG - final String arch = - Build.PRODUCT - + " / " - + Build.MODEL - + " / " - + Build.DISPLAY - + " / " - + Build.VERSION.RELEASE - + " / " - + Build.VERSION.SDK_INT; + // The phone model and Android release version + final String arch = Build.MODEL + " / " + Build.VERSION.RELEASE; public BenchmarkMetric( final BenchmarkModel benchmarkModel, @@ -181,6 +172,8 @@ public BenchmarkMetric( this.target = target; } + // TODO (huydhn): Figure out a way to extract the backend and quantization information from + // the .pte model itself instead of parsing its name public static BenchmarkMetric.BenchmarkModel extractBackendAndQuantization(final String model) { final Matcher m = Pattern.compile("(?\\w+)_(?\\w+)_(?\\w+)").matcher(model); diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java index f04f3267742..7498f280ef2 100644 --- a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java @@ -37,17 +37,8 @@ public BenchmarkModel(final String name, final String backend, final String quan // Let's see which information we want to include here final String device = Build.BRAND; - // DEBUG DEBUG - final String arch = - Build.PRODUCT - + " / " - + Build.MODEL - + " / " - + Build.DISPLAY - + " / " - + Build.VERSION.RELEASE - + " / " - + Build.VERSION.SDK_INT; + // The phone model and Android release version + final String arch = Build.MODEL + " / " + Build.VERSION.RELEASE; public BenchmarkMetric( final BenchmarkModel benchmarkModel, @@ -60,6 +51,8 @@ public BenchmarkMetric( this.target = target; } + // TODO (huydhn): Figure out a way to extract the backend and quantization information from + // the .pte model itself instead of parsing its name public static BenchmarkMetric.BenchmarkModel extractBackendAndQuantization(final String model) { final Matcher m = Pattern.compile("(?\\w+)_(?\\w+)_(?\\w+)").matcher(model); From 0488a12ff6e9f09c67ec50ab36c757c885953f68 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Fri, 13 Sep 2024 09:39:49 -0700 Subject: [PATCH 4/8] Address review comments --- .../executorchllamademo/LlmBenchmarkRunner.java | 16 ++++++++++------ .../org/pytorch/minibench/BenchmarkActivity.java | 10 ++++++++++ .../org/pytorch/minibench/BenchmarkMetric.java | 12 ++++++------ .../pytorch/minibench/LlmBenchmarkActivity.java | 4 ++++ 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java index 8bdcaf77684..c9841ced5f6 100644 --- a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java +++ b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java @@ -63,6 +63,7 @@ protected void onCreate(Bundle savedInstanceState) { @Override public void onModelLoaded(int status) { mStatsDump.loadEnd = System.currentTimeMillis(); + mStatsDump.loadStatus = status; if (status != 0) { Log.e("LlmBenchmarkRunner", "Loaded failed: " + status); 
onGenerationStopped(); @@ -97,6 +98,8 @@ public void onGenerationStopped() { BenchmarkMetric.extractBackendAndQuantization(mStatsDump.name); final List results = new ArrayList<>(); // The list of metrics we have atm includes: + // Load status + results.add(new BenchmarkMetric(benchmarkModel, "load_status", mStatsDump.loadStatus, 0)); // Model load time results.add( new BenchmarkMetric( @@ -153,8 +156,8 @@ public BenchmarkModel(final String name, final String backend, final String quan String metric; // The actual value and the option target value - double actual; - double target; + double actualValue; + double targetValue; // Let's see which information we want to include here final String device = Build.BRAND; @@ -164,12 +167,12 @@ public BenchmarkModel(final String name, final String backend, final String quan public BenchmarkMetric( final BenchmarkModel benchmarkModel, final String metric, - final double actual, - final double target) { + final double actualValue, + final double targetValue) { this.benchmarkModel = benchmarkModel; this.metric = metric; - this.actual = actual; - this.target = target; + this.actualValue = actualValue; + this.targetValue = targetValue; } // TODO (huydhn): Figure out a way to extract the backend and quantization information from @@ -187,6 +190,7 @@ public static BenchmarkMetric.BenchmarkModel extractBackendAndQuantization(final } class StatsDump { + int loadStatus; long loadStart; long loadEnd; long generateStart; diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java index 9bb0cd959f2..9a556779b6c 100644 --- a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java @@ -38,7 +38,11 @@ protected void onCreate(Bundle savedInstanceState) { // TODO: Format the string with a parsable format Stats stats = new Stats(); + // Record the time it takes to load the model + stats.loadStart = System.currentTimeMillis(); Module module = Module.load(model.getPath()); + stats.loadEnd = System.currentTimeMillis(); + for (int i = 0; i < numIter; i++) { long start = System.currentTimeMillis(); module.forward(); @@ -57,6 +61,10 @@ protected void onCreate(Bundle savedInstanceState) { "avg_inference_latency(ms)", stats.latency.stream().mapToDouble(l -> l).average().orElse(0.0f), 0.0f)); + // Model load time + results.add( + new BenchmarkMetric( + benchmarkModel, "model_load_time(ms)", stats.loadEnd - stats.loadStart, 0.0f)); try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.json")) { Gson gson = new Gson(); @@ -68,6 +76,8 @@ protected void onCreate(Bundle savedInstanceState) { } class Stats { + long loadStart; + long loadEnd; List latency = new ArrayList<>(); @Override diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java index 7498f280ef2..d1dc789b7bc 100644 --- a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java @@ -32,8 +32,8 @@ public BenchmarkModel(final String name, final String backend, final String quan String metric; // The actual value and the option target value - double actual; - double target; + double 
actualValue; + double targetValue; // Let's see which information we want to include here final String device = Build.BRAND; @@ -43,12 +43,12 @@ public BenchmarkModel(final String name, final String backend, final String quan public BenchmarkMetric( final BenchmarkModel benchmarkModel, final String metric, - final double actual, - final double target) { + final double actualValue, + final double targetValue) { this.benchmarkModel = benchmarkModel; this.metric = metric; - this.actual = actual; - this.target = target; + this.actualValue = actualValue; + this.targetValue = targetValue; } // TODO (huydhn): Figure out a way to extract the backend and quantization information from diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java index f69c729199b..c4e39cb8efe 100644 --- a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java @@ -57,6 +57,7 @@ protected void onCreate(Bundle savedInstanceState) { @Override public void onModelLoaded(int status) { mStatsInfo.loadEnd = System.currentTimeMillis(); + mStatsInfo.loadStatus = status; if (status != 0) { Log.e("LlmBenchmarkRunner", "Loaded failed: " + status); onGenerationStopped(); @@ -82,6 +83,8 @@ public void onGenerationStopped() { BenchmarkMetric.extractBackendAndQuantization(mStatsInfo.name); final List results = new ArrayList<>(); // The list of metrics we have atm includes: + // Load status + results.add(new BenchmarkMetric(benchmarkModel, "load_status", mStatsInfo.loadStatus, 0)); // Model load time results.add( new BenchmarkMetric( @@ -119,6 +122,7 @@ private double extractTPS(final String tokens) { } class StatsInfo { + int loadStatus; long loadStart; long loadEnd; long generateStart; From a529c3b90a6c9427a828b1dadee8dc6357eda49b Mon Sep 17 00:00:00 2001 From: Huy Do Date: Fri, 13 Sep 2024 10:21:49 -0700 Subject: [PATCH 5/8] Move Android OS version to a separate field --- .../com/example/executorchllamademo/LlmBenchmarkRunner.java | 3 ++- .../src/main/java/org/pytorch/minibench/BenchmarkMetric.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java index c9841ced5f6..7c963a714a9 100644 --- a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java +++ b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java @@ -162,7 +162,8 @@ public BenchmarkModel(final String name, final String backend, final String quan // Let's see which information we want to include here final String device = Build.BRAND; // The phone model and Android release version - final String arch = Build.MODEL + " / " + Build.VERSION.RELEASE; + final String arch = Build.MODEL; + final String os = "Android " + Build.VERSION.RELEASE; public BenchmarkMetric( final BenchmarkModel benchmarkModel, diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java index d1dc789b7bc..8334f9e3e87 100644 --- 
a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java @@ -38,7 +38,8 @@ public BenchmarkModel(final String name, final String backend, final String quan // Let's see which information we want to include here final String device = Build.BRAND; // The phone model and Android release version - final String arch = Build.MODEL + " / " + Build.VERSION.RELEASE; + final String arch = Build.MODEL; + final String os = "Android " + Build.VERSION.RELEASE; public BenchmarkMetric( final BenchmarkModel benchmarkModel, From b2b117ee5d1f5c1de11889e9889147723e69f2ac Mon Sep 17 00:00:00 2001 From: Huy Do Date: Fri, 13 Sep 2024 10:49:21 -0700 Subject: [PATCH 6/8] Use nano second --- .../LlmBenchmarkRunner.java | 12 ++++++------ .../pytorch/minibench/BenchmarkActivity.java | 18 ++++++++++-------- .../minibench/LlmBenchmarkActivity.java | 12 ++++++------ 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java index 7c963a714a9..87edd3b6cd8 100644 --- a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java +++ b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java @@ -57,19 +57,19 @@ protected void onCreate(Bundle savedInstanceState) { mStatsDump = new StatsDump(); mStatsDump.name = model.getName().replace(".pte", ""); mModelRunner = new ModelRunner(model.getPath(), tokenizerPath, temperature, this); - mStatsDump.loadStart = System.currentTimeMillis(); + mStatsDump.loadStart = System.nanoTime(); } @Override public void onModelLoaded(int status) { - mStatsDump.loadEnd = System.currentTimeMillis(); + mStatsDump.loadEnd = System.nanoTime(); mStatsDump.loadStatus = status; if (status != 0) { Log.e("LlmBenchmarkRunner", "Loaded failed: " + status); onGenerationStopped(); return; } - mStatsDump.generateStart = System.currentTimeMillis(); + mStatsDump.generateStart = System.nanoTime(); mModelRunner.generate(mPrompt); } @@ -88,7 +88,7 @@ public void onStats(String stats) { @Override public void onGenerationStopped() { - mStatsDump.generateEnd = System.currentTimeMillis(); + mStatsDump.generateEnd = System.nanoTime(); runOnUiThread( () -> { mTextView.append(mStatsDump.toString()); @@ -104,14 +104,14 @@ public void onGenerationStopped() { results.add( new BenchmarkMetric( benchmarkModel, - "model_load_time(ms)", + "model_load_time(ns)", mStatsDump.loadEnd - mStatsDump.loadStart, 0.0f)); // LLM generate time results.add( new BenchmarkMetric( benchmarkModel, - "generate_time(ms)", + "generate_time(ns)", mStatsDump.generateEnd - mStatsDump.generateStart, 0.0f)); // Token per second diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java index 59aa9856571..4360bdcef85 100644 --- a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java @@ -47,18 +47,18 @@ protected void onCreate(Bundle savedInstanceState) { // TODO: Format the string with a parsable format Stats stats = new Stats(); - // 
Record the time it takes to load the model - stats.loadStart = System.currentTimeMillis(); + // Record the time it takes to load the model and the forward method + stats.loadStart = System.nanoTime(); Module module = Module.load(model.getPath()); - stats.loadEnd = System.currentTimeMillis(); + stats.errorCode = module.loadMethod("forward"); + stats.loadEnd = System.nanoTime(); for (int i = 0; i < numIter; i++) { - long start = System.currentTimeMillis(); + long start = System.nanoTime(); module.forward(); - long forwardMs = System.currentTimeMillis() - start; + long forwardMs = System.nanoTime() - start; stats.latency.add(forwardMs); } - stats.errorCode = module.loadMethod("forward"); final BenchmarkMetric.BenchmarkModel benchmarkModel = BenchmarkMetric.extractBackendAndQuantization(model.getName().replace(".pte", "")); @@ -68,13 +68,15 @@ protected void onCreate(Bundle savedInstanceState) { results.add( new BenchmarkMetric( benchmarkModel, - "avg_inference_latency(ms)", + "avg_inference_latency(ns)", stats.latency.stream().mapToDouble(l -> l).average().orElse(0.0f), 0.0f)); // Model load time results.add( new BenchmarkMetric( - benchmarkModel, "model_load_time(ms)", stats.loadEnd - stats.loadStart, 0.0f)); + benchmarkModel, "model_load_time(ns)", stats.loadEnd - stats.loadStart, 0.0f)); + // Load status + results.add(new BenchmarkMetric(benchmarkModel, "load_status", stats.errorCode, 0)); try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.json")) { Gson gson = new Gson(); diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java index 06f2b133b9c..267e8b21df3 100644 --- a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java @@ -59,19 +59,19 @@ protected void onCreate(Bundle savedInstanceState) { mStatsInfo = new StatsInfo(); mStatsInfo.name = model.getName().replace(".pte", ""); mModelRunner = new ModelRunner(model.getPath(), tokenizerPath, temperature, this); - mStatsInfo.loadStart = System.currentTimeMillis(); + mStatsInfo.loadStart = System.nanoTime(); } @Override public void onModelLoaded(int status) { - mStatsInfo.loadEnd = System.currentTimeMillis(); + mStatsInfo.loadEnd = System.nanoTime(); mStatsInfo.loadStatus = status; if (status != 0) { Log.e("LlmBenchmarkRunner", "Loaded failed: " + status); onGenerationStopped(); return; } - mStatsInfo.generateStart = System.currentTimeMillis(); + mStatsInfo.generateStart = System.nanoTime(); mModelRunner.generate(mPrompt); } @@ -85,7 +85,7 @@ public void onStats(String stats) { @Override public void onGenerationStopped() { - mStatsInfo.generateEnd = System.currentTimeMillis(); + mStatsInfo.generateEnd = System.nanoTime(); final BenchmarkMetric.BenchmarkModel benchmarkModel = BenchmarkMetric.extractBackendAndQuantization(mStatsInfo.name); @@ -97,14 +97,14 @@ public void onGenerationStopped() { results.add( new BenchmarkMetric( benchmarkModel, - "model_load_time(ms)", + "model_load_time(ns)", mStatsInfo.loadEnd - mStatsInfo.loadStart, 0.0f)); // LLM generate time results.add( new BenchmarkMetric( benchmarkModel, - "generate_time(ms)", + "generate_time(ns)", mStatsInfo.generateEnd - mStatsInfo.generateStart, 0.0f)); // Token per second From a66b8348d28b87a26644c0b9dd4649aee0265d1f Mon Sep 17 00:00:00 2001 From: Huy Do Date: Fri, 13 Sep 
2024 11:08:42 -0700 Subject: [PATCH 7/8] Add memory info --- .../LlmBenchmarkRunner.java | 23 ++++++++++++------- .../pytorch/minibench/BenchmarkMetric.java | 17 ++++++++++---- .../minibench/LlmBenchmarkActivity.java | 6 ++--- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java index 87edd3b6cd8..4b40c6f9372 100644 --- a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java +++ b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java @@ -9,6 +9,7 @@ package com.example.executorchllamademo; import android.app.Activity; +import android.app.ActivityManager; import android.content.Intent; import android.os.Build; import android.os.Bundle; @@ -55,7 +56,7 @@ protected void onCreate(Bundle savedInstanceState) { } mStatsDump = new StatsDump(); - mStatsDump.name = model.getName().replace(".pte", ""); + mStatsDump.modelName = model.getName().replace(".pte", ""); mModelRunner = new ModelRunner(model.getPath(), tokenizerPath, temperature, this); mStatsDump.loadStart = System.nanoTime(); } @@ -95,7 +96,7 @@ public void onGenerationStopped() { }); final BenchmarkMetric.BenchmarkModel benchmarkModel = - BenchmarkMetric.extractBackendAndQuantization(mStatsDump.name); + BenchmarkMetric.extractBackendAndQuantization(mStatsDump.modelName); final List results = new ArrayList<>(); // The list of metrics we have atm includes: // Load status @@ -159,11 +160,17 @@ public BenchmarkModel(final String name, final String backend, final String quan double actualValue; double targetValue; - // Let's see which information we want to include here - final String device = Build.BRAND; - // The phone model and Android release version - final String arch = Build.MODEL; - final String os = "Android " + Build.VERSION.RELEASE; + public static class DeviceInfo { + // Let's see which information we want to include here + final String device = Build.BRAND; + // The phone model and Android release version + final String arch = Build.MODEL; + final String os = "Android " + Build.VERSION.RELEASE; + final long totalMem = new ActivityManager.MemoryInfo().totalMem; + final long availMem = new ActivityManager.MemoryInfo().availMem; + } + + DeviceInfo deviceInfo; public BenchmarkMetric( final BenchmarkModel benchmarkModel, @@ -197,7 +204,7 @@ class StatsDump { long generateStart; long generateEnd; String tokens; - String name; + String modelName; @NonNull @Override diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java index 8334f9e3e87..6ad51486187 100644 --- a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java @@ -8,6 +8,7 @@ package org.pytorch.minibench; +import android.app.ActivityManager; import android.os.Build; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -35,11 +36,17 @@ public BenchmarkModel(final String name, final String backend, final String quan double actualValue; double targetValue; - // Let's see which information we want to include here - final String device = Build.BRAND; - // The phone 
model and Android release version - final String arch = Build.MODEL; - final String os = "Android " + Build.VERSION.RELEASE; + public static class DeviceInfo { + // Let's see which information we want to include here + final String device = Build.BRAND; + // The phone model and Android release version + final String arch = Build.MODEL; + final String os = "Android " + Build.VERSION.RELEASE; + final long totalMem = new ActivityManager.MemoryInfo().totalMem; + final long availMem = new ActivityManager.MemoryInfo().availMem; + } + + DeviceInfo deviceInfo; public BenchmarkMetric( final BenchmarkModel benchmarkModel, diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java index 267e8b21df3..04702562ba4 100644 --- a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java @@ -57,7 +57,7 @@ protected void onCreate(Bundle savedInstanceState) { } mStatsInfo = new StatsInfo(); - mStatsInfo.name = model.getName().replace(".pte", ""); + mStatsInfo.modelName = model.getName().replace(".pte", ""); mModelRunner = new ModelRunner(model.getPath(), tokenizerPath, temperature, this); mStatsInfo.loadStart = System.nanoTime(); } @@ -88,7 +88,7 @@ public void onGenerationStopped() { mStatsInfo.generateEnd = System.nanoTime(); final BenchmarkMetric.BenchmarkModel benchmarkModel = - BenchmarkMetric.extractBackendAndQuantization(mStatsInfo.name); + BenchmarkMetric.extractBackendAndQuantization(mStatsInfo.modelName); final List results = new ArrayList<>(); // The list of metrics we have atm includes: // Load status @@ -136,7 +136,7 @@ class StatsInfo { long generateStart; long generateEnd; String tokens; - String name; + String modelName; @Override public String toString() { From 2ea88c4b2de733360d9b1ccc740fbf2fb08c52c7 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Fri, 13 Sep 2024 11:56:36 -0700 Subject: [PATCH 8/8] Init DeviceInfo --- .../com/example/executorchllamademo/LlmBenchmarkRunner.java | 2 +- .../src/main/java/org/pytorch/minibench/BenchmarkMetric.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java index 4b40c6f9372..efb9ab21eae 100644 --- a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java +++ b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java @@ -170,7 +170,7 @@ public static class DeviceInfo { final long availMem = new ActivityManager.MemoryInfo().availMem; } - DeviceInfo deviceInfo; + DeviceInfo deviceInfo = new DeviceInfo(); public BenchmarkMetric( final BenchmarkModel benchmarkModel, diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java index 6ad51486187..22ee7b84804 100644 --- a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java @@ -46,7 +46,7 @@ public static class DeviceInfo { final long 
availMem = new ActivityManager.MemoryInfo().availMem; } - DeviceInfo deviceInfo; + DeviceInfo deviceInfo = new DeviceInfo(); public BenchmarkMetric( final BenchmarkModel benchmarkModel,
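
A note on the memory fields introduced in [PATCH 7/8] (Add memory info) and initialized in [PATCH 8/8] (Init DeviceInfo): `ActivityManager.MemoryInfo` is a plain data holder, so `new ActivityManager.MemoryInfo().totalMem` and `.availMem` read as 0 unless the object is first populated via `ActivityManager.getMemoryInfo()`. Below is a minimal sketch of how `DeviceInfo` could capture real values; it assumes a `Context` (for example the benchmark `Activity`) is handed in when the metric is built, which is an assumption on top of the patches above, not part of them.

```java
import android.app.ActivityManager;
import android.content.Context;
import android.os.Build;

class BenchmarkMetric {
  public static class DeviceInfo {
    final String device = Build.BRAND;
    // The phone model and Android release version
    final String arch = Build.MODEL;
    final String os = "Android " + Build.VERSION.RELEASE;
    final long totalMem;
    final long availMem;

    // Sketch only: the patches construct DeviceInfo with no arguments; passing a Context
    // (or the values themselves) through would be needed to get non-zero memory numbers.
    DeviceInfo(final Context context) {
      // A default-constructed MemoryInfo is empty; ActivityManager.getMemoryInfo() fills it in.
      final ActivityManager.MemoryInfo memInfo = new ActivityManager.MemoryInfo();
      final ActivityManager am =
          (ActivityManager) context.getSystemService(Context.ACTIVITY_SERVICE);
      am.getMemoryInfo(memInfo);
      this.totalMem = memInfo.totalMem;
      this.availMem = memInfo.availMem;
    }
  }
}
```

If threading a `Context` through `BenchmarkMetric` is undesirable, an alternative is for the benchmark `Activity` to build one `DeviceInfo` up front and attach it to each metric it emits.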