diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
index 266454c39ab..e2f85e05d3a 100644
--- a/.github/workflows/android-perf.yml
+++ b/.github/workflows/android-perf.yml
@@ -367,7 +367,7 @@ jobs:
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
mkdir -p aar-out
- PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 bash scripts/build_android_library.sh
+ PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh
mkdir -p extension/benchmark/android/benchmark/app/libs
cp aar-out/executorch.aar extension/benchmark/android/benchmark/app/libs
pushd extension/benchmark/android/benchmark
diff --git a/extension/android/CMakeLists.txt b/extension/android/CMakeLists.txt
index b2f7b8d9f47..06cae3475e8 100644
--- a/extension/android/CMakeLists.txt
+++ b/extension/android/CMakeLists.txt
@@ -80,6 +80,16 @@ list(
fbjni
)
+if(EXECUTORCH_ANDROID_PROFILING)
+ list(
+ APPEND
+ link_libraries
+ etdump
+ flatccrt
+ )
+ target_compile_definitions(executorch_jni PUBLIC EXECUTORCH_ANDROID_PROFILING=1)
+endif()
+
if(TARGET optimized_native_cpu_ops_lib)
list(
APPEND
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.java
index 2fd488dd1f1..b31641d5a37 100644
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.java
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.java
@@ -152,6 +152,18 @@ public String[] readLogBuffer() {
return mNativePeer.readLogBuffer();
}
+ /**
+ * Dump the ExecuTorch ETDump profiling data to /data/local/tmp/result.etdump.
+ *
+ * <p>Currently for internal (minibench) use only.
+ *
+ * @return true if the etdump was successfully written, false otherwise.
+ */
+ @Experimental
+ public boolean etdump() {
+ return mNativePeer.etdump();
+ }
+
/**
* Explicitly destroys the native Module object. Calling this method is not required, as the
* native object will be destroyed when this object is garbage-collected. However, the timing of
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/NativePeer.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/NativePeer.java
index 5700176261b..58d58de1b3e 100644
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/NativePeer.java
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/NativePeer.java
@@ -62,4 +62,7 @@ public void resetNative() {
/** Retrieve the in-memory log buffer, containing the most recent ExecuTorch log entries. */
@DoNotStrip
public native String[] readLogBuffer();
+
+ @DoNotStrip
+ public native boolean etdump();
}
diff --git a/extension/android/jni/jni_layer.cpp b/extension/android/jni/jni_layer.cpp
index a78f3801c64..048d5bffa78 100644
--- a/extension/android/jni/jni_layer.cpp
+++ b/extension/android/jni/jni_layer.cpp
@@ -15,7 +15,6 @@
#include
#include
#include
-
#include "jni_layer_constants.h"
#include
@@ -32,6 +31,12 @@
#include
#endif
+#ifdef EXECUTORCH_ANDROID_PROFILING
+#include
+#include
+#include
+#endif
+
#include
#include
@@ -238,8 +243,13 @@ class ExecuTorchJni : public facebook::jni::HybridClass {
} else if (loadMode == 3) {
load_mode = Module::LoadMode::MmapUseMlockIgnoreErrors;
}
-
- module_ = std::make_unique(modelPath->toStdString(), load_mode);
+#ifdef EXECUTORCH_ANDROID_PROFILING
+ auto etdump_gen = std::make_unique();
+#else
+ auto etdump_gen = nullptr;
+#endif
+ module_ = std::make_unique(
+ modelPath->toStdString(), load_mode, std::move(etdump_gen));
#ifdef ET_USE_THREADPOOL
// Default to using cores/2 threadpool threads. The long-term plan is to
@@ -362,7 +372,6 @@ class ExecuTorchJni : public facebook::jni::HybridClass {
auto jevalue = JEValue::newJEValueFromEValue(result.get()[i]);
jresult->setElement(i, *jevalue);
}
-
return jresult;
}
@@ -396,6 +405,79 @@ class ExecuTorchJni : public facebook::jni::HybridClass {
#endif
}
+  /**
+   * Writes the ETDump gathered by this module's event tracer to
+   * /data/local/tmp/result.etdump.
+   *
+   * Only does work when compiled with EXECUTORCH_ANDROID_PROFILING; without
+   * it the method just returns false.
+   *
+   * @return true only if the whole ETDump buffer reached the file; false when
+   *     no tracer or data is available or when open/write/close fails.
+   */
+  jboolean etdump() {
+#ifdef EXECUTORCH_ANDROID_PROFILING
+    // Assumes the tracer handed to Module at construction is an ETDumpGen;
+    // a missing tracer is reported instead of being dereferenced.
+    auto* etdumpgen =
+        static_cast<executorch::etdump::ETDumpGen*>(module_->event_tracer());
+    if (etdumpgen == nullptr) {
+      ET_LOG(Error, "No ETDump data available!");
+      return false;
+    }
+
+    // The buffer is treated as owned by this function from here on (it has
+    // always been released with free() after a successful write).
+    auto etdump_data = etdumpgen->get_etdump_data();
+    if (etdump_data.buf == nullptr || etdump_data.size == 0) {
+      ET_LOG(Error, "No ETDump data available!");
+      free(etdump_data.buf); // free(nullptr) is a no-op
+      return false;
+    }
+
+    // O_TRUNC discards earlier contents so a shorter dump cannot leave stale
+    // trailing bytes from a previous, longer one.
+    const int etdump_file = open(
+        "/data/local/tmp/result.etdump", O_WRONLY | O_CREAT | O_TRUNC, 0644);
+    if (etdump_file == -1) {
+      ET_LOG(Error, "Cannot create result.etdump error: %d", errno);
+      free(etdump_data.buf);
+      return false;
+    }
+
+    // write() may accept fewer bytes than requested, so keep going until the
+    // whole buffer is on disk; EINTR is retried.
+    const auto* cursor = static_cast<const uint8_t*>(etdump_data.buf);
+    size_t remaining = etdump_data.size;
+    bool ok = true;
+    while (remaining > 0) {
+      const ssize_t bytes_written = write(etdump_file, cursor, remaining);
+      if (bytes_written == -1 && errno == EINTR) {
+        continue;
+      }
+      if (bytes_written <= 0) {
+        ET_LOG(Error, "Cannot write result.etdump error: %d", errno);
+        ok = false;
+        break;
+      }
+      cursor += bytes_written;
+      remaining -= static_cast<size_t>(bytes_written);
+    }
+
+    // Release the descriptor and the buffer on every path, not just success.
+    if (close(etdump_file) == -1) {
+      ET_LOG(Error, "Cannot close result.etdump error: %d", errno);
+      ok = false;
+    }
+    free(etdump_data.buf);
+
+    if (ok) {
+      ET_LOG(Info, "ETDump written %zu bytes to file.", etdump_data.size);
+    }
+    return ok;
+#else
+    return false;
+#endif
+  }
+
facebook::jni::local_ref> getUsedBackends(
facebook::jni::alias_ref methodName) {
auto methodMeta = module_->method_meta(methodName->toStdString()).get();
@@ -423,6 +463,7 @@ class ExecuTorchJni : public facebook::jni::HybridClass {
makeNativeMethod("execute", ExecuTorchJni::execute),
makeNativeMethod("loadMethod", ExecuTorchJni::load_method),
makeNativeMethod("readLogBuffer", ExecuTorchJni::readLogBuffer),
+ makeNativeMethod("etdump", ExecuTorchJni::etdump),
makeNativeMethod("getUsedBackends", ExecuTorchJni::getUsedBackends),
});
}
diff --git a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
index 3021d42038b..aa113561cc8 100644
--- a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
+++ b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
@@ -107,6 +107,7 @@ phases:
- echo "Run benchmark"
- |
+ adb -s $DEVICEFARM_DEVICE_UDID shell touch /data/local/tmp/result.etdump
adb -s $DEVICEFARM_DEVICE_UDID shell am force-stop org.pytorch.minibench
adb -s $DEVICEFARM_DEVICE_UDID shell dumpsys deviceidle force-idle
@@ -147,6 +148,8 @@ phases:
# Trying to pull the file using adb ends up with permission error, but this works too, so why not
echo "${BENCHMARK_RESULTS}" > $DEVICEFARM_LOG_DIR/benchmark_results.json
+ adb -s $DEVICEFARM_DEVICE_UDID pull /data/local/tmp/result.etdump $DEVICEFARM_LOG_DIR/result.etdump
+
artifacts:
# By default, Device Farm will collect your artifacts from the $DEVICEFARM_LOG_DIR directory.
- $DEVICEFARM_LOG_DIR
diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/ModelRunner.java b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/ModelRunner.java
index 3913a8d76f5..28f4e3728f0 100644
--- a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/ModelRunner.java
+++ b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/ModelRunner.java
@@ -41,6 +41,8 @@ public void runBenchmark(
latency.add(forwardMs);
}
+ module.etdump();
+
final BenchmarkMetric.BenchmarkModel benchmarkModel =
BenchmarkMetric.extractBackendAndQuantization(model.getName().replace(".pte", ""));
// The list of metrics we have atm includes:
diff --git a/scripts/build_android_library.sh b/scripts/build_android_library.sh
index 5f0790adb82..38916873103 100755
--- a/scripts/build_android_library.sh
+++ b/scripts/build_android_library.sh
@@ -40,6 +40,8 @@ build_android_native_library() {
-DANDROID_PLATFORM=android-26 \
-DBUILD_TESTING=OFF \
-DEXECUTORCH_ENABLE_LOGGING=ON \
+ -DEXECUTORCH_BUILD_DEVTOOLS=ON \
+ -DEXECUTORCH_ENABLE_EVENT_TRACER="${EXECUTORCH_ANDROID_PROFILING:-OFF}" \
-DEXECUTORCH_LOG_LEVEL=Info \
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DEXECUTORCH_XNNPACK_SHARED_WORKSPACE=ON \
@@ -75,6 +77,7 @@ build_android_native_library() {
-DEXECUTORCH_ENABLE_LOGGING=ON \
-DEXECUTORCH_LOG_LEVEL=Info \
-DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
+ -DEXECUTORCH_ANDROID_PROFILING="${EXECUTORCH_ANDROID_PROFILING:-OFF}" \
-DNEURON_BUFFER_ALLOCATOR_LIB="$NEURON_BUFFER_ALLOCATOR_LIB" \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM="${EXECUTORCH_BUILD_EXTENSION_LLM:-ON}" \
-DEXECUTORCH_BUILD_LLAMA_JNI="${EXECUTORCH_BUILD_EXTENSION_LLM:-ON}" \