diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index 266454c39ab..e2f85e05d3a 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -367,7 +367,7 @@ jobs: PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh mkdir -p aar-out - PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 bash scripts/build_android_library.sh + PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh mkdir -p extension/benchmark/android/benchmark/app/libs cp aar-out/executorch.aar extension/benchmark/android/benchmark/app/libs pushd extension/benchmark/android/benchmark diff --git a/extension/android/CMakeLists.txt b/extension/android/CMakeLists.txt index b2f7b8d9f47..06cae3475e8 100644 --- a/extension/android/CMakeLists.txt +++ b/extension/android/CMakeLists.txt @@ -80,6 +80,16 @@ list( fbjni ) +if(EXECUTORCH_ANDROID_PROFILING) + list( + APPEND + link_libraries + etdump + flatccrt + ) + target_compile_definitions(executorch_jni PUBLIC EXECUTORCH_ANDROID_PROFILING=1) +endif() + if(TARGET optimized_native_cpu_ops_lib) list( APPEND diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.java index 2fd488dd1f1..b31641d5a37 100644 --- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.java +++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.java @@ -152,6 +152,18 @@ public String[] readLogBuffer() { return mNativePeer.readLogBuffer(); } + /** + * Dump the ExecuTorch ETDump file to /data/local/tmp/result.etdump. + * + *

Currently for internal (minibench) use only. + * + * @return true if the etdump was successfully written, false otherwise. + */ + @Experimental + public boolean etdump() { + return mNativePeer.etdump(); + } + /** * Explicitly destroys the native Module object. Calling this method is not required, as the * native object will be destroyed when this object is garbage-collected. However, the timing of diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/NativePeer.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/NativePeer.java index 5700176261b..58d58de1b3e 100644 --- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/NativePeer.java +++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/NativePeer.java @@ -62,4 +62,7 @@ public void resetNative() { /** Retrieve the in-memory log buffer, containing the most recent ExecuTorch log entries. */ @DoNotStrip public native String[] readLogBuffer(); + + @DoNotStrip + public native boolean etdump(); } diff --git a/extension/android/jni/jni_layer.cpp b/extension/android/jni/jni_layer.cpp index a78f3801c64..048d5bffa78 100644 --- a/extension/android/jni/jni_layer.cpp +++ b/extension/android/jni/jni_layer.cpp @@ -15,7 +15,6 @@ #include #include #include - #include "jni_layer_constants.h" #include @@ -32,6 +31,12 @@ #include #endif +#ifdef EXECUTORCH_ANDROID_PROFILING +#include +#include +#include +#endif + #include #include @@ -238,8 +243,13 @@ class ExecuTorchJni : public facebook::jni::HybridClass { } else if (loadMode == 3) { load_mode = Module::LoadMode::MmapUseMlockIgnoreErrors; } - - module_ = std::make_unique(modelPath->toStdString(), load_mode); +#ifdef EXECUTORCH_ANDROID_PROFILING + auto etdump_gen = std::make_unique(); +#else + auto etdump_gen = nullptr; +#endif + module_ = std::make_unique( + modelPath->toStdString(), load_mode, std::move(etdump_gen)); #ifdef ET_USE_THREADPOOL // Default to using cores/2 
threadpool threads. The long-term plan is to
@@ -362,7 +372,6 @@ class ExecuTorchJni : public facebook::jni::HybridClass {
       auto jevalue = JEValue::newJEValueFromEValue(result.get()[i]);
       jresult->setElement(i, *jevalue);
     }
-
     return jresult;
   }
 
@@ -396,6 +405,69 @@ class ExecuTorchJni : public facebook::jni::HybridClass {
 #endif
   }
 
+  /**
+   * Writes the ETDump collected by the module's event tracer to
+   * /data/local/tmp/result.etdump.
+   *
+   * Returns true only when the whole buffer was written. Returns false when
+   * there is no ETDump data, when the file cannot be opened or written, and
+   * always when built without EXECUTORCH_ANDROID_PROFILING.
+   */
+  jboolean etdump() {
+#ifdef EXECUTORCH_ANDROID_PROFILING
+    auto* etdumpgen =
+        static_cast<executorch::etdump::ETDumpGen*>(module_->event_tracer());
+    if (etdumpgen == nullptr) {
+      ET_LOG(Error, "No ETDump data available!");
+      return false;
+    }
+    auto etdump_data = etdumpgen->get_etdump_data();
+    if (etdump_data.buf == nullptr || etdump_data.size == 0) {
+      ET_LOG(Error, "No ETDump data available!");
+      return false;
+    }
+
+    // The target may already exist (pre-created, or left by a previous run),
+    // so truncate it; otherwise bytes from a longer old file would remain.
+    const int etdump_file = open(
+        "/data/local/tmp/result.etdump", O_WRONLY | O_CREAT | O_TRUNC, 0644);
+    if (etdump_file == -1) {
+      ET_LOG(Error, "Cannot create result.etdump error: %d", errno);
+      free(etdump_data.buf);
+      return false;
+    }
+
+    // write() may be interrupted or write fewer bytes than requested, so keep
+    // going until the whole buffer is on disk or a real error occurs.
+    const uint8_t* cursor = static_cast<const uint8_t*>(etdump_data.buf);
+    size_t remaining = etdump_data.size;
+    bool success = true;
+    while (remaining > 0) {
+      const ssize_t bytes_written = write(etdump_file, cursor, remaining);
+      if (bytes_written == -1 && errno == EINTR) {
+        continue;
+      }
+      if (bytes_written <= 0) {
+        ET_LOG(Error, "Cannot write result.etdump error: %d", errno);
+        success = false;
+        break;
+      }
+      cursor += bytes_written;
+      remaining -= static_cast<size_t>(bytes_written);
+    }
+    if (success) {
+      ET_LOG(Info, "ETDump written %zu bytes to file.", etdump_data.size);
+    }
+
+    // Release the descriptor and the buffer whether or not the write worked.
+    close(etdump_file);
+    free(etdump_data.buf);
+    return success;
+#else
+    return false;
+#endif
+  }
+
   facebook::jni::local_ref> getUsedBackends(
       facebook::jni::alias_ref methodName) {
     auto methodMeta = module_->method_meta(methodName->toStdString()).get();
@@ -423,6 +495,7 @@ class ExecuTorchJni : public facebook::jni::HybridClass {
         makeNativeMethod("execute", ExecuTorchJni::execute),
         makeNativeMethod("loadMethod", ExecuTorchJni::load_method),
         makeNativeMethod("readLogBuffer", ExecuTorchJni::readLogBuffer),
+        makeNativeMethod("etdump", ExecuTorchJni::etdump),
         makeNativeMethod("getUsedBackends", ExecuTorchJni::getUsedBackends),
     });
   }
diff --git a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
index 3021d42038b..aa113561cc8 100644
--- 
a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 +++ b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 @@ -107,6 +107,7 @@ phases: - echo "Run benchmark" - | + adb -s $DEVICEFARM_DEVICE_UDID shell touch /data/local/tmp/result.etdump adb -s $DEVICEFARM_DEVICE_UDID shell am force-stop org.pytorch.minibench adb -s $DEVICEFARM_DEVICE_UDID shell dumpsys deviceidle force-idle @@ -147,6 +148,8 @@ phases: # Trying to pull the file using adb ends up with permission error, but this works too, so why not echo "${BENCHMARK_RESULTS}" > $DEVICEFARM_LOG_DIR/benchmark_results.json + adb -s $DEVICEFARM_DEVICE_UDID pull /data/local/tmp/result.etdump $DEVICEFARM_LOG_DIR/result.etdump + artifacts: # By default, Device Farm will collect your artifacts from the $DEVICEFARM_LOG_DIR directory. - $DEVICEFARM_LOG_DIR diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/ModelRunner.java b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/ModelRunner.java index 3913a8d76f5..28f4e3728f0 100644 --- a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/ModelRunner.java +++ b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/ModelRunner.java @@ -41,6 +41,8 @@ public void runBenchmark( latency.add(forwardMs); } + module.etdump(); + final BenchmarkMetric.BenchmarkModel benchmarkModel = BenchmarkMetric.extractBackendAndQuantization(model.getName().replace(".pte", "")); // The list of metrics we have atm includes: diff --git a/scripts/build_android_library.sh b/scripts/build_android_library.sh index 5f0790adb82..38916873103 100755 --- a/scripts/build_android_library.sh +++ b/scripts/build_android_library.sh @@ -40,6 +40,8 @@ build_android_native_library() { -DANDROID_PLATFORM=android-26 \ -DBUILD_TESTING=OFF \ -DEXECUTORCH_ENABLE_LOGGING=ON \ + -DEXECUTORCH_BUILD_DEVTOOLS=ON \ + 
-DEXECUTORCH_ENABLE_EVENT_TRACER="${EXECUTORCH_ANDROID_PROFILING:-OFF}" \ -DEXECUTORCH_LOG_LEVEL=Info \ -DEXECUTORCH_BUILD_XNNPACK=ON \ -DEXECUTORCH_XNNPACK_SHARED_WORKSPACE=ON \ @@ -75,6 +77,7 @@ build_android_native_library() { -DEXECUTORCH_ENABLE_LOGGING=ON \ -DEXECUTORCH_LOG_LEVEL=Info \ -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \ + -DEXECUTORCH_ANDROID_PROFILING="${EXECUTORCH_ANDROID_PROFILING:-OFF}" \ -DNEURON_BUFFER_ALLOCATOR_LIB="$NEURON_BUFFER_ALLOCATOR_LIB" \ -DEXECUTORCH_BUILD_KERNELS_CUSTOM="${EXECUTORCH_BUILD_EXTENSION_LLM:-ON}" \ -DEXECUTORCH_BUILD_LLAMA_JNI="${EXECUTORCH_BUILD_EXTENSION_LLM:-ON}" \