diff --git a/.ci/scripts/test_llava.sh b/.ci/scripts/test_llava.sh
index 7dc6d15e407..8ac87b2302d 100644
--- a/.ci/scripts/test_llava.sh
+++ b/.ci/scripts/test_llava.sh
@@ -33,6 +33,7 @@ if hash nproc &> /dev/null; then NPROC=$(nproc); fi
 EXECUTORCH_COMMON_CMAKE_ARGS=" \
     -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
     -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
+    -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
diff --git a/examples/models/llava/runner/llava_runner.cpp b/examples/models/llava/runner/llava_runner.cpp
index 20c45009da7..1924b057ec4 100644
--- a/examples/models/llava/runner/llava_runner.cpp
+++ b/examples/models/llava/runner/llava_runner.cpp
@@ -108,6 +108,8 @@ Error LlavaRunner::generate_from_pos(
   // prefill user prompt. No BOS because preset prompt already has it.
   uint64_t prefill_next_token =
       ET_UNWRAP(prefill_prompt(prompt, start_pos, /*bos=*/0, /*eos*/ 0));
+  stats_.first_token_ms = util::time_in_ms();
+  stats_.prompt_eval_end_ms = util::time_in_ms();
   stats_.num_prompt_tokens = start_pos;
 
   // Generate tokens
@@ -116,7 +118,6 @@ Error LlavaRunner::generate_from_pos(
 
   // Bookkeeping
   stats_.num_generated_tokens = num_generated_tokens;
-  ::executorch::llm::print_report(stats_);
   if (stats_callback) {
     stats_callback(stats_);
   }
@@ -151,6 +152,7 @@ Error LlavaRunner::generate(
   };
 
   int64_t pos = 0;
+  stats_.inference_start_ms = util::time_in_ms();
   // prefill preset prompt
   prefill_prompt(kPresetPrompt, pos, /*bos=*/1, /*eos*/ 0);
 
@@ -167,6 +169,9 @@ Error LlavaRunner::generate(
   Error err = generate_from_pos(
       prompt, seq_len, pos, wrapped_callback, stats_callback, echo);
 
+  stats_.inference_end_ms = util::time_in_ms();
+  ::executorch::llm::print_report(stats_);
+
   ET_LOG(
       Info,
       "RSS after finishing text generation: %f MiB (0 if unsupported)",