Update base for Update on "[llava][15/N] Extract out text decoder runner"

larryliu0820 · larryliu0820 · commit 24eb00587c05 · 2024-08-07T23:31:17.000-07:00
The previous PR, #4556, refactored run_model_step() so that it is suitable for extraction into a separate class. This new `TextDecoderRunner` provides 2 APIs: * step(tokens, start_pos) This API takes one or more tokens with start_pos and feeds them into Module. It returns a tensor of logits. * logits_to_token(logits) This API samples the result and returns a token. We don't expect this logic to change across different runners. Differential Revision: [D60856571](https://our.internmc.facebook.com/intern/diff/D60856571) [ghstack-poisoned]
diff --git a/examples/models/llama2/runner/runner.cpp b/examples/models/llama2/runner/runner.cpp
@@ -152,7 +152,7 @@ Result<uint64_t> Runner::prefill(
 
     ManagedTensor managed_start_pos(&start_pos, {1}, ScalarType::Long);
 
-    Result<torch::executor::Tensor> outputs_res =
+    Result<exec_aten::Tensor> outputs_res =
         run_model_step(managed_tokens, managed_start_pos);
 
     ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error());
@@ -164,6 +164,7 @@ Result<uint64_t> Runner::prefill(
         num_prompt_tokens,
         outputs_res.get().size(1));
     // insert new token into prompt_tokens
+    // NOLINTNEXTLINE(facebook-hte-ParameterUncheckedArrayBounds)
     uint64_t prev = prompt_tokens[0];
     uint64_t cur;
     for (int i = 1; i < prompt_tokens.size(); i++) {
@@ -177,6 +178,7 @@ Result<uint64_t> Runner::prefill(
     uint64_t prev_token;
     // token & pos
     int64_t pos_data = 0;
+    // NOLINTNEXTLINE(facebook-hte-ParameterUncheckedArrayBounds)
     cur_token = prompt_tokens[0];
 
     // initialize tensor wrappers
@@ -188,7 +190,7 @@ Result<uint64_t> Runner::prefill(
       // Run the model
       pos_data = start_pos + pos;
 
-      Result<torch::executor::Tensor> logits_res =
+      Result<exec_aten::Tensor> logits_res =
           run_model_step(managed_tokens, managed_start_pos);
 
       ET_CHECK_OK_OR_RETURN_ERROR(logits_res.error());