From 02f4581779eb6b7068fbcfb0d65c379b494262ac Mon Sep 17 00:00:00 2001
From: Anthony Shoumikhin
Date: Fri, 27 Jun 2025 01:30:23 -0700
Subject: [PATCH] Use extension/llm directly from LLaMARunner. (#12050)

Summary: .

Reviewed By: larryliu0820, mergennachin

Differential Revision: D77414678
---
 .../LLaMARunner/Exported/LLaMARunner.mm       | 47 ++++++++++---------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm
index e6304b21e08..b91cc79befb 100644
--- a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm
+++ b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm
@@ -9,11 +9,11 @@
 #import "LLaMARunner.h"
 
 #import <ExecuTorch/ExecuTorchLog.h>
-#import <executorch/examples/models/llama/runner/runner.h>
+#import <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
+#import <executorch/extension/llm/runner/text_llm_runner.h>
 
-using executorch::extension::llm::GenerationConfig;
-using executorch::extension::llm::TextLLMRunner;
-using executorch::runtime::Error;
+using namespace executorch::extension;
+using namespace executorch::runtime;
 
 NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain";
 
@@ -21,7 +21,7 @@ @interface LLaMARunner ()<ExecuTorchLogSink>
 @end
 
 @implementation LLaMARunner {
-  std::unique_ptr<TextLLMRunner> _runner;
+  std::unique_ptr<llm::TextLLMRunner> _runner;
 }
 
 - (instancetype)initWithModelPath:(NSString*)modelPath
@@ -29,8 +29,13 @@ - (instancetype)initWithModelPath:(NSString*)modelPath
   self = [super init];
   if (self) {
     [ExecuTorchLog.sharedLog addSink:self];
-    _runner = example::create_llama_runner(
-        modelPath.UTF8String, tokenizerPath.UTF8String);
+    _runner = llm::create_text_llm_runner(
+        modelPath.UTF8String,
+        llm::load_tokenizer(
+            tokenizerPath.UTF8String,
+            example::get_special_tokens(example::Version::Default)
+        )
+    );
   }
   return self;
 }
@@ -60,11 +65,10 @@ - (BOOL)generate:(NSString*)prompt
    sequenceLength:(NSInteger)seq_len
 withTokenCallback:(nullable void (^)(NSString*))callback
             error:(NSError**)error {
-  const GenerationConfig config{
-      .seq_len = static_cast<int32_t>(seq_len)
-  };
   const auto status = _runner->generate(
-      prompt.UTF8String, config, [callback](const std::string& token) {
+      prompt.UTF8String,
+      llm::GenerationConfig{.seq_len = static_cast<int32_t>(seq_len)},
+      [callback](const std::string& token) {
         callback(@(token.c_str()));
       });
   if (status != Error::Ok) {
@@ -72,8 +76,8 @@ - (BOOL)generate:(NSString*)prompt
       *error = [NSError errorWithDomain:LLaMARunnerErrorDomain
                                    code:(NSInteger)status
                                userInfo:nil];
-      return NO;
     }
+    return NO;
   }
   return YES;
 }
@@ -95,15 +99,16 @@ - (void)logWithLevel:(ExecuTorchLogLevel)level
   NSUInteger seconds = totalSeconds % 60;
   NSUInteger microseconds = (timestamp - totalSeconds) * 1000000;
   NSLog(
-      @"%c %02lu:%02lu:%02lu.%06lu executorch:%s:%zu] %s",
-      (char)level,
-      hours,
-      minutes,
-      seconds,
-      microseconds,
-      filename.UTF8String,
-      line,
-      message.UTF8String);
+    @"%c %02lu:%02lu:%02lu.%06lu executorch:%s:%zu] %s",
+    (char)level,
+    hours,
+    minutes,
+    seconds,
+    microseconds,
+    filename.UTF8String,
+    line,
+    message.UTF8String
+  );
 }
 
 @end
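
Usage sketch: the init hunk above replaces example::create_llama_runner with a direct composition of the extension/llm APIs, and the generate hunk builds the GenerationConfig inline. The snippet below restates that call pattern outside the Objective-C wrapper as a minimal sketch, assuming the headers added in the first hunk; names not present in the diff (RunLLaMAExample, the literal prompt, the seq_len value of 128) are illustrative only.

#include <string>

#import <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
#import <executorch/extension/llm/runner/text_llm_runner.h>

using namespace executorch::extension;
using namespace executorch::runtime;

// Sketch of the call pattern introduced by this patch; only the symbols that
// appear in the diff (create_text_llm_runner, load_tokenizer,
// get_special_tokens, GenerationConfig, generate, Error::Ok) are taken as given.
static void RunLLaMAExample(const std::string& modelPath,
                            const std::string& tokenizerPath) {
  // Load the tokenizer with the llama special tokens, then hand it to the
  // generic text LLM runner, mirroring the new -initWithModelPath: body.
  auto runner = llm::create_text_llm_runner(
      modelPath,
      llm::load_tokenizer(
          tokenizerPath,
          example::get_special_tokens(example::Version::Default)));

  // Generate with a per-call config, as in -generate:sequenceLength:... above.
  const auto status = runner->generate(
      "Hello",
      llm::GenerationConfig{.seq_len = 128},
      [](const std::string& token) {
        // Per-token callback; the Objective-C wrapper forwards each token to
        // its NSString block here.
      });
  if (status != Error::Ok) {
    // The wrapper maps a non-Ok status to an NSError in LLaMARunnerErrorDomain.
  }
}

The net effect of the change is that the app no longer depends on the example-level llama runner wrapper, only on the generic extension/llm runner plus the llama tokenizer setup.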