From 2d9c20c2721a35381d7c57a8121fa32048ac826d Mon Sep 17 00:00:00 2001
From: Anthony Shoumikhin <shoumikhin@meta.com>
Date: Thu, 26 Jun 2025 17:34:04 -0700
Subject: [PATCH] Split LLaMA and LLaVA runners. (#12036)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/12036

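Move LLaVARunner out of LLaMARunner.h/.mm into its own LLaVARunner.h/.mm
and add the new files to the Xcode project. Drop the NS_SWIFT_NAME(Runner)
alias so Swift sees the class as LLaMARunner, and rename
RunnerHolder.runner to llamaRunner in ContentView.swift. LLaVA generation
failures are now reported in LLaVARunnerErrorDomain instead of
LLaMARunnerErrorDomain.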

Reviewed By: larryliu0820

Differential Revision: D77407510
---
 .../LLaMA/LLaMA.xcodeproj/project.pbxproj     |   8 ++
 .../LLaMA/LLaMA/Application/ContentView.swift |  14 +--
 .../LLaMARunner/Exported/LLaMARunner.h        |  23 ----
 .../LLaMARunner/Exported/LLaMARunner.mm       | 101 ---------------
 .../LLaMARunner/Exported/LLaVARunner.h        |  35 ++++++
 .../LLaMARunner/Exported/LLaVARunner.mm       | 115 ++++++++++++++++++
 6 files changed, 165 insertions(+), 131 deletions(-)
 create mode 100644 examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaVARunner.h
 create mode 100644 examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaVARunner.mm
diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj b/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj
index 042f3903c67..79ae1788ac8 100644
--- a/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj
+++ b/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj
@@ -46,6 +46,8 @@
 		03D03DA82C7823620088D6A7 /* text_prefiller.h in Headers */ = {isa = PBXBuildFile; fileRef = 03D03DA62C7823620088D6A7 /* text_prefiller.h */; };
 		03D03DAB2C7823830088D6A7 /* text_decoder_runner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 03D03DA92C7823830088D6A7 /* text_decoder_runner.cpp */; };
 		03D03DAC2C7823830088D6A7 /* text_decoder_runner.h in Headers */ = {isa = PBXBuildFile; fileRef = 03D03DAA2C7823830088D6A7 /* text_decoder_runner.h */; };
+		03D151B82E0E0908007A38BE /* LLaVARunner.mm in Sources */ = {isa = PBXBuildFile; fileRef = 03D151B72E0E0908007A38BE /* LLaVARunner.mm */; };
+		03D151B92E0E0908007A38BE /* LLaVARunner.h in Headers */ = {isa = PBXBuildFile; fileRef = 03D151B62E0E0908007A38BE /* LLaVARunner.h */; };
 		26A6A4282C8A3769005A761E /* ImagePicker.swift in Sources */ = {isa = PBXBuildFile; fileRef = 26A6A4272C8A3769005A761E /* ImagePicker.swift */; };
 		306A713D2DC1DC0F00936B1F /* token_decoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 306A713C2DC1DC0F00936B1F /* token_decoder.h */; };
 		306A713E2DC1DC0F00936B1F /* regex.h in Headers */ = {isa = PBXBuildFile; fileRef = 306A71392DC1DC0F00936B1F /* regex.h */; };
@@ -132,6 +134,8 @@
 		03D03DA62C7823620088D6A7 /* text_prefiller.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = text_prefiller.h; sourceTree = "<group>"; };
 		03D03DA92C7823830088D6A7 /* text_decoder_runner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = text_decoder_runner.cpp; sourceTree = "<group>"; };
 		03D03DAA2C7823830088D6A7 /* text_decoder_runner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = text_decoder_runner.h; sourceTree = "<group>"; };
+		03D151B62E0E0908007A38BE /* LLaVARunner.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LLaVARunner.h; sourceTree = "<group>"; };
+		03D151B72E0E0908007A38BE /* LLaVARunner.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = LLaVARunner.mm; sourceTree = "<group>"; };
 		26A6A4272C8A3769005A761E /* ImagePicker.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ImagePicker.swift; sourceTree = "<group>"; };
 		306A71352DC1DC0F00936B1F /* hf_tokenizer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = hf_tokenizer.h; sourceTree = "<group>"; };
 		306A71362DC1DC0F00936B1F /* pcre2_regex.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = pcre2_regex.h; sourceTree = "<group>"; };
@@ -245,6 +249,8 @@
 		0324D69B2BAACB7C00DEF36F /* Exported */ = {
 			isa = PBXGroup;
 			children = (
+				03D151B62E0E0908007A38BE /* LLaVARunner.h */,
+				03D151B72E0E0908007A38BE /* LLaVARunner.mm */,
 				0324D6992BAACB7C00DEF36F /* LLaMARunner.h */,
 				0324D69A2BAACB7C00DEF36F /* LLaMARunner.mm */,
 			);
@@ -403,6 +409,7 @@
 				306A71422DC1DC0F00936B1F /* pre_tokenizer.h in Headers */,
 				306A71432DC1DC0F00936B1F /* pcre2_regex.h in Headers */,
 				306A71442DC1DC0F00936B1F /* std_regex.h in Headers */,
+				03D151B92E0E0908007A38BE /* LLaVARunner.h in Headers */,
 				03729F122BB2042B00152F2E /* sampler.h in Headers */,
 				03729F0C2BB203B300152F2E /* util.h in Headers */,
 				F292B1012D88B20C00BE6839 /* llama_tiktoken.h in Headers */,
@@ -574,6 +581,7 @@
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
+				03D151B82E0E0908007A38BE /* LLaVARunner.mm in Sources */,
 				03729EE12BB1F93800152F2E /* LLaMARunner.mm in Sources */,
 				0372C3152C89418E00CD942A /* llava_runner.cpp in Sources */,
 				F292B1022D88B20C00BE6839 /* llama_tiktoken.cpp in Sources */,
diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMA/Application/ContentView.swift b/examples/demo-apps/apple_ios/LLaMA/LLaMA/Application/ContentView.swift
index 52f18f7a5ea..c6b8b71dfc1 100644
--- a/examples/demo-apps/apple_ios/LLaMA/LLaMA/Application/ContentView.swift
+++ b/examples/demo-apps/apple_ios/LLaMA/LLaMA/Application/ContentView.swift
@@ -12,7 +12,7 @@ import UniformTypeIdentifiers
 import LLaMARunner
 
 class RunnerHolder: ObservableObject {
-  var runner: Runner?
+  var llamaRunner: LLaMARunner?
   var llavaRunner: LLaVARunner?
 }
 
@@ -87,7 +87,7 @@ struct ContentView: View {
     case llava
     case qwen3
     case phi4
-    
+
     static func fromPath(_ path: String) -> ModelType {
       let filename = (path as NSString).lastPathComponent.lowercased()
       if filename.hasPrefix("llama") {
@@ -347,7 +347,7 @@ struct ContentView: View {
 
       switch modelType {
       case .llama, .qwen3, .phi4:
-        runnerHolder.runner = runnerHolder.runner ?? Runner(modelPath: modelPath, tokenizerPath: tokenizerPath)
+        runnerHolder.llamaRunner = runnerHolder.llamaRunner ?? LLaMARunner(modelPath: modelPath, tokenizerPath: tokenizerPath)
       case .llava:
         runnerHolder.llavaRunner = runnerHolder.llavaRunner ?? LLaVARunner(modelPath: modelPath, tokenizerPath: tokenizerPath)
       }
@@ -355,7 +355,7 @@ struct ContentView: View {
       guard !shouldStopGenerating else { return }
       switch modelType {
       case .llama, .qwen3, .phi4:
-        if let runner = runnerHolder.runner, !runner.isLoaded() {
+        if let runner = runnerHolder.llamaRunner, !runner.isLoaded() {
           var error: Error?
           let startLoadTime = Date()
           do {
@@ -481,7 +481,7 @@ struct ContentView: View {
               prompt = String(format: Constants.phi4PromptTemplate, text)
           }
 
-          try runnerHolder.runner?.generate(prompt, sequenceLength: seq_len) { token in
+          try runnerHolder.llamaRunner?.generate(prompt, sequenceLength: seq_len) { token in
 
             if token != prompt {
                 if token == "<|eot_id|>" {
@@ -534,7 +534,7 @@ struct ContentView: View {
                   }
                 }
                 if shouldStopGenerating {
-                  runnerHolder.runner?.stop()
+                  runnerHolder.llamaRunner?.stop()
                 }
               }
             }
@@ -577,7 +577,7 @@ struct ContentView: View {
         return
       }
       runnerQueue.async {
-        runnerHolder.runner = nil
+        runnerHolder.llamaRunner = nil
         runnerHolder.llavaRunner = nil
       }
       switch pickerType {
diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.h b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.h
index 1290a75d975..e10606c8c4d 100644
--- a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.h
+++ b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.h
@@ -11,9 +11,7 @@
 NS_ASSUME_NONNULL_BEGIN
 
 FOUNDATION_EXPORT NSErrorDomain const LLaMARunnerErrorDomain;
-FOUNDATION_EXPORT NSErrorDomain const LLaVARunnerErrorDomain;
 
-NS_SWIFT_NAME(Runner)
 @interface LLaMARunner : NSObject
 
 - (instancetype)initWithModelPath:(NSString*)filePath
@@ -31,25 +29,4 @@ NS_SWIFT_NAME(Runner)
 
 @end
 
-NS_SWIFT_NAME(LLaVARunner)
-@interface LLaVARunner : NSObject
-
-- (instancetype)initWithModelPath:(NSString*)filePath
-                    tokenizerPath:(NSString*)tokenizerPath;
-- (BOOL)isLoaded;
-- (BOOL)loadWithError:(NSError**)error;
-- (BOOL)generate:(void*)imageBuffer
-                width:(CGFloat)width
-               height:(CGFloat)height
-               prompt:(NSString*)prompt
-       sequenceLength:(NSInteger)seq_len
-    withTokenCallback:(nullable void (^)(NSString*))callback
-                error:(NSError**)error;
-- (void)stop;
-
-+ (instancetype)new NS_UNAVAILABLE;
-- (instancetype)init NS_UNAVAILABLE;
-
-@end
-
 NS_ASSUME_NONNULL_END
diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm
index fc7f440d999..e6304b21e08 100644
--- a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm
+++ b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm
@@ -10,15 +10,12 @@
 
 #import <ExecuTorch/ExecuTorchLog.h>
 #import <executorch/examples/models/llama/runner/runner.h>
-#import <executorch/examples/models/llava/runner/llava_runner.h>
 
 using executorch::extension::llm::GenerationConfig;
-using executorch::extension::llm::Image;
 using executorch::extension::llm::TextLLMRunner;
 using executorch::runtime::Error;
 
 NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain";
-NSErrorDomain const LLaVARunnerErrorDomain = @"LLaVARunnerErrorDomain";
 
 @interface LLaMARunner ()<ExecuTorchLogSink>
 @end
@@ -110,101 +107,3 @@ - (void)logWithLevel:(ExecuTorchLogLevel)level
 }
 
 @end
-
-@interface LLaVARunner ()<ExecuTorchLogSink>
-@end
-
-@implementation LLaVARunner {
-  std::unique_ptr<example::LlavaRunner> _runner;
-}
-
-- (instancetype)initWithModelPath:(NSString*)modelPath
-                    tokenizerPath:(NSString*)tokenizerPath {
-  self = [super init];
-  if (self) {
-    [ExecuTorchLog.sharedLog addSink:self];
-    _runner = std::make_unique<example::LlavaRunner>(
-        modelPath.UTF8String, tokenizerPath.UTF8String);
-  }
-  return self;
-}
-
-- (void)dealloc {
-  [ExecuTorchLog.sharedLog removeSink:self];
-}
-
-- (BOOL)isLoaded {
-  return _runner->is_loaded();
-}
-
-- (BOOL)loadWithError:(NSError**)error {
-  const auto status = _runner->load();
-  if (status != Error::Ok) {
-    if (error) {
-      *error = [NSError errorWithDomain:LLaVARunnerErrorDomain
-                                   code:(NSInteger)status
-                               userInfo:nil];
-    }
-    return NO;
-  }
-  return YES;
-}
-
-- (BOOL)generate:(void*)imageBuffer
-                width:(CGFloat)width
-               height:(CGFloat)height
-               prompt:(NSString*)prompt
-       sequenceLength:(NSInteger)seq_len
-    withTokenCallback:(nullable void (^)(NSString*))callback
-                error:(NSError**)error {
-  const auto* data = static_cast<uint8_t*>(imageBuffer);
-  const auto status = _runner->generate(
-      {Image{
-          std::vector<uint8_t>(
-              data, data + (int32_t)width * (int32_t)height * 3),
-          (int32_t)width,
-          (int32_t)height,
-          3}},
-      prompt.UTF8String,
-      seq_len,
-      [callback](const std::string& token) { callback(@(token.c_str())); });
-  if (status != Error::Ok) {
-    if (error) {
-      *error = [NSError errorWithDomain:LLaMARunnerErrorDomain
-                                   code:(NSInteger)status
-                               userInfo:nil];
-      return NO;
-    }
-  }
-  return YES;
-}
-
-- (void)stop {
-  _runner->stop();
-}
-
-#pragma mark - ExecuTorchLogSink
-
-- (void)logWithLevel:(ExecuTorchLogLevel)level
-           timestamp:(NSTimeInterval)timestamp
-            filename:(NSString*)filename
-                line:(NSUInteger)line
-             message:(NSString*)message {
-  NSUInteger totalSeconds = (NSUInteger)timestamp;
-  NSUInteger hours = (totalSeconds / 3600) % 24;
-  NSUInteger minutes = (totalSeconds / 60) % 60;
-  NSUInteger seconds = totalSeconds % 60;
-  NSUInteger microseconds = (timestamp - totalSeconds) * 1000000;
-  NSLog(
-      @"%c %02lu:%02lu:%02lu.%06lu executorch:%s:%zu] %s",
-      (char)level,
-      hours,
-      minutes,
-      seconds,
-      microseconds,
-      filename.UTF8String,
-      line,
-      message.UTF8String);
-}
-
-@end
diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaVARunner.h b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaVARunner.h
new file mode 100644
index 00000000000..4214a9ad390
--- /dev/null
+++ b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaVARunner.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#import <UIKit/UIKit.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/// Domain of the NSError objects reported by LLaVARunner.
+FOUNDATION_EXPORT NSErrorDomain const LLaVARunnerErrorDomain;
+
+/// Runs a LLaVA model on one image plus a text prompt and reports the
+/// generated tokens through a callback.
+@interface LLaVARunner : NSObject
+
+/// Creates a runner for the model file at `filePath` and the tokenizer file
+/// at `tokenizerPath`.
+- (instancetype)initWithModelPath:(NSString*)filePath
+                    tokenizerPath:(NSString*)tokenizerPath
+    NS_DESIGNATED_INITIALIZER;
+
+/// Whether the underlying runner reports itself as loaded.
+- (BOOL)isLoaded;
+
+/// Loads the runner.
+/// @param error Receives an error in LLaVARunnerErrorDomain on failure.
+/// @return YES on success, NO on failure.
+- (BOOL)loadWithError:(NSError**)error;
+
+/// Generates text for an image and a prompt.
+/// @param imageBuffer Image bytes; width * height * 3 bytes are read from it.
+/// @param width Image width; truncated to a 32-bit integer.
+/// @param height Image height; truncated to a 32-bit integer.
+/// @param prompt Text prompt handed to the runner.
+/// @param seq_len Sequence length handed to the runner.
+/// @param callback Called with each token string the runner produces.
+/// @param error Receives an error in LLaVARunnerErrorDomain on failure.
+/// @return YES on success.
+- (BOOL)generate:(void*)imageBuffer
+                width:(CGFloat)width
+               height:(CGFloat)height
+               prompt:(NSString*)prompt
+       sequenceLength:(NSInteger)seq_len
+    withTokenCallback:(nullable void (^)(NSString*))callback
+                error:(NSError**)error;
+
+/// Asks the underlying runner to stop.
+- (void)stop;
+
++ (instancetype)new NS_UNAVAILABLE;
+- (instancetype)init NS_UNAVAILABLE;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaVARunner.mm b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaVARunner.mm
new file mode 100644
index 00000000000..8864e45f52b
--- /dev/null
+++ b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaVARunner.mm
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#import "LLaVARunner.h"
+
+#import <ExecuTorch/ExecuTorchLog.h>
+#import <executorch/examples/models/llava/runner/llava_runner.h>
+
+using executorch::extension::llm::Image;
+using executorch::runtime::Error;
+
+NSErrorDomain const LLaVARunnerErrorDomain = @"LLaVARunnerErrorDomain";
+
+// Private conformance: the runner registers itself as a sink on the shared
+// ExecuTorch log while it is alive and forwards each record to NSLog.
+@interface LLaVARunner ()<ExecuTorchLogSink>
+@end
+
+@implementation LLaVARunner {
+  // Underlying C++ LLaVA runner, created in the initializer.
+  std::unique_ptr<example::LlavaRunner> _runner;
+}
+
+// Registers this object as a log sink and builds the C++ runner from the two
+// paths. load() is not called here.
+- (instancetype)initWithModelPath:(NSString*)modelPath
+                    tokenizerPath:(NSString*)tokenizerPath {
+  self = [super init];
+  if (self) {
+    [ExecuTorchLog.sharedLog addSink:self];
+    _runner = std::make_unique<example::LlavaRunner>(
+        modelPath.UTF8String, tokenizerPath.UTF8String);
+  }
+  return self;
+}
+
+// Balances the -addSink: call made in the initializer.
+- (void)dealloc {
+  [ExecuTorchLog.sharedLog removeSink:self];
+}
+
+// Reports the underlying runner's is_loaded() state.
+- (BOOL)isLoaded {
+  return _runner->is_loaded();
+}
+
+// Calls load() on the underlying runner. On failure returns NO and, when
+// `error` is non-NULL, stores an NSError in LLaVARunnerErrorDomain whose code
+// is the runtime Error value.
+- (BOOL)loadWithError:(NSError**)error {
+  const auto status = _runner->load();
+  if (status != Error::Ok) {
+    if (error) {
+      *error = [NSError errorWithDomain:LLaVARunnerErrorDomain
+                                   code:(NSInteger)status
+                               userInfo:nil];
+    }
+    return NO;
+  }
+  return YES;
+}
+
+// Runs generation for one image plus a text prompt.
+//
+// `imageBuffer` is read as width * height * 3 bytes (three channels per
+// pixel) and copied into a std::vector before being handed to the runner.
+// `callback`, when non-nil, receives each token string the runner emits.
+// On failure returns NO and, when `error` is non-NULL, stores an NSError in
+// LLaVARunnerErrorDomain whose code is the runtime Error value.
+- (BOOL)generate:(void*)imageBuffer
+                width:(CGFloat)width
+               height:(CGFloat)height
+               prompt:(NSString*)prompt
+       sequenceLength:(NSInteger)seq_len
+    withTokenCallback:(nullable void (^)(NSString*))callback
+                error:(NSError**)error {
+  const auto* data = static_cast<uint8_t*>(imageBuffer);
+  const auto imageWidth = (int32_t)width;
+  const auto imageHeight = (int32_t)height;
+  // Size the copy in size_t so large images cannot overflow 32-bit math.
+  const auto byteCount = (size_t)imageWidth * (size_t)imageHeight * 3;
+  const auto status = _runner->generate(
+      {Image{
+          std::vector<uint8_t>(data, data + byteCount),
+          imageWidth,
+          imageHeight,
+          3}},
+      prompt.UTF8String,
+      seq_len,
+      [callback](const std::string& token) {
+        // The callback is declared nullable; invoking a nil block crashes.
+        if (callback) {
+          callback(@(token.c_str()));
+        }
+      });
+  if (status != Error::Ok) {
+    if (error) {
+      *error = [NSError errorWithDomain:LLaVARunnerErrorDomain
+                                   code:(NSInteger)status
+                               userInfo:nil];
+    }
+    // Report failure even when the caller passed no error out-parameter.
+    return NO;
+  }
+  return YES;
+}
+
+// Calls stop() on the underlying runner.
+- (void)stop {
+  _runner->stop();
+}
+
+#pragma mark - ExecuTorchLogSink
+
+// Writes one log record with NSLog as
+// "<level> HH:MM:SS.uuuuuu executorch:<file>:<line>] <message>".
+// The clock fields are computed from `timestamp` treated as seconds; the
+// hour field wraps modulo 24.
+- (void)logWithLevel:(ExecuTorchLogLevel)level
+           timestamp:(NSTimeInterval)timestamp
+            filename:(NSString*)filename
+                line:(NSUInteger)line
+             message:(NSString*)message {
+  NSUInteger totalSeconds = (NSUInteger)timestamp;
+  NSUInteger hours = (totalSeconds / 3600) % 24;
+  NSUInteger minutes = (totalSeconds / 60) % 60;
+  NSUInteger seconds = totalSeconds % 60;
+  NSUInteger microseconds = (timestamp - totalSeconds) * 1000000;
+  NSLog(
+      @"%c %02lu:%02lu:%02lu.%06lu executorch:%s:%zu] %s",
+      (char)level,
+      hours,
+      minutes,
+      seconds,
+      microseconds,
+      filename.UTF8String,
+      line,
+      message.UTF8String);
+}
+
+@end