From 2d9c20c2721a35381d7c57a8121fa32048ac826d Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Thu, 26 Jun 2025 17:34:04 -0700 Subject: [PATCH] Split LLaMA and LLaVA runners. (#12036) Summary: Pull Request resolved: https://github.com/pytorch/executorch/pull/12036 . Reviewed By: larryliu0820 Differential Revision: D77407510 --- .../LLaMA/LLaMA.xcodeproj/project.pbxproj | 8 ++ .../LLaMA/LLaMA/Application/ContentView.swift | 14 +-- .../LLaMARunner/Exported/LLaMARunner.h | 23 ---- .../LLaMARunner/Exported/LLaMARunner.mm | 101 --------------- .../LLaMARunner/Exported/LLaVARunner.h | 35 ++++++ .../LLaMARunner/Exported/LLaVARunner.mm | 115 ++++++++++++++++++ 6 files changed, 165 insertions(+), 131 deletions(-) create mode 100644 examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaVARunner.h create mode 100644 examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaVARunner.mm diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj b/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj index 042f3903c67..79ae1788ac8 100644 --- a/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj @@ -46,6 +46,8 @@ 03D03DA82C7823620088D6A7 /* text_prefiller.h in Headers */ = {isa = PBXBuildFile; fileRef = 03D03DA62C7823620088D6A7 /* text_prefiller.h */; }; 03D03DAB2C7823830088D6A7 /* text_decoder_runner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 03D03DA92C7823830088D6A7 /* text_decoder_runner.cpp */; }; 03D03DAC2C7823830088D6A7 /* text_decoder_runner.h in Headers */ = {isa = PBXBuildFile; fileRef = 03D03DAA2C7823830088D6A7 /* text_decoder_runner.h */; }; + 03D151B82E0E0908007A38BE /* LLaVARunner.mm in Sources */ = {isa = PBXBuildFile; fileRef = 03D151B72E0E0908007A38BE /* LLaVARunner.mm */; }; + 03D151B92E0E0908007A38BE /* LLaVARunner.h in Headers */ = {isa = PBXBuildFile; fileRef = 03D151B62E0E0908007A38BE /* 
LLaVARunner.h */; }; 26A6A4282C8A3769005A761E /* ImagePicker.swift in Sources */ = {isa = PBXBuildFile; fileRef = 26A6A4272C8A3769005A761E /* ImagePicker.swift */; }; 306A713D2DC1DC0F00936B1F /* token_decoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 306A713C2DC1DC0F00936B1F /* token_decoder.h */; }; 306A713E2DC1DC0F00936B1F /* regex.h in Headers */ = {isa = PBXBuildFile; fileRef = 306A71392DC1DC0F00936B1F /* regex.h */; }; @@ -132,6 +134,8 @@ 03D03DA62C7823620088D6A7 /* text_prefiller.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = text_prefiller.h; sourceTree = ""; }; 03D03DA92C7823830088D6A7 /* text_decoder_runner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = text_decoder_runner.cpp; sourceTree = ""; }; 03D03DAA2C7823830088D6A7 /* text_decoder_runner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = text_decoder_runner.h; sourceTree = ""; }; + 03D151B62E0E0908007A38BE /* LLaVARunner.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LLaVARunner.h; sourceTree = ""; }; + 03D151B72E0E0908007A38BE /* LLaVARunner.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = LLaVARunner.mm; sourceTree = ""; }; 26A6A4272C8A3769005A761E /* ImagePicker.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ImagePicker.swift; sourceTree = ""; }; 306A71352DC1DC0F00936B1F /* hf_tokenizer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = hf_tokenizer.h; sourceTree = ""; }; 306A71362DC1DC0F00936B1F /* pcre2_regex.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = pcre2_regex.h; sourceTree = ""; }; @@ -245,6 +249,8 @@ 0324D69B2BAACB7C00DEF36F /* Exported */ = { isa = PBXGroup; children = ( + 03D151B62E0E0908007A38BE /* LLaVARunner.h */, + 03D151B72E0E0908007A38BE /* LLaVARunner.mm */, 0324D6992BAACB7C00DEF36F /* 
LLaMARunner.h */, 0324D69A2BAACB7C00DEF36F /* LLaMARunner.mm */, ); @@ -403,6 +409,7 @@ 306A71422DC1DC0F00936B1F /* pre_tokenizer.h in Headers */, 306A71432DC1DC0F00936B1F /* pcre2_regex.h in Headers */, 306A71442DC1DC0F00936B1F /* std_regex.h in Headers */, + 03D151B92E0E0908007A38BE /* LLaVARunner.h in Headers */, 03729F122BB2042B00152F2E /* sampler.h in Headers */, 03729F0C2BB203B300152F2E /* util.h in Headers */, F292B1012D88B20C00BE6839 /* llama_tiktoken.h in Headers */, @@ -574,6 +581,7 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( + 03D151B82E0E0908007A38BE /* LLaVARunner.mm in Sources */, 03729EE12BB1F93800152F2E /* LLaMARunner.mm in Sources */, 0372C3152C89418E00CD942A /* llava_runner.cpp in Sources */, F292B1022D88B20C00BE6839 /* llama_tiktoken.cpp in Sources */, diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMA/Application/ContentView.swift b/examples/demo-apps/apple_ios/LLaMA/LLaMA/Application/ContentView.swift index 52f18f7a5ea..c6b8b71dfc1 100644 --- a/examples/demo-apps/apple_ios/LLaMA/LLaMA/Application/ContentView.swift +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMA/Application/ContentView.swift @@ -12,7 +12,7 @@ import UniformTypeIdentifiers import LLaMARunner class RunnerHolder: ObservableObject { - var runner: Runner? + var llamaRunner: LLaMARunner? var llavaRunner: LLaVARunner? } @@ -87,7 +87,7 @@ struct ContentView: View { case llava case qwen3 case phi4 - + static func fromPath(_ path: String) -> ModelType { let filename = (path as NSString).lastPathComponent.lowercased() if filename.hasPrefix("llama") { @@ -347,7 +347,7 @@ struct ContentView: View { switch modelType { case .llama, .qwen3, .phi4: - runnerHolder.runner = runnerHolder.runner ?? Runner(modelPath: modelPath, tokenizerPath: tokenizerPath) + runnerHolder.llamaRunner = runnerHolder.llamaRunner ?? LLaMARunner(modelPath: modelPath, tokenizerPath: tokenizerPath) case .llava: runnerHolder.llavaRunner = runnerHolder.llavaRunner ?? 
LLaVARunner(modelPath: modelPath, tokenizerPath: tokenizerPath) } @@ -355,7 +355,7 @@ struct ContentView: View { guard !shouldStopGenerating else { return } switch modelType { case .llama, .qwen3, .phi4: - if let runner = runnerHolder.runner, !runner.isLoaded() { + if let runner = runnerHolder.llamaRunner, !runner.isLoaded() { var error: Error? let startLoadTime = Date() do { @@ -481,7 +481,7 @@ struct ContentView: View { prompt = String(format: Constants.phi4PromptTemplate, text) } - try runnerHolder.runner?.generate(prompt, sequenceLength: seq_len) { token in + try runnerHolder.llamaRunner?.generate(prompt, sequenceLength: seq_len) { token in if token != prompt { if token == "<|eot_id|>" { @@ -534,7 +534,7 @@ struct ContentView: View { } } if shouldStopGenerating { - runnerHolder.runner?.stop() + runnerHolder.llamaRunner?.stop() } } } @@ -577,7 +577,7 @@ struct ContentView: View { return } runnerQueue.async { - runnerHolder.runner = nil + runnerHolder.llamaRunner = nil runnerHolder.llavaRunner = nil } switch pickerType { diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.h b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.h index 1290a75d975..e10606c8c4d 100644 --- a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.h +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.h @@ -11,9 +11,7 @@ NS_ASSUME_NONNULL_BEGIN FOUNDATION_EXPORT NSErrorDomain const LLaMARunnerErrorDomain; -FOUNDATION_EXPORT NSErrorDomain const LLaVARunnerErrorDomain; -NS_SWIFT_NAME(Runner) @interface LLaMARunner : NSObject - (instancetype)initWithModelPath:(NSString*)filePath @@ -31,25 +29,4 @@ NS_SWIFT_NAME(Runner) @end -NS_SWIFT_NAME(LLaVARunner) -@interface LLaVARunner : NSObject - -- (instancetype)initWithModelPath:(NSString*)filePath - tokenizerPath:(NSString*)tokenizerPath; -- (BOOL)isLoaded; -- (BOOL)loadWithError:(NSError**)error; -- 
(BOOL)generate:(void*)imageBuffer - width:(CGFloat)width - height:(CGFloat)height - prompt:(NSString*)prompt - sequenceLength:(NSInteger)seq_len - withTokenCallback:(nullable void (^)(NSString*))callback - error:(NSError**)error; -- (void)stop; - -+ (instancetype)new NS_UNAVAILABLE; -- (instancetype)init NS_UNAVAILABLE; - -@end - NS_ASSUME_NONNULL_END diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm index fc7f440d999..e6304b21e08 100644 --- a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm @@ -10,15 +10,12 @@ #import #import -#import using executorch::extension::llm::GenerationConfig; -using executorch::extension::llm::Image; using executorch::extension::llm::TextLLMRunner; using executorch::runtime::Error; NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain"; -NSErrorDomain const LLaVARunnerErrorDomain = @"LLaVARunnerErrorDomain"; @interface LLaMARunner () @end @@ -110,101 +107,3 @@ - (void)logWithLevel:(ExecuTorchLogLevel)level } @end - -@interface LLaVARunner () -@end - -@implementation LLaVARunner { - std::unique_ptr _runner; -} - -- (instancetype)initWithModelPath:(NSString*)modelPath - tokenizerPath:(NSString*)tokenizerPath { - self = [super init]; - if (self) { - [ExecuTorchLog.sharedLog addSink:self]; - _runner = std::make_unique( - modelPath.UTF8String, tokenizerPath.UTF8String); - } - return self; -} - -- (void)dealloc { - [ExecuTorchLog.sharedLog removeSink:self]; -} - -- (BOOL)isLoaded { - return _runner->is_loaded(); -} - -- (BOOL)loadWithError:(NSError**)error { - const auto status = _runner->load(); - if (status != Error::Ok) { - if (error) { - *error = [NSError errorWithDomain:LLaVARunnerErrorDomain - code:(NSInteger)status - userInfo:nil]; - } - return NO; - } - return YES; -} - 
-- (BOOL)generate:(void*)imageBuffer - width:(CGFloat)width - height:(CGFloat)height - prompt:(NSString*)prompt - sequenceLength:(NSInteger)seq_len - withTokenCallback:(nullable void (^)(NSString*))callback - error:(NSError**)error { - const auto* data = static_cast(imageBuffer); - const auto status = _runner->generate( - {Image{ - std::vector( - data, data + (int32_t)width * (int32_t)height * 3), - (int32_t)width, - (int32_t)height, - 3}}, - prompt.UTF8String, - seq_len, - [callback](const std::string& token) { callback(@(token.c_str())); }); - if (status != Error::Ok) { - if (error) { - *error = [NSError errorWithDomain:LLaMARunnerErrorDomain - code:(NSInteger)status - userInfo:nil]; - return NO; - } - } - return YES; -} - -- (void)stop { - _runner->stop(); -} - -#pragma mark - ExecuTorchLogSink - -- (void)logWithLevel:(ExecuTorchLogLevel)level - timestamp:(NSTimeInterval)timestamp - filename:(NSString*)filename - line:(NSUInteger)line - message:(NSString*)message { - NSUInteger totalSeconds = (NSUInteger)timestamp; - NSUInteger hours = (totalSeconds / 3600) % 24; - NSUInteger minutes = (totalSeconds / 60) % 60; - NSUInteger seconds = totalSeconds % 60; - NSUInteger microseconds = (timestamp - totalSeconds) * 1000000; - NSLog( - @"%c %02lu:%02lu:%02lu.%06lu executorch:%s:%zu] %s", - (char)level, - hours, - minutes, - seconds, - microseconds, - filename.UTF8String, - line, - message.UTF8String); -} - -@end diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaVARunner.h b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaVARunner.h new file mode 100644 index 00000000000..4214a9ad390 --- /dev/null +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaVARunner.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. 
+ * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#import + +NS_ASSUME_NONNULL_BEGIN + +FOUNDATION_EXPORT NSErrorDomain const LLaVARunnerErrorDomain; + +@interface LLaVARunner : NSObject + +- (instancetype)initWithModelPath:(NSString*)filePath + tokenizerPath:(NSString*)tokenizerPath; +- (BOOL)isLoaded; +- (BOOL)loadWithError:(NSError**)error; +- (BOOL)generate:(void*)imageBuffer + width:(CGFloat)width + height:(CGFloat)height + prompt:(NSString*)prompt + sequenceLength:(NSInteger)seq_len + withTokenCallback:(nullable void (^)(NSString*))callback + error:(NSError**)error; +- (void)stop; + ++ (instancetype)new NS_UNAVAILABLE; +- (instancetype)init NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaVARunner.mm b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaVARunner.mm new file mode 100644 index 00000000000..8864e45f52b --- /dev/null +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaVARunner.mm @@ -0,0 +1,115 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */
+
+#import "LLaVARunner.h"
+
+#import <ExecuTorch/ExecuTorchLog.h>
+#import <executorch/examples/models/llava/runner/llava_runner.h>
+
+using executorch::extension::llm::Image;
+using executorch::runtime::Error;
+
+NSErrorDomain const LLaVARunnerErrorDomain = @"LLaVARunnerErrorDomain";
+
+@interface LLaVARunner ()<ExecuTorchLogSink>
+@end
+
+@implementation LLaVARunner {
+  std::unique_ptr<example::LlavaRunner> _runner;
+}
+
+- (instancetype)initWithModelPath:(NSString*)modelPath
+                    tokenizerPath:(NSString*)tokenizerPath {
+  self = [super init];
+  if (self) {
+    [ExecuTorchLog.sharedLog addSink:self];  // Undone in -dealloc.
+    _runner = std::make_unique<example::LlavaRunner>(
+        modelPath.UTF8String, tokenizerPath.UTF8String);
+  }
+  return self;
+}
+
+- (void)dealloc {
+  [ExecuTorchLog.sharedLog removeSink:self];
+}
+
+- (BOOL)isLoaded {
+  return _runner->is_loaded();
+}
+
+- (BOOL)loadWithError:(NSError**)error {
+  const auto status = _runner->load();
+  if (status != Error::Ok) {
+    if (error) {
+      *error = [NSError errorWithDomain:LLaVARunnerErrorDomain
+                                   code:(NSInteger)status
+                               userInfo:nil];
+    }
+    return NO;
+  }
+  return YES;
+}
+
+// Runs generation on a width x height image read as 3 bytes per pixel.
+// Returns NO (filling *error when provided) if the runner reports an error.
+- (BOOL)generate:(void*)imageBuffer
+                width:(CGFloat)width
+               height:(CGFloat)height
+               prompt:(NSString*)prompt
+       sequenceLength:(NSInteger)seq_len
+    withTokenCallback:(nullable void (^)(NSString*))callback
+                error:(NSError**)error {
+  const auto* data = static_cast<uint8_t*>(imageBuffer);
+  const auto status = _runner->generate(
+      {Image{
+          std::vector<uint8_t>(
+              data, data + (int32_t)width * (int32_t)height * 3),
+          (int32_t)width,
+          (int32_t)height,
+          3}},
+      prompt.UTF8String,
+      seq_len,
+      [callback](const std::string& token) {
+        if (callback) {  // Declared nullable; calling a nil block crashes.
+          callback(@(token.c_str()));
+        }
+      });
+  if (status != Error::Ok) {
+    if (error) {
+      *error = [NSError errorWithDomain:LLaVARunnerErrorDomain
+                                   code:(NSInteger)status
+                               userInfo:nil];
+    }
+    return NO;  // Failure must be reported even without an error out-param.
+  }
+  return YES;
+}
+
+- (void)stop {
+  _runner->stop();
+}
+
+#pragma mark - ExecuTorchLogSink
+
+- (void)logWithLevel:(ExecuTorchLogLevel)level
+           timestamp:(NSTimeInterval)timestamp
+            filename:(NSString*)filename
+                line:(NSUInteger)line
+             message:(NSString*)message {
+  NSUInteger totalSeconds = (NSUInteger)timestamp;
+  NSUInteger hours = (totalSeconds / 3600) % 24;  // Wrap to hour of day.
+  NSUInteger minutes = (totalSeconds / 60) % 60;
+  NSUInteger seconds = totalSeconds % 60;
+  NSUInteger microseconds = (timestamp - totalSeconds) * 1000000;
+  NSLog(
+      @"%c %02lu:%02lu:%02lu.%06lu executorch:%s:%zu] %s",
+      (char)level, hours, minutes, seconds, microseconds,
+      filename.UTF8String, line, message.UTF8String);
+}
+
+@end