diff --git a/docs/source/llm/run-on-ios.md b/docs/source/llm/run-on-ios.md index 3348a03feb3..c4994dd0e06 100644 --- a/docs/source/llm/run-on-ios.md +++ b/docs/source/llm/run-on-ios.md @@ -24,7 +24,7 @@ import ExecuTorchLLM ### TextLLMRunner -The `ExecuTorchTextLLMRunner` class (bridged to Swift as `TextLLMRunner`) provides a simple Objective-C/Swift interface for loading a text-generation model, configuring its tokenizer with custom special tokens, generating token streams, and stopping execution. +The `ExecuTorchLLMTextRunner` class (bridged to Swift as `TextRunner`) provides a simple Objective-C/Swift interface for loading a text-generation model, configuring its tokenizer with custom special tokens, generating token streams, and stopping execution. This API is experimental and subject to change. #### Initialization @@ -38,7 +38,7 @@ NSString *modelPath = [[NSBundle mainBundle] pathForResource:@"llama-3.2-ins NSString *tokenizerPath = [[NSBundle mainBundle] pathForResource:@"tokenizer" ofType:@"model"]; NSArray *specialTokens = @[ @"<|bos|>", @"<|eos|>" ]; -ExecuTorchTextLLMRunner *runner = [[ExecuTorchTextLLMRunner alloc] initWithModelPath:modelPath +ExecuTorchLLMTextRunner *runner = [[ExecuTorchLLMTextRunner alloc] initWithModelPath:modelPath tokenizerPath:tokenizerPath specialTokens:specialTokens]; ``` diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm index a4725246ca0..c3179f150c3 100644 --- a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm @@ -10,7 +10,7 @@ #import #if BUILD_WITH_XCODE -#import "ExecuTorchTextLLMRunner.h" +#import "ExecuTorchLLMTextRunner.h" #else #import #endif @@ -20,7 +20,7 @@ @interface LLaMARunner () @end @implementation LLaMARunner { - ExecuTorchTextLLMRunner *_runner; + 
ExecuTorchLLMTextRunner *_runner; } - (instancetype)initWithModelPath:(NSString *)modelPath @@ -33,7 +33,7 @@ - (instancetype)initWithModelPath:(NSString *)modelPath for (const auto &token : *tokens) { [specialTokens addObject:(NSString *)@(token.c_str())]; } - _runner = [[ExecuTorchTextLLMRunner alloc] initWithModelPath:modelPath + _runner = [[ExecuTorchLLMTextRunner alloc] initWithModelPath:modelPath tokenizerPath:tokenizerPath specialTokens:specialTokens]; } diff --git a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm index fbf1a6c5889..0f509f2809c 100644 --- a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm +++ b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm @@ -87,7 +87,7 @@ @implementation LLaMATests for (NSUInteger index = 2; specialTokens.count < 256; ++index) { [specialTokens addObject:[NSString stringWithFormat:@"<|reserved_special_token_%zu|>", index]]; } - auto __block runner = [[ExecuTorchTextLLMRunner alloc] initWithModelPath:modelPath + auto __block runner = [[ExecuTorchLLMTextRunner alloc] initWithModelPath:modelPath tokenizerPath:tokenizerPath specialTokens:specialTokens]; NSError *error; diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h index 42e08d18030..11cdaf63d0b 100644 --- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h +++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h @@ -6,4 +6,6 @@ * LICENSE file in the root directory of this source tree. 
*/ -#import "ExecuTorchTextLLMRunner.h" +#import "ExecuTorchLLMError.h" +#import "ExecuTorchLLMMultimodalRunner.h" +#import "ExecuTorchLLMTextRunner.h" diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMError.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMError.h new file mode 100644 index 00000000000..13b51a0652b --- /dev/null +++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMError.h @@ -0,0 +1,15 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#import + +NS_ASSUME_NONNULL_BEGIN + +FOUNDATION_EXPORT NSErrorDomain const ExecuTorchLLMErrorDomain NS_SWIFT_NAME(ErrorDomain); + +NS_ASSUME_NONNULL_END diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMError.m b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMError.m new file mode 100644 index 00000000000..52a9ac8c9da --- /dev/null +++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMError.m @@ -0,0 +1,11 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#import "ExecuTorchLLMError.h" + +NSErrorDomain const ExecuTorchLLMErrorDomain = @"org.pytorch.executorch.llm.error"; diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h new file mode 100644 index 00000000000..747286b9ec3 --- /dev/null +++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h @@ -0,0 +1,196 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. 
+ * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#import + +NS_ASSUME_NONNULL_BEGIN + +/** + Types of multimodal inputs supported by the ExecuTorch LLM APIs. + Must be in sync with the C++ enum in llm/runner/multimodal_input.h +*/ +typedef NS_ENUM(NSInteger, ExecuTorchLLMMultimodalInputType) { + ExecuTorchLLMMultimodalInputTypeText, + ExecuTorchLLMMultimodalInputTypeImage, + ExecuTorchLLMMultimodalInputTypeAudio, + ExecuTorchLLMMultimodalInputTypeUnsupported, +} NS_SWIFT_NAME(MultimodalInputType); + +/** + A container for image inputs used with multimodal generation APIs. +*/ +NS_SWIFT_NAME(Image) +__attribute__((deprecated("This API is experimental."))) +@interface ExecuTorchLLMImage : NSObject + +/** + Initializes an image container with the provided data and dimensions. + + @param data Raw image bytes. + @param width Image width in pixels. + @param height Image height in pixels. + @param channels Number of channels. + @return An initialized ExecuTorchLLMImage instance. +*/ +- (instancetype)initWithData:(NSData *)data + width:(NSInteger)width + height:(NSInteger)height + channels:(NSInteger)channels + NS_DESIGNATED_INITIALIZER; + +@property(nonatomic, readonly) NSData *data; +@property(nonatomic, readonly) NSInteger width; +@property(nonatomic, readonly) NSInteger height; +@property(nonatomic, readonly) NSInteger channels; + ++ (instancetype)new NS_UNAVAILABLE; +- (instancetype)init NS_UNAVAILABLE; + +@end + +/** + A container for pre-processed audio features. +*/ +NS_SWIFT_NAME(Audio) +__attribute__((deprecated("This API is experimental."))) +@interface ExecuTorchLLMAudio : NSObject + +/** + Initializes an audio features container with the provided data and shape. + + @param data Feature buffer. + @param batchSize Batch dimension size. + @param bins Number of frequency bins. + @param frames Number of time frames. + @return An initialized ExecuTorchLLMAudio instance. 
+*/ +- (instancetype)initWithData:(NSData *)data + batchSize:(NSInteger)batchSize + bins:(NSInteger)bins + frames:(NSInteger)frames + NS_DESIGNATED_INITIALIZER; + +@property(nonatomic, readonly) NSData *data; +@property(nonatomic, readonly) NSInteger batchSize; +@property(nonatomic, readonly) NSInteger bins; +@property(nonatomic, readonly) NSInteger frames; + ++ (instancetype)new NS_UNAVAILABLE; +- (instancetype)init NS_UNAVAILABLE; + +@end + +/** + A tagged container for a single multimodal input item used by + multimodal generation APIs. +*/ +NS_SWIFT_NAME(MultimodalInput) +__attribute__((deprecated("This API is experimental."))) +@interface ExecuTorchLLMMultimodalInput : NSObject + +/** + Creates a text input. + + @param text The UTF-8 text to provide as input. + @return A retained ExecuTorchLLMMultimodalInput instance of type Text. +*/ ++ (instancetype)inputWithText:(NSString *)text + NS_SWIFT_NAME(init(_:)) + NS_RETURNS_RETAINED; + +/** + Creates an image input. + + @param image The image payload to provide as input. + @return A retained ExecuTorchLLMMultimodalInput instance of type Image. +*/ ++ (instancetype)inputWithImage:(ExecuTorchLLMImage *)image + NS_SWIFT_NAME(init(_:)) + NS_RETURNS_RETAINED; + +/** + Creates an audio-features input. + + @param audio The pre-processed audio features to provide as input. + @return A retained ExecuTorchLLMMultimodalInput instance of type Audio. 
+*/ ++ (instancetype)inputWithAudio:(ExecuTorchLLMAudio *)audio + NS_SWIFT_NAME(init(audio:)) + NS_RETURNS_RETAINED; + +@property(nonatomic, readonly) ExecuTorchLLMMultimodalInputType type; +@property(nonatomic, readonly, nullable) NSString *text; +@property(nonatomic, readonly, nullable) ExecuTorchLLMImage *image; +@property(nonatomic, readonly, nullable) ExecuTorchLLMAudio *audio; + ++ (instancetype)new NS_UNAVAILABLE; +- (instancetype)init NS_UNAVAILABLE; + +@end + +/** + A wrapper class for the C++ llm::MultimodalLLMRunner that provides + Objective-C APIs to load models, manage tokenization, accept mixed + input modalities, generate text sequences, and stop the runner. +*/ +NS_SWIFT_NAME(MultimodalRunner) +__attribute__((deprecated("This API is experimental."))) +@interface ExecuTorchLLMMultimodalRunner : NSObject + +/** + Initializes a multimodal LLM runner with the given model and tokenizer paths. + + @param modelPath File system path to the serialized model. + @param tokenizerPath File system path to the tokenizer data. + @return An initialized ExecuTorchLLMMultimodalRunner instance. +*/ +- (instancetype)initWithModelPath:(NSString *)modelPath + tokenizerPath:(NSString *)tokenizerPath + NS_DESIGNATED_INITIALIZER; + +/** + Checks whether the underlying model has been successfully loaded. + + @return YES if the model is loaded, NO otherwise. +*/ +- (BOOL)isLoaded; + +/** + Loads the model into memory, returning an error if loading fails. + + @param error On failure, populated with an NSError explaining the issue. + @return YES if loading succeeds, NO if an error occurred. +*/ +- (BOOL)loadWithError:(NSError **)error; + +/** + Generates text given a list of multimodal inputs, up to a specified sequence length. + Invokes the provided callback for each generated token. + + @param inputs An ordered array of multimodal inputs. + @param seq_len The maximum number of tokens to generate. + @param callback A block called with each generated token as an NSString. 
+ @param error On failure, populated with an NSError explaining the issue. + @return YES if generation completes successfully, NO if an error occurred. +*/ +- (BOOL)generate:(NSArray *)inputs + sequenceLength:(NSInteger)seq_len +withTokenCallback:(nullable void (^)(NSString *))callback + error:(NSError **)error; + +/** + Stops any ongoing generation and cleans up internal resources. +*/ +- (void)stop; + ++ (instancetype)new NS_UNAVAILABLE; +- (instancetype)init NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm new file mode 100644 index 00000000000..dcc5dc98806 --- /dev/null +++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm @@ -0,0 +1,219 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#import "ExecuTorchLLMMultimodalRunner.h" + +#import "ExecuTorchLLMError.h" + +#import + +using namespace executorch::extension; +using namespace executorch::runtime; + +@implementation ExecuTorchLLMImage + +- (instancetype)initWithData:(NSData *)data + width:(NSInteger)width + height:(NSInteger)height + channels:(NSInteger)channels { + if (self = [super init]) { + _data = [data copy]; + _width = width; + _height = height; + _channels = channels; + } + return self; +} + +- (id)copyWithZone:(NSZone *)zone { + return self; +} + +@end + +@implementation ExecuTorchLLMAudio + +- (instancetype)initWithData:(NSData *)data + batchSize:(NSInteger)batchSize + bins:(NSInteger)bins + frames:(NSInteger)frames { + if (self = [super init]) { + _data = [data copy]; + _batchSize = batchSize; + _bins = bins; + _frames = frames; + } + return self; +} + +- (id)copyWithZone:(NSZone *)zone { + return self; +} + +@end + +@interface ExecuTorchLLMMultimodalInput () + +- (instancetype)initWithType:(ExecuTorchLLMMultimodalInputType)type + text:(NSString * __nullable)text + image:(ExecuTorchLLMImage * __nullable)image + audio:(ExecuTorchLLMAudio * __nullable)audio + NS_DESIGNATED_INITIALIZER; + +@end + +@implementation ExecuTorchLLMMultimodalInput + ++ (instancetype)inputWithText:(NSString *)text { + return [[self alloc] initWithType:ExecuTorchLLMMultimodalInputTypeText + text:text + image:nil + audio:nil]; +} + ++ (instancetype)inputWithImage:(ExecuTorchLLMImage *)image { + return [[self alloc] initWithType:ExecuTorchLLMMultimodalInputTypeImage + text:nil + image:image + audio:nil]; +} + ++ (instancetype)inputWithAudio:(ExecuTorchLLMAudio *)audio { + return [[self alloc] initWithType:ExecuTorchLLMMultimodalInputTypeAudio + text:nil + image:nil + audio:audio]; +} + +- (instancetype)initWithType:(ExecuTorchLLMMultimodalInputType)type + text:(NSString * __nullable)text + image:(ExecuTorchLLMImage * __nullable)image + audio:(ExecuTorchLLMAudio * __nullable)audio { + if (self = [super 
init]) { + _type = type; + _text = [text copy]; + _image = image; + _audio = audio; + } + return self; +} + +- (id)copyWithZone:(NSZone *)zone { + return self; +} + +@end + +@implementation ExecuTorchLLMMultimodalRunner { + NSString *_modelPath; + NSString *_tokenizerPath; + std::unique_ptr _runner; +} + +- (instancetype)initWithModelPath:(NSString*)modelPath + tokenizerPath:(NSString*)tokenizerPath { + self = [super init]; + if (self) { + _modelPath = [modelPath copy]; + _tokenizerPath = [tokenizerPath copy]; + } + return self; +} + +- (BOOL)isLoaded { + return _runner && _runner->is_loaded(); +} + +- (BOOL)loadWithError:(NSError**)error { + if (![self isLoaded]) { + _runner = llm::create_multimodal_runner( + _modelPath.UTF8String, + llm::load_tokenizer(_tokenizerPath.UTF8String) + ); + if (!_runner) { + if (error) { + *error = [NSError errorWithDomain:ExecuTorchLLMErrorDomain + code:-1 + userInfo:@{NSLocalizedDescriptionKey: @"Failed to create runner"}]; + } + return NO; + } + } + auto status = _runner->load(); + if (status != Error::Ok) { + if (error) { + *error = [NSError errorWithDomain:ExecuTorchLLMErrorDomain + code:(NSInteger)status + userInfo:nil]; + } + return NO; + } + return YES; +} + +- (BOOL)generate:(NSArray *)inputs + sequenceLength:(NSInteger)seq_len +withTokenCallback:(nullable void (^)(NSString *))callback + error:(NSError **)error { + if (![self loadWithError:error]) { + return NO; + } + std::vector nativeInputs; + for (ExecuTorchLLMMultimodalInput *input in inputs) { + switch (input.type) { + case ExecuTorchLLMMultimodalInputTypeText: + nativeInputs.emplace_back(llm::MultimodalInput(input.text.UTF8String)); + break; + case ExecuTorchLLMMultimodalInputTypeImage: { + ExecuTorchLLMImage *image = input.image; + std::vector data((uint8_t *)image.data.bytes, (uint8_t *)image.data.bytes + image.data.length); + nativeInputs.emplace_back(llm::MultimodalInput(llm::Image{ + .data = std::move(data), + .width = (int32_t)image.width, + .height = 
(int32_t)image.height, + .channels = (int32_t)image.channels + })); + break; + } + default: { + if (error) { + *error = [NSError errorWithDomain:ExecuTorchLLMErrorDomain + code:-2 + userInfo:@{NSLocalizedDescriptionKey: @"Failed to create input"}]; + } + return NO; + } + } + } + auto status = _runner->generate( + std::move(nativeInputs), + llm::GenerationConfig{.seq_len = static_cast(seq_len)}, + [callback](const std::string& token) { + if (callback) { + callback(@(token.c_str())); + } + } + ); + if (status != Error::Ok) { + if (error) { + *error = [NSError errorWithDomain:ExecuTorchLLMErrorDomain + code:(NSInteger)status + userInfo:nil]; + } + return NO; + } + return YES; +} + +- (void)stop { + if (_runner) { + _runner->stop(); + } +} + +@end diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchTextLLMRunner.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h similarity index 87% rename from extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchTextLLMRunner.h rename to extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h index 74518605e90..b2c628fadf6 100644 --- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchTextLLMRunner.h +++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h @@ -10,16 +10,14 @@ NS_ASSUME_NONNULL_BEGIN -FOUNDATION_EXPORT NSErrorDomain const ExecuTorchTextLLMRunnerErrorDomain; - /** A wrapper class for the C++ llm::TextLLMRunner that provides Objective-C APIs to load models, manage tokenization with custom special tokens, generate text sequences, and stop the runner. 
*/ -NS_SWIFT_NAME(TextLLMRunner) +NS_SWIFT_NAME(TextRunner) __attribute__((deprecated("This API is experimental."))) -@interface ExecuTorchTextLLMRunner : NSObject +@interface ExecuTorchLLMTextRunner : NSObject /** Initializes a text LLM runner with the given model and tokenizer paths, @@ -28,11 +26,12 @@ __attribute__((deprecated("This API is experimental."))) @param modelPath File system path to the serialized model. @param tokenizerPath File system path to the tokenizer data. @param tokens An array of NSString special tokens to use during tokenization. - @return An initialized ExecuTorchTextLLMRunner instance. + @return An initialized ExecuTorchLLMTextRunner instance. */ - (instancetype)initWithModelPath:(NSString *)modelPath tokenizerPath:(NSString *)tokenizerPath - specialTokens:(NSArray *)tokens; + specialTokens:(NSArray *)tokens + NS_DESIGNATED_INITIALIZER; /** Checks whether the underlying model has been successfully loaded. @@ -69,6 +68,9 @@ withTokenCallback:(nullable void (^)(NSString *))callback */ - (void)stop; ++ (instancetype)new NS_UNAVAILABLE; +- (instancetype)init NS_UNAVAILABLE; + @end NS_ASSUME_NONNULL_END diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchTextLLMRunner.mm b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm similarity index 84% rename from extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchTextLLMRunner.mm rename to extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm index dd57d25dcde..ac50b000704 100644 --- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchTextLLMRunner.mm +++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm @@ -6,16 +6,16 @@ * LICENSE file in the root directory of this source tree. 
*/ -#import "ExecuTorchTextLLMRunner.h" +#import "ExecuTorchLLMTextRunner.h" + +#import "ExecuTorchLLMError.h" #import using namespace executorch::extension; using namespace executorch::runtime; -NSErrorDomain const ExecuTorchTextLLMRunnerErrorDomain = @"ExecuTorchTextLLMRunnerErrorDomain"; - -@implementation ExecuTorchTextLLMRunner { +@implementation ExecuTorchLLMTextRunner { NSString *_modelPath; NSString *_tokenizerPath; std::unique_ptr> _specialTokens; @@ -49,7 +49,7 @@ - (BOOL)loadWithError:(NSError**)error { ); if (!_runner) { if (error) { - *error = [NSError errorWithDomain:ExecuTorchTextLLMRunnerErrorDomain + *error = [NSError errorWithDomain:ExecuTorchLLMErrorDomain code:-1 userInfo:@{NSLocalizedDescriptionKey: @"Failed to create runner"}]; } @@ -59,7 +59,7 @@ - (BOOL)loadWithError:(NSError**)error { auto status = _runner->load(); if (status != Error::Ok) { if (error) { - *error = [NSError errorWithDomain:ExecuTorchTextLLMRunnerErrorDomain + *error = [NSError errorWithDomain:ExecuTorchLLMErrorDomain code:(NSInteger)status userInfo:nil]; } @@ -79,12 +79,14 @@ - (BOOL)generate:(NSString*)prompt prompt.UTF8String, llm::GenerationConfig{.seq_len = static_cast(seq_len)}, [callback](const std::string& token) { - if (callback) callback(@(token.c_str())); + if (callback) { + callback(@(token.c_str())); + } } ); if (status != Error::Ok) { if (error) { - *error = [NSError errorWithDomain:ExecuTorchTextLLMRunnerErrorDomain + *error = [NSError errorWithDomain:ExecuTorchLLMErrorDomain code:(NSInteger)status userInfo:nil]; } diff --git a/extension/llm/apple/ExecuTorchLLM/__tests__/MultimodalRunnerTest.swift b/extension/llm/apple/ExecuTorchLLM/__tests__/MultimodalRunnerTest.swift new file mode 100644 index 00000000000..55bcbb0f407 --- /dev/null +++ b/extension/llm/apple/ExecuTorchLLM/__tests__/MultimodalRunnerTest.swift @@ -0,0 +1,33 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. 
+ * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +import ExecuTorchLLM +import XCTest + +class MultimodalRunnerTest: XCTestCase { + func test() { + let bundle = Bundle(for: type(of: self)) + guard let modelPath = bundle.path(forResource: "llava", ofType: "pte"), + let tokenizerPath = bundle.path(forResource: "tokenizer", ofType: "bin") else { + XCTFail("Couldn't find model or tokenizer files") + return + } + return + let runner = MultimodalRunner(modelPath: modelPath, tokenizerPath: tokenizerPath) + var text = "" + + do { + try runner.generate([MultimodalInput("hello")], sequenceLength: 2) { token in + text += token + } + } catch { + XCTFail("Failed to generate text with error \(error)") + } + XCTAssertEqual("hello,", text.lowercased()) + } +} diff --git a/extension/llm/apple/ExecuTorchLLM/__tests__/TextLLMRunnerTest.swift b/extension/llm/apple/ExecuTorchLLM/__tests__/TextRunnerTest.swift similarity index 89% rename from extension/llm/apple/ExecuTorchLLM/__tests__/TextLLMRunnerTest.swift rename to extension/llm/apple/ExecuTorchLLM/__tests__/TextRunnerTest.swift index 030da22295d..42dbac8ae30 100644 --- a/extension/llm/apple/ExecuTorchLLM/__tests__/TextLLMRunnerTest.swift +++ b/extension/llm/apple/ExecuTorchLLM/__tests__/TextRunnerTest.swift @@ -35,7 +35,7 @@ struct SpecialTokens { } } -class TextLLMRunnerTest: XCTestCase { +class TextRunnerTest: XCTestCase { func test() { let bundle = Bundle(for: type(of: self)) guard let modelPath = bundle.path(forResource: "llama3_2-1B", ofType: "pte"), @@ -43,7 +43,7 @@ class TextLLMRunnerTest: XCTestCase { XCTFail("Couldn't find model or tokenizer files") return } - let runner = TextLLMRunner(modelPath: modelPath, tokenizerPath: tokenizerPath, specialTokens: SpecialTokens.defaultSpecialTokens()) + let runner = TextRunner(modelPath: modelPath, tokenizerPath: tokenizerPath, specialTokens: SpecialTokens.defaultSpecialTokens()) var text 
= "" do {