diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h
index 11cdaf63d0b..cef90617a11 100644
--- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h
+++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h
@@ -6,6 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#import "ExecuTorchLLMConfig.h"
 #import "ExecuTorchLLMError.h"
 #import "ExecuTorchLLMMultimodalRunner.h"
 #import "ExecuTorchLLMTextRunner.h"
diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMConfig.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMConfig.h
new file mode 100644
index 00000000000..5ecc9a0f004
--- /dev/null
+++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMConfig.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ A configuration object for text generation.
+
+ This class wraps the underlying C++ GenerationConfig so that default
+ values and future fields remain a single source of truth in C++.
+
+ Conforms to NSCopying; a copy carries its own duplicate of the settings.
+*/
+NS_SWIFT_NAME(Config)
+__attribute__((deprecated("This API is experimental.")))
+__attribute__((objc_subclassing_restricted))
+@interface ExecuTorchLLMConfig : NSObject<NSCopying>
+
+/** Whether to echo the input prompt in the output. */
+@property(nonatomic, getter=isEchoEnabled) BOOL echoEnabled;
+
+/** Maximum number of new tokens to generate. */
+@property(nonatomic) NSInteger maximumNewTokens;
+
+/** Whether this is a warmup run. */
+@property(nonatomic, getter=isWarming) BOOL warming;
+
+/** Maximum total sequence length. */
+@property(nonatomic) NSInteger sequenceLength;
+
+/** Temperature for sampling. */
+@property(nonatomic) double temperature;
+
+/** Number of BOS tokens to add. */
+@property(nonatomic) NSInteger bosCount;
+
+/** Number of EOS tokens to add. */
+@property(nonatomic) NSInteger eosCount;
+
+/**
+ Initializes a configuration and invokes the block to mutate it.
+
+ @param block A block that receives the newly initialized configuration.
+ @return An initialized ExecuTorchLLMConfig instance.
+*/
+- (instancetype)initWithBlock:(NS_NOESCAPE void (^)(ExecuTorchLLMConfig *))block
+    NS_SWIFT_NAME(init(_:));
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMConfig.mm b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMConfig.mm
new file mode 100644
index 00000000000..911f66e7d65
--- /dev/null
+++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMConfig.mm
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#import "ExecuTorchLLMConfig.h"
+
+#import <executorch/extension/llm/runner/irunner.h>
+
+using namespace executorch::extension;
+
+@interface ExecuTorchLLMConfig ()
+
+- (const llm::GenerationConfig &)nativeConfig;
+
+@end
+
+@implementation ExecuTorchLLMConfig {
+  std::unique_ptr<llm::GenerationConfig> _config;
+}
+
+// Every accessor below is written by hand and forwards to the native config,
+// so the compiler must not synthesize ivars or accessors of its own.
+@dynamic echoEnabled;
+@dynamic maximumNewTokens;
+@dynamic warming;
+@dynamic sequenceLength;
+@dynamic temperature;
+@dynamic bosCount;
+@dynamic eosCount;
+
+- (instancetype)init {
+  if (self = [super init]) {
+    _config = std::make_unique<llm::GenerationConfig>();
+  }
+  return self;
+}
+
+- (instancetype)initWithBlock:(NS_NOESCAPE void (^)(ExecuTorchLLMConfig *))block {
+  if (self = [self init]) {
+    // Tolerate nil even though the parameter is declared nonnull.
+    if (block) {
+      block(self);
+    }
+  }
+  return self;
+}
+
+- (id)copyWithZone:(NSZone *)zone {
+  ExecuTorchLLMConfig *config = [[[self class] allocWithZone:zone] init];
+  // Assign the struct by value so the copy owns an independent configuration.
+  *config->_config = *_config;
+  return config;
+}
+
+- (const llm::GenerationConfig &)nativeConfig {
+  return *_config;
+}
+
+// The selector has to match `getter=isEchoEnabled` from the header: with
+// @dynamic in effect, no other implementation of that getter exists.
+- (BOOL)isEchoEnabled {
+  return _config->echo;
+}
+
+- (void)setEchoEnabled:(BOOL)echoEnabled {
+  _config->echo = echoEnabled;
+}
+
+- (NSInteger)maximumNewTokens {
+  return _config->max_new_tokens;
+}
+
+- (void)setMaximumNewTokens:(NSInteger)maximumNewTokens {
+  // Narrowing cast: values outside the int32_t range are not preserved.
+  _config->max_new_tokens = (int32_t)maximumNewTokens;
+}
+
+// Named to match `getter=isWarming` from the header (see @dynamic above).
+- (BOOL)isWarming {
+  return _config->warming;
+}
+
+- (void)setWarming:(BOOL)warming {
+  _config->warming = warming;
+}
+
+- (NSInteger)sequenceLength {
+  return _config->seq_len;
+}
+
+- (void)setSequenceLength:(NSInteger)sequenceLength {
+  _config->seq_len = (int32_t)sequenceLength;
+}
+
+- (double)temperature {
+  return _config->temperature;
+}
+
+- (void)setTemperature:(double)temperature {
+  _config->temperature = (float)temperature;
+}
+
+- (NSInteger)bosCount {
+  return _config->num_bos;
+}
+
+- (void)setBosCount:(NSInteger)bosCount {
+  _config->num_bos = (int32_t)bosCount;
+}
+
+- (NSInteger)eosCount {
+  return _config->num_eos;
+}
+
+- (void)setEosCount:(NSInteger)eosCount {
+  _config->num_eos = (int32_t)eosCount;
+}
+
+@end
diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h
index 3121259921a..3eb7226ba76 100644
--- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h
+++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
*/ -#import +#import "ExecuTorchLLMConfig.h" NS_ASSUME_NONNULL_BEGIN @@ -26,6 +26,7 @@ typedef NS_ENUM(NSInteger, ExecuTorchLLMMultimodalInputType) { */ NS_SWIFT_NAME(Image) __attribute__((deprecated("This API is experimental."))) +__attribute__((objc_subclassing_restricted)) @interface ExecuTorchLLMImage : NSObject /** @@ -44,8 +45,11 @@ __attribute__((deprecated("This API is experimental."))) NS_DESIGNATED_INITIALIZER; @property(nonatomic, readonly) NSData *data; + @property(nonatomic, readonly) NSInteger width; + @property(nonatomic, readonly) NSInteger height; + @property(nonatomic, readonly) NSInteger channels; + (instancetype)new NS_UNAVAILABLE; @@ -58,6 +62,7 @@ __attribute__((deprecated("This API is experimental."))) */ NS_SWIFT_NAME(Audio) __attribute__((deprecated("This API is experimental."))) +__attribute__((objc_subclassing_restricted)) @interface ExecuTorchLLMAudio : NSObject /** @@ -76,8 +81,11 @@ __attribute__((deprecated("This API is experimental."))) NS_DESIGNATED_INITIALIZER; @property(nonatomic, readonly) NSData *data; + @property(nonatomic, readonly) NSInteger batchSize; + @property(nonatomic, readonly) NSInteger bins; + @property(nonatomic, readonly) NSInteger frames; + (instancetype)new NS_UNAVAILABLE; @@ -91,6 +99,7 @@ __attribute__((deprecated("This API is experimental."))) */ NS_SWIFT_NAME(MultimodalInput) __attribute__((deprecated("This API is experimental."))) +__attribute__((objc_subclassing_restricted)) @interface ExecuTorchLLMMultimodalInput : NSObject /** @@ -124,8 +133,11 @@ __attribute__((deprecated("This API is experimental."))) NS_RETURNS_RETAINED; @property(nonatomic, readonly) ExecuTorchLLMMultimodalInputType type; + @property(nonatomic, readonly, nullable) NSString *text; + @property(nonatomic, readonly, nullable) ExecuTorchLLMImage *image; + @property(nonatomic, readonly, nullable) ExecuTorchLLMAudio *audio; + (instancetype)new NS_UNAVAILABLE; @@ -134,12 +146,13 @@ __attribute__((deprecated("This API is experimental."))) @end 
/** - A wrapper class for the C++ llm::MultimodalLLMRunner that provides + A wrapper class for the C++ llm::MultimodalRunner that provides Objective-C APIs to load models, manage tokenization, accept mixed input modalities, generate text sequences, and stop the runner. */ NS_SWIFT_NAME(MultimodalRunner) __attribute__((deprecated("This API is experimental."))) +__attribute__((objc_subclassing_restricted)) @interface ExecuTorchLLMMultimodalRunner : NSObject /** @@ -169,19 +182,22 @@ __attribute__((deprecated("This API is experimental."))) - (BOOL)loadWithError:(NSError **)error; /** - Generates text given a list of multimodal inputs, up to a specified sequence length. - Invokes the provided callback for each generated token. + Generates text given a list of multimodal inputs, using the generation + settings carried by the supplied configuration object. - @param inputs An ordered array of multimodal inputs. - @param seq_len The maximum number of tokens to generate. - @param callback A block called with each generated token as an NSString. - @param error On failure, populated with an NSError explaining the issue. + The token callback, if provided, is invoked for each generated token. + + @param inputs An ordered array of multimodal inputs. + @param config The generation settings to use (see ExecuTorchLLMConfig). + @param callback A block called with each generated token as an NSString. + @param error On failure, populated with an NSError explaining the issue. @return YES if generation completes successfully, NO if an error occurred. */ - (BOOL)generate:(NSArray *)inputs - sequenceLength:(NSInteger)seq_len + config:(ExecuTorchLLMConfig *)config withTokenCallback:(nullable void (^)(NSString *))callback - error:(NSError **)error; + error:(NSError **)error + NS_SWIFT_NAME(generate(_:_:tokenCallback:)); /** Stop producing new tokens and terminate the current generation process. 
@@ -189,9 +205,9 @@ withTokenCallback:(nullable void (^)(NSString *))callback - (void)stop; /** - Remove the prefilled tokens from the KV cache and resets the start position - to 0. It also clears the stats for previous runs. - */ + Remove the prefilled tokens from the KV cache and reset the start position + to 0. It also clears the stats for previous runs. +*/ - (void)reset; + (instancetype)new NS_UNAVAILABLE; diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm index bdf78d3f15e..dd9b2065a26 100644 --- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm +++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm @@ -15,6 +15,12 @@ using namespace executorch::extension; using namespace executorch::runtime; +@interface ExecuTorchLLMConfig () + +- (const llm::GenerationConfig &)nativeConfig; + +@end + @implementation ExecuTorchLLMImage - (instancetype)initWithData:(NSData *)data @@ -157,7 +163,7 @@ - (BOOL)loadWithError:(NSError**)error { } - (BOOL)generate:(NSArray *)inputs - sequenceLength:(NSInteger)seq_len + config:(ExecuTorchLLMConfig *)config withTokenCallback:(nullable void (^)(NSString *))callback error:(NSError **)error { if (![self loadWithError:error]) { @@ -192,7 +198,7 @@ - (BOOL)generate:(NSArray *)inputs } auto status = _runner->generate( std::move(nativeInputs), - llm::GenerationConfig{.seq_len = static_cast(seq_len)}, + config.nativeConfig, [callback](const std::string& token) { if (callback) { callback(@(token.c_str())); diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h index ca9867ebbb0..3d42c4853f1 100644 --- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h +++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h @@ -6,7 +6,7 @@ * LICENSE file in 
the root directory of this source tree. */ -#import +#import "ExecuTorchLLMConfig.h" NS_ASSUME_NONNULL_BEGIN @@ -49,19 +49,22 @@ __attribute__((deprecated("This API is experimental."))) - (BOOL)loadWithError:(NSError **)error; /** - Generates text given an input prompt, up to a specified sequence length. - Invokes the provided callback for each generated token. + Generates text given an input prompt, using the generation + settings carried by the supplied configuration object. - @param prompt The initial text prompt to generate from. - @param seq_len The maximum number of tokens to generate. - @param callback A block called with each generated token as an NSString. - @param error On failure, populated with an NSError explaining the issue. + The token callback, if provided, is invoked for each generated token. + + @param prompt The initial text prompt to generate from. + @param config The generation settings to use (see ExecuTorchLLMConfig). + @param callback A block called with each generated token as an NSString. + @param error On failure, populated with an NSError explaining the issue. @return YES if generation completes successfully, NO if an error occurred. */ - (BOOL)generate:(NSString *)prompt - sequenceLength:(NSInteger)seq_len -withTokenCallback:(nullable void (^)(NSString *))callback - error:(NSError **)error; + config:(ExecuTorchLLMConfig *)config +withTokenCallback:(nullable void (^)(NSString *token))callback + error:(NSError **)error + NS_SWIFT_NAME(generate(_:_:tokenCallback:)); /** Stop producing new tokens and terminate the current generation process. @@ -69,9 +72,9 @@ withTokenCallback:(nullable void (^)(NSString *))callback - (void)stop; /** - Remove the prefilled tokens from the KV cache and resets the start position - to 0. It also clears the stats for previous runs. - */ + Remove the prefilled tokens from the KV cache and reset the start position + to 0. It also clears the stats for previous runs. 
+*/ - (void)reset; + (instancetype)new NS_UNAVAILABLE; diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm index f4516009694..6ce854a52f8 100644 --- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm +++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm @@ -15,6 +15,12 @@ using namespace executorch::extension; using namespace executorch::runtime; +@interface ExecuTorchLLMConfig () + +- (const llm::GenerationConfig &)nativeConfig; + +@end + @implementation ExecuTorchLLMTextRunner { NSString *_modelPath; NSString *_tokenizerPath; @@ -69,15 +75,15 @@ - (BOOL)loadWithError:(NSError**)error { } - (BOOL)generate:(NSString*)prompt - sequenceLength:(NSInteger)seq_len + config:(ExecuTorchLLMConfig *)config withTokenCallback:(nullable void (^)(NSString*))callback - error:(NSError**)error { + error:(NSError**)error { if (![self loadWithError:error]) { return NO; } auto status = _runner->generate( prompt.UTF8String, - llm::GenerationConfig{.seq_len = static_cast(seq_len)}, + config.nativeConfig, [callback](const std::string& token) { if (callback) { callback(@(token.c_str())); diff --git a/extension/llm/apple/ExecuTorchLLM/__tests__/MultimodalRunnerTest.swift b/extension/llm/apple/ExecuTorchLLM/__tests__/MultimodalRunnerTest.swift index b3de9b07a9d..cdf15f12350 100644 --- a/extension/llm/apple/ExecuTorchLLM/__tests__/MultimodalRunnerTest.swift +++ b/extension/llm/apple/ExecuTorchLLM/__tests__/MultimodalRunnerTest.swift @@ -77,7 +77,9 @@ class MultimodalRunnerTest: XCTestCase { MultimodalInput(systemPrompt), MultimodalInput(uiImage.asImage()), MultimodalInput("\(userPrompt) \(assistantPrompt)"), - ], sequenceLength: sequenceLength) { token in + ], Config { + $0.sequenceLength = sequenceLength + }) { token in text += token } } catch { @@ -92,7 +94,9 @@ class MultimodalRunnerTest: XCTestCase { MultimodalInput(systemPrompt), 
MultimodalInput(uiImage.asImage()), MultimodalInput("\(userPrompt) \(assistantPrompt)"), - ], sequenceLength: sequenceLength) { token in + ], Config { + $0.sequenceLength = sequenceLength + }) { token in text += token } } catch { diff --git a/extension/llm/apple/ExecuTorchLLM/__tests__/TextRunnerTest.swift b/extension/llm/apple/ExecuTorchLLM/__tests__/TextRunnerTest.swift index 6a91960b088..5e99af0c57f 100644 --- a/extension/llm/apple/ExecuTorchLLM/__tests__/TextRunnerTest.swift +++ b/extension/llm/apple/ExecuTorchLLM/__tests__/TextRunnerTest.swift @@ -50,7 +50,9 @@ class TextRunnerTest: XCTestCase { var text = "" do { - try runner.generate(userPrompt, sequenceLength: sequenceLength) { token in + try runner.generate(userPrompt, Config { + $0.sequenceLength = sequenceLength + }) { token in text += token } } catch { @@ -61,7 +63,9 @@ class TextRunnerTest: XCTestCase { text = "" runner.reset() do { - try runner.generate(userPrompt, sequenceLength: sequenceLength) { token in + try runner.generate(userPrompt, Config { + $0.sequenceLength = sequenceLength + }) { token in text += token } } catch {