@@ -6,6 +6,7 @@
* LICENSE file in the root directory of this source tree.
*/

#import "ExecuTorchLLMConfig.h"
#import "ExecuTorchLLMError.h"
#import "ExecuTorchLLMMultimodalRunner.h"
#import "ExecuTorchLLMTextRunner.h"
@@ -0,0 +1,56 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#import <Foundation/Foundation.h>

NS_ASSUME_NONNULL_BEGIN

/**
A configuration object for text generation.

This class wraps the underlying C++ GenerationConfig so that default
values and future fields remain a single source of truth in C++.
*/
NS_SWIFT_NAME(Config)
__attribute__((deprecated("This API is experimental.")))
__attribute__((objc_subclassing_restricted))
@interface ExecuTorchLLMConfig : NSObject<NSCopying>

/** Whether to echo the input prompt in the output. */
@property(nonatomic, getter=isEchoEnabled) BOOL echoEnabled;

/** Maximum number of new tokens to generate. */
@property(nonatomic) NSInteger maximumNewTokens;

/** Whether this is a warmup run. */
@property(nonatomic, getter=isWarming) BOOL warming;

/** Maximum total sequence length. */
@property(nonatomic) NSInteger sequenceLength;

/** Temperature for sampling. */
@property(nonatomic) double temperature;

/** Number of BOS tokens to add. */
@property(nonatomic) NSInteger bosCount;

/** Number of EOS tokens to add. */
@property(nonatomic) NSInteger eosCount;

/**
Initializes a configuration and invokes the block to mutate it.

@param block A block that receives the newly initialized configuration.
@return An initialized ExecuTorchLLMConfig instance.
*/
- (instancetype)initWithBlock:(NS_NOESCAPE void (^)(ExecuTorchLLMConfig *))block
NS_SWIFT_NAME(init(_:));

@end

NS_ASSUME_NONNULL_END
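
For reference, a minimal Swift sketch of how the block-based initializer above might be used, going by the NS_SWIFT_NAME annotations (Config, init(_:)) and the usual Swift import of the BOOL property with an is-prefixed getter. The module name ExecuTorchLLM is an assumption, not something stated in this diff.

import ExecuTorchLLM  // assumed module name

// Build a generation configuration in place via the block-based initializer.
let config = Config {
  $0.isEchoEnabled = false     // echoEnabled: do not repeat the prompt in the output
  $0.maximumNewTokens = 128    // cap on newly generated tokens
  $0.sequenceLength = 2048     // maximum total sequence length
  $0.temperature = 0.8         // sampling temperature
}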
115 changes: 115 additions & 0 deletions extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMConfig.mm
@@ -0,0 +1,115 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#import "ExecuTorchLLMConfig.h"

#import <executorch/extension/llm/runner/irunner.h>

using namespace executorch::extension;

@interface ExecuTorchLLMConfig ()

- (const llm::GenerationConfig &)nativeConfig;

@end

@implementation ExecuTorchLLMConfig {
std::unique_ptr<llm::GenerationConfig> _config;
}

@dynamic echoEnabled;
@dynamic maximumNewTokens;
@dynamic warming;
@dynamic sequenceLength;
@dynamic temperature;
@dynamic bosCount;
@dynamic eosCount;

- (instancetype)init {
if (self = [super init]) {
_config = std::make_unique<llm::GenerationConfig>();
}
return self;
}

- (instancetype)initWithBlock:(NS_NOESCAPE void (^)(ExecuTorchLLMConfig *))block {
if (self = [self init]) {
if (block) {
block(self);
}
}
return self;
}

- (id)copyWithZone:(NSZone *)zone {
ExecuTorchLLMConfig *config = [[[self class] allocWithZone:zone] init];
*config->_config = *_config;
return config;
}

- (const llm::GenerationConfig &)nativeConfig {
return *_config;
}

- (BOOL)echoEnabled {
return _config->echo;
}

- (void)setEchoEnabled:(BOOL)echoEnabled {
_config->echo = echoEnabled;
}

- (NSInteger)maximumNewTokens {
return _config->max_new_tokens;
}

- (void)setMaximumNewTokens:(NSInteger)maximumNewTokens {
_config->max_new_tokens = (int32_t)maximumNewTokens;
}

- (BOOL)warming {
return _config->warming;
}

- (void)setWarming:(BOOL)warming {
_config->warming = warming;
}

- (NSInteger)sequenceLength {
return _config->seq_len;
}

- (void)setSequenceLength:(NSInteger)sequenceLength {
_config->seq_len = (int32_t)sequenceLength;
}

- (double)temperature {
return _config->temperature;
}

- (void)setTemperature:(double)temperature {
_config->temperature = (float)temperature;
}

- (NSInteger)bosCount {
return _config->num_bos;
}

- (void)setBosCount:(NSInteger)bosCount {
_config->num_bos = (int32_t)bosCount;
}

- (NSInteger)eosCount {
return _config->num_eos;
}

- (void)setEosCount:(NSInteger)eosCount {
_config->num_eos = (int32_t)eosCount;
}

@end
@@ -6,7 +6,7 @@
* LICENSE file in the root directory of this source tree.
*/

#import <Foundation/Foundation.h>
#import "ExecuTorchLLMConfig.h"

NS_ASSUME_NONNULL_BEGIN

@@ -26,6 +26,7 @@ typedef NS_ENUM(NSInteger, ExecuTorchLLMMultimodalInputType) {
*/
NS_SWIFT_NAME(Image)
__attribute__((deprecated("This API is experimental.")))
__attribute__((objc_subclassing_restricted))
@interface ExecuTorchLLMImage : NSObject<NSCopying>

/**
@@ -44,8 +45,11 @@ __attribute__((deprecated("This API is experimental.")))
NS_DESIGNATED_INITIALIZER;

@property(nonatomic, readonly) NSData *data;

@property(nonatomic, readonly) NSInteger width;

@property(nonatomic, readonly) NSInteger height;

@property(nonatomic, readonly) NSInteger channels;

+ (instancetype)new NS_UNAVAILABLE;
@@ -58,6 +62,7 @@ __attribute__((deprecated("This API is experimental.")))
*/
NS_SWIFT_NAME(Audio)
__attribute__((deprecated("This API is experimental.")))
__attribute__((objc_subclassing_restricted))
@interface ExecuTorchLLMAudio : NSObject<NSCopying>

/**
@@ -76,8 +81,11 @@ __attribute__((deprecated("This API is experimental.")))
NS_DESIGNATED_INITIALIZER;

@property(nonatomic, readonly) NSData *data;

@property(nonatomic, readonly) NSInteger batchSize;

@property(nonatomic, readonly) NSInteger bins;

@property(nonatomic, readonly) NSInteger frames;

+ (instancetype)new NS_UNAVAILABLE;
@@ -91,6 +99,7 @@ __attribute__((deprecated("This API is experimental.")))
*/
NS_SWIFT_NAME(MultimodalInput)
__attribute__((deprecated("This API is experimental.")))
__attribute__((objc_subclassing_restricted))
@interface ExecuTorchLLMMultimodalInput : NSObject<NSCopying>

/**
@@ -124,8 +133,11 @@ __attribute__((deprecated("This API is experimental.")))
NS_RETURNS_RETAINED;

@property(nonatomic, readonly) ExecuTorchLLMMultimodalInputType type;

@property(nonatomic, readonly, nullable) NSString *text;

@property(nonatomic, readonly, nullable) ExecuTorchLLMImage *image;

@property(nonatomic, readonly, nullable) ExecuTorchLLMAudio *audio;

+ (instancetype)new NS_UNAVAILABLE;
@@ -134,12 +146,13 @@ __attribute__((deprecated("This API is experimental.")))
@end

/**
A wrapper class for the C++ llm::MultimodalLLMRunner that provides
A wrapper class for the C++ llm::MultimodalRunner that provides
Objective-C APIs to load models, manage tokenization, accept mixed
input modalities, generate text sequences, and stop the runner.
*/
NS_SWIFT_NAME(MultimodalRunner)
__attribute__((deprecated("This API is experimental.")))
__attribute__((objc_subclassing_restricted))
@interface ExecuTorchLLMMultimodalRunner : NSObject

/**
@@ -169,29 +182,32 @@ __attribute__((deprecated("This API is experimental.")))
- (BOOL)loadWithError:(NSError **)error;

/**
Generates text given a list of multimodal inputs, up to a specified sequence length.
Invokes the provided callback for each generated token.
Generates text given a list of multimodal inputs, using the provided
configuration object to control generation.

@param inputs An ordered array of multimodal inputs.
@param seq_len The maximum number of tokens to generate.
@param callback A block called with each generated token as an NSString.
@param error On failure, populated with an NSError explaining the issue.
The token callback, if provided, is invoked for each generated token.

@param inputs An ordered array of multimodal inputs.
@param config A configuration object.
@param callback A block called with each generated token as an NSString.
@param error On failure, populated with an NSError explaining the issue.
@return YES if generation completes successfully, NO if an error occurred.
*/
- (BOOL)generate:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
sequenceLength:(NSInteger)seq_len
config:(ExecuTorchLLMConfig *)config
withTokenCallback:(nullable void (^)(NSString *))callback
error:(NSError **)error;
error:(NSError **)error
NS_SWIFT_NAME(generate(_:_:tokenCallback:));

/**
Stop producing new tokens and terminate the current generation process.
*/
- (void)stop;

/**
Remove the prefilled tokens from the KV cache and resets the start position
to 0. It also clears the stats for previous runs.
*/
Remove the prefilled tokens from the KV cache and reset the start position
to 0. It also clears the stats for previous runs.
*/
- (void)reset;

+ (instancetype)new NS_UNAVAILABLE;
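
As a usage illustration, a minimal Swift sketch of the updated generate call, based on the NS_SWIFT_NAME(generate(_:_:tokenCallback:)) mapping above. The runner instance and the MultimodalInput values are assumed to exist already; their construction is outside the lines shown in this diff.

// `runner` is an already-loaded MultimodalRunner and `inputs` is a prepared
// [MultimodalInput] array; both are assumed, their setup is not shown here.
let config = Config {
  $0.maximumNewTokens = 256
  $0.sequenceLength = 4096
}
do {
  try runner.generate(inputs, config) { token in
    // Invoked once per generated token.
    print(token, terminator: "")
  }
} catch {
  print("Generation failed: \(error)")
}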
@@ -15,6 +15,12 @@
using namespace executorch::extension;
using namespace executorch::runtime;

@interface ExecuTorchLLMConfig ()

- (const llm::GenerationConfig &)nativeConfig;

@end

@implementation ExecuTorchLLMImage

- (instancetype)initWithData:(NSData *)data
@@ -157,7 +163,7 @@ - (BOOL)loadWithError:(NSError**)error {
}

- (BOOL)generate:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
sequenceLength:(NSInteger)seq_len
config:(ExecuTorchLLMConfig *)config
withTokenCallback:(nullable void (^)(NSString *))callback
error:(NSError **)error {
if (![self loadWithError:error]) {
@@ -192,7 +198,7 @@ - (BOOL)generate:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
}
auto status = _runner->generate(
std::move(nativeInputs),
llm::GenerationConfig{.seq_len = static_cast<int32_t>(seq_len)},
config.nativeConfig,
[callback](const std::string& token) {
if (callback) {
callback(@(token.c_str()));
@@ -6,7 +6,7 @@
* LICENSE file in the root directory of this source tree.
*/

#import <Foundation/Foundation.h>
#import "ExecuTorchLLMConfig.h"

NS_ASSUME_NONNULL_BEGIN

@@ -49,29 +49,32 @@ __attribute__((deprecated("This API is experimental.")))
- (BOOL)loadWithError:(NSError **)error;

/**
Generates text given an input prompt, up to a specified sequence length.
Invokes the provided callback for each generated token.
Generates text given an input prompt, using the provided configuration
object to control generation.

@param prompt The initial text prompt to generate from.
@param seq_len The maximum number of tokens to generate.
@param callback A block called with each generated token as an NSString.
@param error On failure, populated with an NSError explaining the issue.
The token callback, if provided, is invoked for each generated token.

@param prompt The initial text prompt to generate from.
@param config A configuration object.
@param callback A block called with each generated token as an NSString.
@param error On failure, populated with an NSError explaining the issue.
@return YES if generation completes successfully, NO if an error occurred.
*/
- (BOOL)generate:(NSString *)prompt
sequenceLength:(NSInteger)seq_len
withTokenCallback:(nullable void (^)(NSString *))callback
error:(NSError **)error;
config:(ExecuTorchLLMConfig *)config
withTokenCallback:(nullable void (^)(NSString *token))callback
error:(NSError **)error
NS_SWIFT_NAME(generate(_:_:tokenCallback:));

/**
Stop producing new tokens and terminate the current generation process.
*/
- (void)stop;

/**
Remove the prefilled tokens from the KV cache and resets the start position
to 0. It also clears the stats for previous runs.
*/
Remove the prefilled tokens from the KV cache and reset the start position
to 0. It also clears the stats for previous runs.
*/
- (void)reset;

+ (instancetype)new NS_UNAVAILABLE;
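
Similarly, a minimal Swift sketch of the text-generation call after this change. The class is assumed to import into Swift as TextRunner (its NS_SWIFT_NAME is outside the lines shown), and `runner` is assumed to be an already-created instance.

// `runner` is an already-created TextRunner (assumed Swift name); its setup is not shown.
let config = Config {
  $0.maximumNewTokens = 128
  $0.temperature = 0.7
}
do {
  try runner.generate("Write a haiku about autumn.", config) { token in
    print(token, terminator: "")
  }
} catch {
  print("Generation failed: \(error)")
}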