Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions backends/apple/coreml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,14 @@ if(APPLE)
target_link_libraries(coremldelegate PRIVATE libprotobuf-lite)
endif()

# Add the nlohmann_json include directory (header-only library). Define
# JSON_NOEXCEPTION because coremldelegate is compiled with -fno-exceptions.
target_include_directories(
coremldelegate
PRIVATE ${PROJECT_SOURCE_DIR}/third-party/json/single_include
)
target_compile_definitions(coremldelegate PRIVATE JSON_NOEXCEPTION)

target_link_libraries(
coremldelegate
PUBLIC coreml_util coreml_inmemoryfs
Expand Down
486 changes: 423 additions & 63 deletions backends/apple/coreml/compiler/coreml_preprocess.py

Large diffs are not rendered by default.

31 changes: 29 additions & 2 deletions backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,26 @@ - (nullable instancetype)initWithAsset:(ETCoreMLAsset *)asset
if (self) {
_mlModel = mlModel;
_asset = asset;
_orderedInputNames = [orderedInputNames copy];
_orderedOutputNames = [orderedOutputNames copy];

// Use provided ordered names, or derive from model description as fallback
if (orderedInputNames != nil) {
_orderedInputNames = [orderedInputNames copy];
} else {
// Derive input names from the model's description in sorted order for determinism
NSArray<NSString *> *inputKeys = mlModel.modelDescription.inputDescriptionsByName.allKeys;
NSArray<NSString *> *sortedInputKeys = [inputKeys sortedArrayUsingSelector:@selector(compare:)];
_orderedInputNames = [NSMutableOrderedSet orderedSetWithArray:sortedInputKeys];
}

if (orderedOutputNames != nil) {
_orderedOutputNames = [orderedOutputNames copy];
} else {
// Derive output names from the model's description in sorted order for determinism
NSArray<NSString *> *outputKeys = mlModel.modelDescription.outputDescriptionsByName.allKeys;
NSArray<NSString *> *sortedOutputKeys = [outputKeys sortedArrayUsingSelector:@selector(compare:)];
_orderedOutputNames = [NSMutableOrderedSet orderedSetWithArray:sortedOutputKeys];
}

_cache = [[NSCache alloc] init];
_inputConstraintsByName = get_multi_array_input_constraints_by_name(mlModel.modelDescription);
_outputConstraintsByName = get_multi_array_output_constraints_by_name(mlModel.modelDescription);
Expand Down Expand Up @@ -234,6 +252,15 @@ - (NSString *)identifier {
BOOL lCopyData = copyData;
NSString *argName = [nameEnumerator nextObject];
MLMultiArrayConstraint *constraint = argConstraintsByName[argName];

if (constraint == nil) {
ETCoreMLLogErrorAndSetNSError(error,
ETCoreMLErrorCorruptedModel,
"No constraint found for arg '%@'. Model may have mismatched input/output names.",
argName);
return nil;
}

const auto& layout = arg.layout();
auto dataType = to_ml_multiarray_data_type(layout.dataType());
MLMultiArray *multiArrayArg = nil;
Expand Down
37 changes: 15 additions & 22 deletions backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.mm
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,6 @@

return result;
}

/// Builds an `ETCoreMLModel` for `asset`, handing the initializer the input and
/// output names stored in `metadata` (converted to ordered sets, in stored order).
///
/// @param asset The asset passed to the model initializer.
/// @param configuration The configuration passed to the model initializer.
/// @param metadata Supplies `input_names` and `output_names`.
/// @param error Forwarded unchanged to the model initializer.
/// @retval The value returned by the initializer (may be `nil`).
ETCoreMLModel * _Nullable get_model_from_asset(ETCoreMLAsset *asset,
                                               MLModelConfiguration *configuration,
                                               const executorchcoreml::ModelMetadata& metadata,
                                               NSError * __autoreleasing *error) {
    NSOrderedSet<NSString *> *inputNames = ::get_ordered_set(metadata.input_names);
    NSOrderedSet<NSString *> *outputNames = ::get_ordered_set(metadata.output_names);
    return [[ETCoreMLModel alloc] initWithAsset:asset
                                  configuration:configuration
                              orderedInputNames:inputNames
                             orderedOutputNames:outputNames
                                          error:error];
}
} // namespace

@implementation ETCoreMLModelLoader
Expand All @@ -48,15 +34,22 @@ + (nullable ETCoreMLModel *)loadModelWithCompiledAsset:(ETCoreMLAsset *)compiled
configuration:(MLModelConfiguration *)configuration
metadata:(const executorchcoreml::ModelMetadata&)metadata
error:(NSError * __autoreleasing *)error {
NSError *localError = nil;
ETCoreMLModel *model = (compiledAsset != nil) ? get_model_from_asset(compiledAsset, configuration, metadata, &localError) : nil;
if (model) {
return model;
if (compiledAsset == nil) {
return nil;
}
if (error) {
*error = localError;
}
return nil;

// Use the metadata's ordered input/output names.
// For multifunction models, the caller should load the per-method metadata
// which contains the correct input/output names for that method.
NSOrderedSet<NSString *> *orderedInputNames = ::get_ordered_set(metadata.input_names);
NSOrderedSet<NSString *> *orderedOutputNames = ::get_ordered_set(metadata.output_names);

ETCoreMLModel *model = [[ETCoreMLModel alloc] initWithAsset:compiledAsset
configuration:configuration
orderedInputNames:orderedInputNames
orderedOutputNames:orderedOutputNames
error:error];
return model;
}


Expand Down
17 changes: 17 additions & 0 deletions backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,23 @@ __attribute__((objc_subclassing_restricted))
configuration:(MLModelConfiguration*)configuration
error:(NSError* __autoreleasing*)error;

/// Loads the model from the AOT data, optionally selecting one method of a multifunction model.
///
/// The data is the AOT blob stored in the executorch Program. The method first parses the model
/// metadata stored in the blob and extracts the identifier. When the metadata describes a
/// multifunction model and a methodName is provided, the input/output names recorded for that
/// method are used and the method name is set as the configuration's functionName (this requires
/// iOS 18.0+ / macOS 15.0+). The method name is intentionally NOT appended to the cache
/// identifier: all methods of a multifunction model share the same compiled cache entry.
///
/// @param data The AOT blob data.
/// @param configuration The model configuration that will be used to load the model. Its
/// functionName may be set by this call when the model is multifunction.
/// @param methodName Optional method name (e.g., "forward", "prefill"). Ignored when the model
/// is not a multifunction model.
/// @param error On failure, error is filled with the failure information.
/// @retval An opaque handle that points to the loaded model.
- (ModelHandle*)loadModelFromAOTData:(NSData*)data
configuration:(MLModelConfiguration*)configuration
methodName:(nullable NSString*)methodName
error:(NSError* __autoreleasing*)error;

/// Executes the loaded model.
///
/// @param handle The handle to the loaded model.
Expand Down
108 changes: 104 additions & 4 deletions backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,39 @@ void set_outputs(std::vector<executorchcoreml::MultiArray>& outputs,
return std::nullopt;
}

/// Loads the model metadata and, for multifunction models, replaces the
/// top-level `input_names`/`output_names` with those of the requested method.
///
/// Lookup order for a multifunction model when `methodName` is non-empty:
///   1. the entry for `methodName`;
///   2. otherwise the entry for `default_method` (when one is set);
///   3. otherwise the top-level names are left as loaded.
///
/// @param inMemoryFS The in-memory file system passed to `get_model_metadata`.
/// @param methodName The method whose names should be used; may be `nil` or empty,
/// in which case the metadata is returned as loaded.
/// @retval The metadata, or `std::nullopt` if `get_model_metadata` returned no value.
std::optional<ModelMetadata> get_model_metadata_for_method(const inmemoryfs::InMemoryFileSystem *inMemoryFS,
                                                           NSString *methodName) {
    std::optional<ModelMetadata> metadata_opt = get_model_metadata(inMemoryFS);
    if (!metadata_opt.has_value()) {
        return std::nullopt;
    }

    ModelMetadata& metadata = metadata_opt.value();

    // Messaging nil yields 0, so this covers both a nil and an empty method name.
    if (!metadata.is_multifunction() || methodName.length == 0) {
        return metadata_opt;
    }

    const MethodMetadata *selected = nullptr;

    // -UTF8String is nullable; constructing a std::string from NULL is undefined
    // behaviour, so only look the method up when the conversion succeeded.
    const char *method_name_utf8 = [methodName UTF8String];
    if (method_name_utf8 != nullptr) {
        std::string method_name_str = method_name_utf8;
        selected = metadata.get_method_metadata(method_name_str);
    }

    // Method not found - fall back to the default method if available.
    if (selected == nullptr && !metadata.default_method.empty()) {
        selected = metadata.get_method_metadata(metadata.default_method);
    }

    if (selected != nullptr) {
        metadata.input_names = selected->input_names;
        metadata.output_names = selected->output_names;
    }

    // Return the optional itself (moved) rather than copying through the reference.
    return metadata_opt;
}

NSOrderedSet<NSString *> *get_ordered_set(const std::vector<std::string>& values) {
NSMutableOrderedSet<NSString *> *result = [NSMutableOrderedSet orderedSetWithCapacity:values.size()];
for (const auto& value : values) {
Expand Down Expand Up @@ -285,8 +318,13 @@ void set_outputs(std::vector<executorchcoreml::MultiArray>& outputs,

ETCoreMLModel * _Nullable get_model_from_asset(ETCoreMLAsset *asset,
MLModelConfiguration *configuration,
const ModelMetadata& metadata,
const executorchcoreml::ModelMetadata& metadata,
NSError * __autoreleasing *error) {
// Always use the metadata's ordered input/output names for consistency.
// The pytree flatten order during export determines the correct input order,
// and metadata captures this order.
// For multifunction models, all functions share the same input/output names
// (they differ only in shapes, which are handled by multiArrayConstraint).
NSOrderedSet<NSString *> *orderedInputNames = ::get_ordered_set(metadata.input_names);
NSOrderedSet<NSString *> *orderedOutputNames = ::get_ordered_set(metadata.output_names);
ETCoreMLModel *model = [[ETCoreMLModel alloc] initWithAsset:asset
Expand Down Expand Up @@ -322,6 +360,29 @@ void add_compute_unit(std::string& identifier, MLComputeUnits compute_units) {
identifier.append(to_string(compute_units));
}

/// Intentionally a no-op: the CoreML function name is NOT made part of the
/// cache identifier.
///
/// A multifunction model contains ALL of its functions, so it should be
/// compiled only once. Which function gets invoked is decided at runtime by the
/// functionName set on MLModelConfiguration when the MLModel is created from
/// the cached compiled files.
///
/// Earlier behaviour appended "_func_{name}" to the identifier, which led to
/// one redundant compilation per function.
///
/// @param identifier The cache identifier; left unmodified.
/// @param configuration The model configuration; not inspected.
void add_function_name([[maybe_unused]] std::string& identifier,
                       [[maybe_unused]] MLModelConfiguration *configuration) {
}

/// Intentionally a no-op: the ExecuTorch method name is NOT made part of the
/// cache identifier.
///
/// A multifunction model should be compiled only once and shared across all
/// methods/functions. The functionName set on MLModelConfiguration selects the
/// function invoked at runtime, while the compiled model is the same for all
/// functions.
///
/// @param identifier The cache identifier; left unmodified.
/// @param methodName The method name; not inspected.
void add_method_name([[maybe_unused]] std::string& identifier,
                     [[maybe_unused]] NSString *methodName) {
}

#if ET_EVENT_TRACER_ENABLED
ETCoreMLAsset * _Nullable make_asset(NSURL *url,
NSString *identifier,
Expand Down Expand Up @@ -612,8 +673,9 @@ - (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadat


- (nullable id<ETCoreMLModelExecutor>)_modelExecutorWithAOTData:(NSData *)data
configuration:(MLModelConfiguration *)configuration
error:(NSError * __autoreleasing *)error {
configuration:(MLModelConfiguration *)configuration
methodName:(nullable NSString *)methodName
error:(NSError * __autoreleasing *)error {
using namespace inmemoryfs;

auto buffer = MemoryBuffer::make_unowned(const_cast<void *>(data.bytes), data.length);
Expand All @@ -625,7 +687,9 @@ - (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadat
return nil;
}

std::optional<ModelMetadata> metadata = ::get_model_metadata(inMemoryFS.get());
// For multifunction models, try to load method-specific metadata first.
// This ensures we get the correct input/output names for this method.
std::optional<ModelMetadata> metadata = ::get_model_metadata_for_method(inMemoryFS.get(), methodName);
if (!metadata) {
ETCoreMLLogErrorAndSetNSError(error,
ETCoreMLErrorCorruptedMetadata,
Expand All @@ -634,7 +698,32 @@ - (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadat
}

auto metadataValue = metadata.value();

// For multifunction CoreML models (ML Programs with multiple functions),
// we need to set functionName to select the correct function within the model.
// However, legacy single-function models require functionName to be nil.
// The metadata's "methods" field indicates if this is a multifunction model.
if (metadataValue.is_multifunction() && methodName != nil) {
#if defined(__IPHONE_18_0) || defined(__MAC_15_0) || defined(__TVOS_18_0) || defined(__WATCHOS_11_0)
if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) {
configuration.functionName = methodName;
} else {
ETCoreMLLogErrorAndSetNSError(error,
ETCoreMLErrorCorruptedModel,
"Multifunction CoreML models require iOS 18.0+ / macOS 15.0+.");
return nil;
}
#else
ETCoreMLLogErrorAndSetNSError(error,
ETCoreMLErrorCorruptedModel,
"Multifunction CoreML models require iOS 18.0+ / macOS 15.0+ SDK to build.");
return nil;
#endif
}

add_compute_unit(metadataValue.identifier, configuration.computeUnits);
add_function_name(metadataValue.identifier, configuration);
add_method_name(metadataValue.identifier, methodName);
NSString *identifier = @(metadataValue.identifier.c_str());
// If there are multiple calls to load the same model, we only want to compile it once.
__block id<ETCoreMLModelExecutor> executor = nil;
Expand Down Expand Up @@ -665,8 +754,19 @@ - (dispatch_queue_t)queueForLoadingModelWithIdentifier:(NSString *)identifier {
- (ModelHandle *)loadModelFromAOTData:(NSData*)data
                        configuration:(MLModelConfiguration*)configuration
                                error:(NSError* __autoreleasing*)error {
    // Convenience entry point: forwards to the method-aware variant with no
    // method name.
    ModelHandle *handle = [self loadModelFromAOTData:data
                                       configuration:configuration
                                          methodName:nil
                                               error:error];
    return handle;
}

- (ModelHandle *)loadModelFromAOTData:(NSData*)data
configuration:(MLModelConfiguration*)configuration
methodName:(nullable NSString*)methodName
error:(NSError* __autoreleasing*)error {
id<ETCoreMLModelExecutor> executor = [self _modelExecutorWithAOTData:data
configuration:configuration
methodName:methodName
error:error];
{
os_unfair_lock_lock(&_lock);
Expand Down
5 changes: 4 additions & 1 deletion backends/apple/coreml/runtime/delegate/backend_delegate.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,12 @@ class BackendDelegate {
///
/// @param processed The AOT blob.
/// @param specs The specs at the time of compilation.
/// @param method_name The method name for multifunction model support (optional, may be nullptr).
/// @retval An opaque handle to the initialized blob or `nullptr` if the
/// initialization failed.
virtual Handle* init(Buffer processed, const std::unordered_map<std::string, Buffer>& specs) const noexcept = 0;
virtual Handle* init(Buffer processed,
const std::unordered_map<std::string, Buffer>& specs,
const char* method_name = nullptr) const noexcept = 0;

/// Must execute the CoreML model with the specified handle.
///
Expand Down
Loading
Loading