pytorch · metascroy · Jan 15, 2026
@@ -220,6 +220,14 @@ if(APPLE)
     target_link_libraries(coremldelegate PRIVATE libprotobuf-lite)
   endif()
 
+  # Add nlohmann_json include directory (header-only library). Define
+  # JSON_NOEXCEPTION since coremldelegate is compiled with -fno-exceptions
+  target_include_directories(
+    coremldelegate
+    PRIVATE ${PROJECT_SOURCE_DIR}/third-party/json/single_include
+  )
+  target_compile_definitions(coremldelegate PRIVATE JSON_NOEXCEPTION)
+
   target_link_libraries(
     coremldelegate
     PUBLIC coreml_util coreml_inmemoryfs

@@ -204,8 +204,26 @@ - (nullable instancetype)initWithAsset:(ETCoreMLAsset *)asset
     if (self) {
         _mlModel = mlModel;
         _asset = asset;
-        _orderedInputNames = [orderedInputNames copy];
-        _orderedOutputNames = [orderedOutputNames copy];
+
+        // Use provided ordered names, or derive from model description as fallback
+        if (orderedInputNames != nil) {
+            _orderedInputNames = [orderedInputNames copy];
+        } else {
+            // Derive input names from the model's description in sorted order for determinism
+            NSArray<NSString *> *inputKeys = mlModel.modelDescription.inputDescriptionsByName.allKeys;
+            NSArray<NSString *> *sortedInputKeys = [inputKeys sortedArrayUsingSelector:@selector(compare:)];
+            _orderedInputNames = [NSMutableOrderedSet orderedSetWithArray:sortedInputKeys];
+        }
+
+        if (orderedOutputNames != nil) {
+            _orderedOutputNames = [orderedOutputNames copy];
+        } else {
+            // Derive output names from the model's description in sorted order for determinism
+            NSArray<NSString *> *outputKeys = mlModel.modelDescription.outputDescriptionsByName.allKeys;
+            NSArray<NSString *> *sortedOutputKeys = [outputKeys sortedArrayUsingSelector:@selector(compare:)];
+            _orderedOutputNames = [NSMutableOrderedSet orderedSetWithArray:sortedOutputKeys];
+        }
+
         _cache = [[NSCache alloc] init];
         _inputConstraintsByName = get_multi_array_input_constraints_by_name(mlModel.modelDescription);
         _outputConstraintsByName = get_multi_array_output_constraints_by_name(mlModel.modelDescription);
@@ -234,6 +252,15 @@ - (NSString *)identifier {
         BOOL lCopyData = copyData;
         NSString *argName = [nameEnumerator nextObject];
         MLMultiArrayConstraint *constraint = argConstraintsByName[argName];
+
+        if (constraint == nil) {
+            ETCoreMLLogErrorAndSetNSError(error,
+                                          ETCoreMLErrorCorruptedModel,
+                                          "No constraint found for arg '%@'. Model may have mismatched input/output names.",
+                                          argName);
+            return nil;
+        }
+
         const auto& layout = arg.layout();
         auto dataType = to_ml_multiarray_data_type(layout.dataType());
         MLMultiArray *multiArrayArg = nil;

@@ -26,20 +26,6 @@
 
         return result;
     }
-
-    ETCoreMLModel * _Nullable get_model_from_asset(ETCoreMLAsset *asset,
-                                                   MLModelConfiguration *configuration,
-                                                   const executorchcoreml::ModelMetadata& metadata,
-                                                   NSError * __autoreleasing *error) {
-        NSOrderedSet<NSString *> *orderedInputNames = ::get_ordered_set(metadata.input_names);
-        NSOrderedSet<NSString *> *orderedOutputNames = ::get_ordered_set(metadata.output_names);
-        ETCoreMLModel *model = [[ETCoreMLModel alloc] initWithAsset:asset
-                                                      configuration:configuration
-                                                  orderedInputNames:orderedInputNames
-                                                 orderedOutputNames:orderedOutputNames
-                                                              error:error];
-        return model;
-    }
 } // namespace
 
 @implementation ETCoreMLModelLoader
@@ -48,15 +34,22 @@ + (nullable ETCoreMLModel *)loadModelWithCompiledAsset:(ETCoreMLAsset *)compiled
                                           configuration:(MLModelConfiguration *)configuration
                                                metadata:(const executorchcoreml::ModelMetadata&)metadata
                                                 error:(NSError * __autoreleasing *)error {
-    NSError *localError = nil;
-    ETCoreMLModel *model = (compiledAsset != nil) ? get_model_from_asset(compiledAsset, configuration, metadata, &localError) : nil;
-    if (model) {
-        return model;
+    if (compiledAsset == nil) {
+        return nil;
     }
-    if (error) {
-        *error = localError;
-    }
-    return nil;
+
+    // Use the metadata's ordered input/output names.
+    // For multifunction models, the caller should load the per-method metadata
+    // which contains the correct input/output names for that method.
+    NSOrderedSet<NSString *> *orderedInputNames = ::get_ordered_set(metadata.input_names);
+    NSOrderedSet<NSString *> *orderedOutputNames = ::get_ordered_set(metadata.output_names);
+
+    ETCoreMLModel *model = [[ETCoreMLModel alloc] initWithAsset:compiledAsset
+                                                  configuration:configuration
+                                              orderedInputNames:orderedInputNames
+                                             orderedOutputNames:orderedOutputNames
+                                                          error:error];
+    return model;
 }
 
 

@@ -50,6 +50,23 @@ __attribute__((objc_subclassing_restricted))
                        configuration:(MLModelConfiguration*)configuration
                                error:(NSError* __autoreleasing*)error;
 
+/// Loads the model from the AOT data, optionally selecting a method of a multifunction model.
+///
+/// The data is the AOT blob stored in the executorch Program. The method first parses the model
+/// metadata stored in the blob. For multifunction models, a non-nil methodName is used to resolve
+/// that method's input/output names and is set as the configuration's functionName. It is not
+/// added to the cache identifier, so methods are meant to share one compiled model.
+///
+/// @param data The AOT blob data.
+/// @param configuration The model configuration that will be used to load the model.
+/// @param methodName Optional method name (e.g., "forward", "prefill") selecting the function to load.
+/// @param error   On failure, error is filled with the failure information.
+/// @retval An opaque handle that points to the loaded model.
+- (ModelHandle*)loadModelFromAOTData:(NSData*)data
+                       configuration:(MLModelConfiguration*)configuration
+                          methodName:(nullable NSString*)methodName
+                               error:(NSError* __autoreleasing*)error;
+
 /// Executes the loaded model.
 ///
 /// @param handle The handle to the loaded model.

@@ -211,6 +211,39 @@ void set_outputs(std::vector<executorchcoreml::MultiArray>& outputs,
     return std::nullopt;
 }
 
+std::optional<ModelMetadata> get_model_metadata_for_method(const inmemoryfs::InMemoryFileSystem *inMemoryFS,
+                                                           NSString *methodName) {
+    // Parse the metadata from the in-memory file system; without it there is nothing to resolve.
+    auto parsedMetadata = get_model_metadata(inMemoryFS);
+    if (!parsedMetadata.has_value()) {
+        return std::nullopt;
+    }
+
+    ModelMetadata& resolved = parsedMetadata.value();
+
+    // Per-method resolution only applies to multifunction metadata combined with a non-empty
+    // method name. In every other case the top-level input_names/output_names are returned
+    // exactly as they were parsed.
+    if (!resolved.is_multifunction() || methodName == nil || methodName.length == 0) {
+        return resolved;
+    }
+
+    // Look up the requested method first, then fall back to the default method (when one is named).
+    std::string requestedName = [methodName UTF8String];
+    const MethodMetadata* selected = resolved.get_method_metadata(requestedName);
+    if (selected == nullptr && !resolved.default_method.empty()) {
+        selected = resolved.get_method_metadata(resolved.default_method);
+    }
+
+    // Promote the selected method's names to the top level; keep the parsed ones if nothing matched.
+    if (selected != nullptr) {
+        resolved.input_names = selected->input_names;
+        resolved.output_names = selected->output_names;
+    }
+
+    return resolved;
+}
+
 NSOrderedSet<NSString *> *get_ordered_set(const std::vector<std::string>& values) {
     NSMutableOrderedSet<NSString *> *result = [NSMutableOrderedSet orderedSetWithCapacity:values.size()];
     for (const auto& value : values) {
@@ -285,8 +318,13 @@ void set_outputs(std::vector<executorchcoreml::MultiArray>& outputs,
 
 ETCoreMLModel * _Nullable get_model_from_asset(ETCoreMLAsset *asset,
                                                MLModelConfiguration *configuration,
-                                               const ModelMetadata& metadata,
+                                               const executorchcoreml::ModelMetadata& metadata,
                                                NSError * __autoreleasing *error) {
+    // Always use the metadata's ordered input/output names for consistency.
+    // The pytree flatten order during export determines the correct input order,
+    // and metadata captures this order.
+    // For multifunction models, the caller is expected to pass metadata whose names were already
+    // resolved for the selected method (shapes are handled by multiArrayConstraint).
     NSOrderedSet<NSString *> *orderedInputNames = ::get_ordered_set(metadata.input_names);
     NSOrderedSet<NSString *> *orderedOutputNames = ::get_ordered_set(metadata.output_names);
     ETCoreMLModel *model = [[ETCoreMLModel alloc] initWithAsset:asset
@@ -322,6 +360,29 @@ void add_compute_unit(std::string& identifier, MLComputeUnits compute_units) {
     identifier.append(to_string(compute_units));
 }
 
+void add_function_name(std::string& identifier, MLModelConfiguration *configuration) {
+    // Deliberate no-op: `identifier` is left exactly as the caller passed it in.
+    // A multifunction CoreML model contains ALL of its functions, so it only needs to be
+    // compiled once and the function name is kept out of the cache key. Which function runs
+    // is decided by the functionName set on MLModelConfiguration when the MLModel is created
+    // from the cached compiled files.
+    //
+    // An earlier version appended "_func_{name}" here, causing a redundant compilation for
+    // every function.
+    static_cast<void>(identifier);
+    static_cast<void>(configuration);
+}
+
+void add_method_name(std::string& identifier, NSString *methodName) {
+    // Deliberate no-op: the method name is kept out of the cache key, so `identifier` is not
+    // modified. A multifunction CoreML model is meant to be compiled a single time and then
+    // shared by every method/function; the functionName set on MLModelConfiguration picks
+    // the function invoked at runtime, while the compiled model stays the same for all of
+    // them.
+    static_cast<void>(identifier);
+    static_cast<void>(methodName);
+}
+
 #if ET_EVENT_TRACER_ENABLED
 ETCoreMLAsset * _Nullable make_asset(NSURL *url,
                                      NSString *identifier,
@@ -612,8 +673,9 @@ - (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadat
 
 
 - (nullable id<ETCoreMLModelExecutor>)_modelExecutorWithAOTData:(NSData *)data
-                                                  configuration:(MLModelConfiguration *)configuration
-                                                          error:(NSError * __autoreleasing *)error {
+                                                   configuration:(MLModelConfiguration *)configuration
+                                                      methodName:(nullable NSString *)methodName
+                                                           error:(NSError * __autoreleasing *)error {
     using namespace inmemoryfs;
 
     auto buffer = MemoryBuffer::make_unowned(const_cast<void *>(data.bytes), data.length);
@@ -625,7 +687,9 @@ - (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadat
         return nil;
     }
 
-    std::optional<ModelMetadata> metadata = ::get_model_metadata(inMemoryFS.get());
+    // For multifunction models, try to load method-specific metadata first.
+    // This ensures we get the correct input/output names for this method.
+    std::optional<ModelMetadata> metadata = ::get_model_metadata_for_method(inMemoryFS.get(), methodName);
     if (!metadata) {
         ETCoreMLLogErrorAndSetNSError(error,
                                       ETCoreMLErrorCorruptedMetadata,
@@ -634,7 +698,32 @@ - (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadat
     }
 
     auto metadataValue = metadata.value();
+
+    // For multifunction CoreML models (ML Programs with multiple functions),
+    // we need to set functionName to select the correct function within the model.
+    // However, legacy single-function models require functionName to be nil.
+    // The metadata's "methods" field indicates if this is a multifunction model.
+    if (metadataValue.is_multifunction() && methodName != nil) {
+#if defined(__IPHONE_18_0) || defined(__MAC_15_0) || defined(__TVOS_18_0) || defined(__WATCHOS_11_0)
+        if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) {
+            configuration.functionName = methodName;
+        } else {
+            ETCoreMLLogErrorAndSetNSError(error,
+                                          ETCoreMLErrorCorruptedModel,
+                                          "Multifunction CoreML models require iOS 18.0+ / macOS 15.0+.");
+            return nil;
+        }
+#else
+        ETCoreMLLogErrorAndSetNSError(error,
+                                      ETCoreMLErrorCorruptedModel,
+                                      "Multifunction CoreML models require iOS 18.0+ / macOS 15.0+ SDK to build.");
+        return nil;
+#endif
+    }
+
     add_compute_unit(metadataValue.identifier, configuration.computeUnits);
+    add_function_name(metadataValue.identifier, configuration);
+    add_method_name(metadataValue.identifier, methodName);
     NSString *identifier = @(metadataValue.identifier.c_str());
     // If there are multiple calls to load the same model, we only want to compile it once.
     __block id<ETCoreMLModelExecutor> executor = nil;
@@ -665,8 +754,19 @@ - (dispatch_queue_t)queueForLoadingModelWithIdentifier:(NSString *)identifier {
 - (ModelHandle *)loadModelFromAOTData:(NSData*)data
                         configuration:(MLModelConfiguration*)configuration
                                 error:(NSError* __autoreleasing*)error {
+    // Convenience overload: no method is selected, so forward with a nil methodName.
+    return [self loadModelFromAOTData:data
+                        configuration:configuration
+                           methodName:nil
+                                error:error];
+}
+
+- (ModelHandle *)loadModelFromAOTData:(NSData*)data
+                        configuration:(MLModelConfiguration*)configuration
+                           methodName:(nullable NSString*)methodName
+                                error:(NSError* __autoreleasing*)error {
     id<ETCoreMLModelExecutor> executor = [self _modelExecutorWithAOTData:data
                                                            configuration:configuration
+                                                              methodName:methodName
                                                                    error:error];
     {
         os_unfair_lock_lock(&_lock);

@@ -72,9 +72,12 @@ class BackendDelegate {
     ///
     /// @param processed The AOT blob.
     /// @param specs The specs at the time of compilation.
+    /// @param method_name The method name for multifunction model support (optional, may be nullptr).
     /// @retval An opaque handle to the initialized blob or `nullptr` if the
     /// initialization failed.
-    virtual Handle* init(Buffer processed, const std::unordered_map<std::string, Buffer>& specs) const noexcept = 0;
+    virtual Handle* init(Buffer processed,
+                         const std::unordered_map<std::string, Buffer>& specs,
+                         const char* method_name = nullptr) const noexcept = 0;
 
     /// Must execute the CoreML model with the specified handle.
     ///