From 383607aed33ca3634c02343b044a466dae6e5707 Mon Sep 17 00:00:00 2001
From: Anthony Shoumikhin <anthony@shoumikh.in>
Date: Mon, 29 Sep 2025 13:53:02 -0700
Subject: [PATCH 01/11] Update LLaMATests with new APIs

---
 .../apple/Benchmark/Tests/LLaMA/LLaMATests.mm   | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
index 0f509f2809c..104ec46e097 100644
--- a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
+++ b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
@@ -87,22 +87,27 @@ @implementation LLaMATests
       for (NSUInteger index = 2; specialTokens.count < 256; ++index) {
         [specialTokens addObject:[NSString stringWithFormat:@"<|reserved_special_token_%zu|>", index]];
       }
-      auto __block runner = [[ExecuTorchLLMTextRunner alloc] initWithModelPath:modelPath
-                                                                 tokenizerPath:tokenizerPath
-                                                                 specialTokens:specialTokens];
+      ExecuTorchLLMTextRunner *__block runner =
+          [[ExecuTorchLLMTextRunner alloc] initWithModelPath:modelPath
+                                               tokenizerPath:tokenizerPath
+                                               specialTokens:specialTokens];
       NSError *error;
       BOOL status = [runner loadWithError:&error];
       if (!status) {
         XCTFail("Load failed with error %zi", error.code);
         return;
       }
+      ExecuTorchLLMGenerationConfig *config =
+          [[ExecuTorchLLMGenerationConfig alloc] initWithConfigurationHandler:^(ExecuTorchLLMGenerationConfig *config) {
+            config.sequenceLength = 50;
+          }];
       TokensPerSecondMetric *tokensPerSecondMetric = [TokensPerSecondMetric new];
       [testCase measureWithMetrics:@[ tokensPerSecondMetric, [XCTClockMetric new], [XCTMemoryMetric new] ]
                             block:^{
                               tokensPerSecondMetric.tokenCount = 0;
-                              BOOL status = [runner generate:@"Once upon a time"
-                                              sequenceLength:50
-                                           withTokenCallback:^(NSString *token) {
+                              BOOL status = [runner generateWithPrompt:@"Once upon a time"
+                                                                config:config
+                                                     withTokenCallback:^(NSString *token) {
                                 tokensPerSecondMetric.tokenCount++;
                               }
                                                        error:NULL];

From c51599ef00190c8422c30d4707ec19996e3550f7 Mon Sep 17 00:00:00 2001
From: Anthony Shoumikhin <anthony@shoumikh.in>
Date: Mon, 29 Sep 2025 13:55:53 -0700
Subject: [PATCH 02/11] Rename generate method to generateWithPrompt in
 ExecuTorchLLMTextRunner.h

---
 .../apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h  | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h
index 3d42c4853f1..66f9515eb45 100644
--- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h
+++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h
@@ -60,9 +60,9 @@ __attribute__((deprecated("This API is experimental.")))
  @param error      On failure, populated with an NSError explaining the issue.
  @return YES if generation completes successfully, NO if an error occurred.
 */
-- (BOOL)generate:(NSString *)prompt
-           config:(ExecuTorchLLMConfig *)config
-withTokenCallback:(nullable void (^)(NSString *token))callback
+- (BOOL)generateWithPrompt:(NSString *)prompt
+                    config:(ExecuTorchLLMConfig *)config
+             tokenCallback:(nullable void (^)(NSString *token))callback
             error:(NSError **)error
     NS_SWIFT_NAME(generate(_:_:tokenCallback:));
 

From 506ad109c8c37e09c6498fc7f794fd21aaacc9b3 Mon Sep 17 00:00:00 2001
From: Anthony Shoumikhin <anthony@shoumikh.in>
Date: Mon, 29 Sep 2025 13:56:25 -0700
Subject: [PATCH 03/11] Rename generate method to generateWithPrompt in
 ExecuTorchLLMTextRunner.mm

---
 .../ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm     | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm
index 6ce854a52f8..4ea1bd921f7 100644
--- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm
+++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm
@@ -74,10 +74,10 @@ - (BOOL)loadWithError:(NSError**)error {
   return YES;
 }
 
-- (BOOL)generate:(NSString*)prompt
-           config:(ExecuTorchLLMConfig *)config
-withTokenCallback:(nullable void (^)(NSString*))callback
-            error:(NSError**)error {
+- (BOOL)generateWithPrompt:(NSString*)prompt
+                    config:(ExecuTorchLLMConfig *)config
+             tokenCallback:(nullable void (^)(NSString*))callback
+                     error:(NSError**)error {
   if (![self loadWithError:error]) {
     return NO;
   }

From c36919272b7e3c55b018d23f0710523fcce57a78 Mon Sep 17 00:00:00 2001
From: Anthony Shoumikhin <anthony@shoumikh.in>
Date: Mon, 29 Sep 2025 13:56:45 -0700
Subject: [PATCH 04/11] Fix formatting of method declaration in
 ExecuTorchLLMTextRunner

---
 .../llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h
index 66f9515eb45..550a20ea633 100644
--- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h
+++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h
@@ -63,7 +63,7 @@ __attribute__((deprecated("This API is experimental.")))
 - (BOOL)generateWithPrompt:(NSString *)prompt
                     config:(ExecuTorchLLMConfig *)config
              tokenCallback:(nullable void (^)(NSString *token))callback
-            error:(NSError **)error
+                     error:(NSError **)error
     NS_SWIFT_NAME(generate(_:_:tokenCallback:));
 
 /**

From 4f10d9742ab1ad0d609beda7a8eba7942d9a52b8 Mon Sep 17 00:00:00 2001
From: Anthony Shoumikhin <anthony@shoumikh.in>
Date: Mon, 29 Sep 2025 13:58:23 -0700
Subject: [PATCH 05/11] Rename generate method to generateWithInputs in
 ExecuTorchLLMMultimodalRunner.h

---
 .../Exported/ExecuTorchLLMMultimodalRunner.h              | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h
index 3eb7226ba76..8523581da8a 100644
--- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h
+++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h
@@ -193,10 +193,10 @@ __attribute__((objc_subclassing_restricted))
  @param error      On failure, populated with an NSError explaining the issue.
  @return YES if generation completes successfully, NO if an error occurred.
 */
-- (BOOL)generate:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
-           config:(ExecuTorchLLMConfig *)config
-withTokenCallback:(nullable void (^)(NSString *))callback
-            error:(NSError **)error
+- (BOOL)generateWithInputs:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
+                    config:(ExecuTorchLLMConfig *)config
+             tokenCallback:(nullable void (^)(NSString *))callback
+                     error:(NSError **)error
     NS_SWIFT_NAME(generate(_:_:tokenCallback:));
 
 /**

From b95c60af41fdc20059f89d43df7aa33b75ea222c Mon Sep 17 00:00:00 2001
From: Anthony Shoumikhin <anthony@shoumikh.in>
Date: Mon, 29 Sep 2025 13:58:54 -0700
Subject: [PATCH 06/11] Rename generate method to generateWithInputs in
 ExecuTorchLLMMultimodalRunner.mm

---
 .../Exported/ExecuTorchLLMMultimodalRunner.mm             | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm
index dd9b2065a26..a3dc3e6afd1 100644
--- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm
+++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm
@@ -162,10 +162,10 @@ - (BOOL)loadWithError:(NSError**)error {
   return YES;
 }
 
-- (BOOL)generate:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
-           config:(ExecuTorchLLMConfig *)config
-withTokenCallback:(nullable void (^)(NSString *))callback
-            error:(NSError **)error {
+- (BOOL)generateWithInputs:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
+                    config:(ExecuTorchLLMConfig *)config
+             tokenCallback:(nullable void (^)(NSString *))callback
+                     error:(NSError **)error {
   if (![self loadWithError:error]) {
     return NO;
   }

From dbf40fa85413f151d1e3b246473b4aae6b7a25c5 Mon Sep 17 00:00:00 2001
From: Anthony Shoumikhin <anthony@shoumikh.in>
Date: Mon, 29 Sep 2025 14:09:01 -0700
Subject: [PATCH 07/11] Refactor LLaMATests to use block syntax for config

---
 .../benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm     | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
index 104ec46e097..cf596d49844 100644
--- a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
+++ b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
@@ -98,7 +98,7 @@ @implementation LLaMATests
         return;
       }
       ExecuTorchLLMGenerationConfig *config =
-          [[ExecuTorchLLMGenerationConfig alloc] initWithConfigurationHandler:^(ExecuTorchLLMGenerationConfig *config) {
+          [[ExecuTorchLLMGenerationConfig alloc] initWithBlock:^(ExecuTorchLLMGenerationConfig *config) {
             config.sequenceLength = 50;
           }];
       TokensPerSecondMetric *tokensPerSecondMetric = [TokensPerSecondMetric new];
@@ -107,10 +107,10 @@ @implementation LLaMATests
                               tokensPerSecondMetric.tokenCount = 0;
                               BOOL status = [runner generateWithPrompt:@"Once upon a time"
                                                                 config:config
-                                                     withTokenCallback:^(NSString *token) {
+                                                         tokenCallback:^(NSString *token) {
                                 tokensPerSecondMetric.tokenCount++;
                               }
-                                                       error:NULL];
+                                                                 error:NULL];
                               XCTAssertTrue(status);
                             }];
     },

From fda9b9a3e510f3b268cbeb5fae70a1e6b10ddf15 Mon Sep 17 00:00:00 2001
From: Anthony Shoumikhin <anthony@shoumikh.in>
Date: Mon, 29 Sep 2025 14:12:16 -0700
Subject: [PATCH 08/11] Refactor LLaMATests to initialize config inline

---
 .../apple/Benchmark/Tests/LLaMA/LLaMATests.mm          | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
index cf596d49844..07b32bc1cb8 100644
--- a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
+++ b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
@@ -97,18 +97,16 @@ @implementation LLaMATests
         XCTFail("Load failed with error %zi", error.code);
         return;
       }
-      ExecuTorchLLMGenerationConfig *config =
-          [[ExecuTorchLLMGenerationConfig alloc] initWithBlock:^(ExecuTorchLLMGenerationConfig *config) {
-            config.sequenceLength = 50;
-          }];
       TokensPerSecondMetric *tokensPerSecondMetric = [TokensPerSecondMetric new];
       [testCase measureWithMetrics:@[ tokensPerSecondMetric, [XCTClockMetric new], [XCTMemoryMetric new] ]
                             block:^{
                               tokensPerSecondMetric.tokenCount = 0;
                               BOOL status = [runner generateWithPrompt:@"Once upon a time"
-                                                                config:config
+                                                                config:[[ExecuTorchLLMGenerationConfig alloc] initWithBlock:^(ExecuTorchLLMGenerationConfig *config) {
+                                config.sequenceLength = 50;
+                              }]
                                                          tokenCallback:^(NSString *token) {
-                                tokensPerSecondMetric.tokenCount++;
+                                ++tokensPerSecondMetric.tokenCount;
                               }
                                                                  error:NULL];
                               XCTAssertTrue(status);

From 0711cf442006befef575d6d743d863952af06deb Mon Sep 17 00:00:00 2001
From: Anthony Shoumikhin <anthony@shoumikh.in>
Date: Mon, 29 Sep 2025 14:31:07 -0700
Subject: [PATCH 09/11] Rename LLaMATests.mm to LLMTests.mm in Xcode project

---
 .../Benchmark/Benchmark.xcodeproj/project.pbxproj  | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj
index 3c8173d5bff..8a75010af6f 100644
--- a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj
+++ b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj
@@ -8,7 +8,7 @@
 
 /* Begin PBXBuildFile section */
 		0314AE3A2E2AAEE700DDE821 /* executorch_llm in Frameworks */ = {isa = PBXBuildFile; productRef = 0314AE392E2AAEE700DDE821 /* executorch_llm */; };
-		032A73CA2CAFBA8600932D36 /* LLaMATests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 032A73C82CAFBA8600932D36 /* LLaMATests.mm */; };
+		032A73CA2CAFBA8600932D36 /* LLMTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 032A73C82CAFBA8600932D36 /* LLMTests.mm */; };
 		0351D9D72CAFC9A200607121 /* Resources in Resources */ = {isa = PBXBuildFile; fileRef = 03C7FA322C8AA24200E6E9AE /* Resources */; };
 		03B0118E2CAC567900054791 /* DynamicTestCase.m in Sources */ = {isa = PBXBuildFile; fileRef = 03B0118C2CAC567900054791 /* DynamicTestCase.m */; };
 		03B011912CAD114E00054791 /* ResourceTestCase.m in Sources */ = {isa = PBXBuildFile; fileRef = 03B011902CAD114E00054791 /* ResourceTestCase.m */; };
@@ -35,7 +35,7 @@
 /* End PBXContainerItemProxy section */
 
 /* Begin PBXFileReference section */
-		032A73C82CAFBA8600932D36 /* LLaMATests.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = LLaMATests.mm; sourceTree = "<group>"; };
+		032A73C82CAFBA8600932D36 /* LLMTests.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = LLMTests.mm; sourceTree = "<group>"; };
 		037C96A02C8A570B00B3DF38 /* Tests.xctestplan */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = Tests.xctestplan; sourceTree = "<group>"; };
 		03B0118B2CAC567900054791 /* DynamicTestCase.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = DynamicTestCase.h; sourceTree = "<group>"; };
 		03B0118C2CAC567900054791 /* DynamicTestCase.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = DynamicTestCase.m; sourceTree = "<group>"; };
@@ -76,12 +76,12 @@
 /* End PBXFrameworksBuildPhase section */
 
 /* Begin PBXGroup section */
-		032A73C92CAFBA8600932D36 /* LLaMA */ = {
+		032A73C92CAFBA8600932D36 /* LLM */ = {
 			isa = PBXGroup;
 			children = (
-				032A73C82CAFBA8600932D36 /* LLaMATests.mm */,
+				032A73C82CAFBA8600932D36 /* LLMTests.mm */,
 			);
-			path = LLaMA;
+			path = LLM;
 			sourceTree = "<group>";
 		};
 		03B0118D2CAC567900054791 /* TestUtils */ = {
@@ -127,7 +127,7 @@
 		03B2D3782C8A515C0046936E /* Tests */ = {
 			isa = PBXGroup;
 			children = (
-				032A73C92CAFBA8600932D36 /* LLaMA */,
+				032A73C92CAFBA8600932D36 /* LLM */,
 				03E7E6782CBDC1C900205E71 /* CoreMLTests.mm */,
 				03B2D3792C8A515C0046936E /* GenericTests.mm */,
 				037C96A02C8A570B00B3DF38 /* Tests.xctestplan */,
@@ -241,7 +241,7 @@
 				03B011912CAD114E00054791 /* ResourceTestCase.m in Sources */,
 				03E7E6792CBDCAE900205E71 /* CoreMLTests.mm in Sources */,
 				03B2D37A2C8A515C0046936E /* GenericTests.mm in Sources */,
-				032A73CA2CAFBA8600932D36 /* LLaMATests.mm in Sources */,
+				032A73CA2CAFBA8600932D36 /* LLMTests.mm in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};

From 951341346128464b83d922771a2430fb5f04d3f1 Mon Sep 17 00:00:00 2001
From: Anthony Shoumikhin <anthony@shoumikh.in>
Date: Mon, 29 Sep 2025 14:32:28 -0700
Subject: [PATCH 10/11] Rename LLaMATests to LLMTests

---
 .../Benchmark/Tests/{LLaMA/LLaMATests.mm => LLM/LLMTests.mm}  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
 rename extension/benchmark/apple/Benchmark/Tests/{LLaMA/LLaMATests.mm => LLM/LLMTests.mm} (98%)

diff --git a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm b/extension/benchmark/apple/Benchmark/Tests/LLM/LLMTests.mm
similarity index 98%
rename from extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
rename to extension/benchmark/apple/Benchmark/Tests/LLM/LLMTests.mm
index 07b32bc1cb8..7a9ff40d079 100644
--- a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
+++ b/extension/benchmark/apple/Benchmark/Tests/LLM/LLMTests.mm
@@ -42,10 +42,10 @@ - (id)copyWithZone:(NSZone *)zone {
 
 @end
 
-@interface LLaMATests : ResourceTestCase
+@interface LLMTests : ResourceTestCase
 @end
 
-@implementation LLaMATests
+@implementation LLMTests
 
 + (NSArray<NSString *> *)directories {
   return @[

From e00177a116221a5d255b0b7a8d9567bae1e7209d Mon Sep 17 00:00:00 2001
From: Anthony Shoumikhin <anthony@shoumikh.in>
Date: Mon, 29 Sep 2025 15:10:30 -0700
Subject: [PATCH 11/11] Use ExecuTorchLLMConfig in LLMTests generation call

---
 extension/benchmark/apple/Benchmark/Tests/LLM/LLMTests.mm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extension/benchmark/apple/Benchmark/Tests/LLM/LLMTests.mm b/extension/benchmark/apple/Benchmark/Tests/LLM/LLMTests.mm
index 7a9ff40d079..eac95073d95 100644
--- a/extension/benchmark/apple/Benchmark/Tests/LLM/LLMTests.mm
+++ b/extension/benchmark/apple/Benchmark/Tests/LLM/LLMTests.mm
@@ -102,7 +102,7 @@ @implementation LLMTests
                             block:^{
                               tokensPerSecondMetric.tokenCount = 0;
                               BOOL status = [runner generateWithPrompt:@"Once upon a time"
-                                                                config:[[ExecuTorchLLMGenerationConfig alloc] initWithBlock:^(ExecuTorchLLMGenerationConfig *config) {
+                                                                config:[[ExecuTorchLLMConfig alloc] initWithBlock:^(ExecuTorchLLMConfig *config) {
                                 config.sequenceLength = 50;
                               }]
                                                          tokenCallback:^(NSString *token) {