diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h
index 11cdaf63d0b..cef90617a11 100644
--- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h
+++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h
@@ -6,6 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#import "ExecuTorchLLMConfig.h"
 #import "ExecuTorchLLMError.h"
 #import "ExecuTorchLLMMultimodalRunner.h"
 #import "ExecuTorchLLMTextRunner.h"
diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMConfig.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMConfig.h
new file mode 100644
index 00000000000..5ecc9a0f004
--- /dev/null
+++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMConfig.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ A mutable set of text generation parameters accepted by the LLM runners.
+
+ Each property reads and writes the wrapped C++ GenerationConfig directly,
+ so defaults and future fields remain a single source of truth in C++.
+*/
+NS_SWIFT_NAME(Config)
+__attribute__((deprecated("This API is experimental.")))
+__attribute__((objc_subclassing_restricted))
+@interface ExecuTorchLLMConfig : NSObject<NSCopying>
+
+/** Whether to echo the input prompt in the output. */
+@property(nonatomic, getter=isEchoEnabled) BOOL echoEnabled;
+
+/** Maximum number of new tokens to generate; stored natively as int32_t. */
+@property(nonatomic) NSInteger maximumNewTokens;
+
+/** Whether this is a warmup run. */
+@property(nonatomic, getter=isWarming) BOOL warming;
+
+/** Maximum total sequence length; stored natively as int32_t. */
+@property(nonatomic) NSInteger sequenceLength;
+
+/** Temperature for sampling; stored natively as a float. */
+@property(nonatomic) double temperature;
+
+/** Number of BOS tokens to add; stored natively as int32_t. */
+@property(nonatomic) NSInteger bosCount;
+
+/** Number of EOS tokens to add; stored natively as int32_t. */
+@property(nonatomic) NSInteger eosCount;
+
+/**
+ Creates a configuration with default values, then synchronously calls the
+ block with the new instance so callers can adjust fields before use.
+ @param block  A non-escaping block that receives the new configuration.
+ @return An initialized ExecuTorchLLMConfig instance.
+*/
+- (instancetype)initWithBlock:(NS_NOESCAPE void (^)(ExecuTorchLLMConfig *))block
+    NS_SWIFT_NAME(init(_:));
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMConfig.mm b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMConfig.mm
new file mode 100644
index 00000000000..911f66e7d65
--- /dev/null
+++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMConfig.mm
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#import "ExecuTorchLLMConfig.h"
+
+#import <executorch/extension/llm/runner/irunner.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+#include <memory>
+
+using namespace executorch::extension;
+
+namespace {
+
+// Narrows an NSInteger to int32_t, saturating at the int32_t bounds so that
+// out-of-range values do not silently wrap when stored in the native config.
+inline int32_t ClampToInt32(NSInteger value) {
+  constexpr NSInteger kMin = std::numeric_limits<int32_t>::min();
+  constexpr NSInteger kMax = std::numeric_limits<int32_t>::max();
+  return static_cast<int32_t>(std::clamp(value, kMin, kMax));
+}
+
+} // namespace
+
+@interface ExecuTorchLLMConfig ()
+
+// Read-only view of the wrapped C++ configuration, consumed by the runners.
+- (const llm::GenerationConfig &)nativeConfig;
+
+@end
+
+@implementation ExecuTorchLLMConfig {
+  // Owns the native configuration; allocated in -init.
+  std::unique_ptr<llm::GenerationConfig> _config;
+}
+
+// Accessors are written by hand below and forward to _config, so the
+// compiler must not synthesize backing ivars or accessors for these.
+@dynamic echoEnabled;
+@dynamic maximumNewTokens;
+@dynamic warming;
+@dynamic sequenceLength;
+@dynamic temperature;
+@dynamic bosCount;
+@dynamic eosCount;
+
+- (instancetype)init {
+  if (self = [super init]) {
+    _config = std::make_unique<llm::GenerationConfig>();
+  }
+  return self;
+}
+
+- (instancetype)initWithBlock:(NS_NOESCAPE void (^)(ExecuTorchLLMConfig *))block {
+  if (self = [self init]) {
+    // The header declares the block nonnull, but tolerate nil from callers
+    // that ignore nullability annotations.
+    if (block) {
+      block(self);
+    }
+  }
+  return self;
+}
+
+- (id)copyWithZone:(NSZone *)zone {
+  ExecuTorchLLMConfig *config = [[[self class] allocWithZone:zone] init];
+  // Copy the native struct by value so the copy can be mutated independently.
+  *config->_config = *_config;
+  return config;
+}
+
+- (const llm::GenerationConfig &)nativeConfig {
+  return *_config;
+}
+
+// The selector must be -isEchoEnabled to match the property's `getter=`
+// attribute; with @dynamic there is no compiler-provided fallback.
+- (BOOL)isEchoEnabled {
+  return _config->echo;
+}
+
+- (void)setEchoEnabled:(BOOL)echoEnabled {
+  _config->echo = echoEnabled;
+}
+
+- (NSInteger)maximumNewTokens {
+  return _config->max_new_tokens;
+}
+
+- (void)setMaximumNewTokens:(NSInteger)maximumNewTokens {
+  _config->max_new_tokens = ClampToInt32(maximumNewTokens);
+}
+
+// Matches the property's `getter=isWarming` attribute.
+- (BOOL)isWarming {
+  return _config->warming;
+}
+
+- (void)setWarming:(BOOL)warming {
+  _config->warming = warming;
+}
+
+- (NSInteger)sequenceLength {
+  return _config->seq_len;
+}
+
+- (void)setSequenceLength:(NSInteger)sequenceLength {
+  _config->seq_len = ClampToInt32(sequenceLength);
+}
+
+- (double)temperature {
+  return _config->temperature;
+}
+
+- (void)setTemperature:(double)temperature {
+  // Narrowed to float before being stored in the native config.
+  _config->temperature = (float)temperature;
+}
+
+- (NSInteger)bosCount {
+  return _config->num_bos;
+}
+
+- (void)setBosCount:(NSInteger)bosCount {
+  _config->num_bos = ClampToInt32(bosCount);
+}
+
+- (NSInteger)eosCount {
+  return _config->num_eos;
+}
+
+- (void)setEosCount:(NSInteger)eosCount {
+  _config->num_eos = ClampToInt32(eosCount);
+}
+
+@end
diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h
index 3121259921a..3eb7226ba76 100644
--- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h
+++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.h
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#import <Foundation/Foundation.h>
+#import "ExecuTorchLLMConfig.h"
 
 NS_ASSUME_NONNULL_BEGIN
 
@@ -26,6 +26,7 @@ typedef NS_ENUM(NSInteger, ExecuTorchLLMMultimodalInputType) {
 */
 NS_SWIFT_NAME(Image)
 __attribute__((deprecated("This API is experimental.")))
+__attribute__((objc_subclassing_restricted))
 @interface ExecuTorchLLMImage : NSObject<NSCopying>
 
 /**
@@ -44,8 +45,11 @@ __attribute__((deprecated("This API is experimental.")))
     NS_DESIGNATED_INITIALIZER;
 
 @property(nonatomic, readonly) NSData *data;
+
 @property(nonatomic, readonly) NSInteger width;
+
 @property(nonatomic, readonly) NSInteger height;
+
 @property(nonatomic, readonly) NSInteger channels;
 
 + (instancetype)new NS_UNAVAILABLE;
@@ -58,6 +62,7 @@ __attribute__((deprecated("This API is experimental.")))
 */
 NS_SWIFT_NAME(Audio)
 __attribute__((deprecated("This API is experimental.")))
+__attribute__((objc_subclassing_restricted))
 @interface ExecuTorchLLMAudio : NSObject<NSCopying>
 
 /**
@@ -76,8 +81,11 @@ __attribute__((deprecated("This API is experimental.")))
     NS_DESIGNATED_INITIALIZER;
 
 @property(nonatomic, readonly) NSData *data;
+
 @property(nonatomic, readonly) NSInteger batchSize;
+
 @property(nonatomic, readonly) NSInteger bins;
+
 @property(nonatomic, readonly) NSInteger frames;
 
 + (instancetype)new NS_UNAVAILABLE;
@@ -91,6 +99,7 @@ __attribute__((deprecated("This API is experimental.")))
 */
 NS_SWIFT_NAME(MultimodalInput)
 __attribute__((deprecated("This API is experimental.")))
+__attribute__((objc_subclassing_restricted))
 @interface ExecuTorchLLMMultimodalInput : NSObject<NSCopying>
 
 /**
@@ -124,8 +133,11 @@ __attribute__((deprecated("This API is experimental.")))
     NS_RETURNS_RETAINED;
 
 @property(nonatomic, readonly) ExecuTorchLLMMultimodalInputType type;
+
 @property(nonatomic, readonly, nullable) NSString *text;
+
 @property(nonatomic, readonly, nullable) ExecuTorchLLMImage *image;
+
 @property(nonatomic, readonly, nullable) ExecuTorchLLMAudio *audio;
 
 + (instancetype)new NS_UNAVAILABLE;
@@ -134,12 +146,13 @@ __attribute__((deprecated("This API is experimental.")))
 @end
 
 /**
- A wrapper class for the C++ llm::MultimodalLLMRunner that provides
+ A wrapper class for the C++ llm::MultimodalRunner that provides
  Objective-C APIs to load models, manage tokenization, accept mixed
  input modalities, generate text sequences, and stop the runner.
 */
 NS_SWIFT_NAME(MultimodalRunner)
 __attribute__((deprecated("This API is experimental.")))
+__attribute__((objc_subclassing_restricted))
 @interface ExecuTorchLLMMultimodalRunner : NSObject
 
 /**
@@ -169,19 +182,22 @@ __attribute__((deprecated("This API is experimental.")))
 - (BOOL)loadWithError:(NSError **)error;
 
 /**
- Generates text given a list of multimodal inputs, up to a specified sequence length.
- Invokes the provided callback for each generated token.
+ Generates text given a list of multimodal inputs, using the generation
+ parameters from the supplied configuration object.
 
- @param inputs    An ordered array of multimodal inputs.
- @param seq_len   The maximum number of tokens to generate.
- @param callback  A block called with each generated token as an NSString.
- @param error     On failure, populated with an NSError explaining the issue.
+ The token callback, if provided, is invoked for each generated token.
+
+ @param inputs     An ordered array of multimodal inputs.
+ @param config     The generation settings to use, such as sequence length.
+ @param callback   A block called with each generated token as an NSString.
+ @param error      On failure, populated with an NSError explaining the issue.
  @return YES if generation completes successfully, NO if an error occurred.
 */
 - (BOOL)generate:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
-   sequenceLength:(NSInteger)seq_len
+           config:(ExecuTorchLLMConfig *)config
 withTokenCallback:(nullable void (^)(NSString *))callback
-            error:(NSError **)error;
+            error:(NSError **)error
+    NS_SWIFT_NAME(generate(_:_:tokenCallback:));
 
 /**
  Stop producing new tokens and terminate the current generation process.
@@ -189,9 +205,9 @@ withTokenCallback:(nullable void (^)(NSString *))callback
 - (void)stop;
 
 /**
-  Remove the prefilled tokens from the KV cache and resets the start position
-  to 0. It also clears the stats for previous runs.
- */
+ Remove the prefilled tokens from the KV cache and reset the start position
+ to 0. It also clears the stats for previous runs.
+*/
 - (void)reset;
 
 + (instancetype)new NS_UNAVAILABLE;
diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm
index bdf78d3f15e..dd9b2065a26 100644
--- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm
+++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm
@@ -15,6 +15,12 @@
 using namespace executorch::extension;
 using namespace executorch::runtime;
 
+@interface ExecuTorchLLMConfig ()
+
+- (const llm::GenerationConfig &)nativeConfig;
+
+@end
+
 @implementation ExecuTorchLLMImage
 
 - (instancetype)initWithData:(NSData *)data
@@ -157,7 +163,7 @@ - (BOOL)loadWithError:(NSError**)error {
 }
 
 - (BOOL)generate:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
-   sequenceLength:(NSInteger)seq_len
+           config:(ExecuTorchLLMConfig *)config
 withTokenCallback:(nullable void (^)(NSString *))callback
             error:(NSError **)error {
   if (![self loadWithError:error]) {
@@ -192,7 +198,7 @@ - (BOOL)generate:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
   }
   auto status = _runner->generate(
     std::move(nativeInputs),
-    llm::GenerationConfig{.seq_len = static_cast<int32_t>(seq_len)},
+    config.nativeConfig,
     [callback](const std::string& token) {
       if (callback) {
         callback(@(token.c_str()));
diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h
index ca9867ebbb0..3d42c4853f1 100644
--- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h
+++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.h
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#import <Foundation/Foundation.h>
+#import "ExecuTorchLLMConfig.h"
 
 NS_ASSUME_NONNULL_BEGIN
 
@@ -49,19 +49,22 @@ __attribute__((deprecated("This API is experimental.")))
 - (BOOL)loadWithError:(NSError **)error;
 
 /**
- Generates text given an input prompt, up to a specified sequence length.
- Invokes the provided callback for each generated token.
+ Generates text given an input prompt, using the generation parameters
+ from the supplied configuration object.
 
- @param prompt    The initial text prompt to generate from.
- @param seq_len   The maximum number of tokens to generate.
- @param callback  A block called with each generated token as an NSString.
- @param error     On failure, populated with an NSError explaining the issue.
+ The token callback, if provided, is invoked for each generated token.
+
+ @param prompt     The initial text prompt to generate from.
+ @param config     The generation settings to use, such as sequence length.
+ @param callback   A block called with each generated token as an NSString.
+ @param error      On failure, populated with an NSError explaining the issue.
  @return YES if generation completes successfully, NO if an error occurred.
 */
 - (BOOL)generate:(NSString *)prompt
-   sequenceLength:(NSInteger)seq_len
-withTokenCallback:(nullable void (^)(NSString *))callback
-            error:(NSError **)error;
+           config:(ExecuTorchLLMConfig *)config
+withTokenCallback:(nullable void (^)(NSString *token))callback
+            error:(NSError **)error
+    NS_SWIFT_NAME(generate(_:_:tokenCallback:));
 
 /**
  Stop producing new tokens and terminate the current generation process.
@@ -69,9 +72,9 @@ withTokenCallback:(nullable void (^)(NSString *))callback
 - (void)stop;
 
 /**
-  Remove the prefilled tokens from the KV cache and resets the start position
-  to 0. It also clears the stats for previous runs.
- */
+ Remove the prefilled tokens from the KV cache and reset the start position
+ to 0. It also clears the stats for previous runs.
+*/
 - (void)reset;
 
 + (instancetype)new NS_UNAVAILABLE;
diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm
index f4516009694..6ce854a52f8 100644
--- a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm
+++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMTextRunner.mm
@@ -15,6 +15,12 @@
 using namespace executorch::extension;
 using namespace executorch::runtime;
 
+@interface ExecuTorchLLMConfig ()
+
+- (const llm::GenerationConfig &)nativeConfig;
+
+@end
+
 @implementation ExecuTorchLLMTextRunner {
   NSString *_modelPath;
   NSString *_tokenizerPath;
@@ -69,15 +75,15 @@ - (BOOL)loadWithError:(NSError**)error {
 }
 
 - (BOOL)generate:(NSString*)prompt
-    sequenceLength:(NSInteger)seq_len
+           config:(ExecuTorchLLMConfig *)config
 withTokenCallback:(nullable void (^)(NSString*))callback
-                error:(NSError**)error {
+            error:(NSError**)error {
   if (![self loadWithError:error]) {
     return NO;
   }
   auto status = _runner->generate(
     prompt.UTF8String,
-    llm::GenerationConfig{.seq_len = static_cast<int32_t>(seq_len)},
+    config.nativeConfig,
     [callback](const std::string& token) {
       if (callback) {
         callback(@(token.c_str()));
diff --git a/extension/llm/apple/ExecuTorchLLM/__tests__/MultimodalRunnerTest.swift b/extension/llm/apple/ExecuTorchLLM/__tests__/MultimodalRunnerTest.swift
index b3de9b07a9d..cdf15f12350 100644
--- a/extension/llm/apple/ExecuTorchLLM/__tests__/MultimodalRunnerTest.swift
+++ b/extension/llm/apple/ExecuTorchLLM/__tests__/MultimodalRunnerTest.swift
@@ -77,7 +77,9 @@ class MultimodalRunnerTest: XCTestCase {
         MultimodalInput(systemPrompt),
         MultimodalInput(uiImage.asImage()),
         MultimodalInput("\(userPrompt) \(assistantPrompt)"),
-      ], sequenceLength: sequenceLength) { token in
+      ], Config {
+        $0.sequenceLength = sequenceLength
+      }) { token in
         text += token
       }
     } catch {
@@ -92,7 +94,9 @@ class MultimodalRunnerTest: XCTestCase {
         MultimodalInput(systemPrompt),
         MultimodalInput(uiImage.asImage()),
         MultimodalInput("\(userPrompt) \(assistantPrompt)"),
-      ], sequenceLength: sequenceLength) { token in
+      ], Config {
+        $0.sequenceLength = sequenceLength
+      }) { token in
         text += token
       }
     } catch {
diff --git a/extension/llm/apple/ExecuTorchLLM/__tests__/TextRunnerTest.swift b/extension/llm/apple/ExecuTorchLLM/__tests__/TextRunnerTest.swift
index 6a91960b088..5e99af0c57f 100644
--- a/extension/llm/apple/ExecuTorchLLM/__tests__/TextRunnerTest.swift
+++ b/extension/llm/apple/ExecuTorchLLM/__tests__/TextRunnerTest.swift
@@ -50,7 +50,9 @@ class TextRunnerTest: XCTestCase {
     var text = ""
 
     do {
-      try runner.generate(userPrompt, sequenceLength: sequenceLength) { token in
+      try runner.generate(userPrompt, Config {
+        $0.sequenceLength = sequenceLength
+      }) { token in
         text += token
       }
     } catch {
@@ -61,7 +63,9 @@ class TextRunnerTest: XCTestCase {
     text = ""
     runner.reset()
     do {
-      try runner.generate(userPrompt, sequenceLength: sequenceLength) { token in
+      try runner.generate(userPrompt, Config {
+        $0.sequenceLength = sequenceLength
+      }) { token in
         text += token
       }
     } catch {