From 261e3640c942c60860af08cd4d205d8e402bb702 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 17 Nov 2025 19:37:08 +0000 Subject: [PATCH 1/2] fix(openapi): restore embedded request wrappers --- .stats.yml | 4 +- .../resources/alpha/eval/eval.py | 82 +---- .../resources/beta/datasets.py | 35 +- .../resources/scoring_functions.py | 30 +- .../types/alpha/eval_run_eval_alpha_params.py | 80 +---- .../types/alpha/eval_run_eval_params.py | 80 +---- .../types/beta/dataset_register_params.py | 30 +- .../types/scoring_function_register_params.py | 78 ++++- tests/api_resources/alpha/test_eval.py | 304 ++++++++++-------- tests/api_resources/beta/test_datasets.py | 46 +-- tests/api_resources/test_scoring_functions.py | 72 +++-- 11 files changed, 400 insertions(+), 441 deletions(-) diff --git a/.stats.yml b/.stats.yml index 253e4ba9..db94672e 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 103 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-2b99a80543f8bc8fa164167693c214651ac8e710f4726fb5869183b4d6c71a03.yml -openapi_spec_hash: a5632057f5e4d956a71c20a79c0d879c +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-aab1b331382f758fc255f765e73b62fedf463cf0748bc11b2b08974de9ac816a.yml +openapi_spec_hash: f717a21f47419aa51e4d9298aa68cc45 config_hash: 0017f6c419cbbf7b949f9b2842917a79 diff --git a/src/llama_stack_client/resources/alpha/eval/eval.py b/src/llama_stack_client/resources/alpha/eval/eval.py index c6532721..76969a4b 100644 --- a/src/llama_stack_client/resources/alpha/eval/eval.py +++ b/src/llama_stack_client/resources/alpha/eval/eval.py @@ -8,7 +8,7 @@ from __future__ import annotations -from typing import Dict, Iterable, Optional +from typing import Dict, Iterable import httpx @@ -20,7 +20,7 @@ JobsResourceWithStreamingResponse, AsyncJobsResourceWithStreamingResponse, ) -from ...._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given +from ...._types import Body, Query, Headers, NotGiven, SequenceNotStr, not_given from ...._utils import maybe_transform, async_maybe_transform from ...._compat import cached_property from ...._resource import SyncAPIResource, AsyncAPIResource @@ -164,9 +164,7 @@ def run_eval( self, benchmark_id: str, *, - eval_candidate: eval_run_eval_params.EvalCandidate, - num_examples: Optional[int] | Omit = omit, - scoring_params: Dict[str, eval_run_eval_params.ScoringParams] | Omit = omit, + benchmark_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -178,13 +176,7 @@ def run_eval( Run an evaluation on a benchmark. Args: - eval_candidate: A model candidate for evaluation. - - num_examples: Number of examples to evaluate (useful for testing), if not provided, all - examples in the dataset will be evaluated - - scoring_params: Map between scoring function id and parameters for each scoring function you - want to run + benchmark_config: A benchmark configuration for evaluation. 
extra_headers: Send extra headers @@ -198,14 +190,7 @@ def run_eval( raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") return self._post( f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs", - body=maybe_transform( - { - "eval_candidate": eval_candidate, - "num_examples": num_examples, - "scoring_params": scoring_params, - }, - eval_run_eval_params.EvalRunEvalParams, - ), + body=maybe_transform({"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -216,9 +201,7 @@ def run_eval_alpha( self, benchmark_id: str, *, - eval_candidate: eval_run_eval_alpha_params.EvalCandidate, - num_examples: Optional[int] | Omit = omit, - scoring_params: Dict[str, eval_run_eval_alpha_params.ScoringParams] | Omit = omit, + benchmark_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -230,13 +213,7 @@ def run_eval_alpha( Run an evaluation on a benchmark. Args: - eval_candidate: A model candidate for evaluation. - - num_examples: Number of examples to evaluate (useful for testing), if not provided, all - examples in the dataset will be evaluated - - scoring_params: Map between scoring function id and parameters for each scoring function you - want to run + benchmark_config: A benchmark configuration for evaluation. extra_headers: Send extra headers @@ -251,12 +228,7 @@ def run_eval_alpha( return self._post( f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs", body=maybe_transform( - { - "eval_candidate": eval_candidate, - "num_examples": num_examples, - "scoring_params": scoring_params, - }, - eval_run_eval_alpha_params.EvalRunEvalAlphaParams, + {"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -385,9 +357,7 @@ async def run_eval( self, benchmark_id: str, *, - eval_candidate: eval_run_eval_params.EvalCandidate, - num_examples: Optional[int] | Omit = omit, - scoring_params: Dict[str, eval_run_eval_params.ScoringParams] | Omit = omit, + benchmark_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -399,13 +369,7 @@ async def run_eval( Run an evaluation on a benchmark. Args: - eval_candidate: A model candidate for evaluation. - - num_examples: Number of examples to evaluate (useful for testing), if not provided, all - examples in the dataset will be evaluated - - scoring_params: Map between scoring function id and parameters for each scoring function you - want to run + benchmark_config: A benchmark configuration for evaluation. 
extra_headers: Send extra headers @@ -420,12 +384,7 @@ async def run_eval( return await self._post( f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs", body=await async_maybe_transform( - { - "eval_candidate": eval_candidate, - "num_examples": num_examples, - "scoring_params": scoring_params, - }, - eval_run_eval_params.EvalRunEvalParams, + {"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -437,9 +396,7 @@ async def run_eval_alpha( self, benchmark_id: str, *, - eval_candidate: eval_run_eval_alpha_params.EvalCandidate, - num_examples: Optional[int] | Omit = omit, - scoring_params: Dict[str, eval_run_eval_alpha_params.ScoringParams] | Omit = omit, + benchmark_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -451,13 +408,7 @@ async def run_eval_alpha( Run an evaluation on a benchmark. Args: - eval_candidate: A model candidate for evaluation. - - num_examples: Number of examples to evaluate (useful for testing), if not provided, all - examples in the dataset will be evaluated - - scoring_params: Map between scoring function id and parameters for each scoring function you - want to run + benchmark_config: A benchmark configuration for evaluation. extra_headers: Send extra headers @@ -472,12 +423,7 @@ async def run_eval_alpha( return await self._post( f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs", body=await async_maybe_transform( - { - "eval_candidate": eval_candidate, - "num_examples": num_examples, - "scoring_params": scoring_params, - }, - eval_run_eval_alpha_params.EvalRunEvalAlphaParams, + {"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout diff --git a/src/llama_stack_client/resources/beta/datasets.py b/src/llama_stack_client/resources/beta/datasets.py index ba7841df..439c91f8 100644 --- a/src/llama_stack_client/resources/beta/datasets.py +++ b/src/llama_stack_client/resources/beta/datasets.py @@ -10,6 +10,7 @@ import typing_extensions from typing import Dict, Type, Iterable, Optional, cast +from typing_extensions import Literal import httpx @@ -205,10 +206,10 @@ def iterrows( def register( self, *, - purpose: object, - source: object, - dataset_id: object | Omit = omit, - metadata: object | Omit = omit, + purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"], + source: dataset_register_params.Source, + dataset_id: Optional[str] | Omit = omit, + metadata: Optional[Dict[str, object]] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -216,10 +217,15 @@ def register( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> DatasetRegisterResponse: - """ - Register a new dataset. + """Register a new dataset. Args: + purpose: Purpose of the dataset. + + Each purpose has a required input data schema. 
+ + source: A dataset that can be obtained from a URI. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -452,10 +458,10 @@ async def iterrows( async def register( self, *, - purpose: object, - source: object, - dataset_id: object | Omit = omit, - metadata: object | Omit = omit, + purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"], + source: dataset_register_params.Source, + dataset_id: Optional[str] | Omit = omit, + metadata: Optional[Dict[str, object]] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -463,10 +469,15 @@ async def register( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> DatasetRegisterResponse: - """ - Register a new dataset. + """Register a new dataset. Args: + purpose: Purpose of the dataset. + + Each purpose has a required input data schema. + + source: A dataset that can be obtained from a URI. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/scoring_functions.py b/src/llama_stack_client/resources/scoring_functions.py index ed341614..cc19e340 100644 --- a/src/llama_stack_client/resources/scoring_functions.py +++ b/src/llama_stack_client/resources/scoring_functions.py @@ -9,7 +9,7 @@ from __future__ import annotations import typing_extensions -from typing import Type, cast +from typing import Type, Optional, cast import httpx @@ -112,12 +112,12 @@ def list( def register( self, *, - description: object, - return_type: object, - scoring_fn_id: object, - params: object | Omit = omit, - provider_id: object | Omit = omit, - provider_scoring_fn_id: object | Omit = omit, + description: str, + return_type: scoring_function_register_params.ReturnType, + scoring_fn_id: str, + params: Optional[scoring_function_register_params.Params] | Omit = omit, + provider_id: Optional[str] | Omit = omit, + provider_scoring_fn_id: Optional[str] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -129,6 +129,8 @@ def register( Register a scoring function. Args: + params: Parameters for LLM-as-judge scoring function configuration. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -273,12 +275,12 @@ async def list( async def register( self, *, - description: object, - return_type: object, - scoring_fn_id: object, - params: object | Omit = omit, - provider_id: object | Omit = omit, - provider_scoring_fn_id: object | Omit = omit, + description: str, + return_type: scoring_function_register_params.ReturnType, + scoring_fn_id: str, + params: Optional[scoring_function_register_params.Params] | Omit = omit, + provider_id: Optional[str] | Omit = omit, + provider_scoring_fn_id: Optional[str] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, @@ -290,6 +292,8 @@ async def register( Register a scoring function. Args: + params: Parameters for LLM-as-judge scoring function configuration. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py b/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py index 658ee047..7d2d87a8 100644 --- a/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py +++ b/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py @@ -8,83 +8,13 @@ from __future__ import annotations -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, Required, TypeAlias, TypedDict +from typing_extensions import Required, TypedDict -from ..._types import SequenceNotStr -from ..shared_params.system_message import SystemMessage -from ..shared_params.sampling_params import SamplingParams +from .benchmark_config_param import BenchmarkConfigParam -__all__ = [ - "EvalRunEvalAlphaParams", - "EvalCandidate", - "ScoringParams", - "ScoringParamsLlmAsJudgeScoringFnParams", - "ScoringParamsRegexParserScoringFnParams", - "ScoringParamsBasicScoringFnParams", -] +__all__ = ["EvalRunEvalAlphaParams"] class EvalRunEvalAlphaParams(TypedDict, total=False): - eval_candidate: Required[EvalCandidate] - """A model candidate for evaluation.""" - - num_examples: Optional[int] - """ - Number of examples to evaluate (useful for testing), if not provided, all - examples in the dataset will be evaluated - """ - - scoring_params: Dict[str, ScoringParams] - """ - Map between scoring function id and parameters for each scoring function you - want to run - """ - - -class EvalCandidate(TypedDict, total=False): - model: Required[str] - - sampling_params: Required[SamplingParams] - """Sampling parameters.""" - - system_message: Optional[SystemMessage] - """A system message providing instructions or context to the model.""" - - type: Literal["model"] - - -class ScoringParamsLlmAsJudgeScoringFnParams(TypedDict, total=False): - judge_model: Required[str] - - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - """Aggregation functions to apply to the scores of each row""" - - judge_score_regexes: SequenceNotStr[str] - """Regexes to extract the answer from generated response""" - - prompt_template: Optional[str] - - type: Literal["llm_as_judge"] - - -class ScoringParamsRegexParserScoringFnParams(TypedDict, total=False): - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - """Aggregation functions to apply to the scores of each row""" - - parsing_regexes: SequenceNotStr[str] - """Regex to extract the answer from generated response""" - - type: Literal["regex_parser"] - - -class ScoringParamsBasicScoringFnParams(TypedDict, total=False): - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - """Aggregation functions to apply to the scores of each row""" - - type: Literal["basic"] - - -ScoringParams: TypeAlias = Union[ - ScoringParamsLlmAsJudgeScoringFnParams, ScoringParamsRegexParserScoringFnParams, ScoringParamsBasicScoringFnParams -] + benchmark_config: Required[BenchmarkConfigParam] + """A benchmark configuration for evaluation.""" diff --git a/src/llama_stack_client/types/alpha/eval_run_eval_params.py b/src/llama_stack_client/types/alpha/eval_run_eval_params.py index 
46f5f5e6..f7e31f1f 100644 --- a/src/llama_stack_client/types/alpha/eval_run_eval_params.py +++ b/src/llama_stack_client/types/alpha/eval_run_eval_params.py @@ -8,83 +8,13 @@ from __future__ import annotations -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, Required, TypeAlias, TypedDict +from typing_extensions import Required, TypedDict -from ..._types import SequenceNotStr -from ..shared_params.system_message import SystemMessage -from ..shared_params.sampling_params import SamplingParams +from .benchmark_config_param import BenchmarkConfigParam -__all__ = [ - "EvalRunEvalParams", - "EvalCandidate", - "ScoringParams", - "ScoringParamsLlmAsJudgeScoringFnParams", - "ScoringParamsRegexParserScoringFnParams", - "ScoringParamsBasicScoringFnParams", -] +__all__ = ["EvalRunEvalParams"] class EvalRunEvalParams(TypedDict, total=False): - eval_candidate: Required[EvalCandidate] - """A model candidate for evaluation.""" - - num_examples: Optional[int] - """ - Number of examples to evaluate (useful for testing), if not provided, all - examples in the dataset will be evaluated - """ - - scoring_params: Dict[str, ScoringParams] - """ - Map between scoring function id and parameters for each scoring function you - want to run - """ - - -class EvalCandidate(TypedDict, total=False): - model: Required[str] - - sampling_params: Required[SamplingParams] - """Sampling parameters.""" - - system_message: Optional[SystemMessage] - """A system message providing instructions or context to the model.""" - - type: Literal["model"] - - -class ScoringParamsLlmAsJudgeScoringFnParams(TypedDict, total=False): - judge_model: Required[str] - - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - """Aggregation functions to apply to the scores of each row""" - - judge_score_regexes: SequenceNotStr[str] - """Regexes to extract the answer from generated response""" - - prompt_template: Optional[str] - - type: Literal["llm_as_judge"] - - -class ScoringParamsRegexParserScoringFnParams(TypedDict, total=False): - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - """Aggregation functions to apply to the scores of each row""" - - parsing_regexes: SequenceNotStr[str] - """Regex to extract the answer from generated response""" - - type: Literal["regex_parser"] - - -class ScoringParamsBasicScoringFnParams(TypedDict, total=False): - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - """Aggregation functions to apply to the scores of each row""" - - type: Literal["basic"] - - -ScoringParams: TypeAlias = Union[ - ScoringParamsLlmAsJudgeScoringFnParams, ScoringParamsRegexParserScoringFnParams, ScoringParamsBasicScoringFnParams -] + benchmark_config: Required[BenchmarkConfigParam] + """A benchmark configuration for evaluation.""" diff --git a/src/llama_stack_client/types/beta/dataset_register_params.py b/src/llama_stack_client/types/beta/dataset_register_params.py index 6192f3e0..639f260d 100644 --- a/src/llama_stack_client/types/beta/dataset_register_params.py +++ b/src/llama_stack_client/types/beta/dataset_register_params.py @@ -8,16 +8,34 @@ from __future__ import annotations -from typing_extensions import Required, TypedDict +from typing import Dict, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict -__all__ = ["DatasetRegisterParams"] +__all__ = 
["DatasetRegisterParams", "Source", "SourceUriDataSource", "SourceRowsDataSource"] class DatasetRegisterParams(TypedDict, total=False): - purpose: Required[object] + purpose: Required[Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"]] + """Purpose of the dataset. Each purpose has a required input data schema.""" - source: Required[object] + source: Required[Source] + """A dataset that can be obtained from a URI.""" - dataset_id: object + dataset_id: Optional[str] - metadata: object + metadata: Optional[Dict[str, object]] + + +class SourceUriDataSource(TypedDict, total=False): + uri: Required[str] + + type: Literal["uri"] + + +class SourceRowsDataSource(TypedDict, total=False): + rows: Required[Iterable[Dict[str, object]]] + + type: Literal["rows"] + + +Source: TypeAlias = Union[SourceUriDataSource, SourceRowsDataSource] diff --git a/src/llama_stack_client/types/scoring_function_register_params.py b/src/llama_stack_client/types/scoring_function_register_params.py index 8780fc15..729ecac5 100644 --- a/src/llama_stack_client/types/scoring_function_register_params.py +++ b/src/llama_stack_client/types/scoring_function_register_params.py @@ -2,20 +2,82 @@ from __future__ import annotations -from typing_extensions import Required, TypedDict +from typing import List, Union, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict -__all__ = ["ScoringFunctionRegisterParams"] +from .._types import SequenceNotStr + +__all__ = [ + "ScoringFunctionRegisterParams", + "ReturnType", + "Params", + "ParamsLlmAsJudgeScoringFnParams", + "ParamsRegexParserScoringFnParams", + "ParamsBasicScoringFnParams", +] class ScoringFunctionRegisterParams(TypedDict, total=False): - description: Required[object] + description: Required[str] + + return_type: Required[ReturnType] + + scoring_fn_id: Required[str] + + params: Optional[Params] + """Parameters for LLM-as-judge scoring function configuration.""" + + provider_id: Optional[str] + + provider_scoring_fn_id: Optional[str] + + +class ReturnType(TypedDict, total=False): + type: Required[ + Literal[ + "string", + "number", + "boolean", + "array", + "object", + "json", + "union", + "chat_completion_input", + "completion_input", + "agent_turn_input", + ] + ] + + +class ParamsLlmAsJudgeScoringFnParams(TypedDict, total=False): + judge_model: Required[str] + + aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + """Aggregation functions to apply to the scores of each row""" + + judge_score_regexes: SequenceNotStr[str] + """Regexes to extract the answer from generated response""" + + prompt_template: Optional[str] + + type: Literal["llm_as_judge"] + + +class ParamsRegexParserScoringFnParams(TypedDict, total=False): + aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + """Aggregation functions to apply to the scores of each row""" + + parsing_regexes: SequenceNotStr[str] + """Regex to extract the answer from generated response""" + + type: Literal["regex_parser"] - return_type: Required[object] - scoring_fn_id: Required[object] +class ParamsBasicScoringFnParams(TypedDict, total=False): + aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + """Aggregation functions to apply to the scores of each row""" - params: object + type: Literal["basic"] - provider_id: object - provider_scoring_fn_id: object +Params: TypeAlias = 
Union[ParamsLlmAsJudgeScoringFnParams, ParamsRegexParserScoringFnParams, ParamsBasicScoringFnParams] diff --git a/tests/api_resources/alpha/test_eval.py b/tests/api_resources/alpha/test_eval.py index bc172330..fc290960 100644 --- a/tests/api_resources/alpha/test_eval.py +++ b/tests/api_resources/alpha/test_eval.py @@ -240,9 +240,11 @@ def test_path_params_evaluate_rows_alpha(self, client: LlamaStackClient) -> None def test_method_run_eval(self, client: LlamaStackClient) -> None: eval = client.alpha.eval.run_eval( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) assert_matches_type(Job, eval, path=["response"]) @@ -251,29 +253,31 @@ def test_method_run_eval(self, client: LlamaStackClient) -> None: def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None: eval = client.alpha.eval.run_eval( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": { - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - "strategy": {"type": "greedy"}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": { + "max_tokens": 0, + "repetition_penalty": 0, + "stop": ["string"], + "strategy": {"type": "greedy"}, + }, + "system_message": { + "content": "string", + "role": "system", + }, + "type": "model", }, - "system_message": { - "content": "string", - "role": "system", + "num_examples": 0, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + "type": "llm_as_judge", + } }, - "type": "model", - }, - num_examples=0, - scoring_params={ - "foo": { - "judge_model": "judge_model", - "aggregation_functions": ["average"], - "judge_score_regexes": ["string"], - "prompt_template": "prompt_template", - "type": "llm_as_judge", - } }, ) assert_matches_type(Job, eval, path=["response"]) @@ -282,9 +286,11 @@ def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None def test_raw_response_run_eval(self, client: LlamaStackClient) -> None: response = client.alpha.eval.with_raw_response.run_eval( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) @@ -297,9 +303,11 @@ def test_raw_response_run_eval(self, client: LlamaStackClient) -> None: def test_streaming_response_run_eval(self, client: LlamaStackClient) -> None: with client.alpha.eval.with_streaming_response.run_eval( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) as response: assert not response.is_closed @@ -315,9 +323,11 @@ def test_path_params_run_eval(self, client: LlamaStackClient) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): client.alpha.eval.with_raw_response.run_eval( benchmark_id="", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) @@ -325,9 +335,11 @@ def test_path_params_run_eval(self, client: LlamaStackClient) -> None: def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None: eval = 
client.alpha.eval.run_eval_alpha( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) assert_matches_type(Job, eval, path=["response"]) @@ -336,29 +348,31 @@ def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None: def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) -> None: eval = client.alpha.eval.run_eval_alpha( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": { - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - "strategy": {"type": "greedy"}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": { + "max_tokens": 0, + "repetition_penalty": 0, + "stop": ["string"], + "strategy": {"type": "greedy"}, + }, + "system_message": { + "content": "string", + "role": "system", + }, + "type": "model", }, - "system_message": { - "content": "string", - "role": "system", + "num_examples": 0, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + "type": "llm_as_judge", + } }, - "type": "model", - }, - num_examples=0, - scoring_params={ - "foo": { - "judge_model": "judge_model", - "aggregation_functions": ["average"], - "judge_score_regexes": ["string"], - "prompt_template": "prompt_template", - "type": "llm_as_judge", - } }, ) assert_matches_type(Job, eval, path=["response"]) @@ -367,9 +381,11 @@ def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) - def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None: response = client.alpha.eval.with_raw_response.run_eval_alpha( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) @@ -382,9 +398,11 @@ def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None: def test_streaming_response_run_eval_alpha(self, client: LlamaStackClient) -> None: with client.alpha.eval.with_streaming_response.run_eval_alpha( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) as response: assert not response.is_closed @@ -400,9 +418,11 @@ def test_path_params_run_eval_alpha(self, client: LlamaStackClient) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): client.alpha.eval.with_raw_response.run_eval_alpha( benchmark_id="", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) @@ -626,9 +646,11 @@ async def test_path_params_evaluate_rows_alpha(self, async_client: AsyncLlamaSta async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.alpha.eval.run_eval( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) assert_matches_type(Job, eval, path=["response"]) @@ -637,29 +659,31 @@ async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> Non async def test_method_run_eval_with_all_params(self, 
async_client: AsyncLlamaStackClient) -> None: eval = await async_client.alpha.eval.run_eval( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": { - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - "strategy": {"type": "greedy"}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": { + "max_tokens": 0, + "repetition_penalty": 0, + "stop": ["string"], + "strategy": {"type": "greedy"}, + }, + "system_message": { + "content": "string", + "role": "system", + }, + "type": "model", }, - "system_message": { - "content": "string", - "role": "system", + "num_examples": 0, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + "type": "llm_as_judge", + } }, - "type": "model", - }, - num_examples=0, - scoring_params={ - "foo": { - "judge_model": "judge_model", - "aggregation_functions": ["average"], - "judge_score_regexes": ["string"], - "prompt_template": "prompt_template", - "type": "llm_as_judge", - } }, ) assert_matches_type(Job, eval, path=["response"]) @@ -668,9 +692,11 @@ async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaSta async def test_raw_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.alpha.eval.with_raw_response.run_eval( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) @@ -683,9 +709,11 @@ async def test_raw_response_run_eval(self, async_client: AsyncLlamaStackClient) async def test_streaming_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.alpha.eval.with_streaming_response.run_eval( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) as response: assert not response.is_closed @@ -701,9 +729,11 @@ async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): await async_client.alpha.eval.with_raw_response.run_eval( benchmark_id="", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) @@ -711,9 +741,11 @@ async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) - async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.alpha.eval.run_eval_alpha( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) assert_matches_type(Job, eval, path=["response"]) @@ -722,29 +754,31 @@ async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient) async def test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.alpha.eval.run_eval_alpha( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": { - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - "strategy": {"type": "greedy"}, + benchmark_config={ + "eval_candidate": { + 
"model": "model", + "sampling_params": { + "max_tokens": 0, + "repetition_penalty": 0, + "stop": ["string"], + "strategy": {"type": "greedy"}, + }, + "system_message": { + "content": "string", + "role": "system", + }, + "type": "model", }, - "system_message": { - "content": "string", - "role": "system", + "num_examples": 0, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + "type": "llm_as_judge", + } }, - "type": "model", - }, - num_examples=0, - scoring_params={ - "foo": { - "judge_model": "judge_model", - "aggregation_functions": ["average"], - "judge_score_regexes": ["string"], - "prompt_template": "prompt_template", - "type": "llm_as_judge", - } }, ) assert_matches_type(Job, eval, path=["response"]) @@ -753,9 +787,11 @@ async def test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLl async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.alpha.eval.with_raw_response.run_eval_alpha( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) @@ -768,9 +804,11 @@ async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackCl async def test_streaming_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.alpha.eval.with_streaming_response.run_eval_alpha( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) as response: assert not response.is_closed @@ -786,8 +824,10 @@ async def test_path_params_run_eval_alpha(self, async_client: AsyncLlamaStackCli with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): await async_client.alpha.eval.with_raw_response.run_eval_alpha( benchmark_id="", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) diff --git a/tests/api_resources/beta/test_datasets.py b/tests/api_resources/beta/test_datasets.py index de5faf60..b40b34de 100644 --- a/tests/api_resources/beta/test_datasets.py +++ b/tests/api_resources/beta/test_datasets.py @@ -186,8 +186,8 @@ def test_path_params_iterrows(self, client: LlamaStackClient) -> None: def test_method_register(self, client: LlamaStackClient) -> None: with pytest.warns(DeprecationWarning): dataset = client.beta.datasets.register( - purpose={}, - source={}, + purpose="post-training/messages", + source={"uri": "uri"}, ) assert_matches_type(DatasetRegisterResponse, dataset, path=["response"]) @@ -196,10 +196,13 @@ def test_method_register(self, client: LlamaStackClient) -> None: def test_method_register_with_all_params(self, client: LlamaStackClient) -> None: with pytest.warns(DeprecationWarning): dataset = client.beta.datasets.register( - purpose={}, - source={}, - dataset_id={}, - metadata={}, + purpose="post-training/messages", + source={ + "uri": "uri", + "type": "uri", + }, + dataset_id="dataset_id", + metadata={"foo": "bar"}, ) assert_matches_type(DatasetRegisterResponse, dataset, path=["response"]) @@ -208,8 +211,8 @@ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None def 
test_raw_response_register(self, client: LlamaStackClient) -> None: with pytest.warns(DeprecationWarning): response = client.beta.datasets.with_raw_response.register( - purpose={}, - source={}, + purpose="post-training/messages", + source={"uri": "uri"}, ) assert response.is_closed is True @@ -221,8 +224,8 @@ def test_raw_response_register(self, client: LlamaStackClient) -> None: def test_streaming_response_register(self, client: LlamaStackClient) -> None: with pytest.warns(DeprecationWarning): with client.beta.datasets.with_streaming_response.register( - purpose={}, - source={}, + purpose="post-training/messages", + source={"uri": "uri"}, ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -437,8 +440,8 @@ async def test_path_params_iterrows(self, async_client: AsyncLlamaStackClient) - async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None: with pytest.warns(DeprecationWarning): dataset = await async_client.beta.datasets.register( - purpose={}, - source={}, + purpose="post-training/messages", + source={"uri": "uri"}, ) assert_matches_type(DatasetRegisterResponse, dataset, path=["response"]) @@ -447,10 +450,13 @@ async def test_method_register(self, async_client: AsyncLlamaStackClient) -> Non async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: with pytest.warns(DeprecationWarning): dataset = await async_client.beta.datasets.register( - purpose={}, - source={}, - dataset_id={}, - metadata={}, + purpose="post-training/messages", + source={ + "uri": "uri", + "type": "uri", + }, + dataset_id="dataset_id", + metadata={"foo": "bar"}, ) assert_matches_type(DatasetRegisterResponse, dataset, path=["response"]) @@ -459,8 +465,8 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None: with pytest.warns(DeprecationWarning): response = await async_client.beta.datasets.with_raw_response.register( - purpose={}, - source={}, + purpose="post-training/messages", + source={"uri": "uri"}, ) assert response.is_closed is True @@ -472,8 +478,8 @@ async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) async def test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None: with pytest.warns(DeprecationWarning): async with async_client.beta.datasets.with_streaming_response.register( - purpose={}, - source={}, + purpose="post-training/messages", + source={"uri": "uri"}, ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/api_resources/test_scoring_functions.py b/tests/api_resources/test_scoring_functions.py index 72bea56c..6b0aa723 100644 --- a/tests/api_resources/test_scoring_functions.py +++ b/tests/api_resources/test_scoring_functions.py @@ -92,9 +92,9 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None: def test_method_register(self, client: LlamaStackClient) -> None: with pytest.warns(DeprecationWarning): scoring_function = client.scoring_functions.register( - description={}, - return_type={}, - scoring_fn_id={}, + description="description", + return_type={"type": "string"}, + scoring_fn_id="scoring_fn_id", ) assert scoring_function is None @@ -103,12 +103,18 @@ def test_method_register(self, client: LlamaStackClient) -> None: def test_method_register_with_all_params(self, client: LlamaStackClient) -> 
None: with pytest.warns(DeprecationWarning): scoring_function = client.scoring_functions.register( - description={}, - return_type={}, - scoring_fn_id={}, - params={}, - provider_id={}, - provider_scoring_fn_id={}, + description="description", + return_type={"type": "string"}, + scoring_fn_id="scoring_fn_id", + params={ + "judge_model": "judge_model", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + "type": "llm_as_judge", + }, + provider_id="provider_id", + provider_scoring_fn_id="provider_scoring_fn_id", ) assert scoring_function is None @@ -117,9 +123,9 @@ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None def test_raw_response_register(self, client: LlamaStackClient) -> None: with pytest.warns(DeprecationWarning): response = client.scoring_functions.with_raw_response.register( - description={}, - return_type={}, - scoring_fn_id={}, + description="description", + return_type={"type": "string"}, + scoring_fn_id="scoring_fn_id", ) assert response.is_closed is True @@ -131,9 +137,9 @@ def test_raw_response_register(self, client: LlamaStackClient) -> None: def test_streaming_response_register(self, client: LlamaStackClient) -> None: with pytest.warns(DeprecationWarning): with client.scoring_functions.with_streaming_response.register( - description={}, - return_type={}, - scoring_fn_id={}, + description="description", + return_type={"type": "string"}, + scoring_fn_id="scoring_fn_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -259,9 +265,9 @@ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None: with pytest.warns(DeprecationWarning): scoring_function = await async_client.scoring_functions.register( - description={}, - return_type={}, - scoring_fn_id={}, + description="description", + return_type={"type": "string"}, + scoring_fn_id="scoring_fn_id", ) assert scoring_function is None @@ -270,12 +276,18 @@ async def test_method_register(self, async_client: AsyncLlamaStackClient) -> Non async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: with pytest.warns(DeprecationWarning): scoring_function = await async_client.scoring_functions.register( - description={}, - return_type={}, - scoring_fn_id={}, - params={}, - provider_id={}, - provider_scoring_fn_id={}, + description="description", + return_type={"type": "string"}, + scoring_fn_id="scoring_fn_id", + params={ + "judge_model": "judge_model", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + "type": "llm_as_judge", + }, + provider_id="provider_id", + provider_scoring_fn_id="provider_scoring_fn_id", ) assert scoring_function is None @@ -284,9 +296,9 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None: with pytest.warns(DeprecationWarning): response = await async_client.scoring_functions.with_raw_response.register( - description={}, - return_type={}, - scoring_fn_id={}, + description="description", + return_type={"type": "string"}, + scoring_fn_id="scoring_fn_id", ) assert response.is_closed is True @@ -298,9 +310,9 @@ async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) async def 
test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None: with pytest.warns(DeprecationWarning): async with async_client.scoring_functions.with_streaming_response.register( - description={}, - return_type={}, - scoring_fn_id={}, + description="description", + return_type={"type": "string"}, + scoring_fn_id="scoring_fn_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" From fa8bd6557d98d7f8cc068bf71813fbf196795801 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 17 Nov 2025 19:37:33 +0000 Subject: [PATCH 2/2] release: 0.4.0-alpha.10 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 8 ++++++++ pyproject.toml | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index d12052d9..8615e587 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.4.0-alpha.9" + ".": "0.4.0-alpha.10" } diff --git a/CHANGELOG.md b/CHANGELOG.md index c8d2fac9..f902c89d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 0.4.0-alpha.10 (2025-11-17) + +Full Changelog: [v0.4.0-alpha.9...v0.4.0-alpha.10](https://github.com/llamastack/llama-stack-client-python/compare/v0.4.0-alpha.9...v0.4.0-alpha.10) + +### Bug Fixes + +* **openapi:** restore embedded request wrappers ([261e364](https://github.com/llamastack/llama-stack-client-python/commit/261e3640c942c60860af08cd4d205d8e402bb702)) + ## 0.4.0-alpha.9 (2025-11-14) Full Changelog: [v0.4.0-alpha.8...v0.4.0-alpha.9](https://github.com/llamastack/llama-stack-client-python/compare/v0.4.0-alpha.8...v0.4.0-alpha.9) diff --git a/pyproject.toml b/pyproject.toml index c4d1d77f..e4ed9244 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "llama_stack_client" -version = "0.4.0-alpha.9" +version = "0.4.0-alpha.10" description = "The official Python library for the llama-stack-client API" dynamic = ["readme"] license = "MIT"
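
Usage note for the wrapper change above: `run_eval` and `run_eval_alpha` now accept a single `benchmark_config` argument in place of the flattened `eval_candidate`, `num_examples`, and `scoring_params` keywords. The sketch below is a minimal illustration of the migrated call shape, mirroring the values used in the updated tests; the base URL, benchmark id, and model id are placeholders, and client construction details may differ in your deployment.

    from llama_stack_client import LlamaStackClient

    # Placeholder endpoint; point this at your own Llama Stack deployment.
    client = LlamaStackClient(base_url="http://localhost:8321")

    # Previously (flattened request fields):
    #   client.alpha.eval.run_eval(
    #       benchmark_id="my-benchmark",
    #       eval_candidate={"model": "my-model", "sampling_params": {}},
    #       num_examples=10,
    #   )

    # With this change, the same fields live inside the benchmark_config wrapper:
    job = client.alpha.eval.run_eval(
        benchmark_id="my-benchmark",  # placeholder benchmark id
        benchmark_config={
            "eval_candidate": {
                "model": "my-model",  # placeholder model id
                "sampling_params": {},
            },
            "num_examples": 10,
        },
    )
    print(job)

The deprecated `beta.datasets.register` and `scoring_functions.register` methods keep their argument names but now expect typed values instead of bare `object` placeholders, for example `purpose="post-training/messages"` with `source={"uri": "uri"}`, and `return_type={"type": "string"}` with a string `scoring_fn_id`, as exercised in the updated tests.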