From 261e3640c942c60860af08cd4d205d8e402bb702 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 17 Nov 2025 19:37:08 +0000 Subject: [PATCH 1/2] fix(openapi): restore embedded request wrappers --- .stats.yml | 4 +- .../resources/alpha/eval/eval.py | 82 +---- .../resources/beta/datasets.py | 35 +- .../resources/scoring_functions.py | 30 +- .../types/alpha/eval_run_eval_alpha_params.py | 80 +---- .../types/alpha/eval_run_eval_params.py | 80 +---- .../types/beta/dataset_register_params.py | 30 +- .../types/scoring_function_register_params.py | 78 ++++- tests/api_resources/alpha/test_eval.py | 304 ++++++++++-------- tests/api_resources/beta/test_datasets.py | 46 +-- tests/api_resources/test_scoring_functions.py | 72 +++-- 11 files changed, 400 insertions(+), 441 deletions(-) diff --git a/.stats.yml b/.stats.yml index 253e4ba9..db94672e 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 103 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-2b99a80543f8bc8fa164167693c214651ac8e710f4726fb5869183b4d6c71a03.yml -openapi_spec_hash: a5632057f5e4d956a71c20a79c0d879c +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-aab1b331382f758fc255f765e73b62fedf463cf0748bc11b2b08974de9ac816a.yml +openapi_spec_hash: f717a21f47419aa51e4d9298aa68cc45 config_hash: 0017f6c419cbbf7b949f9b2842917a79 diff --git a/src/llama_stack_client/resources/alpha/eval/eval.py b/src/llama_stack_client/resources/alpha/eval/eval.py index c6532721..76969a4b 100644 --- a/src/llama_stack_client/resources/alpha/eval/eval.py +++ b/src/llama_stack_client/resources/alpha/eval/eval.py @@ -8,7 +8,7 @@ from __future__ import annotations -from typing import Dict, Iterable, Optional +from typing import Dict, Iterable import httpx @@ -20,7 +20,7 @@ JobsResourceWithStreamingResponse, AsyncJobsResourceWithStreamingResponse, ) -from ...._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given +from ...._types import Body, Query, Headers, NotGiven, SequenceNotStr, not_given from ...._utils import maybe_transform, async_maybe_transform from ...._compat import cached_property from ...._resource import SyncAPIResource, AsyncAPIResource @@ -164,9 +164,7 @@ def run_eval( self, benchmark_id: str, *, - eval_candidate: eval_run_eval_params.EvalCandidate, - num_examples: Optional[int] | Omit = omit, - scoring_params: Dict[str, eval_run_eval_params.ScoringParams] | Omit = omit, + benchmark_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -178,13 +176,7 @@ def run_eval( Run an evaluation on a benchmark. Args: - eval_candidate: A model candidate for evaluation. - - num_examples: Number of examples to evaluate (useful for testing), if not provided, all - examples in the dataset will be evaluated - - scoring_params: Map between scoring function id and parameters for each scoring function you - want to run + benchmark_config: A benchmark configuration for evaluation. 
extra_headers: Send extra headers @@ -198,14 +190,7 @@ def run_eval( raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") return self._post( f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs", - body=maybe_transform( - { - "eval_candidate": eval_candidate, - "num_examples": num_examples, - "scoring_params": scoring_params, - }, - eval_run_eval_params.EvalRunEvalParams, - ), + body=maybe_transform({"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -216,9 +201,7 @@ def run_eval_alpha( self, benchmark_id: str, *, - eval_candidate: eval_run_eval_alpha_params.EvalCandidate, - num_examples: Optional[int] | Omit = omit, - scoring_params: Dict[str, eval_run_eval_alpha_params.ScoringParams] | Omit = omit, + benchmark_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -230,13 +213,7 @@ def run_eval_alpha( Run an evaluation on a benchmark. Args: - eval_candidate: A model candidate for evaluation. - - num_examples: Number of examples to evaluate (useful for testing), if not provided, all - examples in the dataset will be evaluated - - scoring_params: Map between scoring function id and parameters for each scoring function you - want to run + benchmark_config: A benchmark configuration for evaluation. extra_headers: Send extra headers @@ -251,12 +228,7 @@ def run_eval_alpha( return self._post( f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs", body=maybe_transform( - { - "eval_candidate": eval_candidate, - "num_examples": num_examples, - "scoring_params": scoring_params, - }, - eval_run_eval_alpha_params.EvalRunEvalAlphaParams, + {"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -385,9 +357,7 @@ async def run_eval( self, benchmark_id: str, *, - eval_candidate: eval_run_eval_params.EvalCandidate, - num_examples: Optional[int] | Omit = omit, - scoring_params: Dict[str, eval_run_eval_params.ScoringParams] | Omit = omit, + benchmark_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -399,13 +369,7 @@ async def run_eval( Run an evaluation on a benchmark. Args: - eval_candidate: A model candidate for evaluation. - - num_examples: Number of examples to evaluate (useful for testing), if not provided, all - examples in the dataset will be evaluated - - scoring_params: Map between scoring function id and parameters for each scoring function you - want to run + benchmark_config: A benchmark configuration for evaluation. 
extra_headers: Send extra headers @@ -420,12 +384,7 @@ async def run_eval( return await self._post( f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs", body=await async_maybe_transform( - { - "eval_candidate": eval_candidate, - "num_examples": num_examples, - "scoring_params": scoring_params, - }, - eval_run_eval_params.EvalRunEvalParams, + {"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -437,9 +396,7 @@ async def run_eval_alpha( self, benchmark_id: str, *, - eval_candidate: eval_run_eval_alpha_params.EvalCandidate, - num_examples: Optional[int] | Omit = omit, - scoring_params: Dict[str, eval_run_eval_alpha_params.ScoringParams] | Omit = omit, + benchmark_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -451,13 +408,7 @@ async def run_eval_alpha( Run an evaluation on a benchmark. Args: - eval_candidate: A model candidate for evaluation. - - num_examples: Number of examples to evaluate (useful for testing), if not provided, all - examples in the dataset will be evaluated - - scoring_params: Map between scoring function id and parameters for each scoring function you - want to run + benchmark_config: A benchmark configuration for evaluation. extra_headers: Send extra headers @@ -472,12 +423,7 @@ async def run_eval_alpha( return await self._post( f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs", body=await async_maybe_transform( - { - "eval_candidate": eval_candidate, - "num_examples": num_examples, - "scoring_params": scoring_params, - }, - eval_run_eval_alpha_params.EvalRunEvalAlphaParams, + {"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout diff --git a/src/llama_stack_client/resources/beta/datasets.py b/src/llama_stack_client/resources/beta/datasets.py index ba7841df..439c91f8 100644 --- a/src/llama_stack_client/resources/beta/datasets.py +++ b/src/llama_stack_client/resources/beta/datasets.py @@ -10,6 +10,7 @@ import typing_extensions from typing import Dict, Type, Iterable, Optional, cast +from typing_extensions import Literal import httpx @@ -205,10 +206,10 @@ def iterrows( def register( self, *, - purpose: object, - source: object, - dataset_id: object | Omit = omit, - metadata: object | Omit = omit, + purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"], + source: dataset_register_params.Source, + dataset_id: Optional[str] | Omit = omit, + metadata: Optional[Dict[str, object]] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -216,10 +217,15 @@ def register( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> DatasetRegisterResponse: - """ - Register a new dataset. + """Register a new dataset. Args: + purpose: Purpose of the dataset. + + Each purpose has a required input data schema. 
+ + source: A dataset that can be obtained from a URI. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -452,10 +458,10 @@ async def iterrows( async def register( self, *, - purpose: object, - source: object, - dataset_id: object | Omit = omit, - metadata: object | Omit = omit, + purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"], + source: dataset_register_params.Source, + dataset_id: Optional[str] | Omit = omit, + metadata: Optional[Dict[str, object]] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -463,10 +469,15 @@ async def register( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> DatasetRegisterResponse: - """ - Register a new dataset. + """Register a new dataset. Args: + purpose: Purpose of the dataset. + + Each purpose has a required input data schema. + + source: A dataset that can be obtained from a URI. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/scoring_functions.py b/src/llama_stack_client/resources/scoring_functions.py index ed341614..cc19e340 100644 --- a/src/llama_stack_client/resources/scoring_functions.py +++ b/src/llama_stack_client/resources/scoring_functions.py @@ -9,7 +9,7 @@ from __future__ import annotations import typing_extensions -from typing import Type, cast +from typing import Type, Optional, cast import httpx @@ -112,12 +112,12 @@ def list( def register( self, *, - description: object, - return_type: object, - scoring_fn_id: object, - params: object | Omit = omit, - provider_id: object | Omit = omit, - provider_scoring_fn_id: object | Omit = omit, + description: str, + return_type: scoring_function_register_params.ReturnType, + scoring_fn_id: str, + params: Optional[scoring_function_register_params.Params] | Omit = omit, + provider_id: Optional[str] | Omit = omit, + provider_scoring_fn_id: Optional[str] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -129,6 +129,8 @@ def register( Register a scoring function. Args: + params: Parameters for LLM-as-judge scoring function configuration. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -273,12 +275,12 @@ async def list( async def register( self, *, - description: object, - return_type: object, - scoring_fn_id: object, - params: object | Omit = omit, - provider_id: object | Omit = omit, - provider_scoring_fn_id: object | Omit = omit, + description: str, + return_type: scoring_function_register_params.ReturnType, + scoring_fn_id: str, + params: Optional[scoring_function_register_params.Params] | Omit = omit, + provider_id: Optional[str] | Omit = omit, + provider_scoring_fn_id: Optional[str] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, @@ -290,6 +292,8 @@ async def register( Register a scoring function. Args: + params: Parameters for LLM-as-judge scoring function configuration. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py b/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py index 658ee047..7d2d87a8 100644 --- a/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py +++ b/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py @@ -8,83 +8,13 @@ from __future__ import annotations -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, Required, TypeAlias, TypedDict +from typing_extensions import Required, TypedDict -from ..._types import SequenceNotStr -from ..shared_params.system_message import SystemMessage -from ..shared_params.sampling_params import SamplingParams +from .benchmark_config_param import BenchmarkConfigParam -__all__ = [ - "EvalRunEvalAlphaParams", - "EvalCandidate", - "ScoringParams", - "ScoringParamsLlmAsJudgeScoringFnParams", - "ScoringParamsRegexParserScoringFnParams", - "ScoringParamsBasicScoringFnParams", -] +__all__ = ["EvalRunEvalAlphaParams"] class EvalRunEvalAlphaParams(TypedDict, total=False): - eval_candidate: Required[EvalCandidate] - """A model candidate for evaluation.""" - - num_examples: Optional[int] - """ - Number of examples to evaluate (useful for testing), if not provided, all - examples in the dataset will be evaluated - """ - - scoring_params: Dict[str, ScoringParams] - """ - Map between scoring function id and parameters for each scoring function you - want to run - """ - - -class EvalCandidate(TypedDict, total=False): - model: Required[str] - - sampling_params: Required[SamplingParams] - """Sampling parameters.""" - - system_message: Optional[SystemMessage] - """A system message providing instructions or context to the model.""" - - type: Literal["model"] - - -class ScoringParamsLlmAsJudgeScoringFnParams(TypedDict, total=False): - judge_model: Required[str] - - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - """Aggregation functions to apply to the scores of each row""" - - judge_score_regexes: SequenceNotStr[str] - """Regexes to extract the answer from generated response""" - - prompt_template: Optional[str] - - type: Literal["llm_as_judge"] - - -class ScoringParamsRegexParserScoringFnParams(TypedDict, total=False): - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - """Aggregation functions to apply to the scores of each row""" - - parsing_regexes: SequenceNotStr[str] - """Regex to extract the answer from generated response""" - - type: Literal["regex_parser"] - - -class ScoringParamsBasicScoringFnParams(TypedDict, total=False): - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - """Aggregation functions to apply to the scores of each row""" - - type: Literal["basic"] - - -ScoringParams: TypeAlias = Union[ - ScoringParamsLlmAsJudgeScoringFnParams, ScoringParamsRegexParserScoringFnParams, ScoringParamsBasicScoringFnParams -] + benchmark_config: Required[BenchmarkConfigParam] + """A benchmark configuration for evaluation.""" diff --git a/src/llama_stack_client/types/alpha/eval_run_eval_params.py b/src/llama_stack_client/types/alpha/eval_run_eval_params.py index 
46f5f5e6..f7e31f1f 100644 --- a/src/llama_stack_client/types/alpha/eval_run_eval_params.py +++ b/src/llama_stack_client/types/alpha/eval_run_eval_params.py @@ -8,83 +8,13 @@ from __future__ import annotations -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, Required, TypeAlias, TypedDict +from typing_extensions import Required, TypedDict -from ..._types import SequenceNotStr -from ..shared_params.system_message import SystemMessage -from ..shared_params.sampling_params import SamplingParams +from .benchmark_config_param import BenchmarkConfigParam -__all__ = [ - "EvalRunEvalParams", - "EvalCandidate", - "ScoringParams", - "ScoringParamsLlmAsJudgeScoringFnParams", - "ScoringParamsRegexParserScoringFnParams", - "ScoringParamsBasicScoringFnParams", -] +__all__ = ["EvalRunEvalParams"] class EvalRunEvalParams(TypedDict, total=False): - eval_candidate: Required[EvalCandidate] - """A model candidate for evaluation.""" - - num_examples: Optional[int] - """ - Number of examples to evaluate (useful for testing), if not provided, all - examples in the dataset will be evaluated - """ - - scoring_params: Dict[str, ScoringParams] - """ - Map between scoring function id and parameters for each scoring function you - want to run - """ - - -class EvalCandidate(TypedDict, total=False): - model: Required[str] - - sampling_params: Required[SamplingParams] - """Sampling parameters.""" - - system_message: Optional[SystemMessage] - """A system message providing instructions or context to the model.""" - - type: Literal["model"] - - -class ScoringParamsLlmAsJudgeScoringFnParams(TypedDict, total=False): - judge_model: Required[str] - - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - """Aggregation functions to apply to the scores of each row""" - - judge_score_regexes: SequenceNotStr[str] - """Regexes to extract the answer from generated response""" - - prompt_template: Optional[str] - - type: Literal["llm_as_judge"] - - -class ScoringParamsRegexParserScoringFnParams(TypedDict, total=False): - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - """Aggregation functions to apply to the scores of each row""" - - parsing_regexes: SequenceNotStr[str] - """Regex to extract the answer from generated response""" - - type: Literal["regex_parser"] - - -class ScoringParamsBasicScoringFnParams(TypedDict, total=False): - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - """Aggregation functions to apply to the scores of each row""" - - type: Literal["basic"] - - -ScoringParams: TypeAlias = Union[ - ScoringParamsLlmAsJudgeScoringFnParams, ScoringParamsRegexParserScoringFnParams, ScoringParamsBasicScoringFnParams -] + benchmark_config: Required[BenchmarkConfigParam] + """A benchmark configuration for evaluation.""" diff --git a/src/llama_stack_client/types/beta/dataset_register_params.py b/src/llama_stack_client/types/beta/dataset_register_params.py index 6192f3e0..639f260d 100644 --- a/src/llama_stack_client/types/beta/dataset_register_params.py +++ b/src/llama_stack_client/types/beta/dataset_register_params.py @@ -8,16 +8,34 @@ from __future__ import annotations -from typing_extensions import Required, TypedDict +from typing import Dict, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict -__all__ = ["DatasetRegisterParams"] +__all__ = 
["DatasetRegisterParams", "Source", "SourceUriDataSource", "SourceRowsDataSource"] class DatasetRegisterParams(TypedDict, total=False): - purpose: Required[object] + purpose: Required[Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"]] + """Purpose of the dataset. Each purpose has a required input data schema.""" - source: Required[object] + source: Required[Source] + """A dataset that can be obtained from a URI.""" - dataset_id: object + dataset_id: Optional[str] - metadata: object + metadata: Optional[Dict[str, object]] + + +class SourceUriDataSource(TypedDict, total=False): + uri: Required[str] + + type: Literal["uri"] + + +class SourceRowsDataSource(TypedDict, total=False): + rows: Required[Iterable[Dict[str, object]]] + + type: Literal["rows"] + + +Source: TypeAlias = Union[SourceUriDataSource, SourceRowsDataSource] diff --git a/src/llama_stack_client/types/scoring_function_register_params.py b/src/llama_stack_client/types/scoring_function_register_params.py index 8780fc15..729ecac5 100644 --- a/src/llama_stack_client/types/scoring_function_register_params.py +++ b/src/llama_stack_client/types/scoring_function_register_params.py @@ -2,20 +2,82 @@ from __future__ import annotations -from typing_extensions import Required, TypedDict +from typing import List, Union, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict -__all__ = ["ScoringFunctionRegisterParams"] +from .._types import SequenceNotStr + +__all__ = [ + "ScoringFunctionRegisterParams", + "ReturnType", + "Params", + "ParamsLlmAsJudgeScoringFnParams", + "ParamsRegexParserScoringFnParams", + "ParamsBasicScoringFnParams", +] class ScoringFunctionRegisterParams(TypedDict, total=False): - description: Required[object] + description: Required[str] + + return_type: Required[ReturnType] + + scoring_fn_id: Required[str] + + params: Optional[Params] + """Parameters for LLM-as-judge scoring function configuration.""" + + provider_id: Optional[str] + + provider_scoring_fn_id: Optional[str] + + +class ReturnType(TypedDict, total=False): + type: Required[ + Literal[ + "string", + "number", + "boolean", + "array", + "object", + "json", + "union", + "chat_completion_input", + "completion_input", + "agent_turn_input", + ] + ] + + +class ParamsLlmAsJudgeScoringFnParams(TypedDict, total=False): + judge_model: Required[str] + + aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + """Aggregation functions to apply to the scores of each row""" + + judge_score_regexes: SequenceNotStr[str] + """Regexes to extract the answer from generated response""" + + prompt_template: Optional[str] + + type: Literal["llm_as_judge"] + + +class ParamsRegexParserScoringFnParams(TypedDict, total=False): + aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + """Aggregation functions to apply to the scores of each row""" + + parsing_regexes: SequenceNotStr[str] + """Regex to extract the answer from generated response""" + + type: Literal["regex_parser"] - return_type: Required[object] - scoring_fn_id: Required[object] +class ParamsBasicScoringFnParams(TypedDict, total=False): + aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + """Aggregation functions to apply to the scores of each row""" - params: object + type: Literal["basic"] - provider_id: object - provider_scoring_fn_id: object +Params: TypeAlias = 
Union[ParamsLlmAsJudgeScoringFnParams, ParamsRegexParserScoringFnParams, ParamsBasicScoringFnParams] diff --git a/tests/api_resources/alpha/test_eval.py b/tests/api_resources/alpha/test_eval.py index bc172330..fc290960 100644 --- a/tests/api_resources/alpha/test_eval.py +++ b/tests/api_resources/alpha/test_eval.py @@ -240,9 +240,11 @@ def test_path_params_evaluate_rows_alpha(self, client: LlamaStackClient) -> None def test_method_run_eval(self, client: LlamaStackClient) -> None: eval = client.alpha.eval.run_eval( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) assert_matches_type(Job, eval, path=["response"]) @@ -251,29 +253,31 @@ def test_method_run_eval(self, client: LlamaStackClient) -> None: def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None: eval = client.alpha.eval.run_eval( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": { - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - "strategy": {"type": "greedy"}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": { + "max_tokens": 0, + "repetition_penalty": 0, + "stop": ["string"], + "strategy": {"type": "greedy"}, + }, + "system_message": { + "content": "string", + "role": "system", + }, + "type": "model", }, - "system_message": { - "content": "string", - "role": "system", + "num_examples": 0, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + "type": "llm_as_judge", + } }, - "type": "model", - }, - num_examples=0, - scoring_params={ - "foo": { - "judge_model": "judge_model", - "aggregation_functions": ["average"], - "judge_score_regexes": ["string"], - "prompt_template": "prompt_template", - "type": "llm_as_judge", - } }, ) assert_matches_type(Job, eval, path=["response"]) @@ -282,9 +286,11 @@ def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None def test_raw_response_run_eval(self, client: LlamaStackClient) -> None: response = client.alpha.eval.with_raw_response.run_eval( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) @@ -297,9 +303,11 @@ def test_raw_response_run_eval(self, client: LlamaStackClient) -> None: def test_streaming_response_run_eval(self, client: LlamaStackClient) -> None: with client.alpha.eval.with_streaming_response.run_eval( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) as response: assert not response.is_closed @@ -315,9 +323,11 @@ def test_path_params_run_eval(self, client: LlamaStackClient) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): client.alpha.eval.with_raw_response.run_eval( benchmark_id="", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) @@ -325,9 +335,11 @@ def test_path_params_run_eval(self, client: LlamaStackClient) -> None: def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None: eval = 
client.alpha.eval.run_eval_alpha( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) assert_matches_type(Job, eval, path=["response"]) @@ -336,29 +348,31 @@ def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None: def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) -> None: eval = client.alpha.eval.run_eval_alpha( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": { - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - "strategy": {"type": "greedy"}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": { + "max_tokens": 0, + "repetition_penalty": 0, + "stop": ["string"], + "strategy": {"type": "greedy"}, + }, + "system_message": { + "content": "string", + "role": "system", + }, + "type": "model", }, - "system_message": { - "content": "string", - "role": "system", + "num_examples": 0, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + "type": "llm_as_judge", + } }, - "type": "model", - }, - num_examples=0, - scoring_params={ - "foo": { - "judge_model": "judge_model", - "aggregation_functions": ["average"], - "judge_score_regexes": ["string"], - "prompt_template": "prompt_template", - "type": "llm_as_judge", - } }, ) assert_matches_type(Job, eval, path=["response"]) @@ -367,9 +381,11 @@ def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) - def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None: response = client.alpha.eval.with_raw_response.run_eval_alpha( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) @@ -382,9 +398,11 @@ def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None: def test_streaming_response_run_eval_alpha(self, client: LlamaStackClient) -> None: with client.alpha.eval.with_streaming_response.run_eval_alpha( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) as response: assert not response.is_closed @@ -400,9 +418,11 @@ def test_path_params_run_eval_alpha(self, client: LlamaStackClient) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): client.alpha.eval.with_raw_response.run_eval_alpha( benchmark_id="", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) @@ -626,9 +646,11 @@ async def test_path_params_evaluate_rows_alpha(self, async_client: AsyncLlamaSta async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.alpha.eval.run_eval( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) assert_matches_type(Job, eval, path=["response"]) @@ -637,29 +659,31 @@ async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> Non async def test_method_run_eval_with_all_params(self, 
async_client: AsyncLlamaStackClient) -> None: eval = await async_client.alpha.eval.run_eval( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": { - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - "strategy": {"type": "greedy"}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": { + "max_tokens": 0, + "repetition_penalty": 0, + "stop": ["string"], + "strategy": {"type": "greedy"}, + }, + "system_message": { + "content": "string", + "role": "system", + }, + "type": "model", }, - "system_message": { - "content": "string", - "role": "system", + "num_examples": 0, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + "type": "llm_as_judge", + } }, - "type": "model", - }, - num_examples=0, - scoring_params={ - "foo": { - "judge_model": "judge_model", - "aggregation_functions": ["average"], - "judge_score_regexes": ["string"], - "prompt_template": "prompt_template", - "type": "llm_as_judge", - } }, ) assert_matches_type(Job, eval, path=["response"]) @@ -668,9 +692,11 @@ async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaSta async def test_raw_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.alpha.eval.with_raw_response.run_eval( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) @@ -683,9 +709,11 @@ async def test_raw_response_run_eval(self, async_client: AsyncLlamaStackClient) async def test_streaming_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.alpha.eval.with_streaming_response.run_eval( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) as response: assert not response.is_closed @@ -701,9 +729,11 @@ async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): await async_client.alpha.eval.with_raw_response.run_eval( benchmark_id="", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) @@ -711,9 +741,11 @@ async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) - async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.alpha.eval.run_eval_alpha( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) assert_matches_type(Job, eval, path=["response"]) @@ -722,29 +754,31 @@ async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient) async def test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.alpha.eval.run_eval_alpha( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": { - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - "strategy": {"type": "greedy"}, + benchmark_config={ + "eval_candidate": { + 
"model": "model", + "sampling_params": { + "max_tokens": 0, + "repetition_penalty": 0, + "stop": ["string"], + "strategy": {"type": "greedy"}, + }, + "system_message": { + "content": "string", + "role": "system", + }, + "type": "model", }, - "system_message": { - "content": "string", - "role": "system", + "num_examples": 0, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + "type": "llm_as_judge", + } }, - "type": "model", - }, - num_examples=0, - scoring_params={ - "foo": { - "judge_model": "judge_model", - "aggregation_functions": ["average"], - "judge_score_regexes": ["string"], - "prompt_template": "prompt_template", - "type": "llm_as_judge", - } }, ) assert_matches_type(Job, eval, path=["response"]) @@ -753,9 +787,11 @@ async def test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLl async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.alpha.eval.with_raw_response.run_eval_alpha( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) @@ -768,9 +804,11 @@ async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackCl async def test_streaming_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.alpha.eval.with_streaming_response.run_eval_alpha( benchmark_id="benchmark_id", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) as response: assert not response.is_closed @@ -786,8 +824,10 @@ async def test_path_params_run_eval_alpha(self, async_client: AsyncLlamaStackCli with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): await async_client.alpha.eval.with_raw_response.run_eval_alpha( benchmark_id="", - eval_candidate={ - "model": "model", - "sampling_params": {}, + benchmark_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {}, + } }, ) diff --git a/tests/api_resources/beta/test_datasets.py b/tests/api_resources/beta/test_datasets.py index de5faf60..b40b34de 100644 --- a/tests/api_resources/beta/test_datasets.py +++ b/tests/api_resources/beta/test_datasets.py @@ -186,8 +186,8 @@ def test_path_params_iterrows(self, client: LlamaStackClient) -> None: def test_method_register(self, client: LlamaStackClient) -> None: with pytest.warns(DeprecationWarning): dataset = client.beta.datasets.register( - purpose={}, - source={}, + purpose="post-training/messages", + source={"uri": "uri"}, ) assert_matches_type(DatasetRegisterResponse, dataset, path=["response"]) @@ -196,10 +196,13 @@ def test_method_register(self, client: LlamaStackClient) -> None: def test_method_register_with_all_params(self, client: LlamaStackClient) -> None: with pytest.warns(DeprecationWarning): dataset = client.beta.datasets.register( - purpose={}, - source={}, - dataset_id={}, - metadata={}, + purpose="post-training/messages", + source={ + "uri": "uri", + "type": "uri", + }, + dataset_id="dataset_id", + metadata={"foo": "bar"}, ) assert_matches_type(DatasetRegisterResponse, dataset, path=["response"]) @@ -208,8 +211,8 @@ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None def 
test_raw_response_register(self, client: LlamaStackClient) -> None: with pytest.warns(DeprecationWarning): response = client.beta.datasets.with_raw_response.register( - purpose={}, - source={}, + purpose="post-training/messages", + source={"uri": "uri"}, ) assert response.is_closed is True @@ -221,8 +224,8 @@ def test_raw_response_register(self, client: LlamaStackClient) -> None: def test_streaming_response_register(self, client: LlamaStackClient) -> None: with pytest.warns(DeprecationWarning): with client.beta.datasets.with_streaming_response.register( - purpose={}, - source={}, + purpose="post-training/messages", + source={"uri": "uri"}, ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -437,8 +440,8 @@ async def test_path_params_iterrows(self, async_client: AsyncLlamaStackClient) - async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None: with pytest.warns(DeprecationWarning): dataset = await async_client.beta.datasets.register( - purpose={}, - source={}, + purpose="post-training/messages", + source={"uri": "uri"}, ) assert_matches_type(DatasetRegisterResponse, dataset, path=["response"]) @@ -447,10 +450,13 @@ async def test_method_register(self, async_client: AsyncLlamaStackClient) -> Non async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: with pytest.warns(DeprecationWarning): dataset = await async_client.beta.datasets.register( - purpose={}, - source={}, - dataset_id={}, - metadata={}, + purpose="post-training/messages", + source={ + "uri": "uri", + "type": "uri", + }, + dataset_id="dataset_id", + metadata={"foo": "bar"}, ) assert_matches_type(DatasetRegisterResponse, dataset, path=["response"]) @@ -459,8 +465,8 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None: with pytest.warns(DeprecationWarning): response = await async_client.beta.datasets.with_raw_response.register( - purpose={}, - source={}, + purpose="post-training/messages", + source={"uri": "uri"}, ) assert response.is_closed is True @@ -472,8 +478,8 @@ async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) async def test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None: with pytest.warns(DeprecationWarning): async with async_client.beta.datasets.with_streaming_response.register( - purpose={}, - source={}, + purpose="post-training/messages", + source={"uri": "uri"}, ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/api_resources/test_scoring_functions.py b/tests/api_resources/test_scoring_functions.py index 72bea56c..6b0aa723 100644 --- a/tests/api_resources/test_scoring_functions.py +++ b/tests/api_resources/test_scoring_functions.py @@ -92,9 +92,9 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None: def test_method_register(self, client: LlamaStackClient) -> None: with pytest.warns(DeprecationWarning): scoring_function = client.scoring_functions.register( - description={}, - return_type={}, - scoring_fn_id={}, + description="description", + return_type={"type": "string"}, + scoring_fn_id="scoring_fn_id", ) assert scoring_function is None @@ -103,12 +103,18 @@ def test_method_register(self, client: LlamaStackClient) -> None: def test_method_register_with_all_params(self, client: LlamaStackClient) -> 
None: with pytest.warns(DeprecationWarning): scoring_function = client.scoring_functions.register( - description={}, - return_type={}, - scoring_fn_id={}, - params={}, - provider_id={}, - provider_scoring_fn_id={}, + description="description", + return_type={"type": "string"}, + scoring_fn_id="scoring_fn_id", + params={ + "judge_model": "judge_model", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + "type": "llm_as_judge", + }, + provider_id="provider_id", + provider_scoring_fn_id="provider_scoring_fn_id", ) assert scoring_function is None @@ -117,9 +123,9 @@ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None def test_raw_response_register(self, client: LlamaStackClient) -> None: with pytest.warns(DeprecationWarning): response = client.scoring_functions.with_raw_response.register( - description={}, - return_type={}, - scoring_fn_id={}, + description="description", + return_type={"type": "string"}, + scoring_fn_id="scoring_fn_id", ) assert response.is_closed is True @@ -131,9 +137,9 @@ def test_raw_response_register(self, client: LlamaStackClient) -> None: def test_streaming_response_register(self, client: LlamaStackClient) -> None: with pytest.warns(DeprecationWarning): with client.scoring_functions.with_streaming_response.register( - description={}, - return_type={}, - scoring_fn_id={}, + description="description", + return_type={"type": "string"}, + scoring_fn_id="scoring_fn_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -259,9 +265,9 @@ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None: with pytest.warns(DeprecationWarning): scoring_function = await async_client.scoring_functions.register( - description={}, - return_type={}, - scoring_fn_id={}, + description="description", + return_type={"type": "string"}, + scoring_fn_id="scoring_fn_id", ) assert scoring_function is None @@ -270,12 +276,18 @@ async def test_method_register(self, async_client: AsyncLlamaStackClient) -> Non async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: with pytest.warns(DeprecationWarning): scoring_function = await async_client.scoring_functions.register( - description={}, - return_type={}, - scoring_fn_id={}, - params={}, - provider_id={}, - provider_scoring_fn_id={}, + description="description", + return_type={"type": "string"}, + scoring_fn_id="scoring_fn_id", + params={ + "judge_model": "judge_model", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + "type": "llm_as_judge", + }, + provider_id="provider_id", + provider_scoring_fn_id="provider_scoring_fn_id", ) assert scoring_function is None @@ -284,9 +296,9 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None: with pytest.warns(DeprecationWarning): response = await async_client.scoring_functions.with_raw_response.register( - description={}, - return_type={}, - scoring_fn_id={}, + description="description", + return_type={"type": "string"}, + scoring_fn_id="scoring_fn_id", ) assert response.is_closed is True @@ -298,9 +310,9 @@ async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) async def 
test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None: with pytest.warns(DeprecationWarning): async with async_client.scoring_functions.with_streaming_response.register( - description={}, - return_type={}, - scoring_fn_id={}, + description="description", + return_type={"type": "string"}, + scoring_fn_id="scoring_fn_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" From fa8bd6557d98d7f8cc068bf71813fbf196795801 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 17 Nov 2025 19:37:33 +0000 Subject: [PATCH 2/2] release: 0.4.0-alpha.10 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 8 ++++++++ pyproject.toml | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index d12052d9..8615e587 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.4.0-alpha.9" + ".": "0.4.0-alpha.10" } diff --git a/CHANGELOG.md b/CHANGELOG.md index c8d2fac9..f902c89d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 0.4.0-alpha.10 (2025-11-17) + +Full Changelog: [v0.4.0-alpha.9...v0.4.0-alpha.10](https://github.com/llamastack/llama-stack-client-python/compare/v0.4.0-alpha.9...v0.4.0-alpha.10) + +### Bug Fixes + +* **openapi:** restore embedded request wrappers ([261e364](https://github.com/llamastack/llama-stack-client-python/commit/261e3640c942c60860af08cd4d205d8e402bb702)) + ## 0.4.0-alpha.9 (2025-11-14) Full Changelog: [v0.4.0-alpha.8...v0.4.0-alpha.9](https://github.com/llamastack/llama-stack-client-python/compare/v0.4.0-alpha.8...v0.4.0-alpha.9) diff --git a/pyproject.toml b/pyproject.toml index c4d1d77f..e4ed9244 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "llama_stack_client" -version = "0.4.0-alpha.9" +version = "0.4.0-alpha.10" description = "The official Python library for the llama-stack-client API" dynamic = ["readme"] license = "MIT"
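
Usage note for the wrapper change above: `run_eval` and `run_eval_alpha` now accept a single `benchmark_config` argument in place of the flattened `eval_candidate`, `num_examples`, and `scoring_params` keywords. The sketch below is a minimal illustration of the migrated call shape, mirroring the values used in the updated tests; the base URL, benchmark id, and model id are placeholders, and client construction details may differ in your deployment.

    from llama_stack_client import LlamaStackClient

    # Placeholder endpoint; point this at your own Llama Stack deployment.
    client = LlamaStackClient(base_url="http://localhost:8321")

    # Previously (flattened request fields):
    #   client.alpha.eval.run_eval(
    #       benchmark_id="my-benchmark",
    #       eval_candidate={"model": "my-model", "sampling_params": {}},
    #       num_examples=10,
    #   )

    # With this change, the same fields live inside the benchmark_config wrapper:
    job = client.alpha.eval.run_eval(
        benchmark_id="my-benchmark",  # placeholder benchmark id
        benchmark_config={
            "eval_candidate": {
                "model": "my-model",  # placeholder model id
                "sampling_params": {},
            },
            "num_examples": 10,
        },
    )
    print(job)

The deprecated `beta.datasets.register` and `scoring_functions.register` methods keep their argument names but now expect typed values instead of bare `object` placeholders, for example `purpose="post-training/messages"` with `source={"uri": "uri"}`, and `return_type={"type": "string"}` with a string `scoring_fn_id`, as exercised in the updated tests.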