diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 6603053537..cb464946f0 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.74.1" + ".": "1.75.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index b40485bd0a..848c5b5adb 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 97 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-a555f81249cb084f463dcefa4aba069f9341fdaf3dd6ac27d7f237fc90e8f488.yml -openapi_spec_hash: 8e590296cd1a54b9508510b0c7a2c45a -config_hash: 5ea32de61ff42fcf5e66cff8d9e247ea +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-5633633cc38734869cf7d993f7b549bb8e4d10e0ec45381ec2cd91507cd8eb8f.yml +openapi_spec_hash: c855121b2b2324b99499c9244c21d24d +config_hash: d20837393b73efdb19cd08e04c1cc9a1 diff --git a/CHANGELOG.md b/CHANGELOG.md index b03bbedb52..fb077b91c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 1.75.0 (2025-04-16) + +Full Changelog: [v1.74.1...v1.75.0](https://github.com/openai/openai-python/compare/v1.74.1...v1.75.0) + +### Features + +* **api:** add o3 and o4-mini model IDs ([4bacbd5](https://github.com/openai/openai-python/commit/4bacbd5503137e266c127dc643ebae496cb4f158)) + ## 1.74.1 (2025-04-16) Full Changelog: [v1.74.0...v1.74.1](https://github.com/openai/openai-python/compare/v1.74.0...v1.74.1) diff --git a/pyproject.toml b/pyproject.toml index e2cd25f69c..b5648e9e51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openai" -version = "1.74.1" +version = "1.75.0" description = "The official Python library for the openai API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/openai/_version.py b/src/openai/_version.py index 5bbfee3232..8eab2d7416 100644 --- a/src/openai/_version.py +++ b/src/openai/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "openai" -__version__ = "1.74.1" # x-release-please-version +__version__ = "1.75.0" # x-release-please-version diff --git a/src/openai/resources/chat/completions/completions.py b/src/openai/resources/chat/completions/completions.py index f9e380cc72..d6214225d8 100644 --- a/src/openai/resources/chat/completions/completions.py +++ b/src/openai/resources/chat/completions/completions.py @@ -99,7 +99,7 @@ def create( reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, @@ -145,7 +145,7 @@ def create( [images](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio). - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. 
Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -201,7 +201,7 @@ def create( This value is now deprecated in favor of `max_completion_tokens`, and is not compatible with - [o1 series models](https://platform.openai.com/docs/guides/reasoning). + [o-series models](https://platform.openai.com/docs/guides/reasoning). metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and @@ -270,12 +270,17 @@ def create( latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). - When not set, the default behavior is 'auto'. When this parameter is set, the response body will include the `service_tier` utilized. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. store: Whether or not to store the output of this chat completion request for use in @@ -364,7 +369,7 @@ def create( reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, @@ -409,7 +414,7 @@ def create( [images](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio). - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -474,7 +479,7 @@ def create( This value is now deprecated in favor of `max_completion_tokens`, and is not compatible with - [o1 series models](https://platform.openai.com/docs/guides/reasoning). + [o-series models](https://platform.openai.com/docs/guides/reasoning). metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and @@ -543,12 +548,17 @@ def create( latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). - When not set, the default behavior is 'auto'. When this parameter is set, the response body will include the `service_tier` utilized. - stop: Up to 4 sequences where the API will stop generating further tokens. 
The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. store: Whether or not to store the output of this chat completion request for use in @@ -628,7 +638,7 @@ def create( reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, @@ -673,7 +683,7 @@ def create( [images](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio). - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -738,7 +748,7 @@ def create( This value is now deprecated in favor of `max_completion_tokens`, and is not compatible with - [o1 series models](https://platform.openai.com/docs/guides/reasoning). + [o-series models](https://platform.openai.com/docs/guides/reasoning). metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and @@ -807,12 +817,17 @@ def create( latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). - When not set, the default behavior is 'auto'. When this parameter is set, the response body will include the `service_tier` utilized. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. 
store: Whether or not to store the output of this chat completion request for use in @@ -891,7 +906,7 @@ def create( reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, @@ -1187,7 +1202,7 @@ async def create( reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, @@ -1233,7 +1248,7 @@ async def create( [images](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio). - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -1289,7 +1304,7 @@ async def create( This value is now deprecated in favor of `max_completion_tokens`, and is not compatible with - [o1 series models](https://platform.openai.com/docs/guides/reasoning). + [o-series models](https://platform.openai.com/docs/guides/reasoning). metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and @@ -1358,12 +1373,17 @@ async def create( latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). - When not set, the default behavior is 'auto'. When this parameter is set, the response body will include the `service_tier` utilized. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. 
store: Whether or not to store the output of this chat completion request for use in @@ -1452,7 +1472,7 @@ async def create( reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, @@ -1497,7 +1517,7 @@ async def create( [images](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio). - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -1562,7 +1582,7 @@ async def create( This value is now deprecated in favor of `max_completion_tokens`, and is not compatible with - [o1 series models](https://platform.openai.com/docs/guides/reasoning). + [o-series models](https://platform.openai.com/docs/guides/reasoning). metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and @@ -1631,12 +1651,17 @@ async def create( latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). - When not set, the default behavior is 'auto'. When this parameter is set, the response body will include the `service_tier` utilized. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. store: Whether or not to store the output of this chat completion request for use in @@ -1716,7 +1741,7 @@ async def create( reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, @@ -1761,7 +1786,7 @@ async def create( [images](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio). - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. 
Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -1826,7 +1851,7 @@ async def create( This value is now deprecated in favor of `max_completion_tokens`, and is not compatible with - [o1 series models](https://platform.openai.com/docs/guides/reasoning). + [o-series models](https://platform.openai.com/docs/guides/reasoning). metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and @@ -1895,12 +1920,17 @@ async def create( latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). - When not set, the default behavior is 'auto'. When this parameter is set, the response body will include the `service_tier` utilized. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. store: Whether or not to store the output of this chat completion request for use in @@ -1979,7 +2009,7 @@ async def create( reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, diff --git a/src/openai/resources/completions.py b/src/openai/resources/completions.py index 592696f7da..aebf35d1f1 100644 --- a/src/openai/resources/completions.py +++ b/src/openai/resources/completions.py @@ -159,7 +159,9 @@ def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream: Whether to stream back partial progress. If set, tokens will be sent as @@ -319,7 +321,9 @@ def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -472,7 +476,9 @@ def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. 
The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -703,7 +709,9 @@ async def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream: Whether to stream back partial progress. If set, tokens will be sent as @@ -863,7 +871,9 @@ async def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -1016,7 +1026,9 @@ async def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream_options: Options for streaming response. Only set this when you set `stream: true`. diff --git a/src/openai/resources/responses/responses.py b/src/openai/resources/responses/responses.py index f8588178ed..f07b4d8c4a 100644 --- a/src/openai/resources/responses/responses.py +++ b/src/openai/resources/responses/responses.py @@ -89,6 +89,7 @@ def create( parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -130,7 +131,7 @@ def create( - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) - [Function calling](https://platform.openai.com/docs/guides/function-calling) - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -174,6 +175,24 @@ def create( Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + service_tier: Specifies the latency tier to use for processing the request. 
This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + store: Whether to store the generated model response for later retrieval via API. stream: If set to true, the model response data will be streamed to the client as it is @@ -255,6 +274,7 @@ def create( parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, @@ -295,7 +315,7 @@ def create( - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) - [Function calling](https://platform.openai.com/docs/guides/function-calling) - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -346,6 +366,24 @@ def create( Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + store: Whether to store the generated model response for later retrieval via API. temperature: What sampling temperature to use, between 0 and 2. 
Higher values like 0.8 will @@ -420,6 +458,7 @@ def create( parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, @@ -460,7 +499,7 @@ def create( - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) - [Function calling](https://platform.openai.com/docs/guides/function-calling) - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -511,6 +550,24 @@ def create( Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + store: Whether to store the generated model response for later retrieval via API. temperature: What sampling temperature to use, between 0 and 2. 
Higher values like 0.8 will @@ -584,6 +641,7 @@ def create( parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -613,6 +671,7 @@ def create( "parallel_tool_calls": parallel_tool_calls, "previous_response_id": previous_response_id, "reasoning": reasoning, + "service_tier": service_tier, "store": store, "stream": stream, "temperature": temperature, @@ -903,6 +962,7 @@ async def create( parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -944,7 +1004,7 @@ async def create( - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) - [Function calling](https://platform.openai.com/docs/guides/function-calling) - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -988,6 +1048,24 @@ async def create( Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + store: Whether to store the generated model response for later retrieval via API. 
stream: If set to true, the model response data will be streamed to the client as it is @@ -1069,6 +1147,7 @@ async def create( parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, @@ -1109,7 +1188,7 @@ async def create( - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) - [Function calling](https://platform.openai.com/docs/guides/function-calling) - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -1160,6 +1239,24 @@ async def create( Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + store: Whether to store the generated model response for later retrieval via API. temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will @@ -1234,6 +1331,7 @@ async def create( parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, @@ -1274,7 +1372,7 @@ async def create( - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) - [Function calling](https://platform.openai.com/docs/guides/function-calling) - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -1325,6 +1423,24 @@ async def create( Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). 
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + store: Whether to store the generated model response for later retrieval via API. temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will @@ -1398,6 +1514,7 @@ async def create( parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -1427,6 +1544,7 @@ async def create( "parallel_tool_calls": parallel_tool_calls, "previous_response_id": previous_response_id, "reasoning": reasoning, + "service_tier": service_tier, "store": store, "stream": stream, "temperature": temperature, diff --git a/src/openai/types/chat/chat_completion.py b/src/openai/types/chat/chat_completion.py index cb812a2702..3a235f89a5 100644 --- a/src/openai/types/chat/chat_completion.py +++ b/src/openai/types/chat/chat_completion.py @@ -59,8 +59,26 @@ class ChatCompletion(BaseModel): object: Literal["chat.completion"] """The object type, which is always `chat.completion`.""" - service_tier: Optional[Literal["scale", "default"]] = None - """The service tier used for processing the request.""" + service_tier: Optional[Literal["auto", "default", "flex"]] = None + """Specifies the latency tier to use for processing the request. + + This parameter is relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + """ system_fingerprint: Optional[str] = None """This fingerprint represents the backend configuration that the model runs with. 
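Taken together, the hunks above add a `"flex"` value to `service_tier` across the Chat Completions surface, echo the tier back on the `ChatCompletion` object, and introduce the `o3` and `o4-mini` model IDs. A minimal sketch of how a caller might exercise the new tier (the prompt text is illustrative, and whether a given model or account can use Flex Processing is determined server-side):

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Opt into the new Flex Processing tier; "flex" now joins "auto" and
# "default" as a valid `service_tier` literal.
completion = client.chat.completions.create(
    model="o4-mini",  # one of the model IDs added in this release
    messages=[{"role": "user", "content": "Say hello in one word."}],
    service_tier="flex",
)

# When `service_tier` is set on the request, the response body echoes
# the tier that actually served it.
print(completion.service_tier)
print(completion.choices[0].message.content)
```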
diff --git a/src/openai/types/chat/chat_completion_audio_param.py b/src/openai/types/chat/chat_completion_audio_param.py
index b902f2667f..25caada177 100644
--- a/src/openai/types/chat/chat_completion_audio_param.py
+++ b/src/openai/types/chat/chat_completion_audio_param.py
@@ -9,7 +9,7 @@ class ChatCompletionAudioParam(TypedDict, total=False):
-    format: Required[Literal["wav", "mp3", "flac", "opus", "pcm16"]]
-    """Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`,
-    `opus`, or `pcm16`.
+    format: Required[Literal["wav", "aac", "mp3", "flac", "opus", "pcm16"]]
+    """Specifies the output audio format. Must be one of `wav`, `aac`, `mp3`,
+    `flac`, `opus`, or `pcm16`.
@@ -22,6 +22,6 @@ class ChatCompletionAudioParam(TypedDict, total=False):
     ]
     """The voice the model uses to respond.
 
-    Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, and
-    `shimmer`.
+    Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `nova`,
+    `onyx`, `sage`, and `shimmer`.
     """
diff --git a/src/openai/types/chat/chat_completion_chunk.py b/src/openai/types/chat/chat_completion_chunk.py
index 31b9cb5456..6fe996dd95 100644
--- a/src/openai/types/chat/chat_completion_chunk.py
+++ b/src/openai/types/chat/chat_completion_chunk.py
@@ -128,8 +128,26 @@ class ChatCompletionChunk(BaseModel):
     object: Literal["chat.completion.chunk"]
     """The object type, which is always `chat.completion.chunk`."""
 
-    service_tier: Optional[Literal["scale", "default"]] = None
-    """The service tier used for processing the request."""
+    service_tier: Optional[Literal["auto", "default", "flex"]] = None
+    """Specifies the latency tier to use for processing the request.
+
+    This parameter is relevant for customers subscribed to the scale tier service:
+
+    - If set to 'auto', and the Project is Scale tier enabled, the system will
+      utilize scale tier credits until they are exhausted.
+    - If set to 'auto', and the Project is not Scale tier enabled, the request will
+      be processed using the default service tier with a lower uptime SLA and no
+      latency guarantee.
+    - If set to 'default', the request will be processed using the default service
+      tier with a lower uptime SLA and no latency guarantee.
+    - If set to 'flex', the request will be processed with the Flex Processing
+      service tier.
+      [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+    - When not set, the default behavior is 'auto'.
+
+    When this parameter is set, the response body will include the `service_tier`
+    utilized.
+    """
 
     system_fingerprint: Optional[str] = None
     """
diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py
index 05103fba91..60d5f53cdd 100644
--- a/src/openai/types/chat/completion_create_params.py
+++ b/src/openai/types/chat/completion_create_params.py
@@ -45,7 +45,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     """
 
     model: Required[Union[str, ChatModel]]
-    """Model ID used to generate the response, like `gpt-4o` or `o1`.
+    """Model ID used to generate the response, like `gpt-4o` or `o3`.
 
     OpenAI offers a wide range of models with different capabilities, performance
     characteristics, and price points. Refer to the
     [model guide](https://platform.openai.com/docs/models) to browse and compare
     available models.
@@ -123,7 +123,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
 
     This value is now deprecated in favor of `max_completion_tokens`, and is not
     compatible with
-    [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+    [o-series models](https://platform.openai.com/docs/guides/reasoning).
""" metadata: Optional[Metadata] @@ -208,7 +208,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): in the backend. """ - service_tier: Optional[Literal["auto", "default"]] + service_tier: Optional[Literal["auto", "default", "flex"]] """Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: @@ -220,6 +220,9 @@ class CompletionCreateParamsBase(TypedDict, total=False): latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). - When not set, the default behavior is 'auto'. When this parameter is set, the response body will include the `service_tier` @@ -227,9 +230,10 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ stop: Union[Optional[str], List[str], None] - """Up to 4 sequences where the API will stop generating further tokens. + """Not supported with latest reasoning models `o3` and `o4-mini`. - The returned text will not contain the stop sequence. + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. """ store: Optional[bool] diff --git a/src/openai/types/completion_create_params.py b/src/openai/types/completion_create_params.py index fdb1680d26..6ae20cff83 100644 --- a/src/openai/types/completion_create_params.py +++ b/src/openai/types/completion_create_params.py @@ -120,9 +120,10 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ stop: Union[Optional[str], List[str], None] - """Up to 4 sequences where the API will stop generating further tokens. + """Not supported with latest reasoning models `o3` and `o4-mini`. - The returned text will not contain the stop sequence. + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. """ stream_options: Optional[ChatCompletionStreamOptionsParam] diff --git a/src/openai/types/responses/response.py b/src/openai/types/responses/response.py index 8cd1e01144..254f7e204b 100644 --- a/src/openai/types/responses/response.py +++ b/src/openai/types/responses/response.py @@ -62,7 +62,7 @@ class Response(BaseModel): """ model: ResponsesModel - """Model ID used to generate the response, like `gpt-4o` or `o1`. + """Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the @@ -149,6 +149,27 @@ class Response(BaseModel): [reasoning models](https://platform.openai.com/docs/guides/reasoning). """ + service_tier: Optional[Literal["auto", "default", "flex"]] = None + """Specifies the latency tier to use for processing the request. + + This parameter is relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. 
+ - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + """ + status: Optional[ResponseStatus] = None """The status of the response generation. diff --git a/src/openai/types/responses/response_create_params.py b/src/openai/types/responses/response_create_params.py index ed82e678e5..3c0a9d7b8a 100644 --- a/src/openai/types/responses/response_create_params.py +++ b/src/openai/types/responses/response_create_params.py @@ -38,7 +38,7 @@ class ResponseCreateParamsBase(TypedDict, total=False): """ model: Required[ResponsesModel] - """Model ID used to generate the response, like `gpt-4o` or `o1`. + """Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the @@ -102,6 +102,27 @@ class ResponseCreateParamsBase(TypedDict, total=False): [reasoning models](https://platform.openai.com/docs/guides/reasoning). """ + service_tier: Optional[Literal["auto", "default", "flex"]] + """Specifies the latency tier to use for processing the request. + + This parameter is relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + """ + store: Optional[bool] """Whether to store the generated model response for later retrieval via API.""" diff --git a/src/openai/types/shared/chat_model.py b/src/openai/types/shared/chat_model.py index 30878b4347..4869cd325c 100644 --- a/src/openai/types/shared/chat_model.py +++ b/src/openai/types/shared/chat_model.py @@ -11,6 +11,10 @@ "gpt-4.1-2025-04-14", "gpt-4.1-mini-2025-04-14", "gpt-4.1-nano-2025-04-14", + "o4-mini", + "o4-mini-2025-04-16", + "o3", + "o3-2025-04-16", "o3-mini", "o3-mini-2025-01-31", "o1", diff --git a/src/openai/types/shared/reasoning.py b/src/openai/types/shared/reasoning.py index 78a396d738..107aab2e4a 100644 --- a/src/openai/types/shared/reasoning.py +++ b/src/openai/types/shared/reasoning.py @@ -19,10 +19,17 @@ class Reasoning(BaseModel): result in faster responses and fewer tokens used on reasoning in a response. """ - generate_summary: Optional[Literal["concise", "detailed"]] = None - """**computer_use_preview only** + generate_summary: Optional[Literal["auto", "concise", "detailed"]] = None + """**Deprecated:** use `summary` instead. A summary of the reasoning performed by the model. This can be useful for - debugging and understanding the model's reasoning process. One of `concise` or - `detailed`. + debugging and understanding the model's reasoning process. One of `auto`, + `concise`, or `detailed`. 
+ """ + + summary: Optional[Literal["auto", "concise", "detailed"]] = None + """A summary of the reasoning performed by the model. + + This can be useful for debugging and understanding the model's reasoning + process. One of `auto`, `concise`, or `detailed`. """ diff --git a/src/openai/types/shared_params/chat_model.py b/src/openai/types/shared_params/chat_model.py index f606beb693..99e082fc11 100644 --- a/src/openai/types/shared_params/chat_model.py +++ b/src/openai/types/shared_params/chat_model.py @@ -13,6 +13,10 @@ "gpt-4.1-2025-04-14", "gpt-4.1-mini-2025-04-14", "gpt-4.1-nano-2025-04-14", + "o4-mini", + "o4-mini-2025-04-16", + "o3", + "o3-2025-04-16", "o3-mini", "o3-mini-2025-01-31", "o1", diff --git a/src/openai/types/shared_params/reasoning.py b/src/openai/types/shared_params/reasoning.py index 2953b895c4..73e1a008df 100644 --- a/src/openai/types/shared_params/reasoning.py +++ b/src/openai/types/shared_params/reasoning.py @@ -20,10 +20,17 @@ class Reasoning(TypedDict, total=False): result in faster responses and fewer tokens used on reasoning in a response. """ - generate_summary: Optional[Literal["concise", "detailed"]] - """**computer_use_preview only** + generate_summary: Optional[Literal["auto", "concise", "detailed"]] + """**Deprecated:** use `summary` instead. A summary of the reasoning performed by the model. This can be useful for - debugging and understanding the model's reasoning process. One of `concise` or - `detailed`. + debugging and understanding the model's reasoning process. One of `auto`, + `concise`, or `detailed`. + """ + + summary: Optional[Literal["auto", "concise", "detailed"]] + """A summary of the reasoning performed by the model. + + This can be useful for debugging and understanding the model's reasoning + process. One of `auto`, `concise`, or `detailed`. """ diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py index e45a5becf3..3753af8fdb 100644 --- a/tests/api_resources/test_responses.py +++ b/tests/api_resources/test_responses.py @@ -38,8 +38,10 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: previous_response_id="previous_response_id", reasoning={ "effort": "low", - "generate_summary": "concise", + "generate_summary": "auto", + "summary": "auto", }, + service_tier="auto", store=True, stream=False, temperature=1, @@ -116,8 +118,10 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: previous_response_id="previous_response_id", reasoning={ "effort": "low", - "generate_summary": "concise", + "generate_summary": "auto", + "summary": "auto", }, + service_tier="auto", store=True, temperature=1, text={"format": {"type": "text"}}, @@ -280,8 +284,10 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn previous_response_id="previous_response_id", reasoning={ "effort": "low", - "generate_summary": "concise", + "generate_summary": "auto", + "summary": "auto", }, + service_tier="auto", store=True, stream=False, temperature=1, @@ -358,8 +364,10 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn previous_response_id="previous_response_id", reasoning={ "effort": "low", - "generate_summary": "concise", + "generate_summary": "auto", + "summary": "auto", }, + service_tier="auto", store=True, temperature=1, text={"format": {"type": "text"}},