diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 6603053537..cb464946f0 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.74.1" + ".": "1.75.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index b40485bd0a..848c5b5adb 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 97 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-a555f81249cb084f463dcefa4aba069f9341fdaf3dd6ac27d7f237fc90e8f488.yml -openapi_spec_hash: 8e590296cd1a54b9508510b0c7a2c45a -config_hash: 5ea32de61ff42fcf5e66cff8d9e247ea +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-5633633cc38734869cf7d993f7b549bb8e4d10e0ec45381ec2cd91507cd8eb8f.yml +openapi_spec_hash: c855121b2b2324b99499c9244c21d24d +config_hash: d20837393b73efdb19cd08e04c1cc9a1 diff --git a/CHANGELOG.md b/CHANGELOG.md index b03bbedb52..fb077b91c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 1.75.0 (2025-04-16) + +Full Changelog: [v1.74.1...v1.75.0](https://github.com/openai/openai-python/compare/v1.74.1...v1.75.0) + +### Features + +* **api:** add o3 and o4-mini model IDs ([4bacbd5](https://github.com/openai/openai-python/commit/4bacbd5503137e266c127dc643ebae496cb4f158)) + ## 1.74.1 (2025-04-16) Full Changelog: [v1.74.0...v1.74.1](https://github.com/openai/openai-python/compare/v1.74.0...v1.74.1) diff --git a/pyproject.toml b/pyproject.toml index e2cd25f69c..b5648e9e51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openai" -version = "1.74.1" +version = "1.75.0" description = "The official Python library for the openai API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/openai/_version.py b/src/openai/_version.py index 5bbfee3232..8eab2d7416 100644 --- a/src/openai/_version.py +++ b/src/openai/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "openai" -__version__ = "1.74.1" # x-release-please-version +__version__ = "1.75.0" # x-release-please-version diff --git a/src/openai/resources/chat/completions/completions.py b/src/openai/resources/chat/completions/completions.py index f9e380cc72..d6214225d8 100644 --- a/src/openai/resources/chat/completions/completions.py +++ b/src/openai/resources/chat/completions/completions.py @@ -99,7 +99,7 @@ def create( reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, @@ -145,7 +145,7 @@ def create( [images](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio). - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. 
Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -201,7 +201,7 @@ def create( This value is now deprecated in favor of `max_completion_tokens`, and is not compatible with - [o1 series models](https://platform.openai.com/docs/guides/reasoning). + [o-series models](https://platform.openai.com/docs/guides/reasoning). metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and @@ -270,12 +270,17 @@ def create( latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). - When not set, the default behavior is 'auto'. When this parameter is set, the response body will include the `service_tier` utilized. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. store: Whether or not to store the output of this chat completion request for use in @@ -364,7 +369,7 @@ def create( reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, @@ -409,7 +414,7 @@ def create( [images](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio). - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -474,7 +479,7 @@ def create( This value is now deprecated in favor of `max_completion_tokens`, and is not compatible with - [o1 series models](https://platform.openai.com/docs/guides/reasoning). + [o-series models](https://platform.openai.com/docs/guides/reasoning). metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and @@ -543,12 +548,17 @@ def create( latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). - When not set, the default behavior is 'auto'. When this parameter is set, the response body will include the `service_tier` utilized. - stop: Up to 4 sequences where the API will stop generating further tokens. 
The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. store: Whether or not to store the output of this chat completion request for use in @@ -628,7 +638,7 @@ def create( reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, @@ -673,7 +683,7 @@ def create( [images](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio). - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -738,7 +748,7 @@ def create( This value is now deprecated in favor of `max_completion_tokens`, and is not compatible with - [o1 series models](https://platform.openai.com/docs/guides/reasoning). + [o-series models](https://platform.openai.com/docs/guides/reasoning). metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and @@ -807,12 +817,17 @@ def create( latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). - When not set, the default behavior is 'auto'. When this parameter is set, the response body will include the `service_tier` utilized. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. 
store: Whether or not to store the output of this chat completion request for use in @@ -891,7 +906,7 @@ def create( reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, @@ -1187,7 +1202,7 @@ async def create( reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, @@ -1233,7 +1248,7 @@ async def create( [images](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio). - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -1289,7 +1304,7 @@ async def create( This value is now deprecated in favor of `max_completion_tokens`, and is not compatible with - [o1 series models](https://platform.openai.com/docs/guides/reasoning). + [o-series models](https://platform.openai.com/docs/guides/reasoning). metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and @@ -1358,12 +1373,17 @@ async def create( latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). - When not set, the default behavior is 'auto'. When this parameter is set, the response body will include the `service_tier` utilized. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. 
store: Whether or not to store the output of this chat completion request for use in @@ -1452,7 +1472,7 @@ async def create( reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, @@ -1497,7 +1517,7 @@ async def create( [images](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio). - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -1562,7 +1582,7 @@ async def create( This value is now deprecated in favor of `max_completion_tokens`, and is not compatible with - [o1 series models](https://platform.openai.com/docs/guides/reasoning). + [o-series models](https://platform.openai.com/docs/guides/reasoning). metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and @@ -1631,12 +1651,17 @@ async def create( latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). - When not set, the default behavior is 'auto'. When this parameter is set, the response body will include the `service_tier` utilized. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. store: Whether or not to store the output of this chat completion request for use in @@ -1716,7 +1741,7 @@ async def create( reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, @@ -1761,7 +1786,7 @@ async def create( [images](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio). - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. 
Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -1826,7 +1851,7 @@ async def create( This value is now deprecated in favor of `max_completion_tokens`, and is not compatible with - [o1 series models](https://platform.openai.com/docs/guides/reasoning). + [o-series models](https://platform.openai.com/docs/guides/reasoning). metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and @@ -1895,12 +1920,17 @@ async def create( latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). - When not set, the default behavior is 'auto'. When this parameter is set, the response body will include the `service_tier` utilized. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. store: Whether or not to store the output of this chat completion request for use in @@ -1979,7 +2009,7 @@ async def create( reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, - service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, diff --git a/src/openai/resources/completions.py b/src/openai/resources/completions.py index 592696f7da..aebf35d1f1 100644 --- a/src/openai/resources/completions.py +++ b/src/openai/resources/completions.py @@ -159,7 +159,9 @@ def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream: Whether to stream back partial progress. If set, tokens will be sent as @@ -319,7 +321,9 @@ def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -472,7 +476,9 @@ def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. 
The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -703,7 +709,9 @@ async def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream: Whether to stream back partial progress. If set, tokens will be sent as @@ -863,7 +871,9 @@ async def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -1016,7 +1026,9 @@ async def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream_options: Options for streaming response. Only set this when you set `stream: true`. diff --git a/src/openai/resources/responses/responses.py b/src/openai/resources/responses/responses.py index f8588178ed..f07b4d8c4a 100644 --- a/src/openai/resources/responses/responses.py +++ b/src/openai/resources/responses/responses.py @@ -89,6 +89,7 @@ def create( parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -130,7 +131,7 @@ def create( - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) - [Function calling](https://platform.openai.com/docs/guides/function-calling) - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -174,6 +175,24 @@ def create( Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + service_tier: Specifies the latency tier to use for processing the request. 
This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + store: Whether to store the generated model response for later retrieval via API. stream: If set to true, the model response data will be streamed to the client as it is @@ -255,6 +274,7 @@ def create( parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, @@ -295,7 +315,7 @@ def create( - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) - [Function calling](https://platform.openai.com/docs/guides/function-calling) - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -346,6 +366,24 @@ def create( Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + store: Whether to store the generated model response for later retrieval via API. temperature: What sampling temperature to use, between 0 and 2. 
Higher values like 0.8 will @@ -420,6 +458,7 @@ def create( parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, @@ -460,7 +499,7 @@ def create( - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) - [Function calling](https://platform.openai.com/docs/guides/function-calling) - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -511,6 +550,24 @@ def create( Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + store: Whether to store the generated model response for later retrieval via API. temperature: What sampling temperature to use, between 0 and 2. 
Higher values like 0.8 will @@ -584,6 +641,7 @@ def create( parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -613,6 +671,7 @@ def create( "parallel_tool_calls": parallel_tool_calls, "previous_response_id": previous_response_id, "reasoning": reasoning, + "service_tier": service_tier, "store": store, "stream": stream, "temperature": temperature, @@ -903,6 +962,7 @@ async def create( parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -944,7 +1004,7 @@ async def create( - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) - [Function calling](https://platform.openai.com/docs/guides/function-calling) - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -988,6 +1048,24 @@ async def create( Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + store: Whether to store the generated model response for later retrieval via API. 
stream: If set to true, the model response data will be streamed to the client as it is @@ -1069,6 +1147,7 @@ async def create( parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, @@ -1109,7 +1188,7 @@ async def create( - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) - [Function calling](https://platform.openai.com/docs/guides/function-calling) - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -1160,6 +1239,24 @@ async def create( Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + store: Whether to store the generated model response for later retrieval via API. temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will @@ -1234,6 +1331,7 @@ async def create( parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, @@ -1274,7 +1372,7 @@ async def create( - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) - [Function calling](https://platform.openai.com/docs/guides/function-calling) - model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare @@ -1325,6 +1423,24 @@ async def create( Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). 
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + store: Whether to store the generated model response for later retrieval via API. temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will @@ -1398,6 +1514,7 @@ async def create( parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, store: Optional[bool] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -1427,6 +1544,7 @@ async def create( "parallel_tool_calls": parallel_tool_calls, "previous_response_id": previous_response_id, "reasoning": reasoning, + "service_tier": service_tier, "store": store, "stream": stream, "temperature": temperature, diff --git a/src/openai/types/chat/chat_completion.py b/src/openai/types/chat/chat_completion.py index cb812a2702..3a235f89a5 100644 --- a/src/openai/types/chat/chat_completion.py +++ b/src/openai/types/chat/chat_completion.py @@ -59,8 +59,26 @@ class ChatCompletion(BaseModel): object: Literal["chat.completion"] """The object type, which is always `chat.completion`.""" - service_tier: Optional[Literal["scale", "default"]] = None - """The service tier used for processing the request.""" + service_tier: Optional[Literal["auto", "default", "flex"]] = None + """Specifies the latency tier to use for processing the request. + + This parameter is relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + """ system_fingerprint: Optional[str] = None """This fingerprint represents the backend configuration that the model runs with. 
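Taken together, the hunks above add a `"flex"` value to `service_tier` across the Chat Completions surface, echo the tier back on the `ChatCompletion` object, and introduce the `o3` and `o4-mini` model IDs. A minimal sketch of how a caller might exercise the new tier (the prompt text is illustrative, and whether a given model or account can use Flex Processing is determined server-side):

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Opt into the new Flex Processing tier; "flex" now joins "auto" and
# "default" as a valid `service_tier` literal.
completion = client.chat.completions.create(
    model="o4-mini",  # one of the model IDs added in this release
    messages=[{"role": "user", "content": "Say hello in one word."}],
    service_tier="flex",
)

# When `service_tier` is set on the request, the response body echoes
# the tier that actually served it.
print(completion.service_tier)
print(completion.choices[0].message.content)
```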
diff --git a/src/openai/types/chat/chat_completion_audio_param.py b/src/openai/types/chat/chat_completion_audio_param.py
index b902f2667f..25caada177 100644
--- a/src/openai/types/chat/chat_completion_audio_param.py
+++ b/src/openai/types/chat/chat_completion_audio_param.py
@@ -9,7 +9,7 @@ class ChatCompletionAudioParam(TypedDict, total=False):
-    format: Required[Literal["wav", "mp3", "flac", "opus", "pcm16"]]
-    """Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`,
-    `opus`, or `pcm16`.
+    format: Required[Literal["wav", "aac", "mp3", "flac", "opus", "pcm16"]]
+    """Specifies the output audio format. Must be one of `wav`, `aac`, `mp3`,
+    `flac`, `opus`, or `pcm16`.
@@ -22,6 +22,6 @@ class ChatCompletionAudioParam(TypedDict, total=False):
     ]
     """The voice the model uses to respond.
 
-    Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, and
-    `shimmer`.
+    Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `nova`,
+    `onyx`, `sage`, and `shimmer`.
     """
diff --git a/src/openai/types/chat/chat_completion_chunk.py b/src/openai/types/chat/chat_completion_chunk.py
index 31b9cb5456..6fe996dd95 100644
--- a/src/openai/types/chat/chat_completion_chunk.py
+++ b/src/openai/types/chat/chat_completion_chunk.py
@@ -128,8 +128,26 @@ class ChatCompletionChunk(BaseModel):
     object: Literal["chat.completion.chunk"]
     """The object type, which is always `chat.completion.chunk`."""
 
-    service_tier: Optional[Literal["scale", "default"]] = None
-    """The service tier used for processing the request."""
+    service_tier: Optional[Literal["auto", "default", "flex"]] = None
+    """Specifies the latency tier to use for processing the request.
+
+    This parameter is relevant for customers subscribed to the scale tier service:
+
+    - If set to 'auto', and the Project is Scale tier enabled, the system will
+      utilize scale tier credits until they are exhausted.
+    - If set to 'auto', and the Project is not Scale tier enabled, the request will
+      be processed using the default service tier with a lower uptime SLA and no
+      latency guarantee.
+    - If set to 'default', the request will be processed using the default service
+      tier with a lower uptime SLA and no latency guarantee.
+    - If set to 'flex', the request will be processed with the Flex Processing
+      service tier.
+      [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+    - When not set, the default behavior is 'auto'.
+
+    When this parameter is set, the response body will include the `service_tier`
+    utilized.
+    """
 
     system_fingerprint: Optional[str] = None
     """
diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py
index 05103fba91..60d5f53cdd 100644
--- a/src/openai/types/chat/completion_create_params.py
+++ b/src/openai/types/chat/completion_create_params.py
@@ -45,7 +45,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     """
 
     model: Required[Union[str, ChatModel]]
-    """Model ID used to generate the response, like `gpt-4o` or `o1`.
+    """Model ID used to generate the response, like `gpt-4o` or `o3`.
 
     OpenAI offers a wide range of models with different capabilities, performance
     characteristics, and price points. Refer to the
     [model guide](https://platform.openai.com/docs/models) to browse and compare
     available models.
@@ -123,7 +123,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
 
     This value is now deprecated in favor of `max_completion_tokens`, and is not
     compatible with
-    [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+    [o-series models](https://platform.openai.com/docs/guides/reasoning).
""" metadata: Optional[Metadata] @@ -208,7 +208,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): in the backend. """ - service_tier: Optional[Literal["auto", "default"]] + service_tier: Optional[Literal["auto", "default", "flex"]] """Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: @@ -220,6 +220,9 @@ class CompletionCreateParamsBase(TypedDict, total=False): latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). - When not set, the default behavior is 'auto'. When this parameter is set, the response body will include the `service_tier` @@ -227,9 +230,10 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ stop: Union[Optional[str], List[str], None] - """Up to 4 sequences where the API will stop generating further tokens. + """Not supported with latest reasoning models `o3` and `o4-mini`. - The returned text will not contain the stop sequence. + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. """ store: Optional[bool] diff --git a/src/openai/types/completion_create_params.py b/src/openai/types/completion_create_params.py index fdb1680d26..6ae20cff83 100644 --- a/src/openai/types/completion_create_params.py +++ b/src/openai/types/completion_create_params.py @@ -120,9 +120,10 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ stop: Union[Optional[str], List[str], None] - """Up to 4 sequences where the API will stop generating further tokens. + """Not supported with latest reasoning models `o3` and `o4-mini`. - The returned text will not contain the stop sequence. + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. """ stream_options: Optional[ChatCompletionStreamOptionsParam] diff --git a/src/openai/types/responses/response.py b/src/openai/types/responses/response.py index 8cd1e01144..254f7e204b 100644 --- a/src/openai/types/responses/response.py +++ b/src/openai/types/responses/response.py @@ -62,7 +62,7 @@ class Response(BaseModel): """ model: ResponsesModel - """Model ID used to generate the response, like `gpt-4o` or `o1`. + """Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the @@ -149,6 +149,27 @@ class Response(BaseModel): [reasoning models](https://platform.openai.com/docs/guides/reasoning). """ + service_tier: Optional[Literal["auto", "default", "flex"]] = None + """Specifies the latency tier to use for processing the request. + + This parameter is relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. 
+ - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + """ + status: Optional[ResponseStatus] = None """The status of the response generation. diff --git a/src/openai/types/responses/response_create_params.py b/src/openai/types/responses/response_create_params.py index ed82e678e5..3c0a9d7b8a 100644 --- a/src/openai/types/responses/response_create_params.py +++ b/src/openai/types/responses/response_create_params.py @@ -38,7 +38,7 @@ class ResponseCreateParamsBase(TypedDict, total=False): """ model: Required[ResponsesModel] - """Model ID used to generate the response, like `gpt-4o` or `o1`. + """Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of models with different capabilities, performance characteristics, and price points. Refer to the @@ -102,6 +102,27 @@ class ResponseCreateParamsBase(TypedDict, total=False): [reasoning models](https://platform.openai.com/docs/guides/reasoning). """ + service_tier: Optional[Literal["auto", "default", "flex"]] + """Specifies the latency tier to use for processing the request. + + This parameter is relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + """ + store: Optional[bool] """Whether to store the generated model response for later retrieval via API.""" diff --git a/src/openai/types/shared/chat_model.py b/src/openai/types/shared/chat_model.py index 30878b4347..4869cd325c 100644 --- a/src/openai/types/shared/chat_model.py +++ b/src/openai/types/shared/chat_model.py @@ -11,6 +11,10 @@ "gpt-4.1-2025-04-14", "gpt-4.1-mini-2025-04-14", "gpt-4.1-nano-2025-04-14", + "o4-mini", + "o4-mini-2025-04-16", + "o3", + "o3-2025-04-16", "o3-mini", "o3-mini-2025-01-31", "o1", diff --git a/src/openai/types/shared/reasoning.py b/src/openai/types/shared/reasoning.py index 78a396d738..107aab2e4a 100644 --- a/src/openai/types/shared/reasoning.py +++ b/src/openai/types/shared/reasoning.py @@ -19,10 +19,17 @@ class Reasoning(BaseModel): result in faster responses and fewer tokens used on reasoning in a response. """ - generate_summary: Optional[Literal["concise", "detailed"]] = None - """**computer_use_preview only** + generate_summary: Optional[Literal["auto", "concise", "detailed"]] = None + """**Deprecated:** use `summary` instead. A summary of the reasoning performed by the model. This can be useful for - debugging and understanding the model's reasoning process. One of `concise` or - `detailed`. + debugging and understanding the model's reasoning process. One of `auto`, + `concise`, or `detailed`. 
+ """ + + summary: Optional[Literal["auto", "concise", "detailed"]] = None + """A summary of the reasoning performed by the model. + + This can be useful for debugging and understanding the model's reasoning + process. One of `auto`, `concise`, or `detailed`. """ diff --git a/src/openai/types/shared_params/chat_model.py b/src/openai/types/shared_params/chat_model.py index f606beb693..99e082fc11 100644 --- a/src/openai/types/shared_params/chat_model.py +++ b/src/openai/types/shared_params/chat_model.py @@ -13,6 +13,10 @@ "gpt-4.1-2025-04-14", "gpt-4.1-mini-2025-04-14", "gpt-4.1-nano-2025-04-14", + "o4-mini", + "o4-mini-2025-04-16", + "o3", + "o3-2025-04-16", "o3-mini", "o3-mini-2025-01-31", "o1", diff --git a/src/openai/types/shared_params/reasoning.py b/src/openai/types/shared_params/reasoning.py index 2953b895c4..73e1a008df 100644 --- a/src/openai/types/shared_params/reasoning.py +++ b/src/openai/types/shared_params/reasoning.py @@ -20,10 +20,17 @@ class Reasoning(TypedDict, total=False): result in faster responses and fewer tokens used on reasoning in a response. """ - generate_summary: Optional[Literal["concise", "detailed"]] - """**computer_use_preview only** + generate_summary: Optional[Literal["auto", "concise", "detailed"]] + """**Deprecated:** use `summary` instead. A summary of the reasoning performed by the model. This can be useful for - debugging and understanding the model's reasoning process. One of `concise` or - `detailed`. + debugging and understanding the model's reasoning process. One of `auto`, + `concise`, or `detailed`. + """ + + summary: Optional[Literal["auto", "concise", "detailed"]] + """A summary of the reasoning performed by the model. + + This can be useful for debugging and understanding the model's reasoning + process. One of `auto`, `concise`, or `detailed`. """ diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py index e45a5becf3..3753af8fdb 100644 --- a/tests/api_resources/test_responses.py +++ b/tests/api_resources/test_responses.py @@ -38,8 +38,10 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: previous_response_id="previous_response_id", reasoning={ "effort": "low", - "generate_summary": "concise", + "generate_summary": "auto", + "summary": "auto", }, + service_tier="auto", store=True, stream=False, temperature=1, @@ -116,8 +118,10 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: previous_response_id="previous_response_id", reasoning={ "effort": "low", - "generate_summary": "concise", + "generate_summary": "auto", + "summary": "auto", }, + service_tier="auto", store=True, temperature=1, text={"format": {"type": "text"}}, @@ -280,8 +284,10 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn previous_response_id="previous_response_id", reasoning={ "effort": "low", - "generate_summary": "concise", + "generate_summary": "auto", + "summary": "auto", }, + service_tier="auto", store=True, stream=False, temperature=1, @@ -358,8 +364,10 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn previous_response_id="previous_response_id", reasoning={ "effort": "low", - "generate_summary": "concise", + "generate_summary": "auto", + "summary": "auto", }, + service_tier="auto", store=True, temperature=1, text={"format": {"type": "text"}},