diff --git a/.stats.yml b/.stats.yml
index c550abf3c6..284caebf44 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1 +1 @@
-configured_endpoints: 51
+configured_endpoints: 52
diff --git a/api.md b/api.md
index dbc95cd0b4..cc3c91a8d5 100644
--- a/api.md
+++ b/api.md
@@ -159,16 +159,34 @@ Methods:
Types:
```python
-from openai.types.fine_tuning import FineTuningJob, FineTuningJobEvent
+from openai.types.fine_tuning import (
+ FineTuningJob,
+ FineTuningJobEvent,
+ FineTuningJobIntegration,
+ FineTuningJobWandbIntegration,
+ FineTuningJobWandbIntegrationObject,
+)
+```
+
+Methods:
+
+- client.fine_tuning.jobs.create(\*\*params) -> FineTuningJob
+- client.fine_tuning.jobs.retrieve(fine_tuning_job_id) -> FineTuningJob
+- client.fine_tuning.jobs.list(\*\*params) -> SyncCursorPage[FineTuningJob]
+- client.fine_tuning.jobs.cancel(fine_tuning_job_id) -> FineTuningJob
+- client.fine_tuning.jobs.list_events(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobEvent]
+
+### Checkpoints
+
+Types:
+
+```python
+from openai.types.fine_tuning.jobs import FineTuningJobCheckpoint
```
Methods:
-- client.fine_tuning.jobs.create(\*\*params) -> FineTuningJob
-- client.fine_tuning.jobs.retrieve(fine_tuning_job_id) -> FineTuningJob
-- client.fine_tuning.jobs.list(\*\*params) -> SyncCursorPage[FineTuningJob]
-- client.fine_tuning.jobs.cancel(fine_tuning_job_id) -> FineTuningJob
-- client.fine_tuning.jobs.list_events(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobEvent]
+- client.fine_tuning.jobs.checkpoints.list(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobCheckpoint]
# Beta
@@ -220,7 +238,15 @@ Methods:
Types:
```python
-from openai.types.beta import Thread, ThreadDeleted
+from openai.types.beta import (
+ AssistantResponseFormat,
+ AssistantResponseFormatOption,
+ AssistantToolChoice,
+ AssistantToolChoiceFunction,
+ AssistantToolChoiceOption,
+ Thread,
+ ThreadDeleted,
+)
```
Methods:
diff --git a/src/openai/resources/beta/assistants/assistants.py b/src/openai/resources/beta/assistants/assistants.py
index 232451ab25..9e88794ebc 100644
--- a/src/openai/resources/beta/assistants/assistants.py
+++ b/src/openai/resources/beta/assistants/assistants.py
@@ -2,7 +2,7 @@
from __future__ import annotations
-from typing import List, Iterable, Optional
+from typing import List, Union, Iterable, Optional
from typing_extensions import Literal
import httpx
@@ -57,7 +57,29 @@ def with_streaming_response(self) -> AssistantsWithStreamingResponse:
def create(
self,
*,
- model: str,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ ],
description: Optional[str] | NotGiven = NOT_GIVEN,
file_ids: List[str] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -87,7 +109,7 @@ def create(
attached to this assistant. There can be a maximum of 20 files attached to the
assistant. Files are ordered by their creation date in ascending order.
- instructions: The system instructions that the assistant uses. The maximum length is 32768
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
characters.
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -194,7 +216,7 @@ def update(
file was previously attached to the list but does not show up in the list, it
will be deleted from the assistant.
- instructions: The system instructions that the assistant uses. The maximum length is 32768
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
characters.
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -360,7 +382,29 @@ def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse:
async def create(
self,
*,
- model: str,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ ],
description: Optional[str] | NotGiven = NOT_GIVEN,
file_ids: List[str] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -390,7 +434,7 @@ async def create(
attached to this assistant. There can be a maximum of 20 files attached to the
assistant. Files are ordered by their creation date in ascending order.
- instructions: The system instructions that the assistant uses. The maximum length is 32768
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
characters.
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -497,7 +541,7 @@ async def update(
file was previously attached to the list but does not show up in the list, it
will be deleted from the assistant.
- instructions: The system instructions that the assistant uses. The maximum length is 32768
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
characters.
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py
index 8576a5c09a..9fa7239c0b 100644
--- a/src/openai/resources/beta/threads/runs/runs.py
+++ b/src/openai/resources/beta/threads/runs/runs.py
@@ -4,7 +4,7 @@
import time
import typing_extensions
-from typing import Iterable, Optional, overload
+from typing import Union, Iterable, Optional, overload
from functools import partial
from typing_extensions import Literal
@@ -31,7 +31,12 @@
from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ....._streaming import Stream, AsyncStream
from .....pagination import SyncCursorPage, AsyncCursorPage
-from .....types.beta import AssistantToolParam, AssistantStreamEvent
+from .....types.beta import (
+ AssistantToolParam,
+ AssistantStreamEvent,
+ AssistantToolChoiceOptionParam,
+ AssistantResponseFormatOptionParam,
+)
from ....._base_client import (
AsyncPaginator,
make_request_options,
@@ -77,11 +82,40 @@ def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -107,6 +141,18 @@ def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -117,6 +163,21 @@ def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
stream: If `true`, returns a stream of events that happen during the Run as server-sent
events, terminating when the Run enters a terminal state with a `data: [DONE]`
message.
@@ -125,6 +186,13 @@ def create(
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -148,10 +216,39 @@ def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -181,6 +278,18 @@ def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -191,10 +300,32 @@ def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -218,10 +349,39 @@ def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -251,6 +411,18 @@ def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -261,10 +433,32 @@ def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -287,11 +481,40 @@ def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -310,11 +533,16 @@ def create(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"stream": stream,
"temperature": temperature,
+ "tool_choice": tool_choice,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
@@ -518,10 +746,39 @@ def create_and_poll(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -542,12 +799,17 @@ def create_and_poll(
additional_instructions=additional_instructions,
additional_messages=additional_messages,
instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
metadata=metadata,
model=model,
+ response_format=response_format,
temperature=temperature,
+ tool_choice=tool_choice,
# We assume we are not streaming when polling
stream=False,
tools=tools,
+ truncation_strategy=truncation_strategy,
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
@@ -572,10 +834,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -596,10 +887,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -620,10 +940,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -652,11 +1001,16 @@ def create_and_stream(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
@@ -722,10 +1076,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -745,10 +1128,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -768,10 +1180,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -800,11 +1241,16 @@ def stream(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
@@ -1123,11 +1569,40 @@ async def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1153,6 +1628,18 @@ async def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -1163,6 +1650,21 @@ async def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
stream: If `true`, returns a stream of events that happen during the Run as server-sent
events, terminating when the Run enters a terminal state with a `data: [DONE]`
message.
@@ -1171,6 +1673,13 @@ async def create(
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -1194,10 +1703,39 @@ async def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1227,6 +1765,18 @@ async def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -1237,10 +1787,32 @@ async def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -1264,10 +1836,39 @@ async def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1297,6 +1898,18 @@ async def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -1307,10 +1920,32 @@ async def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -1333,11 +1968,40 @@ async def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1356,11 +2020,16 @@ async def create(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"stream": stream,
"temperature": temperature,
+ "tool_choice": tool_choice,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
@@ -1564,10 +2233,39 @@ async def create_and_poll(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1588,12 +2286,17 @@ async def create_and_poll(
additional_instructions=additional_instructions,
additional_messages=additional_messages,
instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
metadata=metadata,
model=model,
+ response_format=response_format,
temperature=temperature,
+ tool_choice=tool_choice,
# We assume we are not streaming when polling
stream=False,
tools=tools,
+ truncation_strategy=truncation_strategy,
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
@@ -1618,10 +2321,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1642,10 +2374,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AsyncAssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1666,10 +2427,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AsyncAssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1700,11 +2490,16 @@ def create_and_stream(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
@@ -1770,10 +2565,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1793,10 +2617,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AsyncAssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1816,10 +2669,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AsyncAssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1850,11 +2732,16 @@ def stream(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
diff --git a/src/openai/resources/beta/threads/threads.py b/src/openai/resources/beta/threads/threads.py
index 3509267d4f..9c2e2f0043 100644
--- a/src/openai/resources/beta/threads/threads.py
+++ b/src/openai/resources/beta/threads/threads.py
@@ -2,7 +2,7 @@
from __future__ import annotations
-from typing import Iterable, Optional, overload
+from typing import Union, Iterable, Optional, overload
from functools import partial
from typing_extensions import Literal
@@ -40,6 +40,8 @@
Thread,
ThreadDeleted,
AssistantStreamEvent,
+ AssistantToolChoiceOptionParam,
+ AssistantResponseFormatOptionParam,
thread_create_params,
thread_update_params,
thread_create_and_run_params,
@@ -241,12 +243,41 @@ def create_and_run(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -265,6 +296,18 @@ def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -275,6 +318,21 @@ def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
stream: If `true`, returns a stream of events that happen during the Run as server-sent
events, terminating when the Run enters a terminal state with a `data: [DONE]`
message.
@@ -285,6 +343,13 @@ def create_and_run(
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -305,11 +370,40 @@ def create_and_run(
assistant_id: str,
stream: Literal[True],
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -332,6 +426,18 @@ def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -342,12 +448,34 @@ def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -368,11 +496,40 @@ def create_and_run(
assistant_id: str,
stream: bool,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -395,6 +552,18 @@ def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -405,12 +574,34 @@ def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -430,12 +621,41 @@ def create_and_run(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -450,12 +670,17 @@ def create_and_run(
{
"assistant_id": assistant_id,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"stream": stream,
"temperature": temperature,
"thread": thread,
+ "tool_choice": tool_choice,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
thread_create_and_run_params.ThreadCreateAndRunParams,
),
@@ -472,11 +697,40 @@ def create_and_run_poll(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -493,11 +747,16 @@ def create_and_run_poll(
run = self.create_and_run(
assistant_id=assistant_id,
instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
metadata=metadata,
model=model,
+ response_format=response_format,
temperature=temperature,
stream=False,
thread=thread,
+ tool_choice=tool_choice,
+ truncation_strategy=truncation_strategy,
tools=tools,
extra_headers=extra_headers,
extra_query=extra_query,
@@ -512,11 +771,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -533,11 +821,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
event_handler: AssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -554,11 +871,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
event_handler: AssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -581,12 +927,17 @@ def create_and_run_stream(
{
"assistant_id": assistant_id,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"thread": thread,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
thread_create_and_run_params.ThreadCreateAndRunParams,
),
@@ -780,12 +1131,41 @@ async def create_and_run(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -804,6 +1184,18 @@ async def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -814,6 +1206,21 @@ async def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
stream: If `true`, returns a stream of events that happen during the Run as server-sent
events, terminating when the Run enters a terminal state with a `data: [DONE]`
message.
@@ -824,6 +1231,13 @@ async def create_and_run(
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -844,11 +1258,40 @@ async def create_and_run(
assistant_id: str,
stream: Literal[True],
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -871,6 +1314,18 @@ async def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -881,12 +1336,34 @@ async def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -907,11 +1384,40 @@ async def create_and_run(
assistant_id: str,
stream: bool,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -934,6 +1440,18 @@ async def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -944,12 +1462,34 @@ async def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -969,12 +1509,41 @@ async def create_and_run(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -989,12 +1558,17 @@ async def create_and_run(
{
"assistant_id": assistant_id,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"stream": stream,
"temperature": temperature,
"thread": thread,
+ "tool_choice": tool_choice,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
thread_create_and_run_params.ThreadCreateAndRunParams,
),
@@ -1011,11 +1585,40 @@ async def create_and_run_poll(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1032,11 +1635,16 @@ async def create_and_run_poll(
run = await self.create_and_run(
assistant_id=assistant_id,
instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
metadata=metadata,
model=model,
+ response_format=response_format,
temperature=temperature,
stream=False,
thread=thread,
+ tool_choice=tool_choice,
+ truncation_strategy=truncation_strategy,
tools=tools,
extra_headers=extra_headers,
extra_query=extra_query,
@@ -1053,11 +1661,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1074,11 +1711,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
event_handler: AsyncAssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1095,11 +1761,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
event_handler: AsyncAssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1124,12 +1819,17 @@ def create_and_run_stream(
{
"assistant_id": assistant_id,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"thread": thread,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
thread_create_and_run_params.ThreadCreateAndRunParams,
),
diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py
index 3000603689..1a23e7876e 100644
--- a/src/openai/resources/chat/completions.py
+++ b/src/openai/resources/chat/completions.py
@@ -50,6 +50,8 @@ def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -137,8 +139,7 @@ def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -240,6 +241,8 @@ def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -334,8 +337,7 @@ def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -430,6 +432,8 @@ def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -524,8 +528,7 @@ def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -620,6 +623,8 @@ def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -717,6 +722,8 @@ async def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -804,8 +811,7 @@ async def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -907,6 +913,8 @@ async def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -1001,8 +1009,7 @@ async def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -1097,6 +1104,8 @@ async def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -1191,8 +1200,7 @@ async def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -1287,6 +1295,8 @@ async def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
diff --git a/src/openai/resources/fine_tuning/fine_tuning.py b/src/openai/resources/fine_tuning/fine_tuning.py
index 659b3e8501..0404fed6ec 100644
--- a/src/openai/resources/fine_tuning/fine_tuning.py
+++ b/src/openai/resources/fine_tuning/fine_tuning.py
@@ -11,6 +11,7 @@
AsyncJobsWithStreamingResponse,
)
from ..._compat import cached_property
+from .jobs.jobs import Jobs, AsyncJobs
from ..._resource import SyncAPIResource, AsyncAPIResource
__all__ = ["FineTuning", "AsyncFineTuning"]
diff --git a/src/openai/resources/fine_tuning/jobs/__init__.py b/src/openai/resources/fine_tuning/jobs/__init__.py
new file mode 100644
index 0000000000..94cd1fb7e7
--- /dev/null
+++ b/src/openai/resources/fine_tuning/jobs/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .jobs import (
+ Jobs,
+ AsyncJobs,
+ JobsWithRawResponse,
+ AsyncJobsWithRawResponse,
+ JobsWithStreamingResponse,
+ AsyncJobsWithStreamingResponse,
+)
+from .checkpoints import (
+ Checkpoints,
+ AsyncCheckpoints,
+ CheckpointsWithRawResponse,
+ AsyncCheckpointsWithRawResponse,
+ CheckpointsWithStreamingResponse,
+ AsyncCheckpointsWithStreamingResponse,
+)
+
+__all__ = [
+ "Checkpoints",
+ "AsyncCheckpoints",
+ "CheckpointsWithRawResponse",
+ "AsyncCheckpointsWithRawResponse",
+ "CheckpointsWithStreamingResponse",
+ "AsyncCheckpointsWithStreamingResponse",
+ "Jobs",
+ "AsyncJobs",
+ "JobsWithRawResponse",
+ "AsyncJobsWithRawResponse",
+ "JobsWithStreamingResponse",
+ "AsyncJobsWithStreamingResponse",
+]
diff --git a/src/openai/resources/fine_tuning/jobs/checkpoints.py b/src/openai/resources/fine_tuning/jobs/checkpoints.py
new file mode 100644
index 0000000000..e9ea6aad9a
--- /dev/null
+++ b/src/openai/resources/fine_tuning/jobs/checkpoints.py
@@ -0,0 +1,176 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import (
+ AsyncPaginator,
+ make_request_options,
+)
+from ....types.fine_tuning.jobs import FineTuningJobCheckpoint, checkpoint_list_params
+
+__all__ = ["Checkpoints", "AsyncCheckpoints"]
+
+
+class Checkpoints(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> CheckpointsWithRawResponse:
+ return CheckpointsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> CheckpointsWithStreamingResponse:
+ return CheckpointsWithStreamingResponse(self)
+
+ def list(
+ self,
+ fine_tuning_job_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[FineTuningJobCheckpoint]:
+ """
+ List checkpoints for a fine-tuning job.
+
+ Args:
+ after: Identifier for the last checkpoint ID from the previous pagination request.
+
+ limit: Number of checkpoints to retrieve.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuning_job_id:
+ raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+ return self._get_api_list(
+ f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints",
+ page=SyncCursorPage[FineTuningJobCheckpoint],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ },
+ checkpoint_list_params.CheckpointListParams,
+ ),
+ ),
+ model=FineTuningJobCheckpoint,
+ )
+
+
+class AsyncCheckpoints(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncCheckpointsWithRawResponse:
+ return AsyncCheckpointsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncCheckpointsWithStreamingResponse:
+ return AsyncCheckpointsWithStreamingResponse(self)
+
+ def list(
+ self,
+ fine_tuning_job_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[FineTuningJobCheckpoint, AsyncCursorPage[FineTuningJobCheckpoint]]:
+ """
+ List checkpoints for a fine-tuning job.
+
+ Args:
+ after: Identifier for the last checkpoint ID from the previous pagination request.
+
+ limit: Number of checkpoints to retrieve.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuning_job_id:
+ raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+ return self._get_api_list(
+ f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints",
+ page=AsyncCursorPage[FineTuningJobCheckpoint],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ },
+ checkpoint_list_params.CheckpointListParams,
+ ),
+ ),
+ model=FineTuningJobCheckpoint,
+ )
+
+
+class CheckpointsWithRawResponse:
+ def __init__(self, checkpoints: Checkpoints) -> None:
+ self._checkpoints = checkpoints
+
+ self.list = _legacy_response.to_raw_response_wrapper(
+ checkpoints.list,
+ )
+
+
+class AsyncCheckpointsWithRawResponse:
+ def __init__(self, checkpoints: AsyncCheckpoints) -> None:
+ self._checkpoints = checkpoints
+
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ checkpoints.list,
+ )
+
+
+class CheckpointsWithStreamingResponse:
+ def __init__(self, checkpoints: Checkpoints) -> None:
+ self._checkpoints = checkpoints
+
+ self.list = to_streamed_response_wrapper(
+ checkpoints.list,
+ )
+
+
+class AsyncCheckpointsWithStreamingResponse:
+ def __init__(self, checkpoints: AsyncCheckpoints) -> None:
+ self._checkpoints = checkpoints
+
+ self.list = async_to_streamed_response_wrapper(
+ checkpoints.list,
+ )
diff --git a/src/openai/resources/fine_tuning/jobs.py b/src/openai/resources/fine_tuning/jobs/jobs.py
similarity index 89%
rename from src/openai/resources/fine_tuning/jobs.py
rename to src/openai/resources/fine_tuning/jobs/jobs.py
index a0c3e24dac..229f716c48 100644
--- a/src/openai/resources/fine_tuning/jobs.py
+++ b/src/openai/resources/fine_tuning/jobs/jobs.py
@@ -2,26 +2,34 @@
from __future__ import annotations
-from typing import Union, Optional
+from typing import Union, Iterable, Optional
from typing_extensions import Literal
import httpx
-from ... import _legacy_response
-from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
maybe_transform,
async_maybe_transform,
)
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from ...pagination import SyncCursorPage, AsyncCursorPage
-from ..._base_client import (
+from ...._compat import cached_property
+from .checkpoints import (
+ Checkpoints,
+ AsyncCheckpoints,
+ CheckpointsWithRawResponse,
+ AsyncCheckpointsWithRawResponse,
+ CheckpointsWithStreamingResponse,
+ AsyncCheckpointsWithStreamingResponse,
+)
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import (
AsyncPaginator,
make_request_options,
)
-from ...types.fine_tuning import (
+from ....types.fine_tuning import (
FineTuningJob,
FineTuningJobEvent,
job_list_params,
@@ -33,6 +41,10 @@
class Jobs(SyncAPIResource):
+ @cached_property
+ def checkpoints(self) -> Checkpoints:
+ return Checkpoints(self._client)
+
@cached_property
def with_raw_response(self) -> JobsWithRawResponse:
return JobsWithRawResponse(self)
@@ -47,6 +59,8 @@ def create(
model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]],
training_file: str,
hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN,
+ integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN,
+ seed: Optional[int] | NotGiven = NOT_GIVEN,
suffix: Optional[str] | NotGiven = NOT_GIVEN,
validation_file: Optional[str] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -82,6 +96,12 @@ def create(
hyperparameters: The hyperparameters used for the fine-tuning job.
+ integrations: A list of integrations to enable for your fine-tuning job.
+
+ seed: The seed controls the reproducibility of the job. Passing in the same seed and
+ job parameters should produce the same results, but may differ in rare cases. If
+ a seed is not specified, one will be generated for you.
+
suffix: A string of up to 18 characters that will be added to your fine-tuned model
name.
@@ -116,6 +136,8 @@ def create(
"model": model,
"training_file": training_file,
"hyperparameters": hyperparameters,
+ "integrations": integrations,
+ "seed": seed,
"suffix": suffix,
"validation_file": validation_file,
},
@@ -294,6 +316,10 @@ def list_events(
class AsyncJobs(AsyncAPIResource):
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpoints:
+ return AsyncCheckpoints(self._client)
+
@cached_property
def with_raw_response(self) -> AsyncJobsWithRawResponse:
return AsyncJobsWithRawResponse(self)
@@ -308,6 +334,8 @@ async def create(
model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]],
training_file: str,
hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN,
+ integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN,
+ seed: Optional[int] | NotGiven = NOT_GIVEN,
suffix: Optional[str] | NotGiven = NOT_GIVEN,
validation_file: Optional[str] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -343,6 +371,12 @@ async def create(
hyperparameters: The hyperparameters used for the fine-tuning job.
+ integrations: A list of integrations to enable for your fine-tuning job.
+
+ seed: The seed controls the reproducibility of the job. Passing in the same seed and
+ job parameters should produce the same results, but may differ in rare cases. If
+ a seed is not specified, one will be generated for you.
+
suffix: A string of up to 18 characters that will be added to your fine-tuned model
name.
@@ -377,6 +411,8 @@ async def create(
"model": model,
"training_file": training_file,
"hyperparameters": hyperparameters,
+ "integrations": integrations,
+ "seed": seed,
"suffix": suffix,
"validation_file": validation_file,
},
@@ -574,6 +610,10 @@ def __init__(self, jobs: Jobs) -> None:
jobs.list_events,
)
+ @cached_property
+ def checkpoints(self) -> CheckpointsWithRawResponse:
+ return CheckpointsWithRawResponse(self._jobs.checkpoints)
+
class AsyncJobsWithRawResponse:
def __init__(self, jobs: AsyncJobs) -> None:
@@ -595,6 +635,10 @@ def __init__(self, jobs: AsyncJobs) -> None:
jobs.list_events,
)
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpointsWithRawResponse:
+ return AsyncCheckpointsWithRawResponse(self._jobs.checkpoints)
+
class JobsWithStreamingResponse:
def __init__(self, jobs: Jobs) -> None:
@@ -616,6 +660,10 @@ def __init__(self, jobs: Jobs) -> None:
jobs.list_events,
)
+ @cached_property
+ def checkpoints(self) -> CheckpointsWithStreamingResponse:
+ return CheckpointsWithStreamingResponse(self._jobs.checkpoints)
+
class AsyncJobsWithStreamingResponse:
def __init__(self, jobs: AsyncJobs) -> None:
@@ -636,3 +684,7 @@ def __init__(self, jobs: AsyncJobs) -> None:
self.list_events = async_to_streamed_response_wrapper(
jobs.list_events,
)
+
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpointsWithStreamingResponse:
+ return AsyncCheckpointsWithStreamingResponse(self._jobs.checkpoints)
diff --git a/src/openai/types/beta/__init__.py b/src/openai/types/beta/__init__.py
index a7de0272b4..0171694587 100644
--- a/src/openai/types/beta/__init__.py
+++ b/src/openai/types/beta/__init__.py
@@ -15,9 +15,21 @@
from .thread_create_params import ThreadCreateParams as ThreadCreateParams
from .thread_update_params import ThreadUpdateParams as ThreadUpdateParams
from .assistant_list_params import AssistantListParams as AssistantListParams
+from .assistant_tool_choice import AssistantToolChoice as AssistantToolChoice
from .code_interpreter_tool import CodeInterpreterTool as CodeInterpreterTool
from .assistant_stream_event import AssistantStreamEvent as AssistantStreamEvent
from .assistant_create_params import AssistantCreateParams as AssistantCreateParams
from .assistant_update_params import AssistantUpdateParams as AssistantUpdateParams
+from .assistant_response_format import AssistantResponseFormat as AssistantResponseFormat
+from .assistant_tool_choice_param import AssistantToolChoiceParam as AssistantToolChoiceParam
from .code_interpreter_tool_param import CodeInterpreterToolParam as CodeInterpreterToolParam
+from .assistant_tool_choice_option import AssistantToolChoiceOption as AssistantToolChoiceOption
from .thread_create_and_run_params import ThreadCreateAndRunParams as ThreadCreateAndRunParams
+from .assistant_tool_choice_function import AssistantToolChoiceFunction as AssistantToolChoiceFunction
+from .assistant_response_format_param import AssistantResponseFormatParam as AssistantResponseFormatParam
+from .assistant_response_format_option import AssistantResponseFormatOption as AssistantResponseFormatOption
+from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam as AssistantToolChoiceOptionParam
+from .assistant_tool_choice_function_param import AssistantToolChoiceFunctionParam as AssistantToolChoiceFunctionParam
+from .assistant_response_format_option_param import (
+ AssistantResponseFormatOptionParam as AssistantResponseFormatOptionParam,
+)
diff --git a/src/openai/types/beta/assistant.py b/src/openai/types/beta/assistant.py
index 32561a9aa8..0a0d28ed01 100644
--- a/src/openai/types/beta/assistant.py
+++ b/src/openai/types/beta/assistant.py
@@ -29,7 +29,7 @@ class Assistant(BaseModel):
instructions: Optional[str] = None
"""The system instructions that the assistant uses.
- The maximum length is 32768 characters.
+ The maximum length is 256,000 characters.
"""
metadata: Optional[object] = None
diff --git a/src/openai/types/beta/assistant_create_params.py b/src/openai/types/beta/assistant_create_params.py
index 8bad323640..011121485f 100644
--- a/src/openai/types/beta/assistant_create_params.py
+++ b/src/openai/types/beta/assistant_create_params.py
@@ -2,8 +2,8 @@
from __future__ import annotations
-from typing import List, Iterable, Optional
-from typing_extensions import Required, TypedDict
+from typing import List, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypedDict
from .assistant_tool_param import AssistantToolParam
@@ -11,7 +11,31 @@
class AssistantCreateParams(TypedDict, total=False):
- model: Required[str]
+ model: Required[
+ Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ ]
+ ]
"""ID of the model to use.
You can use the
@@ -34,7 +58,7 @@ class AssistantCreateParams(TypedDict, total=False):
instructions: Optional[str]
"""The system instructions that the assistant uses.
- The maximum length is 32768 characters.
+ The maximum length is 256,000 characters.
"""
metadata: Optional[object]
diff --git a/src/openai/types/beta/assistant_response_format.py b/src/openai/types/beta/assistant_response_format.py
new file mode 100644
index 0000000000..f53bdaf62a
--- /dev/null
+++ b/src/openai/types/beta/assistant_response_format.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["AssistantResponseFormat"]
+
+
+class AssistantResponseFormat(BaseModel):
+ type: Optional[Literal["text", "json_object"]] = None
+ """Must be one of `text` or `json_object`."""
diff --git a/src/openai/types/beta/assistant_response_format_option.py b/src/openai/types/beta/assistant_response_format_option.py
new file mode 100644
index 0000000000..d4e05e0ea9
--- /dev/null
+++ b/src/openai/types/beta/assistant_response_format_option.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal
+
+from .assistant_response_format import AssistantResponseFormat
+
+__all__ = ["AssistantResponseFormatOption"]
+
+AssistantResponseFormatOption = Union[Literal["none", "auto"], AssistantResponseFormat]
diff --git a/src/openai/types/beta/assistant_response_format_option_param.py b/src/openai/types/beta/assistant_response_format_option_param.py
new file mode 100644
index 0000000000..46e04125d1
--- /dev/null
+++ b/src/openai/types/beta/assistant_response_format_option_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal
+
+from .assistant_response_format_param import AssistantResponseFormatParam
+
+__all__ = ["AssistantResponseFormatOptionParam"]
+
+AssistantResponseFormatOptionParam = Union[Literal["none", "auto"], AssistantResponseFormatParam]
diff --git a/src/openai/types/beta/assistant_response_format_param.py b/src/openai/types/beta/assistant_response_format_param.py
new file mode 100644
index 0000000000..96e1d02115
--- /dev/null
+++ b/src/openai/types/beta/assistant_response_format_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["AssistantResponseFormatParam"]
+
+
+class AssistantResponseFormatParam(TypedDict, total=False):
+ type: Literal["text", "json_object"]
+ """Must be one of `text` or `json_object`."""
diff --git a/src/openai/types/beta/assistant_tool_choice.py b/src/openai/types/beta/assistant_tool_choice.py
new file mode 100644
index 0000000000..4314d4b41e
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .assistant_tool_choice_function import AssistantToolChoiceFunction
+
+__all__ = ["AssistantToolChoice"]
+
+
+class AssistantToolChoice(BaseModel):
+ type: Literal["function", "code_interpreter", "retrieval"]
+ """The type of the tool. If type is `function`, the function name must be set"""
+
+ function: Optional[AssistantToolChoiceFunction] = None
diff --git a/src/openai/types/beta/assistant_tool_choice_function.py b/src/openai/types/beta/assistant_tool_choice_function.py
new file mode 100644
index 0000000000..87f38310ca
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_function.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from ..._models import BaseModel
+
+__all__ = ["AssistantToolChoiceFunction"]
+
+
+class AssistantToolChoiceFunction(BaseModel):
+ name: str
+ """The name of the function to call."""
diff --git a/src/openai/types/beta/assistant_tool_choice_function_param.py b/src/openai/types/beta/assistant_tool_choice_function_param.py
new file mode 100644
index 0000000000..428857de91
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_function_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["AssistantToolChoiceFunctionParam"]
+
+
+class AssistantToolChoiceFunctionParam(TypedDict, total=False):
+ name: Required[str]
+ """The name of the function to call."""
diff --git a/src/openai/types/beta/assistant_tool_choice_option.py b/src/openai/types/beta/assistant_tool_choice_option.py
new file mode 100644
index 0000000000..0045a5986e
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_option.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal
+
+from .assistant_tool_choice import AssistantToolChoice
+
+__all__ = ["AssistantToolChoiceOption"]
+
+AssistantToolChoiceOption = Union[Literal["none", "auto"], AssistantToolChoice]
diff --git a/src/openai/types/beta/assistant_tool_choice_option_param.py b/src/openai/types/beta/assistant_tool_choice_option_param.py
new file mode 100644
index 0000000000..618e7bff98
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_option_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal
+
+from .assistant_tool_choice_param import AssistantToolChoiceParam
+
+__all__ = ["AssistantToolChoiceOptionParam"]
+
+AssistantToolChoiceOptionParam = Union[Literal["none", "auto"], AssistantToolChoiceParam]
diff --git a/src/openai/types/beta/assistant_tool_choice_param.py b/src/openai/types/beta/assistant_tool_choice_param.py
new file mode 100644
index 0000000000..5cf6ea27be
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_param.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+from .assistant_tool_choice_function_param import AssistantToolChoiceFunctionParam
+
+__all__ = ["AssistantToolChoiceParam"]
+
+
+class AssistantToolChoiceParam(TypedDict, total=False):
+ type: Required[Literal["function", "code_interpreter", "retrieval"]]
+ """The type of the tool. If type is `function`, the function name must be set"""
+
+ function: AssistantToolChoiceFunctionParam
diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py
index 7c96aca8c1..6e9d9ed5db 100644
--- a/src/openai/types/beta/assistant_update_params.py
+++ b/src/openai/types/beta/assistant_update_params.py
@@ -26,7 +26,7 @@ class AssistantUpdateParams(TypedDict, total=False):
instructions: Optional[str]
"""The system instructions that the assistant uses.
- The maximum length is 32768 characters.
+ The maximum length is 256,000 characters.
"""
metadata: Optional[object]
diff --git a/src/openai/types/beta/thread_create_and_run_params.py b/src/openai/types/beta/thread_create_and_run_params.py
index d4266fc48c..50f947a40a 100644
--- a/src/openai/types/beta/thread_create_and_run_params.py
+++ b/src/openai/types/beta/thread_create_and_run_params.py
@@ -8,12 +8,15 @@
from .function_tool_param import FunctionToolParam
from .retrieval_tool_param import RetrievalToolParam
from .code_interpreter_tool_param import CodeInterpreterToolParam
+from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
+from .assistant_response_format_option_param import AssistantResponseFormatOptionParam
__all__ = [
"ThreadCreateAndRunParamsBase",
"Thread",
"ThreadMessage",
"Tool",
+ "TruncationStrategy",
"ThreadCreateAndRunParamsNonStreaming",
"ThreadCreateAndRunParamsStreaming",
]
@@ -33,6 +36,24 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
This is useful for modifying the behavior on a per-run basis.
"""
+ max_completion_tokens: Optional[int]
+ """
+ The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+ """
+
+ max_prompt_tokens: Optional[int]
+ """The maximum number of prompt tokens that may be used over the course of the run.
+
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+ """
+
metadata: Optional[object]
"""Set of 16 key-value pairs that can be attached to an object.
@@ -41,7 +62,30 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
a maxium of 512 characters long.
"""
- model: Optional[str]
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
"""
The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
be used to execute this run. If a value is provided here, it will override the
@@ -49,6 +93,25 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
assistant will be used.
"""
+ response_format: Optional[AssistantResponseFormatOptionParam]
+ """Specifies the format that the model must output.
+
+ Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+ """
+
temperature: Optional[float]
"""What sampling temperature to use, between 0 and 2.
@@ -59,12 +122,24 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
thread: Thread
"""If no thread is provided, an empty thread will be created."""
+ tool_choice: Optional[AssistantToolChoiceOptionParam]
+ """
+ Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+ """
+
tools: Optional[Iterable[Tool]]
"""Override the tools the assistant can use for this run.
This is useful for modifying the behavior on a per-run basis.
"""
+ truncation_strategy: Optional[TruncationStrategy]
+
class ThreadMessage(TypedDict, total=False):
content: Required[str]
@@ -115,6 +190,23 @@ class Thread(TypedDict, total=False):
Tool = Union[CodeInterpreterToolParam, RetrievalToolParam, FunctionToolParam]
+class TruncationStrategy(TypedDict, total=False):
+ type: Required[Literal["auto", "last_messages"]]
+ """The truncation strategy to use for the thread.
+
+ The default is `auto`. If set to `last_messages`, the thread will be truncated
+ to the n most recent messages in the thread. When set to `auto`, messages in the
+ middle of the thread will be dropped to fit the context length of the model,
+ `max_prompt_tokens`.
+ """
+
+ last_messages: Optional[int]
+ """
+ The number of most recent messages from the thread when constructing the context
+ for the run.
+ """
+
+
class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase):
stream: Optional[Literal[False]]
"""
diff --git a/src/openai/types/beta/threads/run.py b/src/openai/types/beta/threads/run.py
index 3ab276245f..2efc3c77fa 100644
--- a/src/openai/types/beta/threads/run.py
+++ b/src/openai/types/beta/threads/run.py
@@ -6,9 +6,28 @@
from ...._models import BaseModel
from .run_status import RunStatus
from ..assistant_tool import AssistantTool
+from ..assistant_tool_choice_option import AssistantToolChoiceOption
+from ..assistant_response_format_option import AssistantResponseFormatOption
from .required_action_function_tool_call import RequiredActionFunctionToolCall
-__all__ = ["Run", "LastError", "RequiredAction", "RequiredActionSubmitToolOutputs", "Usage"]
+__all__ = [
+ "Run",
+ "IncompleteDetails",
+ "LastError",
+ "RequiredAction",
+ "RequiredActionSubmitToolOutputs",
+ "TruncationStrategy",
+ "Usage",
+]
+
+
+class IncompleteDetails(BaseModel):
+ reason: Optional[Literal["max_completion_tokens", "max_prompt_tokens"]] = None
+ """The reason why the run is incomplete.
+
+ This will point to which specific token limit was reached over the course of the
+ run.
+ """
class LastError(BaseModel):
@@ -32,6 +51,23 @@ class RequiredAction(BaseModel):
"""For now, this is always `submit_tool_outputs`."""
+class TruncationStrategy(BaseModel):
+ type: Literal["auto", "last_messages"]
+ """The truncation strategy to use for the thread.
+
+ The default is `auto`. If set to `last_messages`, the thread will be truncated
+ to the n most recent messages in the thread. When set to `auto`, messages in the
+ middle of the thread will be dropped to fit the context length of the model,
+ `max_prompt_tokens`.
+ """
+
+ last_messages: Optional[int] = None
+ """
+ The number of most recent messages from the thread when constructing the context
+ for the run.
+ """
+
+
class Usage(BaseModel):
completion_tokens: int
"""Number of completion tokens used over the course of the run."""
@@ -76,6 +112,12 @@ class Run(BaseModel):
this run.
"""
+ incomplete_details: Optional[IncompleteDetails] = None
+ """Details on why the run is incomplete.
+
+ Will be `null` if the run is not incomplete.
+ """
+
instructions: str
"""
The instructions that the
@@ -86,6 +128,18 @@ class Run(BaseModel):
last_error: Optional[LastError] = None
"""The last error associated with this run. Will be `null` if there are no errors."""
+ max_completion_tokens: Optional[int] = None
+ """
+ The maximum number of completion tokens specified to have been used over the
+ course of the run.
+ """
+
+ max_prompt_tokens: Optional[int] = None
+ """
+ The maximum number of prompt tokens specified to have been used over the course
+ of the run.
+ """
+
metadata: Optional[object] = None
"""Set of 16 key-value pairs that can be attached to an object.
@@ -110,6 +164,25 @@ class Run(BaseModel):
Will be `null` if no action is required.
"""
+ response_format: Optional[AssistantResponseFormatOption] = None
+ """Specifies the format that the model must output.
+
+ Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+ """
+
started_at: Optional[int] = None
"""The Unix timestamp (in seconds) for when the run was started."""
@@ -126,6 +199,16 @@ class Run(BaseModel):
that was executed on as a part of this run.
"""
+ tool_choice: Optional[AssistantToolChoiceOption] = None
+ """
+ Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+ """
+
tools: List[AssistantTool]
"""
The list of tools that the
@@ -133,6 +216,8 @@ class Run(BaseModel):
this run.
"""
+ truncation_strategy: Optional[TruncationStrategy] = None
+
usage: Optional[Usage] = None
"""Usage statistics related to the run.
diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py
index e9bc19d980..9f2d4ba18b 100644
--- a/src/openai/types/beta/threads/run_create_params.py
+++ b/src/openai/types/beta/threads/run_create_params.py
@@ -6,8 +6,16 @@
from typing_extensions import Literal, Required, TypedDict
from ..assistant_tool_param import AssistantToolParam
+from ..assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
+from ..assistant_response_format_option_param import AssistantResponseFormatOptionParam
-__all__ = ["RunCreateParamsBase", "AdditionalMessage", "RunCreateParamsNonStreaming", "RunCreateParamsStreaming"]
+__all__ = [
+ "RunCreateParamsBase",
+ "AdditionalMessage",
+ "TruncationStrategy",
+ "RunCreateParamsNonStreaming",
+ "RunCreateParamsStreaming",
+]
class RunCreateParamsBase(TypedDict, total=False):
@@ -35,6 +43,24 @@ class RunCreateParamsBase(TypedDict, total=False):
of the assistant. This is useful for modifying the behavior on a per-run basis.
"""
+ max_completion_tokens: Optional[int]
+ """
+ The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+ """
+
+ max_prompt_tokens: Optional[int]
+ """The maximum number of prompt tokens that may be used over the course of the run.
+
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+ """
+
metadata: Optional[object]
"""Set of 16 key-value pairs that can be attached to an object.
@@ -43,7 +69,30 @@ class RunCreateParamsBase(TypedDict, total=False):
a maxium of 512 characters long.
"""
- model: Optional[str]
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
"""
The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
be used to execute this run. If a value is provided here, it will override the
@@ -51,6 +100,25 @@ class RunCreateParamsBase(TypedDict, total=False):
assistant will be used.
"""
+ response_format: Optional[AssistantResponseFormatOptionParam]
+ """Specifies the format that the model must output.
+
+ Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+ """
+
temperature: Optional[float]
"""What sampling temperature to use, between 0 and 2.
@@ -58,12 +126,24 @@ class RunCreateParamsBase(TypedDict, total=False):
0.2 will make it more focused and deterministic.
"""
+ tool_choice: Optional[AssistantToolChoiceOptionParam]
+ """
+ Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+ """
+
tools: Optional[Iterable[AssistantToolParam]]
"""Override the tools the assistant can use for this run.
This is useful for modifying the behavior on a per-run basis.
"""
+ truncation_strategy: Optional[TruncationStrategy]
+
class AdditionalMessage(TypedDict, total=False):
content: Required[str]
@@ -95,6 +175,23 @@ class AdditionalMessage(TypedDict, total=False):
"""
+class TruncationStrategy(TypedDict, total=False):
+ type: Required[Literal["auto", "last_messages"]]
+ """The truncation strategy to use for the thread.
+
+ The default is `auto`. If set to `last_messages`, the thread will be truncated
+ to the n most recent messages in the thread. When set to `auto`, messages in the
+ middle of the thread will be dropped to fit the context length of the model,
+ `max_prompt_tokens`.
+ """
+
+ last_messages: Optional[int]
+ """
+ The number of most recent messages from the thread when constructing the context
+ for the run.
+ """
+
+
class RunCreateParamsNonStreaming(RunCreateParamsBase):
stream: Optional[Literal[False]]
"""
diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py
index ab6a747021..1e0f7f8195 100644
--- a/src/openai/types/chat/completion_create_params.py
+++ b/src/openai/types/chat/completion_create_params.py
@@ -32,6 +32,8 @@ class CompletionCreateParamsBase(TypedDict, total=False):
Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -102,8 +104,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""Whether to return log probabilities of the output tokens or not.
If true, returns the log probabilities of each output token returned in the
- `content` of `message`. This option is currently not available on the
- `gpt-4-vision-preview` model.
+ `content` of `message`.
"""
max_tokens: Optional[int]
diff --git a/src/openai/types/fine_tuning/__init__.py b/src/openai/types/fine_tuning/__init__.py
index 0bb2b90438..92b81329b1 100644
--- a/src/openai/types/fine_tuning/__init__.py
+++ b/src/openai/types/fine_tuning/__init__.py
@@ -7,3 +7,8 @@
from .job_create_params import JobCreateParams as JobCreateParams
from .fine_tuning_job_event import FineTuningJobEvent as FineTuningJobEvent
from .job_list_events_params import JobListEventsParams as JobListEventsParams
+from .fine_tuning_job_integration import FineTuningJobIntegration as FineTuningJobIntegration
+from .fine_tuning_job_wandb_integration import FineTuningJobWandbIntegration as FineTuningJobWandbIntegration
+from .fine_tuning_job_wandb_integration_object import (
+ FineTuningJobWandbIntegrationObject as FineTuningJobWandbIntegrationObject,
+)
diff --git a/src/openai/types/fine_tuning/fine_tuning_job.py b/src/openai/types/fine_tuning/fine_tuning_job.py
index 23fe96d1a0..1593bf50c7 100644
--- a/src/openai/types/fine_tuning/fine_tuning_job.py
+++ b/src/openai/types/fine_tuning/fine_tuning_job.py
@@ -4,6 +4,7 @@
from typing_extensions import Literal
from ..._models import BaseModel
+from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject
__all__ = ["FineTuningJob", "Error", "Hyperparameters"]
@@ -80,6 +81,9 @@ class FineTuningJob(BaseModel):
[Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents).
"""
+ seed: int
+ """The seed used for the fine-tuning job."""
+
status: Literal["validating_files", "queued", "running", "succeeded", "failed", "cancelled"]
"""
The current status of the fine-tuning job, which can be either
@@ -105,3 +109,6 @@ class FineTuningJob(BaseModel):
You can retrieve the validation results with the
[Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents).
"""
+
+ integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None
+ """A list of integrations to enable for this fine-tuning job."""
diff --git a/src/openai/types/fine_tuning/fine_tuning_job_integration.py b/src/openai/types/fine_tuning/fine_tuning_job_integration.py
new file mode 100644
index 0000000000..8076313cae
--- /dev/null
+++ b/src/openai/types/fine_tuning/fine_tuning_job_integration.py
@@ -0,0 +1,7 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+
+from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject
+
+FineTuningJobIntegration = FineTuningJobWandbIntegrationObject
diff --git a/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py
new file mode 100644
index 0000000000..4ac282eb54
--- /dev/null
+++ b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+
+from ..._models import BaseModel
+
+__all__ = ["FineTuningJobWandbIntegration"]
+
+
+class FineTuningJobWandbIntegration(BaseModel):
+ project: str
+ """The name of the project that the new run will be created under."""
+
+ entity: Optional[str] = None
+ """The entity to use for the run.
+
+ This allows you to set the team or username of the WandB user that you would
+ like associated with the run. If not set, the default entity for the registered
+ WandB API key is used.
+ """
+
+ name: Optional[str] = None
+ """A display name to set for the run.
+
+ If not set, we will use the Job ID as the name.
+ """
+
+ tags: Optional[List[str]] = None
+ """A list of tags to be attached to the newly created run.
+
+ These tags are passed through directly to WandB. Some default tags are generated
+ by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}".
+ """
diff --git a/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py
new file mode 100644
index 0000000000..5b94354d50
--- /dev/null
+++ b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .fine_tuning_job_wandb_integration import FineTuningJobWandbIntegration
+
+__all__ = ["FineTuningJobWandbIntegrationObject"]
+
+
+class FineTuningJobWandbIntegrationObject(BaseModel):
+ type: Literal["wandb"]
+ """The type of the integration being enabled for the fine-tuning job"""
+
+ wandb: FineTuningJobWandbIntegration
+ """The settings for your integration with Weights and Biases.
+
+ This payload specifies the project that metrics will be sent to. Optionally, you
+ can set an explicit display name for your run, add tags to your run, and set a
+ default entity (team, username, etc) to be associated with your run.
+ """
diff --git a/src/openai/types/fine_tuning/job_create_params.py b/src/openai/types/fine_tuning/job_create_params.py
index 79e0b67e13..892c737fa3 100644
--- a/src/openai/types/fine_tuning/job_create_params.py
+++ b/src/openai/types/fine_tuning/job_create_params.py
@@ -2,10 +2,10 @@
from __future__ import annotations
-from typing import Union, Optional
+from typing import List, Union, Iterable, Optional
from typing_extensions import Literal, Required, TypedDict
-__all__ = ["JobCreateParams", "Hyperparameters"]
+__all__ = ["JobCreateParams", "Hyperparameters", "Integration", "IntegrationWandb"]
class JobCreateParams(TypedDict, total=False):
@@ -32,6 +32,17 @@ class JobCreateParams(TypedDict, total=False):
hyperparameters: Hyperparameters
"""The hyperparameters used for the fine-tuning job."""
+ integrations: Optional[Iterable[Integration]]
+ """A list of integrations to enable for your fine-tuning job."""
+
+ seed: Optional[int]
+ """The seed controls the reproducibility of the job.
+
+ Passing in the same seed and job parameters should produce the same results, but
+ may differ in rare cases. If a seed is not specified, one will be generated for
+ you.
+ """
+
suffix: Optional[str]
"""
A string of up to 18 characters that will be added to your fine-tuned model
@@ -76,3 +87,45 @@ class Hyperparameters(TypedDict, total=False):
An epoch refers to one full cycle through the training dataset.
"""
+
+
+class IntegrationWandb(TypedDict, total=False):
+ project: Required[str]
+ """The name of the project that the new run will be created under."""
+
+ entity: Optional[str]
+ """The entity to use for the run.
+
+ This allows you to set the team or username of the WandB user that you would
+ like associated with the run. If not set, the default entity for the registered
+ WandB API key is used.
+ """
+
+ name: Optional[str]
+ """A display name to set for the run.
+
+ If not set, we will use the Job ID as the name.
+ """
+
+ tags: List[str]
+ """A list of tags to be attached to the newly created run.
+
+ These tags are passed through directly to WandB. Some default tags are generated
+ by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}".
+ """
+
+
+class Integration(TypedDict, total=False):
+ type: Required[Literal["wandb"]]
+ """The type of integration to enable.
+
+ Currently, only "wandb" (Weights and Biases) is supported.
+ """
+
+ wandb: Required[IntegrationWandb]
+ """The settings for your integration with Weights and Biases.
+
+ This payload specifies the project that metrics will be sent to. Optionally, you
+ can set an explicit display name for your run, add tags to your run, and set a
+ default entity (team, username, etc) to be associated with your run.
+ """
diff --git a/src/openai/types/fine_tuning/jobs/__init__.py b/src/openai/types/fine_tuning/jobs/__init__.py
new file mode 100644
index 0000000000..6c93da1b69
--- /dev/null
+++ b/src/openai/types/fine_tuning/jobs/__init__.py
@@ -0,0 +1,6 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .checkpoint_list_params import CheckpointListParams as CheckpointListParams
+from .fine_tuning_job_checkpoint import FineTuningJobCheckpoint as FineTuningJobCheckpoint
diff --git a/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py b/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py
new file mode 100644
index 0000000000..adceb3b218
--- /dev/null
+++ b/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["CheckpointListParams"]
+
+
+class CheckpointListParams(TypedDict, total=False):
+ after: str
+ """Identifier for the last checkpoint from the previous pagination request."""
+
+ limit: int
+ """Number of checkpoints to retrieve."""
diff --git a/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py b/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py
new file mode 100644
index 0000000000..bd07317a3e
--- /dev/null
+++ b/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["FineTuningJobCheckpoint", "Metrics"]
+
+
+class Metrics(BaseModel):
+ full_valid_loss: Optional[float] = None
+
+ full_valid_mean_token_accuracy: Optional[float] = None
+
+ step: Optional[float] = None
+
+ train_loss: Optional[float] = None
+
+ train_mean_token_accuracy: Optional[float] = None
+
+ valid_loss: Optional[float] = None
+
+ valid_mean_token_accuracy: Optional[float] = None
+
+
+class FineTuningJobCheckpoint(BaseModel):
+ id: str
+ """The checkpoint identifier, which can be referenced in the API endpoints."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the checkpoint was created."""
+
+ fine_tuned_model_checkpoint: str
+ """The name of the fine-tuned checkpoint model that is created."""
+
+ fine_tuning_job_id: str
+ """The name of the fine-tuning job that this checkpoint was created from."""
+
+ metrics: Metrics
+ """Metrics at the step number during the fine-tuning job."""
+
+ object: Literal["fine_tuning.job.checkpoint"]
+ """The object type, which is always "fine_tuning.job.checkpoint"."""
+
+ step_number: int
+ """The step number that the checkpoint was created at."""
diff --git a/tests/api_resources/beta/test_assistants.py b/tests/api_resources/beta/test_assistants.py
index 6edbe4b491..a509627b8e 100644
--- a/tests/api_resources/beta/test_assistants.py
+++ b/tests/api_resources/beta/test_assistants.py
@@ -24,14 +24,14 @@ class TestAssistants:
@parametrize
def test_method_create(self, client: OpenAI) -> None:
assistant = client.beta.assistants.create(
- model="string",
+ model="gpt-4-turbo",
)
assert_matches_type(Assistant, assistant, path=["response"])
@parametrize
def test_method_create_with_all_params(self, client: OpenAI) -> None:
assistant = client.beta.assistants.create(
- model="string",
+ model="gpt-4-turbo",
description="string",
file_ids=["string", "string", "string"],
instructions="string",
@@ -44,7 +44,7 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None:
@parametrize
def test_raw_response_create(self, client: OpenAI) -> None:
response = client.beta.assistants.with_raw_response.create(
- model="string",
+ model="gpt-4-turbo",
)
assert response.is_closed is True
@@ -55,7 +55,7 @@ def test_raw_response_create(self, client: OpenAI) -> None:
@parametrize
def test_streaming_response_create(self, client: OpenAI) -> None:
with client.beta.assistants.with_streaming_response.create(
- model="string",
+ model="gpt-4-turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -235,14 +235,14 @@ class TestAsyncAssistants:
@parametrize
async def test_method_create(self, async_client: AsyncOpenAI) -> None:
assistant = await async_client.beta.assistants.create(
- model="string",
+ model="gpt-4-turbo",
)
assert_matches_type(Assistant, assistant, path=["response"])
@parametrize
async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
assistant = await async_client.beta.assistants.create(
- model="string",
+ model="gpt-4-turbo",
description="string",
file_ids=["string", "string", "string"],
instructions="string",
@@ -255,7 +255,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) ->
@parametrize
async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
response = await async_client.beta.assistants.with_raw_response.create(
- model="string",
+ model="gpt-4-turbo",
)
assert response.is_closed is True
@@ -266,7 +266,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
@parametrize
async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
async with async_client.beta.assistants.with_streaming_response.create(
- model="string",
+ model="gpt-4-turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
diff --git a/tests/api_resources/beta/test_threads.py b/tests/api_resources/beta/test_threads.py
index fd3f7c5102..7c07251433 100644
--- a/tests/api_resources/beta/test_threads.py
+++ b/tests/api_resources/beta/test_threads.py
@@ -207,8 +207,11 @@ def test_method_create_and_run_with_all_params_overload_1(self, client: OpenAI)
thread = client.beta.threads.create_and_run(
assistant_id="string",
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
stream=False,
temperature=1,
thread={
@@ -234,7 +237,12 @@ def test_method_create_and_run_with_all_params_overload_1(self, client: OpenAI)
],
"metadata": {},
},
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
assert_matches_type(Run, thread, path=["response"])
@@ -276,8 +284,11 @@ def test_method_create_and_run_with_all_params_overload_2(self, client: OpenAI)
assistant_id="string",
stream=True,
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
temperature=1,
thread={
"messages": [
@@ -302,7 +313,12 @@ def test_method_create_and_run_with_all_params_overload_2(self, client: OpenAI)
],
"metadata": {},
},
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
thread_stream.response.close()
@@ -521,8 +537,11 @@ async def test_method_create_and_run_with_all_params_overload_1(self, async_clie
thread = await async_client.beta.threads.create_and_run(
assistant_id="string",
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
stream=False,
temperature=1,
thread={
@@ -548,7 +567,12 @@ async def test_method_create_and_run_with_all_params_overload_1(self, async_clie
],
"metadata": {},
},
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
assert_matches_type(Run, thread, path=["response"])
@@ -590,8 +614,11 @@ async def test_method_create_and_run_with_all_params_overload_2(self, async_clie
assistant_id="string",
stream=True,
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
temperature=1,
thread={
"messages": [
@@ -616,7 +643,12 @@ async def test_method_create_and_run_with_all_params_overload_2(self, async_clie
],
"metadata": {},
},
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
await thread_stream.response.aclose()
diff --git a/tests/api_resources/beta/threads/test_runs.py b/tests/api_resources/beta/threads/test_runs.py
index 271bcccdd3..cf5b2998b9 100644
--- a/tests/api_resources/beta/threads/test_runs.py
+++ b/tests/api_resources/beta/threads/test_runs.py
@@ -57,11 +57,19 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
},
],
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
stream=False,
temperature=1,
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
assert_matches_type(Run, run, path=["response"])
@@ -136,10 +144,18 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
},
],
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
temperature=1,
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
run_stream.response.close()
@@ -553,11 +569,19 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
},
],
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
stream=False,
temperature=1,
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
assert_matches_type(Run, run, path=["response"])
@@ -632,10 +656,18 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
},
],
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
temperature=1,
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
await run_stream.response.aclose()
diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py
index bb0658f3d9..c54b56a37d 100644
--- a/tests/api_resources/chat/test_completions.py
+++ b/tests/api_resources/chat/test_completions.py
@@ -26,7 +26,7 @@ def test_method_create_overload_1(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
)
assert_matches_type(ChatCompletion, completion, path=["response"])
@@ -40,7 +40,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
"name": "string",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
frequency_penalty=-2,
function_call="none",
functions=[
@@ -102,7 +102,7 @@ def test_raw_response_create_overload_1(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
)
assert response.is_closed is True
@@ -119,7 +119,7 @@ def test_streaming_response_create_overload_1(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -138,7 +138,7 @@ def test_method_create_overload_2(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
)
completion_stream.response.close()
@@ -153,7 +153,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
"name": "string",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
frequency_penalty=-2,
function_call="none",
@@ -215,7 +215,7 @@ def test_raw_response_create_overload_2(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
)
@@ -232,7 +232,7 @@ def test_streaming_response_create_overload_2(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
) as response:
assert not response.is_closed
@@ -256,7 +256,7 @@ async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
)
assert_matches_type(ChatCompletion, completion, path=["response"])
@@ -270,7 +270,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
"name": "string",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
frequency_penalty=-2,
function_call="none",
functions=[
@@ -332,7 +332,7 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) -
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
)
assert response.is_closed is True
@@ -349,7 +349,7 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncOpe
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -368,7 +368,7 @@ async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
)
await completion_stream.response.aclose()
@@ -383,7 +383,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
"name": "string",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
frequency_penalty=-2,
function_call="none",
@@ -445,7 +445,7 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) -
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
)
@@ -462,7 +462,7 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncOpe
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
) as response:
assert not response.is_closed
diff --git a/tests/api_resources/fine_tuning/jobs/__init__.py b/tests/api_resources/fine_tuning/jobs/__init__.py
new file mode 100644
index 0000000000..fd8019a9a1
--- /dev/null
+++ b/tests/api_resources/fine_tuning/jobs/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/fine_tuning/jobs/test_checkpoints.py b/tests/api_resources/fine_tuning/jobs/test_checkpoints.py
new file mode 100644
index 0000000000..915d5c6f63
--- /dev/null
+++ b/tests/api_resources/fine_tuning/jobs/test_checkpoints.py
@@ -0,0 +1,117 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.pagination import SyncCursorPage, AsyncCursorPage
+from openai.types.fine_tuning.jobs import FineTuningJobCheckpoint
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestCheckpoints:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_list(self, client: OpenAI) -> None:
+ checkpoint = client.fine_tuning.jobs.checkpoints.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+ assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: OpenAI) -> None:
+ checkpoint = client.fine_tuning.jobs.checkpoints.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ after="string",
+ limit=0,
+ )
+ assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: OpenAI) -> None:
+ response = client.fine_tuning.jobs.checkpoints.with_raw_response.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ checkpoint = response.parse()
+ assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: OpenAI) -> None:
+ with client.fine_tuning.jobs.checkpoints.with_streaming_response.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ checkpoint = response.parse()
+ assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_list(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"):
+ client.fine_tuning.jobs.checkpoints.with_raw_response.list(
+ "",
+ )
+
+
+class TestAsyncCheckpoints:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncOpenAI) -> None:
+ checkpoint = await async_client.fine_tuning.jobs.checkpoints.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+ assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ checkpoint = await async_client.fine_tuning.jobs.checkpoints.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ after="string",
+ limit=0,
+ )
+ assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.fine_tuning.jobs.checkpoints.with_raw_response.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ checkpoint = response.parse()
+ assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.fine_tuning.jobs.checkpoints.with_streaming_response.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ checkpoint = await response.parse()
+ assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_list(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"):
+ await async_client.fine_tuning.jobs.checkpoints.with_raw_response.list(
+ "",
+ )
diff --git a/tests/api_resources/fine_tuning/test_jobs.py b/tests/api_resources/fine_tuning/test_jobs.py
index f4974ebbcd..1ff6d63b31 100644
--- a/tests/api_resources/fine_tuning/test_jobs.py
+++ b/tests/api_resources/fine_tuning/test_jobs.py
@@ -39,6 +39,36 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None:
"learning_rate_multiplier": "auto",
"n_epochs": "auto",
},
+ integrations=[
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ ],
+ seed=42,
suffix="x",
validation_file="file-abc123",
)
@@ -248,6 +278,36 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) ->
"learning_rate_multiplier": "auto",
"n_epochs": "auto",
},
+ integrations=[
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ ],
+ seed=42,
suffix="x",
validation_file="file-abc123",
)