diff --git a/.stats.yml b/.stats.yml
index c550abf3c6..284caebf44 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1 +1 @@
-configured_endpoints: 51
+configured_endpoints: 52
diff --git a/api.md b/api.md
index dbc95cd0b4..cc3c91a8d5 100644
--- a/api.md
+++ b/api.md
@@ -159,16 +159,34 @@ Methods:
Types:
```python
-from openai.types.fine_tuning import FineTuningJob, FineTuningJobEvent
+from openai.types.fine_tuning import (
+ FineTuningJob,
+ FineTuningJobEvent,
+ FineTuningJobIntegration,
+ FineTuningJobWandbIntegration,
+ FineTuningJobWandbIntegrationObject,
+)
+```
+
+Methods:
+
+- client.fine_tuning.jobs.create(\*\*params) -> FineTuningJob
+- client.fine_tuning.jobs.retrieve(fine_tuning_job_id) -> FineTuningJob
+- client.fine_tuning.jobs.list(\*\*params) -> SyncCursorPage[FineTuningJob]
+- client.fine_tuning.jobs.cancel(fine_tuning_job_id) -> FineTuningJob
+- client.fine_tuning.jobs.list_events(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobEvent]
+
+### Checkpoints
+
+Types:
+
+```python
+from openai.types.fine_tuning.jobs import FineTuningJobCheckpoint
```
Methods:
-- client.fine_tuning.jobs.create(\*\*params) -> FineTuningJob
-- client.fine_tuning.jobs.retrieve(fine_tuning_job_id) -> FineTuningJob
-- client.fine_tuning.jobs.list(\*\*params) -> SyncCursorPage[FineTuningJob]
-- client.fine_tuning.jobs.cancel(fine_tuning_job_id) -> FineTuningJob
-- client.fine_tuning.jobs.list_events(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobEvent]
+- client.fine_tuning.jobs.checkpoints.list(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobCheckpoint]
# Beta
@@ -220,7 +238,15 @@ Methods:
Types:
```python
-from openai.types.beta import Thread, ThreadDeleted
+from openai.types.beta import (
+ AssistantResponseFormat,
+ AssistantResponseFormatOption,
+ AssistantToolChoice,
+ AssistantToolChoiceFunction,
+ AssistantToolChoiceOption,
+ Thread,
+ ThreadDeleted,
+)
```
Methods:
diff --git a/src/openai/resources/beta/assistants/assistants.py b/src/openai/resources/beta/assistants/assistants.py
index 232451ab25..9e88794ebc 100644
--- a/src/openai/resources/beta/assistants/assistants.py
+++ b/src/openai/resources/beta/assistants/assistants.py
@@ -2,7 +2,7 @@
from __future__ import annotations
-from typing import List, Iterable, Optional
+from typing import List, Union, Iterable, Optional
from typing_extensions import Literal
import httpx
@@ -57,7 +57,29 @@ def with_streaming_response(self) -> AssistantsWithStreamingResponse:
def create(
self,
*,
- model: str,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ ],
description: Optional[str] | NotGiven = NOT_GIVEN,
file_ids: List[str] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -87,7 +109,7 @@ def create(
attached to this assistant. There can be a maximum of 20 files attached to the
assistant. Files are ordered by their creation date in ascending order.
- instructions: The system instructions that the assistant uses. The maximum length is 32768
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
characters.
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -194,7 +216,7 @@ def update(
file was previously attached to the list but does not show up in the list, it
will be deleted from the assistant.
- instructions: The system instructions that the assistant uses. The maximum length is 32768
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
characters.
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -360,7 +382,29 @@ def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse:
async def create(
self,
*,
- model: str,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ ],
description: Optional[str] | NotGiven = NOT_GIVEN,
file_ids: List[str] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -390,7 +434,7 @@ async def create(
attached to this assistant. There can be a maximum of 20 files attached to the
assistant. Files are ordered by their creation date in ascending order.
- instructions: The system instructions that the assistant uses. The maximum length is 32768
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
characters.
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -497,7 +541,7 @@ async def update(
file was previously attached to the list but does not show up in the list, it
will be deleted from the assistant.
- instructions: The system instructions that the assistant uses. The maximum length is 32768
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
characters.
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py
index 8576a5c09a..9fa7239c0b 100644
--- a/src/openai/resources/beta/threads/runs/runs.py
+++ b/src/openai/resources/beta/threads/runs/runs.py
@@ -4,7 +4,7 @@
import time
import typing_extensions
-from typing import Iterable, Optional, overload
+from typing import Union, Iterable, Optional, overload
from functools import partial
from typing_extensions import Literal
@@ -31,7 +31,12 @@
from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ....._streaming import Stream, AsyncStream
from .....pagination import SyncCursorPage, AsyncCursorPage
-from .....types.beta import AssistantToolParam, AssistantStreamEvent
+from .....types.beta import (
+ AssistantToolParam,
+ AssistantStreamEvent,
+ AssistantToolChoiceOptionParam,
+ AssistantResponseFormatOptionParam,
+)
from ....._base_client import (
AsyncPaginator,
make_request_options,
@@ -77,11 +82,40 @@ def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -107,6 +141,18 @@ def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -117,6 +163,21 @@ def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
stream: If `true`, returns a stream of events that happen during the Run as server-sent
events, terminating when the Run enters a terminal state with a `data: [DONE]`
message.
@@ -125,6 +186,13 @@ def create(
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -148,10 +216,39 @@ def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -181,6 +278,18 @@ def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -191,10 +300,32 @@ def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -218,10 +349,39 @@ def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -251,6 +411,18 @@ def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -261,10 +433,32 @@ def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -287,11 +481,40 @@ def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -310,11 +533,16 @@ def create(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"stream": stream,
"temperature": temperature,
+ "tool_choice": tool_choice,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
@@ -518,10 +746,39 @@ def create_and_poll(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -542,12 +799,17 @@ def create_and_poll(
additional_instructions=additional_instructions,
additional_messages=additional_messages,
instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
metadata=metadata,
model=model,
+ response_format=response_format,
temperature=temperature,
+ tool_choice=tool_choice,
# We assume we are not streaming when polling
stream=False,
tools=tools,
+ truncation_strategy=truncation_strategy,
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
@@ -572,10 +834,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -596,10 +887,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -620,10 +940,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -652,11 +1001,16 @@ def create_and_stream(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
@@ -722,10 +1076,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -745,10 +1128,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -768,10 +1180,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -800,11 +1241,16 @@ def stream(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
@@ -1123,11 +1569,40 @@ async def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1153,6 +1628,18 @@ async def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -1163,6 +1650,21 @@ async def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
stream: If `true`, returns a stream of events that happen during the Run as server-sent
events, terminating when the Run enters a terminal state with a `data: [DONE]`
message.
@@ -1171,6 +1673,13 @@ async def create(
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -1194,10 +1703,39 @@ async def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1227,6 +1765,18 @@ async def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -1237,10 +1787,32 @@ async def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -1264,10 +1836,39 @@ async def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1297,6 +1898,18 @@ async def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -1307,10 +1920,32 @@ async def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -1333,11 +1968,40 @@ async def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1356,11 +2020,16 @@ async def create(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"stream": stream,
"temperature": temperature,
+ "tool_choice": tool_choice,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
@@ -1564,10 +2233,39 @@ async def create_and_poll(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1588,12 +2286,17 @@ async def create_and_poll(
additional_instructions=additional_instructions,
additional_messages=additional_messages,
instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
metadata=metadata,
model=model,
+ response_format=response_format,
temperature=temperature,
+ tool_choice=tool_choice,
# We assume we are not streaming when polling
stream=False,
tools=tools,
+ truncation_strategy=truncation_strategy,
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
@@ -1618,10 +2321,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1642,10 +2374,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AsyncAssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1666,10 +2427,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AsyncAssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1700,11 +2490,16 @@ def create_and_stream(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
@@ -1770,10 +2565,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1793,10 +2617,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AsyncAssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1816,10 +2669,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AsyncAssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1850,11 +2732,16 @@ def stream(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
diff --git a/src/openai/resources/beta/threads/threads.py b/src/openai/resources/beta/threads/threads.py
index 3509267d4f..9c2e2f0043 100644
--- a/src/openai/resources/beta/threads/threads.py
+++ b/src/openai/resources/beta/threads/threads.py
@@ -2,7 +2,7 @@
from __future__ import annotations
-from typing import Iterable, Optional, overload
+from typing import Union, Iterable, Optional, overload
from functools import partial
from typing_extensions import Literal
@@ -40,6 +40,8 @@
Thread,
ThreadDeleted,
AssistantStreamEvent,
+ AssistantToolChoiceOptionParam,
+ AssistantResponseFormatOptionParam,
thread_create_params,
thread_update_params,
thread_create_and_run_params,
@@ -241,12 +243,41 @@ def create_and_run(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -265,6 +296,18 @@ def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -275,6 +318,21 @@ def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
stream: If `true`, returns a stream of events that happen during the Run as server-sent
events, terminating when the Run enters a terminal state with a `data: [DONE]`
message.
@@ -285,6 +343,13 @@ def create_and_run(
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -305,11 +370,40 @@ def create_and_run(
assistant_id: str,
stream: Literal[True],
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -332,6 +426,18 @@ def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -342,12 +448,34 @@ def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -368,11 +496,40 @@ def create_and_run(
assistant_id: str,
stream: bool,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -395,6 +552,18 @@ def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -405,12 +574,34 @@ def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -430,12 +621,41 @@ def create_and_run(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -450,12 +670,17 @@ def create_and_run(
{
"assistant_id": assistant_id,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"stream": stream,
"temperature": temperature,
"thread": thread,
+ "tool_choice": tool_choice,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
thread_create_and_run_params.ThreadCreateAndRunParams,
),
@@ -472,11 +697,40 @@ def create_and_run_poll(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -493,11 +747,16 @@ def create_and_run_poll(
run = self.create_and_run(
assistant_id=assistant_id,
instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
metadata=metadata,
model=model,
+ response_format=response_format,
temperature=temperature,
stream=False,
thread=thread,
+ tool_choice=tool_choice,
+ truncation_strategy=truncation_strategy,
tools=tools,
extra_headers=extra_headers,
extra_query=extra_query,
@@ -512,11 +771,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -533,11 +821,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
event_handler: AssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -554,11 +871,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
event_handler: AssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -581,12 +927,17 @@ def create_and_run_stream(
{
"assistant_id": assistant_id,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"thread": thread,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
thread_create_and_run_params.ThreadCreateAndRunParams,
),
@@ -780,12 +1131,41 @@ async def create_and_run(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -804,6 +1184,18 @@ async def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -814,6 +1206,21 @@ async def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
stream: If `true`, returns a stream of events that happen during the Run as server-sent
events, terminating when the Run enters a terminal state with a `data: [DONE]`
message.
@@ -824,6 +1231,13 @@ async def create_and_run(
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -844,11 +1258,40 @@ async def create_and_run(
assistant_id: str,
stream: Literal[True],
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -871,6 +1314,18 @@ async def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maxium of 512
@@ -881,12 +1336,34 @@ async def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -907,11 +1384,40 @@ async def create_and_run(
assistant_id: str,
stream: bool,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -934,6 +1440,18 @@ async def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -944,12 +1462,34 @@ async def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -969,12 +1509,41 @@ async def create_and_run(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -989,12 +1558,17 @@ async def create_and_run(
{
"assistant_id": assistant_id,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"stream": stream,
"temperature": temperature,
"thread": thread,
+ "tool_choice": tool_choice,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
thread_create_and_run_params.ThreadCreateAndRunParams,
),
@@ -1011,11 +1585,40 @@ async def create_and_run_poll(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1032,11 +1635,16 @@ async def create_and_run_poll(
run = await self.create_and_run(
assistant_id=assistant_id,
instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
metadata=metadata,
model=model,
+ response_format=response_format,
temperature=temperature,
stream=False,
thread=thread,
+ tool_choice=tool_choice,
+ truncation_strategy=truncation_strategy,
tools=tools,
extra_headers=extra_headers,
extra_query=extra_query,
@@ -1053,11 +1661,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1074,11 +1711,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
event_handler: AsyncAssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1095,11 +1761,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
event_handler: AsyncAssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1124,12 +1819,17 @@ def create_and_run_stream(
{
"assistant_id": assistant_id,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"thread": thread,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
thread_create_and_run_params.ThreadCreateAndRunParams,
),
diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py
index 3000603689..1a23e7876e 100644
--- a/src/openai/resources/chat/completions.py
+++ b/src/openai/resources/chat/completions.py
@@ -50,6 +50,8 @@ def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -137,8 +139,7 @@ def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -240,6 +241,8 @@ def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -334,8 +337,7 @@ def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -430,6 +432,8 @@ def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -524,8 +528,7 @@ def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -620,6 +623,8 @@ def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -717,6 +722,8 @@ async def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -804,8 +811,7 @@ async def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -907,6 +913,8 @@ async def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -1001,8 +1009,7 @@ async def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -1097,6 +1104,8 @@ async def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -1191,8 +1200,7 @@ async def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -1287,6 +1295,8 @@ async def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
diff --git a/src/openai/resources/fine_tuning/fine_tuning.py b/src/openai/resources/fine_tuning/fine_tuning.py
index 659b3e8501..0404fed6ec 100644
--- a/src/openai/resources/fine_tuning/fine_tuning.py
+++ b/src/openai/resources/fine_tuning/fine_tuning.py
@@ -11,6 +11,7 @@
AsyncJobsWithStreamingResponse,
)
from ..._compat import cached_property
+from .jobs.jobs import Jobs, AsyncJobs
from ..._resource import SyncAPIResource, AsyncAPIResource
__all__ = ["FineTuning", "AsyncFineTuning"]
diff --git a/src/openai/resources/fine_tuning/jobs/__init__.py b/src/openai/resources/fine_tuning/jobs/__init__.py
new file mode 100644
index 0000000000..94cd1fb7e7
--- /dev/null
+++ b/src/openai/resources/fine_tuning/jobs/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .jobs import (
+ Jobs,
+ AsyncJobs,
+ JobsWithRawResponse,
+ AsyncJobsWithRawResponse,
+ JobsWithStreamingResponse,
+ AsyncJobsWithStreamingResponse,
+)
+from .checkpoints import (
+ Checkpoints,
+ AsyncCheckpoints,
+ CheckpointsWithRawResponse,
+ AsyncCheckpointsWithRawResponse,
+ CheckpointsWithStreamingResponse,
+ AsyncCheckpointsWithStreamingResponse,
+)
+
+__all__ = [
+ "Checkpoints",
+ "AsyncCheckpoints",
+ "CheckpointsWithRawResponse",
+ "AsyncCheckpointsWithRawResponse",
+ "CheckpointsWithStreamingResponse",
+ "AsyncCheckpointsWithStreamingResponse",
+ "Jobs",
+ "AsyncJobs",
+ "JobsWithRawResponse",
+ "AsyncJobsWithRawResponse",
+ "JobsWithStreamingResponse",
+ "AsyncJobsWithStreamingResponse",
+]
diff --git a/src/openai/resources/fine_tuning/jobs/checkpoints.py b/src/openai/resources/fine_tuning/jobs/checkpoints.py
new file mode 100644
index 0000000000..e9ea6aad9a
--- /dev/null
+++ b/src/openai/resources/fine_tuning/jobs/checkpoints.py
@@ -0,0 +1,176 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import (
+ AsyncPaginator,
+ make_request_options,
+)
+from ....types.fine_tuning.jobs import FineTuningJobCheckpoint, checkpoint_list_params
+
+__all__ = ["Checkpoints", "AsyncCheckpoints"]
+
+
+class Checkpoints(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> CheckpointsWithRawResponse:
+ return CheckpointsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> CheckpointsWithStreamingResponse:
+ return CheckpointsWithStreamingResponse(self)
+
+ def list(
+ self,
+ fine_tuning_job_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[FineTuningJobCheckpoint]:
+ """
+ List checkpoints for a fine-tuning job.
+
+ Args:
+ after: Identifier for the last checkpoint ID from the previous pagination request.
+
+ limit: Number of checkpoints to retrieve.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuning_job_id:
+ raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+ return self._get_api_list(
+ f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints",
+ page=SyncCursorPage[FineTuningJobCheckpoint],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ },
+ checkpoint_list_params.CheckpointListParams,
+ ),
+ ),
+ model=FineTuningJobCheckpoint,
+ )
+
+
+class AsyncCheckpoints(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncCheckpointsWithRawResponse:
+ return AsyncCheckpointsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncCheckpointsWithStreamingResponse:
+ return AsyncCheckpointsWithStreamingResponse(self)
+
+ def list(
+ self,
+ fine_tuning_job_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[FineTuningJobCheckpoint, AsyncCursorPage[FineTuningJobCheckpoint]]:
+ """
+ List checkpoints for a fine-tuning job.
+
+ Args:
+ after: Identifier for the last checkpoint ID from the previous pagination request.
+
+ limit: Number of checkpoints to retrieve.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuning_job_id:
+ raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+ return self._get_api_list(
+ f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints",
+ page=AsyncCursorPage[FineTuningJobCheckpoint],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ },
+ checkpoint_list_params.CheckpointListParams,
+ ),
+ ),
+ model=FineTuningJobCheckpoint,
+ )
+
+
+class CheckpointsWithRawResponse:
+ def __init__(self, checkpoints: Checkpoints) -> None:
+ self._checkpoints = checkpoints
+
+ self.list = _legacy_response.to_raw_response_wrapper(
+ checkpoints.list,
+ )
+
+
+class AsyncCheckpointsWithRawResponse:
+ def __init__(self, checkpoints: AsyncCheckpoints) -> None:
+ self._checkpoints = checkpoints
+
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ checkpoints.list,
+ )
+
+
+class CheckpointsWithStreamingResponse:
+ def __init__(self, checkpoints: Checkpoints) -> None:
+ self._checkpoints = checkpoints
+
+ self.list = to_streamed_response_wrapper(
+ checkpoints.list,
+ )
+
+
+class AsyncCheckpointsWithStreamingResponse:
+ def __init__(self, checkpoints: AsyncCheckpoints) -> None:
+ self._checkpoints = checkpoints
+
+ self.list = async_to_streamed_response_wrapper(
+ checkpoints.list,
+ )
diff --git a/src/openai/resources/fine_tuning/jobs.py b/src/openai/resources/fine_tuning/jobs/jobs.py
similarity index 89%
rename from src/openai/resources/fine_tuning/jobs.py
rename to src/openai/resources/fine_tuning/jobs/jobs.py
index a0c3e24dac..229f716c48 100644
--- a/src/openai/resources/fine_tuning/jobs.py
+++ b/src/openai/resources/fine_tuning/jobs/jobs.py
@@ -2,26 +2,34 @@
from __future__ import annotations
-from typing import Union, Optional
+from typing import Union, Iterable, Optional
from typing_extensions import Literal
import httpx
-from ... import _legacy_response
-from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
maybe_transform,
async_maybe_transform,
)
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from ...pagination import SyncCursorPage, AsyncCursorPage
-from ..._base_client import (
+from ...._compat import cached_property
+from .checkpoints import (
+ Checkpoints,
+ AsyncCheckpoints,
+ CheckpointsWithRawResponse,
+ AsyncCheckpointsWithRawResponse,
+ CheckpointsWithStreamingResponse,
+ AsyncCheckpointsWithStreamingResponse,
+)
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import (
AsyncPaginator,
make_request_options,
)
-from ...types.fine_tuning import (
+from ....types.fine_tuning import (
FineTuningJob,
FineTuningJobEvent,
job_list_params,
@@ -33,6 +41,10 @@
class Jobs(SyncAPIResource):
+ @cached_property
+ def checkpoints(self) -> Checkpoints:
+ return Checkpoints(self._client)
+
@cached_property
def with_raw_response(self) -> JobsWithRawResponse:
return JobsWithRawResponse(self)
@@ -47,6 +59,8 @@ def create(
model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]],
training_file: str,
hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN,
+ integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN,
+ seed: Optional[int] | NotGiven = NOT_GIVEN,
suffix: Optional[str] | NotGiven = NOT_GIVEN,
validation_file: Optional[str] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -82,6 +96,12 @@ def create(
hyperparameters: The hyperparameters used for the fine-tuning job.
+ integrations: A list of integrations to enable for your fine-tuning job.
+
+ seed: The seed controls the reproducibility of the job. Passing in the same seed and
+ job parameters should produce the same results, but may differ in rare cases. If
+ a seed is not specified, one will be generated for you.
+
suffix: A string of up to 18 characters that will be added to your fine-tuned model
name.
@@ -116,6 +136,8 @@ def create(
"model": model,
"training_file": training_file,
"hyperparameters": hyperparameters,
+ "integrations": integrations,
+ "seed": seed,
"suffix": suffix,
"validation_file": validation_file,
},
@@ -294,6 +316,10 @@ def list_events(
class AsyncJobs(AsyncAPIResource):
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpoints:
+ return AsyncCheckpoints(self._client)
+
@cached_property
def with_raw_response(self) -> AsyncJobsWithRawResponse:
return AsyncJobsWithRawResponse(self)
@@ -308,6 +334,8 @@ async def create(
model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]],
training_file: str,
hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN,
+ integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN,
+ seed: Optional[int] | NotGiven = NOT_GIVEN,
suffix: Optional[str] | NotGiven = NOT_GIVEN,
validation_file: Optional[str] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -343,6 +371,12 @@ async def create(
hyperparameters: The hyperparameters used for the fine-tuning job.
+ integrations: A list of integrations to enable for your fine-tuning job.
+
+ seed: The seed controls the reproducibility of the job. Passing in the same seed and
+ job parameters should produce the same results, but may differ in rare cases. If
+ a seed is not specified, one will be generated for you.
+
suffix: A string of up to 18 characters that will be added to your fine-tuned model
name.
@@ -377,6 +411,8 @@ async def create(
"model": model,
"training_file": training_file,
"hyperparameters": hyperparameters,
+ "integrations": integrations,
+ "seed": seed,
"suffix": suffix,
"validation_file": validation_file,
},
@@ -574,6 +610,10 @@ def __init__(self, jobs: Jobs) -> None:
jobs.list_events,
)
+ @cached_property
+ def checkpoints(self) -> CheckpointsWithRawResponse:
+ return CheckpointsWithRawResponse(self._jobs.checkpoints)
+
class AsyncJobsWithRawResponse:
def __init__(self, jobs: AsyncJobs) -> None:
@@ -595,6 +635,10 @@ def __init__(self, jobs: AsyncJobs) -> None:
jobs.list_events,
)
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpointsWithRawResponse:
+ return AsyncCheckpointsWithRawResponse(self._jobs.checkpoints)
+
class JobsWithStreamingResponse:
def __init__(self, jobs: Jobs) -> None:
@@ -616,6 +660,10 @@ def __init__(self, jobs: Jobs) -> None:
jobs.list_events,
)
+ @cached_property
+ def checkpoints(self) -> CheckpointsWithStreamingResponse:
+ return CheckpointsWithStreamingResponse(self._jobs.checkpoints)
+
class AsyncJobsWithStreamingResponse:
def __init__(self, jobs: AsyncJobs) -> None:
@@ -636,3 +684,7 @@ def __init__(self, jobs: AsyncJobs) -> None:
self.list_events = async_to_streamed_response_wrapper(
jobs.list_events,
)
+
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpointsWithStreamingResponse:
+ return AsyncCheckpointsWithStreamingResponse(self._jobs.checkpoints)
diff --git a/src/openai/types/beta/__init__.py b/src/openai/types/beta/__init__.py
index a7de0272b4..0171694587 100644
--- a/src/openai/types/beta/__init__.py
+++ b/src/openai/types/beta/__init__.py
@@ -15,9 +15,21 @@
from .thread_create_params import ThreadCreateParams as ThreadCreateParams
from .thread_update_params import ThreadUpdateParams as ThreadUpdateParams
from .assistant_list_params import AssistantListParams as AssistantListParams
+from .assistant_tool_choice import AssistantToolChoice as AssistantToolChoice
from .code_interpreter_tool import CodeInterpreterTool as CodeInterpreterTool
from .assistant_stream_event import AssistantStreamEvent as AssistantStreamEvent
from .assistant_create_params import AssistantCreateParams as AssistantCreateParams
from .assistant_update_params import AssistantUpdateParams as AssistantUpdateParams
+from .assistant_response_format import AssistantResponseFormat as AssistantResponseFormat
+from .assistant_tool_choice_param import AssistantToolChoiceParam as AssistantToolChoiceParam
from .code_interpreter_tool_param import CodeInterpreterToolParam as CodeInterpreterToolParam
+from .assistant_tool_choice_option import AssistantToolChoiceOption as AssistantToolChoiceOption
from .thread_create_and_run_params import ThreadCreateAndRunParams as ThreadCreateAndRunParams
+from .assistant_tool_choice_function import AssistantToolChoiceFunction as AssistantToolChoiceFunction
+from .assistant_response_format_param import AssistantResponseFormatParam as AssistantResponseFormatParam
+from .assistant_response_format_option import AssistantResponseFormatOption as AssistantResponseFormatOption
+from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam as AssistantToolChoiceOptionParam
+from .assistant_tool_choice_function_param import AssistantToolChoiceFunctionParam as AssistantToolChoiceFunctionParam
+from .assistant_response_format_option_param import (
+ AssistantResponseFormatOptionParam as AssistantResponseFormatOptionParam,
+)
diff --git a/src/openai/types/beta/assistant.py b/src/openai/types/beta/assistant.py
index 32561a9aa8..0a0d28ed01 100644
--- a/src/openai/types/beta/assistant.py
+++ b/src/openai/types/beta/assistant.py
@@ -29,7 +29,7 @@ class Assistant(BaseModel):
instructions: Optional[str] = None
"""The system instructions that the assistant uses.
- The maximum length is 32768 characters.
+ The maximum length is 256,000 characters.
"""
metadata: Optional[object] = None
diff --git a/src/openai/types/beta/assistant_create_params.py b/src/openai/types/beta/assistant_create_params.py
index 8bad323640..011121485f 100644
--- a/src/openai/types/beta/assistant_create_params.py
+++ b/src/openai/types/beta/assistant_create_params.py
@@ -2,8 +2,8 @@
from __future__ import annotations
-from typing import List, Iterable, Optional
-from typing_extensions import Required, TypedDict
+from typing import List, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypedDict
from .assistant_tool_param import AssistantToolParam
@@ -11,7 +11,31 @@
class AssistantCreateParams(TypedDict, total=False):
- model: Required[str]
+ model: Required[
+ Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ ]
+ ]
"""ID of the model to use.
You can use the
@@ -34,7 +58,7 @@ class AssistantCreateParams(TypedDict, total=False):
instructions: Optional[str]
"""The system instructions that the assistant uses.
- The maximum length is 32768 characters.
+ The maximum length is 256,000 characters.
"""
metadata: Optional[object]
diff --git a/src/openai/types/beta/assistant_response_format.py b/src/openai/types/beta/assistant_response_format.py
new file mode 100644
index 0000000000..f53bdaf62a
--- /dev/null
+++ b/src/openai/types/beta/assistant_response_format.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["AssistantResponseFormat"]
+
+
+class AssistantResponseFormat(BaseModel):
+ type: Optional[Literal["text", "json_object"]] = None
+ """Must be one of `text` or `json_object`."""
diff --git a/src/openai/types/beta/assistant_response_format_option.py b/src/openai/types/beta/assistant_response_format_option.py
new file mode 100644
index 0000000000..d4e05e0ea9
--- /dev/null
+++ b/src/openai/types/beta/assistant_response_format_option.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal
+
+from .assistant_response_format import AssistantResponseFormat
+
+__all__ = ["AssistantResponseFormatOption"]
+
+AssistantResponseFormatOption = Union[Literal["none", "auto"], AssistantResponseFormat]
diff --git a/src/openai/types/beta/assistant_response_format_option_param.py b/src/openai/types/beta/assistant_response_format_option_param.py
new file mode 100644
index 0000000000..46e04125d1
--- /dev/null
+++ b/src/openai/types/beta/assistant_response_format_option_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal
+
+from .assistant_response_format_param import AssistantResponseFormatParam
+
+__all__ = ["AssistantResponseFormatOptionParam"]
+
+AssistantResponseFormatOptionParam = Union[Literal["none", "auto"], AssistantResponseFormatParam]
diff --git a/src/openai/types/beta/assistant_response_format_param.py b/src/openai/types/beta/assistant_response_format_param.py
new file mode 100644
index 0000000000..96e1d02115
--- /dev/null
+++ b/src/openai/types/beta/assistant_response_format_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["AssistantResponseFormatParam"]
+
+
+class AssistantResponseFormatParam(TypedDict, total=False):
+ type: Literal["text", "json_object"]
+ """Must be one of `text` or `json_object`."""
diff --git a/src/openai/types/beta/assistant_tool_choice.py b/src/openai/types/beta/assistant_tool_choice.py
new file mode 100644
index 0000000000..4314d4b41e
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .assistant_tool_choice_function import AssistantToolChoiceFunction
+
+__all__ = ["AssistantToolChoice"]
+
+
+class AssistantToolChoice(BaseModel):
+ type: Literal["function", "code_interpreter", "retrieval"]
+ """The type of the tool. If type is `function`, the function name must be set"""
+
+ function: Optional[AssistantToolChoiceFunction] = None
diff --git a/src/openai/types/beta/assistant_tool_choice_function.py b/src/openai/types/beta/assistant_tool_choice_function.py
new file mode 100644
index 0000000000..87f38310ca
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_function.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from ..._models import BaseModel
+
+__all__ = ["AssistantToolChoiceFunction"]
+
+
+class AssistantToolChoiceFunction(BaseModel):
+ name: str
+ """The name of the function to call."""
diff --git a/src/openai/types/beta/assistant_tool_choice_function_param.py b/src/openai/types/beta/assistant_tool_choice_function_param.py
new file mode 100644
index 0000000000..428857de91
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_function_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["AssistantToolChoiceFunctionParam"]
+
+
+class AssistantToolChoiceFunctionParam(TypedDict, total=False):
+ name: Required[str]
+ """The name of the function to call."""
diff --git a/src/openai/types/beta/assistant_tool_choice_option.py b/src/openai/types/beta/assistant_tool_choice_option.py
new file mode 100644
index 0000000000..0045a5986e
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_option.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal
+
+from .assistant_tool_choice import AssistantToolChoice
+
+__all__ = ["AssistantToolChoiceOption"]
+
+AssistantToolChoiceOption = Union[Literal["none", "auto"], AssistantToolChoice]
diff --git a/src/openai/types/beta/assistant_tool_choice_option_param.py b/src/openai/types/beta/assistant_tool_choice_option_param.py
new file mode 100644
index 0000000000..618e7bff98
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_option_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal
+
+from .assistant_tool_choice_param import AssistantToolChoiceParam
+
+__all__ = ["AssistantToolChoiceOptionParam"]
+
+AssistantToolChoiceOptionParam = Union[Literal["none", "auto"], AssistantToolChoiceParam]
diff --git a/src/openai/types/beta/assistant_tool_choice_param.py b/src/openai/types/beta/assistant_tool_choice_param.py
new file mode 100644
index 0000000000..5cf6ea27be
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_param.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+from .assistant_tool_choice_function_param import AssistantToolChoiceFunctionParam
+
+__all__ = ["AssistantToolChoiceParam"]
+
+
+class AssistantToolChoiceParam(TypedDict, total=False):
+ type: Required[Literal["function", "code_interpreter", "retrieval"]]
+ """The type of the tool. If type is `function`, the function name must be set"""
+
+ function: AssistantToolChoiceFunctionParam
diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py
index 7c96aca8c1..6e9d9ed5db 100644
--- a/src/openai/types/beta/assistant_update_params.py
+++ b/src/openai/types/beta/assistant_update_params.py
@@ -26,7 +26,7 @@ class AssistantUpdateParams(TypedDict, total=False):
instructions: Optional[str]
"""The system instructions that the assistant uses.
- The maximum length is 32768 characters.
+ The maximum length is 256,000 characters.
"""
metadata: Optional[object]
diff --git a/src/openai/types/beta/thread_create_and_run_params.py b/src/openai/types/beta/thread_create_and_run_params.py
index d4266fc48c..50f947a40a 100644
--- a/src/openai/types/beta/thread_create_and_run_params.py
+++ b/src/openai/types/beta/thread_create_and_run_params.py
@@ -8,12 +8,15 @@
from .function_tool_param import FunctionToolParam
from .retrieval_tool_param import RetrievalToolParam
from .code_interpreter_tool_param import CodeInterpreterToolParam
+from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
+from .assistant_response_format_option_param import AssistantResponseFormatOptionParam
__all__ = [
"ThreadCreateAndRunParamsBase",
"Thread",
"ThreadMessage",
"Tool",
+ "TruncationStrategy",
"ThreadCreateAndRunParamsNonStreaming",
"ThreadCreateAndRunParamsStreaming",
]
@@ -33,6 +36,24 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
This is useful for modifying the behavior on a per-run basis.
"""
+ max_completion_tokens: Optional[int]
+ """
+ The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+ """
+
+ max_prompt_tokens: Optional[int]
+ """The maximum number of prompt tokens that may be used over the course of the run.
+
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+ """
+
metadata: Optional[object]
"""Set of 16 key-value pairs that can be attached to an object.
@@ -41,7 +62,30 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
a maxium of 512 characters long.
"""
- model: Optional[str]
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
"""
The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
be used to execute this run. If a value is provided here, it will override the
@@ -49,6 +93,25 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
assistant will be used.
"""
+ response_format: Optional[AssistantResponseFormatOptionParam]
+ """Specifies the format that the model must output.
+
+ Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+ """
+
temperature: Optional[float]
"""What sampling temperature to use, between 0 and 2.
@@ -59,12 +122,24 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
thread: Thread
"""If no thread is provided, an empty thread will be created."""
+ tool_choice: Optional[AssistantToolChoiceOptionParam]
+ """
+ Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+ """
+
tools: Optional[Iterable[Tool]]
"""Override the tools the assistant can use for this run.
This is useful for modifying the behavior on a per-run basis.
"""
+ truncation_strategy: Optional[TruncationStrategy]
+
class ThreadMessage(TypedDict, total=False):
content: Required[str]
@@ -115,6 +190,23 @@ class Thread(TypedDict, total=False):
Tool = Union[CodeInterpreterToolParam, RetrievalToolParam, FunctionToolParam]
+class TruncationStrategy(TypedDict, total=False):
+ type: Required[Literal["auto", "last_messages"]]
+ """The truncation strategy to use for the thread.
+
+ The default is `auto`. If set to `last_messages`, the thread will be truncated
+ to the n most recent messages in the thread. When set to `auto`, messages in the
+ middle of the thread will be dropped to fit the context length of the model,
+ `max_prompt_tokens`.
+ """
+
+ last_messages: Optional[int]
+ """
+ The number of most recent messages from the thread when constructing the context
+ for the run.
+ """
+
+
class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase):
stream: Optional[Literal[False]]
"""
diff --git a/src/openai/types/beta/threads/run.py b/src/openai/types/beta/threads/run.py
index 3ab276245f..2efc3c77fa 100644
--- a/src/openai/types/beta/threads/run.py
+++ b/src/openai/types/beta/threads/run.py
@@ -6,9 +6,28 @@
from ...._models import BaseModel
from .run_status import RunStatus
from ..assistant_tool import AssistantTool
+from ..assistant_tool_choice_option import AssistantToolChoiceOption
+from ..assistant_response_format_option import AssistantResponseFormatOption
from .required_action_function_tool_call import RequiredActionFunctionToolCall
-__all__ = ["Run", "LastError", "RequiredAction", "RequiredActionSubmitToolOutputs", "Usage"]
+__all__ = [
+ "Run",
+ "IncompleteDetails",
+ "LastError",
+ "RequiredAction",
+ "RequiredActionSubmitToolOutputs",
+ "TruncationStrategy",
+ "Usage",
+]
+
+
+class IncompleteDetails(BaseModel):
+ reason: Optional[Literal["max_completion_tokens", "max_prompt_tokens"]] = None
+ """The reason why the run is incomplete.
+
+ This will point to which specific token limit was reached over the course of the
+ run.
+ """
class LastError(BaseModel):
@@ -32,6 +51,23 @@ class RequiredAction(BaseModel):
"""For now, this is always `submit_tool_outputs`."""
+class TruncationStrategy(BaseModel):
+ type: Literal["auto", "last_messages"]
+ """The truncation strategy to use for the thread.
+
+ The default is `auto`. If set to `last_messages`, the thread will be truncated
+ to the n most recent messages in the thread. When set to `auto`, messages in the
+ middle of the thread will be dropped to fit the context length of the model,
+ `max_prompt_tokens`.
+ """
+
+ last_messages: Optional[int] = None
+ """
+ The number of most recent messages from the thread when constructing the context
+ for the run.
+ """
+
+
class Usage(BaseModel):
completion_tokens: int
"""Number of completion tokens used over the course of the run."""
@@ -76,6 +112,12 @@ class Run(BaseModel):
this run.
"""
+ incomplete_details: Optional[IncompleteDetails] = None
+ """Details on why the run is incomplete.
+
+ Will be `null` if the run is not incomplete.
+ """
+
instructions: str
"""
The instructions that the
@@ -86,6 +128,18 @@ class Run(BaseModel):
last_error: Optional[LastError] = None
"""The last error associated with this run. Will be `null` if there are no errors."""
+ max_completion_tokens: Optional[int] = None
+ """
+ The maximum number of completion tokens specified to have been used over the
+ course of the run.
+ """
+
+ max_prompt_tokens: Optional[int] = None
+ """
+ The maximum number of prompt tokens specified to have been used over the course
+ of the run.
+ """
+
metadata: Optional[object] = None
"""Set of 16 key-value pairs that can be attached to an object.
@@ -110,6 +164,25 @@ class Run(BaseModel):
Will be `null` if no action is required.
"""
+ response_format: Optional[AssistantResponseFormatOption] = None
+ """Specifies the format that the model must output.
+
+ Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+ """
+
started_at: Optional[int] = None
"""The Unix timestamp (in seconds) for when the run was started."""
@@ -126,6 +199,16 @@ class Run(BaseModel):
that was executed on as a part of this run.
"""
+ tool_choice: Optional[AssistantToolChoiceOption] = None
+ """
+ Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+ """
+
tools: List[AssistantTool]
"""
The list of tools that the
@@ -133,6 +216,8 @@ class Run(BaseModel):
this run.
"""
+ truncation_strategy: Optional[TruncationStrategy] = None
+
usage: Optional[Usage] = None
"""Usage statistics related to the run.
diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py
index e9bc19d980..9f2d4ba18b 100644
--- a/src/openai/types/beta/threads/run_create_params.py
+++ b/src/openai/types/beta/threads/run_create_params.py
@@ -6,8 +6,16 @@
from typing_extensions import Literal, Required, TypedDict
from ..assistant_tool_param import AssistantToolParam
+from ..assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
+from ..assistant_response_format_option_param import AssistantResponseFormatOptionParam
-__all__ = ["RunCreateParamsBase", "AdditionalMessage", "RunCreateParamsNonStreaming", "RunCreateParamsStreaming"]
+__all__ = [
+ "RunCreateParamsBase",
+ "AdditionalMessage",
+ "TruncationStrategy",
+ "RunCreateParamsNonStreaming",
+ "RunCreateParamsStreaming",
+]
class RunCreateParamsBase(TypedDict, total=False):
@@ -35,6 +43,24 @@ class RunCreateParamsBase(TypedDict, total=False):
of the assistant. This is useful for modifying the behavior on a per-run basis.
"""
+ max_completion_tokens: Optional[int]
+ """
+ The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+ """
+
+ max_prompt_tokens: Optional[int]
+ """The maximum number of prompt tokens that may be used over the course of the run.
+
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+ """
+
metadata: Optional[object]
"""Set of 16 key-value pairs that can be attached to an object.
@@ -43,7 +69,30 @@ class RunCreateParamsBase(TypedDict, total=False):
a maxium of 512 characters long.
"""
- model: Optional[str]
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
"""
The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
be used to execute this run. If a value is provided here, it will override the
@@ -51,6 +100,25 @@ class RunCreateParamsBase(TypedDict, total=False):
assistant will be used.
"""
+ response_format: Optional[AssistantResponseFormatOptionParam]
+ """Specifies the format that the model must output.
+
+ Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+ """
+
temperature: Optional[float]
"""What sampling temperature to use, between 0 and 2.
@@ -58,12 +126,24 @@ class RunCreateParamsBase(TypedDict, total=False):
0.2 will make it more focused and deterministic.
"""
+ tool_choice: Optional[AssistantToolChoiceOptionParam]
+ """
+ Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+ """
+
tools: Optional[Iterable[AssistantToolParam]]
"""Override the tools the assistant can use for this run.
This is useful for modifying the behavior on a per-run basis.
"""
+ truncation_strategy: Optional[TruncationStrategy]
+
class AdditionalMessage(TypedDict, total=False):
content: Required[str]
@@ -95,6 +175,23 @@ class AdditionalMessage(TypedDict, total=False):
"""
+class TruncationStrategy(TypedDict, total=False):
+ type: Required[Literal["auto", "last_messages"]]
+ """The truncation strategy to use for the thread.
+
+ The default is `auto`. If set to `last_messages`, the thread will be truncated
+ to the n most recent messages in the thread. When set to `auto`, messages in the
+ middle of the thread will be dropped to fit the context length of the model,
+ `max_prompt_tokens`.
+ """
+
+ last_messages: Optional[int]
+ """
+ The number of most recent messages from the thread when constructing the context
+ for the run.
+ """
+
+
class RunCreateParamsNonStreaming(RunCreateParamsBase):
stream: Optional[Literal[False]]
"""
diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py
index ab6a747021..1e0f7f8195 100644
--- a/src/openai/types/chat/completion_create_params.py
+++ b/src/openai/types/chat/completion_create_params.py
@@ -32,6 +32,8 @@ class CompletionCreateParamsBase(TypedDict, total=False):
Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -102,8 +104,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""Whether to return log probabilities of the output tokens or not.
If true, returns the log probabilities of each output token returned in the
- `content` of `message`. This option is currently not available on the
- `gpt-4-vision-preview` model.
+ `content` of `message`.
"""
max_tokens: Optional[int]
diff --git a/src/openai/types/fine_tuning/__init__.py b/src/openai/types/fine_tuning/__init__.py
index 0bb2b90438..92b81329b1 100644
--- a/src/openai/types/fine_tuning/__init__.py
+++ b/src/openai/types/fine_tuning/__init__.py
@@ -7,3 +7,8 @@
from .job_create_params import JobCreateParams as JobCreateParams
from .fine_tuning_job_event import FineTuningJobEvent as FineTuningJobEvent
from .job_list_events_params import JobListEventsParams as JobListEventsParams
+from .fine_tuning_job_integration import FineTuningJobIntegration as FineTuningJobIntegration
+from .fine_tuning_job_wandb_integration import FineTuningJobWandbIntegration as FineTuningJobWandbIntegration
+from .fine_tuning_job_wandb_integration_object import (
+ FineTuningJobWandbIntegrationObject as FineTuningJobWandbIntegrationObject,
+)
diff --git a/src/openai/types/fine_tuning/fine_tuning_job.py b/src/openai/types/fine_tuning/fine_tuning_job.py
index 23fe96d1a0..1593bf50c7 100644
--- a/src/openai/types/fine_tuning/fine_tuning_job.py
+++ b/src/openai/types/fine_tuning/fine_tuning_job.py
@@ -4,6 +4,7 @@
from typing_extensions import Literal
from ..._models import BaseModel
+from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject
__all__ = ["FineTuningJob", "Error", "Hyperparameters"]
@@ -80,6 +81,9 @@ class FineTuningJob(BaseModel):
[Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents).
"""
+ seed: int
+ """The seed used for the fine-tuning job."""
+
status: Literal["validating_files", "queued", "running", "succeeded", "failed", "cancelled"]
"""
The current status of the fine-tuning job, which can be either
@@ -105,3 +109,6 @@ class FineTuningJob(BaseModel):
You can retrieve the validation results with the
[Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents).
"""
+
+ integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None
+ """A list of integrations to enable for this fine-tuning job."""
diff --git a/src/openai/types/fine_tuning/fine_tuning_job_integration.py b/src/openai/types/fine_tuning/fine_tuning_job_integration.py
new file mode 100644
index 0000000000..8076313cae
--- /dev/null
+++ b/src/openai/types/fine_tuning/fine_tuning_job_integration.py
@@ -0,0 +1,7 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+
+from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject
+
+FineTuningJobIntegration = FineTuningJobWandbIntegrationObject
diff --git a/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py
new file mode 100644
index 0000000000..4ac282eb54
--- /dev/null
+++ b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+
+from ..._models import BaseModel
+
+__all__ = ["FineTuningJobWandbIntegration"]
+
+
+class FineTuningJobWandbIntegration(BaseModel):
+ project: str
+ """The name of the project that the new run will be created under."""
+
+ entity: Optional[str] = None
+ """The entity to use for the run.
+
+ This allows you to set the team or username of the WandB user that you would
+ like associated with the run. If not set, the default entity for the registered
+ WandB API key is used.
+ """
+
+ name: Optional[str] = None
+ """A display name to set for the run.
+
+ If not set, we will use the Job ID as the name.
+ """
+
+ tags: Optional[List[str]] = None
+ """A list of tags to be attached to the newly created run.
+
+ These tags are passed through directly to WandB. Some default tags are generated
+ by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}".
+ """
diff --git a/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py
new file mode 100644
index 0000000000..5b94354d50
--- /dev/null
+++ b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .fine_tuning_job_wandb_integration import FineTuningJobWandbIntegration
+
+__all__ = ["FineTuningJobWandbIntegrationObject"]
+
+
+class FineTuningJobWandbIntegrationObject(BaseModel):
+ type: Literal["wandb"]
+ """The type of the integration being enabled for the fine-tuning job"""
+
+ wandb: FineTuningJobWandbIntegration
+ """The settings for your integration with Weights and Biases.
+
+ This payload specifies the project that metrics will be sent to. Optionally, you
+ can set an explicit display name for your run, add tags to your run, and set a
+ default entity (team, username, etc) to be associated with your run.
+ """
diff --git a/src/openai/types/fine_tuning/job_create_params.py b/src/openai/types/fine_tuning/job_create_params.py
index 79e0b67e13..892c737fa3 100644
--- a/src/openai/types/fine_tuning/job_create_params.py
+++ b/src/openai/types/fine_tuning/job_create_params.py
@@ -2,10 +2,10 @@
from __future__ import annotations
-from typing import Union, Optional
+from typing import List, Union, Iterable, Optional
from typing_extensions import Literal, Required, TypedDict
-__all__ = ["JobCreateParams", "Hyperparameters"]
+__all__ = ["JobCreateParams", "Hyperparameters", "Integration", "IntegrationWandb"]
class JobCreateParams(TypedDict, total=False):
@@ -32,6 +32,17 @@ class JobCreateParams(TypedDict, total=False):
hyperparameters: Hyperparameters
"""The hyperparameters used for the fine-tuning job."""
+ integrations: Optional[Iterable[Integration]]
+ """A list of integrations to enable for your fine-tuning job."""
+
+ seed: Optional[int]
+ """The seed controls the reproducibility of the job.
+
+ Passing in the same seed and job parameters should produce the same results, but
+ may differ in rare cases. If a seed is not specified, one will be generated for
+ you.
+ """
+
suffix: Optional[str]
"""
A string of up to 18 characters that will be added to your fine-tuned model
@@ -76,3 +87,45 @@ class Hyperparameters(TypedDict, total=False):
An epoch refers to one full cycle through the training dataset.
"""
+
+
+class IntegrationWandb(TypedDict, total=False):
+ project: Required[str]
+ """The name of the project that the new run will be created under."""
+
+ entity: Optional[str]
+ """The entity to use for the run.
+
+ This allows you to set the team or username of the WandB user that you would
+ like associated with the run. If not set, the default entity for the registered
+ WandB API key is used.
+ """
+
+ name: Optional[str]
+ """A display name to set for the run.
+
+ If not set, we will use the Job ID as the name.
+ """
+
+ tags: List[str]
+ """A list of tags to be attached to the newly created run.
+
+ These tags are passed through directly to WandB. Some default tags are generated
+ by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}".
+ """
+
+
+class Integration(TypedDict, total=False):
+ type: Required[Literal["wandb"]]
+ """The type of integration to enable.
+
+ Currently, only "wandb" (Weights and Biases) is supported.
+ """
+
+ wandb: Required[IntegrationWandb]
+ """The settings for your integration with Weights and Biases.
+
+ This payload specifies the project that metrics will be sent to. Optionally, you
+ can set an explicit display name for your run, add tags to your run, and set a
+ default entity (team, username, etc) to be associated with your run.
+ """
diff --git a/src/openai/types/fine_tuning/jobs/__init__.py b/src/openai/types/fine_tuning/jobs/__init__.py
new file mode 100644
index 0000000000..6c93da1b69
--- /dev/null
+++ b/src/openai/types/fine_tuning/jobs/__init__.py
@@ -0,0 +1,6 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .checkpoint_list_params import CheckpointListParams as CheckpointListParams
+from .fine_tuning_job_checkpoint import FineTuningJobCheckpoint as FineTuningJobCheckpoint
diff --git a/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py b/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py
new file mode 100644
index 0000000000..adceb3b218
--- /dev/null
+++ b/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["CheckpointListParams"]
+
+
+class CheckpointListParams(TypedDict, total=False):
+ after: str
+ """Identifier for the last checkpoint from the previous pagination request."""
+
+ limit: int
+ """Number of checkpoints to retrieve."""
diff --git a/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py b/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py
new file mode 100644
index 0000000000..bd07317a3e
--- /dev/null
+++ b/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["FineTuningJobCheckpoint", "Metrics"]
+
+
+class Metrics(BaseModel):
+ full_valid_loss: Optional[float] = None
+
+ full_valid_mean_token_accuracy: Optional[float] = None
+
+ step: Optional[float] = None
+
+ train_loss: Optional[float] = None
+
+ train_mean_token_accuracy: Optional[float] = None
+
+ valid_loss: Optional[float] = None
+
+ valid_mean_token_accuracy: Optional[float] = None
+
+
+class FineTuningJobCheckpoint(BaseModel):
+ id: str
+ """The checkpoint identifier, which can be referenced in the API endpoints."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the checkpoint was created."""
+
+ fine_tuned_model_checkpoint: str
+ """The name of the fine-tuned checkpoint model that is created."""
+
+ fine_tuning_job_id: str
+ """The name of the fine-tuning job that this checkpoint was created from."""
+
+ metrics: Metrics
+ """Metrics at the step number during the fine-tuning job."""
+
+ object: Literal["fine_tuning.job.checkpoint"]
+ """The object type, which is always "fine_tuning.job.checkpoint"."""
+
+ step_number: int
+ """The step number that the checkpoint was created at."""
diff --git a/tests/api_resources/beta/test_assistants.py b/tests/api_resources/beta/test_assistants.py
index 6edbe4b491..a509627b8e 100644
--- a/tests/api_resources/beta/test_assistants.py
+++ b/tests/api_resources/beta/test_assistants.py
@@ -24,14 +24,14 @@ class TestAssistants:
@parametrize
def test_method_create(self, client: OpenAI) -> None:
assistant = client.beta.assistants.create(
- model="string",
+ model="gpt-4-turbo",
)
assert_matches_type(Assistant, assistant, path=["response"])
@parametrize
def test_method_create_with_all_params(self, client: OpenAI) -> None:
assistant = client.beta.assistants.create(
- model="string",
+ model="gpt-4-turbo",
description="string",
file_ids=["string", "string", "string"],
instructions="string",
@@ -44,7 +44,7 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None:
@parametrize
def test_raw_response_create(self, client: OpenAI) -> None:
response = client.beta.assistants.with_raw_response.create(
- model="string",
+ model="gpt-4-turbo",
)
assert response.is_closed is True
@@ -55,7 +55,7 @@ def test_raw_response_create(self, client: OpenAI) -> None:
@parametrize
def test_streaming_response_create(self, client: OpenAI) -> None:
with client.beta.assistants.with_streaming_response.create(
- model="string",
+ model="gpt-4-turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -235,14 +235,14 @@ class TestAsyncAssistants:
@parametrize
async def test_method_create(self, async_client: AsyncOpenAI) -> None:
assistant = await async_client.beta.assistants.create(
- model="string",
+ model="gpt-4-turbo",
)
assert_matches_type(Assistant, assistant, path=["response"])
@parametrize
async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
assistant = await async_client.beta.assistants.create(
- model="string",
+ model="gpt-4-turbo",
description="string",
file_ids=["string", "string", "string"],
instructions="string",
@@ -255,7 +255,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) ->
@parametrize
async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
response = await async_client.beta.assistants.with_raw_response.create(
- model="string",
+ model="gpt-4-turbo",
)
assert response.is_closed is True
@@ -266,7 +266,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
@parametrize
async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
async with async_client.beta.assistants.with_streaming_response.create(
- model="string",
+ model="gpt-4-turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
diff --git a/tests/api_resources/beta/test_threads.py b/tests/api_resources/beta/test_threads.py
index fd3f7c5102..7c07251433 100644
--- a/tests/api_resources/beta/test_threads.py
+++ b/tests/api_resources/beta/test_threads.py
@@ -207,8 +207,11 @@ def test_method_create_and_run_with_all_params_overload_1(self, client: OpenAI)
thread = client.beta.threads.create_and_run(
assistant_id="string",
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
stream=False,
temperature=1,
thread={
@@ -234,7 +237,12 @@ def test_method_create_and_run_with_all_params_overload_1(self, client: OpenAI)
],
"metadata": {},
},
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
assert_matches_type(Run, thread, path=["response"])
@@ -276,8 +284,11 @@ def test_method_create_and_run_with_all_params_overload_2(self, client: OpenAI)
assistant_id="string",
stream=True,
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
temperature=1,
thread={
"messages": [
@@ -302,7 +313,12 @@ def test_method_create_and_run_with_all_params_overload_2(self, client: OpenAI)
],
"metadata": {},
},
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
thread_stream.response.close()
@@ -521,8 +537,11 @@ async def test_method_create_and_run_with_all_params_overload_1(self, async_clie
thread = await async_client.beta.threads.create_and_run(
assistant_id="string",
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
stream=False,
temperature=1,
thread={
@@ -548,7 +567,12 @@ async def test_method_create_and_run_with_all_params_overload_1(self, async_clie
],
"metadata": {},
},
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
assert_matches_type(Run, thread, path=["response"])
@@ -590,8 +614,11 @@ async def test_method_create_and_run_with_all_params_overload_2(self, async_clie
assistant_id="string",
stream=True,
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
temperature=1,
thread={
"messages": [
@@ -616,7 +643,12 @@ async def test_method_create_and_run_with_all_params_overload_2(self, async_clie
],
"metadata": {},
},
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
await thread_stream.response.aclose()
diff --git a/tests/api_resources/beta/threads/test_runs.py b/tests/api_resources/beta/threads/test_runs.py
index 271bcccdd3..cf5b2998b9 100644
--- a/tests/api_resources/beta/threads/test_runs.py
+++ b/tests/api_resources/beta/threads/test_runs.py
@@ -57,11 +57,19 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
},
],
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
stream=False,
temperature=1,
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
assert_matches_type(Run, run, path=["response"])
@@ -136,10 +144,18 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
},
],
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
temperature=1,
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
run_stream.response.close()
@@ -553,11 +569,19 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
},
],
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
stream=False,
temperature=1,
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
assert_matches_type(Run, run, path=["response"])
@@ -632,10 +656,18 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
},
],
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
temperature=1,
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
await run_stream.response.aclose()
diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py
index bb0658f3d9..c54b56a37d 100644
--- a/tests/api_resources/chat/test_completions.py
+++ b/tests/api_resources/chat/test_completions.py
@@ -26,7 +26,7 @@ def test_method_create_overload_1(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
)
assert_matches_type(ChatCompletion, completion, path=["response"])
@@ -40,7 +40,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
"name": "string",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
frequency_penalty=-2,
function_call="none",
functions=[
@@ -102,7 +102,7 @@ def test_raw_response_create_overload_1(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
)
assert response.is_closed is True
@@ -119,7 +119,7 @@ def test_streaming_response_create_overload_1(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -138,7 +138,7 @@ def test_method_create_overload_2(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
)
completion_stream.response.close()
@@ -153,7 +153,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
"name": "string",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
frequency_penalty=-2,
function_call="none",
@@ -215,7 +215,7 @@ def test_raw_response_create_overload_2(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
)
@@ -232,7 +232,7 @@ def test_streaming_response_create_overload_2(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
) as response:
assert not response.is_closed
@@ -256,7 +256,7 @@ async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
)
assert_matches_type(ChatCompletion, completion, path=["response"])
@@ -270,7 +270,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
"name": "string",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
frequency_penalty=-2,
function_call="none",
functions=[
@@ -332,7 +332,7 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) -
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
)
assert response.is_closed is True
@@ -349,7 +349,7 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncOpe
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -368,7 +368,7 @@ async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
)
await completion_stream.response.aclose()
@@ -383,7 +383,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
"name": "string",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
frequency_penalty=-2,
function_call="none",
@@ -445,7 +445,7 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) -
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
)
@@ -462,7 +462,7 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncOpe
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
) as response:
assert not response.is_closed
diff --git a/tests/api_resources/fine_tuning/jobs/__init__.py b/tests/api_resources/fine_tuning/jobs/__init__.py
new file mode 100644
index 0000000000..fd8019a9a1
--- /dev/null
+++ b/tests/api_resources/fine_tuning/jobs/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/fine_tuning/jobs/test_checkpoints.py b/tests/api_resources/fine_tuning/jobs/test_checkpoints.py
new file mode 100644
index 0000000000..915d5c6f63
--- /dev/null
+++ b/tests/api_resources/fine_tuning/jobs/test_checkpoints.py
@@ -0,0 +1,117 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.pagination import SyncCursorPage, AsyncCursorPage
+from openai.types.fine_tuning.jobs import FineTuningJobCheckpoint
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestCheckpoints:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_list(self, client: OpenAI) -> None:
+ checkpoint = client.fine_tuning.jobs.checkpoints.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+ assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: OpenAI) -> None:
+ checkpoint = client.fine_tuning.jobs.checkpoints.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ after="string",
+ limit=0,
+ )
+ assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: OpenAI) -> None:
+ response = client.fine_tuning.jobs.checkpoints.with_raw_response.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ checkpoint = response.parse()
+ assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: OpenAI) -> None:
+ with client.fine_tuning.jobs.checkpoints.with_streaming_response.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ checkpoint = response.parse()
+ assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_list(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"):
+ client.fine_tuning.jobs.checkpoints.with_raw_response.list(
+ "",
+ )
+
+
+class TestAsyncCheckpoints:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncOpenAI) -> None:
+ checkpoint = await async_client.fine_tuning.jobs.checkpoints.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+ assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ checkpoint = await async_client.fine_tuning.jobs.checkpoints.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ after="string",
+ limit=0,
+ )
+ assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.fine_tuning.jobs.checkpoints.with_raw_response.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ checkpoint = response.parse()
+ assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.fine_tuning.jobs.checkpoints.with_streaming_response.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ checkpoint = await response.parse()
+ assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_list(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"):
+ await async_client.fine_tuning.jobs.checkpoints.with_raw_response.list(
+ "",
+ )
diff --git a/tests/api_resources/fine_tuning/test_jobs.py b/tests/api_resources/fine_tuning/test_jobs.py
index f4974ebbcd..1ff6d63b31 100644
--- a/tests/api_resources/fine_tuning/test_jobs.py
+++ b/tests/api_resources/fine_tuning/test_jobs.py
@@ -39,6 +39,36 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None:
"learning_rate_multiplier": "auto",
"n_epochs": "auto",
},
+ integrations=[
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ ],
+ seed=42,
suffix="x",
validation_file="file-abc123",
)
@@ -248,6 +278,36 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) ->
"learning_rate_multiplier": "auto",
"n_epochs": "auto",
},
+ integrations=[
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ ],
+ seed=42,
suffix="x",
validation_file="file-abc123",
)