feat(api): add usage metadata when streaming (#1395)

openai · May 6, 2024 · 3cb064b · 3cb064b
1 parent 2a678e3
commit 3cb064b
Show file tree

Hide file tree

Showing 11 changed files with 97 additions and 3 deletions.
diff --git a/.stats.yml b/.stats.yml
@@ -1,2 +1,2 @@
 configured_endpoints: 64
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-97c9a5f089049dc9eb5cee9475558049003e37e42202cab39e59d75e08b4c613.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-edb5af3ade0cd27cf366b0654b90c7a81c43c433e11fc3f6e621e2c779de10d4.yml
diff --git a/api.md b/api.md
@@ -43,6 +43,7 @@ from openai.types.chat import (
     ChatCompletionMessageToolCall,
     ChatCompletionNamedToolChoice,
     ChatCompletionRole,
+    ChatCompletionStreamOptions,
     ChatCompletionSystemMessageParam,
     ChatCompletionTokenLogprob,
     ChatCompletionTool,

diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py
@@ -27,6 +27,7 @@
 from ...types.chat.chat_completion_chunk import ChatCompletionChunk
 from ...types.chat.chat_completion_tool_param import ChatCompletionToolParam
 from ...types.chat.chat_completion_message_param import ChatCompletionMessageParam
+from ...types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
 from ...types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam
 
 __all__ = ["Completions", "AsyncCompletions"]
@@ -59,6 +60,7 @@ def create(
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
@@ -165,6 +167,8 @@ def create(
               message.
               [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
 
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
               make the output more random, while lower values like 0.2 will make it more
               focused and deterministic.
@@ -227,6 +231,7 @@ def create(
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
@@ -333,6 +338,8 @@ def create(
 
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
               make the output more random, while lower values like 0.2 will make it more
               focused and deterministic.
@@ -395,6 +402,7 @@ def create(
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
@@ -501,6 +509,8 @@ def create(
 
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
               make the output more random, while lower values like 0.2 will make it more
               focused and deterministic.
@@ -563,6 +573,7 @@ def create(
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
@@ -594,6 +605,7 @@ def create(
                     "seed": seed,
                     "stop": stop,
                     "stream": stream,
+                    "stream_options": stream_options,
                     "temperature": temperature,
                     "tool_choice": tool_choice,
                     "tools": tools,
@@ -639,6 +651,7 @@ async def create(
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
@@ -745,6 +758,8 @@ async def create(
               message.
               [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
 
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
               make the output more random, while lower values like 0.2 will make it more
               focused and deterministic.
@@ -807,6 +822,7 @@ async def create(
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
@@ -913,6 +929,8 @@ async def create(
 
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
               make the output more random, while lower values like 0.2 will make it more
               focused and deterministic.
@@ -975,6 +993,7 @@ async def create(
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
@@ -1081,6 +1100,8 @@ async def create(
 
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
               make the output more random, while lower values like 0.2 will make it more
               focused and deterministic.
@@ -1143,6 +1164,7 @@ async def create(
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
@@ -1174,6 +1196,7 @@ async def create(
                     "seed": seed,
                     "stop": stop,
                     "stream": stream,
+                    "stream_options": stream_options,
                     "temperature": temperature,
                     "tool_choice": tool_choice,
                     "tools": tools,

diff --git a/src/openai/resources/completions.py b/src/openai/resources/completions.py
@@ -23,6 +23,7 @@
     make_request_options,
 )
 from ..types.completion import Completion
+from ..types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
 
 __all__ = ["Completions", "AsyncCompletions"]
 
@@ -53,6 +54,7 @@ def create(
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         suffix: Optional[str] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
@@ -156,6 +158,8 @@ def create(
               message.
               [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
 
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
           suffix: The suffix that comes after a completion of inserted text.
 
               This parameter is only supported for `gpt-3.5-turbo-instruct`.
@@ -203,6 +207,7 @@ def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         suffix: Optional[str] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
@@ -306,6 +311,8 @@ def create(
           stop: Up to 4 sequences where the API will stop generating further tokens. The
               returned text will not contain the stop sequence.
 
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
           suffix: The suffix that comes after a completion of inserted text.
 
               This parameter is only supported for `gpt-3.5-turbo-instruct`.
@@ -353,6 +360,7 @@ def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         suffix: Optional[str] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
@@ -456,6 +464,8 @@ def create(
           stop: Up to 4 sequences where the API will stop generating further tokens. The
               returned text will not contain the stop sequence.
 
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
           suffix: The suffix that comes after a completion of inserted text.
 
               This parameter is only supported for `gpt-3.5-turbo-instruct`.
@@ -503,6 +513,7 @@ def create(
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         suffix: Optional[str] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
@@ -531,6 +542,7 @@ def create(
                     "seed": seed,
                     "stop": stop,
                     "stream": stream,
+                    "stream_options": stream_options,
                     "suffix": suffix,
                     "temperature": temperature,
                     "top_p": top_p,
@@ -573,6 +585,7 @@ async def create(
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         suffix: Optional[str] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
@@ -676,6 +689,8 @@ async def create(
               message.
               [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
 
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
           suffix: The suffix that comes after a completion of inserted text.
 
               This parameter is only supported for `gpt-3.5-turbo-instruct`.
@@ -723,6 +738,7 @@ async def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         suffix: Optional[str] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
@@ -826,6 +842,8 @@ async def create(
           stop: Up to 4 sequences where the API will stop generating further tokens. The
               returned text will not contain the stop sequence.
 
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
           suffix: The suffix that comes after a completion of inserted text.
 
               This parameter is only supported for `gpt-3.5-turbo-instruct`.
@@ -873,6 +891,7 @@ async def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         suffix: Optional[str] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
@@ -976,6 +995,8 @@ async def create(
           stop: Up to 4 sequences where the API will stop generating further tokens. The
               returned text will not contain the stop sequence.
 
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
           suffix: The suffix that comes after a completion of inserted text.
 
               This parameter is only supported for `gpt-3.5-turbo-instruct`.
@@ -1023,6 +1044,7 @@ async def create(
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         suffix: Optional[str] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1051,6 +1073,7 @@ async def create(
                     "seed": seed,
                     "stop": stop,
                     "stream": stream,
+                    "stream_options": stream_options,
                     "suffix": suffix,
                     "temperature": temperature,
                     "top_p": top_p,

diff --git a/src/openai/types/chat/__init__.py b/src/openai/types/chat/__init__.py
@@ -14,6 +14,7 @@
 from .chat_completion_content_part_param import ChatCompletionContentPartParam as ChatCompletionContentPartParam
 from .chat_completion_tool_message_param import ChatCompletionToolMessageParam as ChatCompletionToolMessageParam
 from .chat_completion_user_message_param import ChatCompletionUserMessageParam as ChatCompletionUserMessageParam
+from .chat_completion_stream_options_param import ChatCompletionStreamOptionsParam as ChatCompletionStreamOptionsParam
 from .chat_completion_system_message_param import ChatCompletionSystemMessageParam as ChatCompletionSystemMessageParam
 from .chat_completion_function_message_param import (
     ChatCompletionFunctionMessageParam as ChatCompletionFunctionMessageParam,

diff --git a/src/openai/types/chat/chat_completion_chunk.py b/src/openai/types/chat/chat_completion_chunk.py
@@ -4,6 +4,7 @@
 from typing_extensions import Literal
 
 from ..._models import BaseModel
+from ..completion_usage import CompletionUsage
 from .chat_completion_token_logprob import ChatCompletionTokenLogprob
 
 __all__ = [
@@ -105,7 +106,8 @@ class ChatCompletionChunk(BaseModel):
     choices: List[Choice]
     """A list of chat completion choices.
 
-    Can be more than one if `n` is greater than 1.
+    Can contain more than one element if `n` is greater than 1. Can also be empty
+    for the last chunk if you set `stream_options: {"include_usage": true}`.
     """
 
     created: int
@@ -126,3 +128,11 @@ class ChatCompletionChunk(BaseModel):
     Can be used in conjunction with the `seed` request parameter to understand when
     backend changes have been made that might impact determinism.
     """
+
+    usage: Optional[CompletionUsage] = None
+    """
+    An optional field that will only be present when you set
+    `stream_options: {"include_usage": true}` in your request. When present, it
+    contains a null value except for the last chunk, which contains the token usage
+    statistics for the entire request.
+    """
diff --git a/src/openai/types/chat/chat_completion_stream_options_param.py b/src/openai/types/chat/chat_completion_stream_options_param.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["ChatCompletionStreamOptionsParam"]
+
+
+class ChatCompletionStreamOptionsParam(TypedDict, total=False):
+    include_usage: bool
+    """If set, an additional chunk will be streamed before the `data: [DONE]` message.
+
+    The `usage` field on this chunk shows the token usage statistics for the entire
+    request, and the `choices` field will always be an empty array. All other chunks
+    will also include a `usage` field, but with a null value.
+    """
diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py
@@ -9,6 +9,7 @@
 from ..chat_model import ChatModel
 from .chat_completion_tool_param import ChatCompletionToolParam
 from .chat_completion_message_param import ChatCompletionMessageParam
+from .chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
 from .chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam
 from .chat_completion_function_call_option_param import ChatCompletionFunctionCallOptionParam
 
@@ -141,6 +142,9 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     stop: Union[Optional[str], List[str]]
     """Up to 4 sequences where the API will stop generating further tokens."""
 
+    stream_options: Optional[ChatCompletionStreamOptionsParam]
+    """Options for streaming response. Only set this when you set `stream: true`."""
+
     temperature: Optional[float]
     """What sampling temperature to use, between 0 and 2.