feat(api): add token logprobs to chat completions (#980)
stainless-bot committed Dec 17, 2023
1 parent 215476a commit f50e962
Showing 14 changed files with 255 additions and 61 deletions.
1 change: 1 addition & 0 deletions api.md
@@ -38,6 +38,7 @@ from openai.types.chat import (
    ChatCompletionNamedToolChoice,
    ChatCompletionRole,
    ChatCompletionSystemMessageParam,
+   ChatCompletionTokenLogprob,
    ChatCompletionTool,
    ChatCompletionToolChoiceOption,
    ChatCompletionToolMessageParam,
122 changes: 104 additions & 18 deletions src/openai/resources/chat/completions.py

Large diffs are not rendered by default.
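Although the rendered diff is collapsed, the new parameters surface on `client.chat.completions.create()`. A minimal sketch of requesting token log probabilities; the model name and prompt are illustrative assumptions, not taken from this commit:

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

completion = client.chat.completions.create(
    model="gpt-3.5-turbo-1106",  # illustrative model choice
    messages=[{"role": "user", "content": "Say hello"}],
    logprobs=True,    # new: return log probabilities for the output tokens
    top_logprobs=2,   # new: also return the 2 most likely alternatives per position
)

# Each choice now carries an optional `logprobs.content` list of token logprobs.
choice = completion.choices[0]
if choice.logprobs and choice.logprobs.content:
    for token_logprob in choice.logprobs.content:
        print(token_logprob.token, token_logprob.logprob)
```

Note that `Choice.logprobs` is `Optional`, so guard for `None` when the flag is not set.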

66 changes: 36 additions & 30 deletions src/openai/resources/completions.py
@@ -119,14 +119,15 @@ def create(
          As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
          from being generated.

-         logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the
-             chosen tokens. For example, if `logprobs` is 5, the API will return a list of
-             the 5 most likely tokens. The API will always return the `logprob` of the
-             sampled token, so there may be up to `logprobs+1` elements in the response.
+         logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+             well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+             list of the 5 most likely tokens. The API will always return the `logprob` of
+             the sampled token, so there may be up to `logprobs+1` elements in the response.

              The maximum value for `logprobs` is 5.

-         max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion.
+         max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+             completion.

              The token count of your prompt plus `max_tokens` cannot exceed the model's
              context length.
@@ -288,14 +289,15 @@ def create(
          As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
          from being generated.

-         logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the
-             chosen tokens. For example, if `logprobs` is 5, the API will return a list of
-             the 5 most likely tokens. The API will always return the `logprob` of the
-             sampled token, so there may be up to `logprobs+1` elements in the response.
+         logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+             well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+             list of the 5 most likely tokens. The API will always return the `logprob` of
+             the sampled token, so there may be up to `logprobs+1` elements in the response.

              The maximum value for `logprobs` is 5.

-         max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion.
+         max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+             completion.

              The token count of your prompt plus `max_tokens` cannot exceed the model's
              context length.
@@ -450,14 +452,15 @@ def create(
          As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
          from being generated.

-         logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the
-             chosen tokens. For example, if `logprobs` is 5, the API will return a list of
-             the 5 most likely tokens. The API will always return the `logprob` of the
-             sampled token, so there may be up to `logprobs+1` elements in the response.
+         logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+             well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+             list of the 5 most likely tokens. The API will always return the `logprob` of
+             the sampled token, so there may be up to `logprobs+1` elements in the response.

              The maximum value for `logprobs` is 5.

-         max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion.
+         max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+             completion.

              The token count of your prompt plus `max_tokens` cannot exceed the model's
              context length.
@@ -687,14 +690,15 @@ async def create(
          As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
          from being generated.

-         logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the
-             chosen tokens. For example, if `logprobs` is 5, the API will return a list of
-             the 5 most likely tokens. The API will always return the `logprob` of the
-             sampled token, so there may be up to `logprobs+1` elements in the response.
+         logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+             well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+             list of the 5 most likely tokens. The API will always return the `logprob` of
+             the sampled token, so there may be up to `logprobs+1` elements in the response.

              The maximum value for `logprobs` is 5.

-         max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion.
+         max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+             completion.

              The token count of your prompt plus `max_tokens` cannot exceed the model's
              context length.
@@ -856,14 +860,15 @@ async def create(
          As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
          from being generated.

-         logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the
-             chosen tokens. For example, if `logprobs` is 5, the API will return a list of
-             the 5 most likely tokens. The API will always return the `logprob` of the
-             sampled token, so there may be up to `logprobs+1` elements in the response.
+         logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+             well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+             list of the 5 most likely tokens. The API will always return the `logprob` of
+             the sampled token, so there may be up to `logprobs+1` elements in the response.

              The maximum value for `logprobs` is 5.

-         max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion.
+         max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+             completion.

              The token count of your prompt plus `max_tokens` cannot exceed the model's
              context length.
@@ -1018,14 +1023,15 @@ async def create(
          As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
          from being generated.

-         logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the
-             chosen tokens. For example, if `logprobs` is 5, the API will return a list of
-             the 5 most likely tokens. The API will always return the `logprob` of the
-             sampled token, so there may be up to `logprobs+1` elements in the response.
+         logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+             well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+             list of the 5 most likely tokens. The API will always return the `logprob` of
+             the sampled token, so there may be up to `logprobs+1` elements in the response.

              The maximum value for `logprobs` is 5.

-         max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion.
+         max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+             completion.

              The token count of your prompt plus `max_tokens` cannot exceed the model's
              context length.
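For the legacy completions endpoint, `logprobs` remains an integer capped at 5. A minimal sketch under assumed model and prompt values (not taken from this commit):

```python
from openai import OpenAI

client = OpenAI()

completion = client.completions.create(
    model="gpt-3.5-turbo-instruct",  # illustrative model choice
    prompt="Say hello",
    max_tokens=16,   # prompt tokens + max_tokens must fit within the model's context length
    logprobs=5,      # integer: include up to the 5 most likely tokens per position
)

print(completion.choices[0].logprobs)
```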
6 changes: 4 additions & 2 deletions src/openai/resources/files.py
@@ -51,7 +51,8 @@ def create(
          The size of all the
          files uploaded by one organization can be up to 100 GB.

-         The size of individual files can be a maximum of 512 MB. See the
+         The size of individual files can be a maximum of 512 MB or 2 million tokens for
+         Assistants. See the
          [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) to
          learn more about the types of files supported. The Fine-tuning API only supports
          `.jsonl` files.
@@ -314,7 +315,8 @@ async def create(
          The size of all the
          files uploaded by one organization can be up to 100 GB.

-         The size of individual files can be a maximum of 512 MB. See the
+         The size of individual files can be a maximum of 512 MB or 2 million tokens for
+         Assistants. See the
          [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) to
          learn more about the types of files supported. The Fine-tuning API only supports
          `.jsonl` files.
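For context, a hedged sketch of the upload call these limits describe; the file name and `purpose` value are illustrative assumptions:

```python
from openai import OpenAI

client = OpenAI()

# The Fine-tuning API only accepts .jsonl files; Assistants files may be up to
# 512 MB or 2 million tokens.
uploaded = client.files.create(
    file=open("training_data.jsonl", "rb"),
    purpose="fine-tune",
)
print(uploaded.id)
```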
@@ -16,4 +16,4 @@ class MessageCreationStepDetails(BaseModel):
    message_creation: MessageCreation

    type: Literal["message_creation"]
-    """Always `message_creation``."""
+    """Always `message_creation`."""
2 changes: 1 addition & 1 deletion src/openai/types/beta/threads/runs/run_step.py
@@ -66,7 +66,7 @@ class RunStep(BaseModel):
    """

    object: Literal["thread.run.step"]
-    """The object type, which is always `thread.run.step``."""
+    """The object type, which is always `thread.run.step`."""

    run_id: str
    """
3 changes: 3 additions & 0 deletions src/openai/types/chat/__init__.py
@@ -13,6 +13,9 @@
from .chat_completion_message_param import (
    ChatCompletionMessageParam as ChatCompletionMessageParam,
)
+from .chat_completion_token_logprob import (
+    ChatCompletionTokenLogprob as ChatCompletionTokenLogprob,
+)
from .chat_completion_message_tool_call import (
    ChatCompletionMessageToolCall as ChatCompletionMessageToolCall,
)
11 changes: 10 additions & 1 deletion src/openai/types/chat/chat_completion.py
@@ -6,8 +6,14 @@
from ..._models import BaseModel
from ..completion_usage import CompletionUsage
from .chat_completion_message import ChatCompletionMessage
+from .chat_completion_token_logprob import ChatCompletionTokenLogprob

-__all__ = ["ChatCompletion", "Choice"]
+__all__ = ["ChatCompletion", "Choice", "ChoiceLogprobs"]
+
+
+class ChoiceLogprobs(BaseModel):
+    content: Optional[List[ChatCompletionTokenLogprob]]
+    """A list of message content tokens with log probability information."""


class Choice(BaseModel):
@@ -24,6 +30,9 @@ class Choice(BaseModel):
    index: int
    """The index of the choice in the list of choices."""

+    logprobs: Optional[ChoiceLogprobs]
+    """Log probability information for the choice."""
+
    message: ChatCompletionMessage
    """A chat completion message generated by the model."""
10 changes: 10 additions & 0 deletions src/openai/types/chat/chat_completion_chunk.py
@@ -4,6 +4,7 @@
from typing_extensions import Literal

from ..._models import BaseModel
+from .chat_completion_token_logprob import ChatCompletionTokenLogprob

__all__ = [
    "ChatCompletionChunk",
@@ -12,6 +13,7 @@
    "ChoiceDeltaFunctionCall",
    "ChoiceDeltaToolCall",
    "ChoiceDeltaToolCallFunction",
+    "ChoiceLogprobs",
]


@@ -70,6 +72,11 @@ class ChoiceDelta(BaseModel):
    tool_calls: Optional[List[ChoiceDeltaToolCall]] = None


+class ChoiceLogprobs(BaseModel):
+    content: Optional[List[ChatCompletionTokenLogprob]]
+    """A list of message content tokens with log probability information."""
+
+
class Choice(BaseModel):
    delta: ChoiceDelta
    """A chat completion delta generated by streamed model responses."""
@@ -87,6 +94,9 @@ class Choice(BaseModel):
    index: int
    """The index of the choice in the list of choices."""

+    logprobs: Optional[ChoiceLogprobs] = None
+    """Log probability information for the choice."""
+

class ChatCompletionChunk(BaseModel):
    id: str
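With streaming, each `ChatCompletionChunk` choice carries the same optional `logprobs` field. A minimal sketch, again with an assumed model and prompt:

```python
from openai import OpenAI

client = OpenAI()

stream = client.chat.completions.create(
    model="gpt-3.5-turbo-1106",  # illustrative model choice
    messages=[{"role": "user", "content": "Say hello"}],
    logprobs=True,
    stream=True,
)

for chunk in stream:
    for choice in chunk.choices:
        # `logprobs` and its `content` are Optional, so guard before iterating.
        if choice.logprobs and choice.logprobs.content:
            for token_logprob in choice.logprobs.content:
                print(token_logprob.token, token_logprob.logprob)
```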
@@ -2,13 +2,14 @@

from __future__ import annotations

+from typing import Optional
from typing_extensions import Literal, Required, TypedDict

__all__ = ["ChatCompletionFunctionMessageParam"]


class ChatCompletionFunctionMessageParam(TypedDict, total=False):
-    content: Required[str]
+    content: Required[Optional[str]]
    """The contents of the function message."""

    name: Required[str]
47 changes: 47 additions & 0 deletions src/openai/types/chat/chat_completion_token_logprob.py
@@ -0,0 +1,47 @@
# File generated from our OpenAPI spec by Stainless.

from typing import List, Optional

from ..._models import BaseModel

__all__ = ["ChatCompletionTokenLogprob", "TopLogprob"]


class TopLogprob(BaseModel):
    token: str
    """The token."""

    bytes: Optional[List[int]]
    """A list of integers representing the UTF-8 bytes representation of the token.

    Useful in instances where characters are represented by multiple tokens and
    their byte representations must be combined to generate the correct text
    representation. Can be `null` if there is no bytes representation for the token.
    """

    logprob: float
    """The log probability of this token."""


class ChatCompletionTokenLogprob(BaseModel):
    token: str
    """The token."""

    bytes: Optional[List[int]]
    """A list of integers representing the UTF-8 bytes representation of the token.

    Useful in instances where characters are represented by multiple tokens and
    their byte representations must be combined to generate the correct text
    representation. Can be `null` if there is no bytes representation for the token.
    """

    logprob: float
    """The log probability of this token."""

    top_logprobs: List[TopLogprob]
    """List of the most likely tokens and their log probability, at this token
    position.

    In rare cases, there may be fewer than the number of requested `top_logprobs`
    returned.
    """
23 changes: 21 additions & 2 deletions src/openai/types/chat/completion_create_params.py
@@ -78,7 +78,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
    particular function via `{"name": "my_function"}` forces the model to call that
    function.

-    `none` is the default when no functions are present. `auto`` is the default if
+    `none` is the default when no functions are present. `auto` is the default if
    functions are present.
    """
@@ -99,8 +99,18 @@ class CompletionCreateParamsBase(TypedDict, total=False):
    or exclusive selection of the relevant token.
    """

+    logprobs: Optional[bool]
+    """Whether to return log probabilities of the output tokens or not.
+
+    If true, returns the log probabilities of each output token returned in the
+    `content` of `message`. This option is currently not available on the
+    `gpt-4-vision-preview` model.
+    """
+
    max_tokens: Optional[int]
-    """The maximum number of [tokens](/tokenizer) to generate in the chat completion.
+    """
+    The maximum number of [tokens](/tokenizer) that can be generated in the chat
+    completion.

    The total length of input tokens and generated tokens is limited by the model's
    context length.
@@ -127,6 +137,8 @@ class CompletionCreateParamsBase(TypedDict, total=False):
    response_format: ResponseFormat
    """An object specifying the format that the model must output.

+    Compatible with `gpt-4-1106-preview` and `gpt-3.5-turbo-1106`.
+
    Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    message the model generates is valid JSON.
@@ -180,6 +192,13 @@ class CompletionCreateParamsBase(TypedDict, total=False):
    functions the model may generate JSON inputs for.
    """

+    top_logprobs: Optional[int]
+    """
+    An integer between 0 and 5 specifying the number of most likely tokens to return
+    at each token position, each with an associated log probability. `logprobs` must
+    be set to `true` if this parameter is used.
+    """
+
    top_p: Optional[float]
    """
    An alternative to sampling with temperature, called nucleus sampling, where the
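The surrounding context also documents JSON mode via `response_format`; a hedged sketch of combining it with the new flags (model and messages are placeholder assumptions):

```python
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-3.5-turbo-1106",  # JSON mode is documented for the -1106 models
    messages=[
        {"role": "system", "content": "Reply with a JSON object."},
        {"role": "user", "content": "Give me a greeting."},
    ],
    response_format={"type": "json_object"},
    logprobs=True,
    top_logprobs=2,  # requires logprobs=True; documented range here is 0 to 5
)
print(completion.choices[0].message.content)
```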
12 changes: 7 additions & 5 deletions src/openai/types/completion_create_params.py
@@ -88,16 +88,18 @@ class CompletionCreateParamsBase(TypedDict, total=False):

    logprobs: Optional[int]
    """
-    Include the log probabilities on the `logprobs` most likely tokens, as well the
-    chosen tokens. For example, if `logprobs` is 5, the API will return a list of
-    the 5 most likely tokens. The API will always return the `logprob` of the
-    sampled token, so there may be up to `logprobs+1` elements in the response.
+    Include the log probabilities on the `logprobs` most likely output tokens, as
+    well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+    list of the 5 most likely tokens. The API will always return the `logprob` of
+    the sampled token, so there may be up to `logprobs+1` elements in the response.

    The maximum value for `logprobs` is 5.
    """

    max_tokens: Optional[int]
-    """The maximum number of [tokens](/tokenizer) to generate in the completion.
+    """
+    The maximum number of [tokens](/tokenizer) that can be generated in the
+    completion.

    The token count of your prompt plus `max_tokens` cannot exceed the model's
    context length.
