From 17a623f70050bca4538ad2939055cd9d9b165f89 Mon Sep 17 00:00:00 2001 From: Stainless Bot Date: Thu, 17 Oct 2024 16:52:53 +0000 Subject: [PATCH 1/2] feat(api): add gpt-4o-audio-preview model for chat completions (#1135) This enables audio inputs and outputs. https://platform.openai.com/docs/guides/audio --- .stats.yml | 2 +- api.md | 4 + src/index.ts | 4 + src/lib/AbstractChatCompletionRunner.ts | 4 +- src/resources/beta/assistants.ts | 10 ++ src/resources/chat/chat.ts | 7 + src/resources/chat/completions.ts | 153 ++++++++++++++++++- src/resources/chat/index.ts | 4 + tests/api-resources/chat/completions.test.ts | 2 + 9 files changed, 183 insertions(+), 7 deletions(-) diff --git a/.stats.yml b/.stats.yml index 68789976b..984e8a8d5 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 68 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-71e58a77027c67e003fdd1b1ac8ac11557d8bfabc7666d1a827c6b1ca8ab98b5.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-8729aaa35436531ab453224af10e67f89677db8f350f0346bb3537489edea649.yml diff --git a/api.md b/api.md index 71027acfd..da60f65bd 100644 --- a/api.md +++ b/api.md @@ -33,9 +33,12 @@ Types: - ChatCompletion - ChatCompletionAssistantMessageParam +- ChatCompletionAudio +- ChatCompletionAudioParam - ChatCompletionChunk - ChatCompletionContentPart - ChatCompletionContentPartImage +- ChatCompletionContentPartInputAudio - ChatCompletionContentPartRefusal - ChatCompletionContentPartText - ChatCompletionFunctionCallOption @@ -43,6 +46,7 @@ Types: - ChatCompletionMessage - ChatCompletionMessageParam - ChatCompletionMessageToolCall +- ChatCompletionModality - ChatCompletionNamedToolChoice - ChatCompletionRole - ChatCompletionStreamOptions diff --git a/src/index.ts b/src/index.ts index d3e1d2a78..56108223a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -250,9 +250,12 @@ export namespace OpenAI { export import ChatModel = API.ChatModel; export import ChatCompletion = API.ChatCompletion; export import ChatCompletionAssistantMessageParam = API.ChatCompletionAssistantMessageParam; + export import ChatCompletionAudio = API.ChatCompletionAudio; + export import ChatCompletionAudioParam = API.ChatCompletionAudioParam; export import ChatCompletionChunk = API.ChatCompletionChunk; export import ChatCompletionContentPart = API.ChatCompletionContentPart; export import ChatCompletionContentPartImage = API.ChatCompletionContentPartImage; + export import ChatCompletionContentPartInputAudio = API.ChatCompletionContentPartInputAudio; export import ChatCompletionContentPartRefusal = API.ChatCompletionContentPartRefusal; export import ChatCompletionContentPartText = API.ChatCompletionContentPartText; export import ChatCompletionFunctionCallOption = API.ChatCompletionFunctionCallOption; @@ -260,6 +263,7 @@ export namespace OpenAI { export import ChatCompletionMessage = API.ChatCompletionMessage; export import ChatCompletionMessageParam = API.ChatCompletionMessageParam; export import ChatCompletionMessageToolCall = API.ChatCompletionMessageToolCall; + export import ChatCompletionModality = API.ChatCompletionModality; export import ChatCompletionNamedToolChoice = API.ChatCompletionNamedToolChoice; export import ChatCompletionRole = API.ChatCompletionRole; export import ChatCompletionStreamOptions = API.ChatCompletionStreamOptions; diff --git a/src/lib/AbstractChatCompletionRunner.ts b/src/lib/AbstractChatCompletionRunner.ts index 39ee4e993..e943a4e4f 100644 --- a/src/lib/AbstractChatCompletionRunner.ts +++ b/src/lib/AbstractChatCompletionRunner.ts @@ -105,7 +105,9 @@ export class AbstractChatCompletionRunner< const message = this.messages[i]; if (isAssistantMessage(message)) { const { function_call, ...rest } = message; - const ret: ChatCompletionMessage = { + + // TODO: support audio here + const ret: Omit = { ...rest, content: (message as ChatCompletionMessage).content ?? null, refusal: (message as ChatCompletionMessage).refusal ?? null, diff --git a/src/resources/beta/assistants.ts b/src/resources/beta/assistants.ts index 410d520b0..aa7362297 100644 --- a/src/resources/beta/assistants.ts +++ b/src/resources/beta/assistants.ts @@ -298,6 +298,11 @@ export namespace AssistantStreamEvent { data: ThreadsAPI.Thread; event: 'thread.created'; + + /** + * Whether to enable input audio transcription. + */ + enabled?: boolean; } /** @@ -1084,6 +1089,11 @@ export interface ThreadStreamEvent { data: ThreadsAPI.Thread; event: 'thread.created'; + + /** + * Whether to enable input audio transcription. + */ + enabled?: boolean; } export interface AssistantCreateParams { diff --git a/src/resources/chat/chat.ts b/src/resources/chat/chat.ts index 5bc7de955..43ef5662c 100644 --- a/src/resources/chat/chat.ts +++ b/src/resources/chat/chat.ts @@ -16,7 +16,10 @@ export type ChatModel = | 'gpt-4o' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-05-13' + | 'gpt-4o-realtime-preview' | 'gpt-4o-realtime-preview-2024-10-01' + | 'gpt-4o-audio-preview' + | 'gpt-4o-audio-preview-2024-10-01' | 'chatgpt-4o-latest' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' @@ -45,9 +48,12 @@ export namespace Chat { export import Completions = CompletionsAPI.Completions; export import ChatCompletion = CompletionsAPI.ChatCompletion; export import ChatCompletionAssistantMessageParam = CompletionsAPI.ChatCompletionAssistantMessageParam; + export import ChatCompletionAudio = CompletionsAPI.ChatCompletionAudio; + export import ChatCompletionAudioParam = CompletionsAPI.ChatCompletionAudioParam; export import ChatCompletionChunk = CompletionsAPI.ChatCompletionChunk; export import ChatCompletionContentPart = CompletionsAPI.ChatCompletionContentPart; export import ChatCompletionContentPartImage = CompletionsAPI.ChatCompletionContentPartImage; + export import ChatCompletionContentPartInputAudio = CompletionsAPI.ChatCompletionContentPartInputAudio; export import ChatCompletionContentPartRefusal = CompletionsAPI.ChatCompletionContentPartRefusal; export import ChatCompletionContentPartText = CompletionsAPI.ChatCompletionContentPartText; export import ChatCompletionFunctionCallOption = CompletionsAPI.ChatCompletionFunctionCallOption; @@ -55,6 +61,7 @@ export namespace Chat { export import ChatCompletionMessage = CompletionsAPI.ChatCompletionMessage; export import ChatCompletionMessageParam = CompletionsAPI.ChatCompletionMessageParam; export import ChatCompletionMessageToolCall = CompletionsAPI.ChatCompletionMessageToolCall; + export import ChatCompletionModality = CompletionsAPI.ChatCompletionModality; export import ChatCompletionNamedToolChoice = CompletionsAPI.ChatCompletionNamedToolChoice; export import ChatCompletionRole = CompletionsAPI.ChatCompletionRole; export import ChatCompletionStreamOptions = CompletionsAPI.ChatCompletionStreamOptions; diff --git a/src/resources/chat/completions.ts b/src/resources/chat/completions.ts index 27aebdc4c..97174ec1b 100644 --- a/src/resources/chat/completions.ts +++ b/src/resources/chat/completions.ts @@ -11,7 +11,10 @@ import { Stream } from '../../streaming'; export class Completions extends APIResource { /** - * Creates a model response for the given chat conversation. + * Creates a model response for the given chat conversation. Learn more in the + * [text generation](https://platform.openai.com/docs/guides/text-generation), + * [vision](https://platform.openai.com/docs/guides/vision), and + * [audio](https://platform.openai.com/docs/guides/audio) guides. */ create( body: ChatCompletionCreateParamsNonStreaming, @@ -138,6 +141,12 @@ export interface ChatCompletionAssistantMessageParam { */ role: 'assistant'; + /** + * Data about a previous audio response from the model. + * [Learn more](https://platform.openai.com/docs/guides/audio). + */ + audio?: ChatCompletionAssistantMessageParam.Audio | null; + /** * The contents of the assistant message. Required unless `tool_calls` or * `function_call` is specified. @@ -168,6 +177,17 @@ export interface ChatCompletionAssistantMessageParam { } export namespace ChatCompletionAssistantMessageParam { + /** + * Data about a previous audio response from the model. + * [Learn more](https://platform.openai.com/docs/guides/audio). + */ + export interface Audio { + /** + * Unique identifier for a previous audio response from the model. + */ + id: string; + } + /** * @deprecated: Deprecated and replaced by `tool_calls`. The name and arguments of * a function that should be called, as generated by the model. @@ -188,6 +208,54 @@ export namespace ChatCompletionAssistantMessageParam { } } +/** + * If the audio output modality is requested, this object contains data about the + * audio response from the model. + * [Learn more](https://platform.openai.com/docs/guides/audio). + */ +export interface ChatCompletionAudio { + /** + * Unique identifier for this audio response. + */ + id: string; + + /** + * Base64 encoded audio bytes generated by the model, in the format specified in + * the request. + */ + data: string; + + /** + * The Unix timestamp (in seconds) for when this audio response will no longer be + * accessible on the server for use in multi-turn conversations. + */ + expires_at: number; + + /** + * Transcript of the audio generated by the model. + */ + transcript: string; +} + +/** + * Parameters for audio output. Required when audio output is requested with + * `modalities: ["audio"]`. + * [Learn more](https://platform.openai.com/docs/guides/audio). + */ +export interface ChatCompletionAudioParam { + /** + * Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, `opus`, + * or `pcm16`. + */ + format: 'wav' | 'mp3' | 'flac' | 'opus' | 'pcm16'; + + /** + * Specifies the voice type. Supported voices are `alloy`, `echo`, `fable`, `onyx`, + * `nova`, and `shimmer`. + */ + voice: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer'; +} + /** * Represents a streamed chunk of a chat completion response returned by model, * based on the provided input. @@ -371,8 +439,18 @@ export namespace ChatCompletionChunk { } } -export type ChatCompletionContentPart = ChatCompletionContentPartText | ChatCompletionContentPartImage; +/** + * Learn about + * [text inputs](https://platform.openai.com/docs/guides/text-generation). + */ +export type ChatCompletionContentPart = + | ChatCompletionContentPartText + | ChatCompletionContentPartImage + | ChatCompletionContentPartInputAudio; +/** + * Learn about [image inputs](https://platform.openai.com/docs/guides/vision). + */ export interface ChatCompletionContentPartImage { image_url: ChatCompletionContentPartImage.ImageURL; @@ -397,6 +475,32 @@ export namespace ChatCompletionContentPartImage { } } +/** + * Learn about [audio inputs](https://platform.openai.com/docs/guides/audio). + */ +export interface ChatCompletionContentPartInputAudio { + input_audio: ChatCompletionContentPartInputAudio.InputAudio; + + /** + * The type of the content part. Always `input_audio`. + */ + type: 'input_audio'; +} + +export namespace ChatCompletionContentPartInputAudio { + export interface InputAudio { + /** + * Base64 encoded audio data. + */ + data: string; + + /** + * The format of the encoded audio data. Currently supports "wav" and "mp3". + */ + format: 'wav' | 'mp3'; + } +} + export interface ChatCompletionContentPartRefusal { /** * The refusal message generated by the model. @@ -409,6 +513,10 @@ export interface ChatCompletionContentPartRefusal { type: 'refusal'; } +/** + * Learn about + * [text inputs](https://platform.openai.com/docs/guides/text-generation). + */ export interface ChatCompletionContentPartText { /** * The text content. @@ -471,6 +579,13 @@ export interface ChatCompletionMessage { */ role: 'assistant'; + /** + * If the audio output modality is requested, this object contains data about the + * audio response from the model. + * [Learn more](https://platform.openai.com/docs/guides/audio). + */ + audio?: ChatCompletionAudio | null; + /** * @deprecated: Deprecated and replaced by `tool_calls`. The name and arguments of * a function that should be called, as generated by the model. @@ -548,6 +663,8 @@ export namespace ChatCompletionMessageToolCall { } } +export type ChatCompletionModality = 'text' | 'audio'; + /** * Specifies a tool the model should use. Use to force the model to call a specific * function. @@ -743,6 +860,13 @@ export interface ChatCompletionCreateParamsBase { */ model: (string & {}) | ChatAPI.ChatModel; + /** + * Parameters for audio output. Required when audio output is requested with + * `modalities: ["audio"]`. + * [Learn more](https://platform.openai.com/docs/guides/audio). + */ + audio?: ChatCompletionAudioParam | null; + /** * Number between -2.0 and 2.0. Positive values penalize new tokens based on their * existing frequency in the text so far, decreasing the model's likelihood to @@ -812,10 +936,24 @@ export interface ChatCompletionCreateParamsBase { /** * Developer-defined tags and values used for filtering completions in the - * [dashboard](https://platform.openai.com/completions). + * [dashboard](https://platform.openai.com/chat-completions). */ metadata?: Record | null; + /** + * Output types that you would like the model to generate for this request. Most + * models are capable of generating text, which is the default: + * + * `["text"]` + * + * The `gpt-4o-audio-preview` model can also be used to + * [generate audio](https://platform.openai.com/docs/guides/audio). To request that + * this model generate both text and audio responses, you can use: + * + * `["text", "audio"]` + */ + modalities?: Array | null; + /** * How many chat completion choices to generate for each input message. Note that * you will be charged based on the number of generated tokens across all of the @@ -900,8 +1038,9 @@ export interface ChatCompletionCreateParamsBase { stop?: string | null | Array; /** - * Whether or not to store the output of this completion request for traffic - * logging in the [dashboard](https://platform.openai.com/completions). + * Whether or not to store the output of this chat completion request for use in + * our [model distillation](https://platform.openai.com/docs/guides/distillation) + * or [evals](https://platform.openai.com/docs/guides/evals) products. */ store?: boolean | null; @@ -1049,9 +1188,12 @@ export type CompletionCreateParamsStreaming = ChatCompletionCreateParamsStreamin export namespace Completions { export import ChatCompletion = ChatCompletionsAPI.ChatCompletion; export import ChatCompletionAssistantMessageParam = ChatCompletionsAPI.ChatCompletionAssistantMessageParam; + export import ChatCompletionAudio = ChatCompletionsAPI.ChatCompletionAudio; + export import ChatCompletionAudioParam = ChatCompletionsAPI.ChatCompletionAudioParam; export import ChatCompletionChunk = ChatCompletionsAPI.ChatCompletionChunk; export import ChatCompletionContentPart = ChatCompletionsAPI.ChatCompletionContentPart; export import ChatCompletionContentPartImage = ChatCompletionsAPI.ChatCompletionContentPartImage; + export import ChatCompletionContentPartInputAudio = ChatCompletionsAPI.ChatCompletionContentPartInputAudio; export import ChatCompletionContentPartRefusal = ChatCompletionsAPI.ChatCompletionContentPartRefusal; export import ChatCompletionContentPartText = ChatCompletionsAPI.ChatCompletionContentPartText; export import ChatCompletionFunctionCallOption = ChatCompletionsAPI.ChatCompletionFunctionCallOption; @@ -1059,6 +1201,7 @@ export namespace Completions { export import ChatCompletionMessage = ChatCompletionsAPI.ChatCompletionMessage; export import ChatCompletionMessageParam = ChatCompletionsAPI.ChatCompletionMessageParam; export import ChatCompletionMessageToolCall = ChatCompletionsAPI.ChatCompletionMessageToolCall; + export import ChatCompletionModality = ChatCompletionsAPI.ChatCompletionModality; export import ChatCompletionNamedToolChoice = ChatCompletionsAPI.ChatCompletionNamedToolChoice; export import ChatCompletionRole = ChatCompletionsAPI.ChatCompletionRole; export import ChatCompletionStreamOptions = ChatCompletionsAPI.ChatCompletionStreamOptions; diff --git a/src/resources/chat/index.ts b/src/resources/chat/index.ts index 748770948..22803e819 100644 --- a/src/resources/chat/index.ts +++ b/src/resources/chat/index.ts @@ -3,9 +3,12 @@ export { ChatCompletion, ChatCompletionAssistantMessageParam, + ChatCompletionAudio, + ChatCompletionAudioParam, ChatCompletionChunk, ChatCompletionContentPart, ChatCompletionContentPartImage, + ChatCompletionContentPartInputAudio, ChatCompletionContentPartRefusal, ChatCompletionContentPartText, ChatCompletionFunctionCallOption, @@ -13,6 +16,7 @@ export { ChatCompletionMessage, ChatCompletionMessageParam, ChatCompletionMessageToolCall, + ChatCompletionModality, ChatCompletionNamedToolChoice, ChatCompletionRole, ChatCompletionStreamOptions, diff --git a/tests/api-resources/chat/completions.test.ts b/tests/api-resources/chat/completions.test.ts index 4f015b47e..77d4a251c 100644 --- a/tests/api-resources/chat/completions.test.ts +++ b/tests/api-resources/chat/completions.test.ts @@ -27,6 +27,7 @@ describe('resource completions', () => { const response = await client.chat.completions.create({ messages: [{ content: 'string', role: 'system', name: 'name' }], model: 'gpt-4o', + audio: { format: 'wav', voice: 'alloy' }, frequency_penalty: -2, function_call: 'none', functions: [{ name: 'name', description: 'description', parameters: { foo: 'bar' } }], @@ -35,6 +36,7 @@ describe('resource completions', () => { max_completion_tokens: 0, max_tokens: 0, metadata: { foo: 'string' }, + modalities: ['text', 'audio'], n: 1, parallel_tool_calls: true, presence_penalty: -2, From 59d1e555c87c245d067cefab4ae99755a41b15a2 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 17 Oct 2024 17:19:37 +0000 Subject: [PATCH 2/2] release: 4.68.0 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 8 ++++++++ README.md | 2 +- package.json | 2 +- scripts/build-deno | 2 +- src/version.ts | 2 +- 6 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index e8c54ecee..91b39801d 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "4.67.3" + ".": "4.68.0" } diff --git a/CHANGELOG.md b/CHANGELOG.md index 710d09ca9..2fcd3be4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 4.68.0 (2024-10-17) + +Full Changelog: [v4.67.3...v4.68.0](https://github.com/openai/openai-node/compare/v4.67.3...v4.68.0) + +### Features + +* **api:** add gpt-4o-audio-preview model for chat completions ([#1135](https://github.com/openai/openai-node/issues/1135)) ([17a623f](https://github.com/openai/openai-node/commit/17a623f70050bca4538ad2939055cd9d9b165f89)) + ## 4.67.3 (2024-10-08) Full Changelog: [v4.67.2...v4.67.3](https://github.com/openai/openai-node/compare/v4.67.2...v4.67.3) diff --git a/README.md b/README.md index 407933634..bbfc821d2 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ You can import in Deno via: ```ts -import OpenAI from 'https://deno.land/x/openai@v4.67.3/mod.ts'; +import OpenAI from 'https://deno.land/x/openai@v4.68.0/mod.ts'; ``` diff --git a/package.json b/package.json index e20c1b9c1..807c79098 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "openai", - "version": "4.67.3", + "version": "4.68.0", "description": "The official TypeScript library for the OpenAI API", "author": "OpenAI ", "types": "dist/index.d.ts", diff --git a/scripts/build-deno b/scripts/build-deno index f59404dbc..5e813aeb2 100755 --- a/scripts/build-deno +++ b/scripts/build-deno @@ -16,7 +16,7 @@ This is a build produced from https://github.com/openai/openai-node – please g Usage: \`\`\`ts -import OpenAI from "https://deno.land/x/openai@v4.67.3/mod.ts"; +import OpenAI from "https://deno.land/x/openai@v4.68.0/mod.ts"; const client = new OpenAI(); \`\`\` diff --git a/src/version.ts b/src/version.ts index 174c31111..12aaa52bb 100644 --- a/src/version.ts +++ b/src/version.ts @@ -1 +1 @@ -export const VERSION = '4.67.3'; // x-release-please-version +export const VERSION = '4.68.0'; // x-release-please-version