From ef86f9510046799d3b0577ad1ea69755c55992cd Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 20 Mar 2025 16:09:01 +0000 Subject: [PATCH] feat(api): new models for TTS, STT, + new audio features for Realtime --- .stats.yml | 2 +- lib/openai.rb | 4 + .../models/audio/speech_create_params.rb | 20 +++- lib/openai/models/audio/speech_model.rb | 1 + lib/openai/models/audio/transcription.rb | 56 +++++++++- .../audio/transcription_create_params.rb | 31 +++++- .../models/audio/transcription_include.rb | 14 +++ .../audio/transcription_stream_event.rb | 29 +++++ .../audio/transcription_text_delta_event.rb | 88 +++++++++++++++ .../audio/transcription_text_done_event.rb | 89 +++++++++++++++ .../models/audio/translation_create_params.rb | 22 +++- lib/openai/models/audio_model.rb | 2 + lib/openai/models/audio_response_format.rb | 3 +- .../models/chat/chat_completion_chunk.rb | 10 +- lib/openai/resources/audio/speech.rb | 5 +- lib/openai/resources/audio/transcriptions.rb | 81 +++++++++++++- lib/openai/resources/audio/translations.rb | 2 +- .../models/audio/speech_create_params.rbi | 18 ++- rbi/lib/openai/models/audio/speech_model.rbi | 1 + rbi/lib/openai/models/audio/transcription.rbi | 60 +++++++++- .../audio/transcription_create_params.rbi | 29 ++++- .../models/audio/transcription_include.rbi | 15 +++ .../audio/transcription_stream_event.rbi | 25 +++++ .../audio/transcription_text_delta_event.rbi | 102 +++++++++++++++++ .../audio/transcription_text_done_event.rbi | 103 ++++++++++++++++++ .../audio/translation_create_params.rbi | 14 +++ rbi/lib/openai/models/audio_model.rbi | 2 + .../openai/models/audio_response_format.rbi | 3 +- .../models/chat/chat_completion_chunk.rbi | 6 +- rbi/lib/openai/resources/audio/speech.rbi | 6 +- .../openai/resources/audio/transcriptions.rbi | 88 ++++++++++++++- .../models/audio/speech_create_params.rbs | 6 + sig/openai/models/audio/speech_model.rbs | 3 +- sig/openai/models/audio/transcription.rbs | 41 ++++++- .../audio/transcription_create_params.rbs | 8 ++ .../models/audio/transcription_include.rbs | 13 +++ .../audio/transcription_stream_event.rbs | 13 +++ .../audio/transcription_text_delta_event.rbs | 56 ++++++++++ .../audio/transcription_text_done_event.rbs | 56 ++++++++++ .../audio/translation_create_params.rbs | 22 +++- sig/openai/models/audio_model.rbs | 5 +- .../models/chat/chat_completion_chunk.rbs | 10 +- sig/openai/resources/audio/speech.rbs | 1 + sig/openai/resources/audio/transcriptions.rbs | 13 +++ sig/openai/resources/audio/translations.rbs | 2 +- 45 files changed, 1115 insertions(+), 65 deletions(-) create mode 100644 lib/openai/models/audio/transcription_include.rb create mode 100644 lib/openai/models/audio/transcription_stream_event.rb create mode 100644 lib/openai/models/audio/transcription_text_delta_event.rb create mode 100644 lib/openai/models/audio/transcription_text_done_event.rb create mode 100644 rbi/lib/openai/models/audio/transcription_include.rbi create mode 100644 rbi/lib/openai/models/audio/transcription_stream_event.rbi create mode 100644 rbi/lib/openai/models/audio/transcription_text_delta_event.rbi create mode 100644 rbi/lib/openai/models/audio/transcription_text_done_event.rbi create mode 100644 sig/openai/models/audio/transcription_include.rbs create mode 100644 sig/openai/models/audio/transcription_stream_event.rbs create mode 100644 sig/openai/models/audio/transcription_text_delta_event.rbs create mode 100644 sig/openai/models/audio/transcription_text_done_event.rbs 
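For orientation before the file-by-file changes, here is a minimal usage sketch of the new TTS surface this patch introduces. This is an illustration only, not part of the patch: it assumes a configured `OpenAI::Client` with `OPENAI_API_KEY` set; the `gpt-4o-mini-tts` model and the `instructions` parameter are the additions documented in the diffs below.

    # Sketch only — not part of the patch. Assumes OPENAI_API_KEY is set.
    client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])

    # `gpt-4o-mini-tts` is the newly added TTS model. `instructions`
    # steers the delivery of the generated audio and, per the docs
    # below, does not work with `tts-1` or `tts-1-hd`.
    client.audio.speech.create(
      input: "The quick brown fox jumped over the lazy dog.",
      model: :"gpt-4o-mini-tts",
      voice: :alloy,
      instructions: "Speak in a calm, reassuring tone."
    )
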
diff --git a/.stats.yml b/.stats.yml index 16c6386b..199d46be 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 80 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-b26121d5df6eb5d3032a45a267473798b15fcfec76dd44a3256cf1238be05fa4.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-c22f59c66aec7914b6ee653d3098d1c1c8c16c180d2a158e819c8ddbf476f74b.yml diff --git a/lib/openai.rb b/lib/openai.rb index 84c5d333..3a9e6cd2 100644 --- a/lib/openai.rb +++ b/lib/openai.rb @@ -43,7 +43,11 @@ require_relative "openai/models/audio/transcription" require_relative "openai/models/audio/transcription_create_params" require_relative "openai/models/audio/transcription_create_response" +require_relative "openai/models/audio/transcription_include" require_relative "openai/models/audio/transcription_segment" +require_relative "openai/models/audio/transcription_stream_event" +require_relative "openai/models/audio/transcription_text_delta_event" +require_relative "openai/models/audio/transcription_text_done_event" require_relative "openai/models/audio/transcription_verbose" require_relative "openai/models/audio/transcription_word" require_relative "openai/models/audio/translation" diff --git a/lib/openai/models/audio/speech_create_params.rb b/lib/openai/models/audio/speech_create_params.rb index b7e77b57..2477a4ca 100644 --- a/lib/openai/models/audio/speech_create_params.rb +++ b/lib/openai/models/audio/speech_create_params.rb @@ -16,7 +16,7 @@ class SpeechCreateParams < OpenAI::BaseModel # @!attribute model # One of the available [TTS models](https://platform.openai.com/docs/models#tts): - # `tts-1` or `tts-1-hd` + # `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. # # @return [String, Symbol, OpenAI::Models::Audio::SpeechModel] required :model, union: -> { OpenAI::Models::Audio::SpeechCreateParams::Model } @@ -30,6 +30,17 @@ class SpeechCreateParams < OpenAI::BaseModel # @return [Symbol, OpenAI::Models::Audio::SpeechCreateParams::Voice] required :voice, enum: -> { OpenAI::Models::Audio::SpeechCreateParams::Voice } + # @!attribute [r] instructions + # Control the voice of your generated audio with additional instructions. Does not + # work with `tts-1` or `tts-1-hd`. + # + # @return [String, nil] + optional :instructions, String + + # @!parse + # # @return [String] + # attr_writer :instructions + # @!attribute [r] response_format # The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, # `wav`, and `pcm`. @@ -56,22 +67,23 @@ class SpeechCreateParams < OpenAI::BaseModel # # @param input [String] # # @param model [String, Symbol, OpenAI::Models::Audio::SpeechModel] # # @param voice [Symbol, OpenAI::Models::Audio::SpeechCreateParams::Voice] + # # @param instructions [String] # # @param response_format [Symbol, OpenAI::Models::Audio::SpeechCreateParams::ResponseFormat] # # @param speed [Float] # # @param request_options [OpenAI::RequestOptions, Hash{Symbol=>Object}] # # - # def initialize(input:, model:, voice:, response_format: nil, speed: nil, request_options: {}, **) = super + # def initialize(input:, model:, voice:, instructions: nil, response_format: nil, speed: nil, request_options: {}, **) = super # def initialize: (Hash | OpenAI::BaseModel) -> void # @abstract # # One of the available [TTS models](https://platform.openai.com/docs/models#tts): - # `tts-1` or `tts-1-hd` + # `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. 
class Model < OpenAI::Union variant String - # One of the available [TTS models](https://platform.openai.com/docs/models#tts): `tts-1` or `tts-1-hd` + # One of the available [TTS models](https://platform.openai.com/docs/models#tts): `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. variant enum: -> { OpenAI::Models::Audio::SpeechModel } # @!parse diff --git a/lib/openai/models/audio/speech_model.rb b/lib/openai/models/audio/speech_model.rb index 96744e0c..26aae9d6 100644 --- a/lib/openai/models/audio/speech_model.rb +++ b/lib/openai/models/audio/speech_model.rb @@ -7,6 +7,7 @@ module Audio class SpeechModel < OpenAI::Enum TTS_1 = :"tts-1" TTS_1_HD = :"tts-1-hd" + GPT_4O_MINI_TTS = :"gpt-4o-mini-tts" finalize! end diff --git a/lib/openai/models/audio/transcription.rb b/lib/openai/models/audio/transcription.rb index 6bd2d97b..8185ea6c 100644 --- a/lib/openai/models/audio/transcription.rb +++ b/lib/openai/models/audio/transcription.rb @@ -10,15 +10,69 @@ class Transcription < OpenAI::BaseModel # @return [String] required :text, String + # @!attribute [r] logprobs + # The log probabilities of the tokens in the transcription. Only returned with the + # models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` if `logprobs` is added + # to the `include` array. + # + # @return [Array, nil] + optional :logprobs, -> { OpenAI::ArrayOf[OpenAI::Models::Audio::Transcription::Logprob] } + + # @!parse + # # @return [Array] + # attr_writer :logprobs + # @!parse # # Represents a transcription response returned by model, based on the provided # # input. # # # # @param text [String] + # # @param logprobs [Array] # # - # def initialize(text:, **) = super + # def initialize(text:, logprobs: nil, **) = super # def initialize: (Hash | OpenAI::BaseModel) -> void + + class Logprob < OpenAI::BaseModel + # @!attribute [r] token + # The token in the transcription. + # + # @return [String, nil] + optional :token, String + + # @!parse + # # @return [String] + # attr_writer :token + + # @!attribute [r] bytes + # The bytes of the token. + # + # @return [Array, nil] + optional :bytes, OpenAI::ArrayOf[Float] + + # @!parse + # # @return [Array] + # attr_writer :bytes + + # @!attribute [r] logprob + # The log probability of the token. + # + # @return [Float, nil] + optional :logprob, Float + + # @!parse + # # @return [Float] + # attr_writer :logprob + + # @!parse + # # @param token [String] + # # @param bytes [Array] + # # @param logprob [Float] + # # + # def initialize(token: nil, bytes: nil, logprob: nil, **) = super + + # def initialize: (Hash | OpenAI::BaseModel) -> void + end end end end diff --git a/lib/openai/models/audio/transcription_create_params.rb b/lib/openai/models/audio/transcription_create_params.rb index d6d9f071..3ff8c770 100644 --- a/lib/openai/models/audio/transcription_create_params.rb +++ b/lib/openai/models/audio/transcription_create_params.rb @@ -16,12 +16,27 @@ class TranscriptionCreateParams < OpenAI::BaseModel required :file, IO # @!attribute model - # ID of the model to use. Only `whisper-1` (which is powered by our open source - # Whisper V2 model) is currently available. + # ID of the model to use. The options are `gpt-4o-transcribe`, + # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + # Whisper V2 model). # # @return [String, Symbol, OpenAI::Models::AudioModel] required :model, union: -> { OpenAI::Models::Audio::TranscriptionCreateParams::Model } + # @!attribute [r] include + # Additional information to include in the transcription response. 
`logprobs` will + # return the log probabilities of the tokens in the response to understand the + # model's confidence in the transcription. `logprobs` only works with + # response_format set to `json` and only with the models `gpt-4o-transcribe` and + # `gpt-4o-mini-transcribe`. + # + # @return [Array, nil] + optional :include, -> { OpenAI::ArrayOf[enum: OpenAI::Models::Audio::TranscriptionInclude] } + + # @!parse + # # @return [Array] + # attr_writer :include + # @!attribute [r] language # The language of the input audio. Supplying the input language in # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) @@ -49,7 +64,8 @@ class TranscriptionCreateParams < OpenAI::BaseModel # @!attribute [r] response_format # The format of the output, in one of these options: `json`, `text`, `srt`, - # `verbose_json`, or `vtt`. + # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + # the only supported format is `json`. # # @return [Symbol, OpenAI::Models::AudioResponseFormat, nil] optional :response_format, enum: -> { OpenAI::Models::AudioResponseFormat } @@ -90,6 +106,7 @@ class TranscriptionCreateParams < OpenAI::BaseModel # @!parse # # @param file [IO, StringIO] # # @param model [String, Symbol, OpenAI::Models::AudioModel] + # # @param include [Array] # # @param language [String] # # @param prompt [String] # # @param response_format [Symbol, OpenAI::Models::AudioResponseFormat] @@ -100,6 +117,7 @@ class TranscriptionCreateParams < OpenAI::BaseModel # def initialize( # file:, # model:, + # include: nil, # language: nil, # prompt: nil, # response_format: nil, @@ -115,12 +133,13 @@ class TranscriptionCreateParams < OpenAI::BaseModel # @abstract # - # ID of the model to use. Only `whisper-1` (which is powered by our open source - # Whisper V2 model) is currently available. + # ID of the model to use. The options are `gpt-4o-transcribe`, + # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + # Whisper V2 model). class Model < OpenAI::Union variant String - # ID of the model to use. Only `whisper-1` (which is powered by our open source Whisper V2 model) is currently available. + # ID of the model to use. The options are `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source Whisper V2 model). variant enum: -> { OpenAI::Models::AudioModel } # @!parse diff --git a/lib/openai/models/audio/transcription_include.rb b/lib/openai/models/audio/transcription_include.rb new file mode 100644 index 00000000..97303675 --- /dev/null +++ b/lib/openai/models/audio/transcription_include.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Audio + # @abstract + class TranscriptionInclude < OpenAI::Enum + LOGPROBS = :logprobs + + finalize! + end + end + end +end diff --git a/lib/openai/models/audio/transcription_stream_event.rb b/lib/openai/models/audio/transcription_stream_event.rb new file mode 100644 index 00000000..4bddaa1b --- /dev/null +++ b/lib/openai/models/audio/transcription_stream_event.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Audio + # @abstract + # + # Emitted when there is an additional text delta. This is also the first event + # emitted when the transcription starts. Only emitted when you + # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # with the `Stream` parameter set to `true`. 
+ class TranscriptionStreamEvent < OpenAI::Union + discriminator :type + + # Emitted when there is an additional text delta. This is also the first event emitted when the transcription starts. Only emitted when you [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with the `Stream` parameter set to `true`. + variant :"transcript.text.delta", -> { OpenAI::Models::Audio::TranscriptionTextDeltaEvent } + + # Emitted when the transcription is complete. Contains the complete transcription text. Only emitted when you [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with the `Stream` parameter set to `true`. + variant :"transcript.text.done", -> { OpenAI::Models::Audio::TranscriptionTextDoneEvent } + + # @!parse + # class << self + # # @return [Array(OpenAI::Models::Audio::TranscriptionTextDeltaEvent, OpenAI::Models::Audio::TranscriptionTextDoneEvent)] + # def variants; end + # end + end + end + end +end diff --git a/lib/openai/models/audio/transcription_text_delta_event.rb b/lib/openai/models/audio/transcription_text_delta_event.rb new file mode 100644 index 00000000..ec8ca4f4 --- /dev/null +++ b/lib/openai/models/audio/transcription_text_delta_event.rb @@ -0,0 +1,88 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Audio + class TranscriptionTextDeltaEvent < OpenAI::BaseModel + # @!attribute delta + # The text delta that was additionally transcribed. + # + # @return [String] + required :delta, String + + # @!attribute type + # The type of the event. Always `transcript.text.delta`. + # + # @return [Symbol, :"transcript.text.delta"] + required :type, const: :"transcript.text.delta" + + # @!attribute [r] logprobs + # The log probabilities of the delta. Only included if you + # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # with the `include[]` parameter set to `logprobs`. + # + # @return [Array, nil] + optional :logprobs, -> { OpenAI::ArrayOf[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob] } + + # @!parse + # # @return [Array] + # attr_writer :logprobs + + # @!parse + # # Emitted when there is an additional text delta. This is also the first event + # # emitted when the transcription starts. Only emitted when you + # # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # # with the `Stream` parameter set to `true`. + # # + # # @param delta [String] + # # @param logprobs [Array] + # # @param type [Symbol, :"transcript.text.delta"] + # # + # def initialize(delta:, logprobs: nil, type: :"transcript.text.delta", **) = super + + # def initialize: (Hash | OpenAI::BaseModel) -> void + + class Logprob < OpenAI::BaseModel + # @!attribute [r] token + # The token that was used to generate the log probability. + # + # @return [String, nil] + optional :token, String + + # @!parse + # # @return [String] + # attr_writer :token + + # @!attribute [r] bytes + # The bytes that were used to generate the log probability. + # + # @return [Array, nil] + optional :bytes, OpenAI::ArrayOf[OpenAI::Unknown] + + # @!parse + # # @return [Array] + # attr_writer :bytes + + # @!attribute [r] logprob + # The log probability of the token. 
+ # + # @return [Float, nil] + optional :logprob, Float + + # @!parse + # # @return [Float] + # attr_writer :logprob + + # @!parse + # # @param token [String] + # # @param bytes [Array] + # # @param logprob [Float] + # # + # def initialize(token: nil, bytes: nil, logprob: nil, **) = super + + # def initialize: (Hash | OpenAI::BaseModel) -> void + end + end + end + end +end diff --git a/lib/openai/models/audio/transcription_text_done_event.rb b/lib/openai/models/audio/transcription_text_done_event.rb new file mode 100644 index 00000000..b2a78b25 --- /dev/null +++ b/lib/openai/models/audio/transcription_text_done_event.rb @@ -0,0 +1,89 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Audio + class TranscriptionTextDoneEvent < OpenAI::BaseModel + # @!attribute text + # The text that was transcribed. + # + # @return [String] + required :text, String + + # @!attribute type + # The type of the event. Always `transcript.text.done`. + # + # @return [Symbol, :"transcript.text.done"] + required :type, const: :"transcript.text.done" + + # @!attribute [r] logprobs + # The log probabilities of the individual tokens in the transcription. Only + # included if you + # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # with the `include[]` parameter set to `logprobs`. + # + # @return [Array, nil] + optional :logprobs, -> { OpenAI::ArrayOf[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob] } + + # @!parse + # # @return [Array] + # attr_writer :logprobs + + # @!parse + # # Emitted when the transcription is complete. Contains the complete transcription + # # text. Only emitted when you + # # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # # with the `Stream` parameter set to `true`. + # # + # # @param text [String] + # # @param logprobs [Array] + # # @param type [Symbol, :"transcript.text.done"] + # # + # def initialize(text:, logprobs: nil, type: :"transcript.text.done", **) = super + + # def initialize: (Hash | OpenAI::BaseModel) -> void + + class Logprob < OpenAI::BaseModel + # @!attribute [r] token + # The token that was used to generate the log probability. + # + # @return [String, nil] + optional :token, String + + # @!parse + # # @return [String] + # attr_writer :token + + # @!attribute [r] bytes + # The bytes that were used to generate the log probability. + # + # @return [Array, nil] + optional :bytes, OpenAI::ArrayOf[OpenAI::Unknown] + + # @!parse + # # @return [Array] + # attr_writer :bytes + + # @!attribute [r] logprob + # The log probability of the token. + # + # @return [Float, nil] + optional :logprob, Float + + # @!parse + # # @return [Float] + # attr_writer :logprob + + # @!parse + # # @param token [String] + # # @param bytes [Array] + # # @param logprob [Float] + # # + # def initialize(token: nil, bytes: nil, logprob: nil, **) = super + + # def initialize: (Hash | OpenAI::BaseModel) -> void + end + end + end + end +end diff --git a/lib/openai/models/audio/translation_create_params.rb b/lib/openai/models/audio/translation_create_params.rb index 4fd4a4dc..0b31b58c 100644 --- a/lib/openai/models/audio/translation_create_params.rb +++ b/lib/openai/models/audio/translation_create_params.rb @@ -39,11 +39,11 @@ class TranslationCreateParams < OpenAI::BaseModel # The format of the output, in one of these options: `json`, `text`, `srt`, # `verbose_json`, or `vtt`. 
# - # @return [Symbol, OpenAI::Models::AudioResponseFormat, nil] - optional :response_format, enum: -> { OpenAI::Models::AudioResponseFormat } + # @return [Symbol, OpenAI::Models::Audio::TranslationCreateParams::ResponseFormat, nil] + optional :response_format, enum: -> { OpenAI::Models::Audio::TranslationCreateParams::ResponseFormat } # @!parse - # # @return [Symbol, OpenAI::Models::AudioResponseFormat] + # # @return [Symbol, OpenAI::Models::Audio::TranslationCreateParams::ResponseFormat] # attr_writer :response_format # @!attribute [r] temperature @@ -64,7 +64,7 @@ class TranslationCreateParams < OpenAI::BaseModel # # @param file [IO, StringIO] # # @param model [String, Symbol, OpenAI::Models::AudioModel] # # @param prompt [String] - # # @param response_format [Symbol, OpenAI::Models::AudioResponseFormat] + # # @param response_format [Symbol, OpenAI::Models::Audio::TranslationCreateParams::ResponseFormat] # # @param temperature [Float] # # @param request_options [OpenAI::RequestOptions, Hash{Symbol=>Object}] # # @@ -88,6 +88,20 @@ class Model < OpenAI::Union # def variants; end # end end + + # @abstract + # + # The format of the output, in one of these options: `json`, `text`, `srt`, + # `verbose_json`, or `vtt`. + class ResponseFormat < OpenAI::Enum + JSON = :json + TEXT = :text + SRT = :srt + VERBOSE_JSON = :verbose_json + VTT = :vtt + + finalize! + end end end end diff --git a/lib/openai/models/audio_model.rb b/lib/openai/models/audio_model.rb index 81db712e..88507173 100644 --- a/lib/openai/models/audio_model.rb +++ b/lib/openai/models/audio_model.rb @@ -5,6 +5,8 @@ module Models # @abstract class AudioModel < OpenAI::Enum WHISPER_1 = :"whisper-1" + GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe" + GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe" finalize! end diff --git a/lib/openai/models/audio_response_format.rb b/lib/openai/models/audio_response_format.rb index 8b92a3b9..9593d816 100644 --- a/lib/openai/models/audio_response_format.rb +++ b/lib/openai/models/audio_response_format.rb @@ -5,7 +5,8 @@ module Models # @abstract # # The format of the output, in one of these options: `json`, `text`, `srt`, - # `verbose_json`, or `vtt`. + # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + # the only supported format is `json`. class AudioResponseFormat < OpenAI::Enum JSON = :json TEXT = :text diff --git a/lib/openai/models/chat/chat_completion_chunk.rb b/lib/openai/models/chat/chat_completion_chunk.rb index 5c0d47df..5f0a0fef 100644 --- a/lib/openai/models/chat/chat_completion_chunk.rb +++ b/lib/openai/models/chat/chat_completion_chunk.rb @@ -55,7 +55,7 @@ class ChatCompletionChunk < OpenAI::BaseModel # # @return [String] # attr_writer :system_fingerprint - # @!attribute [r] usage + # @!attribute usage # An optional field that will only be present when you set # `stream_options: {"include_usage": true}` in your request. When present, it # contains a null value **except for the last chunk** which contains the token @@ -65,11 +65,7 @@ class ChatCompletionChunk < OpenAI::BaseModel # final usage chunk which contains the total token usage for the request. 
# # @return [OpenAI::Models::CompletionUsage, nil] - optional :usage, -> { OpenAI::Models::CompletionUsage } - - # @!parse - # # @return [OpenAI::Models::CompletionUsage] - # attr_writer :usage + optional :usage, -> { OpenAI::Models::CompletionUsage }, nil?: true # @!parse # # Represents a streamed chunk of a chat completion response returned by the model, @@ -82,7 +78,7 @@ class ChatCompletionChunk < OpenAI::BaseModel # # @param model [String] # # @param service_tier [Symbol, OpenAI::Models::Chat::ChatCompletionChunk::ServiceTier, nil] # # @param system_fingerprint [String] - # # @param usage [OpenAI::Models::CompletionUsage] + # # @param usage [OpenAI::Models::CompletionUsage, nil] # # @param object [Symbol, :"chat.completion.chunk"] # # # def initialize( diff --git a/lib/openai/resources/audio/speech.rb b/lib/openai/resources/audio/speech.rb index 9c5d8284..b5b584fe 100644 --- a/lib/openai/resources/audio/speech.rb +++ b/lib/openai/resources/audio/speech.rb @@ -11,13 +11,16 @@ class Speech # @option params [String] :input The text to generate audio for. The maximum length is 4096 characters. # # @option params [String, Symbol, OpenAI::Models::Audio::SpeechModel] :model One of the available [TTS models](https://platform.openai.com/docs/models#tts): - # `tts-1` or `tts-1-hd` + # `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. # # @option params [Symbol, OpenAI::Models::Audio::SpeechCreateParams::Voice] :voice The voice to use when generating the audio. Supported voices are `alloy`, `ash`, # `coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the # voices are available in the # [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). # + # @option params [String] :instructions Control the voice of your generated audio with additional instructions. Does not + # work with `tts-1` or `tts-1-hd`. + # # @option params [Symbol, OpenAI::Models::Audio::SpeechCreateParams::ResponseFormat] :response_format The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, # `wav`, and `pcm`. # diff --git a/lib/openai/resources/audio/transcriptions.rb b/lib/openai/resources/audio/transcriptions.rb index 9e291700..3c7238bf 100644 --- a/lib/openai/resources/audio/transcriptions.rb +++ b/lib/openai/resources/audio/transcriptions.rb @@ -11,8 +11,15 @@ class Transcriptions # @option params [IO, StringIO] :file The audio file object (not file name) to transcribe, in one of these formats: # flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. # - # @option params [String, Symbol, OpenAI::Models::AudioModel] :model ID of the model to use. Only `whisper-1` (which is powered by our open source - # Whisper V2 model) is currently available. + # @option params [String, Symbol, OpenAI::Models::AudioModel] :model ID of the model to use. The options are `gpt-4o-transcribe`, + # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + # Whisper V2 model). + # + # @option params [Array] :include Additional information to include in the transcription response. `logprobs` will + # return the log probabilities of the tokens in the response to understand the + # model's confidence in the transcription. `logprobs` only works with + # response_format set to `json` and only with the models `gpt-4o-transcribe` and + # `gpt-4o-mini-transcribe`. # # @option params [String] :language The language of the input audio. Supplying the input language in # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. 
`en`) @@ -24,7 +31,8 @@ class Transcriptions # should match the audio language. # # @option params [Symbol, OpenAI::Models::AudioResponseFormat] :response_format The format of the output, in one of these options: `json`, `text`, `srt`, - # `verbose_json`, or `vtt`. + # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + # the only supported format is `json`. # # @option params [Float] :temperature The sampling temperature, between 0 and 1. Higher values like 0.8 will make the # output more random, while lower values like 0.2 will make it more focused and @@ -43,6 +51,10 @@ class Transcriptions # @return [OpenAI::Models::Audio::Transcription, OpenAI::Models::Audio::TranscriptionVerbose] def create(params) parsed, options = OpenAI::Models::Audio::TranscriptionCreateParams.dump_request(params) + if parsed[:stream] + message = "Please use `#create_streaming` for the streaming use case." + raise ArgumentError.new(message) + end @client.request( method: :post, path: "audio/transcriptions", @@ -53,6 +65,69 @@ def create(params) ) end + # Transcribes audio into the input language. + # + # @param params [OpenAI::Models::Audio::TranscriptionCreateParams, Hash{Symbol=>Object}] . + # + # @option params [IO, StringIO] :file The audio file object (not file name) to transcribe, in one of these formats: + # flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + # + # @option params [String, Symbol, OpenAI::Models::AudioModel] :model ID of the model to use. The options are `gpt-4o-transcribe`, + # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + # Whisper V2 model). + # + # @option params [Array] :include Additional information to include in the transcription response. `logprobs` will + # return the log probabilities of the tokens in the response to understand the + # model's confidence in the transcription. `logprobs` only works with + # response_format set to `json` and only with the models `gpt-4o-transcribe` and + # `gpt-4o-mini-transcribe`. + # + # @option params [String] :language The language of the input audio. Supplying the input language in + # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + # format will improve accuracy and latency. + # + # @option params [String] :prompt An optional text to guide the model's style or continue a previous audio + # segment. The + # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + # should match the audio language. + # + # @option params [Symbol, OpenAI::Models::AudioResponseFormat] :response_format The format of the output, in one of these options: `json`, `text`, `srt`, + # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + # the only supported format is `json`. + # + # @option params [Float] :temperature The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + # output more random, while lower values like 0.2 will make it more focused and + # deterministic. If set to 0, the model will use + # [log probability](https://en.wikipedia.org/wiki/Log_probability) to + # automatically increase the temperature until certain thresholds are hit. + # + # @option params [Array] :timestamp_granularities The timestamp granularities to populate for this transcription. + # `response_format` must be set `verbose_json` to use timestamp granularities. + # Either or both of these options are supported: `word`, or `segment`. 
Note: There + # is no additional latency for segment timestamps, but generating word timestamps + # incurs additional latency. + # + # @option params [OpenAI::RequestOptions, Hash{Symbol=>Object}, nil] :request_options + # + # @return [OpenAI::Stream] + def create_streaming(params) + parsed, options = OpenAI::Models::Audio::TranscriptionCreateParams.dump_request(params) + unless parsed.fetch(:stream, true) + message = "Please use `#create` for the non-streaming use case." + raise ArgumentError.new(message) + end + parsed.store(:stream, true) + @client.request( + method: :post, + path: "audio/transcriptions", + headers: {"content-type" => "multipart/form-data", "accept" => "text/event-stream"}, + body: parsed, + stream: OpenAI::Stream, + model: OpenAI::Models::Audio::TranscriptionStreamEvent, + options: options + ) + end + # @param client [OpenAI::Client] def initialize(client:) @client = client diff --git a/lib/openai/resources/audio/translations.rb b/lib/openai/resources/audio/translations.rb index c1de4f8e..ea8e0e4a 100644 --- a/lib/openai/resources/audio/translations.rb +++ b/lib/openai/resources/audio/translations.rb @@ -19,7 +19,7 @@ class Translations # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) # should be in English. # - # @option params [Symbol, OpenAI::Models::AudioResponseFormat] :response_format The format of the output, in one of these options: `json`, `text`, `srt`, + # @option params [Symbol, OpenAI::Models::Audio::TranslationCreateParams::ResponseFormat] :response_format The format of the output, in one of these options: `json`, `text`, `srt`, # `verbose_json`, or `vtt`. # # @option params [Float] :temperature The sampling temperature, between 0 and 1. Higher values like 0.8 will make the diff --git a/rbi/lib/openai/models/audio/speech_create_params.rbi b/rbi/lib/openai/models/audio/speech_create_params.rbi index 043a7179..e74cec3d 100644 --- a/rbi/lib/openai/models/audio/speech_create_params.rbi +++ b/rbi/lib/openai/models/audio/speech_create_params.rbi @@ -17,7 +17,7 @@ module OpenAI end # One of the available [TTS models](https://platform.openai.com/docs/models#tts): - # `tts-1` or `tts-1-hd` + # `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. sig { returns(T.any(String, Symbol)) } def model end @@ -38,6 +38,16 @@ module OpenAI def voice=(_) end + # Control the voice of your generated audio with additional instructions. Does not + # work with `tts-1` or `tts-1-hd`. + sig { returns(T.nilable(String)) } + def instructions + end + + sig { params(_: String).returns(String) } + def instructions=(_) + end + # The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, # `wav`, and `pcm`. 
sig { returns(T.nilable(Symbol)) } @@ -63,13 +73,14 @@ module OpenAI input: String, model: T.any(String, Symbol), voice: Symbol, + instructions: String, response_format: Symbol, speed: Float, request_options: T.any(OpenAI::RequestOptions, T::Hash[Symbol, T.anything]) ) .returns(T.attached_class) end - def self.new(input:, model:, voice:, response_format: nil, speed: nil, request_options: {}) + def self.new(input:, model:, voice:, instructions: nil, response_format: nil, speed: nil, request_options: {}) end sig do @@ -79,6 +90,7 @@ module OpenAI input: String, model: T.any(String, Symbol), voice: Symbol, + instructions: String, response_format: Symbol, speed: Float, request_options: OpenAI::RequestOptions @@ -89,7 +101,7 @@ module OpenAI end # One of the available [TTS models](https://platform.openai.com/docs/models#tts): - # `tts-1` or `tts-1-hd` + # `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. class Model < OpenAI::Union abstract! diff --git a/rbi/lib/openai/models/audio/speech_model.rbi b/rbi/lib/openai/models/audio/speech_model.rbi index f465baf8..5228e000 100644 --- a/rbi/lib/openai/models/audio/speech_model.rbi +++ b/rbi/lib/openai/models/audio/speech_model.rbi @@ -10,6 +10,7 @@ module OpenAI TTS_1 = :"tts-1" TTS_1_HD = :"tts-1-hd" + GPT_4O_MINI_TTS = :"gpt-4o-mini-tts" end end end diff --git a/rbi/lib/openai/models/audio/transcription.rbi b/rbi/lib/openai/models/audio/transcription.rbi index bc8940ae..ac4346e0 100644 --- a/rbi/lib/openai/models/audio/transcription.rbi +++ b/rbi/lib/openai/models/audio/transcription.rbi @@ -13,15 +13,69 @@ module OpenAI def text=(_) end + # The log probabilities of the tokens in the transcription. Only returned with the + # models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` if `logprobs` is added + # to the `include` array. + sig { returns(T.nilable(T::Array[OpenAI::Models::Audio::Transcription::Logprob])) } + def logprobs + end + + sig do + params(_: T::Array[OpenAI::Models::Audio::Transcription::Logprob]) + .returns(T::Array[OpenAI::Models::Audio::Transcription::Logprob]) + end + def logprobs=(_) + end + # Represents a transcription response returned by model, based on the provided # input. - sig { params(text: String).returns(T.attached_class) } - def self.new(text:) + sig do + params(text: String, logprobs: T::Array[OpenAI::Models::Audio::Transcription::Logprob]) + .returns(T.attached_class) + end + def self.new(text:, logprobs: nil) end - sig { override.returns({text: String}) } + sig { override.returns({text: String, logprobs: T::Array[OpenAI::Models::Audio::Transcription::Logprob]}) } def to_hash end + + class Logprob < OpenAI::BaseModel + # The token in the transcription. + sig { returns(T.nilable(String)) } + def token + end + + sig { params(_: String).returns(String) } + def token=(_) + end + + # The bytes of the token. + sig { returns(T.nilable(T::Array[Float])) } + def bytes + end + + sig { params(_: T::Array[Float]).returns(T::Array[Float]) } + def bytes=(_) + end + + # The log probability of the token. 
+ sig { returns(T.nilable(Float)) } + def logprob + end + + sig { params(_: Float).returns(Float) } + def logprob=(_) + end + + sig { params(token: String, bytes: T::Array[Float], logprob: Float).returns(T.attached_class) } + def self.new(token: nil, bytes: nil, logprob: nil) + end + + sig { override.returns({token: String, bytes: T::Array[Float], logprob: Float}) } + def to_hash + end + end end end end diff --git a/rbi/lib/openai/models/audio/transcription_create_params.rbi b/rbi/lib/openai/models/audio/transcription_create_params.rbi index 29ecd8fb..026f8b77 100644 --- a/rbi/lib/openai/models/audio/transcription_create_params.rbi +++ b/rbi/lib/openai/models/audio/transcription_create_params.rbi @@ -17,8 +17,9 @@ module OpenAI def file=(_) end - # ID of the model to use. Only `whisper-1` (which is powered by our open source - # Whisper V2 model) is currently available. + # ID of the model to use. The options are `gpt-4o-transcribe`, + # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + # Whisper V2 model). sig { returns(T.any(String, Symbol)) } def model end @@ -27,6 +28,19 @@ module OpenAI def model=(_) end + # Additional information to include in the transcription response. `logprobs` will + # return the log probabilities of the tokens in the response to understand the + # model's confidence in the transcription. `logprobs` only works with + # response_format set to `json` and only with the models `gpt-4o-transcribe` and + # `gpt-4o-mini-transcribe`. + sig { returns(T.nilable(T::Array[Symbol])) } + def include + end + + sig { params(_: T::Array[Symbol]).returns(T::Array[Symbol]) } + def include=(_) + end + # The language of the input audio. Supplying the input language in # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) # format will improve accuracy and latency. @@ -51,7 +65,8 @@ module OpenAI end # The format of the output, in one of these options: `json`, `text`, `srt`, - # `verbose_json`, or `vtt`. + # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + # the only supported format is `json`. sig { returns(T.nilable(Symbol)) } def response_format end @@ -90,6 +105,7 @@ module OpenAI params( file: T.any(IO, StringIO), model: T.any(String, Symbol), + include: T::Array[Symbol], language: String, prompt: String, response_format: Symbol, @@ -102,6 +118,7 @@ module OpenAI def self.new( file:, model:, + include: nil, language: nil, prompt: nil, response_format: nil, @@ -117,6 +134,7 @@ module OpenAI { file: T.any(IO, StringIO), model: T.any(String, Symbol), + include: T::Array[Symbol], language: String, prompt: String, response_format: Symbol, @@ -129,8 +147,9 @@ module OpenAI def to_hash end - # ID of the model to use. Only `whisper-1` (which is powered by our open source - # Whisper V2 model) is currently available. + # ID of the model to use. The options are `gpt-4o-transcribe`, + # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + # Whisper V2 model). class Model < OpenAI::Union abstract! diff --git a/rbi/lib/openai/models/audio/transcription_include.rbi b/rbi/lib/openai/models/audio/transcription_include.rbi new file mode 100644 index 00000000..7a60b02a --- /dev/null +++ b/rbi/lib/openai/models/audio/transcription_include.rbi @@ -0,0 +1,15 @@ +# typed: strong + +module OpenAI + module Models + module Audio + class TranscriptionInclude < OpenAI::Enum + abstract! 
+ + Value = type_template(:out) { {fixed: Symbol} } + + LOGPROBS = :logprobs + end + end + end +end diff --git a/rbi/lib/openai/models/audio/transcription_stream_event.rbi b/rbi/lib/openai/models/audio/transcription_stream_event.rbi new file mode 100644 index 00000000..4c8cc6bc --- /dev/null +++ b/rbi/lib/openai/models/audio/transcription_stream_event.rbi @@ -0,0 +1,25 @@ +# typed: strong + +module OpenAI + module Models + module Audio + # Emitted when there is an additional text delta. This is also the first event + # emitted when the transcription starts. Only emitted when you + # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # with the `Stream` parameter set to `true`. + class TranscriptionStreamEvent < OpenAI::Union + abstract! + + Variants = + type_template(:out) do + { + fixed: T.any( + OpenAI::Models::Audio::TranscriptionTextDeltaEvent, + OpenAI::Models::Audio::TranscriptionTextDoneEvent + ) + } + end + end + end + end +end diff --git a/rbi/lib/openai/models/audio/transcription_text_delta_event.rbi b/rbi/lib/openai/models/audio/transcription_text_delta_event.rbi new file mode 100644 index 00000000..6c73838b --- /dev/null +++ b/rbi/lib/openai/models/audio/transcription_text_delta_event.rbi @@ -0,0 +1,102 @@ +# typed: strong + +module OpenAI + module Models + module Audio + class TranscriptionTextDeltaEvent < OpenAI::BaseModel + # The text delta that was additionally transcribed. + sig { returns(String) } + def delta + end + + sig { params(_: String).returns(String) } + def delta=(_) + end + + # The type of the event. Always `transcript.text.delta`. + sig { returns(Symbol) } + def type + end + + sig { params(_: Symbol).returns(Symbol) } + def type=(_) + end + + # The log probabilities of the delta. Only included if you + # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # with the `include[]` parameter set to `logprobs`. + sig { returns(T.nilable(T::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob])) } + def logprobs + end + + sig do + params(_: T::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob]) + .returns(T::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob]) + end + def logprobs=(_) + end + + # Emitted when there is an additional text delta. This is also the first event + # emitted when the transcription starts. Only emitted when you + # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # with the `Stream` parameter set to `true`. + sig do + params( + delta: String, + logprobs: T::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob], + type: Symbol + ) + .returns(T.attached_class) + end + def self.new(delta:, logprobs: nil, type: :"transcript.text.delta") + end + + sig do + override + .returns( + {delta: String, type: Symbol, logprobs: T::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob]} + ) + end + def to_hash + end + + class Logprob < OpenAI::BaseModel + # The token that was used to generate the log probability. + sig { returns(T.nilable(String)) } + def token + end + + sig { params(_: String).returns(String) } + def token=(_) + end + + # The bytes that were used to generate the log probability. + sig { returns(T.nilable(T::Array[T.anything])) } + def bytes + end + + sig { params(_: T::Array[T.anything]).returns(T::Array[T.anything]) } + def bytes=(_) + end + + # The log probability of the token. 
+ sig { returns(T.nilable(Float)) } + def logprob + end + + sig { params(_: Float).returns(Float) } + def logprob=(_) + end + + sig { params(token: String, bytes: T::Array[T.anything], logprob: Float).returns(T.attached_class) } + def self.new(token: nil, bytes: nil, logprob: nil) + end + + sig { override.returns({token: String, bytes: T::Array[T.anything], logprob: Float}) } + def to_hash + end + end + end + end + end +end diff --git a/rbi/lib/openai/models/audio/transcription_text_done_event.rbi b/rbi/lib/openai/models/audio/transcription_text_done_event.rbi new file mode 100644 index 00000000..fb616718 --- /dev/null +++ b/rbi/lib/openai/models/audio/transcription_text_done_event.rbi @@ -0,0 +1,103 @@ +# typed: strong + +module OpenAI + module Models + module Audio + class TranscriptionTextDoneEvent < OpenAI::BaseModel + # The text that was transcribed. + sig { returns(String) } + def text + end + + sig { params(_: String).returns(String) } + def text=(_) + end + + # The type of the event. Always `transcript.text.done`. + sig { returns(Symbol) } + def type + end + + sig { params(_: Symbol).returns(Symbol) } + def type=(_) + end + + # The log probabilities of the individual tokens in the transcription. Only + # included if you + # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # with the `include[]` parameter set to `logprobs`. + sig { returns(T.nilable(T::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob])) } + def logprobs + end + + sig do + params(_: T::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob]) + .returns(T::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob]) + end + def logprobs=(_) + end + + # Emitted when the transcription is complete. Contains the complete transcription + # text. Only emitted when you + # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # with the `Stream` parameter set to `true`. + sig do + params( + text: String, + logprobs: T::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob], + type: Symbol + ) + .returns(T.attached_class) + end + def self.new(text:, logprobs: nil, type: :"transcript.text.done") + end + + sig do + override + .returns( + {text: String, type: Symbol, logprobs: T::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob]} + ) + end + def to_hash + end + + class Logprob < OpenAI::BaseModel + # The token that was used to generate the log probability. + sig { returns(T.nilable(String)) } + def token + end + + sig { params(_: String).returns(String) } + def token=(_) + end + + # The bytes that were used to generate the log probability. + sig { returns(T.nilable(T::Array[T.anything])) } + def bytes + end + + sig { params(_: T::Array[T.anything]).returns(T::Array[T.anything]) } + def bytes=(_) + end + + # The log probability of the token. 
+ sig { returns(T.nilable(Float)) } + def logprob + end + + sig { params(_: Float).returns(Float) } + def logprob=(_) + end + + sig { params(token: String, bytes: T::Array[T.anything], logprob: Float).returns(T.attached_class) } + def self.new(token: nil, bytes: nil, logprob: nil) + end + + sig { override.returns({token: String, bytes: T::Array[T.anything], logprob: Float}) } + def to_hash + end + end + end + end + end +end diff --git a/rbi/lib/openai/models/audio/translation_create_params.rbi b/rbi/lib/openai/models/audio/translation_create_params.rbi index fb5d4a71..ce2e6e77 100644 --- a/rbi/lib/openai/models/audio/translation_create_params.rbi +++ b/rbi/lib/openai/models/audio/translation_create_params.rbi @@ -99,6 +99,20 @@ module OpenAI Variants = type_template(:out) { {fixed: T.any(String, Symbol)} } end + + # The format of the output, in one of these options: `json`, `text`, `srt`, + # `verbose_json`, or `vtt`. + class ResponseFormat < OpenAI::Enum + abstract! + + Value = type_template(:out) { {fixed: Symbol} } + + JSON = :json + TEXT = :text + SRT = :srt + VERBOSE_JSON = :verbose_json + VTT = :vtt + end end end end diff --git a/rbi/lib/openai/models/audio_model.rbi b/rbi/lib/openai/models/audio_model.rbi index 85348552..917ce7d8 100644 --- a/rbi/lib/openai/models/audio_model.rbi +++ b/rbi/lib/openai/models/audio_model.rbi @@ -8,6 +8,8 @@ module OpenAI Value = type_template(:out) { {fixed: Symbol} } WHISPER_1 = :"whisper-1" + GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe" + GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe" end end end diff --git a/rbi/lib/openai/models/audio_response_format.rbi b/rbi/lib/openai/models/audio_response_format.rbi index fb54aad0..405da3e2 100644 --- a/rbi/lib/openai/models/audio_response_format.rbi +++ b/rbi/lib/openai/models/audio_response_format.rbi @@ -3,7 +3,8 @@ module OpenAI module Models # The format of the output, in one of these options: `json`, `text`, `srt`, - # `verbose_json`, or `vtt`. + # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + # the only supported format is `json`. class AudioResponseFormat < OpenAI::Enum abstract! 
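To make the new streaming transcription path concrete, a short sketch of `#create_streaming` as wired up in the `lib/openai/resources/audio/transcriptions.rb` hunk above. Again an illustration only: it assumes a configured client, a local `audio.mp3`, and that the returned `OpenAI::Stream` can be iterated with `#each`; the class, method, and parameter names are the ones added by this patch.

    # Sketch only — not part of the patch.
    client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])

    stream = client.audio.transcriptions.create_streaming(
      file: File.open("audio.mp3", "rb"), # hypothetical local file
      model: :"gpt-4o-transcribe",
      include: [:logprobs]                # `json` output, gpt-4o-* models only
    )

    stream.each do |event|
      case event
      when OpenAI::Models::Audio::TranscriptionTextDeltaEvent
        print(event.delta)                # incremental transcript text
      when OpenAI::Models::Audio::TranscriptionTextDoneEvent
        puts                              # full transcript is in event.text
      end
    end

Note the division of labor the patch establishes: `#create` now raises an `ArgumentError` when `stream` is passed, steering callers to `#create_streaming`, which in turn forces `stream: true` and sends the request with an `accept: text/event-stream` header so the response is parsed as `TranscriptionStreamEvent`s.
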
diff --git a/rbi/lib/openai/models/chat/chat_completion_chunk.rbi b/rbi/lib/openai/models/chat/chat_completion_chunk.rbi index c2451af5..647695a9 100644 --- a/rbi/lib/openai/models/chat/chat_completion_chunk.rbi +++ b/rbi/lib/openai/models/chat/chat_completion_chunk.rbi @@ -86,7 +86,7 @@ module OpenAI def usage end - sig { params(_: OpenAI::Models::CompletionUsage).returns(OpenAI::Models::CompletionUsage) } + sig { params(_: T.nilable(OpenAI::Models::CompletionUsage)).returns(T.nilable(OpenAI::Models::CompletionUsage)) } def usage=(_) end @@ -101,7 +101,7 @@ module OpenAI model: String, service_tier: T.nilable(Symbol), system_fingerprint: String, - usage: OpenAI::Models::CompletionUsage, + usage: T.nilable(OpenAI::Models::CompletionUsage), object: Symbol ) .returns(T.attached_class) @@ -129,7 +129,7 @@ module OpenAI object: Symbol, service_tier: T.nilable(Symbol), system_fingerprint: String, - usage: OpenAI::Models::CompletionUsage + usage: T.nilable(OpenAI::Models::CompletionUsage) } ) end diff --git a/rbi/lib/openai/resources/audio/speech.rbi b/rbi/lib/openai/resources/audio/speech.rbi index ae6f4be5..e2e85216 100644 --- a/rbi/lib/openai/resources/audio/speech.rbi +++ b/rbi/lib/openai/resources/audio/speech.rbi @@ -10,6 +10,7 @@ module OpenAI input: String, model: T.any(String, Symbol), voice: Symbol, + instructions: String, response_format: Symbol, speed: Float, request_options: T.nilable(T.any(OpenAI::RequestOptions, T::Hash[Symbol, T.anything])) @@ -20,13 +21,16 @@ module OpenAI # The text to generate audio for. The maximum length is 4096 characters. input:, # One of the available [TTS models](https://platform.openai.com/docs/models#tts): - # `tts-1` or `tts-1-hd` + # `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. model:, # The voice to use when generating the audio. Supported voices are `alloy`, `ash`, # `coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the # voices are available in the # [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). voice:, + # Control the voice of your generated audio with additional instructions. Does not + # work with `tts-1` or `tts-1-hd`. + instructions: nil, # The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, # `wav`, and `pcm`. response_format: nil, diff --git a/rbi/lib/openai/resources/audio/transcriptions.rbi b/rbi/lib/openai/resources/audio/transcriptions.rbi index 50e5c416..a1340034 100644 --- a/rbi/lib/openai/resources/audio/transcriptions.rbi +++ b/rbi/lib/openai/resources/audio/transcriptions.rbi @@ -9,11 +9,13 @@ module OpenAI params( file: T.any(IO, StringIO), model: T.any(String, Symbol), + include: T::Array[Symbol], language: String, prompt: String, response_format: Symbol, temperature: Float, timestamp_granularities: T::Array[Symbol], + stream: T.noreturn, request_options: T.nilable(T.any(OpenAI::RequestOptions, T::Hash[Symbol, T.anything])) ) .returns(T.any(OpenAI::Models::Audio::Transcription, OpenAI::Models::Audio::TranscriptionVerbose)) @@ -22,9 +24,16 @@ module OpenAI # The audio file object (not file name) to transcribe, in one of these formats: # flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. file:, - # ID of the model to use. Only `whisper-1` (which is powered by our open source - # Whisper V2 model) is currently available. + # ID of the model to use. The options are `gpt-4o-transcribe`, + # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + # Whisper V2 model). 
model:, + # Additional information to include in the transcription response. `logprobs` will + # return the log probabilities of the tokens in the response to understand the + # model's confidence in the transcription. `logprobs` only works with + # response_format set to `json` and only with the models `gpt-4o-transcribe` and + # `gpt-4o-mini-transcribe`. + include: nil, # The language of the input audio. Supplying the input language in # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) # format will improve accuracy and latency. @@ -35,7 +44,8 @@ module OpenAI # should match the audio language. prompt: nil, # The format of the output, in one of these options: `json`, `text`, `srt`, - # `verbose_json`, or `vtt`. + # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + # the only supported format is `json`. response_format: nil, # The sampling temperature, between 0 and 1. Higher values like 0.8 will make the # output more random, while lower values like 0.2 will make it more focused and @@ -49,6 +59,78 @@ module OpenAI # is no additional latency for segment timestamps, but generating word timestamps # incurs additional latency. timestamp_granularities: nil, + # There is no need to provide `stream:`. Instead, use `#create_streaming` or + # `#create` for streaming and non-streaming use cases, respectively. + stream: false, + request_options: {} + ) + end + + # Transcribes audio into the input language. + sig do + params( + file: T.any(IO, StringIO), + model: T.any(String, Symbol), + include: T::Array[Symbol], + language: String, + prompt: String, + response_format: Symbol, + temperature: Float, + timestamp_granularities: T::Array[Symbol], + stream: T.noreturn, + request_options: T.nilable(T.any(OpenAI::RequestOptions, T::Hash[Symbol, T.anything])) + ) + .returns( + OpenAI::Stream[ + T.any( + OpenAI::Models::Audio::TranscriptionTextDeltaEvent, + OpenAI::Models::Audio::TranscriptionTextDoneEvent + ) + ] + ) + end + def create_streaming( + # The audio file object (not file name) to transcribe, in one of these formats: + # flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + file:, + # ID of the model to use. The options are `gpt-4o-transcribe`, + # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + # Whisper V2 model). + model:, + # Additional information to include in the transcription response. `logprobs` will + # return the log probabilities of the tokens in the response to understand the + # model's confidence in the transcription. `logprobs` only works with + # response_format set to `json` and only with the models `gpt-4o-transcribe` and + # `gpt-4o-mini-transcribe`. + include: nil, + # The language of the input audio. Supplying the input language in + # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + # format will improve accuracy and latency. + language: nil, + # An optional text to guide the model's style or continue a previous audio + # segment. The + # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + # should match the audio language. + prompt: nil, + # The format of the output, in one of these options: `json`, `text`, `srt`, + # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + # the only supported format is `json`. + response_format: nil, + # The sampling temperature, between 0 and 1. 
Higher values like 0.8 will make the + # output more random, while lower values like 0.2 will make it more focused and + # deterministic. If set to 0, the model will use + # [log probability](https://en.wikipedia.org/wiki/Log_probability) to + # automatically increase the temperature until certain thresholds are hit. + temperature: nil, + # The timestamp granularities to populate for this transcription. + # `response_format` must be set `verbose_json` to use timestamp granularities. + # Either or both of these options are supported: `word`, or `segment`. Note: There + # is no additional latency for segment timestamps, but generating word timestamps + # incurs additional latency. + timestamp_granularities: nil, + # There is no need to provide `stream:`. Instead, use `#create_streaming` or + # `#create` for streaming and non-streaming use cases, respectively. + stream: true, request_options: {} ) end diff --git a/sig/openai/models/audio/speech_create_params.rbs b/sig/openai/models/audio/speech_create_params.rbs index ae9debbf..eb21a97f 100644 --- a/sig/openai/models/audio/speech_create_params.rbs +++ b/sig/openai/models/audio/speech_create_params.rbs @@ -6,6 +6,7 @@ module OpenAI input: String, model: OpenAI::Models::Audio::SpeechCreateParams::model, voice: OpenAI::Models::Audio::SpeechCreateParams::voice, + instructions: String, response_format: OpenAI::Models::Audio::SpeechCreateParams::response_format, speed: Float } @@ -21,6 +22,10 @@ module OpenAI attr_accessor voice: OpenAI::Models::Audio::SpeechCreateParams::voice + attr_reader instructions: String? + + def instructions=: (String) -> String + attr_reader response_format: OpenAI::Models::Audio::SpeechCreateParams::response_format? def response_format=: ( @@ -35,6 +40,7 @@ module OpenAI input: String, model: OpenAI::Models::Audio::SpeechCreateParams::model, voice: OpenAI::Models::Audio::SpeechCreateParams::voice, + ?instructions: String, ?response_format: OpenAI::Models::Audio::SpeechCreateParams::response_format, ?speed: Float, ?request_options: OpenAI::request_opts diff --git a/sig/openai/models/audio/speech_model.rbs b/sig/openai/models/audio/speech_model.rbs index 7ab47f1f..357eaa4c 100644 --- a/sig/openai/models/audio/speech_model.rbs +++ b/sig/openai/models/audio/speech_model.rbs @@ -1,11 +1,12 @@ module OpenAI module Models module Audio - type speech_model = :"tts-1" | :"tts-1-hd" + type speech_model = :"tts-1" | :"tts-1-hd" | :"gpt-4o-mini-tts" class SpeechModel < OpenAI::Enum TTS_1: :"tts-1" TTS_1_HD: :"tts-1-hd" + GPT_4O_MINI_TTS: :"gpt-4o-mini-tts" def self.values: -> ::Array[OpenAI::Models::Audio::speech_model] end diff --git a/sig/openai/models/audio/transcription.rbs b/sig/openai/models/audio/transcription.rbs index 0ea5f955..3f9bf1d4 100644 --- a/sig/openai/models/audio/transcription.rbs +++ b/sig/openai/models/audio/transcription.rbs @@ -1,14 +1,51 @@ module OpenAI module Models module Audio - type transcription = { text: String } + type transcription = + { + text: String, + logprobs: ::Array[OpenAI::Models::Audio::Transcription::Logprob] + } class Transcription < OpenAI::BaseModel attr_accessor text: String - def initialize: (text: String) -> void + attr_reader logprobs: ::Array[OpenAI::Models::Audio::Transcription::Logprob]? 
diff --git a/sig/openai/models/audio/transcription.rbs b/sig/openai/models/audio/transcription.rbs
index 0ea5f955..3f9bf1d4 100644
--- a/sig/openai/models/audio/transcription.rbs
+++ b/sig/openai/models/audio/transcription.rbs
@@ -1,14 +1,51 @@
 module OpenAI
   module Models
     module Audio
-      type transcription = { text: String }
+      type transcription =
+        {
+          text: String,
+          logprobs: ::Array[OpenAI::Models::Audio::Transcription::Logprob]
+        }
 
       class Transcription < OpenAI::BaseModel
         attr_accessor text: String
 
-        def initialize: (text: String) -> void
+        attr_reader logprobs: ::Array[OpenAI::Models::Audio::Transcription::Logprob]?
+
+        def logprobs=: (
+          ::Array[OpenAI::Models::Audio::Transcription::Logprob]
+        ) -> ::Array[OpenAI::Models::Audio::Transcription::Logprob]
+
+        def initialize: (
+          text: String,
+          ?logprobs: ::Array[OpenAI::Models::Audio::Transcription::Logprob]
+        ) -> void
 
         def to_hash: -> OpenAI::Models::Audio::transcription
+
+        type logprob = { token: String, bytes: ::Array[Float], logprob: Float }
+
+        class Logprob < OpenAI::BaseModel
+          attr_reader token: String?
+
+          def token=: (String) -> String
+
+          attr_reader bytes: ::Array[Float]?
+
+          def bytes=: (::Array[Float]) -> ::Array[Float]
+
+          attr_reader logprob: Float?
+
+          def logprob=: (Float) -> Float
+
+          def initialize: (
+            ?token: String,
+            ?bytes: ::Array[Float],
+            ?logprob: Float
+          ) -> void
+
+          def to_hash: -> OpenAI::Models::Audio::Transcription::logprob
+        end
       end
     end
   end
diff --git a/sig/openai/models/audio/transcription_create_params.rbs b/sig/openai/models/audio/transcription_create_params.rbs
index 16280f3e..28f79a92 100644
--- a/sig/openai/models/audio/transcription_create_params.rbs
+++ b/sig/openai/models/audio/transcription_create_params.rbs
@@ -5,6 +5,7 @@ module OpenAI
         {
           file: (IO | StringIO),
           model: OpenAI::Models::Audio::TranscriptionCreateParams::model,
+          include: ::Array[OpenAI::Models::Audio::transcription_include],
           language: String,
           prompt: String,
           response_format: OpenAI::Models::audio_response_format,
@@ -21,6 +22,12 @@ module OpenAI
 
         attr_accessor model: OpenAI::Models::Audio::TranscriptionCreateParams::model
 
+        attr_reader include: ::Array[OpenAI::Models::Audio::transcription_include]?
+
+        def include=: (
+          ::Array[OpenAI::Models::Audio::transcription_include]
+        ) -> ::Array[OpenAI::Models::Audio::transcription_include]
+
         attr_reader language: String?
 
         def language=: (String) -> String
@@ -48,6 +55,7 @@ module OpenAI
         def initialize: (
           file: IO | StringIO,
           model: OpenAI::Models::Audio::TranscriptionCreateParams::model,
+          ?include: ::Array[OpenAI::Models::Audio::transcription_include],
           ?language: String,
           ?prompt: String,
           ?response_format: OpenAI::Models::audio_response_format,
diff --git a/sig/openai/models/audio/transcription_include.rbs b/sig/openai/models/audio/transcription_include.rbs
new file mode 100644
index 00000000..cf06a929
--- /dev/null
+++ b/sig/openai/models/audio/transcription_include.rbs
@@ -0,0 +1,13 @@
+module OpenAI
+  module Models
+    module Audio
+      type transcription_include = :logprobs
+
+      class TranscriptionInclude < OpenAI::Enum
+        LOGPROBS: :logprobs
+
+        def self.values: -> ::Array[OpenAI::Models::Audio::transcription_include]
+      end
+    end
+  end
+end
diff --git a/sig/openai/models/audio/transcription_stream_event.rbs b/sig/openai/models/audio/transcription_stream_event.rbs
new file mode 100644
index 00000000..b9233feb
--- /dev/null
+++ b/sig/openai/models/audio/transcription_stream_event.rbs
@@ -0,0 +1,13 @@
+module OpenAI
+  module Models
+    module Audio
+      type transcription_stream_event =
+        OpenAI::Models::Audio::TranscriptionTextDeltaEvent
+        | OpenAI::Models::Audio::TranscriptionTextDoneEvent
+
+      class TranscriptionStreamEvent < OpenAI::Union
+        def self.variants: -> [OpenAI::Models::Audio::TranscriptionTextDeltaEvent, OpenAI::Models::Audio::TranscriptionTextDoneEvent]
+      end
+    end
+  end
+end
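Combining the new `include` option with the extended `Transcription` model above, log probabilities can be requested on a non-streaming call. A minimal sketch — the audio path is illustrative, and per the doc comments `logprobs` requires `response_format: :json` plus one of the `gpt-4o` transcribe models:

```ruby
require "openai"

client = OpenAI::Client.new

transcription = client.audio.transcriptions.create(
  file: File.open("speech.mp3", "rb"),
  model: :"gpt-4o-transcribe",
  include: [:logprobs],
  response_format: :json
)

puts transcription.text
# Each logprob entry carries the token, its raw bytes, and its log probability.
transcription.logprobs&.each do |lp|
  puts "#{lp.token}: #{lp.logprob}"
end
```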
diff --git a/sig/openai/models/audio/transcription_text_delta_event.rbs b/sig/openai/models/audio/transcription_text_delta_event.rbs
new file mode 100644
index 00000000..373c6aed
--- /dev/null
+++ b/sig/openai/models/audio/transcription_text_delta_event.rbs
@@ -0,0 +1,56 @@
+module OpenAI
+  module Models
+    module Audio
+      type transcription_text_delta_event =
+        {
+          delta: String,
+          type: :"transcript.text.delta",
+          logprobs: ::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob]
+        }
+
+      class TranscriptionTextDeltaEvent < OpenAI::BaseModel
+        attr_accessor delta: String
+
+        attr_accessor type: :"transcript.text.delta"
+
+        attr_reader logprobs: ::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob]?
+
+        def logprobs=: (
+          ::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob]
+        ) -> ::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob]
+
+        def initialize: (
+          delta: String,
+          ?logprobs: ::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob],
+          ?type: :"transcript.text.delta"
+        ) -> void
+
+        def to_hash: -> OpenAI::Models::Audio::transcription_text_delta_event
+
+        type logprob = { token: String, bytes: ::Array[top], logprob: Float }
+
+        class Logprob < OpenAI::BaseModel
+          attr_reader token: String?
+
+          def token=: (String) -> String
+
+          attr_reader bytes: ::Array[top]?
+
+          def bytes=: (::Array[top]) -> ::Array[top]
+
+          attr_reader logprob: Float?
+
+          def logprob=: (Float) -> Float
+
+          def initialize: (
+            ?token: String,
+            ?bytes: ::Array[top],
+            ?logprob: Float
+          ) -> void
+
+          def to_hash: -> OpenAI::Models::Audio::TranscriptionTextDeltaEvent::logprob
+        end
+      end
+    end
+  end
+end
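The delta/done event pair is what `#create_streaming` yields. A sketch of a consumer, assuming `OpenAI::Stream` is enumerable as the signatures earlier suggest; the audio path is illustrative:

```ruby
require "openai"

client = OpenAI::Client.new

stream = client.audio.transcriptions.create_streaming(
  file: File.open("speech.mp3", "rb"),
  model: :"gpt-4o-mini-transcribe"
)

stream.each do |event|
  case event
  when OpenAI::Models::Audio::TranscriptionTextDeltaEvent
    print(event.delta) # incremental text as it is recognized
  when OpenAI::Models::Audio::TranscriptionTextDoneEvent
    puts("\n[done] #{event.text}")
  end
end
```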
diff --git a/sig/openai/models/audio/transcription_text_done_event.rbs b/sig/openai/models/audio/transcription_text_done_event.rbs
new file mode 100644
index 00000000..f1f1dd1d
--- /dev/null
+++ b/sig/openai/models/audio/transcription_text_done_event.rbs
@@ -0,0 +1,56 @@
+module OpenAI
+  module Models
+    module Audio
+      type transcription_text_done_event =
+        {
+          text: String,
+          type: :"transcript.text.done",
+          logprobs: ::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob]
+        }
+
+      class TranscriptionTextDoneEvent < OpenAI::BaseModel
+        attr_accessor text: String
+
+        attr_accessor type: :"transcript.text.done"
+
+        attr_reader logprobs: ::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob]?
+
+        def logprobs=: (
+          ::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob]
+        ) -> ::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob]
+
+        def initialize: (
+          text: String,
+          ?logprobs: ::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob],
+          ?type: :"transcript.text.done"
+        ) -> void
+
+        def to_hash: -> OpenAI::Models::Audio::transcription_text_done_event
+
+        type logprob = { token: String, bytes: ::Array[top], logprob: Float }
+
+        class Logprob < OpenAI::BaseModel
+          attr_reader token: String?
+
+          def token=: (String) -> String
+
+          attr_reader bytes: ::Array[top]?
+
+          def bytes=: (::Array[top]) -> ::Array[top]
+
+          attr_reader logprob: Float?
+
+          def logprob=: (Float) -> Float
+
+          def initialize: (
+            ?token: String,
+            ?bytes: ::Array[top],
+            ?logprob: Float
+          ) -> void
+
+          def to_hash: -> OpenAI::Models::Audio::TranscriptionTextDoneEvent::logprob
+        end
+      end
+    end
+  end
+end
diff --git a/sig/openai/models/audio/translation_create_params.rbs b/sig/openai/models/audio/translation_create_params.rbs
index 252ed2dc..83dc3322 100644
--- a/sig/openai/models/audio/translation_create_params.rbs
+++ b/sig/openai/models/audio/translation_create_params.rbs
@@ -6,7 +6,7 @@ module OpenAI
         file: (IO | StringIO),
         model: OpenAI::Models::Audio::TranslationCreateParams::model,
         prompt: String,
-        response_format: OpenAI::Models::audio_response_format,
+        response_format: OpenAI::Models::Audio::TranslationCreateParams::response_format,
         temperature: Float
       }
       & OpenAI::request_parameters
@@ -23,11 +23,11 @@ module OpenAI
 
         def prompt=: (String) -> String
 
-        attr_reader response_format: OpenAI::Models::audio_response_format?
+        attr_reader response_format: OpenAI::Models::Audio::TranslationCreateParams::response_format?
 
         def response_format=: (
-          OpenAI::Models::audio_response_format
-        ) -> OpenAI::Models::audio_response_format
+          OpenAI::Models::Audio::TranslationCreateParams::response_format
+        ) -> OpenAI::Models::Audio::TranslationCreateParams::response_format
 
         attr_reader temperature: Float?
 
@@ -37,7 +37,7 @@ module OpenAI
           file: IO | StringIO,
           model: OpenAI::Models::Audio::TranslationCreateParams::model,
           ?prompt: String,
-          ?response_format: OpenAI::Models::audio_response_format,
+          ?response_format: OpenAI::Models::Audio::TranslationCreateParams::response_format,
           ?temperature: Float,
           ?request_options: OpenAI::request_opts
         ) -> void
@@ -49,6 +49,18 @@ module OpenAI
         class Model < OpenAI::Union
           def self.variants: -> [String, OpenAI::Models::audio_model]
         end
+
+        type response_format = :json | :text | :srt | :verbose_json | :vtt
+
+        class ResponseFormat < OpenAI::Enum
+          JSON: :json
+          TEXT: :text
+          SRT: :srt
+          VERBOSE_JSON: :verbose_json
+          VTT: :vtt
+
+          def self.values: -> ::Array[OpenAI::Models::Audio::TranslationCreateParams::response_format]
+        end
       end
     end
   end
diff --git a/sig/openai/models/audio_model.rbs b/sig/openai/models/audio_model.rbs
index f9841d31..72b67344 100644
--- a/sig/openai/models/audio_model.rbs
+++ b/sig/openai/models/audio_model.rbs
@@ -1,9 +1,12 @@
 module OpenAI
   module Models
-    type audio_model = :"whisper-1"
+    type audio_model =
+      :"whisper-1" | :"gpt-4o-transcribe" | :"gpt-4o-mini-transcribe"
 
     class AudioModel < OpenAI::Enum
       WHISPER_1: :"whisper-1"
+      GPT_4O_TRANSCRIBE: :"gpt-4o-transcribe"
+      GPT_4O_MINI_TRANSCRIBE: :"gpt-4o-mini-transcribe"
 
       def self.values: -> ::Array[OpenAI::Models::audio_model]
     end
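The expanded `audio_model` enum is also why the format caveats above matter: word and segment timestamps still require `whisper-1` with `verbose_json`, since the `gpt-4o` transcribe models only emit `json`. A minimal sketch (the audio path is illustrative):

```ruby
require "openai"

client = OpenAI::Client.new

# Returns a verbose transcription carrying per-word and per-segment timing data.
result = client.audio.transcriptions.create(
  file: File.open("speech.mp3", "rb"),
  model: OpenAI::Models::AudioModel::WHISPER_1, # i.e. :"whisper-1"
  response_format: :verbose_json,
  timestamp_granularities: [:word, :segment]
)
```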
diff --git a/sig/openai/models/chat/chat_completion_chunk.rbs b/sig/openai/models/chat/chat_completion_chunk.rbs
index 3eef3ef4..fa2494d9 100644
--- a/sig/openai/models/chat/chat_completion_chunk.rbs
+++ b/sig/openai/models/chat/chat_completion_chunk.rbs
@@ -13,7 +13,7 @@ module OpenAI
           object: :"chat.completion.chunk",
           service_tier: OpenAI::Models::Chat::ChatCompletionChunk::service_tier?,
           system_fingerprint: String,
-          usage: OpenAI::Models::CompletionUsage
+          usage: OpenAI::Models::CompletionUsage?
         }
 
       class ChatCompletionChunk < OpenAI::BaseModel
@@ -33,11 +33,7 @@ module OpenAI
 
         def system_fingerprint=: (String) -> String
 
-        attr_reader usage: OpenAI::Models::CompletionUsage?
-
-        def usage=: (
-          OpenAI::Models::CompletionUsage
-        ) -> OpenAI::Models::CompletionUsage
+        attr_accessor usage: OpenAI::Models::CompletionUsage?
 
         def initialize: (
           id: String,
@@ -46,7 +42,7 @@ module OpenAI
           model: String,
           ?service_tier: OpenAI::Models::Chat::ChatCompletionChunk::service_tier?,
           ?system_fingerprint: String,
-          ?usage: OpenAI::Models::CompletionUsage,
+          ?usage: OpenAI::Models::CompletionUsage?,
           ?object: :"chat.completion.chunk"
         ) -> void
diff --git a/sig/openai/resources/audio/speech.rbs b/sig/openai/resources/audio/speech.rbs
index 65002d04..7e7d117c 100644
--- a/sig/openai/resources/audio/speech.rbs
+++ b/sig/openai/resources/audio/speech.rbs
@@ -6,6 +6,7 @@ module OpenAI
         input: String,
         model: OpenAI::Models::Audio::SpeechCreateParams::model,
         voice: OpenAI::Models::Audio::SpeechCreateParams::voice,
+        ?instructions: String,
         ?response_format: OpenAI::Models::Audio::SpeechCreateParams::response_format,
         ?speed: Float,
         ?request_options: OpenAI::request_opts
diff --git a/sig/openai/resources/audio/transcriptions.rbs b/sig/openai/resources/audio/transcriptions.rbs
index 9ee728bb..b52531d0 100644
--- a/sig/openai/resources/audio/transcriptions.rbs
+++ b/sig/openai/resources/audio/transcriptions.rbs
@@ -5,6 +5,7 @@ module OpenAI
       def create: (
         file: IO | StringIO,
         model: OpenAI::Models::Audio::TranscriptionCreateParams::model,
+        ?include: ::Array[OpenAI::Models::Audio::transcription_include],
         ?language: String,
         ?prompt: String,
         ?response_format: OpenAI::Models::audio_response_format,
@@ -13,6 +14,18 @@ module OpenAI
         ?request_options: OpenAI::request_opts
       ) -> OpenAI::Models::Audio::transcription_create_response
 
+      def create_streaming: (
+        file: IO | StringIO,
+        model: OpenAI::Models::Audio::TranscriptionCreateParams::model,
+        ?include: ::Array[OpenAI::Models::Audio::transcription_include],
+        ?language: String,
+        ?prompt: String,
+        ?response_format: OpenAI::Models::audio_response_format,
+        ?temperature: Float,
+        ?timestamp_granularities: ::Array[OpenAI::Models::Audio::TranscriptionCreateParams::timestamp_granularity],
+        ?request_options: OpenAI::request_opts
+      ) -> OpenAI::Stream[OpenAI::Models::Audio::transcription_stream_event]
+
       def initialize: (client: OpenAI::Client) -> void
     end
diff --git a/sig/openai/resources/audio/translations.rbs b/sig/openai/resources/audio/translations.rbs
index f4c61adf..25ea82e3 100644
--- a/sig/openai/resources/audio/translations.rbs
+++ b/sig/openai/resources/audio/translations.rbs
@@ -6,7 +6,7 @@ module OpenAI
         file: IO | StringIO,
         model: OpenAI::Models::Audio::TranslationCreateParams::model,
         ?prompt: String,
-        ?response_format: OpenAI::Models::audio_response_format,
+        ?response_format: OpenAI::Models::Audio::TranslationCreateParams::response_format,
         ?temperature: Float,
         ?request_options: OpenAI::request_opts
       ) -> OpenAI::Models::Audio::translation_create_response
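Finally, the `ChatCompletionChunk` change above makes `usage` an explicitly nilable accessor, matching streamed responses with usage reporting enabled, where every chunk carries `usage: nil` except the last. A sketch, assuming chat completions follow the same `#create_streaming` convention as the transcriptions resource; the model and prompt are illustrative:

```ruby
require "openai"

client = OpenAI::Client.new

stream = client.chat.completions.create_streaming(
  model: "gpt-4o-mini",
  messages: [{role: :user, content: "Say hello."}],
  stream_options: {include_usage: true}
)

stream.each do |chunk|
  print(chunk.choices.first&.delta&.content)
  # `usage` is nil on every chunk except the final one.
  puts("\ntotal tokens: #{chunk.usage.total_tokens}") if chunk.usage
end
```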