2 changes: 1 addition & 1 deletion .stats.yml
@@ -1,2 +1,2 @@
configured_endpoints: 80
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-b26121d5df6eb5d3032a45a267473798b15fcfec76dd44a3256cf1238be05fa4.yml
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-c22f59c66aec7914b6ee653d3098d1c1c8c16c180d2a158e819c8ddbf476f74b.yml
4 changes: 4 additions & 0 deletions lib/openai.rb
@@ -43,7 +43,11 @@
require_relative "openai/models/audio/transcription"
require_relative "openai/models/audio/transcription_create_params"
require_relative "openai/models/audio/transcription_create_response"
require_relative "openai/models/audio/transcription_include"
require_relative "openai/models/audio/transcription_segment"
require_relative "openai/models/audio/transcription_stream_event"
require_relative "openai/models/audio/transcription_text_delta_event"
require_relative "openai/models/audio/transcription_text_done_event"
require_relative "openai/models/audio/transcription_verbose"
require_relative "openai/models/audio/transcription_word"
require_relative "openai/models/audio/translation"
20 changes: 16 additions & 4 deletions lib/openai/models/audio/speech_create_params.rb
@@ -16,7 +16,7 @@ class SpeechCreateParams < OpenAI::BaseModel

# @!attribute model
# One of the available [TTS models](https://platform.openai.com/docs/models#tts):
# `tts-1` or `tts-1-hd`
# `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`.
#
# @return [String, Symbol, OpenAI::Models::Audio::SpeechModel]
required :model, union: -> { OpenAI::Models::Audio::SpeechCreateParams::Model }
@@ -30,6 +30,17 @@ class SpeechCreateParams < OpenAI::BaseModel
# @return [Symbol, OpenAI::Models::Audio::SpeechCreateParams::Voice]
required :voice, enum: -> { OpenAI::Models::Audio::SpeechCreateParams::Voice }

# @!attribute [r] instructions
# Control the voice of your generated audio with additional instructions. Does not
# work with `tts-1` or `tts-1-hd`.
#
# @return [String, nil]
optional :instructions, String

# @!parse
# # @return [String]
# attr_writer :instructions

# @!attribute [r] response_format
#   The format to return audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`,
# `wav`, and `pcm`.
@@ -56,22 +67,23 @@ class SpeechCreateParams < OpenAI::BaseModel
# # @param input [String]
# # @param model [String, Symbol, OpenAI::Models::Audio::SpeechModel]
# # @param voice [Symbol, OpenAI::Models::Audio::SpeechCreateParams::Voice]
# # @param instructions [String]
# # @param response_format [Symbol, OpenAI::Models::Audio::SpeechCreateParams::ResponseFormat]
# # @param speed [Float]
# # @param request_options [OpenAI::RequestOptions, Hash{Symbol=>Object}]
# #
# def initialize(input:, model:, voice:, response_format: nil, speed: nil, request_options: {}, **) = super
# def initialize(input:, model:, voice:, instructions: nil, response_format: nil, speed: nil, request_options: {}, **) = super

# def initialize: (Hash | OpenAI::BaseModel) -> void

# @abstract
#
# One of the available [TTS models](https://platform.openai.com/docs/models#tts):
# `tts-1` or `tts-1-hd`
# `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`.
class Model < OpenAI::Union
variant String

# One of the available [TTS models](https://platform.openai.com/docs/models#tts): `tts-1` or `tts-1-hd`
# One of the available [TTS models](https://platform.openai.com/docs/models#tts): `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`.
variant enum: -> { OpenAI::Models::Audio::SpeechModel }

# @!parse
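To illustrate the new surface above, here is a minimal sketch of synthesizing speech with `gpt-4o-mini-tts` and the new `instructions` parameter. The client construction, the `audio.speech.create` method, and the binary handling are assumptions about the generated SDK, not part of this diff.

```ruby
require "openai"

# Hypothetical usage sketch; the client and method names are assumed, and only
# the parameters (`model`, `voice`, `input`, `instructions`) come from
# SpeechCreateParams above.
client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])

response = client.audio.speech.create(
  model: "gpt-4o-mini-tts",
  voice: :alloy,
  input: "The quick brown fox jumped over the lazy dog.",
  # Ignored by `tts-1` / `tts-1-hd` per the attribute docs above.
  instructions: "Speak slowly, in a calm and reassuring tone."
)

# The response is assumed to be an IO-like object holding the audio bytes.
File.binwrite("speech.mp3", response.read)
```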
1 change: 1 addition & 0 deletions lib/openai/models/audio/speech_model.rb
@@ -7,6 +7,7 @@ module Audio
class SpeechModel < OpenAI::Enum
TTS_1 = :"tts-1"
TTS_1_HD = :"tts-1-hd"
GPT_4O_MINI_TTS = :"gpt-4o-mini-tts"

finalize!
end
56 changes: 55 additions & 1 deletion lib/openai/models/audio/transcription.rb
@@ -10,15 +10,69 @@ class Transcription < OpenAI::BaseModel
# @return [String]
required :text, String

# @!attribute [r] logprobs
# The log probabilities of the tokens in the transcription. Only returned with the
# models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` if `logprobs` is added
# to the `include` array.
#
# @return [Array<OpenAI::Models::Audio::Transcription::Logprob>, nil]
optional :logprobs, -> { OpenAI::ArrayOf[OpenAI::Models::Audio::Transcription::Logprob] }

# @!parse
# # @return [Array<OpenAI::Models::Audio::Transcription::Logprob>]
# attr_writer :logprobs

# @!parse
# # Represents a transcription response returned by the model, based on the provided
# # input.
# #
# # @param text [String]
# # @param logprobs [Array<OpenAI::Models::Audio::Transcription::Logprob>]
# #
# def initialize(text:, **) = super
# def initialize(text:, logprobs: nil, **) = super

# def initialize: (Hash | OpenAI::BaseModel) -> void

class Logprob < OpenAI::BaseModel
# @!attribute [r] token
# The token in the transcription.
#
# @return [String, nil]
optional :token, String

# @!parse
# # @return [String]
# attr_writer :token

# @!attribute [r] bytes
# The bytes of the token.
#
# @return [Array<Float>, nil]
optional :bytes, OpenAI::ArrayOf[Float]

# @!parse
# # @return [Array<Float>]
# attr_writer :bytes

# @!attribute [r] logprob
# The log probability of the token.
#
# @return [Float, nil]
optional :logprob, Float

# @!parse
# # @return [Float]
# attr_writer :logprob

# @!parse
# # @param token [String]
# # @param bytes [Array<Float>]
# # @param logprob [Float]
# #
# def initialize(token: nil, bytes: nil, logprob: nil, **) = super

# def initialize: (Hash | OpenAI::BaseModel) -> void
end
end
end
end
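As a reading aid for the new `logprobs` attribute, a short sketch of walking the nested `Logprob` objects on a returned `Transcription`; it assumes `transcription` was obtained from a request that set `include: [:logprobs]` with one of the gpt-4o transcribe models.

```ruby
# Hypothetical: `transcription` is an OpenAI::Models::Audio::Transcription
# obtained elsewhere; `logprobs` is nil unless it was requested via `include`.
puts transcription.text

transcription.logprobs&.each do |lp|
  # Each entry exposes `token`, `bytes`, and `logprob` per the model above.
  printf("%-20s %10.4f\n", lp.token, lp.logprob)
end
```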
31 changes: 25 additions & 6 deletions lib/openai/models/audio/transcription_create_params.rb
@@ -16,12 +16,27 @@ class TranscriptionCreateParams < OpenAI::BaseModel
required :file, IO

# @!attribute model
# ID of the model to use. Only `whisper-1` (which is powered by our open source
# Whisper V2 model) is currently available.
# ID of the model to use. The options are `gpt-4o-transcribe`,
# `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
# Whisper V2 model).
#
# @return [String, Symbol, OpenAI::Models::AudioModel]
required :model, union: -> { OpenAI::Models::Audio::TranscriptionCreateParams::Model }

# @!attribute [r] include
# Additional information to include in the transcription response. `logprobs` will
# return the log probabilities of the tokens in the response to understand the
# model's confidence in the transcription. `logprobs` only works with
# response_format set to `json` and only with the models `gpt-4o-transcribe` and
# `gpt-4o-mini-transcribe`.
#
# @return [Array<Symbol, OpenAI::Models::Audio::TranscriptionInclude>, nil]
optional :include, -> { OpenAI::ArrayOf[enum: OpenAI::Models::Audio::TranscriptionInclude] }

# @!parse
# # @return [Array<Symbol, OpenAI::Models::Audio::TranscriptionInclude>]
# attr_writer :include

# @!attribute [r] language
# The language of the input audio. Supplying the input language in
# [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
@@ -49,7 +64,8 @@ class TranscriptionCreateParams < OpenAI::BaseModel

# @!attribute [r] response_format
# The format of the output, in one of these options: `json`, `text`, `srt`,
# `verbose_json`, or `vtt`.
# `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
# the only supported format is `json`.
#
# @return [Symbol, OpenAI::Models::AudioResponseFormat, nil]
optional :response_format, enum: -> { OpenAI::Models::AudioResponseFormat }
@@ -90,6 +106,7 @@ class TranscriptionCreateParams < OpenAI::BaseModel
# @!parse
# # @param file [IO, StringIO]
# # @param model [String, Symbol, OpenAI::Models::AudioModel]
# # @param include [Array<Symbol, OpenAI::Models::Audio::TranscriptionInclude>]
# # @param language [String]
# # @param prompt [String]
# # @param response_format [Symbol, OpenAI::Models::AudioResponseFormat]
@@ -100,6 +117,7 @@ class TranscriptionCreateParams < OpenAI::BaseModel
# def initialize(
# file:,
# model:,
# include: nil,
# language: nil,
# prompt: nil,
# response_format: nil,
@@ -115,12 +133,13 @@ class TranscriptionCreateParams < OpenAI::BaseModel

# @abstract
#
# ID of the model to use. Only `whisper-1` (which is powered by our open source
# Whisper V2 model) is currently available.
# ID of the model to use. The options are `gpt-4o-transcribe`,
# `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
# Whisper V2 model).
class Model < OpenAI::Union
variant String

# ID of the model to use. Only `whisper-1` (which is powered by our open source Whisper V2 model) is currently available.
# ID of the model to use. The options are `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source Whisper V2 model).
variant enum: -> { OpenAI::Models::AudioModel }

# @!parse
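For context, a sketch of a transcription request that exercises the new `include` parameter; the `client.audio.transcriptions.create` call is assumed from the usual generated resource layout and is not shown in this diff.

```ruby
# Hypothetical usage sketch; only the parameter names and constraints come from
# TranscriptionCreateParams above.
transcription = client.audio.transcriptions.create(
  file: File.open("meeting.mp3", "rb"),
  model: "gpt-4o-transcribe",
  include: [:logprobs],      # logprobs only work with the gpt-4o transcribe models
  response_format: :json,    # the only format those models support
  language: "en"
)
```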
14 changes: 14 additions & 0 deletions lib/openai/models/audio/transcription_include.rb
@@ -0,0 +1,14 @@
# frozen_string_literal: true

module OpenAI
module Models
module Audio
# @abstract
class TranscriptionInclude < OpenAI::Enum
LOGPROBS = :logprobs

finalize!
end
end
end
end
29 changes: 29 additions & 0 deletions lib/openai/models/audio/transcription_stream_event.rb
@@ -0,0 +1,29 @@
# frozen_string_literal: true

module OpenAI
module Models
module Audio
# @abstract
#
# Emitted when there is an additional text delta. This is also the first event
# emitted when the transcription starts. Only emitted when you
# [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
# with the `Stream` parameter set to `true`.
class TranscriptionStreamEvent < OpenAI::Union
discriminator :type

# Emitted when there is an additional text delta. This is also the first event emitted when the transcription starts. Only emitted when you [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with the `Stream` parameter set to `true`.
variant :"transcript.text.delta", -> { OpenAI::Models::Audio::TranscriptionTextDeltaEvent }

# Emitted when the transcription is complete. Contains the complete transcription text. Only emitted when you [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with the `Stream` parameter set to `true`.
variant :"transcript.text.done", -> { OpenAI::Models::Audio::TranscriptionTextDoneEvent }

# @!parse
# class << self
# # @return [Array(OpenAI::Models::Audio::TranscriptionTextDeltaEvent, OpenAI::Models::Audio::TranscriptionTextDoneEvent)]
# def variants; end
# end
end
end
end
end
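Since the union discriminates on `type`, consumers can branch on the two concrete event classes. A small sketch, with the handler shape assumed:

```ruby
# Hypothetical handler; `text` on the done event is assumed from its
# description ("contains the complete transcription text"), not from this diff.
def handle_transcription_event(event)
  case event
  when OpenAI::Models::Audio::TranscriptionTextDeltaEvent
    print(event.delta)
  when OpenAI::Models::Audio::TranscriptionTextDoneEvent
    puts("\n[done] #{event.text}")
  end
end
```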
88 changes: 88 additions & 0 deletions lib/openai/models/audio/transcription_text_delta_event.rb
@@ -0,0 +1,88 @@
# frozen_string_literal: true

module OpenAI
module Models
module Audio
class TranscriptionTextDeltaEvent < OpenAI::BaseModel
# @!attribute delta
# The text delta that was additionally transcribed.
#
# @return [String]
required :delta, String

# @!attribute type
# The type of the event. Always `transcript.text.delta`.
#
# @return [Symbol, :"transcript.text.delta"]
required :type, const: :"transcript.text.delta"

# @!attribute [r] logprobs
# The log probabilities of the delta. Only included if you
# [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
# with the `include[]` parameter set to `logprobs`.
#
# @return [Array<OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob>, nil]
optional :logprobs, -> { OpenAI::ArrayOf[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob] }

# @!parse
# # @return [Array<OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob>]
# attr_writer :logprobs

# @!parse
# # Emitted when there is an additional text delta. This is also the first event
# # emitted when the transcription starts. Only emitted when you
# # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
# # with the `Stream` parameter set to `true`.
# #
# # @param delta [String]
# # @param logprobs [Array<OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob>]
# # @param type [Symbol, :"transcript.text.delta"]
# #
# def initialize(delta:, logprobs: nil, type: :"transcript.text.delta", **) = super

# def initialize: (Hash | OpenAI::BaseModel) -> void

class Logprob < OpenAI::BaseModel
# @!attribute [r] token
# The token that was used to generate the log probability.
#
# @return [String, nil]
optional :token, String

# @!parse
# # @return [String]
# attr_writer :token

# @!attribute [r] bytes
# The bytes that were used to generate the log probability.
#
# @return [Array<Object>, nil]
optional :bytes, OpenAI::ArrayOf[OpenAI::Unknown]

# @!parse
# # @return [Array<Object>]
# attr_writer :bytes

# @!attribute [r] logprob
# The log probability of the token.
#
# @return [Float, nil]
optional :logprob, Float

# @!parse
# # @return [Float]
# attr_writer :logprob

# @!parse
# # @param token [String]
# # @param bytes [Array<Object>]
# # @param logprob [Float]
# #
# def initialize(token: nil, bytes: nil, logprob: nil, **) = super

# def initialize: (Hash | OpenAI::BaseModel) -> void
end
end
end
end
end
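Putting the delta event together with streaming, a sketch of accumulating text and surfacing per-token log probabilities; the streaming entry point (named `create_streaming` here) is an assumption, and only the event and `Logprob` shapes come from this file.

```ruby
# Hypothetical streaming loop over transcription events.
text = +""

stream = client.audio.transcriptions.create_streaming(
  file: File.open("meeting.mp3", "rb"),
  model: "gpt-4o-mini-transcribe",
  include: [:logprobs]
)

stream.each do |event|
  next unless event.is_a?(OpenAI::Models::Audio::TranscriptionTextDeltaEvent)

  text << event.delta
  event.logprobs&.each { |lp| warn("#{lp.token}: #{lp.logprob}") }
end

puts text
```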