From ef86f9510046799d3b0577ad1ea69755c55992cd Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 20 Mar 2025 16:09:01 +0000 Subject: [PATCH] feat(api): new models for TTS, STT, + new audio features for Realtime --- .stats.yml | 2 +- lib/openai.rb | 4 + .../models/audio/speech_create_params.rb | 20 +++- lib/openai/models/audio/speech_model.rb | 1 + lib/openai/models/audio/transcription.rb | 56 +++++++++- .../audio/transcription_create_params.rb | 31 +++++- .../models/audio/transcription_include.rb | 14 +++ .../audio/transcription_stream_event.rb | 29 +++++ .../audio/transcription_text_delta_event.rb | 88 +++++++++++++++ .../audio/transcription_text_done_event.rb | 89 +++++++++++++++ .../models/audio/translation_create_params.rb | 22 +++- lib/openai/models/audio_model.rb | 2 + lib/openai/models/audio_response_format.rb | 3 +- .../models/chat/chat_completion_chunk.rb | 10 +- lib/openai/resources/audio/speech.rb | 5 +- lib/openai/resources/audio/transcriptions.rb | 81 +++++++++++++- lib/openai/resources/audio/translations.rb | 2 +- .../models/audio/speech_create_params.rbi | 18 ++- rbi/lib/openai/models/audio/speech_model.rbi | 1 + rbi/lib/openai/models/audio/transcription.rbi | 60 +++++++++- .../audio/transcription_create_params.rbi | 29 ++++- .../models/audio/transcription_include.rbi | 15 +++ .../audio/transcription_stream_event.rbi | 25 +++++ .../audio/transcription_text_delta_event.rbi | 102 +++++++++++++++++ .../audio/transcription_text_done_event.rbi | 103 ++++++++++++++++++ .../audio/translation_create_params.rbi | 14 +++ rbi/lib/openai/models/audio_model.rbi | 2 + .../openai/models/audio_response_format.rbi | 3 +- .../models/chat/chat_completion_chunk.rbi | 6 +- rbi/lib/openai/resources/audio/speech.rbi | 6 +- .../openai/resources/audio/transcriptions.rbi | 88 ++++++++++++++- .../models/audio/speech_create_params.rbs | 6 + sig/openai/models/audio/speech_model.rbs | 3 +- sig/openai/models/audio/transcription.rbs | 41 ++++++- .../audio/transcription_create_params.rbs | 8 ++ .../models/audio/transcription_include.rbs | 13 +++ .../audio/transcription_stream_event.rbs | 13 +++ .../audio/transcription_text_delta_event.rbs | 56 ++++++++++ .../audio/transcription_text_done_event.rbs | 56 ++++++++++ .../audio/translation_create_params.rbs | 22 +++- sig/openai/models/audio_model.rbs | 5 +- .../models/chat/chat_completion_chunk.rbs | 10 +- sig/openai/resources/audio/speech.rbs | 1 + sig/openai/resources/audio/transcriptions.rbs | 13 +++ sig/openai/resources/audio/translations.rbs | 2 +- 45 files changed, 1115 insertions(+), 65 deletions(-) create mode 100644 lib/openai/models/audio/transcription_include.rb create mode 100644 lib/openai/models/audio/transcription_stream_event.rb create mode 100644 lib/openai/models/audio/transcription_text_delta_event.rb create mode 100644 lib/openai/models/audio/transcription_text_done_event.rb create mode 100644 rbi/lib/openai/models/audio/transcription_include.rbi create mode 100644 rbi/lib/openai/models/audio/transcription_stream_event.rbi create mode 100644 rbi/lib/openai/models/audio/transcription_text_delta_event.rbi create mode 100644 rbi/lib/openai/models/audio/transcription_text_done_event.rbi create mode 100644 sig/openai/models/audio/transcription_include.rbs create mode 100644 sig/openai/models/audio/transcription_stream_event.rbs create mode 100644 sig/openai/models/audio/transcription_text_delta_event.rbs create mode 100644 sig/openai/models/audio/transcription_text_done_event.rbs 
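For orientation before the file-by-file changes, here is a minimal usage sketch of the new TTS surface this patch introduces. This is an illustration only, not part of the patch: it assumes a configured `OpenAI::Client` with `OPENAI_API_KEY` set; the `gpt-4o-mini-tts` model and the `instructions` parameter are the additions documented in the diffs below.

    # Sketch only — not part of the patch. Assumes OPENAI_API_KEY is set.
    client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])

    # `gpt-4o-mini-tts` is the newly added TTS model. `instructions`
    # steers the delivery of the generated audio and, per the docs
    # below, does not work with `tts-1` or `tts-1-hd`.
    client.audio.speech.create(
      input: "The quick brown fox jumped over the lazy dog.",
      model: :"gpt-4o-mini-tts",
      voice: :alloy,
      instructions: "Speak in a calm, reassuring tone."
    )
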
diff --git a/.stats.yml b/.stats.yml index 16c6386b..199d46be 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 80 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-b26121d5df6eb5d3032a45a267473798b15fcfec76dd44a3256cf1238be05fa4.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-c22f59c66aec7914b6ee653d3098d1c1c8c16c180d2a158e819c8ddbf476f74b.yml diff --git a/lib/openai.rb b/lib/openai.rb index 84c5d333..3a9e6cd2 100644 --- a/lib/openai.rb +++ b/lib/openai.rb @@ -43,7 +43,11 @@ require_relative "openai/models/audio/transcription" require_relative "openai/models/audio/transcription_create_params" require_relative "openai/models/audio/transcription_create_response" +require_relative "openai/models/audio/transcription_include" require_relative "openai/models/audio/transcription_segment" +require_relative "openai/models/audio/transcription_stream_event" +require_relative "openai/models/audio/transcription_text_delta_event" +require_relative "openai/models/audio/transcription_text_done_event" require_relative "openai/models/audio/transcription_verbose" require_relative "openai/models/audio/transcription_word" require_relative "openai/models/audio/translation" diff --git a/lib/openai/models/audio/speech_create_params.rb b/lib/openai/models/audio/speech_create_params.rb index b7e77b57..2477a4ca 100644 --- a/lib/openai/models/audio/speech_create_params.rb +++ b/lib/openai/models/audio/speech_create_params.rb @@ -16,7 +16,7 @@ class SpeechCreateParams < OpenAI::BaseModel # @!attribute model # One of the available [TTS models](https://platform.openai.com/docs/models#tts): - # `tts-1` or `tts-1-hd` + # `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. # # @return [String, Symbol, OpenAI::Models::Audio::SpeechModel] required :model, union: -> { OpenAI::Models::Audio::SpeechCreateParams::Model } @@ -30,6 +30,17 @@ class SpeechCreateParams < OpenAI::BaseModel # @return [Symbol, OpenAI::Models::Audio::SpeechCreateParams::Voice] required :voice, enum: -> { OpenAI::Models::Audio::SpeechCreateParams::Voice } + # @!attribute [r] instructions + # Control the voice of your generated audio with additional instructions. Does not + # work with `tts-1` or `tts-1-hd`. + # + # @return [String, nil] + optional :instructions, String + + # @!parse + # # @return [String] + # attr_writer :instructions + # @!attribute [r] response_format # The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, # `wav`, and `pcm`. @@ -56,22 +67,23 @@ class SpeechCreateParams < OpenAI::BaseModel # # @param input [String] # # @param model [String, Symbol, OpenAI::Models::Audio::SpeechModel] # # @param voice [Symbol, OpenAI::Models::Audio::SpeechCreateParams::Voice] + # # @param instructions [String] # # @param response_format [Symbol, OpenAI::Models::Audio::SpeechCreateParams::ResponseFormat] # # @param speed [Float] # # @param request_options [OpenAI::RequestOptions, Hash{Symbol=>Object}] # # - # def initialize(input:, model:, voice:, response_format: nil, speed: nil, request_options: {}, **) = super + # def initialize(input:, model:, voice:, instructions: nil, response_format: nil, speed: nil, request_options: {}, **) = super # def initialize: (Hash | OpenAI::BaseModel) -> void # @abstract # # One of the available [TTS models](https://platform.openai.com/docs/models#tts): - # `tts-1` or `tts-1-hd` + # `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. 
class Model < OpenAI::Union variant String - # One of the available [TTS models](https://platform.openai.com/docs/models#tts): `tts-1` or `tts-1-hd` + # One of the available [TTS models](https://platform.openai.com/docs/models#tts): `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. variant enum: -> { OpenAI::Models::Audio::SpeechModel } # @!parse diff --git a/lib/openai/models/audio/speech_model.rb b/lib/openai/models/audio/speech_model.rb index 96744e0c..26aae9d6 100644 --- a/lib/openai/models/audio/speech_model.rb +++ b/lib/openai/models/audio/speech_model.rb @@ -7,6 +7,7 @@ module Audio class SpeechModel < OpenAI::Enum TTS_1 = :"tts-1" TTS_1_HD = :"tts-1-hd" + GPT_4O_MINI_TTS = :"gpt-4o-mini-tts" finalize! end diff --git a/lib/openai/models/audio/transcription.rb b/lib/openai/models/audio/transcription.rb index 6bd2d97b..8185ea6c 100644 --- a/lib/openai/models/audio/transcription.rb +++ b/lib/openai/models/audio/transcription.rb @@ -10,15 +10,69 @@ class Transcription < OpenAI::BaseModel # @return [String] required :text, String + # @!attribute [r] logprobs + # The log probabilities of the tokens in the transcription. Only returned with the + # models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` if `logprobs` is added + # to the `include` array. + # + # @return [Array, nil] + optional :logprobs, -> { OpenAI::ArrayOf[OpenAI::Models::Audio::Transcription::Logprob] } + + # @!parse + # # @return [Array] + # attr_writer :logprobs + # @!parse # # Represents a transcription response returned by model, based on the provided # # input. # # # # @param text [String] + # # @param logprobs [Array] # # - # def initialize(text:, **) = super + # def initialize(text:, logprobs: nil, **) = super # def initialize: (Hash | OpenAI::BaseModel) -> void + + class Logprob < OpenAI::BaseModel + # @!attribute [r] token + # The token in the transcription. + # + # @return [String, nil] + optional :token, String + + # @!parse + # # @return [String] + # attr_writer :token + + # @!attribute [r] bytes + # The bytes of the token. + # + # @return [Array, nil] + optional :bytes, OpenAI::ArrayOf[Float] + + # @!parse + # # @return [Array] + # attr_writer :bytes + + # @!attribute [r] logprob + # The log probability of the token. + # + # @return [Float, nil] + optional :logprob, Float + + # @!parse + # # @return [Float] + # attr_writer :logprob + + # @!parse + # # @param token [String] + # # @param bytes [Array] + # # @param logprob [Float] + # # + # def initialize(token: nil, bytes: nil, logprob: nil, **) = super + + # def initialize: (Hash | OpenAI::BaseModel) -> void + end end end end diff --git a/lib/openai/models/audio/transcription_create_params.rb b/lib/openai/models/audio/transcription_create_params.rb index d6d9f071..3ff8c770 100644 --- a/lib/openai/models/audio/transcription_create_params.rb +++ b/lib/openai/models/audio/transcription_create_params.rb @@ -16,12 +16,27 @@ class TranscriptionCreateParams < OpenAI::BaseModel required :file, IO # @!attribute model - # ID of the model to use. Only `whisper-1` (which is powered by our open source - # Whisper V2 model) is currently available. + # ID of the model to use. The options are `gpt-4o-transcribe`, + # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + # Whisper V2 model). # # @return [String, Symbol, OpenAI::Models::AudioModel] required :model, union: -> { OpenAI::Models::Audio::TranscriptionCreateParams::Model } + # @!attribute [r] include + # Additional information to include in the transcription response. 
`logprobs` will + # return the log probabilities of the tokens in the response to understand the + # model's confidence in the transcription. `logprobs` only works with + # response_format set to `json` and only with the models `gpt-4o-transcribe` and + # `gpt-4o-mini-transcribe`. + # + # @return [Array, nil] + optional :include, -> { OpenAI::ArrayOf[enum: OpenAI::Models::Audio::TranscriptionInclude] } + + # @!parse + # # @return [Array] + # attr_writer :include + # @!attribute [r] language # The language of the input audio. Supplying the input language in # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) @@ -49,7 +64,8 @@ class TranscriptionCreateParams < OpenAI::BaseModel # @!attribute [r] response_format # The format of the output, in one of these options: `json`, `text`, `srt`, - # `verbose_json`, or `vtt`. + # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + # the only supported format is `json`. # # @return [Symbol, OpenAI::Models::AudioResponseFormat, nil] optional :response_format, enum: -> { OpenAI::Models::AudioResponseFormat } @@ -90,6 +106,7 @@ class TranscriptionCreateParams < OpenAI::BaseModel # @!parse # # @param file [IO, StringIO] # # @param model [String, Symbol, OpenAI::Models::AudioModel] + # # @param include [Array] # # @param language [String] # # @param prompt [String] # # @param response_format [Symbol, OpenAI::Models::AudioResponseFormat] @@ -100,6 +117,7 @@ class TranscriptionCreateParams < OpenAI::BaseModel # def initialize( # file:, # model:, + # include: nil, # language: nil, # prompt: nil, # response_format: nil, @@ -115,12 +133,13 @@ class TranscriptionCreateParams < OpenAI::BaseModel # @abstract # - # ID of the model to use. Only `whisper-1` (which is powered by our open source - # Whisper V2 model) is currently available. + # ID of the model to use. The options are `gpt-4o-transcribe`, + # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + # Whisper V2 model). class Model < OpenAI::Union variant String - # ID of the model to use. Only `whisper-1` (which is powered by our open source Whisper V2 model) is currently available. + # ID of the model to use. The options are `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source Whisper V2 model). variant enum: -> { OpenAI::Models::AudioModel } # @!parse diff --git a/lib/openai/models/audio/transcription_include.rb b/lib/openai/models/audio/transcription_include.rb new file mode 100644 index 00000000..97303675 --- /dev/null +++ b/lib/openai/models/audio/transcription_include.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Audio + # @abstract + class TranscriptionInclude < OpenAI::Enum + LOGPROBS = :logprobs + + finalize! + end + end + end +end diff --git a/lib/openai/models/audio/transcription_stream_event.rb b/lib/openai/models/audio/transcription_stream_event.rb new file mode 100644 index 00000000..4bddaa1b --- /dev/null +++ b/lib/openai/models/audio/transcription_stream_event.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Audio + # @abstract + # + # Emitted when there is an additional text delta. This is also the first event + # emitted when the transcription starts. Only emitted when you + # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # with the `Stream` parameter set to `true`. 
+ class TranscriptionStreamEvent < OpenAI::Union + discriminator :type + + # Emitted when there is an additional text delta. This is also the first event emitted when the transcription starts. Only emitted when you [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with the `Stream` parameter set to `true`. + variant :"transcript.text.delta", -> { OpenAI::Models::Audio::TranscriptionTextDeltaEvent } + + # Emitted when the transcription is complete. Contains the complete transcription text. Only emitted when you [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with the `Stream` parameter set to `true`. + variant :"transcript.text.done", -> { OpenAI::Models::Audio::TranscriptionTextDoneEvent } + + # @!parse + # class << self + # # @return [Array(OpenAI::Models::Audio::TranscriptionTextDeltaEvent, OpenAI::Models::Audio::TranscriptionTextDoneEvent)] + # def variants; end + # end + end + end + end +end diff --git a/lib/openai/models/audio/transcription_text_delta_event.rb b/lib/openai/models/audio/transcription_text_delta_event.rb new file mode 100644 index 00000000..ec8ca4f4 --- /dev/null +++ b/lib/openai/models/audio/transcription_text_delta_event.rb @@ -0,0 +1,88 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Audio + class TranscriptionTextDeltaEvent < OpenAI::BaseModel + # @!attribute delta + # The text delta that was additionally transcribed. + # + # @return [String] + required :delta, String + + # @!attribute type + # The type of the event. Always `transcript.text.delta`. + # + # @return [Symbol, :"transcript.text.delta"] + required :type, const: :"transcript.text.delta" + + # @!attribute [r] logprobs + # The log probabilities of the delta. Only included if you + # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # with the `include[]` parameter set to `logprobs`. + # + # @return [Array, nil] + optional :logprobs, -> { OpenAI::ArrayOf[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob] } + + # @!parse + # # @return [Array] + # attr_writer :logprobs + + # @!parse + # # Emitted when there is an additional text delta. This is also the first event + # # emitted when the transcription starts. Only emitted when you + # # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # # with the `Stream` parameter set to `true`. + # # + # # @param delta [String] + # # @param logprobs [Array] + # # @param type [Symbol, :"transcript.text.delta"] + # # + # def initialize(delta:, logprobs: nil, type: :"transcript.text.delta", **) = super + + # def initialize: (Hash | OpenAI::BaseModel) -> void + + class Logprob < OpenAI::BaseModel + # @!attribute [r] token + # The token that was used to generate the log probability. + # + # @return [String, nil] + optional :token, String + + # @!parse + # # @return [String] + # attr_writer :token + + # @!attribute [r] bytes + # The bytes that were used to generate the log probability. + # + # @return [Array, nil] + optional :bytes, OpenAI::ArrayOf[OpenAI::Unknown] + + # @!parse + # # @return [Array] + # attr_writer :bytes + + # @!attribute [r] logprob + # The log probability of the token. 
+ # + # @return [Float, nil] + optional :logprob, Float + + # @!parse + # # @return [Float] + # attr_writer :logprob + + # @!parse + # # @param token [String] + # # @param bytes [Array] + # # @param logprob [Float] + # # + # def initialize(token: nil, bytes: nil, logprob: nil, **) = super + + # def initialize: (Hash | OpenAI::BaseModel) -> void + end + end + end + end +end diff --git a/lib/openai/models/audio/transcription_text_done_event.rb b/lib/openai/models/audio/transcription_text_done_event.rb new file mode 100644 index 00000000..b2a78b25 --- /dev/null +++ b/lib/openai/models/audio/transcription_text_done_event.rb @@ -0,0 +1,89 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Audio + class TranscriptionTextDoneEvent < OpenAI::BaseModel + # @!attribute text + # The text that was transcribed. + # + # @return [String] + required :text, String + + # @!attribute type + # The type of the event. Always `transcript.text.done`. + # + # @return [Symbol, :"transcript.text.done"] + required :type, const: :"transcript.text.done" + + # @!attribute [r] logprobs + # The log probabilities of the individual tokens in the transcription. Only + # included if you + # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # with the `include[]` parameter set to `logprobs`. + # + # @return [Array, nil] + optional :logprobs, -> { OpenAI::ArrayOf[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob] } + + # @!parse + # # @return [Array] + # attr_writer :logprobs + + # @!parse + # # Emitted when the transcription is complete. Contains the complete transcription + # # text. Only emitted when you + # # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # # with the `Stream` parameter set to `true`. + # # + # # @param text [String] + # # @param logprobs [Array] + # # @param type [Symbol, :"transcript.text.done"] + # # + # def initialize(text:, logprobs: nil, type: :"transcript.text.done", **) = super + + # def initialize: (Hash | OpenAI::BaseModel) -> void + + class Logprob < OpenAI::BaseModel + # @!attribute [r] token + # The token that was used to generate the log probability. + # + # @return [String, nil] + optional :token, String + + # @!parse + # # @return [String] + # attr_writer :token + + # @!attribute [r] bytes + # The bytes that were used to generate the log probability. + # + # @return [Array, nil] + optional :bytes, OpenAI::ArrayOf[OpenAI::Unknown] + + # @!parse + # # @return [Array] + # attr_writer :bytes + + # @!attribute [r] logprob + # The log probability of the token. + # + # @return [Float, nil] + optional :logprob, Float + + # @!parse + # # @return [Float] + # attr_writer :logprob + + # @!parse + # # @param token [String] + # # @param bytes [Array] + # # @param logprob [Float] + # # + # def initialize(token: nil, bytes: nil, logprob: nil, **) = super + + # def initialize: (Hash | OpenAI::BaseModel) -> void + end + end + end + end +end diff --git a/lib/openai/models/audio/translation_create_params.rb b/lib/openai/models/audio/translation_create_params.rb index 4fd4a4dc..0b31b58c 100644 --- a/lib/openai/models/audio/translation_create_params.rb +++ b/lib/openai/models/audio/translation_create_params.rb @@ -39,11 +39,11 @@ class TranslationCreateParams < OpenAI::BaseModel # The format of the output, in one of these options: `json`, `text`, `srt`, # `verbose_json`, or `vtt`. 
# - # @return [Symbol, OpenAI::Models::AudioResponseFormat, nil] - optional :response_format, enum: -> { OpenAI::Models::AudioResponseFormat } + # @return [Symbol, OpenAI::Models::Audio::TranslationCreateParams::ResponseFormat, nil] + optional :response_format, enum: -> { OpenAI::Models::Audio::TranslationCreateParams::ResponseFormat } # @!parse - # # @return [Symbol, OpenAI::Models::AudioResponseFormat] + # # @return [Symbol, OpenAI::Models::Audio::TranslationCreateParams::ResponseFormat] # attr_writer :response_format # @!attribute [r] temperature @@ -64,7 +64,7 @@ class TranslationCreateParams < OpenAI::BaseModel # # @param file [IO, StringIO] # # @param model [String, Symbol, OpenAI::Models::AudioModel] # # @param prompt [String] - # # @param response_format [Symbol, OpenAI::Models::AudioResponseFormat] + # # @param response_format [Symbol, OpenAI::Models::Audio::TranslationCreateParams::ResponseFormat] # # @param temperature [Float] # # @param request_options [OpenAI::RequestOptions, Hash{Symbol=>Object}] # # @@ -88,6 +88,20 @@ class Model < OpenAI::Union # def variants; end # end end + + # @abstract + # + # The format of the output, in one of these options: `json`, `text`, `srt`, + # `verbose_json`, or `vtt`. + class ResponseFormat < OpenAI::Enum + JSON = :json + TEXT = :text + SRT = :srt + VERBOSE_JSON = :verbose_json + VTT = :vtt + + finalize! + end end end end diff --git a/lib/openai/models/audio_model.rb b/lib/openai/models/audio_model.rb index 81db712e..88507173 100644 --- a/lib/openai/models/audio_model.rb +++ b/lib/openai/models/audio_model.rb @@ -5,6 +5,8 @@ module Models # @abstract class AudioModel < OpenAI::Enum WHISPER_1 = :"whisper-1" + GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe" + GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe" finalize! end diff --git a/lib/openai/models/audio_response_format.rb b/lib/openai/models/audio_response_format.rb index 8b92a3b9..9593d816 100644 --- a/lib/openai/models/audio_response_format.rb +++ b/lib/openai/models/audio_response_format.rb @@ -5,7 +5,8 @@ module Models # @abstract # # The format of the output, in one of these options: `json`, `text`, `srt`, - # `verbose_json`, or `vtt`. + # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + # the only supported format is `json`. class AudioResponseFormat < OpenAI::Enum JSON = :json TEXT = :text diff --git a/lib/openai/models/chat/chat_completion_chunk.rb b/lib/openai/models/chat/chat_completion_chunk.rb index 5c0d47df..5f0a0fef 100644 --- a/lib/openai/models/chat/chat_completion_chunk.rb +++ b/lib/openai/models/chat/chat_completion_chunk.rb @@ -55,7 +55,7 @@ class ChatCompletionChunk < OpenAI::BaseModel # # @return [String] # attr_writer :system_fingerprint - # @!attribute [r] usage + # @!attribute usage # An optional field that will only be present when you set # `stream_options: {"include_usage": true}` in your request. When present, it # contains a null value **except for the last chunk** which contains the token @@ -65,11 +65,7 @@ class ChatCompletionChunk < OpenAI::BaseModel # final usage chunk which contains the total token usage for the request. 
# # @return [OpenAI::Models::CompletionUsage, nil] - optional :usage, -> { OpenAI::Models::CompletionUsage } - - # @!parse - # # @return [OpenAI::Models::CompletionUsage] - # attr_writer :usage + optional :usage, -> { OpenAI::Models::CompletionUsage }, nil?: true # @!parse # # Represents a streamed chunk of a chat completion response returned by the model, @@ -82,7 +78,7 @@ class ChatCompletionChunk < OpenAI::BaseModel # # @param model [String] # # @param service_tier [Symbol, OpenAI::Models::Chat::ChatCompletionChunk::ServiceTier, nil] # # @param system_fingerprint [String] - # # @param usage [OpenAI::Models::CompletionUsage] + # # @param usage [OpenAI::Models::CompletionUsage, nil] # # @param object [Symbol, :"chat.completion.chunk"] # # # def initialize( diff --git a/lib/openai/resources/audio/speech.rb b/lib/openai/resources/audio/speech.rb index 9c5d8284..b5b584fe 100644 --- a/lib/openai/resources/audio/speech.rb +++ b/lib/openai/resources/audio/speech.rb @@ -11,13 +11,16 @@ class Speech # @option params [String] :input The text to generate audio for. The maximum length is 4096 characters. # # @option params [String, Symbol, OpenAI::Models::Audio::SpeechModel] :model One of the available [TTS models](https://platform.openai.com/docs/models#tts): - # `tts-1` or `tts-1-hd` + # `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. # # @option params [Symbol, OpenAI::Models::Audio::SpeechCreateParams::Voice] :voice The voice to use when generating the audio. Supported voices are `alloy`, `ash`, # `coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the # voices are available in the # [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). # + # @option params [String] :instructions Control the voice of your generated audio with additional instructions. Does not + # work with `tts-1` or `tts-1-hd`. + # # @option params [Symbol, OpenAI::Models::Audio::SpeechCreateParams::ResponseFormat] :response_format The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, # `wav`, and `pcm`. # diff --git a/lib/openai/resources/audio/transcriptions.rb b/lib/openai/resources/audio/transcriptions.rb index 9e291700..3c7238bf 100644 --- a/lib/openai/resources/audio/transcriptions.rb +++ b/lib/openai/resources/audio/transcriptions.rb @@ -11,8 +11,15 @@ class Transcriptions # @option params [IO, StringIO] :file The audio file object (not file name) to transcribe, in one of these formats: # flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. # - # @option params [String, Symbol, OpenAI::Models::AudioModel] :model ID of the model to use. Only `whisper-1` (which is powered by our open source - # Whisper V2 model) is currently available. + # @option params [String, Symbol, OpenAI::Models::AudioModel] :model ID of the model to use. The options are `gpt-4o-transcribe`, + # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + # Whisper V2 model). + # + # @option params [Array] :include Additional information to include in the transcription response. `logprobs` will + # return the log probabilities of the tokens in the response to understand the + # model's confidence in the transcription. `logprobs` only works with + # response_format set to `json` and only with the models `gpt-4o-transcribe` and + # `gpt-4o-mini-transcribe`. # # @option params [String] :language The language of the input audio. Supplying the input language in # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. 
`en`) @@ -24,7 +31,8 @@ class Transcriptions # should match the audio language. # # @option params [Symbol, OpenAI::Models::AudioResponseFormat] :response_format The format of the output, in one of these options: `json`, `text`, `srt`, - # `verbose_json`, or `vtt`. + # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + # the only supported format is `json`. # # @option params [Float] :temperature The sampling temperature, between 0 and 1. Higher values like 0.8 will make the # output more random, while lower values like 0.2 will make it more focused and @@ -43,6 +51,10 @@ class Transcriptions # @return [OpenAI::Models::Audio::Transcription, OpenAI::Models::Audio::TranscriptionVerbose] def create(params) parsed, options = OpenAI::Models::Audio::TranscriptionCreateParams.dump_request(params) + if parsed[:stream] + message = "Please use `#create_streaming` for the streaming use case." + raise ArgumentError.new(message) + end @client.request( method: :post, path: "audio/transcriptions", @@ -53,6 +65,69 @@ def create(params) ) end + # Transcribes audio into the input language. + # + # @param params [OpenAI::Models::Audio::TranscriptionCreateParams, Hash{Symbol=>Object}] . + # + # @option params [IO, StringIO] :file The audio file object (not file name) to transcribe, in one of these formats: + # flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + # + # @option params [String, Symbol, OpenAI::Models::AudioModel] :model ID of the model to use. The options are `gpt-4o-transcribe`, + # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + # Whisper V2 model). + # + # @option params [Array] :include Additional information to include in the transcription response. `logprobs` will + # return the log probabilities of the tokens in the response to understand the + # model's confidence in the transcription. `logprobs` only works with + # response_format set to `json` and only with the models `gpt-4o-transcribe` and + # `gpt-4o-mini-transcribe`. + # + # @option params [String] :language The language of the input audio. Supplying the input language in + # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + # format will improve accuracy and latency. + # + # @option params [String] :prompt An optional text to guide the model's style or continue a previous audio + # segment. The + # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + # should match the audio language. + # + # @option params [Symbol, OpenAI::Models::AudioResponseFormat] :response_format The format of the output, in one of these options: `json`, `text`, `srt`, + # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + # the only supported format is `json`. + # + # @option params [Float] :temperature The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + # output more random, while lower values like 0.2 will make it more focused and + # deterministic. If set to 0, the model will use + # [log probability](https://en.wikipedia.org/wiki/Log_probability) to + # automatically increase the temperature until certain thresholds are hit. + # + # @option params [Array] :timestamp_granularities The timestamp granularities to populate for this transcription. + # `response_format` must be set `verbose_json` to use timestamp granularities. + # Either or both of these options are supported: `word`, or `segment`. 
Note: There + # is no additional latency for segment timestamps, but generating word timestamps + # incurs additional latency. + # + # @option params [OpenAI::RequestOptions, Hash{Symbol=>Object}, nil] :request_options + # + # @return [OpenAI::Stream] + def create_streaming(params) + parsed, options = OpenAI::Models::Audio::TranscriptionCreateParams.dump_request(params) + unless parsed.fetch(:stream, true) + message = "Please use `#create` for the non-streaming use case." + raise ArgumentError.new(message) + end + parsed.store(:stream, true) + @client.request( + method: :post, + path: "audio/transcriptions", + headers: {"content-type" => "multipart/form-data", "accept" => "text/event-stream"}, + body: parsed, + stream: OpenAI::Stream, + model: OpenAI::Models::Audio::TranscriptionStreamEvent, + options: options + ) + end + # @param client [OpenAI::Client] def initialize(client:) @client = client diff --git a/lib/openai/resources/audio/translations.rb b/lib/openai/resources/audio/translations.rb index c1de4f8e..ea8e0e4a 100644 --- a/lib/openai/resources/audio/translations.rb +++ b/lib/openai/resources/audio/translations.rb @@ -19,7 +19,7 @@ class Translations # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) # should be in English. # - # @option params [Symbol, OpenAI::Models::AudioResponseFormat] :response_format The format of the output, in one of these options: `json`, `text`, `srt`, + # @option params [Symbol, OpenAI::Models::Audio::TranslationCreateParams::ResponseFormat] :response_format The format of the output, in one of these options: `json`, `text`, `srt`, # `verbose_json`, or `vtt`. # # @option params [Float] :temperature The sampling temperature, between 0 and 1. Higher values like 0.8 will make the diff --git a/rbi/lib/openai/models/audio/speech_create_params.rbi b/rbi/lib/openai/models/audio/speech_create_params.rbi index 043a7179..e74cec3d 100644 --- a/rbi/lib/openai/models/audio/speech_create_params.rbi +++ b/rbi/lib/openai/models/audio/speech_create_params.rbi @@ -17,7 +17,7 @@ module OpenAI end # One of the available [TTS models](https://platform.openai.com/docs/models#tts): - # `tts-1` or `tts-1-hd` + # `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. sig { returns(T.any(String, Symbol)) } def model end @@ -38,6 +38,16 @@ module OpenAI def voice=(_) end + # Control the voice of your generated audio with additional instructions. Does not + # work with `tts-1` or `tts-1-hd`. + sig { returns(T.nilable(String)) } + def instructions + end + + sig { params(_: String).returns(String) } + def instructions=(_) + end + # The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, # `wav`, and `pcm`. 
sig { returns(T.nilable(Symbol)) } @@ -63,13 +73,14 @@ module OpenAI input: String, model: T.any(String, Symbol), voice: Symbol, + instructions: String, response_format: Symbol, speed: Float, request_options: T.any(OpenAI::RequestOptions, T::Hash[Symbol, T.anything]) ) .returns(T.attached_class) end - def self.new(input:, model:, voice:, response_format: nil, speed: nil, request_options: {}) + def self.new(input:, model:, voice:, instructions: nil, response_format: nil, speed: nil, request_options: {}) end sig do @@ -79,6 +90,7 @@ module OpenAI input: String, model: T.any(String, Symbol), voice: Symbol, + instructions: String, response_format: Symbol, speed: Float, request_options: OpenAI::RequestOptions @@ -89,7 +101,7 @@ module OpenAI end # One of the available [TTS models](https://platform.openai.com/docs/models#tts): - # `tts-1` or `tts-1-hd` + # `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. class Model < OpenAI::Union abstract! diff --git a/rbi/lib/openai/models/audio/speech_model.rbi b/rbi/lib/openai/models/audio/speech_model.rbi index f465baf8..5228e000 100644 --- a/rbi/lib/openai/models/audio/speech_model.rbi +++ b/rbi/lib/openai/models/audio/speech_model.rbi @@ -10,6 +10,7 @@ module OpenAI TTS_1 = :"tts-1" TTS_1_HD = :"tts-1-hd" + GPT_4O_MINI_TTS = :"gpt-4o-mini-tts" end end end diff --git a/rbi/lib/openai/models/audio/transcription.rbi b/rbi/lib/openai/models/audio/transcription.rbi index bc8940ae..ac4346e0 100644 --- a/rbi/lib/openai/models/audio/transcription.rbi +++ b/rbi/lib/openai/models/audio/transcription.rbi @@ -13,15 +13,69 @@ module OpenAI def text=(_) end + # The log probabilities of the tokens in the transcription. Only returned with the + # models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` if `logprobs` is added + # to the `include` array. + sig { returns(T.nilable(T::Array[OpenAI::Models::Audio::Transcription::Logprob])) } + def logprobs + end + + sig do + params(_: T::Array[OpenAI::Models::Audio::Transcription::Logprob]) + .returns(T::Array[OpenAI::Models::Audio::Transcription::Logprob]) + end + def logprobs=(_) + end + # Represents a transcription response returned by model, based on the provided # input. - sig { params(text: String).returns(T.attached_class) } - def self.new(text:) + sig do + params(text: String, logprobs: T::Array[OpenAI::Models::Audio::Transcription::Logprob]) + .returns(T.attached_class) + end + def self.new(text:, logprobs: nil) end - sig { override.returns({text: String}) } + sig { override.returns({text: String, logprobs: T::Array[OpenAI::Models::Audio::Transcription::Logprob]}) } def to_hash end + + class Logprob < OpenAI::BaseModel + # The token in the transcription. + sig { returns(T.nilable(String)) } + def token + end + + sig { params(_: String).returns(String) } + def token=(_) + end + + # The bytes of the token. + sig { returns(T.nilable(T::Array[Float])) } + def bytes + end + + sig { params(_: T::Array[Float]).returns(T::Array[Float]) } + def bytes=(_) + end + + # The log probability of the token. 
+ sig { returns(T.nilable(Float)) } + def logprob + end + + sig { params(_: Float).returns(Float) } + def logprob=(_) + end + + sig { params(token: String, bytes: T::Array[Float], logprob: Float).returns(T.attached_class) } + def self.new(token: nil, bytes: nil, logprob: nil) + end + + sig { override.returns({token: String, bytes: T::Array[Float], logprob: Float}) } + def to_hash + end + end end end end diff --git a/rbi/lib/openai/models/audio/transcription_create_params.rbi b/rbi/lib/openai/models/audio/transcription_create_params.rbi index 29ecd8fb..026f8b77 100644 --- a/rbi/lib/openai/models/audio/transcription_create_params.rbi +++ b/rbi/lib/openai/models/audio/transcription_create_params.rbi @@ -17,8 +17,9 @@ module OpenAI def file=(_) end - # ID of the model to use. Only `whisper-1` (which is powered by our open source - # Whisper V2 model) is currently available. + # ID of the model to use. The options are `gpt-4o-transcribe`, + # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + # Whisper V2 model). sig { returns(T.any(String, Symbol)) } def model end @@ -27,6 +28,19 @@ module OpenAI def model=(_) end + # Additional information to include in the transcription response. `logprobs` will + # return the log probabilities of the tokens in the response to understand the + # model's confidence in the transcription. `logprobs` only works with + # response_format set to `json` and only with the models `gpt-4o-transcribe` and + # `gpt-4o-mini-transcribe`. + sig { returns(T.nilable(T::Array[Symbol])) } + def include + end + + sig { params(_: T::Array[Symbol]).returns(T::Array[Symbol]) } + def include=(_) + end + # The language of the input audio. Supplying the input language in # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) # format will improve accuracy and latency. @@ -51,7 +65,8 @@ module OpenAI end # The format of the output, in one of these options: `json`, `text`, `srt`, - # `verbose_json`, or `vtt`. + # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + # the only supported format is `json`. sig { returns(T.nilable(Symbol)) } def response_format end @@ -90,6 +105,7 @@ module OpenAI params( file: T.any(IO, StringIO), model: T.any(String, Symbol), + include: T::Array[Symbol], language: String, prompt: String, response_format: Symbol, @@ -102,6 +118,7 @@ module OpenAI def self.new( file:, model:, + include: nil, language: nil, prompt: nil, response_format: nil, @@ -117,6 +134,7 @@ module OpenAI { file: T.any(IO, StringIO), model: T.any(String, Symbol), + include: T::Array[Symbol], language: String, prompt: String, response_format: Symbol, @@ -129,8 +147,9 @@ module OpenAI def to_hash end - # ID of the model to use. Only `whisper-1` (which is powered by our open source - # Whisper V2 model) is currently available. + # ID of the model to use. The options are `gpt-4o-transcribe`, + # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + # Whisper V2 model). class Model < OpenAI::Union abstract! diff --git a/rbi/lib/openai/models/audio/transcription_include.rbi b/rbi/lib/openai/models/audio/transcription_include.rbi new file mode 100644 index 00000000..7a60b02a --- /dev/null +++ b/rbi/lib/openai/models/audio/transcription_include.rbi @@ -0,0 +1,15 @@ +# typed: strong + +module OpenAI + module Models + module Audio + class TranscriptionInclude < OpenAI::Enum + abstract! 
+ + Value = type_template(:out) { {fixed: Symbol} } + + LOGPROBS = :logprobs + end + end + end +end diff --git a/rbi/lib/openai/models/audio/transcription_stream_event.rbi b/rbi/lib/openai/models/audio/transcription_stream_event.rbi new file mode 100644 index 00000000..4c8cc6bc --- /dev/null +++ b/rbi/lib/openai/models/audio/transcription_stream_event.rbi @@ -0,0 +1,25 @@ +# typed: strong + +module OpenAI + module Models + module Audio + # Emitted when there is an additional text delta. This is also the first event + # emitted when the transcription starts. Only emitted when you + # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # with the `Stream` parameter set to `true`. + class TranscriptionStreamEvent < OpenAI::Union + abstract! + + Variants = + type_template(:out) do + { + fixed: T.any( + OpenAI::Models::Audio::TranscriptionTextDeltaEvent, + OpenAI::Models::Audio::TranscriptionTextDoneEvent + ) + } + end + end + end + end +end diff --git a/rbi/lib/openai/models/audio/transcription_text_delta_event.rbi b/rbi/lib/openai/models/audio/transcription_text_delta_event.rbi new file mode 100644 index 00000000..6c73838b --- /dev/null +++ b/rbi/lib/openai/models/audio/transcription_text_delta_event.rbi @@ -0,0 +1,102 @@ +# typed: strong + +module OpenAI + module Models + module Audio + class TranscriptionTextDeltaEvent < OpenAI::BaseModel + # The text delta that was additionally transcribed. + sig { returns(String) } + def delta + end + + sig { params(_: String).returns(String) } + def delta=(_) + end + + # The type of the event. Always `transcript.text.delta`. + sig { returns(Symbol) } + def type + end + + sig { params(_: Symbol).returns(Symbol) } + def type=(_) + end + + # The log probabilities of the delta. Only included if you + # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # with the `include[]` parameter set to `logprobs`. + sig { returns(T.nilable(T::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob])) } + def logprobs + end + + sig do + params(_: T::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob]) + .returns(T::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob]) + end + def logprobs=(_) + end + + # Emitted when there is an additional text delta. This is also the first event + # emitted when the transcription starts. Only emitted when you + # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # with the `Stream` parameter set to `true`. + sig do + params( + delta: String, + logprobs: T::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob], + type: Symbol + ) + .returns(T.attached_class) + end + def self.new(delta:, logprobs: nil, type: :"transcript.text.delta") + end + + sig do + override + .returns( + {delta: String, type: Symbol, logprobs: T::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob]} + ) + end + def to_hash + end + + class Logprob < OpenAI::BaseModel + # The token that was used to generate the log probability. + sig { returns(T.nilable(String)) } + def token + end + + sig { params(_: String).returns(String) } + def token=(_) + end + + # The bytes that were used to generate the log probability. + sig { returns(T.nilable(T::Array[T.anything])) } + def bytes + end + + sig { params(_: T::Array[T.anything]).returns(T::Array[T.anything]) } + def bytes=(_) + end + + # The log probability of the token. 
+ sig { returns(T.nilable(Float)) } + def logprob + end + + sig { params(_: Float).returns(Float) } + def logprob=(_) + end + + sig { params(token: String, bytes: T::Array[T.anything], logprob: Float).returns(T.attached_class) } + def self.new(token: nil, bytes: nil, logprob: nil) + end + + sig { override.returns({token: String, bytes: T::Array[T.anything], logprob: Float}) } + def to_hash + end + end + end + end + end +end diff --git a/rbi/lib/openai/models/audio/transcription_text_done_event.rbi b/rbi/lib/openai/models/audio/transcription_text_done_event.rbi new file mode 100644 index 00000000..fb616718 --- /dev/null +++ b/rbi/lib/openai/models/audio/transcription_text_done_event.rbi @@ -0,0 +1,103 @@ +# typed: strong + +module OpenAI + module Models + module Audio + class TranscriptionTextDoneEvent < OpenAI::BaseModel + # The text that was transcribed. + sig { returns(String) } + def text + end + + sig { params(_: String).returns(String) } + def text=(_) + end + + # The type of the event. Always `transcript.text.done`. + sig { returns(Symbol) } + def type + end + + sig { params(_: Symbol).returns(Symbol) } + def type=(_) + end + + # The log probabilities of the individual tokens in the transcription. Only + # included if you + # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # with the `include[]` parameter set to `logprobs`. + sig { returns(T.nilable(T::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob])) } + def logprobs + end + + sig do + params(_: T::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob]) + .returns(T::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob]) + end + def logprobs=(_) + end + + # Emitted when the transcription is complete. Contains the complete transcription + # text. Only emitted when you + # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + # with the `Stream` parameter set to `true`. + sig do + params( + text: String, + logprobs: T::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob], + type: Symbol + ) + .returns(T.attached_class) + end + def self.new(text:, logprobs: nil, type: :"transcript.text.done") + end + + sig do + override + .returns( + {text: String, type: Symbol, logprobs: T::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob]} + ) + end + def to_hash + end + + class Logprob < OpenAI::BaseModel + # The token that was used to generate the log probability. + sig { returns(T.nilable(String)) } + def token + end + + sig { params(_: String).returns(String) } + def token=(_) + end + + # The bytes that were used to generate the log probability. + sig { returns(T.nilable(T::Array[T.anything])) } + def bytes + end + + sig { params(_: T::Array[T.anything]).returns(T::Array[T.anything]) } + def bytes=(_) + end + + # The log probability of the token. 
+ sig { returns(T.nilable(Float)) } + def logprob + end + + sig { params(_: Float).returns(Float) } + def logprob=(_) + end + + sig { params(token: String, bytes: T::Array[T.anything], logprob: Float).returns(T.attached_class) } + def self.new(token: nil, bytes: nil, logprob: nil) + end + + sig { override.returns({token: String, bytes: T::Array[T.anything], logprob: Float}) } + def to_hash + end + end + end + end + end +end diff --git a/rbi/lib/openai/models/audio/translation_create_params.rbi b/rbi/lib/openai/models/audio/translation_create_params.rbi index fb5d4a71..ce2e6e77 100644 --- a/rbi/lib/openai/models/audio/translation_create_params.rbi +++ b/rbi/lib/openai/models/audio/translation_create_params.rbi @@ -99,6 +99,20 @@ module OpenAI Variants = type_template(:out) { {fixed: T.any(String, Symbol)} } end + + # The format of the output, in one of these options: `json`, `text`, `srt`, + # `verbose_json`, or `vtt`. + class ResponseFormat < OpenAI::Enum + abstract! + + Value = type_template(:out) { {fixed: Symbol} } + + JSON = :json + TEXT = :text + SRT = :srt + VERBOSE_JSON = :verbose_json + VTT = :vtt + end end end end diff --git a/rbi/lib/openai/models/audio_model.rbi b/rbi/lib/openai/models/audio_model.rbi index 85348552..917ce7d8 100644 --- a/rbi/lib/openai/models/audio_model.rbi +++ b/rbi/lib/openai/models/audio_model.rbi @@ -8,6 +8,8 @@ module OpenAI Value = type_template(:out) { {fixed: Symbol} } WHISPER_1 = :"whisper-1" + GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe" + GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe" end end end diff --git a/rbi/lib/openai/models/audio_response_format.rbi b/rbi/lib/openai/models/audio_response_format.rbi index fb54aad0..405da3e2 100644 --- a/rbi/lib/openai/models/audio_response_format.rbi +++ b/rbi/lib/openai/models/audio_response_format.rbi @@ -3,7 +3,8 @@ module OpenAI module Models # The format of the output, in one of these options: `json`, `text`, `srt`, - # `verbose_json`, or `vtt`. + # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + # the only supported format is `json`. class AudioResponseFormat < OpenAI::Enum abstract! 
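To make the new streaming transcription path concrete, a short sketch of `#create_streaming` as wired up in the `lib/openai/resources/audio/transcriptions.rb` hunk above. Again an illustration only: it assumes a configured client, a local `audio.mp3`, and that the returned `OpenAI::Stream` can be iterated with `#each`; the class, method, and parameter names are the ones added by this patch.

    # Sketch only — not part of the patch.
    client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])

    stream = client.audio.transcriptions.create_streaming(
      file: File.open("audio.mp3", "rb"), # hypothetical local file
      model: :"gpt-4o-transcribe",
      include: [:logprobs]                # `json` output, gpt-4o-* models only
    )

    stream.each do |event|
      case event
      when OpenAI::Models::Audio::TranscriptionTextDeltaEvent
        print(event.delta)                # incremental transcript text
      when OpenAI::Models::Audio::TranscriptionTextDoneEvent
        puts                              # full transcript is in event.text
      end
    end

Note the division of labor the patch establishes: `#create` now raises an `ArgumentError` when `stream` is passed, steering callers to `#create_streaming`, which in turn forces `stream: true` and sends the request with an `accept: text/event-stream` header so the response is parsed as `TranscriptionStreamEvent`s.
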
diff --git a/rbi/lib/openai/models/chat/chat_completion_chunk.rbi b/rbi/lib/openai/models/chat/chat_completion_chunk.rbi index c2451af5..647695a9 100644 --- a/rbi/lib/openai/models/chat/chat_completion_chunk.rbi +++ b/rbi/lib/openai/models/chat/chat_completion_chunk.rbi @@ -86,7 +86,7 @@ module OpenAI def usage end - sig { params(_: OpenAI::Models::CompletionUsage).returns(OpenAI::Models::CompletionUsage) } + sig { params(_: T.nilable(OpenAI::Models::CompletionUsage)).returns(T.nilable(OpenAI::Models::CompletionUsage)) } def usage=(_) end @@ -101,7 +101,7 @@ module OpenAI model: String, service_tier: T.nilable(Symbol), system_fingerprint: String, - usage: OpenAI::Models::CompletionUsage, + usage: T.nilable(OpenAI::Models::CompletionUsage), object: Symbol ) .returns(T.attached_class) @@ -129,7 +129,7 @@ module OpenAI object: Symbol, service_tier: T.nilable(Symbol), system_fingerprint: String, - usage: OpenAI::Models::CompletionUsage + usage: T.nilable(OpenAI::Models::CompletionUsage) } ) end diff --git a/rbi/lib/openai/resources/audio/speech.rbi b/rbi/lib/openai/resources/audio/speech.rbi index ae6f4be5..e2e85216 100644 --- a/rbi/lib/openai/resources/audio/speech.rbi +++ b/rbi/lib/openai/resources/audio/speech.rbi @@ -10,6 +10,7 @@ module OpenAI input: String, model: T.any(String, Symbol), voice: Symbol, + instructions: String, response_format: Symbol, speed: Float, request_options: T.nilable(T.any(OpenAI::RequestOptions, T::Hash[Symbol, T.anything])) @@ -20,13 +21,16 @@ module OpenAI # The text to generate audio for. The maximum length is 4096 characters. input:, # One of the available [TTS models](https://platform.openai.com/docs/models#tts): - # `tts-1` or `tts-1-hd` + # `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. model:, # The voice to use when generating the audio. Supported voices are `alloy`, `ash`, # `coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the # voices are available in the # [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). voice:, + # Control the voice of your generated audio with additional instructions. Does not + # work with `tts-1` or `tts-1-hd`. + instructions: nil, # The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, # `wav`, and `pcm`. response_format: nil, diff --git a/rbi/lib/openai/resources/audio/transcriptions.rbi b/rbi/lib/openai/resources/audio/transcriptions.rbi index 50e5c416..a1340034 100644 --- a/rbi/lib/openai/resources/audio/transcriptions.rbi +++ b/rbi/lib/openai/resources/audio/transcriptions.rbi @@ -9,11 +9,13 @@ module OpenAI params( file: T.any(IO, StringIO), model: T.any(String, Symbol), + include: T::Array[Symbol], language: String, prompt: String, response_format: Symbol, temperature: Float, timestamp_granularities: T::Array[Symbol], + stream: T.noreturn, request_options: T.nilable(T.any(OpenAI::RequestOptions, T::Hash[Symbol, T.anything])) ) .returns(T.any(OpenAI::Models::Audio::Transcription, OpenAI::Models::Audio::TranscriptionVerbose)) @@ -22,9 +24,16 @@ module OpenAI # The audio file object (not file name) to transcribe, in one of these formats: # flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. file:, - # ID of the model to use. Only `whisper-1` (which is powered by our open source - # Whisper V2 model) is currently available. + # ID of the model to use. The options are `gpt-4o-transcribe`, + # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + # Whisper V2 model). 
model:, + # Additional information to include in the transcription response. `logprobs` will + # return the log probabilities of the tokens in the response to understand the + # model's confidence in the transcription. `logprobs` only works with + # response_format set to `json` and only with the models `gpt-4o-transcribe` and + # `gpt-4o-mini-transcribe`. + include: nil, # The language of the input audio. Supplying the input language in # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) # format will improve accuracy and latency. @@ -35,7 +44,8 @@ module OpenAI # should match the audio language. prompt: nil, # The format of the output, in one of these options: `json`, `text`, `srt`, - # `verbose_json`, or `vtt`. + # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + # the only supported format is `json`. response_format: nil, # The sampling temperature, between 0 and 1. Higher values like 0.8 will make the # output more random, while lower values like 0.2 will make it more focused and @@ -49,6 +59,78 @@ module OpenAI # is no additional latency for segment timestamps, but generating word timestamps # incurs additional latency. timestamp_granularities: nil, + # There is no need to provide `stream:`. Instead, use `#create_streaming` or + # `#create` for streaming and non-streaming use cases, respectively. + stream: false, + request_options: {} + ) + end + + # Transcribes audio into the input language. + sig do + params( + file: T.any(IO, StringIO), + model: T.any(String, Symbol), + include: T::Array[Symbol], + language: String, + prompt: String, + response_format: Symbol, + temperature: Float, + timestamp_granularities: T::Array[Symbol], + stream: T.noreturn, + request_options: T.nilable(T.any(OpenAI::RequestOptions, T::Hash[Symbol, T.anything])) + ) + .returns( + OpenAI::Stream[ + T.any( + OpenAI::Models::Audio::TranscriptionTextDeltaEvent, + OpenAI::Models::Audio::TranscriptionTextDoneEvent + ) + ] + ) + end + def create_streaming( + # The audio file object (not file name) to transcribe, in one of these formats: + # flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + file:, + # ID of the model to use. The options are `gpt-4o-transcribe`, + # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + # Whisper V2 model). + model:, + # Additional information to include in the transcription response. `logprobs` will + # return the log probabilities of the tokens in the response to understand the + # model's confidence in the transcription. `logprobs` only works with + # response_format set to `json` and only with the models `gpt-4o-transcribe` and + # `gpt-4o-mini-transcribe`. + include: nil, + # The language of the input audio. Supplying the input language in + # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + # format will improve accuracy and latency. + language: nil, + # An optional text to guide the model's style or continue a previous audio + # segment. The + # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + # should match the audio language. + prompt: nil, + # The format of the output, in one of these options: `json`, `text`, `srt`, + # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + # the only supported format is `json`. + response_format: nil, + # The sampling temperature, between 0 and 1. 
Higher values like 0.8 will make the + # output more random, while lower values like 0.2 will make it more focused and + # deterministic. If set to 0, the model will use + # [log probability](https://en.wikipedia.org/wiki/Log_probability) to + # automatically increase the temperature until certain thresholds are hit. + temperature: nil, + # The timestamp granularities to populate for this transcription. + # `response_format` must be set `verbose_json` to use timestamp granularities. + # Either or both of these options are supported: `word`, or `segment`. Note: There + # is no additional latency for segment timestamps, but generating word timestamps + # incurs additional latency. + timestamp_granularities: nil, + # There is no need to provide `stream:`. Instead, use `#create_streaming` or + # `#create` for streaming and non-streaming use cases, respectively. + stream: true, request_options: {} ) end diff --git a/sig/openai/models/audio/speech_create_params.rbs b/sig/openai/models/audio/speech_create_params.rbs index ae9debbf..eb21a97f 100644 --- a/sig/openai/models/audio/speech_create_params.rbs +++ b/sig/openai/models/audio/speech_create_params.rbs @@ -6,6 +6,7 @@ module OpenAI input: String, model: OpenAI::Models::Audio::SpeechCreateParams::model, voice: OpenAI::Models::Audio::SpeechCreateParams::voice, + instructions: String, response_format: OpenAI::Models::Audio::SpeechCreateParams::response_format, speed: Float } @@ -21,6 +22,10 @@ module OpenAI attr_accessor voice: OpenAI::Models::Audio::SpeechCreateParams::voice + attr_reader instructions: String? + + def instructions=: (String) -> String + attr_reader response_format: OpenAI::Models::Audio::SpeechCreateParams::response_format? def response_format=: ( @@ -35,6 +40,7 @@ module OpenAI input: String, model: OpenAI::Models::Audio::SpeechCreateParams::model, voice: OpenAI::Models::Audio::SpeechCreateParams::voice, + ?instructions: String, ?response_format: OpenAI::Models::Audio::SpeechCreateParams::response_format, ?speed: Float, ?request_options: OpenAI::request_opts diff --git a/sig/openai/models/audio/speech_model.rbs b/sig/openai/models/audio/speech_model.rbs index 7ab47f1f..357eaa4c 100644 --- a/sig/openai/models/audio/speech_model.rbs +++ b/sig/openai/models/audio/speech_model.rbs @@ -1,11 +1,12 @@ module OpenAI module Models module Audio - type speech_model = :"tts-1" | :"tts-1-hd" + type speech_model = :"tts-1" | :"tts-1-hd" | :"gpt-4o-mini-tts" class SpeechModel < OpenAI::Enum TTS_1: :"tts-1" TTS_1_HD: :"tts-1-hd" + GPT_4O_MINI_TTS: :"gpt-4o-mini-tts" def self.values: -> ::Array[OpenAI::Models::Audio::speech_model] end diff --git a/sig/openai/models/audio/transcription.rbs b/sig/openai/models/audio/transcription.rbs index 0ea5f955..3f9bf1d4 100644 --- a/sig/openai/models/audio/transcription.rbs +++ b/sig/openai/models/audio/transcription.rbs @@ -1,14 +1,51 @@ module OpenAI module Models module Audio - type transcription = { text: String } + type transcription = + { + text: String, + logprobs: ::Array[OpenAI::Models::Audio::Transcription::Logprob] + } class Transcription < OpenAI::BaseModel attr_accessor text: String - def initialize: (text: String) -> void + attr_reader logprobs: ::Array[OpenAI::Models::Audio::Transcription::Logprob]? 
diff --git a/sig/openai/models/audio/transcription.rbs b/sig/openai/models/audio/transcription.rbs
index 0ea5f955..3f9bf1d4 100644
--- a/sig/openai/models/audio/transcription.rbs
+++ b/sig/openai/models/audio/transcription.rbs
@@ -1,14 +1,51 @@
 module OpenAI
   module Models
     module Audio
-      type transcription = { text: String }
+      type transcription =
+        {
+          text: String,
+          logprobs: ::Array[OpenAI::Models::Audio::Transcription::Logprob]
+        }
 
       class Transcription < OpenAI::BaseModel
         attr_accessor text: String
 
-        def initialize: (text: String) -> void
+        attr_reader logprobs: ::Array[OpenAI::Models::Audio::Transcription::Logprob]?
+
+        def logprobs=: (
+          ::Array[OpenAI::Models::Audio::Transcription::Logprob]
+        ) -> ::Array[OpenAI::Models::Audio::Transcription::Logprob]
+
+        def initialize: (
+          text: String,
+          ?logprobs: ::Array[OpenAI::Models::Audio::Transcription::Logprob]
+        ) -> void
 
         def to_hash: -> OpenAI::Models::Audio::transcription
+
+        type logprob = { token: String, bytes: ::Array[Float], logprob: Float }
+
+        class Logprob < OpenAI::BaseModel
+          attr_reader token: String?
+
+          def token=: (String) -> String
+
+          attr_reader bytes: ::Array[Float]?
+
+          def bytes=: (::Array[Float]) -> ::Array[Float]
+
+          attr_reader logprob: Float?
+
+          def logprob=: (Float) -> Float
+
+          def initialize: (
+            ?token: String,
+            ?bytes: ::Array[Float],
+            ?logprob: Float
+          ) -> void
+
+          def to_hash: -> OpenAI::Models::Audio::Transcription::logprob
+        end
       end
     end
   end
diff --git a/sig/openai/models/audio/transcription_create_params.rbs b/sig/openai/models/audio/transcription_create_params.rbs
index 16280f3e..28f79a92 100644
--- a/sig/openai/models/audio/transcription_create_params.rbs
+++ b/sig/openai/models/audio/transcription_create_params.rbs
@@ -5,6 +5,7 @@ module OpenAI
         {
           file: (IO | StringIO),
           model: OpenAI::Models::Audio::TranscriptionCreateParams::model,
+          include: ::Array[OpenAI::Models::Audio::transcription_include],
           language: String,
           prompt: String,
           response_format: OpenAI::Models::audio_response_format,
@@ -21,6 +22,12 @@ module OpenAI
 
         attr_accessor model: OpenAI::Models::Audio::TranscriptionCreateParams::model
 
+        attr_reader include: ::Array[OpenAI::Models::Audio::transcription_include]?
+
+        def include=: (
+          ::Array[OpenAI::Models::Audio::transcription_include]
+        ) -> ::Array[OpenAI::Models::Audio::transcription_include]
+
         attr_reader language: String?
 
         def language=: (String) -> String
@@ -48,6 +55,7 @@ module OpenAI
         def initialize: (
           file: IO | StringIO,
           model: OpenAI::Models::Audio::TranscriptionCreateParams::model,
+          ?include: ::Array[OpenAI::Models::Audio::transcription_include],
           ?language: String,
           ?prompt: String,
           ?response_format: OpenAI::Models::audio_response_format,
diff --git a/sig/openai/models/audio/transcription_include.rbs b/sig/openai/models/audio/transcription_include.rbs
new file mode 100644
index 00000000..cf06a929
--- /dev/null
+++ b/sig/openai/models/audio/transcription_include.rbs
@@ -0,0 +1,13 @@
+module OpenAI
+  module Models
+    module Audio
+      type transcription_include = :logprobs
+
+      class TranscriptionInclude < OpenAI::Enum
+        LOGPROBS: :logprobs
+
+        def self.values: -> ::Array[OpenAI::Models::Audio::transcription_include]
+      end
+    end
+  end
+end
diff --git a/sig/openai/models/audio/transcription_stream_event.rbs b/sig/openai/models/audio/transcription_stream_event.rbs
new file mode 100644
index 00000000..b9233feb
--- /dev/null
+++ b/sig/openai/models/audio/transcription_stream_event.rbs
@@ -0,0 +1,13 @@
+module OpenAI
+  module Models
+    module Audio
+      type transcription_stream_event =
+        OpenAI::Models::Audio::TranscriptionTextDeltaEvent
+        | OpenAI::Models::Audio::TranscriptionTextDoneEvent
+
+      class TranscriptionStreamEvent < OpenAI::Union
+        def self.variants: -> [OpenAI::Models::Audio::TranscriptionTextDeltaEvent, OpenAI::Models::Audio::TranscriptionTextDoneEvent]
+      end
+    end
+  end
+end
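Combining the new `include` option with the extended `Transcription` model above, log probabilities can be requested on a non-streaming call. A minimal sketch — the audio path is illustrative, and per the doc comments `logprobs` requires `response_format: :json` plus one of the `gpt-4o` transcribe models:

```ruby
require "openai"

client = OpenAI::Client.new

transcription = client.audio.transcriptions.create(
  file: File.open("speech.mp3", "rb"),
  model: :"gpt-4o-transcribe",
  include: [:logprobs],
  response_format: :json
)

puts transcription.text
# Each logprob entry carries the token, its raw bytes, and its log probability.
transcription.logprobs&.each do |lp|
  puts "#{lp.token}: #{lp.logprob}"
end
```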
diff --git a/sig/openai/models/audio/transcription_text_delta_event.rbs b/sig/openai/models/audio/transcription_text_delta_event.rbs
new file mode 100644
index 00000000..373c6aed
--- /dev/null
+++ b/sig/openai/models/audio/transcription_text_delta_event.rbs
@@ -0,0 +1,56 @@
+module OpenAI
+  module Models
+    module Audio
+      type transcription_text_delta_event =
+        {
+          delta: String,
+          type: :"transcript.text.delta",
+          logprobs: ::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob]
+        }
+
+      class TranscriptionTextDeltaEvent < OpenAI::BaseModel
+        attr_accessor delta: String
+
+        attr_accessor type: :"transcript.text.delta"
+
+        attr_reader logprobs: ::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob]?
+
+        def logprobs=: (
+          ::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob]
+        ) -> ::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob]
+
+        def initialize: (
+          delta: String,
+          ?logprobs: ::Array[OpenAI::Models::Audio::TranscriptionTextDeltaEvent::Logprob],
+          ?type: :"transcript.text.delta"
+        ) -> void
+
+        def to_hash: -> OpenAI::Models::Audio::transcription_text_delta_event
+
+        type logprob = { token: String, bytes: ::Array[top], logprob: Float }
+
+        class Logprob < OpenAI::BaseModel
+          attr_reader token: String?
+
+          def token=: (String) -> String
+
+          attr_reader bytes: ::Array[top]?
+
+          def bytes=: (::Array[top]) -> ::Array[top]
+
+          attr_reader logprob: Float?
+
+          def logprob=: (Float) -> Float
+
+          def initialize: (
+            ?token: String,
+            ?bytes: ::Array[top],
+            ?logprob: Float
+          ) -> void
+
+          def to_hash: -> OpenAI::Models::Audio::TranscriptionTextDeltaEvent::logprob
+        end
+      end
+    end
+  end
+end
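The delta/done event pair is what `#create_streaming` yields. A sketch of a consumer, assuming `OpenAI::Stream` is enumerable as the signatures earlier suggest; the audio path is illustrative:

```ruby
require "openai"

client = OpenAI::Client.new

stream = client.audio.transcriptions.create_streaming(
  file: File.open("speech.mp3", "rb"),
  model: :"gpt-4o-mini-transcribe"
)

stream.each do |event|
  case event
  when OpenAI::Models::Audio::TranscriptionTextDeltaEvent
    print(event.delta) # incremental text as it is recognized
  when OpenAI::Models::Audio::TranscriptionTextDoneEvent
    puts("\n[done] #{event.text}")
  end
end
```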
diff --git a/sig/openai/models/audio/transcription_text_done_event.rbs b/sig/openai/models/audio/transcription_text_done_event.rbs
new file mode 100644
index 00000000..f1f1dd1d
--- /dev/null
+++ b/sig/openai/models/audio/transcription_text_done_event.rbs
@@ -0,0 +1,56 @@
+module OpenAI
+  module Models
+    module Audio
+      type transcription_text_done_event =
+        {
+          text: String,
+          type: :"transcript.text.done",
+          logprobs: ::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob]
+        }
+
+      class TranscriptionTextDoneEvent < OpenAI::BaseModel
+        attr_accessor text: String
+
+        attr_accessor type: :"transcript.text.done"
+
+        attr_reader logprobs: ::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob]?
+
+        def logprobs=: (
+          ::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob]
+        ) -> ::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob]
+
+        def initialize: (
+          text: String,
+          ?logprobs: ::Array[OpenAI::Models::Audio::TranscriptionTextDoneEvent::Logprob],
+          ?type: :"transcript.text.done"
+        ) -> void
+
+        def to_hash: -> OpenAI::Models::Audio::transcription_text_done_event
+
+        type logprob = { token: String, bytes: ::Array[top], logprob: Float }
+
+        class Logprob < OpenAI::BaseModel
+          attr_reader token: String?
+
+          def token=: (String) -> String
+
+          attr_reader bytes: ::Array[top]?
+
+          def bytes=: (::Array[top]) -> ::Array[top]
+
+          attr_reader logprob: Float?
+
+          def logprob=: (Float) -> Float
+
+          def initialize: (
+            ?token: String,
+            ?bytes: ::Array[top],
+            ?logprob: Float
+          ) -> void
+
+          def to_hash: -> OpenAI::Models::Audio::TranscriptionTextDoneEvent::logprob
+        end
+      end
+    end
+  end
+end
diff --git a/sig/openai/models/audio/translation_create_params.rbs b/sig/openai/models/audio/translation_create_params.rbs
index 252ed2dc..83dc3322 100644
--- a/sig/openai/models/audio/translation_create_params.rbs
+++ b/sig/openai/models/audio/translation_create_params.rbs
@@ -6,7 +6,7 @@ module OpenAI
         file: (IO | StringIO),
         model: OpenAI::Models::Audio::TranslationCreateParams::model,
         prompt: String,
-        response_format: OpenAI::Models::audio_response_format,
+        response_format: OpenAI::Models::Audio::TranslationCreateParams::response_format,
         temperature: Float
       }
       & OpenAI::request_parameters
@@ -23,11 +23,11 @@ module OpenAI
 
         def prompt=: (String) -> String
 
-        attr_reader response_format: OpenAI::Models::audio_response_format?
+        attr_reader response_format: OpenAI::Models::Audio::TranslationCreateParams::response_format?
 
         def response_format=: (
-          OpenAI::Models::audio_response_format
-        ) -> OpenAI::Models::audio_response_format
+          OpenAI::Models::Audio::TranslationCreateParams::response_format
+        ) -> OpenAI::Models::Audio::TranslationCreateParams::response_format
 
         attr_reader temperature: Float?
 
@@ -37,7 +37,7 @@ module OpenAI
           file: IO | StringIO,
           model: OpenAI::Models::Audio::TranslationCreateParams::model,
           ?prompt: String,
-          ?response_format: OpenAI::Models::audio_response_format,
+          ?response_format: OpenAI::Models::Audio::TranslationCreateParams::response_format,
           ?temperature: Float,
           ?request_options: OpenAI::request_opts
         ) -> void
@@ -49,6 +49,18 @@ module OpenAI
         class Model < OpenAI::Union
           def self.variants: -> [String, OpenAI::Models::audio_model]
         end
+
+        type response_format = :json | :text | :srt | :verbose_json | :vtt
+
+        class ResponseFormat < OpenAI::Enum
+          JSON: :json
+          TEXT: :text
+          SRT: :srt
+          VERBOSE_JSON: :verbose_json
+          VTT: :vtt
+
+          def self.values: -> ::Array[OpenAI::Models::Audio::TranslationCreateParams::response_format]
+        end
       end
     end
   end
diff --git a/sig/openai/models/audio_model.rbs b/sig/openai/models/audio_model.rbs
index f9841d31..72b67344 100644
--- a/sig/openai/models/audio_model.rbs
+++ b/sig/openai/models/audio_model.rbs
@@ -1,9 +1,12 @@
 module OpenAI
   module Models
-    type audio_model = :"whisper-1"
+    type audio_model =
+      :"whisper-1" | :"gpt-4o-transcribe" | :"gpt-4o-mini-transcribe"
 
     class AudioModel < OpenAI::Enum
       WHISPER_1: :"whisper-1"
+      GPT_4O_TRANSCRIBE: :"gpt-4o-transcribe"
+      GPT_4O_MINI_TRANSCRIBE: :"gpt-4o-mini-transcribe"
 
       def self.values: -> ::Array[OpenAI::Models::audio_model]
     end
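The expanded `audio_model` enum is also why the format caveats above matter: word and segment timestamps still require `whisper-1` with `verbose_json`, since the `gpt-4o` transcribe models only emit `json`. A minimal sketch (the audio path is illustrative):

```ruby
require "openai"

client = OpenAI::Client.new

# Returns a verbose transcription carrying per-word and per-segment timing data.
result = client.audio.transcriptions.create(
  file: File.open("speech.mp3", "rb"),
  model: OpenAI::Models::AudioModel::WHISPER_1, # i.e. :"whisper-1"
  response_format: :verbose_json,
  timestamp_granularities: [:word, :segment]
)
```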
diff --git a/sig/openai/models/chat/chat_completion_chunk.rbs b/sig/openai/models/chat/chat_completion_chunk.rbs
index 3eef3ef4..fa2494d9 100644
--- a/sig/openai/models/chat/chat_completion_chunk.rbs
+++ b/sig/openai/models/chat/chat_completion_chunk.rbs
@@ -13,7 +13,7 @@ module OpenAI
           object: :"chat.completion.chunk",
           service_tier: OpenAI::Models::Chat::ChatCompletionChunk::service_tier?,
           system_fingerprint: String,
-          usage: OpenAI::Models::CompletionUsage
+          usage: OpenAI::Models::CompletionUsage?
         }
 
       class ChatCompletionChunk < OpenAI::BaseModel
@@ -33,11 +33,7 @@ module OpenAI
 
         def system_fingerprint=: (String) -> String
 
-        attr_reader usage: OpenAI::Models::CompletionUsage?
-
-        def usage=: (
-          OpenAI::Models::CompletionUsage
-        ) -> OpenAI::Models::CompletionUsage
+        attr_accessor usage: OpenAI::Models::CompletionUsage?
 
         def initialize: (
           id: String,
@@ -46,7 +42,7 @@ module OpenAI
           model: String,
           ?service_tier: OpenAI::Models::Chat::ChatCompletionChunk::service_tier?,
           ?system_fingerprint: String,
-          ?usage: OpenAI::Models::CompletionUsage,
+          ?usage: OpenAI::Models::CompletionUsage?,
           ?object: :"chat.completion.chunk"
         ) -> void
diff --git a/sig/openai/resources/audio/speech.rbs b/sig/openai/resources/audio/speech.rbs
index 65002d04..7e7d117c 100644
--- a/sig/openai/resources/audio/speech.rbs
+++ b/sig/openai/resources/audio/speech.rbs
@@ -6,6 +6,7 @@ module OpenAI
         input: String,
         model: OpenAI::Models::Audio::SpeechCreateParams::model,
         voice: OpenAI::Models::Audio::SpeechCreateParams::voice,
+        ?instructions: String,
         ?response_format: OpenAI::Models::Audio::SpeechCreateParams::response_format,
         ?speed: Float,
         ?request_options: OpenAI::request_opts
diff --git a/sig/openai/resources/audio/transcriptions.rbs b/sig/openai/resources/audio/transcriptions.rbs
index 9ee728bb..b52531d0 100644
--- a/sig/openai/resources/audio/transcriptions.rbs
+++ b/sig/openai/resources/audio/transcriptions.rbs
@@ -5,6 +5,7 @@ module OpenAI
       def create: (
         file: IO | StringIO,
         model: OpenAI::Models::Audio::TranscriptionCreateParams::model,
+        ?include: ::Array[OpenAI::Models::Audio::transcription_include],
         ?language: String,
         ?prompt: String,
         ?response_format: OpenAI::Models::audio_response_format,
@@ -13,6 +14,18 @@ module OpenAI
         ?request_options: OpenAI::request_opts
       ) -> OpenAI::Models::Audio::transcription_create_response
 
+      def create_streaming: (
+        file: IO | StringIO,
+        model: OpenAI::Models::Audio::TranscriptionCreateParams::model,
+        ?include: ::Array[OpenAI::Models::Audio::transcription_include],
+        ?language: String,
+        ?prompt: String,
+        ?response_format: OpenAI::Models::audio_response_format,
+        ?temperature: Float,
+        ?timestamp_granularities: ::Array[OpenAI::Models::Audio::TranscriptionCreateParams::timestamp_granularity],
+        ?request_options: OpenAI::request_opts
+      ) -> OpenAI::Stream[OpenAI::Models::Audio::transcription_stream_event]
+
       def initialize: (client: OpenAI::Client) -> void
     end
diff --git a/sig/openai/resources/audio/translations.rbs b/sig/openai/resources/audio/translations.rbs
index f4c61adf..25ea82e3 100644
--- a/sig/openai/resources/audio/translations.rbs
+++ b/sig/openai/resources/audio/translations.rbs
@@ -6,7 +6,7 @@ module OpenAI
         file: IO | StringIO,
         model: OpenAI::Models::Audio::TranslationCreateParams::model,
         ?prompt: String,
-        ?response_format: OpenAI::Models::audio_response_format,
+        ?response_format: OpenAI::Models::Audio::TranslationCreateParams::response_format,
         ?temperature: Float,
         ?request_options: OpenAI::request_opts
       ) -> OpenAI::Models::Audio::translation_create_response
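Finally, the `ChatCompletionChunk` change above makes `usage` an explicitly nilable accessor, matching streamed responses with usage reporting enabled, where every chunk carries `usage: nil` except the last. A sketch, assuming chat completions follow the same `#create_streaming` convention as the transcriptions resource; the model and prompt are illustrative:

```ruby
require "openai"

client = OpenAI::Client.new

stream = client.chat.completions.create_streaming(
  model: "gpt-4o-mini",
  messages: [{role: :user, content: "Say hello."}],
  stream_options: {include_usage: true}
)

stream.each do |chunk|
  print(chunk.choices.first&.delta&.content)
  # `usage` is nil on every chunk except the final one.
  puts("\ntotal tokens: #{chunk.usage.total_tokens}") if chunk.usage
end
```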