Merged
2 changes: 1 addition & 1 deletion .release-please-manifest.json
@@ -1,3 +1,3 @@
{
".": "0.23.1"
".": "0.23.2"
}
4 changes: 2 additions & 2 deletions .stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 118
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-16cb18bed32bae8c5840fb39a1bf664026cc40463ad0c487dcb0df1bd3d72db0.yml
openapi_spec_hash: 4cb51b22f98dee1a90bc7add82d1d132
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-94b1e3cb0bdc616ff0c2f267c33dadd95f133b1f64e647aab6c64afb292b2793.yml
openapi_spec_hash: 2395319ac9befd59b6536ae7f9564a05
config_hash: 930dac3aa861344867e4ac84f037b5df
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,13 @@
# Changelog

## 0.23.2 (2025-09-11)

Full Changelog: [v0.23.1...v0.23.2](https://github.com/openai/openai-ruby/compare/v0.23.1...v0.23.2)

### Chores

* **api:** Minor docs and type updates for realtime ([ccef982](https://github.com/openai/openai-ruby/commit/ccef9827b31206fc9ba40d2b6165eeefda7621f5))

## 0.23.1 (2025-09-10)

Full Changelog: [v0.23.0...v0.23.1](https://github.com/openai/openai-ruby/compare/v0.23.0...v0.23.1)
2 changes: 1 addition & 1 deletion Gemfile.lock
@@ -11,7 +11,7 @@ GIT
PATH
remote: .
specs:
openai (0.23.1)
openai (0.23.2)
connection_pool

GEM
2 changes: 1 addition & 1 deletion README.md
@@ -15,7 +15,7 @@ To use this gem, install via Bundler by adding the following to your application
<!-- x-release-please-start-version -->

```ruby
gem "openai", "~> 0.23.1"
gem "openai", "~> 0.23.2"
```

<!-- x-release-please-end -->
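The `"~> 0.23.2"` pessimistic constraint in the README snippet pins the gem to the 0.23.x series. A small illustrative sketch using RubyGems' own `Gem::Requirement` (standard library, not part of this diff) shows exactly which versions it admits:

```ruby
# Illustrative: RubyGems' Gem::Requirement evaluates the pessimistic
# operator ("~>") used in the Gemfile line above.
require "rubygems"

req = Gem::Requirement.new("~> 0.23.2")

# "~> 0.23.2" admits >= 0.23.2 and < 0.24.0.
puts req.satisfied_by?(Gem::Version.new("0.23.2")) # => true
puts req.satisfied_by?(Gem::Version.new("0.23.9")) # => true
puts req.satisfied_by?(Gem::Version.new("0.24.0")) # => false
```

This is why a patch release like this one (0.23.1 to 0.23.2) is picked up automatically by `bundle update openai`, while 0.24.0 would require editing the constraint.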
@@ -5,13 +5,15 @@ module Models
module Realtime
class InputAudioBufferTimeoutTriggered < OpenAI::Internal::Type::BaseModel
# @!attribute audio_end_ms
# Millisecond offset where speech ended within the buffered audio.
# Millisecond offset of audio written to the input audio buffer at the time the
# timeout was triggered.
#
# @return [Integer]
required :audio_end_ms, Integer

# @!attribute audio_start_ms
# Millisecond offset where speech started within the buffered audio.
# Millisecond offset of audio written to the input audio buffer that was after the
# playback time of the last model response.
#
# @return [Integer]
required :audio_start_ms, Integer
@@ -35,11 +37,29 @@ class InputAudioBufferTimeoutTriggered < OpenAI::Internal::Type::BaseModel
required :type, const: :"input_audio_buffer.timeout_triggered"

# @!method initialize(audio_end_ms:, audio_start_ms:, event_id:, item_id:, type: :"input_audio_buffer.timeout_triggered")
# Returned when the server VAD timeout is triggered for the input audio buffer.
# Some parameter documentation has been truncated, see
# {OpenAI::Models::Realtime::InputAudioBufferTimeoutTriggered} for more details.
#
# @param audio_end_ms [Integer] Millisecond offset where speech ended within the buffered audio.
# Returned when the Server VAD timeout is triggered for the input audio buffer.
# This is configured with `idle_timeout_ms` in the `turn_detection` settings of
# the session, and it indicates that there hasn't been any speech detected for the
# configured duration.
#
# @param audio_start_ms [Integer] Millisecond offset where speech started within the buffered audio.
# The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio
# after the last model response up to the triggering time, as an offset from the
# beginning of audio written to the input audio buffer. This means it demarcates
# the segment of audio that was silent and the difference between the start and
# end values will roughly match the configured timeout.
#
# The empty audio will be committed to the conversation as an `input_audio` item
# (there will be a `input_audio_buffer.committed` event) and a model response will
# be generated. There may be speech that didn't trigger VAD but is still detected
# by the model, so the model may respond with something relevant to the
# conversation or a prompt to continue speaking.
#
# @param audio_end_ms [Integer] Millisecond offset of audio written to the input audio buffer at the time the ti
#
# @param audio_start_ms [Integer] Millisecond offset of audio written to the input audio buffer that was after the
#
# @param event_id [String] The unique ID of the server event.
#
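The expanded documentation above says the difference between `audio_end_ms` and `audio_start_ms` demarcates the silent segment and will roughly match the configured timeout. A minimal plain-Ruby sketch of that relationship (a standalone `Struct`, not the gem's `InputAudioBufferTimeoutTriggered` class; the offset values are hypothetical):

```ruby
# Plain-Ruby sketch, not the gem's InputAudioBufferTimeoutTriggered model:
# both offsets count milliseconds from the start of audio written to the
# input audio buffer, so their difference is the silent span.
TimeoutTriggered = Struct.new(:audio_start_ms, :audio_end_ms, keyword_init: true) do
  def silent_ms
    audio_end_ms - audio_start_ms
  end
end

# Hypothetical event from a session with idle_timeout_ms configured at 5000.
event = TimeoutTriggered.new(audio_start_ms: 12_000, audio_end_ms: 17_000)
puts event.silent_ms # => 5000, roughly the configured idle_timeout_ms
```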
25 changes: 14 additions & 11 deletions lib/openai/models/realtime/realtime_audio_config_input.rb
@@ -36,17 +36,20 @@ class RealtimeAudioConfigInput < OpenAI::Internal::Type::BaseModel
# @!attribute turn_detection
# Configuration for turn detection, either Server VAD or Semantic VAD. This can be
# set to `null` to turn off, in which case the client must manually trigger model
# response. Server VAD means that the model will detect the start and end of
# speech based on audio volume and respond at the end of user speech. Semantic VAD
# is more advanced and uses a turn detection model (in conjunction with VAD) to
# semantically estimate whether the user has finished speaking, then dynamically
# sets a timeout based on this probability. For example, if user audio trails off
# with "uhhm", the model will score a low probability of turn end and wait longer
# for the user to continue speaking. This can be useful for more natural
# conversations, but may have a higher latency.
# response.
#
# @return [OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection, nil]
optional :turn_detection, -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection }
# Server VAD means that the model will detect the start and end of speech based on
# audio volume and respond at the end of user speech.
#
# Semantic VAD is more advanced and uses a turn detection model (in conjunction
# with VAD) to semantically estimate whether the user has finished speaking, then
# dynamically sets a timeout based on this probability. For example, if user audio
# trails off with "uhhm", the model will score a low probability of turn end and
# wait longer for the user to continue speaking. This can be useful for more
# natural conversations, but may have a higher latency.
#
# @return [OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::ServerVad, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::SemanticVad, nil]
optional :turn_detection, union: -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection }, nil?: true

# @!method initialize(format_: nil, noise_reduction: nil, transcription: nil, turn_detection: nil)
# Some parameter documentation has been truncated, see
@@ -58,7 +61,7 @@ class RealtimeAudioConfigInput < OpenAI::Internal::Type::BaseModel
#
# @param transcription [OpenAI::Models::Realtime::AudioTranscription] Configuration for input audio transcription, defaults to off and can be set to `
#
# @param turn_detection [OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection] Configuration for turn detection, either Server VAD or Semantic VAD. This can be
# @param turn_detection [OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::ServerVad, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::SemanticVad, nil] Configuration for turn detection, either Server VAD or Semantic VAD. This can be

# @see OpenAI::Models::Realtime::RealtimeAudioConfigInput#noise_reduction
class NoiseReduction < OpenAI::Internal::Type::BaseModel
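With `turn_detection` changed to a nullable union in this diff, the input audio config can take one of three shapes: Server VAD, Semantic VAD, or `nil` to disable detection entirely. A hypothetical sketch in plain Ruby hashes, not the gem's typed models (`idle_timeout_ms` appears in this diff's docs; the `eagerness` field is an assumption, not taken from this diff):

```ruby
# Hypothetical turn_detection shapes as plain hashes, not the gem's
# ServerVad / SemanticVad model classes.
server_vad   = { type: "server_vad", idle_timeout_ms: 5_000 } # volume-based
semantic_vad = { type: "semantic_vad", eagerness: "auto" }    # model-scored turn end
disabled     = nil # client must manually trigger model responses

modes = [server_vad, semantic_vad, disabled].map do |td|
  td.nil? ? "manual" : td[:type]
end
puts modes.inspect # => ["server_vad", "semantic_vad", "manual"]
```

The `nil?: true` on the attribute is what lets callers send an explicit `null` to turn detection off, rather than merely omitting the key.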