-
Notifications
You must be signed in to change notification settings - Fork 502
Improve the types of RealtimeSession configuration #96
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| '@openai/agents-realtime': patch | ||
| --- | ||
|
|
||
| Improve the types of turnDetection and inputAudioTranscription in RealtimeSession configuration |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,15 +33,52 @@ export type RealtimeTracingConfig = | |
| } | ||
| | 'auto'; | ||
|
|
||
| export type RealtimeInputAudioTranscriptionConfig = { | ||
| language?: string; | ||
| model?: | ||
| | 'gpt-4o-transcribe' | ||
| | 'gpt-4o-mini-transcribe' | ||
| | 'whisper-1' | ||
| | (string & {}); | ||
| prompt?: string; | ||
| }; | ||
|
|
||
| export type RealtimeTurnDetectionConfigAsIs = { | ||
| type?: 'semantic_vad' | 'server_vad'; | ||
| create_response?: boolean; | ||
| eagerness?: 'auto' | 'low' | 'medium' | 'high'; | ||
| interrupt_response?: boolean; | ||
| prefix_padding_ms?: number; | ||
| silence_duration_ms?: number; | ||
| threshold?: number; | ||
| }; | ||
|
|
||
| // The Realtime API accepts snake_case keys, so when this camelCase form is used, the SDK converts the keys to snake_case before passing them to the API | ||
| export type RealtimeTurnDetectionConfigCamelCase = { | ||
| type?: 'semantic_vad' | 'server_vad'; | ||
| createResponse?: boolean; | ||
| eagerness?: 'auto' | 'low' | 'medium' | 'high'; | ||
| interruptResponse?: boolean; | ||
| prefixPaddingMs?: number; | ||
| silenceDurationMs?: number; | ||
| threshold?: number; | ||
| }; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we still allow other properties inside these two settings? In theory, someone could roll their own Realtime transport layer right now with a different session config. It is also fine to guide people to providerData for that and have them override this entire property.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks, updated. |
||
|
|
||
| export type RealtimeTurnDetectionConfig = ( | ||
| | RealtimeTurnDetectionConfigAsIs | ||
| | RealtimeTurnDetectionConfigCamelCase | ||
| ) & | ||
| Record<string, any>; | ||
|
|
||
| export type RealtimeSessionConfig = { | ||
| model: string; | ||
| instructions: string; | ||
| modalities: ('text' | 'audio')[]; | ||
| voice: string; | ||
| inputAudioFormat: RealtimeAudioFormat; | ||
| outputAudioFormat: RealtimeAudioFormat; | ||
| inputAudioTranscription: Record<string, any>; | ||
| turnDetection: Record<string, any>; | ||
| inputAudioTranscription: RealtimeInputAudioTranscriptionConfig; | ||
| turnDetection: RealtimeTurnDetectionConfig; | ||
| toolChoice: ModelSettingsToolChoice; | ||
| tools: FunctionToolDefinition[]; | ||
| tracing?: RealtimeTracingConfig | null; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,8 @@ import { | |
| RealtimeClientMessage, | ||
| RealtimeSessionConfig, | ||
| RealtimeTracingConfig, | ||
| RealtimeTurnDetectionConfig, | ||
| RealtimeTurnDetectionConfigAsIs, | ||
| RealtimeUserInput, | ||
| } from './clientMessages'; | ||
| import { | ||
|
|
@@ -390,7 +392,7 @@ export abstract class OpenAIRealtimeBase | |
| config.inputAudioTranscription ?? | ||
| DEFAULT_OPENAI_REALTIME_SESSION_CONFIG.inputAudioTranscription, | ||
| turn_detection: | ||
| config.turnDetection ?? | ||
| OpenAIRealtimeBase.buildTurnDetectionConfig(config.turnDetection) ?? | ||
| DEFAULT_OPENAI_REALTIME_SESSION_CONFIG.turnDetection, | ||
| tool_choice: | ||
| config.toolChoice ?? DEFAULT_OPENAI_REALTIME_SESSION_CONFIG.toolChoice, | ||
|
|
@@ -406,6 +408,48 @@ export abstract class OpenAIRealtimeBase | |
| return sessionData; | ||
| } | ||
|
|
||
| private static buildTurnDetectionConfig( | ||
| c: RealtimeTurnDetectionConfig | undefined, | ||
| ): RealtimeTurnDetectionConfigAsIs | undefined { | ||
| if (typeof c === 'undefined') { | ||
| return undefined; | ||
| } | ||
| const { | ||
| type, | ||
| createResponse, | ||
| create_response, | ||
| eagerness, | ||
| interruptResponse, | ||
| interrupt_response, | ||
| prefixPaddingMs, | ||
| prefix_padding_ms, | ||
| silenceDurationMs, | ||
| silence_duration_ms, | ||
| threshold, | ||
| ...rest | ||
| } = c; | ||
|
|
||
| const config: RealtimeTurnDetectionConfigAsIs & Record<string, any> = { | ||
| type, | ||
| create_response: createResponse ? createResponse : create_response, | ||
| eagerness, | ||
| interrupt_response: interruptResponse | ||
| ? interruptResponse | ||
| : interrupt_response, | ||
| prefix_padding_ms: prefixPaddingMs ? prefixPaddingMs : prefix_padding_ms, | ||
| silence_duration_ms: silenceDurationMs | ||
| ? silenceDurationMs | ||
| : silence_duration_ms, | ||
| threshold, | ||
| ...rest, | ||
| }; | ||
| // Remove undefined values from the config | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. When I verified the behavior, having undefined values could affect connection establishment, so I added this logic. If my observation is wrong or I am missing something, please feel free to adjust this part. |
||
| Object.keys(config).forEach((key) => { | ||
| if (config[key] === undefined) delete config[key]; | ||
| }); | ||
| return Object.keys(config).length > 0 ? config : undefined; | ||
| } | ||
|
|
||
| /** | ||
| * Sets the internal tracing config. This is used to track the tracing config that has been set | ||
| * during the session.create event. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -519,7 +519,7 @@ export class RealtimeSession< | |
| this.#transport.on('turn_done', (event) => { | ||
| const item = event.response.output[event.response.output.length - 1]; | ||
| const textOutput = getLastTextFromAudioOutputMessage(item) ?? ''; | ||
| const itemId = item.id ?? ''; | ||
| const itemId = item?.id ?? ''; | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is an unrelated existing bug I found while running tests. |
||
| this.emit('agent_end', this.#context, this.#currentAgent, textOutput); | ||
| this.#currentAgent.emit('agent_end', this.#context, textOutput); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
changed the example but either works!