diff --git a/api/OpenAI.net8.0.cs b/api/OpenAI.net8.0.cs index dc541eb90..cda555fa0 100644 --- a/api/OpenAI.net8.0.cs +++ b/api/OpenAI.net8.0.cs @@ -4341,13 +4341,12 @@ public class RealtimeClient { public virtual Task CreateEphemeralTokenAsync(BinaryContent content, RequestOptions options = null); public virtual ClientResult CreateEphemeralTranscriptionToken(BinaryContent content, RequestOptions options = null); public virtual Task CreateEphemeralTranscriptionTokenAsync(BinaryContent content, RequestOptions options = null); - public RealtimeSession StartConversationSession(string model, CancellationToken cancellationToken = default); - public virtual Task StartConversationSessionAsync(string model, RequestOptions options); - public virtual Task StartConversationSessionAsync(string model, CancellationToken cancellationToken = default); - public virtual Task StartSessionAsync(string model, string intent, RequestOptions options); - public RealtimeSession StartTranscriptionSession(CancellationToken cancellationToken = default); - public virtual Task StartTranscriptionSessionAsync(RequestOptions options); - public virtual Task StartTranscriptionSessionAsync(CancellationToken cancellationToken = default); + public RealtimeSession StartConversationSession(string model, RealtimeSessionOptions options = null, CancellationToken cancellationToken = default); + public virtual Task StartConversationSessionAsync(string model, RealtimeSessionOptions options = null, CancellationToken cancellationToken = default); + public RealtimeSession StartSession(string model, string intent, RealtimeSessionOptions options = null, CancellationToken cancellationToken = default); + public virtual Task StartSessionAsync(string model, string intent, RealtimeSessionOptions options = null, CancellationToken cancellationToken = default); + public RealtimeSession StartTranscriptionSession(RealtimeSessionOptions options = null, CancellationToken cancellationToken = default); + public virtual Task StartTranscriptionSessionAsync(RealtimeSessionOptions options = null, CancellationToken cancellationToken = default); } [Experimental("OPENAI002")] [Flags] @@ -4387,7 +4386,7 @@ public class RealtimeItem : IJsonModel, IPersistableModel headers = null, CancellationToken cancellationToken = default); + protected internal virtual Task ConnectAsync(string queryString = null, IDictionary headers = null, CancellationToken cancellationToken = default); public virtual void DeleteItem(string itemId, CancellationToken cancellationToken = default); public virtual Task DeleteItemAsync(string itemId, CancellationToken cancellationToken = default); public void Dispose(); @@ -4430,6 +4429,11 @@ public class RealtimeSession : IDisposable { public virtual Task TruncateItemAsync(string itemId, int contentPartIndex, TimeSpan audioDuration, CancellationToken cancellationToken = default); } [Experimental("OPENAI002")] + public class RealtimeSessionOptions { + public IDictionary Headers { get; } + public string QueryString { get; set; } + } + [Experimental("OPENAI002")] public class RealtimeUpdate : IJsonModel, IPersistableModel { public string EventId { get; } public RealtimeUpdateKind Kind { get; } diff --git a/api/OpenAI.netstandard2.0.cs b/api/OpenAI.netstandard2.0.cs index 48fa38262..f9a98c422 100644 --- a/api/OpenAI.netstandard2.0.cs +++ b/api/OpenAI.netstandard2.0.cs @@ -3792,13 +3792,12 @@ public class RealtimeClient { public virtual Task CreateEphemeralTokenAsync(BinaryContent content, RequestOptions options = null); public virtual ClientResult CreateEphemeralTranscriptionToken(BinaryContent content, RequestOptions options = null); public virtual Task CreateEphemeralTranscriptionTokenAsync(BinaryContent content, RequestOptions options = null); - public RealtimeSession StartConversationSession(string model, CancellationToken cancellationToken = default); - public virtual Task StartConversationSessionAsync(string model, RequestOptions options); - public virtual Task StartConversationSessionAsync(string model, CancellationToken cancellationToken = default); - public virtual Task StartSessionAsync(string model, string intent, RequestOptions options); - public RealtimeSession StartTranscriptionSession(CancellationToken cancellationToken = default); - public virtual Task StartTranscriptionSessionAsync(RequestOptions options); - public virtual Task StartTranscriptionSessionAsync(CancellationToken cancellationToken = default); + public RealtimeSession StartConversationSession(string model, RealtimeSessionOptions options = null, CancellationToken cancellationToken = default); + public virtual Task StartConversationSessionAsync(string model, RealtimeSessionOptions options = null, CancellationToken cancellationToken = default); + public RealtimeSession StartSession(string model, string intent, RealtimeSessionOptions options = null, CancellationToken cancellationToken = default); + public virtual Task StartSessionAsync(string model, string intent, RealtimeSessionOptions options = null, CancellationToken cancellationToken = default); + public RealtimeSession StartTranscriptionSession(RealtimeSessionOptions options = null, CancellationToken cancellationToken = default); + public virtual Task StartTranscriptionSessionAsync(RealtimeSessionOptions options = null, CancellationToken cancellationToken = default); } [Flags] public enum RealtimeContentModalities { @@ -3834,7 +3833,7 @@ public class RealtimeItem : IJsonModel, IPersistableModel headers = null, CancellationToken cancellationToken = default); + protected internal virtual Task ConnectAsync(string queryString = null, IDictionary headers = null, CancellationToken cancellationToken = default); public virtual void DeleteItem(string itemId, CancellationToken cancellationToken = default); public virtual Task DeleteItemAsync(string itemId, CancellationToken cancellationToken = default); public void Dispose(); @@ -3876,6 +3875,10 @@ public class RealtimeSession : IDisposable { public virtual void TruncateItem(string itemId, int contentPartIndex, TimeSpan audioDuration, CancellationToken cancellationToken = default); public virtual Task TruncateItemAsync(string itemId, int contentPartIndex, TimeSpan audioDuration, CancellationToken cancellationToken = default); } + public class RealtimeSessionOptions { + public IDictionary Headers { get; } + public string QueryString { get; set; } + } public class RealtimeUpdate : IJsonModel, IPersistableModel { public string EventId { get; } public RealtimeUpdateKind Kind { get; } diff --git a/src/Custom/Realtime/RealtimeClient.Protocol.cs b/src/Custom/Realtime/RealtimeClient.Protocol.cs index d54bddaf3..f5b9eb750 100644 --- a/src/Custom/Realtime/RealtimeClient.Protocol.cs +++ b/src/Custom/Realtime/RealtimeClient.Protocol.cs @@ -1,6 +1,7 @@ using System; using System.ClientModel; using System.ClientModel.Primitives; +using System.Threading; using System.Threading.Tasks; namespace OpenAI.Realtime; @@ -10,43 +11,80 @@ namespace OpenAI.Realtime; [CodeGenSuppress("CreateStartRealtimeSessionRequest", typeof(BinaryContent), typeof(RequestOptions))] public partial class RealtimeClient { - /// - /// [Protocol Method] - /// Creates a new realtime conversation operation instance, establishing a connection to the /realtime endpoint. - /// - /// - /// - /// - public virtual async Task StartConversationSessionAsync(string model, RequestOptions options) + /// Starts a new for multimodal conversation. + /// + /// The abstracts bidirectional communication between the caller and service, + /// simultaneously sending and receiving WebSocket messages. + /// + public virtual async Task StartConversationSessionAsync(string model, RealtimeSessionOptions options = null, CancellationToken cancellationToken = default) { Argument.AssertNotNull(model, nameof(model)); - return await StartSessionAsync(model, intent: null, options).ConfigureAwait(false); + + return await StartSessionAsync( + model: model, + intent: null, + options: options, + cancellationToken: cancellationToken).ConfigureAwait(false); } - /// - /// [Protocol Method] - /// Creates a new realtime transcription operation instance, establishing a connection to the /realtime endpoint. - /// - /// - /// - public virtual Task StartTranscriptionSessionAsync(RequestOptions options) - => StartSessionAsync(model: null, intent: "transcription", options); + /// Starts a new for multimodal conversation. + /// + /// The abstracts bidirectional communication between the caller and service, + /// simultaneously sending and receiving WebSocket messages. + /// + public RealtimeSession StartConversationSession(string model, RealtimeSessionOptions options = null, CancellationToken cancellationToken = default) + { + Argument.AssertNotNull(model, nameof(model)); + + return StartSession( + model: model, + intent: null, + options: options, + cancellationToken: cancellationToken); + } - /// - /// [Protocol Method] - /// Creates a new realtime operation instance, establishing a connection to the /realtime endpoint. - /// - /// - /// - /// - /// - public virtual async Task StartSessionAsync(string model, string intent, RequestOptions options) + /// Starts a new for audio transcription. + /// + /// The abstracts bidirectional communication between the caller and service, + /// simultaneously sending and receiving WebSocket messages. + /// + public virtual async Task StartTranscriptionSessionAsync(RealtimeSessionOptions options = null, CancellationToken cancellationToken = default) { - Uri fullEndpoint = BuildSessionEndpoint(_webSocketEndpoint, model, intent); - RealtimeSession provisionalSession = new(this, fullEndpoint, _keyCredential); + return await StartSessionAsync( + model: null, + intent: "transcription", + options: options, + cancellationToken: cancellationToken).ConfigureAwait(false); + } + + /// Starts a new for audio transcription. + /// + /// The abstracts bidirectional communication between the caller and service, + /// simultaneously sending and receiving WebSocket messages. + /// + public RealtimeSession StartTranscriptionSession(RealtimeSessionOptions options = null, CancellationToken cancellationToken = default) + { + return StartSession( + model: null, + intent: "transcription", + options: options, + cancellationToken: cancellationToken); + } + + /// Starts a new . + /// + /// The abstracts bidirectional communication between the caller and service, + /// simultaneously sending and receiving WebSocket messages. + /// + public virtual async Task StartSessionAsync(string model, string intent, RealtimeSessionOptions options = null, CancellationToken cancellationToken = default) + { + options ??= new(); + + RealtimeSession provisionalSession = new(_keyCredential, this, _webSocketEndpoint, model, intent); + try { - await provisionalSession.ConnectAsync(options).ConfigureAwait(false); + await provisionalSession.ConnectAsync(options.QueryString, options.Headers, cancellationToken).ConfigureAwait(false); RealtimeSession result = provisionalSession; provisionalSession = null; return result; @@ -57,18 +95,13 @@ public virtual async Task StartSessionAsync(string model, strin } } - private static Uri BuildSessionEndpoint(Uri baseEndpoint, string model, string intent) + /// Starts a new . + /// + /// The abstracts bidirectional communication between the caller and service, + /// simultaneously sending and receiving WebSocket messages. + /// + public RealtimeSession StartSession(string model, string intent, RealtimeSessionOptions options = null, CancellationToken cancellationToken = default) { - ClientUriBuilder builder = new(); - builder.Reset(baseEndpoint); - if (!string.IsNullOrEmpty(model)) - { - builder.AppendQuery("model", model, escape: true); - } - if (!string.IsNullOrEmpty(intent)) - { - builder.AppendQuery("intent", intent, escape: true); - } - return builder.ToUri(); + return StartSessionAsync(model, intent, options, cancellationToken).ConfigureAwait(false).GetAwaiter().GetResult(); } } \ No newline at end of file diff --git a/src/Custom/Realtime/RealtimeClient.cs b/src/Custom/Realtime/RealtimeClient.cs index f7f0a04bd..547d56ca3 100644 --- a/src/Custom/Realtime/RealtimeClient.cs +++ b/src/Custom/Realtime/RealtimeClient.cs @@ -102,79 +102,6 @@ protected internal RealtimeClient(ClientPipeline pipeline, OpenAIClientOptions o [Experimental("OPENAI001")] public Uri Endpoint => _endpoint; - /// - /// Starts a new for multimodal conversation. - /// - /// - /// The abstracts bidirectional communication between the caller and service, - /// simultaneously sending and receiving WebSocket messages. - /// - /// - /// The model that the session should use for new conversation items. - /// - /// - /// A new, connected instance of with default configuration. - public virtual async Task StartConversationSessionAsync( - string model, - CancellationToken cancellationToken = default) - { - Argument.AssertNotNull(model, nameof(model)); - - RequestOptions cancellationOptions = cancellationToken.ToRequestOptions(); - RealtimeSession newOperation = await StartConversationSessionAsync(model, cancellationOptions).ConfigureAwait(false); - return newOperation; - } - - /// - /// Starts a new for multimodal conversation. - /// - /// - /// The abstracts bidirectional communication between the caller and service, - /// simultaneously sending and receiving WebSocket messages. - /// - /// - /// The model that the session should use for new conversation items. - /// - /// - /// A new, connected instance of with default configuration. - public RealtimeSession StartConversationSession(string model, CancellationToken cancellationToken = default) - { - Argument.AssertNotNull(model, nameof(model)); - - return StartConversationSessionAsync(model, cancellationToken).ConfigureAwait(false).GetAwaiter().GetResult(); - } - - /// - /// Starts a new for audio transcription. - /// - /// - /// The abstracts bidirectional communication between the caller and service, - /// simultaneously sending and receiving WebSocket messages. - /// - /// - /// A new, connected instance of with default configuration. - public virtual async Task StartTranscriptionSessionAsync( - CancellationToken cancellationToken = default) - { - RequestOptions cancellationOptions = cancellationToken.ToRequestOptions(); - RealtimeSession newOperation = await StartTranscriptionSessionAsync(cancellationOptions).ConfigureAwait(false); - return newOperation; - } - - /// - /// Starts a new for audio transcription. - /// - /// - /// The abstracts bidirectional communication between the caller and service, - /// simultaneously sending and receiving WebSocket messages. - /// - /// - /// A new, connected instance of with default configuration. - public RealtimeSession StartTranscriptionSession(CancellationToken cancellationToken = default) - { - return StartTranscriptionSessionAsync(cancellationToken).ConfigureAwait(false).GetAwaiter().GetResult(); - } - private static Uri GetWebSocketEndpoint(OpenAIClientOptions options) { UriBuilder uriBuilder = new(options?.Endpoint ?? new("https://api.openai.com/v1")); diff --git a/src/Custom/Realtime/RealtimeSession.Protocol.cs b/src/Custom/Realtime/RealtimeSession.Protocol.cs index 13d71dd30..783c4a46d 100644 --- a/src/Custom/Realtime/RealtimeSession.Protocol.cs +++ b/src/Custom/Realtime/RealtimeSession.Protocol.cs @@ -2,7 +2,6 @@ using System.ClientModel; using System.ClientModel.Primitives; using System.Collections.Generic; -using System.ComponentModel; using System.Net.WebSockets; using System.Threading; using System.Threading.Tasks; @@ -19,26 +18,46 @@ public partial class RealtimeSession /// Initializes an underlying instance for communication with the /realtime endpoint and /// then connects to the service using this socket. /// - /// - /// - protected internal virtual async Task ConnectAsync(RequestOptions options) + protected internal virtual async Task ConnectAsync(string queryString = null, IDictionary headers = null, CancellationToken cancellationToken = default) { WebSocket?.Dispose(); + _credential.Deconstruct(out string dangerousCredential); + ClientWebSocket clientWebSocket = new(); clientWebSocket.Options.AddSubProtocol("realtime"); clientWebSocket.Options.SetRequestHeader("openai-beta", $"realtime=v1"); clientWebSocket.Options.SetRequestHeader("Authorization", $"Bearer {dangerousCredential}"); - await clientWebSocket.ConnectAsync(_endpoint, options?.CancellationToken ?? default) - .ConfigureAwait(false); + if (headers is not null) + { + foreach (KeyValuePair header in headers) + { + clientWebSocket.Options.SetRequestHeader(header.Key, header.Value); + } + } + + Uri webSocketUri; + + if (string.IsNullOrEmpty(queryString)) + { + webSocketUri = BuildSessionUri(_endpoint, _model, _intent); + } + else + { + UriBuilder uriBuilder = new(_endpoint); + uriBuilder.Query = queryString; + webSocketUri = uriBuilder.Uri; + } + + await clientWebSocket.ConnectAsync(webSocketUri, cancellationToken).ConfigureAwait(false); WebSocket = clientWebSocket; } - protected internal virtual void Connect(RequestOptions options) + protected internal virtual void Connect(string queryString = null, IDictionary headers = null, CancellationToken cancellationToken = default) { - ConnectAsync(options).Wait(); + ConnectAsync(queryString, headers, cancellationToken).Wait(); } public virtual async Task SendCommandAsync(BinaryData data, RequestOptions options) @@ -94,4 +113,21 @@ public virtual IEnumerable ReceiveUpdates(RequestOptions options) { throw new NotImplementedException(); } + + private static Uri BuildSessionUri(Uri endpoint, string model, string intent) + { + ClientUriBuilder builder = new(); + builder.Reset(endpoint); + + if (!string.IsNullOrEmpty(model)) + { + builder.AppendQuery("model", model, escape: true); + } + if (!string.IsNullOrEmpty(intent)) + { + builder.AppendQuery("intent", intent, escape: true); + } + + return builder.ToUri(); + } } \ No newline at end of file diff --git a/src/Custom/Realtime/RealtimeSession.cs b/src/Custom/Realtime/RealtimeSession.cs index e2a1e9eff..872ca3d2b 100644 --- a/src/Custom/Realtime/RealtimeSession.cs +++ b/src/Custom/Realtime/RealtimeSession.cs @@ -19,22 +19,23 @@ public partial class RealtimeSession : IDisposable private readonly RealtimeClient _parentClient; private readonly Uri _endpoint; + private readonly string _model; + private readonly string _intent; private readonly ApiKeyCredential _credential; private readonly SemaphoreSlim _audioSendSemaphore = new(1, 1); private bool _isSendingAudioStream = false; internal bool ShouldBufferTurnResponseData { get; set; } - protected internal RealtimeSession( - RealtimeClient parentClient, - Uri endpoint, - ApiKeyCredential credential) + protected internal RealtimeSession(ApiKeyCredential credential, RealtimeClient parentClient, Uri endpoint, string model, string intent) { Argument.AssertNotNull(endpoint, nameof(endpoint)); Argument.AssertNotNull(credential, nameof(credential)); _parentClient = parentClient; _endpoint = endpoint; + _model = model; + _intent = intent; _credential = credential; } diff --git a/src/Custom/Realtime/RealtimeSessionOptions.cs b/src/Custom/Realtime/RealtimeSessionOptions.cs new file mode 100644 index 000000000..7c53052ad --- /dev/null +++ b/src/Custom/Realtime/RealtimeSessionOptions.cs @@ -0,0 +1,18 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; + +namespace OpenAI.Realtime; + +[Experimental("OPENAI002")] +public class RealtimeSessionOptions +{ + public RealtimeSessionOptions() + { + Headers = new ChangeTrackingDictionary(); + } + + public string QueryString { get; set; } + + public IDictionary Headers { get; } +} diff --git a/tests/Realtime/RealtimeProtocolTests.cs b/tests/Realtime/RealtimeProtocolTests.cs index d11d960ec..65368679f 100644 --- a/tests/Realtime/RealtimeProtocolTests.cs +++ b/tests/Realtime/RealtimeProtocolTests.cs @@ -36,7 +36,9 @@ public RealtimeProtocolTests(bool isAsync) : base(isAsync) public async Task ProtocolCanConfigureSession() { RealtimeClient client = GetTestClient(); - using RealtimeSession session = await client.StartConversationSessionAsync(GetTestModel(), CancellationToken); + using RealtimeSession session = await client.StartConversationSessionAsync( + model: GetTestModel(), + cancellationToken: CancellationToken); BinaryData configureSessionCommand = BinaryData.FromString(""" { diff --git a/tests/Realtime/RealtimeTests.cs b/tests/Realtime/RealtimeTests.cs index 1be6a9078..f459cc5d5 100644 --- a/tests/Realtime/RealtimeTests.cs +++ b/tests/Realtime/RealtimeTests.cs @@ -26,8 +26,8 @@ public async Task CanConfigureSession() { RealtimeClient client = GetTestClient(); using RealtimeSession session = await client.StartConversationSessionAsync( - GetTestModel(), - CancellationToken); + model: GetTestModel(), + cancellationToken: CancellationToken); ConversationSessionOptions sessionOptions = new() { @@ -95,7 +95,10 @@ List GetReceivedUpdates() where T : RealtimeUpdate public async Task TextOnlyWorks() { RealtimeClient client = GetTestClient(); - using RealtimeSession session = await client.StartConversationSessionAsync(GetTestModel(), CancellationToken); + using RealtimeSession session = await client.StartConversationSessionAsync( + model: GetTestModel(), + cancellationToken: CancellationToken); + await session.AddItemAsync( RealtimeItem.CreateUserMessage(["Hello, world!"]), cancellationToken: CancellationToken); @@ -183,7 +186,7 @@ public async Task TranscriptionOnlyWorks() Model = "gpt-4o-mini-transcribe", }, }; - RealtimeSession session = await client.StartTranscriptionSessionAsync(CancellationToken); + RealtimeSession session = await client.StartTranscriptionSessionAsync(cancellationToken: CancellationToken); await session.ConfigureTranscriptionSessionAsync(options, CancellationToken); // Sending the audio in a delayed stream allows us to validate bidirectional behavior, i.e. @@ -240,8 +243,8 @@ public async Task ItemManipulationWorks() { RealtimeClient client = GetTestClient(); using RealtimeSession session = await client.StartConversationSessionAsync( - GetTestModel(), - CancellationToken); + model: GetTestModel(), + cancellationToken: CancellationToken); await session.ConfigureConversationSessionAsync( new ConversationSessionOptions() @@ -316,7 +319,9 @@ await session.AddItemAsync( public async Task AudioStreamConvenienceBlocksCorrectly() { RealtimeClient client = GetTestClient(); - using RealtimeSession session = await client.StartConversationSessionAsync(GetTestModel(), CancellationToken); + using RealtimeSession session = await client.StartConversationSessionAsync( + model: GetTestModel(), + cancellationToken: CancellationToken); string inputAudioFilePath = Path.Join("Assets", "realtime_whats_the_weather_pcm16_24khz_mono.wav"); using TestDelayedFileReadStream delayedStream = new(inputAudioFilePath, TimeSpan.FromMilliseconds(200), readsBeforeDelay: 2); @@ -356,7 +361,9 @@ public async Task AudioStreamConvenienceBlocksCorrectly() public async Task AudioWithToolsWorks(TestAudioSendType audioSendType) { RealtimeClient client = GetTestClient(); - using RealtimeSession session = await client.StartConversationSessionAsync(GetTestModel(), CancellationToken); + using RealtimeSession session = await client.StartConversationSessionAsync( + model: GetTestModel(), + cancellationToken: CancellationToken); ConversationFunctionTool getWeatherTool = new("get_weather_for_location") { @@ -481,7 +488,9 @@ public async Task AudioWithToolsWorks(TestAudioSendType audioSendType) public async Task CanDisableVoiceActivityDetection() { RealtimeClient client = GetTestClient(); - using RealtimeSession session = await client.StartConversationSessionAsync(GetTestModel(), CancellationToken); + using RealtimeSession session = await client.StartConversationSessionAsync( + model: GetTestModel(), + cancellationToken: CancellationToken); await session.ConfigureConversationSessionAsync( new() @@ -530,7 +539,9 @@ or ResponseStartedUpdate public async Task BadCommandProvidesError() { RealtimeClient client = GetTestClient(); - using RealtimeSession session = await client.StartConversationSessionAsync(GetTestModel(), CancellationToken); + using RealtimeSession session = await client.StartConversationSessionAsync( + model: GetTestModel(), + cancellationToken: CancellationToken); await session.SendCommandAsync( BinaryData.FromString(""" @@ -565,7 +576,11 @@ public async Task CanAddItems() { ContentModalities = RealtimeContentModalities.Text, }; - using RealtimeSession session = await client.StartConversationSessionAsync(GetTestModel(), CancellationToken); + + using RealtimeSession session = await client.StartConversationSessionAsync( + model: GetTestModel(), + cancellationToken: CancellationToken); + await session.ConfigureConversationSessionAsync(sessionOptions, CancellationToken); List items = @@ -613,10 +628,15 @@ public async Task CanAddItems() public async Task CanUseOutOfBandResponses() { RealtimeClient client = GetTestClient(); - using RealtimeSession session = await client.StartConversationSessionAsync(GetTestModel(), CancellationToken); + + using RealtimeSession session = await client.StartConversationSessionAsync( + model: GetTestModel(), + cancellationToken: CancellationToken); + await session.AddItemAsync( RealtimeItem.CreateUserMessage(["Hello! My name is Bob."]), cancellationToken: CancellationToken); + await session.StartResponseAsync( new ConversationResponseOptions() {