Skip to content

Commit

Permalink
.Net: Improved Audio API by adding default setting values (#5026)
Browse files Browse the repository at this point in the history
### Motivation and Context

<!-- Thank you for your contribution to the semantic-kernel repo!
Please help reviewers and future users, providing the following
information:
  1. Why is this change required?
  2. What problem does it solve?
  3. What scenario does it contribute to?
  4. If it fixes an open issue, please link to the issue here.
-->

The current audio interfaces take optional execution settings parameters
so that the audio API can be used through the Kernel in the future, where
execution settings are optional. However, when the audio services are used
directly, this is not user-friendly: if the optional execution settings are
not provided, the services throw an error.

For the text-to-audio service, the required field is the voice. This PR sets
its default value to `alloy`, with the ability to override it.
For the audio-to-text service, the required field is the filename. This PR sets
its default value to `file.mp3`, with the ability to override it.

With these changes, execution settings are no longer required (they are
truly optional) for text-to-audio and audio-to-text conversion.

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄
  • Loading branch information
dmytrostruk committed Feb 15, 2024
1 parent 6feffd3 commit 1ba6236
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.SemanticKernel.Text;
Expand All @@ -11,6 +12,7 @@ namespace Microsoft.SemanticKernel.Connectors.OpenAI;
/// <summary>
/// Execution settings for OpenAI audio-to-text request.
/// </summary>
[Experimental("SKEXP0005")]
public sealed class OpenAIAudioToTextExecutionSettings : PromptExecutionSettings
{
/// <summary>
Expand Down Expand Up @@ -92,6 +94,14 @@ public float Temperature
}
}

/// <summary>
/// Initializes a new instance of the <see cref="OpenAIAudioToTextExecutionSettings"/> class
/// using the default filename, "file.mp3".
/// </summary>
/// <remarks>
/// Delegates to the constructor overload that accepts a filename, passing the private
/// <c>DefaultFilename</c> constant, so callers that do not care about the filename can
/// create settings without specifying one.
/// </remarks>
public OpenAIAudioToTextExecutionSettings()
: this(DefaultFilename)
{
}

/// <summary>
/// Creates an instance of <see cref="OpenAIAudioToTextExecutionSettings"/> class.
/// </summary>
Expand Down Expand Up @@ -124,7 +134,7 @@ public override PromptExecutionSettings Clone()
{
if (executionSettings is null)
{
return null;
return new OpenAIAudioToTextExecutionSettings();
}

if (executionSettings is OpenAIAudioToTextExecutionSettings settings)
Expand All @@ -146,6 +156,8 @@ public override PromptExecutionSettings Clone()

#region private ================================================================================

private const string DefaultFilename = "file.mp3";

private float _temperature = 0;
private string _responseFormat = "json";
private string _filename;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.SemanticKernel.Text;
Expand All @@ -11,6 +12,7 @@ namespace Microsoft.SemanticKernel.Connectors.OpenAI;
/// <summary>
/// Execution settings for OpenAI text-to-audio request.
/// </summary>
[Experimental("SKEXP0005")]
public sealed class OpenAITextToAudioExecutionSettings : PromptExecutionSettings
{
/// <summary>
Expand Down Expand Up @@ -58,6 +60,14 @@ public float Speed
}
}

/// <summary>
/// Initializes a new instance of the <see cref="OpenAITextToAudioExecutionSettings"/> class
/// using the default voice, "alloy".
/// </summary>
/// <remarks>
/// Delegates to the constructor overload that accepts a voice, passing the private
/// <c>DefaultVoice</c> constant, so callers that do not care about the voice can
/// create settings without specifying one.
/// </remarks>
public OpenAITextToAudioExecutionSettings()
: this(DefaultVoice)
{
}

/// <summary>
/// Creates an instance of <see cref="OpenAITextToAudioExecutionSettings"/> class.
/// </summary>
Expand Down Expand Up @@ -88,7 +98,7 @@ public override PromptExecutionSettings Clone()
{
if (executionSettings is null)
{
return null;
return new OpenAITextToAudioExecutionSettings();
}

if (executionSettings is OpenAITextToAudioExecutionSettings settings)
Expand All @@ -110,6 +120,8 @@ public override PromptExecutionSettings Clone()

#region private ================================================================================

private const string DefaultVoice = "alloy";

private float _speed = 1.0f;
private string _responseFormat = "mp3";
private string _voice;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ public void Dispose()

public static TheoryData<OpenAIAudioToTextExecutionSettings?, Type> ExecutionSettings => new()
{
{ null, typeof(ArgumentNullException) },
{ new OpenAIAudioToTextExecutionSettings(""), typeof(ArgumentException) },
{ new OpenAIAudioToTextExecutionSettings("file"), typeof(ArgumentException) }
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ namespace SemanticKernel.Connectors.UnitTests.OpenAI.AudioToText;
public sealed class OpenAIAudioToTextExecutionSettingsTests
{
[Fact]
public void ItReturnsNullWhenSettingsAreNull()
public void ItReturnsDefaultSettingsWhenSettingsAreNull()
{
Assert.Null(OpenAIAudioToTextExecutionSettings.FromExecutionSettings(null));
Assert.NotNull(OpenAIAudioToTextExecutionSettings.FromExecutionSettings(null));
}

[Fact]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ public void Dispose()

public static TheoryData<OpenAITextToAudioExecutionSettings?, Type> ExecutionSettings => new()
{
{ null, typeof(ArgumentNullException) },
{ new OpenAITextToAudioExecutionSettings(""), typeof(ArgumentException) },
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ namespace SemanticKernel.Connectors.UnitTests.OpenAI.TextToAudio;
public sealed class OpenAITextToAudioExecutionSettingsTests
{
[Fact]
public void ItReturnsNullWhenSettingsAreNull()
public void ItReturnsDefaultSettingsWhenSettingsAreNull()
{
Assert.Null(OpenAITextToAudioExecutionSettings.FromExecutionSettings(null));
Assert.NotNull(OpenAITextToAudioExecutionSettings.FromExecutionSettings(null));
}

[Fact]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ public void Dispose()

public static TheoryData<OpenAITextToAudioExecutionSettings?, Type> ExecutionSettings => new()
{
{ null, typeof(ArgumentNullException) },
{ new OpenAITextToAudioExecutionSettings(""), typeof(ArgumentException) },
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public async Task OpenAITextToAudioTestAsync()
var service = new OpenAITextToAudioService(openAIConfiguration.ModelId, openAIConfiguration.ApiKey);

// Act
var result = await service.GetAudioContentAsync("The sun rises in the east and sets in the west.", new OpenAITextToAudioExecutionSettings("alloy"));
var result = await service.GetAudioContentAsync("The sun rises in the east and sets in the west.");

// Assert
Assert.NotNull(result?.Data);
Expand All @@ -59,7 +59,7 @@ public async Task AzureOpenAITextToAudioTestAsync()
azureOpenAIConfiguration.ApiKey);

// Act
var result = await service.GetAudioContentAsync("The sun rises in the east and sets in the west.", new OpenAITextToAudioExecutionSettings("alloy"));
var result = await service.GetAudioContentAsync("The sun rises in the east and sets in the west.");

// Assert
Assert.NotNull(result?.Data);
Expand Down

0 comments on commit 1ba6236

Please sign in to comment.