Skip to content

Commit

Permalink
.Net Add Audio examples (#5019)
Browse files Browse the repository at this point in the history
### Motivation and Context

<!-- Thank you for your contribution to the semantic-kernel repo!
Please help reviewers and future users, providing the following
information:
  1. Why is this change required?
  2. What problem does it solve?
  3. What scenario does it contribute to?
  4. If it fixes an open issue, please link to the issue here.
-->

Issue #5016

### Description

<!-- Describe your changes, the overall approach, the underlying design.
These notes will help understanding how your code works. Thanks! -->

Added samples which demonstrate using ITextToAudioService and
IAudioToTextService.

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄

---------

Co-authored-by: Dmytro Struk <13853051+dmytrostruk@users.noreply.github.com>
  • Loading branch information
Krzysztof318 and dmytrostruk committed Feb 15, 2024
1 parent 6fb8a46 commit 6feffd3
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 1 deletion.
93 changes: 93 additions & 0 deletions dotnet/samples/KernelSyntaxExamples/Example28_Audio.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.IO;
using System.Threading.Tasks;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.AudioToText;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using Microsoft.SemanticKernel.Contents;
using Microsoft.SemanticKernel.TextToAudio;
using Xunit;
using Xunit.Abstractions;

namespace Examples;

/// <summary>
/// Demonstrates how to use the OpenAI connector's <see cref="ITextToAudioService"/> and
/// <see cref="IAudioToTextService"/> through a <see cref="Kernel"/>.
/// </summary>
/// <remarks>
/// Both tests are skipped by default: they call the OpenAI API using
/// <c>TestConfiguration.OpenAI.ApiKey</c>, and the audio-to-text test additionally
/// reads the audio file referenced by <see cref="AudioFilePath"/>.
/// </remarks>
public sealed class Example28_Audio : BaseTest
{
    private const string TextToAudioModel = "tts-1";
    private const string AudioToTextModel = "whisper-1";

    // Single source of truth for the input audio file: used both to read the bytes
    // and to derive the file name passed to the audio-to-text execution settings.
    private const string AudioFilePath = "input.wav";

    /// <summary>
    /// Converts a sample sentence to audio using the OpenAI text-to-audio service.
    /// </summary>
    [Fact(Skip = "Needs setup.")]
    public async Task TextToAudioAsync()
    {
        // Create a kernel with OpenAI text to audio service
        var kernel = Kernel.CreateBuilder()
            .AddOpenAITextToAudio(
                modelId: TextToAudioModel,
                apiKey: TestConfiguration.OpenAI.ApiKey)
            .Build();

        var textToAudioService = kernel.GetRequiredService<ITextToAudioService>();

        string sampleText = "Hello, my name is John. I am a software engineer. I am working on a project to convert text to audio.";

        // Set execution settings (optional)
        OpenAITextToAudioExecutionSettings executionSettings = new("alloy")
        {
            Voice = "alloy", // The voice to use when generating the audio.
                             // Supported voices are alloy, echo, fable, onyx, nova, and shimmer.
            ResponseFormat = "mp3", // The format of the generated audio.
                                    // Supported formats are mp3, opus, aac, and flac.
            Speed = 1.0f // The speed of the generated audio.
                         // Select a value from 0.25 to 4.0. 1.0 is the default.
        };

        // Convert text to audio
        AudioContent audioContent = await textToAudioService.GetAudioContentAsync(sampleText, executionSettings);

        // Save audio content to a file. The extension should match the ResponseFormat requested above.
        // await File.WriteAllBytesAsync("output.mp3", audioContent.Data.ToArray());
    }

    /// <summary>
    /// Transcribes the audio file at <see cref="AudioFilePath"/> using the OpenAI
    /// audio-to-text service and writes the resulting text to the test output.
    /// </summary>
    [Fact(Skip = "Setup audio file input before running this test.")]
    public async Task AudioToTextAsync()
    {
        // Create a kernel with OpenAI audio to text service
        var kernel = Kernel.CreateBuilder()
            .AddOpenAIAudioToText(
                modelId: AudioToTextModel,
                apiKey: TestConfiguration.OpenAI.ApiKey)
            .Build();

        var audioToTextService = kernel.GetRequiredService<IAudioToTextService>();

        // Set execution settings (optional).
        // The file name is derived from AudioFilePath so it cannot drift from the file actually read below.
        // NOTE(review): the constructor argument is assumed to be a bare file name (name + extension),
        // hence Path.GetFileName - confirm against the connector documentation.
        OpenAIAudioToTextExecutionSettings executionSettings = new(Path.GetFileName(AudioFilePath))
        {
            Language = "en", // The language of the audio data as two-letter ISO-639-1 language code (e.g. 'en' or 'es').
            Prompt = "sample prompt", // An optional text to guide the model's style or continue a previous audio segment.
                                      // The prompt should match the audio language.
            ResponseFormat = "json", // The format to return the transcribed text in.
                                     // Supported formats are json, text, srt, verbose_json, or vtt. Default is 'json'.
            Temperature = 0.3f, // The randomness of the generated text.
                                // Select a value from 0.0 to 1.0. 0 is the default.
        };

        // Read audio content from a file
        ReadOnlyMemory<byte> audioData = await File.ReadAllBytesAsync(AudioFilePath);
        AudioContent audioContent = new(new BinaryData(audioData));

        // Convert audio to text
        var textContent = await audioToTextService.GetTextContentAsync(audioContent, executionSettings);

        // Output the transcribed text
        this.WriteLine(textContent.Text);
    }

    /// <summary>
    /// Initializes the example, forwarding the xUnit output helper to the base test class.
    /// </summary>
    /// <param name="output">The xUnit test output helper passed to <c>BaseTest</c>.</param>
    public Example28_Audio(ITestOutputHelper output) : base(output) { }
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
<IsTestProject>true</IsTestProject>
<IsPackable>false</IsPackable>
<!-- Suppress: "Declare types in namespaces", "Require ConfigureAwait", "Experimental" -->
<NoWarn>CS8618,IDE0009,CA1051,CA1050,CA1707,CA2007,VSTHRD111,CS1591,RCS1110,CA5394,SKEXP0001,SKEXP0002,SKEXP0003,SKEXP0004,SKEXP0010,SKEXP0011,,SKEXP0012,SKEXP0015,SKEXP0020,SKEXP0021,SKEXP0022,SKEXP0023,SKEXP0024,SKEXP0025,SKEXP0026,SKEXP0027,SKEXP0028,SKEXP0029,SKEXP0030,SKEXP0031,SKEXP0032,SKEXP0040,SKEXP0041,SKEXP0042,SKEXP0050,SKEXP0051,SKEXP0052,SKEXP0053,SKEXP0054,SKEXP0055,SKEXP0060,SKEXP0061,SKEXP0101,SKEXP0102</NoWarn>
<NoWarn>CS8618,IDE0009,CA1051,CA1050,CA1707,CA2007,VSTHRD111,CS1591,RCS1110,CA5394,SKEXP0001,SKEXP0002,SKEXP0003,SKEXP0004,SKEXP0005,SKEXP0010,SKEXP0011,,SKEXP0012,SKEXP0015,SKEXP0020,SKEXP0021,SKEXP0022,SKEXP0023,SKEXP0024,SKEXP0025,SKEXP0026,SKEXP0027,SKEXP0028,SKEXP0029,SKEXP0030,SKEXP0031,SKEXP0032,SKEXP0040,SKEXP0041,SKEXP0042,SKEXP0050,SKEXP0051,SKEXP0052,SKEXP0053,SKEXP0054,SKEXP0055,SKEXP0060,SKEXP0061,SKEXP0101,SKEXP0102</NoWarn>
<OutputType>Library</OutputType>
</PropertyGroup>
<ItemGroup>
Expand Down

0 comments on commit 6feffd3

Please sign in to comment.