.Net Add Audio examples (#5019)

### Motivation and Context  Issue #5016 ### Description  Added samples which demonstrate using ITextToAudioService and IAudioToTextService. ### Contribution Checklist  - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone 😄 --------- Co-authored-by: Dmytro Struk <13853051+dmytrostruk@users.noreply.github.com>
microsoft · Feb 15, 2024 · 6feffd3 · 6feffd3
1 parent 6fb8a46
commit 6feffd3
Show file tree

Hide file tree

Showing 2 changed files with 94 additions and 1 deletion.
diff --git a/dotnet/samples/KernelSyntaxExamples/Example28_Audio.cs b/dotnet/samples/KernelSyntaxExamples/Example28_Audio.cs
@@ -0,0 +1,93 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.IO;
+using System.Threading.Tasks;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.AudioToText;
+using Microsoft.SemanticKernel.Connectors.OpenAI;
+using Microsoft.SemanticKernel.Contents;
+using Microsoft.SemanticKernel.TextToAudio;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace Examples;
+
+/// <summary>
+/// Demonstrates converting text to audio and audio to text with the OpenAI audio services.
+/// </summary>
+public sealed class Example28_Audio : BaseTest
+{
+    private const string TextToAudioModel = "tts-1";
+    private const string AudioToTextModel = "whisper-1";
+    private const string AudioFilePath = "input.wav";
+
+    /// <summary>Generates audio for a sample sentence via <see cref="ITextToAudioService"/>.</summary>
+    [Fact(Skip = "Needs setup.")]
+    public async Task TextToAudioAsync()
+    {
+        // Create a kernel with OpenAI text to audio service
+        var kernel = Kernel.CreateBuilder()
+            .AddOpenAITextToAudio(
+                modelId: TextToAudioModel,
+                apiKey: TestConfiguration.OpenAI.ApiKey)
+            .Build();
+
+        var textToAudioService = kernel.GetRequiredService<ITextToAudioService>();
+
+        string sampleText = "Hello, my name is John. I am a software engineer. I am working on a project to convert text to audio.";
+
+        // Set execution settings (optional)
+        OpenAITextToAudioExecutionSettings executionSettings = new("alloy")
+        {
+            Voice = "alloy", // The voice to use when generating the audio.
+                             // Supported voices are alloy, echo, fable, onyx, nova, and shimmer.
+            ResponseFormat = "mp3", // The format of the generated audio.
+                                    // Supported formats are mp3, opus, aac, and flac.
+            Speed = 1.0f // The speed of the generated audio.
+                         // Select a value from 0.25 to 4.0. 1.0 is the default.
+        };
+
+        // Convert text to audio
+        AudioContent audioContent = await textToAudioService.GetAudioContentAsync(sampleText, executionSettings);
+
+        // Save audio content to a file (the extension matches the "mp3" ResponseFormat requested above)
+        // await File.WriteAllBytesAsync("output.mp3", audioContent.Data.ToArray());
+    }
+
+    /// <summary>Transcribes the audio file at <c>AudioFilePath</c> via <see cref="IAudioToTextService"/>.</summary>
+    [Fact(Skip = "Setup audio file input before running this test.")]
+    public async Task AudioToTextAsync()
+    {
+        // Create a kernel with OpenAI audio to text service
+        var kernel = Kernel.CreateBuilder()
+            .AddOpenAIAudioToText(
+                modelId: AudioToTextModel,
+                apiKey: TestConfiguration.OpenAI.ApiKey)
+            .Build();
+
+        var audioToTextService = kernel.GetRequiredService<IAudioToTextService>();
+
+        // Set execution settings (optional); the constructor receives the same file name that is read below
+        OpenAIAudioToTextExecutionSettings executionSettings = new(AudioFilePath)
+        {
+            Language = "en", // The language of the audio data as two-letter ISO-639-1 language code (e.g. 'en' or 'es').
+            Prompt = "sample prompt", // An optional text to guide the model's style or continue a previous audio segment.
+                                      // The prompt should match the audio language.
+            ResponseFormat = "json", // The format to return the transcribed text in.
+                                     // Supported formats are json, text, srt, verbose_json, or vtt. Default is 'json'.
+            Temperature = 0.3f, // The randomness of the generated text.
+                                // Select a value from 0.0 to 1.0. 0 is the default.
+        };
+
+        // Read audio content from a file
+        ReadOnlyMemory<byte> audioData = await File.ReadAllBytesAsync(AudioFilePath);
+        AudioContent audioContent = new(new BinaryData(audioData));
+
+        // Convert audio to text and output the transcribed text
+        var textContent = await audioToTextService.GetTextContentAsync(audioContent, executionSettings);
+        this.WriteLine(textContent.Text);
+    }
+
+    public Example28_Audio(ITestOutputHelper output) : base(output) { }
+}
diff --git a/dotnet/samples/KernelSyntaxExamples/KernelSyntaxExamples.csproj b/dotnet/samples/KernelSyntaxExamples/KernelSyntaxExamples.csproj
@@ -10,7 +10,7 @@
     <IsTestProject>true</IsTestProject>
     <IsPackable>false</IsPackable>
     <!-- Suppress: "Declare types in namespaces", "Require ConfigureAwait", "Experimental" -->
-    <NoWarn>CS8618,IDE0009,CA1051,CA1050,CA1707,CA2007,VSTHRD111,CS1591,RCS1110,CA5394,SKEXP0001,SKEXP0002,SKEXP0003,SKEXP0004,SKEXP0010,SKEXP0011,,SKEXP0012,SKEXP0015,SKEXP0020,SKEXP0021,SKEXP0022,SKEXP0023,SKEXP0024,SKEXP0025,SKEXP0026,SKEXP0027,SKEXP0028,SKEXP0029,SKEXP0030,SKEXP0031,SKEXP0032,SKEXP0040,SKEXP0041,SKEXP0042,SKEXP0050,SKEXP0051,SKEXP0052,SKEXP0053,SKEXP0054,SKEXP0055,SKEXP0060,SKEXP0061,SKEXP0101,SKEXP0102</NoWarn>
+    <NoWarn>CS8618,IDE0009,CA1051,CA1050,CA1707,CA2007,VSTHRD111,CS1591,RCS1110,CA5394,SKEXP0001,SKEXP0002,SKEXP0003,SKEXP0004,SKEXP0005,SKEXP0010,SKEXP0011,,SKEXP0012,SKEXP0015,SKEXP0020,SKEXP0021,SKEXP0022,SKEXP0023,SKEXP0024,SKEXP0025,SKEXP0026,SKEXP0027,SKEXP0028,SKEXP0029,SKEXP0030,SKEXP0031,SKEXP0032,SKEXP0040,SKEXP0041,SKEXP0042,SKEXP0050,SKEXP0051,SKEXP0052,SKEXP0053,SKEXP0054,SKEXP0055,SKEXP0060,SKEXP0061,SKEXP0101,SKEXP0102</NoWarn>
     <OutputType>Library</OutputType>
   </PropertyGroup>
   <ItemGroup>