Skip to content

Commit

Permalink
.Net Kernel Contents Graduation (#6319)
Browse files Browse the repository at this point in the history
### Motivation and Context

⚠️ Breaking changes on non-experimental types **ImageContent**

Resolves #5625
Resolves #5295

For a brief time, these changes will keep the content types below marked as
experimental.

- BinaryContent
- AudioContent
- ImageContent 
- FunctionCallContent
- FunctionResultContent

Changes:

### **BinaryContent** 
- Removed providers for lazy loading content, simplifying its usage and
APIs.
- Removed `Stream` constructor to avoid IDisposable resource consumption
or bad practices.
- Added a `Uri` property dedicated to referenced URI information
- Added `DataUri` property which can be set or get dynamically (auto
generated if you created the
content using byte array with a mime type) 
Setting a `DataUri` will automatically update the `MimeType` property
and add any extra metadata that may be available in the data scheme
definition.
- Added a required `mimeType` property to the ByteArray constructor, to
encourage passing the mimeType when creating BinaryContent directly or
from specializations.
- Added `Data` property which can be set or get dynamically (auto
generated if you created the content using a data uri format)
Setting a Data on an existing BinaryContent will also reflect on the
getter of `DataUri` for the given content.
- Added DataUri and Base64 validation when setting DataUri on the
contents.
- When using DataUri parameters those merge with the current content
metadata.
i.e:
`data:image/jpeg;parameter1=value1;parameter2=value2;base64,binary==`

### ⚠️ **ImageContent** Fixes bugs and inconsistent behavior:
- Setting the Data of an image didn't change the current data uri and
vice versa, allowing the same image content instance to hold two different
binary data representations.
- An Image could not have both a DataUri and a Uri in the same instance,
which limited scenarios where you have the image data but also want to keep a
reference to where that content came from.
- It wasn't possible to create an Image from a data uri greater than the
size limit of the .NET System.Uri type; see dotnet/runtime#96544.

### **FunctionResultContent**
- Update `Id` property to `CallId`.
  • Loading branch information
RogerBarreto committed Jun 10, 2024
1 parent f1f53f4 commit 737385c
Show file tree
Hide file tree
Showing 38 changed files with 2,123 additions and 278 deletions.
437 changes: 437 additions & 0 deletions docs/decisions/0045-kernel-content-graduation.md

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions dotnet/samples/Concepts/Agents/Legacy_AgentCharts.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,9 @@ await foreach (var message in thread.InvokeAsync(agent, question))
var path = Path.Combine(Environment.CurrentDirectory, filename);
Console.WriteLine($"# {message.Role}: {message.Content}");
Console.WriteLine($"# {message.Role}: {path}");
var content = fileService.GetFileContent(message.Content);
var content = await fileService.GetFileContentAsync(message.Content);
await using var outputStream = File.OpenWrite(filename);
await using var inputStream = await content.GetStreamAsync();
await inputStream.CopyToAsync(outputStream);
await outputStream.WriteAsync(content.Data!.Value);
Process.Start(
new ProcessStartInfo
{
Expand Down
2 changes: 1 addition & 1 deletion dotnet/samples/Concepts/Agents/Legacy_AgentTools.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ public async Task RunRetrievalToolAsync()
var fileService = kernel.GetRequiredService<OpenAIFileService>();
var result =
await fileService.UploadContentAsync(
new BinaryContent(() => Task.FromResult(EmbeddedResource.ReadStream("travelinfo.txt")!)),
new BinaryContent(await EmbeddedResource.ReadAllAsync("travelinfo.txt")!, "text/plain"),
new OpenAIFileUploadExecutionSettings("travelinfo.txt", OpenAIFilePurpose.Assistants));

var fileId = result.Id;
Expand Down
146 changes: 146 additions & 0 deletions dotnet/samples/Concepts/Agents/OpenAIAssistant_MultipleContents.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
// Copyright (c) Microsoft. All rights reserved.
using Azure.AI.OpenAI.Assistants;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Agents;
using Microsoft.SemanticKernel.Agents.OpenAI;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using Resources;

namespace Agents;

/// <summary>
/// Demonstrate using retrieval on <see cref="OpenAIAssistantAgent"/> together with
/// uploading and downloading several kinds of <see cref="BinaryContent"/>.
/// </summary>
public class OpenAIAssistant_MultipleContents(ITestOutputHelper output) : BaseTest(output)
{
    /// <summary>
    /// Retrieval tool not supported on Azure OpenAI.
    /// </summary>
    protected override bool ForceOpenAI => true;

    /// <summary>
    /// Uploads image and text contents through <see cref="OpenAIFileService"/>, creates an assistant
    /// agent with retrieval enabled over the uploaded text file, and asks it a few questions.
    /// </summary>
    [Fact]
    public async Task RunAsync()
    {
        OpenAIFileService fileService = new(TestConfiguration.OpenAI.ApiKey);

        // InnerContent carries the file name so it can be reused as the upload name below.
        BinaryContent[] files = [
            // Audio is not supported by Assistant API
            // new AudioContent(await EmbeddedResource.ReadAllAsync("test_audio.wav")!, mimeType:"audio/wav", innerContent: "test_audio.wav"),
            new ImageContent(await EmbeddedResource.ReadAllAsync("sample_image.jpg")!, mimeType: "image/jpeg") { InnerContent = "sample_image.jpg" },
            new ImageContent(await EmbeddedResource.ReadAllAsync("test_image.jpg")!, mimeType: "image/jpeg") { InnerContent = "test_image.jpg" },
            new BinaryContent(data: await EmbeddedResource.ReadAllAsync("travelinfo.txt"), mimeType: "text/plain")
            {
                InnerContent = "travelinfo.txt"
            }
        ];

        var fileIds = new List<string>();
        foreach (var file in files)
        {
            try
            {
                var uploadFile = await fileService.UploadContentAsync(file,
                    new OpenAIFileUploadExecutionSettings(file.InnerContent!.ToString()!, Microsoft.SemanticKernel.Connectors.OpenAI.OpenAIFilePurpose.Assistants));

                fileIds.Add(uploadFile.Id);
            }
            catch (HttpOperationException hex)
            {
                // Surface the service response before failing so the upload error is diagnosable.
                Console.WriteLine(hex.ResponseContent);
                Assert.Fail($"Failed to upload file: {hex.Message}");
            }
        }

        // Define the agent
        OpenAIAssistantAgent agent =
            await OpenAIAssistantAgent.CreateAsync(
                kernel: new(),
                config: new(this.ApiKey, this.Endpoint),
                new()
                {
                    EnableRetrieval = true, // Enable retrieval
                    ModelId = this.Model,
                    // FileIds = fileIds Currently Assistant API only supports text files, no images or audio.
                    // The text file is the last entry of the array above, hence the last uploaded id.
                    FileIds = [fileIds[^1]]
                });

        // Create a chat for agent interaction.
        var chat = new AgentGroupChat();

        // Respond to user input
        try
        {
            await InvokeAgentAsync("Where did sam go?");
            await InvokeAgentAsync("When does the flight leave Seattle?");
            await InvokeAgentAsync("What is the hotel contact info at the destination?");
        }
        finally
        {
            // Always remove the remote agent, even when an invocation throws.
            await agent.DeleteAsync();
        }

        // Local function to invoke agent and display the conversation messages.
        async Task InvokeAgentAsync(string input)
        {
            chat.AddChatMessage(new ChatMessageContent(AuthorRole.User, input));

            Console.WriteLine($"# {AuthorRole.User}: '{input}'");

            await foreach (var content in chat.InvokeAsync(agent))
            {
                Console.WriteLine($"# {content.Role} - {content.AuthorName ?? "*"}: '{content.Content}'");
            }
        }
    }

    /// <summary>
    /// Uploads audio, image and text contents with the <see cref="AssistantsClient"/>, downloads each one
    /// again, rebuilds the matching content type from the downloaded bytes, and deletes the remote file.
    /// </summary>
    [Fact]
    public async Task SendingAndRetrievingFilesAsync()
    {
        var openAIClient = new AssistantsClient(TestConfiguration.OpenAI.ApiKey);
        OpenAIFileService fileService = new(TestConfiguration.OpenAI.ApiKey);

        BinaryContent[] files = [
            new AudioContent(await EmbeddedResource.ReadAllAsync("test_audio.wav")!, mimeType: "audio/wav") { InnerContent = "test_audio.wav" },
            new ImageContent(await EmbeddedResource.ReadAllAsync("sample_image.jpg")!, mimeType: "image/jpeg") { InnerContent = "sample_image.jpg" },
            new ImageContent(await EmbeddedResource.ReadAllAsync("test_image.jpg")!, mimeType: "image/jpeg") { InnerContent = "test_image.jpg" },
            new BinaryContent(data: await EmbeddedResource.ReadAllAsync("travelinfo.txt"), mimeType: "text/plain") { InnerContent = "travelinfo.txt" }
        ];

        // Maps the remote file id to the local content it was created from.
        var fileIds = new Dictionary<string, BinaryContent>();
        foreach (var file in files)
        {
            // Data is a nullable ReadOnlyMemory<byte>. Unwrap it so the raw bytes are uploaded:
            // passing the nullable itself to "new BinaryData(...)" selects the object overload,
            // which JSON-serializes the value instead of sending the file bytes.
            var result = await openAIClient.UploadFileAsync(BinaryData.FromBytes(file.Data!.Value), Azure.AI.OpenAI.Assistants.OpenAIFilePurpose.FineTune);
            fileIds.Add(result.Value.Id, file);
        }

        foreach (var file in (await openAIClient.GetFilesAsync(Azure.AI.OpenAI.Assistants.OpenAIFilePurpose.FineTune)).Value)
        {
            // Only handle the files uploaded by this run; single lookup instead of ContainsKey + indexer.
            if (!fileIds.TryGetValue(file.Id, out var uploaded))
            {
                continue;
            }

            var data = (await openAIClient.GetFileContentAsync(file.Id)).Value;

            var mimeType = uploaded.MimeType;
            var fileName = uploaded.InnerContent!.ToString();
            var metadata = new Dictionary<string, object?> { ["id"] = file.Id };
            var uri = new Uri($"https://api.openai.com/v1/files/{file.Id}/content");

            // Rebuild the most specific content type for the known mime types, falling back to BinaryContent.
            BinaryContent content = mimeType switch
            {
                "image/jpeg" => new ImageContent(data, mimeType) { Uri = uri, InnerContent = fileName, Metadata = metadata },
                "audio/wav" => new AudioContent(data, mimeType) { Uri = uri, InnerContent = fileName, Metadata = metadata },
                _ => new BinaryContent(data, mimeType) { Uri = uri, InnerContent = fileName, Metadata = metadata }
            };

            Console.WriteLine($"File: {fileName} - {mimeType}");

            // ImageContent.ToString() differs from the other graduated contents for backward compatibility.
            Console.WriteLine(content.ToString());

            // Delete the test file remotely
            await openAIClient.DeleteFileAsync(file.Id);
        }
    }
}
5 changes: 1 addition & 4 deletions dotnet/samples/Concepts/Agents/OpenAIAssistant_Retrieval.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,9 @@ public async Task RunAsync()
OpenAIFileService fileService = new(TestConfiguration.OpenAI.ApiKey);

OpenAIFileReference uploadFile =
await fileService.UploadContentAsync(
new BinaryContent(() => Task.FromResult(EmbeddedResource.ReadStream("travelinfo.txt")!)),
await fileService.UploadContentAsync(new BinaryContent(await EmbeddedResource.ReadAllAsync("travelinfo.txt")!, "text/plain"),
new OpenAIFileUploadExecutionSettings("travelinfo.txt", OpenAIFilePurpose.Assistants));

Console.WriteLine(this.ApiKey);

// Define the agent
OpenAIAssistantAgent agent =
await OpenAIAssistantAgent.CreateAsync(
Expand Down
2 changes: 1 addition & 1 deletion dotnet/samples/Concepts/AudioToText/OpenAI_AudioToText.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public async Task AudioToTextAsync()
// Read audio content from a file
await using var audioFileStream = EmbeddedResource.ReadStream(AudioFilename);
var audioFileBinaryData = await BinaryData.FromStreamAsync(audioFileStream!);
AudioContent audioContent = new(audioFileBinaryData);
AudioContent audioContent = new(audioFileBinaryData, mimeType: null);

// Convert audio to text
var textContent = await audioToTextService.GetTextContentAsync(audioContent, executionSettings);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public class ChatHistorySerialization(ITestOutputHelper output) : BaseTest(outpu
/// with <see cref="ChatMessageContent"/> having SK various content types as items.
/// </summary>
[Fact]
public void SerializeChatHistoryWithSKContentTypes()
public async Task SerializeChatHistoryWithSKContentTypesAsync()
{
int[] data = [1, 2, 3];

Expand All @@ -27,10 +27,8 @@ public void SerializeChatHistoryWithSKContentTypes()
[
new TextContent("Discuss the potential long-term consequences for the Earth's ecosystem as well."),
new ImageContent(new Uri("https://fake-random-test-host:123")),
new BinaryContent(new BinaryData(data)),
#pragma warning disable SKEXP0001
new AudioContent(new BinaryData(data))
#pragma warning restore SKEXP0001
new BinaryContent(new BinaryData(data), "application/octet-stream"),
new AudioContent(new BinaryData(data), "application/octet-stream")
]
};

Expand All @@ -49,7 +47,7 @@ public void SerializeChatHistoryWithSKContentTypes()

Console.WriteLine($"Image content: {(deserializedMessage.Items![1]! as ImageContent)!.Uri}");

Console.WriteLine($"Binary content: {Encoding.UTF8.GetString((deserializedMessage.Items![2]! as BinaryContent)!.Content!.Value.Span)}");
Console.WriteLine($"Binary content: {Encoding.UTF8.GetString((deserializedMessage.Items![2]! as BinaryContent)!.Data!.Value.Span)}");

Console.WriteLine($"Audio content: {Encoding.UTF8.GetString((deserializedMessage.Items![3]! as AudioContent)!.Data!.Value.Span)}");

Expand Down
4 changes: 2 additions & 2 deletions dotnet/samples/Concepts/ChatCompletion/Google_GeminiVision.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public async Task GoogleAIAsync()
new TextContent("What’s in this image?"),
// Google AI Gemini API requires the image to be in base64 format, doesn't support URI
// You have to always provide the mimeType for the image
new ImageContent(bytes) { MimeType = "image/jpeg" },
new ImageContent(bytes, "image/jpeg"),
]);

var reply = await chatCompletionService.GetChatMessageContentAsync(chatHistory);
Expand Down Expand Up @@ -109,7 +109,7 @@ public async Task VertexAIAsync()
new TextContent("What’s in this image?"),
// Vertex AI Gemini API supports both base64 and URI format
// You have to always provide the mimeType for the image
new ImageContent(bytes) { MimeType = "image/jpeg" },
new ImageContent(bytes, "image/jpeg"),
// The Cloud Storage URI of the image to include in the prompt.
// The bucket that stores the file must be in the same Google Cloud project that's sending the request.
// new ImageContent(new Uri("gs://generativeai-downloads/images/scones.jpg"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public async Task LocalImageAsync()
chatHistory.AddUserMessage(
[
new TextContent("What’s in this image?"),
new ImageContent(imageBytes) { MimeType = "image/jpg" }
new ImageContent(imageBytes, "image/jpg")
]);

var reply = await chatCompletionService.GetChatMessageContentAsync(chatHistory);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,7 @@ public async Task ImageToTextAsync()

// Read image content from a file
ReadOnlyMemory<byte> imageData = await EmbeddedResource.ReadAllAsync(ImageFilePath);
ImageContent imageContent = new(new BinaryData(imageData))
{
MimeType = "image/jpeg"
};
ImageContent imageContent = new(new BinaryData(imageData), "image/jpeg");

// Convert image to text
var textContent = await imageToText.GetTextContentAsync(imageContent, executionSettings);
Expand Down
5 changes: 1 addition & 4 deletions dotnet/samples/Demos/HuggingFaceImageToText/FormMain.cs
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,7 @@ private void UpdateImageDescription(string description)
/// <param name="pictureBox">The target <see cref="PictureBox"/>.</param>
/// <returns>Returns a <see cref="ImageContent"/>.</returns>
private static ImageContent CreateImageContentFromPictureBox(PictureBox pictureBox)
=> new(ConvertImageToReadOnlyMemory(pictureBox))
{
MimeType = GetMimeType(pictureBox.Tag?.ToString()!)
};
=> new(ConvertImageToReadOnlyMemory(pictureBox), GetMimeType(pictureBox.Tag?.ToString()!));

/// <summary>
/// Gets the image binary array from a <see cref="PictureBox"/>.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ public void FromChatHistoryImageAsImageContentItReturnsGeminiRequestWithChatHist
chatHistory.AddUserMessage(contentItems:
[new ImageContent(new Uri("https://example-image.com/")) { MimeType = "image/png" }]);
chatHistory.AddUserMessage(contentItems:
[new ImageContent(imageAsBytes) { MimeType = "image/png" }]);
[new ImageContent(imageAsBytes, "image/png")]);
var executionSettings = new GeminiPromptExecutionSettings();

// Act
Expand Down
11 changes: 8 additions & 3 deletions dotnet/src/Connectors/Connectors.OpenAI/AzureSdk/ClientCore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -340,14 +340,19 @@ internal ClientCore(ILogger? logger = null)
CancellationToken cancellationToken)
{
Verify.NotNull(content.Data);
var audioData = content.Data.Value;
if (audioData.IsEmpty)
{
throw new ArgumentException("Audio data cannot be empty", nameof(content));
}

OpenAIAudioToTextExecutionSettings? audioExecutionSettings = OpenAIAudioToTextExecutionSettings.FromExecutionSettings(executionSettings);

Verify.ValidFilename(audioExecutionSettings?.Filename);

var audioOptions = new AudioTranscriptionOptions
{
AudioData = BinaryData.FromBytes(content.Data.Value),
AudioData = BinaryData.FromBytes(audioData),
DeploymentName = this.DeploymentOrModelName,
Filename = audioExecutionSettings.Filename,
Language = audioExecutionSettings.Language,
Expand Down Expand Up @@ -1241,13 +1246,13 @@ private static List<ChatRequestMessage> GetRequestMessages(ChatMessageContent me

if (resultContent.Result is Exception ex)
{
toolMessages.Add(new ChatRequestToolMessage($"Error: Exception while invoking function. {ex.Message}", resultContent.Id));
toolMessages.Add(new ChatRequestToolMessage($"Error: Exception while invoking function. {ex.Message}", resultContent.CallId));
continue;
}

var stringResult = ProcessFunctionResult(resultContent.Result ?? string.Empty, toolCallBehavior);

toolMessages.Add(new ChatRequestToolMessage(stringResult ?? string.Empty, resultContent.Id));
toolMessages.Add(new ChatRequestToolMessage(stringResult ?? string.Empty, resultContent.CallId));
}

if (toolMessages is not null)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- https://learn.microsoft.com/en-us/dotnet/fundamentals/package-validation/diagnostic-ids -->
<!--
  Package-validation suppressions for Microsoft.SemanticKernel.Connectors.OpenAI.
  Both entries silence diagnostic CP0002 for the same member,
  OpenAIFileService.GetFileContent(string, CancellationToken), once per target framework.
  NOTE(review): CP0002 is presumably the "member missing from the compared assembly" diagnostic
  described at the link above; confirm against that page.
-->
<Suppressions xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<!-- net8.0 assembly -->
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.SemanticKernel.Connectors.OpenAI.OpenAIFileService.GetFileContent(System.String,System.Threading.CancellationToken)</Target>
<Left>lib/net8.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Left>
<Right>lib/net8.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
<!-- netstandard2.0 assembly -->
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.SemanticKernel.Connectors.OpenAI.OpenAIFileService.GetFileContent(System.String,System.Threading.CancellationToken)</Target>
<Left>lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Left>
<Right>lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
</Suppressions>
Loading

0 comments on commit 737385c

Please sign in to comment.