Skip to content

Commit

Permalink
.Net Add image support for Agent responses (#5113)
Browse files Browse the repository at this point in the history
### Motivation and Context

<!-- Thank you for your contribution to the semantic-kernel repo!
Please help reviewers and future users, providing the following
information:
  1. Why is this change required?
  2. What problem does it solve?
  3. What scenario does it contribute to?
  4. If it fixes an open issue, please link to the issue here.
-->

Update model and message to support image response.

### Description

<!-- Describe your changes, the overall approach, the underlying design.
These notes will help understanding how your code works. Thanks! -->

With the code-interpreter tool enabled, support for image-based
responses is critical.

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄
  • Loading branch information
crickman committed Feb 26, 2024
1 parent 2322d2d commit 6f46b04
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 3 deletions.
105 changes: 105 additions & 0 deletions dotnet/samples/KernelSyntaxExamples/Example85_AgentCharts.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Diagnostics;
using System.IO;
using System.Threading.Tasks;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using Microsoft.SemanticKernel.Experimental.Agents;
using Xunit;
using Xunit.Abstractions;

namespace Examples;

// ReSharper disable once InconsistentNaming
/// <summary>
/// Showcase usage of the code_interpreter tool to generate chart images
/// and retrieve them by file identifier.
/// </summary>
public sealed class Example85_AgentCharts : BaseTest
{
    /// <summary>
    /// Specific model is required that supports agents and parallel function calling.
    /// Currently this is limited to Open AI hosted services.
    /// </summary>
    private const string OpenAIFunctionEnabledModel = "gpt-4-1106-preview";

    /// <summary>
    /// Create a chart and retrieve by file_id.
    /// Each image response is saved to the current directory and opened with the
    /// default viewer (via cmd.exe).
    /// </summary>
    [Fact(Skip = "Launches external processes")]
    public async Task CreateChartAsync()
    {
        this.WriteLine("======== Using CodeInterpreter tool ========");

        if (TestConfiguration.OpenAI.ApiKey is null)
        {
            this.WriteLine("OpenAI apiKey not found. Skipping example.");
            return;
        }

        this.WriteLine(Environment.CurrentDirectory);

        var fileService = new OpenAIFileService(TestConfiguration.OpenAI.ApiKey);

        var agent =
            await new AgentBuilder()
                .WithOpenAIChatCompletion(OpenAIFunctionEnabledModel, TestConfiguration.OpenAI.ApiKey)
                .WithCodeInterpreter()
                .BuildAsync();

        try
        {
            var thread = await agent.NewThreadAsync();

            await InvokeAgentAsync(
                thread,
                "1-first", @"
Display this data using a bar-chart:
Banding Brown Pink Yellow Sum
X00000 339 433 126 898
X00300 48 421 222 691
X12345 16 395 352 763
Others 23 373 156 552
Sum 426 1622 856 2904
");
            await InvokeAgentAsync(thread, "2-colors", "Can you regenerate this same chart using the category names as the bar colors?");
            await InvokeAgentAsync(thread, "3-line", "Can you regenerate this as a line chart?");
        }
        finally
        {
            // Always remove the remote agent definition, even if an invocation fails.
            await agent.DeleteAsync();
        }

        // Sends a question to the agent on the given thread and reports each response;
        // image responses are downloaded to "<imageName>.jpg" and opened in a viewer.
        async Task InvokeAgentAsync(IAgentThread thread, string imageName, string question)
        {
            await foreach (var message in thread.InvokeAsync(agent, question))
            {
                if (message.ContentType == ChatMessageType.Image)
                {
                    var filename = $"{imageName}.jpg";
                    var path = Path.Combine(Environment.CurrentDirectory, filename);

                    // For image messages, Content is passed to the file service as the file reference.
                    var content = fileService.GetFileContent(message.Content);

                    // File.Create truncates any file left over from a previous run
                    // (File.OpenWrite would leave stale trailing bytes behind).
                    // The explicit using-scope guarantees the file is flushed and closed
                    // before the external viewer is asked to open it.
                    await using (var outputStream = File.Create(path))
                    await using (var inputStream = await content.GetStreamAsync())
                    {
                        await inputStream.CopyToAsync(outputStream);
                    }

                    this.WriteLine($"# {message.Role}: {path}");

                    // Quote the path so directories containing spaces work; the empty
                    // leading "" is the window-title argument expected by "start".
                    using var viewer = Process.Start(
                        new ProcessStartInfo
                        {
                            FileName = "cmd.exe",
                            Arguments = $"/C start \"\" \"{path}\""
                        });
                }
                else
                {
                    this.WriteLine($"# {message.Role}: {message.Content}");
                }
            }

            this.WriteLine();
        }
    }

    public Example85_AgentCharts(ITestOutputHelper output) : base(output) { }
}
21 changes: 21 additions & 0 deletions dotnet/src/Experimental/Agents/IChatMessage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,22 @@

namespace Microsoft.SemanticKernel.Experimental.Agents;

/// <summary>
/// Identifies the kind of content carried by a chat message.
/// </summary>
public enum ChatMessageType
{
/// <summary>
/// The message content is text.
/// </summary>
Text,

/// <summary>
/// The message content is an image.
/// </summary>
Image,
}

/// <summary>
/// Represents a message that is part of an agent thread.
/// </summary>
Expand All @@ -20,6 +36,11 @@ public interface IChatMessage
/// </summary>
string? AgentId { get; }

/// <summary>
/// The type of content carried by the message (text or image).
/// </summary>
ChatMessageType ContentType { get; }

/// <summary>
/// The chat message content.
/// </summary>
Expand Down
14 changes: 11 additions & 3 deletions dotnet/src/Experimental/Agents/Internal/ChatMessage.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
Expand All @@ -19,6 +20,9 @@ internal sealed class ChatMessage : IChatMessage
/// <inheritdoc/>
public string? AgentId { get; }

/// <inheritdoc/>
public ChatMessageType ContentType { get; }

/// <inheritdoc/>
public string Content { get; }

Expand All @@ -36,13 +40,17 @@ internal sealed class ChatMessage : IChatMessage
internal ChatMessage(ThreadMessageModel model)
{
    // Only the first content item of the message is surfaced.
    var content = model.Content.First();

    // Image content has no text block, so there are no annotations to project.
    // (Dereferencing content.Text unconditionally would throw for image responses.)
    this.Annotations =
        content.Text == null ?
            Array.Empty<IAnnotation>() :
            content.Text.Annotations.Select(a => new Annotation(a.Text, a.StartIndex, a.EndIndex, a.FileCitation?.FileId ?? a.FilePath!.FileId, a.FileCitation?.Quote)).ToArray();

    this.Id = model.Id;
    this.AgentId = string.IsNullOrWhiteSpace(model.AssistantId) ? null : model.AssistantId;
    this.Role = model.Role;

    // A missing text block is treated as an image response; in that case the
    // content is the image file identifier rather than the message text.
    this.ContentType = content.Text == null ? ChatMessageType.Image : ChatMessageType.Text;
    this.Content = content.Text?.Value ?? content.Image?.FileId ?? string.Empty;
    this.Properties = new ReadOnlyDictionary<string, object>(model.Metadata);
}

Expand Down
18 changes: 18 additions & 0 deletions dotnet/src/Experimental/Agents/Models/ThreadMessageModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,31 @@ public sealed class ContentModel
[JsonPropertyName("type")]
public string Type { get; set; } = string.Empty;

/// <summary>
/// Image content.
/// </summary>
[JsonPropertyName("image_file")]
public ImageContentModel? Image { get; set; }

/// <summary>
/// Text content.
/// </summary>
[JsonPropertyName("text")]
public TextContentModel? Text { get; set; }
}

/// <summary>
/// Image content.
/// </summary>
public sealed class ImageContentModel
{
/// <summary>
/// The image file identifier.
/// </summary>
[JsonPropertyName("file_id")]
public string FileId { get; set; } = string.Empty;
}

/// <summary>
/// Text content.
/// </summary>
Expand Down

0 comments on commit 6f46b04

Please sign in to comment.