-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
.Net ImageToText Abstraction + HuggingFace Connector ImageToText support. (#5150)
### Motivation and Context Add support for a new modality Image -> Text including a Hugging Face service for it. Resolves #4947 --------- Co-authored-by: Krzysztof Kasprowicz <60486987+Krzysztof318@users.noreply.github.com> Co-authored-by: Dmytro Struk <13853051+dmytrostruk@users.noreply.github.com>
- Loading branch information
1 parent
0f97fdc
commit 0e10dd5
Showing
16 changed files
with
551 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
52 changes: 52 additions & 0 deletions
52
dotnet/samples/KernelSyntaxExamples/Example85_ImageToText.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using System; | ||
using System.Threading.Tasks; | ||
using Microsoft.SemanticKernel; | ||
using Microsoft.SemanticKernel.Connectors.HuggingFace; | ||
using Microsoft.SemanticKernel.ImageToText; | ||
using Resources; | ||
using Xunit; | ||
using Xunit.Abstractions; | ||
|
||
namespace Examples; | ||
|
||
/// <summary>
/// Sample showing how to obtain a text description of an image through an
/// <see cref="IImageToTextService"/> registered by the Hugging Face connector.
/// </summary>
public sealed class Example85_ImageToText : BaseTest
{
    /// <summary>Model passed to the Hugging Face image-to-text registration.</summary>
    private const string ImageToTextModel = "Salesforce/blip-image-captioning-base";

    /// <summary>Name of the image handed to <c>EmbeddedResource.ReadAllAsync</c>.</summary>
    private const string ImageFilePath = "test_image.jpg";

    public Example85_ImageToText(ITestOutputHelper output) : base(output) { }

    /// <summary>
    /// Builds a kernel with the Hugging Face image-to-text service, sends the sample image
    /// to it and writes the returned text to the test output.
    /// </summary>
    [Fact]
    public async Task ImageToTextAsync()
    {
        // Register the HuggingFace image-to-text service and build the kernel
        var builder = Kernel.CreateBuilder()
            .AddHuggingFaceImageToText(
                model: ImageToTextModel,
                apiKey: TestConfiguration.HuggingFace.ApiKey);
        Kernel kernel = builder.Build();

        IImageToTextService service = kernel.GetRequiredService<IImageToTextService>();

        // Execution settings are optional; here only the token limit is set
        var settings = new HuggingFacePromptExecutionSettings
        {
            MaxTokens = 500
        };

        // Load the image bytes and wrap them as JPEG image content
        ReadOnlyMemory<byte> imageBytes = await EmbeddedResource.ReadAllAsync(ImageFilePath);
        var image = new ImageContent(new BinaryData(imageBytes, "image/jpeg"));

        // Ask the service for the text that corresponds to the image
        var description = await service.GetTextContentAsync(image, settings);

        // Print the result
        this.WriteLine(description.Text);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
218 changes: 218 additions & 0 deletions
218
...rc/Connectors/Connectors.HuggingFace.UnitTests/ImageToText/HuggingFaceImageToTextTests.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,218 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using System; | ||
using System.Linq; | ||
using System.Net.Http; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
using Microsoft.SemanticKernel; | ||
using Microsoft.SemanticKernel.Connectors.HuggingFace; | ||
using Microsoft.SemanticKernel.ImageToText; | ||
using Xunit; | ||
|
||
namespace SemanticKernel.Connectors.HuggingFace.UnitTests; | ||
|
||
/// <summary>
/// Unit tests for <see cref="HuggingFaceImageToTextService"/> class.
/// </summary>
public sealed class HuggingFaceImageToTextTests : IDisposable
{
    private readonly HttpMessageHandlerStub _messageHandlerStub;
    private readonly HttpClient _httpClient;
    private readonly ImageContent _imageContentInput;

    /// <summary>
    /// Prepares the shared fixtures: a stub handler that answers with the canned
    /// <c>imagetotext_test_response.json</c> payload, an <see cref="HttpClient"/> on top of it,
    /// and the JPEG image content used as input by every test.
    /// </summary>
    public HuggingFaceImageToTextTests()
    {
        this._messageHandlerStub = new HttpMessageHandlerStub();
        this._messageHandlerStub.ResponseToReturn.Content = new StringContent(HuggingFaceTestHelper.GetTestResponse("imagetotext_test_response.json"));

        // disposeHandler is false: the stub is disposed explicitly in Dispose().
        this._httpClient = new HttpClient(this._messageHandlerStub, false);

        var imageBytes = HuggingFaceTestHelper.GetTestResponseBytes("imagetotext_test_request.jpg");
        this._imageContentInput = new ImageContent(new BinaryData(imageBytes, "image/jpeg"), "model");
    }

    /// <summary>The request URI must end with the model passed to the service constructor.</summary>
    [Fact]
    public async Task SpecifiedModelShouldBeUsedAsync()
    {
        //Arrange
        var sut = new HuggingFaceImageToTextService("fake-model", httpClient: this._httpClient);

        //Act
        await sut.GetTextContentsAsync(this._imageContentInput);

        //Assert
        Assert.EndsWith("/fake-model", this._messageHandlerStub.RequestUri?.AbsoluteUri, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>Without an API key the request must not carry an Authorization header.</summary>
    [Fact]
    public async Task NoAuthorizationHeaderShouldBeAddedIfApiKeyIsNotProvidedAsync()
    {
        //Arrange
        var sut = new HuggingFaceImageToTextService("fake-model", apiKey: null, httpClient: this._httpClient);

        //Act
        await sut.GetTextContentsAsync(this._imageContentInput);

        //Assert
        Assert.False(this._messageHandlerStub.RequestHeaders?.Contains("Authorization"));
    }

    /// <summary>With an API key the request must carry a single "Bearer" Authorization header.</summary>
    [Fact]
    public async Task AuthorizationHeaderShouldBeAddedIfApiKeyIsProvidedAsync()
    {
        //Arrange
        var sut = new HuggingFaceImageToTextService("fake-model", apiKey: "fake-api-key", httpClient: this._httpClient);

        //Act
        await sut.GetTextContentsAsync(this._imageContentInput);

        //Assert
        Assert.True(this._messageHandlerStub.RequestHeaders?.Contains("Authorization"));

        var values = this._messageHandlerStub.RequestHeaders!.GetValues("Authorization");

        var value = values.SingleOrDefault();
        Assert.Equal("Bearer fake-api-key", value);
    }

    /// <summary>The request must carry a single User-Agent header with the value "Semantic-Kernel".</summary>
    [Fact]
    public async Task UserAgentHeaderShouldBeUsedAsync()
    {
        //Arrange
        var sut = new HuggingFaceImageToTextService("fake-model", httpClient: this._httpClient);

        //Act
        await sut.GetTextContentsAsync(this._imageContentInput);

        //Assert
        Assert.True(this._messageHandlerStub.RequestHeaders?.Contains("User-Agent"));

        var values = this._messageHandlerStub.RequestHeaders!.GetValues("User-Agent");

        var value = values.SingleOrDefault();
        Assert.Equal("Semantic-Kernel", value);
    }

    /// <summary>An endpoint passed to the constructor must prefix the request URI.</summary>
    [Fact]
    public async Task ProvidedEndpointShouldBeUsedAsync()
    {
        //Arrange
        var sut = new HuggingFaceImageToTextService("fake-model", endpoint: new Uri("https://fake-random-test-host/fake-path"), httpClient: this._httpClient);

        //Act
        await sut.GetTextContentsAsync(this._imageContentInput);

        //Assert
        Assert.StartsWith("https://fake-random-test-host/fake-path", this._messageHandlerStub.RequestUri?.AbsoluteUri, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>When no endpoint is passed, the <see cref="HttpClient.BaseAddress"/> must prefix the request URI.</summary>
    [Fact]
    public async Task HttpClientBaseAddressShouldBeUsedAsync()
    {
        //Arrange
        this._httpClient.BaseAddress = new Uri("https://fake-random-test-host/fake-path");

        var sut = new HuggingFaceImageToTextService("fake-model", httpClient: this._httpClient);

        //Act
        await sut.GetTextContentsAsync(this._imageContentInput);

        //Assert
        Assert.StartsWith("https://fake-random-test-host/fake-path", this._messageHandlerStub.RequestUri?.AbsoluteUri, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>With neither endpoint nor base address, the public Hugging Face inference URL must be used.</summary>
    [Fact]
    public async Task DefaultAddressShouldBeUsedAsync()
    {
        //Arrange
        var sut = new HuggingFaceImageToTextService("fake-model", httpClient: this._httpClient);

        //Act
        await sut.GetTextContentsAsync(this._imageContentInput);

        //Assert
        Assert.StartsWith("https://api-inference.huggingface.co/models", this._messageHandlerStub.RequestUri?.AbsoluteUri, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>The full request URL must be the endpoint followed by "/models/" and the model.</summary>
    [Fact]
    public async Task ModelUrlShouldBeBuiltSuccessfullyAsync()
    {
        //Arrange
        var sut = new HuggingFaceImageToTextService("fake-model", endpoint: new Uri("https://fake-random-test-host/fake-path"), httpClient: this._httpClient);

        //Act
        await sut.GetTextContentsAsync(this._imageContentInput);

        //Assert
        Assert.Equal("https://fake-random-test-host/fake-path/models/fake-model", this._messageHandlerStub.RequestUri?.AbsoluteUri);
    }

    /// <summary>The request body must be exactly the bytes of the input image.</summary>
    [Fact]
    public async Task ShouldSendPromptToServiceAsync()
    {
        //Arrange
        var sut = new HuggingFaceImageToTextService("fake-model", httpClient: this._httpClient);

        //Act
        await sut.GetTextContentsAsync(this._imageContentInput);

        //Assert
        var requestPayload = this._messageHandlerStub.RequestContent;

        Assert.NotNull(requestPayload);
        Assert.Equal(this._imageContentInput.Data!.ToArray(), requestPayload);
    }

    /// <summary>The canned JSON response must be surfaced as a single text content with the generated text.</summary>
    [Fact]
    public async Task ShouldHandleServiceResponseAsync()
    {
        //Arrange
        var sut = new HuggingFaceImageToTextService("fake-model", endpoint: new Uri("https://fake-random-test-host/fake-path"), httpClient: this._httpClient);

        //Act
        var contents = await sut.GetTextContentsAsync(this._imageContentInput);

        //Assert
        Assert.NotNull(contents);

        var content = contents.SingleOrDefault();
        Assert.NotNull(content);
        Assert.Equal("This is test completion response", content.Text);
    }

    /// <summary>The returned text content must report the model the service was created with.</summary>
    [Fact]
    public async Task GetTextContentsShouldHaveModelIdDefinedAsync()
    {
        //Arrange
        var sut = new HuggingFaceImageToTextService("fake-model", endpoint: new Uri("https://fake-random-test-host/fake-path"), httpClient: this._httpClient);

        // Replace the default canned response before the service is called.
        this._messageHandlerStub.ResponseToReturn = new HttpResponseMessage(System.Net.HttpStatusCode.OK)
        {
            Content = new StringContent(@"
            [
                {
                    ""generated_text"": ""Why the sky is blue? | Dept. of Science & Mathematics Education | University of Notre Dame\nWhen I was in high school I had a pretty simple conception of reality. I believed that if something made sense to me, then it must also be true. I believed that some problems were so fundamental that I couldn’t understand""
                }
            ]",
            Encoding.UTF8,
            "application/json")
        };

        //Act
        var textContent = await sut.GetTextContentAsync(this._imageContentInput);

        //Assert
        Assert.NotNull(textContent.ModelId);
        Assert.Equal("fake-model", textContent.ModelId);
    }

    /// <summary>Releases the HTTP client and the stub handler created in the constructor.</summary>
    public void Dispose()
    {
        this._httpClient.Dispose();
        this._messageHandlerStub.Dispose();
    }
}
Binary file added
BIN
+36.9 KB
...nnectors/Connectors.HuggingFace.UnitTests/TestData/imagetotext_test_request.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 5 additions & 0 deletions
5
...t/src/Connectors/Connectors.HuggingFace.UnitTests/TestData/imagetotext_test_response.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
[ | ||
{ | ||
"generated_text": "This is test completion response" | ||
} | ||
] |
2 changes: 1 addition & 1 deletion
2
dotnet/src/Connectors/Connectors.HuggingFace/Connectors.HuggingFace.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.