Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
aec47dd
Add dotnet integration test report to CI
giles17 Apr 27, 2026
450eab4
chore: trigger dotnet CI for report validation
giles17 Apr 27, 2026
f48c8b3
fix: use .junit extension (not .junit.xml) for xunit v3 output
giles17 Apr 27, 2026
df49b91
fix: use deterministic provider-qualified keys for dotnet tests
giles17 Apr 27, 2026
4ff5130
fix: match Python report summary format (Total, passed/total, etc.)
giles17 Apr 27, 2026
2aee1d8
feat: split dotnet report into per-framework tables
giles17 Apr 27, 2026
d874408
Re-enable 7 flaky dotnet integration tests with increased timeouts
giles17 Apr 28, 2026
d42cfae
Re-skip LLM non-determinism flaky tests, keep timeout fixes
giles17 Apr 28, 2026
a0d45a0
Enable Anthropic integration tests in CI
giles17 Apr 28, 2026
d9c29ff
Fix missing System using in AnthropicSkillsIntegrationTests
giles17 Apr 28, 2026
1cc3413
Skip flaky SingleAgentOrchestrationChainingSampleValidationAsync
giles17 Apr 28, 2026
d6261ef
Re-enable HITL and LongRunningTools tests with timeout and flexibilit…
giles17 Apr 30, 2026
bfcbe69
Increase AzureFunctions LongRunningTools test timeouts from 90s to 180s
giles17 May 1, 2026
c1486c8
Merge remote-tracking branch 'upstream/main' into dotnet-test-report
giles17 May 1, 2026
8aa49be
Merge main and fix dotnet report path after flaky_report rename
giles17 May 1, 2026
721709f
Add RetryFact to DurableTask and AzureFunctions integration tests
giles17 May 1, 2026
caec7cb
Merge branch 'main' into dotnet-test-report
giles17 May 4, 2026
4288879
Add persist-credentials: false to Integration Test Report checkout step
giles17 May 4, 2026
9372fd7
Merge branch 'main' into dotnet-test-report
giles17 May 6, 2026
2aacb42
small fixes
giles17 May 6, 2026
458a378
Merge branch 'main' into dotnet-test-report
giles17 May 6, 2026
2867351
disable anthropic failing tests
giles17 May 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions .github/workflows/dotnet-build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,8 @@ jobs:
-c ${{ matrix.configuration }} `
--no-build -v Normal `
--report-xunit-trx `
--report-junit `
--results-directory ../IntegrationTestResults/ `
--ignore-exit-code 8 `
--filter-not-trait "Category=IntegrationDisabled" `
--filter-not-trait "Category=FoundryHostedAgents" `
Expand All @@ -294,6 +296,10 @@ jobs:
AZURE_AI_PROJECT_ENDPOINT: ${{ vars.AZURE_AI_PROJECT_ENDPOINT }}
AZURE_AI_MODEL_DEPLOYMENT_NAME: ${{ vars.AZURE_AI_MODEL_DEPLOYMENT_NAME }}
AZURE_AI_BING_CONNECTION_ID: ${{ vars.AZURE_AI_BING_CONNECTION_ID }}
# Anthropic Models
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
ANTHROPIC_CHAT_MODEL_NAME: ${{ vars.ANTHROPIC_CHAT_MODEL_NAME }}
ANTHROPIC_REASONING_MODEL_NAME: ${{ vars.ANTHROPIC_REASONING_MODEL_NAME }}

# Generate test reports and check coverage
- name: Generate test reports
Expand All @@ -316,6 +322,14 @@ jobs:
shell: pwsh
run: ./dotnet/eng/scripts/dotnet-check-coverage.ps1 -JsonReportPath "TestResults/Reports/Summary.json" -CoverageThreshold $env:COVERAGE_THRESHOLD

- name: Upload integration test results
if: always() && github.event_name != 'pull_request' && matrix.integration-tests
uses: actions/upload-artifact@v7
with:
name: dotnet-test-results-${{ matrix.targetFramework }}-${{ matrix.os }}
path: IntegrationTestResults/**/*.junit
if-no-files-found: ignore

# The Foundry hosted-agent IT is costly (it builds a container, pushes to ACR, and provisions
# live agents on a separate Foundry project). Running it in its own job keeps the overall
# workflow time roughly flat: it executes in parallel to dotnet-build and dotnet-test and is
Expand Down Expand Up @@ -448,3 +462,64 @@ jobs:
uses: actions/github-script@v8
with:
script: core.setFailed('Integration Tests Cancelled!')

# Integration test trend report (aggregates JUnit XML results from dotnet test jobs)
dotnet-integration-test-report:
name: Integration Test Report
if: >
always() &&
github.event_name != 'pull_request' &&
(contains(join(needs.*.result, ','), 'success') ||
contains(join(needs.*.result, ','), 'failure'))
needs: [dotnet-test]
runs-on: ubuntu-latest
defaults:
run:
working-directory: python
steps:
- uses: actions/checkout@v6
Comment thread
giles17 marked this conversation as resolved.
Comment thread
giles17 marked this conversation as resolved.
with:
persist-credentials: false
sparse-checkout: |
.github/actions/python-setup
python
- name: Set up python and install the project
uses: ./.github/actions/python-setup
with:
python-version: "3.13"
os: ${{ runner.os }}
- name: Download all test results from current run
uses: actions/download-artifact@v4
with:
pattern: dotnet-test-results-*
path: dotnet-test-results/
- name: Restore report history cache
uses: actions/cache/restore@v4
with:
path: python/dotnet-integration-report-history.json
key: dotnet-integration-report-history-${{ github.run_id }}
restore-keys: |
dotnet-integration-report-history-
- name: Generate trend report
run: >
uv run python scripts/integration_test_report/aggregate.py
../dotnet-test-results/
dotnet-integration-report-history.json
dotnet-integration-test-report.md
- name: Post to Job Summary
if: always()
run: cat dotnet-integration-test-report.md >> $GITHUB_STEP_SUMMARY
- name: Save report history cache
if: always()
uses: actions/cache/save@v4
with:
path: python/dotnet-integration-report-history.json
key: dotnet-integration-report-history-${{ github.run_id }}
- name: Upload trend report
if: always()
uses: actions/upload-artifact@v7
with:
name: dotnet-integration-test-report
path: |
python/dotnet-integration-test-report.md
python/dotnet-integration-report-history.json
1 change: 1 addition & 0 deletions dotnet/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,4 @@ Console.WriteLine(await agent.RunAsync("Write a haiku about Microsoft Agent Fram
- [Design Documents](../docs/design)
- [Architectural Decision Records](../docs/decisions)
- [MSFT Learn Docs](https://learn.microsoft.com/agent-framework/overview/agent-framework-overview)

Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@ namespace AnthropicChatCompletion.IntegrationTests;

public class AnthropicChatCompletionFixture : IChatClientAgentFixture
{
// All tests for Anthropic are intended to be run locally, as the CI pipeline for Anthropic is not set up.
internal const string SkipReason = "Integrations tests for local execution only";

private readonly bool _useReasoningModel;
private readonly bool _useBeta;

Expand Down Expand Up @@ -105,7 +102,22 @@ public Task DeleteSessionAsync(AgentSession session) =>

public async ValueTask InitializeAsync()
{
Assert.SkipWhen(SkipReason is not null, SkipReason ?? string.Empty);
// Temporarily disabled: Anthropic SDK has a binary incompatibility with the current
// Microsoft.Extensions.AI version (WebSearchToolResultContent.Results method not found).
// See: https://github.com/microsoft/agent-framework/pull/5515
Assert.Skip("Anthropic integration tests temporarily disabled due to SDK incompatibility with Microsoft.Extensions.AI");

try
{
_ = TestConfiguration.GetRequiredValue(TestSettings.AnthropicApiKey);
_ = TestConfiguration.GetRequiredValue(TestSettings.AnthropicChatModelName);
_ = TestConfiguration.GetRequiredValue(TestSettings.AnthropicReasoningModelName);
}
catch (InvalidOperationException ex)
{
Assert.Skip("Anthropic configuration could not be loaded. Error:" + ex.Message);
}
Comment thread
giles17 marked this conversation as resolved.

this._agent = await this.CreateChatClientAgentAsync();
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Threading.Tasks;
using AgentConformance.IntegrationTests.Support;
using Anthropic;
Expand All @@ -17,19 +18,28 @@ namespace AnthropicChatCompletion.IntegrationTests;
/// Integration tests for Anthropic Skills functionality.
/// These tests are designed to be run locally with a valid Anthropic API key.
/// </summary>
/// <remarks>
/// Temporarily disabled due to Anthropic SDK binary incompatibility with
/// the current Microsoft.Extensions.AI version (WebSearchToolResultContent.Results).
/// </remarks>
[Trait("Category", "IntegrationDisabled")]
public sealed class AnthropicSkillsIntegrationTests
{
// All tests for Anthropic are intended to be run locally, as the CI pipeline for Anthropic is not set up.
private const string SkipReason = "Integrations tests for local execution only";

[Fact]
public async Task CreateAgentWithPptxSkillAsync()
{
Assert.SkipWhen(SkipReason is not null, SkipReason ?? string.Empty);

// Arrange
AnthropicClient anthropicClient = new() { ApiKey = TestConfiguration.GetRequiredValue(TestSettings.AnthropicApiKey) };
string model = TestConfiguration.GetRequiredValue(TestSettings.AnthropicChatModelName);
AnthropicClient? anthropicClient;
string? model;
try
{
anthropicClient = new() { ApiKey = TestConfiguration.GetRequiredValue(TestSettings.AnthropicApiKey) };
model = TestConfiguration.GetRequiredValue(TestSettings.AnthropicChatModelName);
}
catch (InvalidOperationException ex)
{
Assert.Skip("Anthropic configuration could not be loaded. Error:" + ex.Message);
return;
}

BetaSkillParams pptxSkill = new()
{
Expand All @@ -56,10 +66,16 @@ public async Task CreateAgentWithPptxSkillAsync()
[Fact]
public async Task ListAnthropicManagedSkillsAsync()
{
Assert.SkipWhen(SkipReason is not null, SkipReason ?? string.Empty);

// Arrange
AnthropicClient anthropicClient = new() { ApiKey = TestConfiguration.GetRequiredValue(TestSettings.AnthropicApiKey) };
AnthropicClient? anthropicClient;
try
{
anthropicClient = new() { ApiKey = TestConfiguration.GetRequiredValue(TestSettings.AnthropicApiKey) };
}
catch (InvalidOperationException ex)
{
Assert.Skip("Anthropic configuration could not be loaded. Error:" + ex.Message);
return;
}

// Act
SkillListPage skills = await anthropicClient.Beta.Skills.List(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ namespace Microsoft.Agents.AI.DurableTask.IntegrationTests;
[Trait("Category", "SampleValidation")]
public sealed class ConsoleAppSamplesValidation(ITestOutputHelper outputHelper) : SamplesValidationBase(outputHelper)
{
private const string SkipFlakyTimingTest = "Flaky: timing-dependent LLM test, see https://github.com/microsoft/agent-framework/issues/4971";

private static readonly string s_samplesPath = Path.GetFullPath(
Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "..", "..", "samples", "04-hosting", "DurableAgents", "ConsoleApps"));

Expand Down Expand Up @@ -69,7 +67,7 @@ await this.RunSampleTestAsync(samplePath, async (process, logs) =>
});
}

[Fact]
[RetryFact(2, 5000)]
public async Task SingleAgentOrchestrationChainingSampleValidationAsync()
{
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts();
Expand Down Expand Up @@ -105,7 +103,7 @@ await this.RunSampleTestAsync(samplePath, async (process, logs) =>
});
}

[Fact]
[RetryFact(2, 5000)]
public async Task MultiAgentConcurrencySampleValidationAsync()
{
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts();
Expand Down Expand Up @@ -160,7 +158,7 @@ await this.RunSampleTestAsync(samplePath, async (process, logs) =>
});
}

[Fact]
[RetryFact(2, 5000)]
public async Task MultiAgentConditionalSampleValidationAsync()
{
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts();
Expand Down Expand Up @@ -237,14 +235,14 @@ private async Task TestSpamDetectionAsync(
Assert.True(foundSuccess, "Orchestration did not complete successfully.");
}

[Fact(Skip = SkipFlakyTimingTest)]
[RetryFact(2, 5000)]
public async Task SingleAgentOrchestrationHITLSampleValidationAsync()
{
string samplePath = Path.Combine(s_samplesPath, "05_AgentOrchestration_HITL");

await this.RunSampleTestAsync(samplePath, async (process, logs) =>
{
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts();
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(180));

// Start the HITL orchestration following the happy path from README
await this.WriteInputAsync(process, "The Future of Artificial Intelligence", testTimeoutCts.Token);
Expand All @@ -260,7 +258,7 @@ await this.RunSampleTestAsync(samplePath, async (process, logs) =>
while ((line = this.ReadLogLine(logs, testTimeoutCts.Token)) != null)
{
// Look for notification that content is ready. The first time we see this, we should send a rejection.
// The second time we see this, we should send approval.
// Subsequent times we see this, we should send approval (LLM may produce extra review cycles).
if (line.Contains("Content is ready for review", StringComparison.OrdinalIgnoreCase))
{
if (!rejectionSent)
Expand All @@ -275,20 +273,15 @@ await this.WriteInputAsync(
testTimeoutCts.Token);
rejectionSent = true;
}
else if (!approvalSent)
else
{
// Prompt: Approve? (y/n):
// Approve any subsequent draft (LLM non-determinism may produce extra review cycles)
await this.WriteInputAsync(process, "y", testTimeoutCts.Token);

// Prompt: Feedback (optional):
await this.WriteInputAsync(process, "Looks good!", testTimeoutCts.Token);
approvalSent = true;
}
else
{
// This should never happen
Assert.Fail("Unexpected message found.");
}
}

// Look for success message
Expand All @@ -311,14 +304,14 @@ await this.WriteInputAsync(
});
}

[Fact(Skip = SkipFlakyTimingTest)]
[RetryFact(2, 5000)]
public async Task LongRunningToolsSampleValidationAsync()
{
string samplePath = Path.Combine(s_samplesPath, "06_LongRunningTools");
await this.RunSampleTestAsync(samplePath, async (process, logs) =>
{
// This test takes a bit longer to run due to the multiple agent interactions and the lengthy content generation.
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(90));
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(180));

// Test starting an agent that schedules a content generation orchestration
await this.WriteInputAsync(
Expand All @@ -335,7 +328,7 @@ await this.WriteInputAsync(
while ((line = this.ReadLogLine(logs, testTimeoutCts.Token)) != null)
{
// Look for notification that content is ready. The first time we see this, we should send a rejection.
// The second time we see this, we should send approval.
// Subsequent times we see this, we should send approval (LLM may produce extra review cycles).
if (line.Contains("NOTIFICATION: Please review the following content for approval", StringComparison.OrdinalIgnoreCase))
{
// Wait for the notification to be fully written to the console
Expand All @@ -350,20 +343,15 @@ await this.WriteInputAsync(
testTimeoutCts.Token);
rejectionSent = true;
}
else if (!approvalSent)
else
{
// Approve the content. Note that we need to send a newline character to the console first before sending the input.
// Approve any subsequent draft (LLM non-determinism may produce extra review cycles)
await this.WriteInputAsync(
process,
"\nApprove the content",
testTimeoutCts.Token);
approvalSent = true;
}
else
{
// This should never happen
Assert.Fail("Unexpected message found.");
}
}

// Look for success message
Expand Down Expand Up @@ -396,14 +384,14 @@ await this.WriteInputAsync(
});
}

[Fact(Skip = SkipFlakyTimingTest)]
[RetryFact(2, 5000)]
public async Task ReliableStreamingSampleValidationAsync()
{
string samplePath = Path.Combine(s_samplesPath, "07_ReliableStreaming");
await this.RunSampleTestAsync(samplePath, async (process, logs) =>
{
// This test takes a bit longer to run due to the multiple agent interactions and the lengthy content generation.
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(90));
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(150));

// Test the agent endpoint with a simple prompt
await this.WriteInputAsync(process, "Plan a 5-day trip to Seattle. Include daily activities.", testTimeoutCts.Token);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,9 @@ namespace Microsoft.Agents.AI.DurableTask.IntegrationTests;
[Trait("Category", "Integration")]
public sealed class ExternalClientTests(ITestOutputHelper outputHelper) : IDisposable
{
private const string SkipFlakyTimingTest = "Flaky: timing-dependent LLM test, see https://github.com/microsoft/agent-framework/issues/4971";

private static readonly TimeSpan s_defaultTimeout = Debugger.IsAttached
? TimeSpan.FromMinutes(5)
: TimeSpan.FromSeconds(60);
: TimeSpan.FromSeconds(120);

private static readonly IConfiguration s_configuration =
new ConfigurationBuilder()
Expand All @@ -38,7 +36,7 @@ public sealed class ExternalClientTests(ITestOutputHelper outputHelper) : IDispo

public void Dispose() => this._cts.Dispose();

[Fact]
[RetryFact(2, 5000)]
public async Task SimplePromptAsync()
{
// Setup
Expand Down Expand Up @@ -77,7 +75,7 @@ await simpleAgentProxy.RunAsync(
Assert.Contains(agentLogs, log => log.EventId.Name == "LogAgentResponse");
}

[Fact(Skip = SkipFlakyTimingTest)]
[RetryFact(2, 5000)]
public async Task CallFunctionToolsAsync()
{
int weatherToolInvocationCount = 0;
Expand Down Expand Up @@ -129,7 +127,7 @@ string SuggestPackingList(string weather, bool isSunny)
Assert.Equal(1, packingListToolInvocationCount);
}

[Fact(Skip = SkipFlakyTimingTest)]
[RetryFact(2, 5000)]
public async Task CallLongRunningFunctionToolsAsync()
{
[Description("Starts a greeting workflow and returns the workflow instance ID")]
Expand Down
Loading
Loading