microsoft · giles17 · May 7, 2026 · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/.github/workflows/dotnet-build-and-test.yml b/.github/workflows/dotnet-build-and-test.yml
@@ -273,6 +273,8 @@ jobs:
             -c ${{ matrix.configuration }} `
             --no-build -v Normal `
             --report-xunit-trx `
+            --report-junit `
+            --results-directory ../IntegrationTestResults/ `
             --ignore-exit-code 8 `
             --filter-not-trait "Category=IntegrationDisabled" `
             --filter-not-trait "Category=FoundryHostedAgents" `
@@ -294,6 +296,10 @@ jobs:
           AZURE_AI_PROJECT_ENDPOINT: ${{ vars.AZURE_AI_PROJECT_ENDPOINT }}
           AZURE_AI_MODEL_DEPLOYMENT_NAME: ${{ vars.AZURE_AI_MODEL_DEPLOYMENT_NAME }}
           AZURE_AI_BING_CONNECTION_ID: ${{ vars.AZURE_AI_BING_CONNECTION_ID }}
+          # Anthropic Models
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          ANTHROPIC_CHAT_MODEL_NAME: ${{ vars.ANTHROPIC_CHAT_MODEL_NAME }}
+          ANTHROPIC_REASONING_MODEL_NAME: ${{ vars.ANTHROPIC_REASONING_MODEL_NAME }}
 
       # Generate test reports and check coverage
       - name: Generate test reports
@@ -316,6 +322,14 @@ jobs:
         shell: pwsh
         run: ./dotnet/eng/scripts/dotnet-check-coverage.ps1 -JsonReportPath "TestResults/Reports/Summary.json" -CoverageThreshold $env:COVERAGE_THRESHOLD
 
+      - name: Upload integration test results
+        if: always() && github.event_name != 'pull_request' && matrix.integration-tests
+        uses: actions/upload-artifact@v7
+        with:
+          name: dotnet-test-results-${{ matrix.targetFramework }}-${{ matrix.os }}
+          path: IntegrationTestResults/**/*.junit
+          if-no-files-found: ignore
+
   # The Foundry hosted-agent IT is costly (it builds a container, pushes to ACR, and provisions
   # live agents on a separate Foundry project). Running it in its own job keeps the overall
   # workflow time roughly flat: it executes in parallel to dotnet-build and dotnet-test and is
@@ -448,3 +462,64 @@ jobs:
         uses: actions/github-script@v8
         with:
           script: core.setFailed('Integration Tests Cancelled!')
+
+  # Integration test trend report (aggregates JUnit XML results from dotnet test jobs)
+  dotnet-integration-test-report:
+    name: Integration Test Report
+    if: >
+      always() &&
+      github.event_name != 'pull_request' &&
+      (contains(join(needs.*.result, ','), 'success') ||
+       contains(join(needs.*.result, ','), 'failure'))
+    needs: [dotnet-test]
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          persist-credentials: false
+          sparse-checkout: |
+            .github/actions/python-setup
+            python
+      - name: Set up python and install the project
+        uses: ./.github/actions/python-setup
+        with:
+          python-version: "3.13"
+          os: ${{ runner.os }}
+      - name: Download all test results from current run
+        uses: actions/download-artifact@v4
+        with:
+          pattern: dotnet-test-results-*
+          path: dotnet-test-results/
+      - name: Restore report history cache
+        uses: actions/cache/restore@v4
+        with:
+          path: python/dotnet-integration-report-history.json
+          key: dotnet-integration-report-history-${{ github.run_id }}
+          restore-keys: |
+            dotnet-integration-report-history-
+      - name: Generate trend report
+        run: >
+          uv run python scripts/integration_test_report/aggregate.py
+          ../dotnet-test-results/
+          dotnet-integration-report-history.json
+          dotnet-integration-test-report.md
+      - name: Post to Job Summary
+        if: always()
+        run: cat dotnet-integration-test-report.md >> $GITHUB_STEP_SUMMARY
+      - name: Save report history cache
+        if: always()
+        uses: actions/cache/save@v4
+        with:
+          path: python/dotnet-integration-report-history.json
+          key: dotnet-integration-report-history-${{ github.run_id }}
+      - name: Upload trend report
+        if: always()
+        uses: actions/upload-artifact@v7
+        with:
+          name: dotnet-integration-test-report
+          path: |
+            python/dotnet-integration-test-report.md
+            python/dotnet-integration-report-history.json
diff --git a/dotnet/README.md b/dotnet/README.md
@@ -33,3 +33,4 @@ Console.WriteLine(await agent.RunAsync("Write a haiku about Microsoft Agent Fram
 - [Design Documents](../docs/design)
 - [Architectural Decision Records](../docs/decisions)
 - [MSFT Learn Docs](https://learn.microsoft.com/agent-framework/overview/agent-framework-overview)
+
diff --git a/dotnet/tests/AnthropicChatCompletion.IntegrationTests/AnthropicChatCompletionFixture.cs b/dotnet/tests/AnthropicChatCompletion.IntegrationTests/AnthropicChatCompletionFixture.cs
@@ -17,9 +17,6 @@ namespace AnthropicChatCompletion.IntegrationTests;
 
 public class AnthropicChatCompletionFixture : IChatClientAgentFixture
 {
-    // All tests for Anthropic are intended to be ran locally as the CI pipeline for Anthropic is not setup.
-    internal const string SkipReason = "Integrations tests for local execution only";
-
     private readonly bool _useReasoningModel;
     private readonly bool _useBeta;
 
@@ -105,7 +102,22 @@ public Task DeleteSessionAsync(AgentSession session) =>
 
     public async ValueTask InitializeAsync()
     {
-        Assert.SkipWhen(SkipReason is not null, SkipReason ?? string.Empty);
+        // Temporarily disabled: Anthropic SDK has a binary incompatibility with the current
+        // Microsoft.Extensions.AI version (WebSearchToolResultContent.Results method not found).
+        // See: https://github.com/microsoft/agent-framework/pull/5515
+        Assert.Skip("Anthropic integration tests temporarily disabled due to SDK incompatibility with Microsoft.Extensions.AI");
+
+        try
+        {
+            _ = TestConfiguration.GetRequiredValue(TestSettings.AnthropicApiKey);
+            _ = TestConfiguration.GetRequiredValue(TestSettings.AnthropicChatModelName);
+            _ = TestConfiguration.GetRequiredValue(TestSettings.AnthropicReasoningModelName);
+        }
+        catch (InvalidOperationException ex)
+        {
+            Assert.Skip("Anthropic configuration could not be loaded. Error:" + ex.Message);
+        }
+
         this._agent = await this.CreateChatClientAgentAsync();
     }
 

diff --git a/dotnet/tests/AnthropicChatCompletion.IntegrationTests/AnthropicSkillsIntegrationTests.cs b/dotnet/tests/AnthropicChatCompletion.IntegrationTests/AnthropicSkillsIntegrationTests.cs
@@ -1,5 +1,6 @@
 // Copyright (c) Microsoft. All rights reserved.
 
+using System;
 using System.Threading.Tasks;
 using AgentConformance.IntegrationTests.Support;
 using Anthropic;
@@ -17,19 +18,28 @@ namespace AnthropicChatCompletion.IntegrationTests;
 /// Integration tests for Anthropic Skills functionality.
 /// These tests are designed to be run locally with a valid Anthropic API key.
 /// </summary>
+/// <remarks>
+/// Temporarily disabled due to Anthropic SDK binary incompatibility with
+/// the current Microsoft.Extensions.AI version (WebSearchToolResultContent.Results).
+/// </remarks>
+[Trait("Category", "IntegrationDisabled")]
 public sealed class AnthropicSkillsIntegrationTests
 {
-    // All tests for Anthropic are intended to be ran locally as the CI pipeline for Anthropic is not setup.
-    private const string SkipReason = "Integrations tests for local execution only";
-
     [Fact]
     public async Task CreateAgentWithPptxSkillAsync()
     {
-        Assert.SkipWhen(SkipReason is not null, SkipReason ?? string.Empty);
-
-        // Arrange
-        AnthropicClient anthropicClient = new() { ApiKey = TestConfiguration.GetRequiredValue(TestSettings.AnthropicApiKey) };
-        string model = TestConfiguration.GetRequiredValue(TestSettings.AnthropicChatModelName);
+        AnthropicClient? anthropicClient;
+        string? model;
+        try
+        {
+            anthropicClient = new() { ApiKey = TestConfiguration.GetRequiredValue(TestSettings.AnthropicApiKey) };
+            model = TestConfiguration.GetRequiredValue(TestSettings.AnthropicChatModelName);
+        }
+        catch (InvalidOperationException ex)
+        {
+            Assert.Skip("Anthropic configuration could not be loaded. Error:" + ex.Message);
+            return;
+        }
 
         BetaSkillParams pptxSkill = new()
         {
@@ -56,10 +66,16 @@ public async Task CreateAgentWithPptxSkillAsync()
     [Fact]
     public async Task ListAnthropicManagedSkillsAsync()
     {
-        Assert.SkipWhen(SkipReason is not null, SkipReason ?? string.Empty);
-
-        // Arrange
-        AnthropicClient anthropicClient = new() { ApiKey = TestConfiguration.GetRequiredValue(TestSettings.AnthropicApiKey) };
+        AnthropicClient? anthropicClient;
+        try
+        {
+            anthropicClient = new() { ApiKey = TestConfiguration.GetRequiredValue(TestSettings.AnthropicApiKey) };
+        }
+        catch (InvalidOperationException ex)
+        {
+            Assert.Skip("Anthropic configuration could not be loaded. Error:" + ex.Message);
+            return;
+        }
 
         // Act
         SkillListPage skills = await anthropicClient.Beta.Skills.List(

diff --git a/dotnet/tests/Microsoft.Agents.AI.DurableTask.IntegrationTests/ConsoleAppSamplesValidation.cs b/dotnet/tests/Microsoft.Agents.AI.DurableTask.IntegrationTests/ConsoleAppSamplesValidation.cs
@@ -13,8 +13,6 @@ namespace Microsoft.Agents.AI.DurableTask.IntegrationTests;
 [Trait("Category", "SampleValidation")]
 public sealed class ConsoleAppSamplesValidation(ITestOutputHelper outputHelper) : SamplesValidationBase(outputHelper)
 {
-    private const string SkipFlakyTimingTest = "Flaky: timing-dependent LLM test, see https://github.com/microsoft/agent-framework/issues/4971";
-
     private static readonly string s_samplesPath = Path.GetFullPath(
         Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "..", "..", "samples", "04-hosting", "DurableAgents", "ConsoleApps"));
 
@@ -69,7 +67,7 @@ await this.RunSampleTestAsync(samplePath, async (process, logs) =>
         });
     }
 
-    [Fact]
+    [RetryFact(2, 5000)]
     public async Task SingleAgentOrchestrationChainingSampleValidationAsync()
     {
         using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts();
@@ -105,7 +103,7 @@ await this.RunSampleTestAsync(samplePath, async (process, logs) =>
         });
     }
 
-    [Fact]
+    [RetryFact(2, 5000)]
     public async Task MultiAgentConcurrencySampleValidationAsync()
     {
         using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts();
@@ -160,7 +158,7 @@ await this.RunSampleTestAsync(samplePath, async (process, logs) =>
         });
     }
 
-    [Fact]
+    [RetryFact(2, 5000)]
     public async Task MultiAgentConditionalSampleValidationAsync()
     {
         using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts();
@@ -237,14 +235,14 @@ private async Task TestSpamDetectionAsync(
         Assert.True(foundSuccess, "Orchestration did not complete successfully.");
     }
 
-    [Fact(Skip = SkipFlakyTimingTest)]
+    [RetryFact(2, 5000)]
     public async Task SingleAgentOrchestrationHITLSampleValidationAsync()
     {
         string samplePath = Path.Combine(s_samplesPath, "05_AgentOrchestration_HITL");
 
         await this.RunSampleTestAsync(samplePath, async (process, logs) =>
         {
-            using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts();
+            using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(180));
 
             // Start the HITL orchestration following the happy path from README
             await this.WriteInputAsync(process, "The Future of Artificial Intelligence", testTimeoutCts.Token);
@@ -260,7 +258,7 @@ await this.RunSampleTestAsync(samplePath, async (process, logs) =>
             while ((line = this.ReadLogLine(logs, testTimeoutCts.Token)) != null)
             {
                 // Look for notification that content is ready. The first time we see this, we should send a rejection.
-                // The second time we see this, we should send approval.
+                // Subsequent times we see this, we should send approval (LLM may produce extra review cycles).
                 if (line.Contains("Content is ready for review", StringComparison.OrdinalIgnoreCase))
                 {
                     if (!rejectionSent)
@@ -275,20 +273,15 @@ await this.WriteInputAsync(
                             testTimeoutCts.Token);
                         rejectionSent = true;
                     }
-                    else if (!approvalSent)
+                    else
                     {
-                        // Prompt: Approve? (y/n):
+                        // Approve any subsequent draft (LLM non-determinism may produce extra review cycles)
                         await this.WriteInputAsync(process, "y", testTimeoutCts.Token);
 
                         // Prompt: Feedback (optional):
                         await this.WriteInputAsync(process, "Looks good!", testTimeoutCts.Token);
                         approvalSent = true;
                     }
-                    else
-                    {
-                        // This should never happen
-                        Assert.Fail("Unexpected message found.");
-                    }
                 }
 
                 // Look for success message
@@ -311,14 +304,14 @@ await this.WriteInputAsync(
         });
     }
 
-    [Fact(Skip = SkipFlakyTimingTest)]
+    [RetryFact(2, 5000)]
     public async Task LongRunningToolsSampleValidationAsync()
     {
         string samplePath = Path.Combine(s_samplesPath, "06_LongRunningTools");
         await this.RunSampleTestAsync(samplePath, async (process, logs) =>
         {
             // This test takes a bit longer to run due to the multiple agent interactions and the lengthy content generation.
-            using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(90));
+            using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(180));
 
             // Test starting an agent that schedules a content generation orchestration
             await this.WriteInputAsync(
@@ -335,7 +328,7 @@ await this.WriteInputAsync(
             while ((line = this.ReadLogLine(logs, testTimeoutCts.Token)) != null)
             {
                 // Look for notification that content is ready. The first time we see this, we should send a rejection.
-                // The second time we see this, we should send approval.
+                // Subsequent times we see this, we should send approval (LLM may produce extra review cycles).
                 if (line.Contains("NOTIFICATION: Please review the following content for approval", StringComparison.OrdinalIgnoreCase))
                 {
                     // Wait for the notification to be fully written to the console
@@ -350,20 +343,15 @@ await this.WriteInputAsync(
                             testTimeoutCts.Token);
                         rejectionSent = true;
                     }
-                    else if (!approvalSent)
+                    else
                     {
-                        // Approve the content. Note that we need to send a newline character to the console first before sending the input.
+                        // Approve any subsequent draft (LLM non-determinism may produce extra review cycles)
                         await this.WriteInputAsync(
                             process,
                             "\nApprove the content",
                             testTimeoutCts.Token);
                         approvalSent = true;
                     }
-                    else
-                    {
-                        // This should never happen
-                        Assert.Fail("Unexpected message found.");
-                    }
                 }
 
                 // Look for success message
@@ -396,14 +384,14 @@ await this.WriteInputAsync(
         });
     }
 
-    [Fact(Skip = SkipFlakyTimingTest)]
+    [RetryFact(2, 5000)]
     public async Task ReliableStreamingSampleValidationAsync()
     {
         string samplePath = Path.Combine(s_samplesPath, "07_ReliableStreaming");
         await this.RunSampleTestAsync(samplePath, async (process, logs) =>
         {
             // This test takes a bit longer to run due to the multiple agent interactions and the lengthy content generation.
-            using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(90));
+            using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(150));
 
             // Test the agent endpoint with a simple prompt
             await this.WriteInputAsync(process, "Plan a 5-day trip to Seattle. Include daily activities.", testTimeoutCts.Token);

diff --git a/dotnet/tests/Microsoft.Agents.AI.DurableTask.IntegrationTests/ExternalClientTests.cs b/dotnet/tests/Microsoft.Agents.AI.DurableTask.IntegrationTests/ExternalClientTests.cs
@@ -19,11 +19,9 @@ namespace Microsoft.Agents.AI.DurableTask.IntegrationTests;
 [Trait("Category", "Integration")]
 public sealed class ExternalClientTests(ITestOutputHelper outputHelper) : IDisposable
 {
-    private const string SkipFlakyTimingTest = "Flaky: timing-dependent LLM test, see https://github.com/microsoft/agent-framework/issues/4971";
-
     private static readonly TimeSpan s_defaultTimeout = Debugger.IsAttached
         ? TimeSpan.FromMinutes(5)
-        : TimeSpan.FromSeconds(60);
+        : TimeSpan.FromSeconds(120);
 
     private static readonly IConfiguration s_configuration =
         new ConfigurationBuilder()
@@ -38,7 +36,7 @@ public sealed class ExternalClientTests(ITestOutputHelper outputHelper) : IDispo
 
     public void Dispose() => this._cts.Dispose();
 
-    [Fact]
+    [RetryFact(2, 5000)]
     public async Task SimplePromptAsync()
     {
         // Setup
@@ -77,7 +75,7 @@ await simpleAgentProxy.RunAsync(
         Assert.Contains(agentLogs, log => log.EventId.Name == "LogAgentResponse");
     }
 
-    [Fact(Skip = SkipFlakyTimingTest)]
+    [RetryFact(2, 5000)]
     public async Task CallFunctionToolsAsync()
     {
         int weatherToolInvocationCount = 0;
@@ -129,7 +127,7 @@ string SuggestPackingList(string weather, bool isSunny)
         Assert.Equal(1, packingListToolInvocationCount);
     }
 
-    [Fact(Skip = SkipFlakyTimingTest)]
+    [RetryFact(2, 5000)]
     public async Task CallLongRunningFunctionToolsAsync()
     {
         [Description("Starts a greeting workflow and returns the workflow instance ID")]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -33,3 +33,4 @@ Console.WriteLine(await agent.RunAsync("Write a haiku about Microsoft Agent Fram
		- [Design Documents](../docs/design)
		- [Architectural Decision Records](../docs/decisions)
		- [MSFT Learn Docs](https://learn.microsoft.com/agent-framework/overview/agent-framework-overview)