san360 · san360 · May 15, 2026 · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/.env.example b/.env.example
@@ -1,3 +1,9 @@
+# Identity (Service Principal) — populated by bootstrap.sh
+AZURE_CLIENT_ID=<app-registration-client-id>
+AZURE_TENANT_ID=<entra-tenant-id>
+AZURE_SUBSCRIPTION_ID=<azure-subscription-id>
+SP_OBJECT_ID=<service-principal-object-id>
+
 # Azure AI Foundry endpoints (no secrets — auth is via OIDC or az login)
 FOUNDRY_TEST_ENDPOINT=https://eastus.api.azureml.ms/foundry/v1/subscriptions/<sub>/resourceGroups/<rg>/projects/<test-project>
 FOUNDRY_PROD_ENDPOINT=https://eastus.api.azureml.ms/foundry/v1/subscriptions/<sub>/resourceGroups/<rg>/projects/<prod-project>
@@ -7,3 +13,9 @@ GPT_DEPLOYMENT=gpt-4o-2024-11-20
 
 # Bing Grounding connection name configured in the Foundry project
 BING_CONNECTION_NAME=bing-grounding
+
+# Resource metadata
+RESOURCE_GROUP=<resource-group-name>
+LOCATION=swedencentral
+ACCOUNT_NAME=<foundry-account-name>
+GITHUB_REPO=<owner/repo>
diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml
@@ -73,8 +73,8 @@ jobs:
         with:
           azure-ai-project-endpoint: ${{ vars.FOUNDRY_TEST_ENDPOINT }}
           deployment-name:           ${{ vars.GPT_DEPLOYMENT }}
-          agent-ids:                 "tech-trends-agent:latest"
-          data-path:                 "./evals/golden-dataset.jsonl"
+          agent-ids:                 "tech-trends-agent:${{ steps.deploy.outputs.agent_version }}"
+          data-path:                 "./evals/golden-dataset.json"
           evaluation-result-view:    "all-scores"
 
       - name: Post evaluation summary to PR

diff --git a/.github/workflows/monitor.yml b/.github/workflows/monitor.yml
@@ -16,20 +16,44 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
+      - name: Set up Python  # interpreter for the pip install and inline python3 script later in this job
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"  # quoted so YAML keeps the version a string, not a float
+          cache: pip  # NOTE(review): presumably caches pip downloads between runs — confirm
+
+      - name: Install dependencies  # presumably provides the azure.* packages the inline script imports — confirm
+        run: pip install -r requirements.txt
+
       - uses: azure/login@v2
         with:
           client-id:       ${{ vars.AZURE_CLIENT_ID }}
           tenant-id:       ${{ vars.AZURE_TENANT_ID }}
           subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}
 
+      - name: Get latest agent version  # output is consumed as steps.agent.outputs.version
+        id: agent
+        env:  # exported so the inline Python reads the endpoint from os.environ
+          FOUNDRY_PROD_ENDPOINT: ${{ vars.FOUNDRY_PROD_ENDPOINT }}
+        run: |
+          VERSION=$(python3 -c "
+          import os
+          from azure.ai.projects import AIProjectClient
+          from azure.identity import DefaultAzureCredential
+          client = AIProjectClient(endpoint=os.environ['FOUNDRY_PROD_ENDPOINT'], credential=DefaultAzureCredential())
+          agent = client.agents.get_agent('tech-trends-agent')
+          print(agent.version)
+          ")
+          echo "version=$VERSION" >> "$GITHUB_OUTPUT"  # quoted so the output-file path is never word-split
+
       - name: Run evaluation against production agent
         id: eval
         uses: microsoft/ai-agent-evals@v3-beta
         with:
           azure-ai-project-endpoint: ${{ vars.FOUNDRY_PROD_ENDPOINT }}
           deployment-name:           ${{ vars.GPT_DEPLOYMENT }}
-          agent-ids:                 "tech-trends-agent:latest"
-          data-path:                 "./evals/golden-dataset.jsonl"
+          agent-ids:                 "tech-trends-agent:${{ steps.agent.outputs.version }}"
+          data-path:                 "./evals/golden-dataset.json"
           evaluation-result-view:    "default"
 
       - name: Open GitHub issue if scores degraded

diff --git a/.gitignore b/.gitignore
@@ -13,6 +13,7 @@ build/
 
 # Environment
 .env
+.bootstrap-state.json
 
 # IDE
 .vscode/

diff --git a/agents/tech-trends-agent.json b/agents/tech-trends-agent.json
@@ -5,11 +5,11 @@
     "model": "${GPT_DEPLOYMENT}",
     "instructions_file": "prompts/tech-trends-agent.md",
     "tools": [
-      { "type": "bing_grounding" }
+      { "type": "web_search" }
     ]
   },
   "eval": {
-    "dataset": "evals/golden-dataset.jsonl",
+    "dataset": "evals/golden-dataset.json",
     "phase_filter": "1",
     "config": "evals/eval-config.json"
   },

diff --git a/evals/eval-config.json b/evals/eval-config.json
@@ -1,16 +1,16 @@
 {
   "evaluators": [
-    "TaskAdherenceEvaluator",
-    "RelevanceEvaluator",
-    "GroundednessEvaluator",
-    "CoherenceEvaluator"
+    "builtin.task_adherence",
+    "builtin.relevance",
+    "builtin.groundedness",
+    "builtin.coherence"
   ],
   "thresholds": {
     "task_adherence": 0.80,
     "relevance": 0.75,
     "groundedness": 0.75,
     "coherence": 0.80
   },
-  "phase_filter": null,
-  "notes": "Set phase_filter to '1' or '2' to run only phase-specific cases. null runs all."
+  "phase_filter": "1",
+  "notes": "Phase 1: Only web search queries evaluated. Phase 2 data analysis queries excluded."
 }
diff --git a/evals/golden-dataset.json b/evals/golden-dataset.json
@@ -0,0 +1,19 @@
+{
+  "name": "tech-trends-agent-eval",
+  "evaluators": [
+    "builtin.task_adherence",
+    "builtin.relevance",
+    "builtin.groundedness",
+    "builtin.coherence"
+  ],
+  "data": [
+    {"query": "What are the top three AI model releases in the last 90 days and their key capabilities?", "ground_truth": "Response should identify at least 3 recent model releases with specific capability descriptions, cite web sources, and be structured with a summary and key findings section.", "phase": "1", "category": "trend_research"},
+    {"query": "How is the major cloud provider landscape shifting in 2025 regarding AI infrastructure?", "ground_truth": "Response should cover at least 2 major cloud providers, discuss AI infrastructure investment or announcements, and cite current sources.", "phase": "1", "category": "trend_research"},
+    {"query": "What is the current state of open-source LLM adoption in enterprise settings?", "ground_truth": "Response should address enterprise adoption, mention specific models or frameworks, and provide balanced perspective on open vs closed source.", "phase": "1", "category": "market_analysis"},
+    {"query": "Summarise recent developer sentiment around AI coding tools based on community discussions.", "ground_truth": "Response should reflect actual developer perspectives, not vendor claims, and cite community sources such as surveys, forums or publications.", "phase": "1", "category": "community_sentiment"},
+    {"query": "What are analysts predicting for AI chip demand over the next 12 months?", "ground_truth": "Response should include analyst predictions, reference specific companies or market segments, and note the source and date of predictions.", "phase": "1", "category": "market_forecast"},
+    {"query": "Search for the latest GPU benchmark comparison data and calculate which GPU offers the best performance-per-dollar ratio based on the data you find.", "ground_truth": "Response should retrieve actual benchmark data, perform a calculation or comparison, present results in a structured format, and cite the data source.", "phase": "2", "category": "data_analysis"},
+    {"query": "Find recent AI model API pricing tables and produce a comparison showing cost per million tokens for at least 4 models.", "ground_truth": "Response should retrieve current pricing data, present a structured comparison table, and identify the most cost-effective option for different use cases.", "phase": "2", "category": "data_analysis"},
+    {"query": "Retrieve the latest Stack Overflow developer survey data on AI tool usage and compute the percentage increase in adoption compared to the prior year.", "ground_truth": "Response should locate survey data, perform a percentage calculation, present the result clearly, and note the source and survey year.", "phase": "2", "category": "data_analysis"}
+  ]
+}
diff --git a/evals/golden-dataset.jsonl b/evals/golden-dataset.jsonl
diff --git a/infra/main.bicep b/infra/main.bicep
@@ -23,7 +23,7 @@ param gptModelVersion string = '2024-11-20'
 param gptCapacity int = 30
 
 // --- Cognitive Services account (hosts the Foundry project) ---
-resource aiAccount 'Microsoft.CognitiveServices/accounts@2024-10-01' = {
+resource aiAccount 'Microsoft.CognitiveServices/accounts@2025-06-01' = {
   name: accountName
   location: location
   kind: 'AIServices'
@@ -33,19 +33,20 @@ resource aiAccount 'Microsoft.CognitiveServices/accounts@2024-10-01' = {
   properties: {
     customSubDomainName: accountName
     publicNetworkAccess: 'Enabled'
+    allowProjectManagement: true
   }
 }
 
 // --- AI Project ---
-resource aiProject 'Microsoft.CognitiveServices/accounts/projects@2024-10-01' = {
+resource aiProject 'Microsoft.CognitiveServices/accounts/projects@2025-06-01' = {
   parent: aiAccount
   name: projectName
   location: location
   properties: {}
 }
 
 // --- GPT model deployment ---
-resource gptDeployment 'Microsoft.CognitiveServices/accounts/deployments@2024-10-01' = {
+resource gptDeployment 'Microsoft.CognitiveServices/accounts/deployments@2025-06-01' = {
   parent: aiAccount
   name: gptDeploymentName
   sku: {
-Original file line number
+Diff line change
@@ Expand Up / @@ -13,6 +13,7 @@ build/ @@
     # Environment
     .env
+    .bootstrap-state.json
     # IDE
     .vscode/
@@ Expand Down @@