From 52eecbed7d00eb5a574c468fd13b819766b921c8 Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 17:32:00 +0200
Subject: [PATCH 1/8] =?UTF-8?q?feat:=20Phase=201=20=E2=80=94=20tech=20tren?=
 =?UTF-8?q?ds=20agent=20with=20web=20search=20(Bing=20Grounding)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 evals/eval-config.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/evals/eval-config.json b/evals/eval-config.json
index 971e043..bba838d 100644
--- a/evals/eval-config.json
+++ b/evals/eval-config.json
@@ -11,6 +11,6 @@
     "groundedness": 0.75,
     "coherence": 0.80
   },
-  "phase_filter": null,
-  "notes": "Set phase_filter to '1' or '2' to run only phase-specific cases. null runs all."
+  "phase_filter": "1",
+  "notes": "Phase 1: Only web search queries evaluated. Phase 2 data analysis queries excluded."
 }

From d419a15caa0be3932ba038c2648c4747ddcbacfa Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 17:40:24 +0200
Subject: [PATCH 2/8] fix: switch from BingGroundingTool to WebSearchTool (no
 connection required)

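- Replace the bing_grounding tool with web_search in the agent config, the deploy script, and the Phase 1 lifecycle script
- WebSearchTool requires no Bing resource or connection setup
- Update Bicep resource API versions to 2025-06-01, enable allowProjectManagement on the account, and change the bootstrap.sh default region to swedencentral
- Add a --skip-foundry flag to bootstrap.sh for reusing existing Foundry projects (requires both --test-endpoint and --prod-endpoint)
- Fix the Azure AI User RBAC role assignment to use the role definition ID instead of the role name
- Write service principal IDs and resource metadata to a generated .env (documented in .env.example) and git-ignore .bootstrap-state.json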
---
 .env.example                              |  12 ++
 .gitignore                                |   1 +
 agents/tech-trends-agent.json             |   2 +-
 infra/main.bicep                          |   7 +-
 scripts/bootstrap.sh                      | 148 +++++++++++++++-------
 scripts/deploy_agent.py                   |  22 ++--
 scripts/lifecycle/01-phase1-web-search.sh |   8 +-
 7 files changed, 136 insertions(+), 64 deletions(-)

diff --git a/.env.example b/.env.example
index c9a24b9..2d2f0d1 100644
--- a/.env.example
+++ b/.env.example
@@ -1,3 +1,9 @@
+# Identity (Service Principal) — populated by bootstrap.sh
+AZURE_CLIENT_ID=<app-registration-client-id>
+AZURE_TENANT_ID=<entra-tenant-id>
+AZURE_SUBSCRIPTION_ID=<azure-subscription-id>
+SP_OBJECT_ID=<service-principal-object-id>
+
 # Azure AI Foundry endpoints (no secrets — auth is via OIDC or az login)
 FOUNDRY_TEST_ENDPOINT=https://eastus.api.azureml.ms/foundry/v1/subscriptions/<sub>/resourceGroups/<rg>/projects/<test-project>
 FOUNDRY_PROD_ENDPOINT=https://eastus.api.azureml.ms/foundry/v1/subscriptions/<sub>/resourceGroups/<rg>/projects/<prod-project>
@@ -7,3 +13,9 @@ GPT_DEPLOYMENT=gpt-4o-2024-11-20
 
 # Bing Grounding connection name configured in the Foundry project
 BING_CONNECTION_NAME=bing-grounding
+
+# Resource metadata
+RESOURCE_GROUP=<resource-group-name>
+LOCATION=swedencentral
+ACCOUNT_NAME=<foundry-account-name>
+GITHUB_REPO=<owner/repo>
diff --git a/.gitignore b/.gitignore
index e617638..a15918c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@ build/
 
 # Environment
 .env
+.bootstrap-state.json
 
 # IDE
 .vscode/
diff --git a/agents/tech-trends-agent.json b/agents/tech-trends-agent.json
index 6a095ca..fb6c08b 100644
--- a/agents/tech-trends-agent.json
+++ b/agents/tech-trends-agent.json
@@ -5,7 +5,7 @@
     "model": "${GPT_DEPLOYMENT}",
     "instructions_file": "prompts/tech-trends-agent.md",
     "tools": [
-      { "type": "bing_grounding" }
+      { "type": "web_search" }
     ]
   },
   "eval": {
diff --git a/infra/main.bicep b/infra/main.bicep
index 83d6851..8e7346d 100644
--- a/infra/main.bicep
+++ b/infra/main.bicep
@@ -23,7 +23,7 @@ param gptModelVersion string = '2024-11-20'
 param gptCapacity int = 30
 
 // --- Cognitive Services account (hosts the Foundry project) ---
-resource aiAccount 'Microsoft.CognitiveServices/accounts@2024-10-01' = {
+resource aiAccount 'Microsoft.CognitiveServices/accounts@2025-06-01' = {
   name: accountName
   location: location
   kind: 'AIServices'
@@ -33,11 +33,12 @@ resource aiAccount 'Microsoft.CognitiveServices/accounts@2024-10-01' = {
   properties: {
     customSubDomainName: accountName
     publicNetworkAccess: 'Enabled'
+    allowProjectManagement: true
   }
 }
 
 // --- AI Project ---
-resource aiProject 'Microsoft.CognitiveServices/accounts/projects@2024-10-01' = {
+resource aiProject 'Microsoft.CognitiveServices/accounts/projects@2025-06-01' = {
   parent: aiAccount
   name: projectName
   location: location
@@ -45,7 +46,7 @@ resource aiProject 'Microsoft.CognitiveServices/accounts/projects@2024-10-01' =
 }
 
 // --- GPT model deployment ---
-resource gptDeployment 'Microsoft.CognitiveServices/accounts/deployments@2024-10-01' = {
+resource gptDeployment 'Microsoft.CognitiveServices/accounts/deployments@2025-06-01' = {
   parent: aiAccount
   name: gptDeploymentName
   sku: {
diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh
index cd2d005..4763408 100644
--- a/scripts/bootstrap.sh
+++ b/scripts/bootstrap.sh
@@ -4,8 +4,8 @@
 #
 # Provisions:
 #   1. Resource group
-#   2. TEST Foundry project (Bicep)
-#   3. PROD Foundry project (Bicep)
+#   2. TEST Foundry project (Bicep) — skipped with --skip-foundry
+#   3. PROD Foundry project (Bicep) — skipped with --skip-foundry
 #   4. App Registration + Service Principal
 #   5. 3 Federated credentials (main, PR, tags)
 #   6. RBAC role assignments
@@ -16,15 +16,23 @@
 # Usage:
 #   ./scripts/bootstrap.sh \
 #     --resource-group rg-agent-devops \
-#     --location eastus \
+#     --location swedencentral \
 #     --account-name agentdevops \
 #     --github-repo san360/agent-devops
+#
+#   # Skip Foundry project creation (use existing projects):
+#   ./scripts/bootstrap.sh \
+#     --resource-group rg-agent-devops \
+#     --account-name agentdevops \
+#     --skip-foundry \
+#     --test-endpoint "https://..." \
+#     --prod-endpoint "https://..."
 
 set -euo pipefail
 
 # ---------- defaults ----------
 RESOURCE_GROUP=""
-LOCATION="eastus"
+LOCATION="swedencentral"
 ACCOUNT_NAME=""
 GITHUB_REPO="san360/agent-devops"
 GPT_MODEL_NAME="gpt-4o"
@@ -32,22 +40,34 @@ GPT_MODEL_VERSION="2024-11-20"
 GPT_DEPLOYMENT_NAME="gpt-4o-2024-11-20"
 GPT_CAPACITY=30
 BING_CONNECTION_NAME="bing-grounding"
+SKIP_FOUNDRY=false
+TEST_ENDPOINT=""
+PROD_ENDPOINT=""
 
 # ---------- parse args ----------
 while [[ $# -gt 0 ]]; do
   case $1 in
-    --resource-group)  RESOURCE_GROUP="$2";  shift 2 ;;
-    --location)        LOCATION="$2";        shift 2 ;;
-    --account-name)    ACCOUNT_NAME="$2";    shift 2 ;;
-    --github-repo)     GITHUB_REPO="$2";     shift 2 ;;
+    --resource-group)  RESOURCE_GROUP="$2";      shift 2 ;;
+    --location)        LOCATION="$2";            shift 2 ;;
+    --account-name)    ACCOUNT_NAME="$2";        shift 2 ;;
+    --github-repo)     GITHUB_REPO="$2";         shift 2 ;;
     --gpt-deployment)  GPT_DEPLOYMENT_NAME="$2"; shift 2 ;;
-    --gpt-capacity)    GPT_CAPACITY="$2";    shift 2 ;;
+    --gpt-capacity)    GPT_CAPACITY="$2";        shift 2 ;;
+    --skip-foundry)    SKIP_FOUNDRY=true;        shift   ;;
+    --test-endpoint)   TEST_ENDPOINT="$2";       shift 2 ;;
+    --prod-endpoint)   PROD_ENDPOINT="$2";       shift 2 ;;
     *)                 echo "Unknown flag: $1"; exit 1 ;;
   esac
 done
 
 if [[ -z "$RESOURCE_GROUP" || -z "$ACCOUNT_NAME" ]]; then
   echo "Usage: $0 --resource-group <rg> --account-name <name> [--location <loc>] [--github-repo <owner/repo>]"
+  echo "       Add --skip-foundry --test-endpoint <url> --prod-endpoint <url> to use existing projects"
+  exit 1
+fi
+
+if [[ "$SKIP_FOUNDRY" == true && ( -z "$TEST_ENDPOINT" || -z "$PROD_ENDPOINT" ) ]]; then
+  echo "ERROR: --skip-foundry requires both --test-endpoint and --prod-endpoint"
   exit 1
 fi
 
@@ -61,8 +81,14 @@ echo "============================================"
 echo " Resource Group:  $RESOURCE_GROUP"
 echo " Location:        $LOCATION"
 echo " Account Name:    $ACCOUNT_NAME"
-echo " Test Project:    $TEST_PROJECT"
-echo " Prod Project:    $PROD_PROJECT"
+echo " Skip Foundry:    $SKIP_FOUNDRY"
+if [[ "$SKIP_FOUNDRY" == true ]]; then
+  echo " TEST endpoint:   $TEST_ENDPOINT (provided)"
+  echo " PROD endpoint:   $PROD_ENDPOINT (provided)"
+else
+  echo " Test Project:    $TEST_PROJECT"
+  echo " Prod Project:    $PROD_PROJECT"
+fi
 echo " GitHub Repo:     $GITHUB_REPO"
 echo " GPT Deployment:  $GPT_DEPLOYMENT_NAME"
 echo "============================================"
@@ -75,39 +101,43 @@ az group create \
   --location "$LOCATION" \
   --output none
 
-# ---------- Step 2: Deploy TEST project ----------
-echo "[2/7] Deploying TEST Foundry project..."
-TEST_OUTPUT=$(az deployment group create \
-  --resource-group "$RESOURCE_GROUP" \
-  --template-file infra/main.bicep \
-  --parameters \
-    accountName="${ACCOUNT_NAME}test" \
-    projectName="$TEST_PROJECT" \
-    gptDeploymentName="$GPT_DEPLOYMENT_NAME" \
-    gptModelName="$GPT_MODEL_NAME" \
-    gptModelVersion="$GPT_MODEL_VERSION" \
-    gptCapacity="$GPT_CAPACITY" \
-  --output json)
-
-TEST_ENDPOINT=$(echo "$TEST_OUTPUT" | python3 -c "import sys,json; print(json.load(sys.stdin)['properties']['outputs']['projectEndpoint']['value'])")
-echo "  TEST endpoint: $TEST_ENDPOINT"
-
-# ---------- Step 3: Deploy PROD project ----------
-echo "[3/7] Deploying PROD Foundry project..."
-PROD_OUTPUT=$(az deployment group create \
-  --resource-group "$RESOURCE_GROUP" \
-  --template-file infra/main.bicep \
-  --parameters \
-    accountName="${ACCOUNT_NAME}prod" \
-    projectName="$PROD_PROJECT" \
-    gptDeploymentName="$GPT_DEPLOYMENT_NAME" \
-    gptModelName="$GPT_MODEL_NAME" \
-    gptModelVersion="$GPT_MODEL_VERSION" \
-    gptCapacity="$GPT_CAPACITY" \
-  --output json)
-
-PROD_ENDPOINT=$(echo "$PROD_OUTPUT" | python3 -c "import sys,json; print(json.load(sys.stdin)['properties']['outputs']['projectEndpoint']['value'])")
-echo "  PROD endpoint: $PROD_ENDPOINT"
+# ---------- Step 2 & 3: Deploy Foundry projects (or skip) ----------
+if [[ "$SKIP_FOUNDRY" == true ]]; then
+  echo "[2/7] Skipping TEST Foundry project (using provided endpoint)"
+  echo "[3/7] Skipping PROD Foundry project (using provided endpoint)"
+else
+  echo "[2/7] Deploying TEST Foundry project..."
+  TEST_OUTPUT=$(az deployment group create \
+    --resource-group "$RESOURCE_GROUP" \
+    --template-file infra/main.bicep \
+    --parameters \
+      accountName="${ACCOUNT_NAME}test" \
+      projectName="$TEST_PROJECT" \
+      gptDeploymentName="$GPT_DEPLOYMENT_NAME" \
+      gptModelName="$GPT_MODEL_NAME" \
+      gptModelVersion="$GPT_MODEL_VERSION" \
+      gptCapacity="$GPT_CAPACITY" \
+    --output json)
+
+  TEST_ENDPOINT=$(echo "$TEST_OUTPUT" | python3 -c "import sys,json; print(json.load(sys.stdin)['properties']['outputs']['projectEndpoint']['value'])")
+  echo "  TEST endpoint: $TEST_ENDPOINT"
+
+  echo "[3/7] Deploying PROD Foundry project..."
+  PROD_OUTPUT=$(az deployment group create \
+    --resource-group "$RESOURCE_GROUP" \
+    --template-file infra/main.bicep \
+    --parameters \
+      accountName="${ACCOUNT_NAME}prod" \
+      projectName="$PROD_PROJECT" \
+      gptDeploymentName="$GPT_DEPLOYMENT_NAME" \
+      gptModelName="$GPT_MODEL_NAME" \
+      gptModelVersion="$GPT_MODEL_VERSION" \
+      gptCapacity="$GPT_CAPACITY" \
+    --output json)
+
+  PROD_ENDPOINT=$(echo "$PROD_OUTPUT" | python3 -c "import sys,json; print(json.load(sys.stdin)['properties']['outputs']['projectEndpoint']['value'])")
+  echo "  PROD endpoint: $PROD_ENDPOINT"
+fi
 
 # ---------- Step 4: App Registration + Service Principal ----------
 echo "[4/7] Creating App Registration and Service Principal..."
@@ -159,7 +189,7 @@ SCOPE="/subscriptions/$SUBSCRIPTION_ID/resourceGroups/$RESOURCE_GROUP"
 
 az role assignment create \
   --assignee "$SP_OBJ_ID" \
-  --role "Azure AI User" \
+  --role "53ca6127-db72-4b80-b1b0-d745d6d5456d" \
   --scope "$SCOPE" \
   --output none
 echo "  + Azure AI User"
@@ -235,3 +265,31 @@ json.dump({
 }, open('$STATE_FILE', 'w'), indent=2)
 "
 echo " State saved to $STATE_FILE (used by teardown.sh)"
+
+# Save .env for local development
+ENV_FILE=".env"
+cat > "$ENV_FILE" <<EOF
+# Generated by bootstrap.sh on $(date -u +%Y-%m-%dT%H:%M:%SZ)
+# Identity (Service Principal)
+AZURE_CLIENT_ID=$APP_ID
+AZURE_TENANT_ID=$TENANT_ID
+AZURE_SUBSCRIPTION_ID=$SUBSCRIPTION_ID
+SP_OBJECT_ID=$SP_OBJ_ID
+
+# Azure AI Foundry endpoints
+FOUNDRY_TEST_ENDPOINT=$TEST_ENDPOINT
+FOUNDRY_PROD_ENDPOINT=$PROD_ENDPOINT
+
+# Model deployment
+GPT_DEPLOYMENT=$GPT_DEPLOYMENT_NAME
+
+# Bing Grounding connection name
+BING_CONNECTION_NAME=$BING_CONNECTION_NAME
+
+# Resource metadata
+RESOURCE_GROUP=$RESOURCE_GROUP
+LOCATION=$LOCATION
+ACCOUNT_NAME=$ACCOUNT_NAME
+GITHUB_REPO=$GITHUB_REPO
+EOF
+echo " Local .env written (git-ignored)"
diff --git a/scripts/deploy_agent.py b/scripts/deploy_agent.py
index 183e300..8f6338d 100644
--- a/scripts/deploy_agent.py
+++ b/scripts/deploy_agent.py
@@ -1,8 +1,8 @@
 """Deploy the tech-trends-agent to an Azure AI Foundry project.
 
 Usage:
-    python scripts/deploy_agent.py --env test --semver 1.0.0 --tools bing_grounding
-    python scripts/deploy_agent.py --env prod --semver 1.2.0 --tools bing_grounding,code_interpreter
+    python scripts/deploy_agent.py --env test --semver 1.0.0 --tools web_search
+    python scripts/deploy_agent.py --env prod --semver 1.2.0 --tools web_search,code_interpreter
 """
 
 import argparse
@@ -14,9 +14,9 @@
 
 from azure.ai.projects import AIProjectClient
 from azure.ai.projects.models import (
-    BingGroundingTool,
     CodeInterpreterTool,
     PromptAgentDefinition,
+    WebSearchTool,
 )
 from azure.identity import DefaultAzureCredential
 
@@ -37,11 +37,11 @@ def hash_file(path):
     return "sha256:" + hashlib.sha256(open(path, "rb").read()).hexdigest()
 
 
-def build_sdk_tools(tools, conn_name):
+def build_sdk_tools(tools):
     sdk_tools = []
     for t in tools:
-        if t["type"] == "bing_grounding":
-            sdk_tools.append(BingGroundingTool(connection_id=conn_name))
+        if t["type"] == "web_search":
+            sdk_tools.append(WebSearchTool())
         elif t["type"] == "code_interpreter":
             sdk_tools.append(CodeInterpreterTool())
     return sdk_tools
@@ -50,7 +50,6 @@ def build_sdk_tools(tools, conn_name):
 def deploy_agent(env: str, tools: list, semver: str):
     endpoint = os.environ[f"FOUNDRY_{env.upper()}_ENDPOINT"]
     model = os.environ["GPT_DEPLOYMENT"]
-    conn_name = os.environ.get("BING_CONNECTION_NAME", "bing-grounding")
 
     prompt_path = "prompts/tech-trends-agent.md"
     instructions = open(prompt_path).read()
@@ -63,11 +62,12 @@ def deploy_agent(env: str, tools: list, semver: str):
         f"model: {model} | commit: {short_sha} | v{semver}"
     )
 
-    sdk_tools = build_sdk_tools(tools, conn_name)
-
     client = AIProjectClient(
         endpoint=endpoint, credential=DefaultAzureCredential()
     )
+
+    sdk_tools = build_sdk_tools(tools)
+
     agent = client.agents.create_version(
         agent_name="tech-trends-agent",
         description=description,
@@ -120,8 +120,8 @@ def deploy_agent(env: str, tools: list, semver: str):
     p.add_argument("--semver", default="0.0.1")
     p.add_argument(
         "--tools",
-        default="bing_grounding",
-        help="comma-separated: bing_grounding,code_interpreter",
+        default="web_search",
+        help="comma-separated: web_search,code_interpreter",
     )
     args = p.parse_args()
 
diff --git a/scripts/lifecycle/01-phase1-web-search.sh b/scripts/lifecycle/01-phase1-web-search.sh
index 18cb1c1..fc5668f 100644
--- a/scripts/lifecycle/01-phase1-web-search.sh
+++ b/scripts/lifecycle/01-phase1-web-search.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 #
-# 01-phase1-web-search.sh — Phase 1: Initial agent with Bing Grounding only
+# 01-phase1-web-search.sh — Phase 1: Initial agent with Web Search only
 #
 # Creates a PR that sets the agent to Phase 1 configuration.
 # The evaluate.yml workflow will trigger, deploy to TEST, and run evals.
@@ -33,7 +33,7 @@ cat > agents/tech-trends-agent.json << 'AGENT_EOF'
     "model": "${GPT_DEPLOYMENT}",
     "instructions_file": "prompts/tech-trends-agent.md",
     "tools": [
-      { "type": "bing_grounding" }
+      { "type": "web_search" }
     ]
   },
   "eval": {
@@ -102,7 +102,7 @@ EVAL_EOF
 
 # --- Commit, push, open PR ---
 git add agents/ prompts/ evals/
-git commit -m "feat: Phase 1 — tech trends agent with web search (Bing Grounding)"
+git commit -m "feat: Phase 1 — tech trends agent with web search"
 
 git push origin "$BRANCH"
 
@@ -111,7 +111,7 @@ PR_URL=$(gh pr create \
   --title "Phase 1: Tech Trends Agent with Web Search" \
   --body "$(cat <<'PR_EOF'
 ## Summary
-- Initial agent deployment with Bing Grounding (web search) capability
+- Initial agent deployment with Web Search capability (no connection required)
 - System prompt defines structured research analyst behaviour
 - Evaluation runs Phase 1 queries only (5 test cases)
 

From c7ac261e6f63a326b4dd29a4b579b57f5695e63a Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 17:44:48 +0200
Subject: [PATCH 3/8] fix: add Azure AI Developer role on Foundry account for
 agents/write permission

---
 scripts/bootstrap.sh | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh
index 4763408..8eee2f9 100644
--- a/scripts/bootstrap.sh
+++ b/scripts/bootstrap.sh
@@ -192,14 +192,32 @@ az role assignment create \
   --role "53ca6127-db72-4b80-b1b0-d745d6d5456d" \
   --scope "$SCOPE" \
   --output none
-echo "  + Azure AI User"
+echo "  + Azure AI User (on resource group)"
 
 az role assignment create \
   --assignee "$SP_OBJ_ID" \
   --role "Cognitive Services OpenAI User" \
   --scope "$SCOPE" \
   --output none
-echo "  + Cognitive Services OpenAI User"
+echo "  + Cognitive Services OpenAI User (on resource group)"
+
+# Azure AI Developer on Foundry account scope (needed for agents/write data action)
+if [[ -n "$TEST_ENDPOINT" ]]; then
+  FOUNDRY_HOST=$(echo "$TEST_ENDPOINT" | sed -E 's|https://([^/]+)/.*|\1|')
+  FOUNDRY_ACCOUNT_NAME=$(echo "$FOUNDRY_HOST" | sed -E 's|\.services\.ai\.azure\.com||')
+  FOUNDRY_ACCOUNT_ID=$(az cognitiveservices account list \
+    --query "[?name=='${FOUNDRY_ACCOUNT_NAME}'].id | [0]" -o tsv 2>/dev/null)
+  if [[ -n "$FOUNDRY_ACCOUNT_ID" ]]; then
+    az role assignment create \
+      --assignee "$SP_OBJ_ID" \
+      --role "Azure AI Developer" \
+      --scope "$FOUNDRY_ACCOUNT_ID" \
+      --output none
+    echo "  + Azure AI Developer (on Foundry account: $FOUNDRY_ACCOUNT_NAME)"
+  else
+    echo "  ! Could not resolve Foundry account ID — assign Azure AI Developer manually"
+  fi
+fi
 
 # ---------- Step 7: GitHub Variables ----------
 echo "[7/7] Setting GitHub repository variables..."

From 0355c648510ecd5190443a450d9b26c55b028681 Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 17:47:58 +0200
Subject: [PATCH 4/8] fix: assign Azure AI Developer role at project level, not
 account level

---
 scripts/bootstrap.sh | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh
index 8eee2f9..0b97a27 100644
--- a/scripts/bootstrap.sh
+++ b/scripts/bootstrap.sh
@@ -201,21 +201,33 @@ az role assignment create \
   --output none
 echo "  + Cognitive Services OpenAI User (on resource group)"
 
-# Azure AI Developer on Foundry account scope (needed for agents/write data action)
+# Azure AI Developer on Foundry project scope (needed for agents/write data action)
 if [[ -n "$TEST_ENDPOINT" ]]; then
   FOUNDRY_HOST=$(echo "$TEST_ENDPOINT" | sed -E 's|https://([^/]+)/.*|\1|')
   FOUNDRY_ACCOUNT_NAME=$(echo "$FOUNDRY_HOST" | sed -E 's|\.services\.ai\.azure\.com||')
+  TEST_PROJECT_NAME=$(echo "$TEST_ENDPOINT" | sed -E 's|.*/projects/([^/]+).*|\1|')
   FOUNDRY_ACCOUNT_ID=$(az cognitiveservices account list \
     --query "[?name=='${FOUNDRY_ACCOUNT_NAME}'].id | [0]" -o tsv 2>/dev/null)
   if [[ -n "$FOUNDRY_ACCOUNT_ID" ]]; then
     az role assignment create \
       --assignee "$SP_OBJ_ID" \
       --role "Azure AI Developer" \
-      --scope "$FOUNDRY_ACCOUNT_ID" \
+      --scope "$FOUNDRY_ACCOUNT_ID/projects/$TEST_PROJECT_NAME" \
       --output none
-    echo "  + Azure AI Developer (on Foundry account: $FOUNDRY_ACCOUNT_NAME)"
+    echo "  + Azure AI Developer (on project: $TEST_PROJECT_NAME)"
   else
-    echo "  ! Could not resolve Foundry account ID — assign Azure AI Developer manually"
+    echo "  ! Could not resolve Foundry account — assign Azure AI Developer on project manually"
+  fi
+fi
+if [[ -n "$PROD_ENDPOINT" && "$PROD_ENDPOINT" != "$TEST_ENDPOINT" ]]; then
+  PROD_PROJECT_NAME=$(echo "$PROD_ENDPOINT" | sed -E 's|.*/projects/([^/]+).*|\1|')
+  if [[ -n "$FOUNDRY_ACCOUNT_ID" ]]; then
+    az role assignment create \
+      --assignee "$SP_OBJ_ID" \
+      --role "Azure AI Developer" \
+      --scope "$FOUNDRY_ACCOUNT_ID/projects/$PROD_PROJECT_NAME" \
+      --output none
+    echo "  + Azure AI Developer (on project: $PROD_PROJECT_NAME)"
   fi
 fi
 

From 08d3533150071c62c84800e2b1ed6e049110d680 Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 17:52:22 +0200
Subject: [PATCH 5/8] fix: use Azure AI User (Foundry User) role on account
 scope for agents/write

---
 scripts/bootstrap.sh | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh
index 0b97a27..05ef463 100644
--- a/scripts/bootstrap.sh
+++ b/scripts/bootstrap.sh
@@ -201,33 +201,21 @@ az role assignment create \
   --output none
 echo "  + Cognitive Services OpenAI User (on resource group)"
 
-# Azure AI Developer on Foundry project scope (needed for agents/write data action)
+# Azure AI User (Foundry User) on Foundry account scope — grants agents/write data action
 if [[ -n "$TEST_ENDPOINT" ]]; then
   FOUNDRY_HOST=$(echo "$TEST_ENDPOINT" | sed -E 's|https://([^/]+)/.*|\1|')
   FOUNDRY_ACCOUNT_NAME=$(echo "$FOUNDRY_HOST" | sed -E 's|\.services\.ai\.azure\.com||')
-  TEST_PROJECT_NAME=$(echo "$TEST_ENDPOINT" | sed -E 's|.*/projects/([^/]+).*|\1|')
   FOUNDRY_ACCOUNT_ID=$(az cognitiveservices account list \
     --query "[?name=='${FOUNDRY_ACCOUNT_NAME}'].id | [0]" -o tsv 2>/dev/null)
   if [[ -n "$FOUNDRY_ACCOUNT_ID" ]]; then
     az role assignment create \
       --assignee "$SP_OBJ_ID" \
-      --role "Azure AI Developer" \
-      --scope "$FOUNDRY_ACCOUNT_ID/projects/$TEST_PROJECT_NAME" \
+      --role "53ca6127-db72-4b80-b1b0-d745d6d5456d" \
+      --scope "$FOUNDRY_ACCOUNT_ID" \
       --output none
-    echo "  + Azure AI Developer (on project: $TEST_PROJECT_NAME)"
+    echo "  + Azure AI User / Foundry User (on account: $FOUNDRY_ACCOUNT_NAME)"
   else
-    echo "  ! Could not resolve Foundry account — assign Azure AI Developer on project manually"
-  fi
-fi
-if [[ -n "$PROD_ENDPOINT" && "$PROD_ENDPOINT" != "$TEST_ENDPOINT" ]]; then
-  PROD_PROJECT_NAME=$(echo "$PROD_ENDPOINT" | sed -E 's|.*/projects/([^/]+).*|\1|')
-  if [[ -n "$FOUNDRY_ACCOUNT_ID" ]]; then
-    az role assignment create \
-      --assignee "$SP_OBJ_ID" \
-      --role "Azure AI Developer" \
-      --scope "$FOUNDRY_ACCOUNT_ID/projects/$PROD_PROJECT_NAME" \
-      --output none
-    echo "  + Azure AI Developer (on project: $PROD_PROJECT_NAME)"
+    echo "  ! Could not resolve Foundry account — assign Azure AI User (53ca6127-...) manually"
   fi
 fi
 

From dad27752c19ee931be672de56016454698219474 Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 17:55:32 +0200
Subject: [PATCH 6/8] fix: convert eval dataset from JSONL to JSON array for
 ai-agent-evals action

---
 .github/workflows/evaluate.yml                  |  2 +-
 .github/workflows/monitor.yml                   |  2 +-
 agents/tech-trends-agent.json                   |  2 +-
 evals/golden-dataset.json                       | 10 ++++++++++
 evals/golden-dataset.jsonl                      |  8 --------
 scripts/lifecycle/01-phase1-web-search.sh       |  2 +-
 scripts/lifecycle/02-phase2-code-interpreter.sh |  2 +-
 tests/conftest.py                               |  2 +-
 8 files changed, 16 insertions(+), 14 deletions(-)
 create mode 100644 evals/golden-dataset.json
 delete mode 100644 evals/golden-dataset.jsonl

diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml
index c8d5659..e3a3f24 100644
--- a/.github/workflows/evaluate.yml
+++ b/.github/workflows/evaluate.yml
@@ -74,7 +74,7 @@ jobs:
           azure-ai-project-endpoint: ${{ vars.FOUNDRY_TEST_ENDPOINT }}
           deployment-name:           ${{ vars.GPT_DEPLOYMENT }}
           agent-ids:                 "tech-trends-agent:latest"
-          data-path:                 "./evals/golden-dataset.jsonl"
+          data-path:                 "./evals/golden-dataset.json"
           evaluation-result-view:    "all-scores"
 
       - name: Post evaluation summary to PR
diff --git a/.github/workflows/monitor.yml b/.github/workflows/monitor.yml
index 3eec93d..9241bff 100644
--- a/.github/workflows/monitor.yml
+++ b/.github/workflows/monitor.yml
@@ -29,7 +29,7 @@ jobs:
           azure-ai-project-endpoint: ${{ vars.FOUNDRY_PROD_ENDPOINT }}
           deployment-name:           ${{ vars.GPT_DEPLOYMENT }}
           agent-ids:                 "tech-trends-agent:latest"
-          data-path:                 "./evals/golden-dataset.jsonl"
+          data-path:                 "./evals/golden-dataset.json"
           evaluation-result-view:    "default"
 
       - name: Open GitHub issue if scores degraded
diff --git a/agents/tech-trends-agent.json b/agents/tech-trends-agent.json
index fb6c08b..9d6b181 100644
--- a/agents/tech-trends-agent.json
+++ b/agents/tech-trends-agent.json
@@ -9,7 +9,7 @@
     ]
   },
   "eval": {
-    "dataset": "evals/golden-dataset.jsonl",
+    "dataset": "evals/golden-dataset.json",
     "phase_filter": "1",
     "config": "evals/eval-config.json"
   },
diff --git a/evals/golden-dataset.json b/evals/golden-dataset.json
new file mode 100644
index 0000000..3a16666
--- /dev/null
+++ b/evals/golden-dataset.json
@@ -0,0 +1,10 @@
+[
+  {"query": "What are the top three AI model releases in the last 90 days and their key capabilities?", "ground_truth": "Response should identify at least 3 recent model releases with specific capability descriptions, cite web sources, and be structured with a summary and key findings section.", "phase": "1", "category": "trend_research"},
+  {"query": "How is the major cloud provider landscape shifting in 2025 regarding AI infrastructure?", "ground_truth": "Response should cover at least 2 major cloud providers, discuss AI infrastructure investment or announcements, and cite current sources.", "phase": "1", "category": "trend_research"},
+  {"query": "What is the current state of open-source LLM adoption in enterprise settings?", "ground_truth": "Response should address enterprise adoption, mention specific models or frameworks, and provide balanced perspective on open vs closed source.", "phase": "1", "category": "market_analysis"},
+  {"query": "Summarise recent developer sentiment around AI coding tools based on community discussions.", "ground_truth": "Response should reflect actual developer perspectives, not vendor claims, and cite community sources such as surveys, forums or publications.", "phase": "1", "category": "community_sentiment"},
+  {"query": "What are analysts predicting for AI chip demand over the next 12 months?", "ground_truth": "Response should include analyst predictions, reference specific companies or market segments, and note the source and date of predictions.", "phase": "1", "category": "market_forecast"},
+  {"query": "Search for the latest GPU benchmark comparison data and calculate which GPU offers the best performance-per-dollar ratio based on the data you find.", "ground_truth": "Response should retrieve actual benchmark data, perform a calculation or comparison, present results in a structured format, and cite the data source.", "phase": "2", "category": "data_analysis"},
+  {"query": "Find recent AI model API pricing tables and produce a comparison showing cost per million tokens for at least 4 models.", "ground_truth": "Response should retrieve current pricing data, present a structured comparison table, and identify the most cost-effective option for different use cases.", "phase": "2", "category": "data_analysis"},
+  {"query": "Retrieve the latest Stack Overflow developer survey data on AI tool usage and compute the percentage increase in adoption compared to the prior year.", "ground_truth": "Response should locate survey data, perform a percentage calculation, present the result clearly, and note the source and survey year.", "phase": "2", "category": "data_analysis"}
+]
diff --git a/evals/golden-dataset.jsonl b/evals/golden-dataset.jsonl
deleted file mode 100644
index 15b29a9..0000000
--- a/evals/golden-dataset.jsonl
+++ /dev/null
@@ -1,8 +0,0 @@
-{"query": "What are the top three AI model releases in the last 90 days and their key capabilities?", "ground_truth": "Response should identify at least 3 recent model releases with specific capability descriptions, cite web sources, and be structured with a summary and key findings section.", "phase": "1", "category": "trend_research"}
-{"query": "How is the major cloud provider landscape shifting in 2025 regarding AI infrastructure?", "ground_truth": "Response should cover at least 2 major cloud providers, discuss AI infrastructure investment or announcements, and cite current sources.", "phase": "1", "category": "trend_research"}
-{"query": "What is the current state of open-source LLM adoption in enterprise settings?", "ground_truth": "Response should address enterprise adoption, mention specific models or frameworks, and provide balanced perspective on open vs closed source.", "phase": "1", "category": "market_analysis"}
-{"query": "Summarise recent developer sentiment around AI coding tools based on community discussions.", "ground_truth": "Response should reflect actual developer perspectives, not vendor claims, and cite community sources such as surveys, forums or publications.", "phase": "1", "category": "community_sentiment"}
-{"query": "What are analysts predicting for AI chip demand over the next 12 months?", "ground_truth": "Response should include analyst predictions, reference specific companies or market segments, and note the source and date of predictions.", "phase": "1", "category": "market_forecast"}
-{"query": "Search for the latest GPU benchmark comparison data and calculate which GPU offers the best performance-per-dollar ratio based on the data you find.", "ground_truth": "Response should retrieve actual benchmark data, perform a calculation or comparison, present results in a structured format, and cite the data source.", "phase": "2", "category": "data_analysis"}
-{"query": "Find recent AI model API pricing tables and produce a comparison showing cost per million tokens for at least 4 models.", "ground_truth": "Response should retrieve current pricing data, present a structured comparison table, and identify the most cost-effective option for different use cases.", "phase": "2", "category": "data_analysis"}
-{"query": "Retrieve the latest Stack Overflow developer survey data on AI tool usage and compute the percentage increase in adoption compared to the prior year.", "ground_truth": "Response should locate survey data, perform a percentage calculation, present the result clearly, and note the source and survey year.", "phase": "2", "category": "data_analysis"}
diff --git a/scripts/lifecycle/01-phase1-web-search.sh b/scripts/lifecycle/01-phase1-web-search.sh
index fc5668f..247acfa 100644
--- a/scripts/lifecycle/01-phase1-web-search.sh
+++ b/scripts/lifecycle/01-phase1-web-search.sh
@@ -37,7 +37,7 @@ cat > agents/tech-trends-agent.json << 'AGENT_EOF'
     ]
   },
   "eval": {
-    "dataset": "evals/golden-dataset.jsonl",
+    "dataset": "evals/golden-dataset.json",
     "phase_filter": "1",
     "config": "evals/eval-config.json"
   },
diff --git a/scripts/lifecycle/02-phase2-code-interpreter.sh b/scripts/lifecycle/02-phase2-code-interpreter.sh
index 066670a..c2df73e 100644
--- a/scripts/lifecycle/02-phase2-code-interpreter.sh
+++ b/scripts/lifecycle/02-phase2-code-interpreter.sh
@@ -40,7 +40,7 @@ cat > agents/tech-trends-agent.json << 'AGENT_EOF'
     ]
   },
   "eval": {
-    "dataset": "evals/golden-dataset.jsonl",
+    "dataset": "evals/golden-dataset.json",
     "phase_filter": null,
     "config": "evals/eval-config.json"
   },
diff --git a/tests/conftest.py b/tests/conftest.py
index 9f07b9b..c3249a4 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -26,7 +26,7 @@ def tmp_project(tmp_path):
             "tools": [{"type": "bing_grounding"}],
         },
         "eval": {
-            "dataset": "evals/golden-dataset.jsonl",
+            "dataset": "evals/golden-dataset.json",
             "phase_filter": "1",
             "config": "evals/eval-config.json",
         },

From 97bcafa843c1e9b242a21c62c07d19ad0b21dbdc Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 17:58:35 +0200
Subject: [PATCH 7/8] fix: pass actual agent version to ai-agent-evals action
 instead of 'latest'

---
 .github/workflows/evaluate.yml |  2 +-
 .github/workflows/monitor.yml  | 26 +++++++++++++++++++++++++-
 scripts/deploy_agent.py        |  7 +++++++
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml
index e3a3f24..5e81110 100644
--- a/.github/workflows/evaluate.yml
+++ b/.github/workflows/evaluate.yml
@@ -73,7 +73,7 @@ jobs:
         with:
           azure-ai-project-endpoint: ${{ vars.FOUNDRY_TEST_ENDPOINT }}
           deployment-name:           ${{ vars.GPT_DEPLOYMENT }}
-          agent-ids:                 "tech-trends-agent:latest"
+          agent-ids:                 "tech-trends-agent:${{ steps.deploy.outputs.agent_version }}"
           data-path:                 "./evals/golden-dataset.json"
           evaluation-result-view:    "all-scores"
 
diff --git a/.github/workflows/monitor.yml b/.github/workflows/monitor.yml
index 9241bff..cd9bbf3 100644
--- a/.github/workflows/monitor.yml
+++ b/.github/workflows/monitor.yml
@@ -16,19 +16,43 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+
+      - name: Install dependencies
+        run: pip install -r requirements.txt
+
       - uses: azure/login@v2
         with:
           client-id:       ${{ vars.AZURE_CLIENT_ID }}
           tenant-id:       ${{ vars.AZURE_TENANT_ID }}
           subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}
 
+      - name: Get latest agent version
+        id: agent  # later read as steps.agent.outputs.version by the evaluation step
+        env:
+          FOUNDRY_PROD_ENDPOINT: ${{ vars.FOUNDRY_PROD_ENDPOINT }}
+        run: |
+          VERSION=$(python3 -c "
+          import os
+          from azure.ai.projects import AIProjectClient
+          from azure.identity import DefaultAzureCredential
+          client = AIProjectClient(endpoint=os.environ['FOUNDRY_PROD_ENDPOINT'], credential=DefaultAzureCredential())
+          agent = client.agents.get_agent('tech-trends-agent')
+          print(agent.version)
+          ")
+          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
+
       - name: Run evaluation against production agent
         id: eval
         uses: microsoft/ai-agent-evals@v3-beta
         with:
           azure-ai-project-endpoint: ${{ vars.FOUNDRY_PROD_ENDPOINT }}
           deployment-name:           ${{ vars.GPT_DEPLOYMENT }}
-          agent-ids:                 "tech-trends-agent:latest"
+          agent-ids:                 "tech-trends-agent:${{ steps.agent.outputs.version }}"
           data-path:                 "./evals/golden-dataset.json"
           evaluation-result-view:    "default"
 
diff --git a/scripts/deploy_agent.py b/scripts/deploy_agent.py
index 8f6338d..4cd433e 100644
--- a/scripts/deploy_agent.py
+++ b/scripts/deploy_agent.py
@@ -111,6 +111,13 @@ def deploy_agent(env: str, tools: list, semver: str):
         json.dump(artifact, f, indent=2)
 
     print(f"Deployed {agent.version} | artifact -> {artifact_path}")
+
+    # Output for GitHub Actions
+    gh_output = os.environ.get("GITHUB_OUTPUT")
+    if gh_output:
+        with open(gh_output, "a") as f:
+            f.write(f"agent_version={agent.version}\n")
+
     return artifact, artifact_path
 
 

From f62a258108a7538385ef5c0f21884837f5c377ac Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 18:03:32 +0200
Subject: [PATCH 8/8] fix: restructure eval dataset to match ai-agent-evals
 v3-beta expected format
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The action expects a JSON object with name, evaluators, and data fields —
not a bare array. Also align evaluator names to builtin.* convention.
---
 evals/eval-config.json    |  8 ++++----
 evals/golden-dataset.json | 29 +++++++++++++++++++----------
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/evals/eval-config.json b/evals/eval-config.json
index bba838d..5cafb0a 100644
--- a/evals/eval-config.json
+++ b/evals/eval-config.json
@@ -1,9 +1,9 @@
 {
   "evaluators": [
-    "TaskAdherenceEvaluator",
-    "RelevanceEvaluator",
-    "GroundednessEvaluator",
-    "CoherenceEvaluator"
+    "builtin.task_adherence",
+    "builtin.relevance",
+    "builtin.groundedness",
+    "builtin.coherence"
   ],
   "thresholds": {
     "task_adherence": 0.80,
diff --git a/evals/golden-dataset.json b/evals/golden-dataset.json
index 3a16666..25a48ff 100644
--- a/evals/golden-dataset.json
+++ b/evals/golden-dataset.json
@@ -1,10 +1,19 @@
-[
-  {"query": "What are the top three AI model releases in the last 90 days and their key capabilities?", "ground_truth": "Response should identify at least 3 recent model releases with specific capability descriptions, cite web sources, and be structured with a summary and key findings section.", "phase": "1", "category": "trend_research"},
-  {"query": "How is the major cloud provider landscape shifting in 2025 regarding AI infrastructure?", "ground_truth": "Response should cover at least 2 major cloud providers, discuss AI infrastructure investment or announcements, and cite current sources.", "phase": "1", "category": "trend_research"},
-  {"query": "What is the current state of open-source LLM adoption in enterprise settings?", "ground_truth": "Response should address enterprise adoption, mention specific models or frameworks, and provide balanced perspective on open vs closed source.", "phase": "1", "category": "market_analysis"},
-  {"query": "Summarise recent developer sentiment around AI coding tools based on community discussions.", "ground_truth": "Response should reflect actual developer perspectives, not vendor claims, and cite community sources such as surveys, forums or publications.", "phase": "1", "category": "community_sentiment"},
-  {"query": "What are analysts predicting for AI chip demand over the next 12 months?", "ground_truth": "Response should include analyst predictions, reference specific companies or market segments, and note the source and date of predictions.", "phase": "1", "category": "market_forecast"},
-  {"query": "Search for the latest GPU benchmark comparison data and calculate which GPU offers the best performance-per-dollar ratio based on the data you find.", "ground_truth": "Response should retrieve actual benchmark data, perform a calculation or comparison, present results in a structured format, and cite the data source.", "phase": "2", "category": "data_analysis"},
-  {"query": "Find recent AI model API pricing tables and produce a comparison showing cost per million tokens for at least 4 models.", "ground_truth": "Response should retrieve current pricing data, present a structured comparison table, and identify the most cost-effective option for different use cases.", "phase": "2", "category": "data_analysis"},
-  {"query": "Retrieve the latest Stack Overflow developer survey data on AI tool usage and compute the percentage increase in adoption compared to the prior year.", "ground_truth": "Response should locate survey data, perform a percentage calculation, present the result clearly, and note the source and survey year.", "phase": "2", "category": "data_analysis"}
-]
+{
+  "name": "tech-trends-agent-eval",
+  "evaluators": [
+    "builtin.task_adherence",
+    "builtin.relevance",
+    "builtin.groundedness",
+    "builtin.coherence"
+  ],
+  "data": [
+    {"query": "What are the top three AI model releases in the last 90 days and their key capabilities?", "ground_truth": "Response should identify at least 3 recent model releases with specific capability descriptions, cite web sources, and be structured with a summary and key findings section.", "phase": "1", "category": "trend_research"},
+    {"query": "How is the major cloud provider landscape shifting in 2025 regarding AI infrastructure?", "ground_truth": "Response should cover at least 2 major cloud providers, discuss AI infrastructure investment or announcements, and cite current sources.", "phase": "1", "category": "trend_research"},
+    {"query": "What is the current state of open-source LLM adoption in enterprise settings?", "ground_truth": "Response should address enterprise adoption, mention specific models or frameworks, and provide balanced perspective on open vs closed source.", "phase": "1", "category": "market_analysis"},
+    {"query": "Summarise recent developer sentiment around AI coding tools based on community discussions.", "ground_truth": "Response should reflect actual developer perspectives, not vendor claims, and cite community sources such as surveys, forums or publications.", "phase": "1", "category": "community_sentiment"},
+    {"query": "What are analysts predicting for AI chip demand over the next 12 months?", "ground_truth": "Response should include analyst predictions, reference specific companies or market segments, and note the source and date of predictions.", "phase": "1", "category": "market_forecast"},
+    {"query": "Search for the latest GPU benchmark comparison data and calculate which GPU offers the best performance-per-dollar ratio based on the data you find.", "ground_truth": "Response should retrieve actual benchmark data, perform a calculation or comparison, present results in a structured format, and cite the data source.", "phase": "2", "category": "data_analysis"},
+    {"query": "Find recent AI model API pricing tables and produce a comparison showing cost per million tokens for at least 4 models.", "ground_truth": "Response should retrieve current pricing data, present a structured comparison table, and identify the most cost-effective option for different use cases.", "phase": "2", "category": "data_analysis"},
+    {"query": "Retrieve the latest Stack Overflow developer survey data on AI tool usage and compute the percentage increase in adoption compared to the prior year.", "ground_truth": "Response should locate survey data, perform a percentage calculation, present the result clearly, and note the source and survey year.", "phase": "2", "category": "data_analysis"}
+  ]
+}