diff --git a/.github/workflows/agentex-tutorials-test.yml b/.github/workflows/agentex-tutorials-test.yml
new file mode 100644
index 00000000..eb6da476
--- /dev/null
+++ b/.github/workflows/agentex-tutorials-test.yml
@@ -0,0 +1,315 @@
+name: Test Tutorial Agents
+
+on:
+  pull_request:
+    branches: [ main ]
+  push:
+    branches: [ main ]
+  workflow_dispatch:
+
+jobs:
+  find-tutorials:
+    runs-on: ubuntu-latest
+    outputs:
+      tutorials: ${{ steps.get-tutorials.outputs.tutorials }}
+    steps:
+      - name: Checkout agentex-python repo
+        uses: actions/checkout@v4
+
+      - name: Find all tutorials
+        id: get-tutorials
+        run: |
+          cd examples/tutorials
+          # Find all tutorials and exclude the temporal ones
+          all_tutorials=$(find . -name "manifest.yaml" -exec dirname {} \; | sort | sed 's|^\./||')
+
+          # Filter out the temporal tutorials, which are currently being updated
+          filtered_tutorials=$(echo "$all_tutorials" | grep -v -E "(temporal)")
+
+          # Convert to JSON array
+          tutorials=$(echo "$filtered_tutorials" | jq -R -s -c 'split("\n") | map(select(length > 0))')
+
+          echo "tutorials=$tutorials" >> $GITHUB_OUTPUT
+          echo "All tutorials found: $(echo "$all_tutorials" | wc -l)"
+          echo "Filtered tutorials: $(echo "$filtered_tutorials" | wc -l)"
+          echo "Excluded tutorials:"
+          echo "$all_tutorials" | grep -E "(temporal)" || echo "  (none matched exclusion pattern)"
+          echo "Final tutorial list: $tutorials"
+
+  test-tutorial:
+    needs: find-tutorials
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    strategy:
+      matrix:
+        tutorial: ${{ fromJson(needs.find-tutorials.outputs.tutorials) }}
+      fail-fast: false
+    name: test-${{ matrix.tutorial }}
+
+    steps:
+      - name: Checkout agentex-python repo
+        uses: actions/checkout@v4
+
+      - name: Install UV
+        run: |
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          echo "$HOME/.local/bin" >> $GITHUB_PATH
+
+      - name: Pull latest AgentEx image
+        run: |
+          echo "🐳 Pulling latest Scale AgentEx Docker image..."
+          docker pull ghcr.io/scaleapi/scale-agentex/agentex:latest
+          echo "✅ Successfully pulled AgentEx Docker image"
+
+      - name: Checkout scale-agentex repo
+        uses: actions/checkout@v4
+        with:
+          repository: scaleapi/scale-agentex
+          path: scale-agentex
+
+      - name: Configure Docker Compose for pulled image and host networking
+        run: |
+          cd scale-agentex/agentex
+          echo "🔧 Configuring AgentEx container to use pulled image and host networking..."
+
+          # Install yq for YAML manipulation
+          sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
+          sudo chmod +x /usr/local/bin/yq
+
+          # Override to use pulled image instead of building
+          yq eval '.services.agentex.image = "ghcr.io/scaleapi/scale-agentex/agentex:latest"' -i docker-compose.yml
+          yq eval 'del(.services.agentex.build)' -i docker-compose.yml
+
+          # Add extra_hosts to agentex service to make host.docker.internal work
+          yq eval '.services.agentex.extra_hosts = ["host.docker.internal:host-gateway"]' -i docker-compose.yml
+
+          echo "✅ Configured docker-compose to use pulled image with host access"
+
+      - name: Start AgentEx Server
+        run: |
+          cd scale-agentex/agentex
+          echo "🚀 Starting AgentEx server and dependencies..."
+
+          # Start all services
+          docker compose up -d
+
+          echo "⏳ Waiting for dependencies to be healthy..."
+
+          # Wait for services to be healthy
+          for i in {1..30}; do
+            if docker compose ps | grep -q "healthy"; then
+              echo "✅ Dependencies are healthy"
+              break
+            fi
+            echo "  Attempt $i/30: Waiting for services..."
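+            # Note: 'grep -q "healthy"' succeeds once any single service reports
+            # healthy; the AgentEx /health poll below is the stricter readiness gate.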
+            sleep 5
+          done
+
+          # Wait specifically for AgentEx server to be ready
+          echo "⏳ Waiting for AgentEx server to be ready..."
+          for i in {1..30}; do
+            if curl -s --max-time 5 http://localhost:5003/health >/dev/null 2>&1; then
+              echo "✅ AgentEx server is ready"
+              break
+            fi
+            echo "  Attempt $i/30: Waiting for AgentEx server..."
+            sleep 5
+          done
+
+      - name: Build AgentEx SDK
+        run: |
+          echo "🔨 Building AgentEx SDK wheel..."
+          uv build
+          echo "✅ SDK built successfully"
+          ls -la dist/
+
+      - name: Test Tutorial
+        id: run-test
+        working-directory: ./examples/tutorials
+        env:
+          OPENAI_API_KEY: ${{ secrets.TUTORIAL_OPENAI_API_KEY }}
+          HEALTH_CHECK_PORT: 8080  # Use non-privileged port for temporal worker health checks
+        run: |
+          echo "Testing tutorial: ${{ matrix.tutorial }}"
+          AGENTEX_API_BASE_URL="http://localhost:5003" \
+            ./run_agent_test.sh --build-cli "${{ matrix.tutorial }}"
+
+      - name: Print agent logs on failure
+        if: failure()
+        working-directory: ./examples/tutorials
+        run: |
+          echo "🚨 Test failed for tutorial: ${{ matrix.tutorial }}"
+          echo "📋 Printing agent logs..."
+
+          # Look for agent log files in the tutorial directory
+          if find "${{ matrix.tutorial }}" -name "*.log" -type f 2>/dev/null | grep -q .; then
+            echo "Found agent log files:"
+            find "${{ matrix.tutorial }}" -name "*.log" -type f -exec echo "=== {} ===" \; -exec cat {} \;
+          else
+            echo "No .log files found, checking for other common log locations..."
+          fi
+
+          # Check for any output files or dumps (group the -name tests so -exec
+          # applies to all of them, not just the last one)
+          if find "${{ matrix.tutorial }}" \( -name "agent_output*" -o -name "debug*" -o -name "*.out" \) 2>/dev/null | grep -q .; then
+            echo "Found other output files:"
+            find "${{ matrix.tutorial }}" \( -name "agent_output*" -o -name "debug*" -o -name "*.out" \) -exec echo "=== {} ===" \; -exec cat {} \;
+          fi
+
+          # List any python processes that might still be running
+          echo "🔍 Checking for running python processes..."
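+          # 'grep python' typically matches its own grep process as well; the
+          # '|| echo' fallback keeps this diagnostic step from failing when nothing matches.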
+          ps aux | grep python || echo "No python processes found"
+
+      - name: Record test result
+        id: test-result
+        if: always()
+        run: |
+          # Create results directory
+          mkdir -p test-results
+
+          # Determine result
+          if [ "${{ steps.run-test.outcome }}" == "success" ]; then
+            result="passed"
+            echo "result=passed" >> $GITHUB_OUTPUT
+            echo "tutorial=${{ matrix.tutorial }}" >> $GITHUB_OUTPUT
+          else
+            result="failed"
+            echo "result=failed" >> $GITHUB_OUTPUT
+            echo "tutorial=${{ matrix.tutorial }}" >> $GITHUB_OUTPUT
+          fi
+
+          # Save result to file for artifact upload
+          # Create a safe filename from tutorial path
+          safe_name=$(echo "${{ matrix.tutorial }}" | tr '/' '_' | tr -d ' ')
+          echo "$result" > "test-results/result-${safe_name}.txt"
+          echo "${{ matrix.tutorial }}" > "test-results/tutorial-${safe_name}.txt"
+          echo "safe_name=${safe_name}" >> $GITHUB_OUTPUT
+
+      - name: Upload test result
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: test-result-${{ steps.test-result.outputs.safe_name }}
+          path: test-results/
+          retention-days: 1
+
+  test-summary:
+    if: always()
+    needs: [find-tutorials, test-tutorial]
+    runs-on: ubuntu-latest
+    name: Test Summary
+    steps:
+      - name: Download all test results
+        uses: actions/download-artifact@v4
+        with:
+          pattern: test-result-*
+          path: all-results/
+          merge-multiple: true
+        continue-on-error: true
+
+      - name: Generate Test Summary
+        run: |
+          echo "# 🧪 Tutorial Tests Summary" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+
+          # Initialize counters
+          passed_count=0
+          failed_count=0
+          skipped_count=0
+          total_count=0
+
+          # Get all tutorials that were supposed to run
+          tutorials='${{ needs.find-tutorials.outputs.tutorials }}'
+
+          if [ -d "all-results" ] && [ "$(ls -A all-results 2>/dev/null)" ]; then
+            echo "📊 Processing individual test results from artifacts..."
+
+            echo "## Test Results" >> $GITHUB_STEP_SUMMARY
+            echo "" >> $GITHUB_STEP_SUMMARY
+            echo "| Tutorial | Status | Result |" >> $GITHUB_STEP_SUMMARY
+            echo "|----------|--------|--------|" >> $GITHUB_STEP_SUMMARY
+
+            # Process each result file
+            for result_file in all-results/result-*.txt; do
+              if [ -f "$result_file" ]; then
+                # Extract the safe name from filename
+                safe_name=$(basename "$result_file" .txt | sed 's/result-//')
+
+                # Get corresponding tutorial name file
+                tutorial_file="all-results/tutorial-${safe_name}.txt"
+
+                if [ -f "$tutorial_file" ]; then
+                  tutorial_name=$(cat "$tutorial_file")
+                  result=$(cat "$result_file")
+
+                  total_count=$((total_count + 1))
+
+                  if [ "$result" = "passed" ]; then
+                    echo "| \`$tutorial_name\` | ✅ | Passed |" >> $GITHUB_STEP_SUMMARY
+                    passed_count=$((passed_count + 1))
+                  else
+                    echo "| \`$tutorial_name\` | ❌ | Failed |" >> $GITHUB_STEP_SUMMARY
+                    failed_count=$((failed_count + 1))
+                  fi
+                fi
+              fi
+            done
+
+            # Check for any tutorials that didn't have results (skipped/cancelled).
+            # Feed the loop via process substitution rather than a pipe so the
+            # counter updates survive it (a piped 'while' runs in a subshell).
+            while read expected_tutorial; do
+              safe_expected=$(echo "$expected_tutorial" | tr '/' '_' | tr -d ' ')
+              if [ ! -f "all-results/result-${safe_expected}.txt" ]; then
+                echo "| \`$expected_tutorial\` | ⏭️ | Skipped/Cancelled |" >> $GITHUB_STEP_SUMMARY
+                skipped_count=$((skipped_count + 1))
+                total_count=$((total_count + 1))
+              fi
+            done < <(echo "$tutorials" | jq -r '.[]')
+
+          else
+            echo "⚠️ No individual test results found. This could mean:"
This could mean:" + echo "- Test jobs were cancelled before completion" + echo "- Artifacts failed to upload" + echo "- No tutorials were found to test" + echo "" + + overall_result="${{ needs.test-tutorial.result }}" + echo "Overall job status: **$overall_result**" + + if [[ "$overall_result" == "success" ]]; then + echo "โœ… All tests appear to have passed based on job status." + elif [[ "$overall_result" == "failure" ]]; then + echo "โŒ Some tests appear to have failed based on job status." + echo "" + echo "๐Ÿ’ก **Tip:** Check individual job logs for specific failure details." + elif [[ "$overall_result" == "cancelled" ]]; then + echo "โญ๏ธ Tests were cancelled." + else + echo "โ“ Test status is unclear: $overall_result" + fi + + # Don't show detailed breakdown when we don't have individual results + tutorial_count=$(echo "$tutorials" | jq -r '. | length') + echo "" + echo "Expected tutorial count: $tutorial_count" + fi + + # Only show detailed statistics if we have individual results + if [ -d "all-results" ] && [ "$(ls -A all-results 2>/dev/null)" ]; then + echo "" >> $GITHUB_STEP_SUMMARY + echo "## Summary Statistics" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "- **Total Tests:** $total_count" >> $GITHUB_STEP_SUMMARY + echo "- **Passed:** $passed_count โœ…" >> $GITHUB_STEP_SUMMARY + echo "- **Failed:** $failed_count โŒ" >> $GITHUB_STEP_SUMMARY + echo "- **Skipped:** $skipped_count โญ๏ธ" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + + if [ $failed_count -eq 0 ] && [ $passed_count -gt 0 ]; then + echo "๐ŸŽ‰ **All tests passed!**" >> $GITHUB_STEP_SUMMARY + elif [ $failed_count -gt 0 ]; then + echo "โš ๏ธ **Some tests failed.** Check individual job logs for details." >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "๐Ÿ’ก **Tip:** Look for the 'Print agent logs on failure' step in failed jobs for debugging information." >> $GITHUB_STEP_SUMMARY + else + echo "โ„น๏ธ **Tests were cancelled or skipped.**" >> $GITHUB_STEP_SUMMARY + fi + fi diff --git a/examples/tutorials/10_async/00_base/000_hello_acp/tests/test_agent.py b/examples/tutorials/10_async/00_base/000_hello_acp/tests/test_agent.py index 257c27df..08cac7a7 100644 --- a/examples/tutorials/10_async/00_base/000_hello_acp/tests/test_agent.py +++ b/examples/tutorials/10_async/00_base/000_hello_acp/tests/test_agent.py @@ -96,7 +96,7 @@ async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): ): assert isinstance(message, TaskMessage) if message.content and message.content.type == "text" and message.content.author == "agent": - assert "Hello! I've received your message" in message.content.content + assert "Hello! 
I've received your task" in message.content.content break diff --git a/examples/tutorials/10_async/00_base/040_other_sdks/project/acp.py b/examples/tutorials/10_async/00_base/040_other_sdks/project/acp.py index efeed774..d2ec84fc 100644 --- a/examples/tutorials/10_async/00_base/040_other_sdks/project/acp.py +++ b/examples/tutorials/10_async/00_base/040_other_sdks/project/acp.py @@ -42,6 +42,7 @@ config=AsyncACPConfig(type="base"), ) + class StateModel(BaseModel): input_list: List[dict] turn_number: int @@ -53,11 +54,7 @@ class StateModel(BaseModel): args=["-y", "@modelcontextprotocol/server-sequential-thinking"], ), StdioServerParameters( - command="uvx", - args=["openai-websearch-mcp"], - env={ - "OPENAI_API_KEY": os.environ.get("OPENAI_API_KEY", "") - } + command="uvx", args=["openai-websearch-mcp"], env={"OPENAI_API_KEY": os.environ.get("OPENAI_API_KEY", "")} ), ] @@ -72,6 +69,7 @@ async def handle_task_create(params: CreateTaskParams): ) await adk.state.create(task_id=params.task.id, agent_id=params.agent.id, state=state) + @acp.on_task_event_send async def handle_event_send(params: SendEventParams): # !!! Warning: Because "Async" ACPs are designed to be fully asynchronous, race conditions can occur if parallel events are sent. It is highly recommended to use the "temporal" type in the AsyncACPConfig instead to handle complex use cases. The "base" ACP is only designed to be used for simple use cases and for learning purposes. @@ -85,7 +83,6 @@ async def handle_event_send(params: SendEventParams): if params.event.content.author != "user": raise ValueError(f"Expected user message, got {params.event.content.author}") - # Retrieve the task state. Each event is handled as a new turn, so we need to get the state for the current turn. task_state = await adk.state.get_by_task_and_agent(task_id=params.task.id, agent_id=params.agent.id) if not task_state: @@ -94,12 +91,8 @@ async def handle_event_send(params: SendEventParams): state.turn_number += 1 # Add the new user message to the message history state.input_list.append({"role": "user", "content": params.event.content.content}) - - async with adk.tracing.span( - trace_id=params.task.id, - name=f"Turn {state.turn_number}", - input=state - ) as span: + + async with adk.tracing.span(trace_id=params.task.id, name=f"Turn {state.turn_number}", input=state) as span: # Echo back the user's message so it shows up in the UI. This is not done by default so the agent developer has full control over what is shown to the user. 
         await adk.messages.create(
             task_id=params.task.id,
@@ -156,6 +149,7 @@ async def handle_event_send(params: SendEventParams):
         if span:
             span.output = state
 
+
 @acp.on_task_cancel
 async def handle_task_cancel(params: CancelTaskParams):
     """Default task cancel handler"""
@@ -173,8 +167,8 @@ async def mcp_server_context(mcp_server_params: list[StdioServerParameters]):
     servers = []
     for params in mcp_server_params:
         server = MCPServerStdio(
-            name=f"Server: {params.command}", 
-            params=params.model_dump(), 
+            name=f"Server: {params.command}",
+            params=params.model_dump(),
             cache_tools_list=True,
             client_session_timeout_seconds=60,
         )
@@ -253,7 +247,6 @@ async def run_openai_agent_with_custom_streaming(
     try:
         # Process streaming events with TaskMessage creation
         async for event in result.stream_events():
-
             if event.type == "run_item_stream_event":
                 if event.item.type == "tool_call_item":
                     tool_call_item = event.item.raw_item
@@ -374,9 +367,7 @@ async def run_openai_agent_with_custom_streaming(
         if span:
             span.output = {
                 "new_items": [
-                    item.raw_item.model_dump()
-                    if isinstance(item.raw_item, BaseModel)
-                    else item.raw_item
+                    item.raw_item.model_dump() if isinstance(item.raw_item, BaseModel) else item.raw_item
                     for item in result.new_items
                 ],
                 "final_output": result.final_output,
diff --git a/examples/tutorials/10_async/00_base/040_other_sdks/tests/test_agent.py b/examples/tutorials/10_async/00_base/040_other_sdks/tests/test_agent.py
index a8d83902..429d8d87 100644
--- a/examples/tutorials/10_async/00_base/040_other_sdks/tests/test_agent.py
+++ b/examples/tutorials/10_async/00_base/040_other_sdks/tests/test_agent.py
@@ -114,7 +114,7 @@ async def test_send_event_and_poll_simple_query(self, client: AsyncAgentex, agen
                 break
 
-        # Verify state has been updated by polling the states for 10 seconds
-        for i in range(10):
-            if i == 9:
+        # Verify state has been updated by polling the states for up to 20 seconds
+        for i in range(20):
+            if i == 19:
                 raise Exception("Timeout waiting for state updates")
             states = await client.states.list(agent_id=agent_id, task_id=task.id)
@@ -187,7 +187,12 @@ async def test_multi_turn_conversation_with_state(self, client: AsyncAgentex, ag
             sleep_interval=1.0,
         ):
             assert isinstance(message, TaskMessage)
-            if message.content and message.content.type == "text" and message.content.author == "agent" and message.content.content:
+            if (
+                message.content
+                and message.content.type == "text"
+                and message.content.author == "agent"
+                and message.content.content
+            ):
                 break
 
         ## keep polling the states for 10 seconds for the input_list and turn_number to be updated
@@ -216,7 +221,12 @@ async def test_multi_turn_conversation_with_state(self, client: AsyncAgentex, ag
             timeout=30,
             sleep_interval=1.0,
         ):
-            if message.content and message.content.type == "text" and message.content.author == "agent" and message.content.content:
+            if (
+                message.content
+                and message.content.type == "text"
+                and message.content.author == "agent"
+                and message.content.content
+            ):
                 response_text = message.content.content.lower()
                 assert "blue" in response_text
                 found_response = True
@@ -273,7 +283,10 @@ async def stream_messages() -> None:
                 # For full messages, content is at the top level
                 # For delta messages, we need to check parent_task_message
                 if msg_type == "full":
-                    if event.get("content", {}).get("type") == "text" and event.get("content", {}).get("author") == "user":
+                    if (
+                        event.get("content", {}).get("type") == "text"
+                        and event.get("content", {}).get("author") == "user"
+                    ):
                         user_message_found = True
                 elif msg_type == "done":
                     break
diff --git a/examples/tutorials/10_async/10_temporal/010_agent_chat/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/010_agent_chat/tests/test_agent.py
index bd1f8426..2710b909 100644
--- a/examples/tutorials/10_async/10_temporal/010_agent_chat/tests/test_agent.py
+++ b/examples/tutorials/10_async/10_temporal/010_agent_chat/tests/test_agent.py
@@ -160,7 +160,7 @@ async def test_multi_turn_conversation(self, client: AsyncAgentex, agent_id: str
             sleep_interval=1.0,
         ):
             assert isinstance(message, TaskMessage)
-            if message.content and message.content.type == "text" and message.content.author == "agent" and message.content.content:
+            if (
+                message.content
+                and message.content.type == "text"
+                and message.content.author == "agent"
+                and message.content.content
+            ):
                 break
 
         # Wait a bit for state to update
@@ -177,7 +182,12 @@ async def test_multi_turn_conversation(self, client: AsyncAgentex, agent_id: str
             timeout=30,
             sleep_interval=1.0,
         ):
-            if message.content and message.content.type == "text" and message.content.author == "agent" and message.content.content:
+            if (
+                message.content
+                and message.content.type == "text"
+                and message.content.author == "agent"
+                and message.content.content
+            ):
                 response_text = message.content.content.lower()
                 assert "blue" in response_text, f"Expected 'blue' in response but got: {response_text}"
                 found_response = True
@@ -211,16 +221,24 @@ async def stream_messages() -> None:  # noqa: ANN101
             async for event in stream_agent_response(
                 client=client,
                 task_id=task.id,
-                timeout=20,
+                timeout=60,
             ):
                 msg_type = event.get("type")
                 if msg_type == "full":
                     task_message_update = StreamTaskMessageFull.model_validate(event)
                     if task_message_update.parent_task_message and task_message_update.parent_task_message.id:
                         finished_message = await client.messages.retrieve(task_message_update.parent_task_message.id)
-                        if finished_message.content and finished_message.content.type == "text" and finished_message.content.author == "user":
+                        if (
+                            finished_message.content
+                            and finished_message.content.type == "text"
+                            and finished_message.content.author == "user"
+                        ):
                             user_message_found = True
-                        elif finished_message.content and finished_message.content.type == "text" and finished_message.content.author == "agent":
+                        elif (
+                            finished_message.content
+                            and finished_message.content.type == "text"
+                            and finished_message.content.author == "agent"
+                        ):
                             agent_response_found = True
                         elif finished_message.content and finished_message.content.type == "reasoning":
                             tool_response_found = True
@@ -243,5 +261,6 @@ async def stream_messages() -> None:  # noqa: ANN101
     assert user_message_found, "User message not found in stream"
     assert agent_response_found, "Agent response not found in stream"
 
+
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/run_all_async_tests.sh b/examples/tutorials/run_agent_test.sh
similarity index 55%
rename from examples/tutorials/run_all_async_tests.sh
rename to examples/tutorials/run_agent_test.sh
index 7d5c82ed..f396cfd0 100755
--- a/examples/tutorials/run_all_async_tests.sh
+++ b/examples/tutorials/run_agent_test.sh
@@ -1,16 +1,15 @@
 #!/bin/bash
 #
-# Run all async tutorial tests
+# Run a single agent tutorial test
 #
-# This script runs the test runner for all async tutorials in sequence.
-# It stops at the first failure unless --continue-on-error is specified.
+# This script runs the test for a single agent tutorial.
+# It starts the agent, runs tests against it, then stops the agent.
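+#
+# Agent logs are written to /tmp/agentex-<name>.log; use the --view-logs flag
+# to inspect them after a failed run.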
 #
 # Usage:
-#   ./run_all_async_tests.sh                                # Run all tutorials
-#   ./run_all_async_tests.sh --continue-on-error            # Run all, continue on error
-#   ./run_all_async_tests.sh <tutorial_path>                # Run single tutorial
-#   ./run_all_async_tests.sh --view-logs                    # View most recent agent logs
-#   ./run_all_async_tests.sh --view-logs <tutorial_path>    # View logs for specific tutorial
+#   ./run_agent_test.sh <tutorial_path>                # Run single tutorial test
+#   ./run_agent_test.sh --build-cli <tutorial_path>    # Build CLI from source and run test
+#   ./run_agent_test.sh --view-logs <tutorial_path>    # View logs for specific tutorial
+#   ./run_agent_test.sh --view-logs                    # View most recent agent logs
 #
 
 set -e  # Exit on error
 
@@ -24,50 +23,21 @@ GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 NC='\033[0m' # No Color
 
-AGENT_PORT=8000
-AGENTEX_SERVER_PORT=5003
-
 # Parse arguments
-CONTINUE_ON_ERROR=false
-SINGLE_TUTORIAL=""
+TUTORIAL_PATH=""
 VIEW_LOGS=false
+BUILD_CLI=false
 
 for arg in "$@"; do
-    if [[ "$arg" == "--continue-on-error" ]]; then
-        CONTINUE_ON_ERROR=true
-    elif [[ "$arg" == "--view-logs" ]]; then
+    if [[ "$arg" == "--view-logs" ]]; then
         VIEW_LOGS=true
+    elif [[ "$arg" == "--build-cli" ]]; then
+        BUILD_CLI=true
     else
-        SINGLE_TUTORIAL="$arg"
+        TUTORIAL_PATH="$arg"
    fi
 done
 
-# Find all async tutorial directories
-ALL_TUTORIALS=(
-    # sync tutorials
-    "00_sync/000_hello_acp"
-    "00_sync/010_multiturn"
-    "00_sync/020_streaming"
-    # base tutorials
-    # "10_async/00_base/000_hello_acp"
-    # "10_async/00_base/010_multiturn"
-    # "10_async/00_base/020_streaming"
-    # "10_async/00_base/030_tracing"
-    # "10_async/00_base/040_other_sdks"
-    # "10_async/00_base/080_batch_events"
-    # temporal tutorials
-    # "10_async/10_temporal/000_hello_acp"
-    # "10_async/10_temporal/010_agent_chat"
-    # "10_async/10_temporal/020_state_machine"
-    "10_async/10_temporal/060_open_ai_agents_sdk_hello_world"
-    "10_async/10_temporal/070_open_ai_agents_sdk_tools"
-    "10_async/10_temporal/080_open_ai_agents_sdk_human_in_the_loop"
-)
-
-PASSED=0
-FAILED=0
-FAILED_TESTS=()
-
 # Function to check prerequisites for running this test suite
 check_prerequisites() {
     # Check that we are in the examples/tutorials directory
@@ -90,23 +60,38 @@ check_prerequisites() {
 wait_for_agent_ready() {
     local name=$1
     local logfile="/tmp/agentex-${name}.log"
-    local timeout=30  # seconds
+    local timeout=45  # seconds - increased to account for package installation time
    local elapsed=0
 
     echo -e "${YELLOW}⏳ Waiting for ${name} agent to be ready...${NC}"
 
     while [ $elapsed -lt $timeout ]; do
-        if grep -q "Application startup complete" "$logfile" 2>/dev/null || \
-           grep -q "Running workers for task queue" "$logfile" 2>/dev/null; then
-            echo -e "${GREEN}✅ ${name} agent is ready${NC}"
-            return 0
+        # Check if agent is successfully registered
+        if grep -q "Successfully registered agent" "$logfile" 2>/dev/null; then
+
+            # For temporal agents, also wait for workers to be ready
+            if [[ "$tutorial_path" == *"temporal"* ]]; then
+                # This is a temporal agent - wait for workers too
+                if grep -q "Running workers for task queue" "$logfile" 2>/dev/null; then
+                    return 0
+                fi
+            else
+                return 0
+            fi
         fi
         sleep 1
         ((elapsed++))
     done
 
     echo -e "${RED}❌ Timeout waiting for ${name} agent to be ready${NC}"
-    echo "Check logs: tail -f $logfile"
+    echo -e "${YELLOW}📋 Agent logs:${NC}"
+    if [[ -f "$logfile" ]]; then
+        echo "----------------------------------------"
+        tail -50 "$logfile"
+        echo "----------------------------------------"
+    else
+        echo "❌ Log file not found: $logfile"
+    fi
     return 1
 }
 
@@ -137,7 +122,25 @@ start_agent() {
     cd "$tutorial_path" || return 1
 
     # Start the agent in background and capture PID
-    uv run agentex agents run --manifest manifest.yaml > "$logfile" 2>&1 &
+    local manifest_path="$PWD/manifest.yaml"  # Always use full path
+
+    if [ "$BUILD_CLI" = true ]; then
+
+        # Use the wheel from the dist directory at the repo root
+        # (two levels up from $original_dir, which is examples/tutorials)
+        local wheel_file=$(ls "$original_dir"/../../dist/agentex_sdk-*.whl 2>/dev/null | head -n1)
+        if [[ -z "$wheel_file" ]]; then
+            echo -e "${RED}❌ No built wheel found in dist/agentex_sdk-*.whl${NC}"
+            echo -e "${YELLOW}💡 Please build the local SDK first by running: uv build${NC}"
+            echo -e "${YELLOW}💡 From the repo root directory${NC}"
+            cd "$original_dir"
+            return 1
+        fi
+
+        # Use the built wheel
+        uv run --with "$wheel_file" agentex agents run --manifest "$manifest_path" > "$logfile" 2>&1 &
+    else
+        uv run agentex agents run --manifest manifest.yaml > "$logfile" 2>&1 &
+    fi
     local pid=$!
 
     # Return to original directory
@@ -255,9 +258,30 @@ run_test() {
     # Change to tutorial directory
     cd "$tutorial_path" || return 1
 
-    # Run the tests
-    uv run pytest tests/test_agent.py -v -s
-    local exit_code=$?
+
+    # Run the tests with retry mechanism
+    local max_retries=5
+    local retry_count=0
+    local exit_code=1
+
+    while [ $retry_count -lt $max_retries ]; do
+        if [ $retry_count -gt 0 ]; then
+            echo -e "${YELLOW}🔄 Retrying tests (attempt $((retry_count + 1))/$max_retries)...${NC}"
+        fi
+
+        # Stream pytest output directly in real-time
+        uv run pytest tests/test_agent.py -v -s
+        exit_code=$?
+
+        if [ $exit_code -eq 0 ]; then
+            break
+        else
+            retry_count=$((retry_count + 1))
+            if [ $retry_count -lt $max_retries ]; then
+                sleep 5
+            fi
+        fi
+    done
 
     # Return to original directory
     cd "$original_dir"
@@ -276,15 +300,13 @@ execute_tutorial_test() {
     local tutorial=$1
 
     echo ""
-    echo "--------------------------------------------------------------------------------"
+    echo "================================================================================"
     echo "Testing: $tutorial"
-    echo "--------------------------------------------------------------------------------"
+    echo "================================================================================"
 
     # Start the agent
     if ! start_agent "$tutorial"; then
         echo -e "${RED}❌ FAILED to start agent: $tutorial${NC}"
-        ((FAILED++))
-        FAILED_TESTS+=("$tutorial")
         return 1
     fi
 
@@ -292,12 +314,9 @@
     local test_passed=false
     if run_test "$tutorial"; then
         echo -e "${GREEN}✅ PASSED: $tutorial${NC}"
-        ((PASSED++))
         test_passed=true
     else
         echo -e "${RED}❌ FAILED: $tutorial${NC}"
-        ((FAILED++))
-        FAILED_TESTS+=("$tutorial")
     fi
 
     # Stop the agent
@@ -312,75 +331,97 @@
     fi
 }
 
+# Function to check if built wheel is available
+check_built_wheel() {
+
+    # Navigate to the repo root (two levels up from examples/tutorials)
+    local repo_root="../../"
+    local original_dir="$PWD"
+
+    cd "$repo_root" || {
+        echo -e "${RED}❌ Failed to navigate to repo root${NC}"
+        return 1
+    }
+
+    # Check if wheel exists in dist directory at repo root (we just cd'd there)
+    local wheel_file=$(ls dist/agentex_sdk-*.whl 2>/dev/null | head -n1)
+    if [[ -z "$wheel_file" ]]; then
+        echo -e "${RED}❌ No built wheel found in dist/agentex_sdk-*.whl${NC}"
+        echo -e "${YELLOW}💡 Please build the local SDK first by running: uv build${NC}"
+        echo -e "${YELLOW}💡 From the repo root directory${NC}"
+        cd "$original_dir"
+        return 1
+    fi
+
+    # Test the wheel by running agentex --help
+    if ! uv run --with "$wheel_file" agentex --help >/dev/null 2>&1; then
+        echo -e "${RED}❌ Failed to run agentex with built wheel${NC}"
+        cd "$original_dir"
+        return 1
+    fi
+
+    cd "$original_dir"
+    return 0
+}
+
+
 # Main execution function
 main() {
     # Handle --view-logs flag
     if [ "$VIEW_LOGS" = true ]; then
-        if [[ -n "$SINGLE_TUTORIAL" ]]; then
-            view_agent_logs "$SINGLE_TUTORIAL"
+        if [[ -n "$TUTORIAL_PATH" ]]; then
+            view_agent_logs "$TUTORIAL_PATH"
         else
             view_agent_logs
         fi
         exit 0
     fi
 
+    # Require tutorial path
+    if [[ -z "$TUTORIAL_PATH" ]]; then
+        echo -e "${RED}❌ Error: Tutorial path is required${NC}"
+        echo ""
+        echo "Usage:"
+        echo "  ./run_agent_test.sh <tutorial_path>                # Run single tutorial test"
+        echo "  ./run_agent_test.sh --build-cli <tutorial_path>    # Build CLI from source and run test"
+        echo "  ./run_agent_test.sh --view-logs <tutorial_path>    # View logs for specific tutorial"
+        echo "  ./run_agent_test.sh --view-logs                    # View most recent agent logs"
+        echo ""
+        echo "Examples:"
+        echo "  ./run_agent_test.sh 00_sync/000_hello_acp"
+        echo "  ./run_agent_test.sh --build-cli 00_sync/000_hello_acp"
+        exit 1
+    fi
 
     echo "================================================================================"
-    if [[ -n "$SINGLE_TUTORIAL" ]]; then
-        echo "Running Single Tutorial Test: $SINGLE_TUTORIAL"
-    else
-        echo "Running All Async Tutorial Tests"
-        if [ "$CONTINUE_ON_ERROR" = true ]; then
-            echo -e "${YELLOW}⚠️  Running in continue-on-error mode${NC}"
-        fi
-    fi
+    echo "Running Tutorial Test: $TUTORIAL_PATH"
     echo "================================================================================"
-    echo ""
 
     # Check prerequisites
     check_prerequisites
     echo ""
 
-    # Determine which tutorials to run
-    if [[ -n "$SINGLE_TUTORIAL" ]]; then
-        TUTORIALS=("$SINGLE_TUTORIAL")
-    else
-        TUTORIALS=("${ALL_TUTORIALS[@]}")
-    fi
-
-    # Iterate over tutorials
-    for tutorial in "${TUTORIALS[@]}"; do
-        execute_tutorial_test "$tutorial"
-
-        # Exit early if in fail-fast mode
-        if [ "$CONTINUE_ON_ERROR" = false ] && [ $FAILED -gt 0 ]; then
-            echo ""
-            echo -e "${RED}Stopping due to test failure. Use --continue-on-error to continue.${NC}"
+    # Check built wheel if requested
+    if [ "$BUILD_CLI" = true ]; then
+        if ! check_built_wheel; then
+            echo -e "${RED}❌ Failed to find or verify built wheel${NC}"
             exit 1
         fi
-    done
-
-    # Print summary
-    echo ""
-    echo "================================================================================"
-    echo "Test Summary"
-    echo "================================================================================"
-    echo -e "Total: $((PASSED + FAILED))"
-    echo -e "${GREEN}Passed: $PASSED${NC}"
-    echo -e "${RED}Failed: $FAILED${NC}"
-    echo ""
+        echo ""
+    fi
 
-    if [ $FAILED -gt 0 ]; then
-        echo "Failed tests:"
-        for test in "${FAILED_TESTS[@]}"; do
-            echo -e "  ${RED}✗${NC} $test"
-        done
+    # Execute the single tutorial test
+    if execute_tutorial_test "$TUTORIAL_PATH"; then
         echo ""
-        exit 1
+        echo "================================================================================"
+        echo -e "${GREEN}🎉 Test passed for: $TUTORIAL_PATH${NC}"
+        echo "================================================================================"
+        exit 0
     else
-        echo -e "${GREEN}🎉 All tests passed!${NC}"
         echo ""
-        exit 0
+        echo "================================================================================"
+        echo -e "${RED}❌ Test failed for: $TUTORIAL_PATH${NC}"
+        echo "================================================================================"
+        exit 1
     fi
 }
diff --git a/uv.lock b/uv.lock
index b2e419c4..82183068 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.12, <4"
 resolution-markers = [
     "python_full_version >= '3.13'",