# Observability and Tracing with Manual OpenTelemetry

This notebook demonstrates how to implement tracing using the Microsoft Learn approach with explicit OpenTelemetry configuration and manual span creation.

In [None]:
import asyncio
import os
from azure.monitor.opentelemetry import configure_azure_monitor
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode

from agent_framework import Executor, WorkflowBuilder, WorkflowContext, handler

In [None]:
class StartExecutor(Executor):
    """First workflow node: upper-cases the incoming text and sends it onward."""

    @handler  # type: ignore[misc]
    async def handle_input(self, message: str, ctx: WorkflowContext[str]) -> None:
        """Upper-case ``message`` inside a span and forward the result via ``ctx``.

        Args:
            message: Text received by this executor.
            ctx: Workflow context used to send the transformed text downstream.

        Raises:
            Exception: Any failure is recorded on the span (status ERROR plus an
                exception event) and then re-raised unchanged.
        """
        tracer = trace.get_tracer(__name__)
        # The except block below records failures explicitly. The context
        # manager's automatic recording is therefore disabled; with it left on,
        # the re-raised exception would be recorded on the span a second time
        # and the explicit status description would be overwritten.
        with tracer.start_as_current_span(
            "start_executor_process",
            record_exception=False,
            set_status_on_exception=False,
        ) as span:
            span.set_attribute("input.message", message)
            span.set_attribute("executor.id", "start")

            try:
                # Transform and forward downstream
                result = message.upper()
                span.set_attribute("output.message", result)
                await ctx.send_message(result)
                span.set_status(Status(StatusCode.OK))
            except Exception as e:
                span.set_status(Status(StatusCode.ERROR, str(e)))
                span.record_exception(e)
                raise

In [None]:
class EndExecutor(Executor):
    """Last workflow node: prints the message it receives."""

    @handler  # type: ignore[misc]
    async def handle_final(self, message: str, ctx: WorkflowContext) -> None:
        """Print ``message`` as the final result inside a span.

        Args:
            message: Text received by this executor.
            ctx: Workflow context; not used by this handler.

        Raises:
            Exception: Any failure is recorded on the span (status ERROR plus an
                exception event) and then re-raised unchanged.
        """
        tracer = trace.get_tracer(__name__)
        # Failures are recorded explicitly in the except block, so automatic
        # recording by the context manager is disabled to avoid a duplicate
        # exception event when the exception is re-raised.
        with tracer.start_as_current_span(
            "end_executor_process",
            record_exception=False,
            set_status_on_exception=False,
        ) as span:
            span.set_attribute("final.message", message)
            span.set_attribute("executor.id", "end")

            try:
                print(f"Final result: {message}")
                span.set_status(Status(StatusCode.OK))
            except Exception as e:
                span.set_status(Status(StatusCode.ERROR, str(e)))
                span.record_exception(e)
                raise

In [None]:
async def main() -> None:
    """Configure tracing, then build and run the two-executor workflow under explicit spans.

    Tracing goes to Azure Monitor when ``APPLICATIONINSIGHTS_CONNECTION_STRING``
    is set and to a console exporter otherwise. If an SDK tracer provider is
    already installed, it is reused instead of being configured again.

    Raises:
        Exception: Any failure while building or running the workflow is
            recorded on the ``workflow_execution`` span and re-raised.
    """
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter

    # Configure Azure Monitor with explicit connection string
    connection_string = os.getenv('APPLICATIONINSIGHTS_CONNECTION_STRING')
    if isinstance(trace.get_tracer_provider(), TracerProvider):
        # The global provider can only be installed once per process. Without
        # this guard, re-running on the console path would attach one more
        # exporter to the live provider and print every span twice.
        print("Tracing already configured; reusing the existing tracer provider")
    elif connection_string:
        configure_azure_monitor(connection_string=connection_string)
        print("Azure Monitor configured successfully")
    else:
        print("Warning: No Application Insights connection string found")
        print("Falling back to console exporter for local development")
        provider = TracerProvider()
        provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
        trace.set_tracer_provider(provider)

    tracer = trace.get_tracer(__name__)

    # The except block below records failures on this span explicitly, so the
    # context manager's automatic recording is disabled to avoid a duplicate
    # exception event on re-raise. The nested spans keep the default behaviour.
    with tracer.start_as_current_span(
        "workflow_execution",
        record_exception=False,
        set_status_on_exception=False,
    ) as workflow_span:
        workflow_span.set_attribute("workflow.type", "simple_pipeline")
        workflow_span.set_attribute("workflow.nodes", 2)

        try:
            # Build workflow with explicit tracing
            with tracer.start_as_current_span("workflow_build") as build_span:
                workflow = (
                    WorkflowBuilder()
                    .add_edge(StartExecutor(id="start"), EndExecutor(id="end"))
                    .set_start_executor("start")
                    .build()
                )
                build_span.set_status(Status(StatusCode.OK))
                print("Workflow built successfully")

            # Run workflow with explicit tracing
            with tracer.start_as_current_span("workflow_run") as run_span:
                input_message = "We are demonstrating Tracing to DVC"
                run_span.set_attribute("input.message", input_message)

                print(f"Running workflow with input: {input_message}")
                await workflow.run(input_message)
                run_span.set_status(Status(StatusCode.OK))
                print("Workflow completed successfully")

        except Exception as e:
            workflow_span.set_status(Status(StatusCode.ERROR, str(e)))
            workflow_span.record_exception(e)
            print(f"Error during workflow execution: {e}")
            raise

In [None]:
# Enhanced main function with client requests and interactive prompts
async def main_with_client() -> None:
    """Run an interactive session that sends user-supplied requests through the workflow.

    Prompts for a request count (clamped to 1-5, default 3) and a request type,
    then runs each entered input through the workflow inside its own span and
    finishes with a session summary (counts and average processing time).

    A failing request is recorded on its own span and does not end the session.

    Raises:
        Exception: Anything raised outside the per-request handling is recorded
            on the ``client_session`` span and re-raised.
    """
    import time

    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter

    # Configure Azure Monitor with explicit connection string
    connection_string = os.getenv('APPLICATIONINSIGHTS_CONNECTION_STRING')
    if isinstance(trace.get_tracer_provider(), TracerProvider):
        # The global provider can only be installed once per process. Without
        # this guard, re-running on the console path would attach one more
        # exporter to the live provider and print every span twice.
        print("ℹ️  Tracing already configured; reusing the existing tracer provider")
    elif connection_string:
        configure_azure_monitor(connection_string=connection_string)
        print("✅ Azure Monitor configured successfully")
    else:
        print("⚠️  Warning: No Application Insights connection string found")
        print("🔄 Falling back to console exporter for local development")
        provider = TracerProvider()
        provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
        trace.set_tracer_provider(provider)

    tracer = trace.get_tracer(__name__)

    # Get user input for requests
    print("\n🎯 Interactive Tracing Demo")
    print("=" * 50)

    # Prompt for number of requests
    try:
        num_requests = int(input("How many requests would you like to make? (1-5): ") or "3")
        num_requests = min(max(1, num_requests), 5)  # Limit between 1-5
    except ValueError:
        num_requests = 3
        print(f"Invalid input, using default: {num_requests}")

    # Prompt for request type
    request_type = input("""
Select request type:
1. Text transformation
2. Data processing
3. Business logic simulation
4. Custom workflow

Enter choice (1-4): """).strip() or "1"

    # Map request types
    request_types = {
        "1": "text_transformation",
        "2": "data_processing",
        "3": "business_logic",
        "4": "custom_workflow",
    }

    selected_type = request_types.get(request_type, "text_transformation")
    print(f"🚀 Selected: {selected_type}")

    # Per request type: (prompt label, default-input prefix, expected-output builder).
    request_profiles = {
        "text_transformation": ("text to transform", "Sample text ", str.upper),
        "data_processing": ("data to process", "data_item_", lambda text: f"PROCESSED_{text}"),
        "business_logic": ("business scenario", "scenario_", lambda text: f"BUSINESS_RESULT_{text.upper()}"),
        "custom_workflow": ("custom input", "custom_input_", str.upper),
    }
    prompt_label, default_prefix, build_expected = request_profiles[selected_type]

    # Session-level failures are recorded explicitly in the except block, so the
    # context manager's automatic recording is disabled to avoid a duplicate
    # exception event on re-raise.
    with tracer.start_as_current_span(
        "client_session",
        record_exception=False,
        set_status_on_exception=False,
    ) as session_span:
        session_span.set_attribute("session.type", selected_type)
        session_span.set_attribute("session.request_count", num_requests)
        session_span.set_attribute("session.start_time", time.time())

        try:
            # Build workflow
            with tracer.start_as_current_span("workflow_initialization") as init_span:
                workflow = (
                    WorkflowBuilder()
                    .add_edge(StartExecutor(id="start"), EndExecutor(id="end"))
                    .set_start_executor("start")
                    .build()
                )
                init_span.set_status(Status(StatusCode.OK))
                print("✅ Workflow initialized successfully")

            # Process multiple requests
            results = []
            for request_number in range(1, num_requests + 1):
                # Ask for the input before opening the request span so the
                # span's duration covers processing only, not typing time.
                user_input = (
                    input(f"\nRequest {request_number} - Enter {prompt_label}: ")
                    or f"{default_prefix}{request_number}"
                )
                expected_output = build_expected(user_input)

                with tracer.start_as_current_span(f"client_request_{request_number}") as request_span:
                    # Add request attributes
                    request_span.set_attribute("request.number", request_number)
                    request_span.set_attribute("request.input", user_input)
                    request_span.set_attribute("request.input_length", len(user_input))
                    request_span.set_attribute("request.type", selected_type)
                    request_span.set_attribute("request.timestamp", time.time())

                    print(f"🔄 Processing request {request_number}: '{user_input}'")

                    try:
                        # Process the request through workflow
                        start_time = time.time()
                        await workflow.run(user_input)
                        processing_time = time.time() - start_time

                        # Record successful result
                        request_span.set_attribute("request.processing_time_ms", processing_time * 1000)
                        request_span.set_attribute("request.status", "success")
                        request_span.set_attribute("request.expected_output", expected_output)
                        request_span.set_status(Status(StatusCode.OK))

                        results.append({
                            "request_id": request_number,
                            "input": user_input,
                            "expected_output": expected_output,
                            "processing_time": processing_time,
                            "status": "success",
                        })

                        print(f"✅ Request {request_number} completed in {processing_time*1000:.2f}ms")

                    except Exception as e:
                        # Record the error and continue with the next request;
                        # the exception is not re-raised, so it is recorded once.
                        request_span.set_attribute("request.status", "error")
                        request_span.set_attribute("request.error_message", str(e))
                        request_span.set_status(Status(StatusCode.ERROR, str(e)))
                        request_span.record_exception(e)

                        results.append({
                            "request_id": request_number,
                            "input": user_input,
                            "status": "error",
                            "error": str(e),
                        })

                        print(f"❌ Request {request_number} failed: {e}")

            # Session summary: every result is either "success" or "error".
            successes = [r for r in results if r["status"] == "success"]
            failed_count = len(results) - len(successes)

            session_span.set_attribute("session.end_time", time.time())
            session_span.set_attribute("session.total_requests", len(results))
            session_span.set_attribute("session.successful_requests", len(successes))
            session_span.set_attribute("session.failed_requests", failed_count)

            # Print summary
            print("\n📊 Session Summary:")
            print(f"   Total requests: {len(results)}")
            print(f"   Successful: {len(successes)}")
            print(f"   Failed: {failed_count}")

            if successes:
                avg_time = sum(r["processing_time"] for r in successes) / len(successes)
                print(f"   Average processing time: {avg_time*1000:.2f}ms")
                session_span.set_attribute("session.average_processing_time_ms", avg_time * 1000)

            session_span.set_status(Status(StatusCode.OK))
            print("🎉 Client session completed successfully!")

        except Exception as e:
            session_span.set_status(Status(StatusCode.ERROR, str(e)))
            session_span.record_exception(e)
            print(f"❌ Session failed: {e}")
            raise

In [None]:
# Run the enhanced main function with client interactions
# NOTE: uses top-level `await`, which works in a notebook cell but not in a
# plain Python script.
print("🚀 Starting interactive client session...")
await main_with_client()

## Client Request Simulation

The enhanced main function now includes:

- **Interactive Prompts**: Users can specify the number and type of requests
- **Request Types**: Text transformation, data processing, business logic, or custom workflows
- **Input/Output Logging**: Each request logs input, expected output, processing time, and status
- **Session Tracking**: Overall session metrics including success rates and performance
- **Detailed Spans**: Each request creates its own span with comprehensive attributes

### Traced Attributes Include:
- `request.input` - User input data
- `request.input_length` - Length of input
- `request.processing_time_ms` - Time taken to process
- `request.status` - Success or error status
- `session.request_count` - Total requests in session
- `session.average_processing_time_ms` - Average processing time

In [None]:
# Alternative: Automated client requests for testing
async def automated_client_demo() -> None:
    """Run a fixed list of sample requests through the workflow without user input.

    Each sample is processed inside its own ``automated_request_N`` span that
    carries simulated client metadata. A failing request is recorded on its
    span and printed; it does not stop the remaining requests.
    """
    import time
    import random

    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter

    # Configure tracing
    connection_string = os.getenv('APPLICATIONINSIGHTS_CONNECTION_STRING')
    if isinstance(trace.get_tracer_provider(), TracerProvider):
        # The global provider can only be installed once per process. Without
        # this guard, re-running on the console path would attach one more
        # exporter to the live provider and print every span twice.
        pass
    elif connection_string:
        configure_azure_monitor(connection_string=connection_string)
    else:
        provider = TracerProvider()
        provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
        trace.set_tracer_provider(provider)

    tracer = trace.get_tracer(__name__)

    # Sample data for automated testing
    test_inputs = [
        ("hello world", "text_transformation"),
        ("process this data", "data_processing"),
        ("calculate quarterly revenue", "business_logic"),
        ("analyze customer feedback", "custom_workflow"),
        ("transform user input", "text_transformation"),
    ]

    with tracer.start_as_current_span("automated_client_session") as session_span:
        session_span.set_attribute("session.mode", "automated")
        session_span.set_attribute("session.test_data_count", len(test_inputs))

        workflow = (
            WorkflowBuilder()
            .add_edge(StartExecutor(id="start"), EndExecutor(id="end"))
            .set_start_executor("start")
            .build()
        )

        print("🤖 Running automated client demo...")

        for request_number, (input_text, request_type) in enumerate(test_inputs, start=1):
            with tracer.start_as_current_span(f"automated_request_{request_number}") as request_span:
                # Add simulated client metadata
                request_span.set_attribute("client.id", f"client_{random.randint(1000, 9999)}")
                request_span.set_attribute("client.request_id", f"req_{request_number}_{int(time.time())}")
                request_span.set_attribute("request.input", input_text)
                request_span.set_attribute("request.type", request_type)
                request_span.set_attribute("request.automated", True)

                # Add business context
                if request_type == "business_logic":
                    request_span.set_attribute("business.department", random.choice(["finance", "marketing", "operations"]))
                    request_span.set_attribute("business.priority", random.choice(["high", "medium", "low"]))
                elif request_type == "data_processing":
                    # Size in bytes of the UTF-8 encoded text (the previous
                    # `len(text) * 8` was a bit count under a bytes label).
                    request_span.set_attribute("data.size_bytes", len(input_text.encode("utf-8")))
                    request_span.set_attribute("data.format", "text")

                try:
                    print(f"  Processing: '{input_text}' ({request_type})")
                    start_time = time.time()

                    # Simulate processing time variation
                    if request_type == "business_logic":
                        await asyncio.sleep(0.1)  # Simulate longer processing

                    await workflow.run(input_text)

                    processing_time = time.time() - start_time
                    request_span.set_attribute("request.processing_time_ms", processing_time * 1000)
                    request_span.set_attribute("request.status", "success")
                    request_span.set_status(Status(StatusCode.OK))

                    print(f"  ✅ Completed in {processing_time*1000:.2f}ms")

                except Exception as e:
                    request_span.set_attribute("request.status", "error")
                    request_span.set_attribute("request.error", str(e))
                    request_span.set_status(Status(StatusCode.ERROR, str(e)))
                    # The exception is handled here and not re-raised, so the
                    # context manager never sees it; record it explicitly.
                    request_span.record_exception(e)
                    print(f"  ❌ Failed: {e}")

        session_span.set_status(Status(StatusCode.OK))
        print("🎉 Automated demo completed!")

# Run automated demo
# NOTE: uses top-level `await`, which works in a notebook cell but not in a
# plain Python script.
print("Running automated client requests for quick testing...")
await automated_client_demo()

In [None]:
# Advanced Client with Error Simulation
async def advanced_client_demo() -> None:
    """Run the selected advanced scenario(s): error handling, stress test, business workflows.

    Prompts for a choice (1-4, default 1; 4 runs all three scenarios) and runs
    everything inside one ``advanced_client_session`` span. This function does
    not configure tracing itself; it uses whatever tracer provider is current.
    """
    import time
    import random

    tracer = trace.get_tracer(__name__)

    print("🔬 Advanced Client Demo with Error Scenarios")
    print("=" * 50)

    # Prompt for demo type
    demo_choice = input("""
Select advanced demo:
1. Error handling simulation
2. Performance stress test  
3. Business workflow simulation
4. All scenarios

Enter choice (1-4): """).strip() or "1"

    with tracer.start_as_current_span("advanced_client_session") as session_span:
        session_span.set_attribute("demo.type", demo_choice)
        session_span.set_attribute("demo.advanced", True)

        workflow = (
            WorkflowBuilder()
            .add_edge(StartExecutor(id="start"), EndExecutor(id="end"))
            .set_start_executor("start")
            .build()
        )

        if demo_choice in ["1", "4"]:
            print("\n🧪 Testing Error Handling...")
            # (input, whether the input is expected to be rejected)
            error_scenarios = [
                ("normal input", False),
                ("", True),  # Empty input
                ("a" * 10000, True),  # Very long input
                ("special chars: !@#$%^&*()", False),
                (None, True),  # Null input
            ]

            for test_number, (test_input, should_error) in enumerate(error_scenarios, start=1):
                with tracer.start_as_current_span(f"error_test_{test_number}") as error_span:
                    # Compare against None explicitly: a truthiness check would
                    # also label the empty-string scenario as "None".
                    error_span.set_attribute("test.input", "None" if test_input is None else str(test_input))
                    error_span.set_attribute("test.expected_error", should_error)
                    error_span.set_attribute("test.scenario", "error_handling")

                    try:
                        if test_input is None:
                            raise ValueError("Null input not allowed")
                        if len(str(test_input)) > 1000:
                            raise ValueError("Input too long")
                        if test_input == "":
                            raise ValueError("Empty input not allowed")

                        await workflow.run(str(test_input))
                        error_span.set_attribute("test.result", "success")
                        error_span.set_attribute("test.matched_expectation", not should_error)
                        error_span.set_status(Status(StatusCode.OK))
                        if should_error:
                            print(f"  ⚠️  Test {test_number}: Expected an error but succeeded with '{str(test_input)[:20]}...'")
                        else:
                            print(f"  ✅ Test {test_number}: Success with '{str(test_input)[:20]}...'")

                    except Exception as e:
                        # Handled here and not re-raised, so it is recorded once.
                        error_span.set_attribute("test.result", "error")
                        error_span.set_attribute("test.matched_expectation", should_error)
                        error_span.set_attribute("test.error_message", str(e))
                        error_span.set_status(Status(StatusCode.ERROR, str(e)))
                        error_span.record_exception(e)
                        if should_error:
                            print(f"  ❌ Test {test_number}: Expected error - {e}")
                        else:
                            print(f"  ❌ Test {test_number}: Unexpected error - {e}")

        if demo_choice in ["2", "4"]:
            print("\n⚡ Performance Stress Test...")
            num_concurrent = 3

            async def stress_request(request_id: int) -> int:
                """Sleep for a random delay, run one workflow request, return its id."""
                with tracer.start_as_current_span(f"stress_request_{request_id}") as stress_span:
                    stress_span.set_attribute("stress.request_id", request_id)
                    stress_span.set_attribute("stress.concurrent_requests", num_concurrent)

                    # Simulate random processing time
                    processing_delay = random.uniform(0.1, 0.5)
                    stress_span.set_attribute("stress.simulated_delay", processing_delay)

                    await asyncio.sleep(processing_delay)
                    # NOTE(review): all stress requests share one workflow
                    # instance — confirm the framework allows overlapping
                    # run() calls. Failures are surfaced after the gather below.
                    await workflow.run(f"Stress test message {request_id}")

                    stress_span.set_status(Status(StatusCode.OK))
                    return request_id

            # Run concurrent requests
            start_time = time.time()
            tasks = [stress_request(i) for i in range(num_concurrent)]
            results = await asyncio.gather(*tasks, return_exceptions=True)
            end_time = time.time()

            # With return_exceptions=True, failures come back as exception
            # objects in the result list; report them instead of counting them
            # as completed requests.
            failures = [outcome for outcome in results if isinstance(outcome, BaseException)]
            for failure in failures:
                print(f"  ❌ Stress request failed: {failure}")

            session_span.set_attribute("stress.total_time", end_time - start_time)
            session_span.set_attribute("stress.concurrent_count", num_concurrent)
            session_span.set_attribute("stress.failed_count", len(failures))
            print(
                f"  🏁 Completed {len(results) - len(failures)}/{len(results)} "
                f"concurrent requests in {end_time - start_time:.2f}s"
            )

        if demo_choice in ["3", "4"]:
            print("\n💼 Business Workflow Simulation...")
            business_workflows = [
                ("process customer order", "order_management", "high"),
                ("generate monthly report", "reporting", "medium"),
                ("validate user credentials", "authentication", "high"),
                ("backup user data", "data_management", "low"),
            ]
            # SLA in seconds by priority; anything other than high/medium gets 300.
            sla_seconds_by_priority = {"high": 5, "medium": 30}

            for workflow_name, category, priority in business_workflows:
                with tracer.start_as_current_span("business_workflow") as biz_span:
                    biz_span.set_attribute("business.workflow_name", workflow_name)
                    biz_span.set_attribute("business.category", category)
                    biz_span.set_attribute("business.priority", priority)
                    biz_span.set_attribute("business.user_id", f"user_{random.randint(1000, 9999)}")
                    biz_span.set_attribute("business.department", random.choice(["sales", "finance", "IT", "HR"]))

                    # Simulate business logic complexity
                    biz_span.set_attribute("business.sla_seconds", sla_seconds_by_priority.get(priority, 300))

                    await workflow.run(workflow_name)
                    biz_span.set_status(Status(StatusCode.OK))
                    print(f"  💼 Completed: {workflow_name} ({priority} priority)")

        session_span.set_status(Status(StatusCode.OK))
        print("\n🎉 Advanced demo completed!")

# Interactive choice for advanced demo
run_advanced = input("\nWould you like to run the advanced client demo? (y/n): ").lower().startswith('y')
if run_advanced:
    await advanced_client_demo()
else:
    print("Skipping advanced demo. You can run it later by executing the cell above.")

## Summary of Client Request Features

Your notebook now includes three different client simulation approaches:

### 🎯 Interactive Client (`main_with_client()`)
- **User Input**: Prompts for number of requests and request types
- **Request Types**: Text transformation, data processing, business logic, custom workflows
- **Real-time Feedback**: Shows processing status and timing for each request
- **Session Metrics**: Tracks success rates, average processing time, and total requests

### 🤖 Automated Client (`automated_client_demo()`)
- **Predefined Test Data**: Uses sample inputs for quick testing
- **Simulated Metadata**: Adds client IDs, request IDs, and business context
- **Performance Variation**: Simulates different processing times based on request type
- **Consistent Testing**: Great for reproducible testing scenarios

### 🔬 Advanced Client (`advanced_client_demo()`)
- **Error Scenarios**: Tests handling of null, empty, and oversized inputs
- **Stress Testing**: Runs concurrent requests to test performance
- **Business Workflows**: Simulates real business processes with priorities and SLAs
- **Comprehensive Tracing**: Includes detailed business context and error handling

### 📊 Traced Attributes Include:
- **Request Level**: Input data, processing time, status, error messages
- **Session Level**: Total requests, success rates, average performance
- **Business Level**: Department, priority, SLA requirements, user context
- **Technical Level**: Client IDs, request IDs, processing delays, error types

All requests create detailed spans that can be viewed in Azure Application Insights or console output for debugging and monitoring purposes.

In [None]:
# Run the main function
# NOTE: uses top-level `await`, which works in a notebook cell but not in a
# plain Python script.
await main()

## Environment Setup

To use Azure Application Insights, set the following environment variable:

```bash
APPLICATIONINSIGHTS_CONNECTION_STRING="InstrumentationKey=your-key;IngestionEndpoint=https://your-region.in.applicationinsights.azure.com/"
```

If this environment variable is not set, the code will fall back to console output for tracing data.

In [None]:
# Optional: Check current environment variables
def describe_app_insights_config(connection_string):
    """Return a one-line status for the Application Insights setting.

    Only the ``IngestionEndpoint`` part of the connection string is echoed.
    The ``InstrumentationKey`` is never included, so the key does not end up
    in saved notebook output.

    Args:
        connection_string: Value of ``APPLICATIONINSIGHTS_CONNECTION_STRING``,
            or ``None``/empty when the variable is not set.

    Returns:
        A human-readable status line.
    """
    if not connection_string:
        return "Application Insights: Not configured (using console output)"

    endpoint = "not specified"
    # Connection strings are ";"-separated "Key=Value" pairs.
    for part in connection_string.split(";"):
        key, _, value = part.partition("=")
        if key.strip().lower() == "ingestionendpoint" and value.strip():
            endpoint = value.strip()
    return f"Application Insights: Configured (ingestion endpoint: {endpoint})"


print("Current tracing configuration:")
app_insights_conn = os.getenv('APPLICATIONINSIGHTS_CONNECTION_STRING')
print(describe_app_insights_config(app_insights_conn))

## AI Foundry Portal Tracing Setup

**Issue**: Traces appear in Application Insights but not in the AI Foundry portal, because the two destinations receive traces through different endpoints.

- **Application Insights**: Uses Azure Monitor with `configure_azure_monitor()`
- **AI Foundry Portal**: Uses a local OpenTelemetry collector at `http://localhost:4317`

The solution is to configure the agent framework's built-in observability setup instead of Azure Monitor.

In [None]:
# AI Foundry Portal Tracing Configuration
async def main_with_foundry_tracing() -> None:
    """Send workflow traces to an OTLP endpoint via the agent framework's observability setup.

    Calls ``setup_observability`` with the endpoint ``http://localhost:4317``
    and sensitive-data capture enabled, prompts for one message, then builds
    and runs the workflow under explicit spans.

    Raises:
        Exception: Any failure while building or running the workflow is
            recorded on the ``ai_foundry_workflow_execution`` span and re-raised.
    """
    import time

    # Use agent framework's built-in observability setup for AI Foundry
    from agent_framework.observability import setup_observability

    print("🔧 Configuring tracing for AI Foundry portal...")

    # Configure for AI Toolkit's local OpenTelemetry collector
    setup_observability(
        otlp_endpoint="http://localhost:4317",  # AI Toolkit gRPC endpoint
        enable_sensitive_data=True  # Enable capturing prompts and completions
    )

    print("✅ AI Foundry tracing configured successfully!")
    print("📊 Traces will now appear in the AI Foundry portal tracing tab")

    # Get tracer
    tracer = trace.get_tracer(__name__)

    # Get user input for testing
    print("\n🎯 AI Foundry Tracing Demo")
    print("=" * 40)

    test_message = input("Enter a message to process: ") or "AI Foundry tracing test"

    # The except block below records failures on this span explicitly, so the
    # context manager's automatic recording is disabled to avoid a duplicate
    # exception event on re-raise. The nested spans keep the default behaviour.
    with tracer.start_as_current_span(
        "ai_foundry_workflow_execution",
        record_exception=False,
        set_status_on_exception=False,
    ) as workflow_span:
        workflow_span.set_attribute("foundry.portal", "enabled")
        workflow_span.set_attribute("workflow.type", "ai_foundry_demo")
        workflow_span.set_attribute("input.message", test_message)
        workflow_span.set_attribute("timestamp", time.time())

        try:
            # Build workflow
            with tracer.start_as_current_span("foundry_workflow_build") as build_span:
                workflow = (
                    WorkflowBuilder()
                    .add_edge(StartExecutor(id="start"), EndExecutor(id="end"))
                    .set_start_executor("start")
                    .build()
                )
                build_span.set_attribute("workflow.nodes", 2)
                build_span.set_status(Status(StatusCode.OK))
                print("✅ Workflow built for AI Foundry")

            # Run workflow
            with tracer.start_as_current_span("foundry_workflow_run") as run_span:
                run_span.set_attribute("execution.mode", "ai_foundry")
                run_span.set_attribute("input.text", test_message)

                print(f"🚀 Processing: '{test_message}'")
                start_time = time.time()

                await workflow.run(test_message)

                processing_time = time.time() - start_time
                run_span.set_attribute("processing.time_ms", processing_time * 1000)
                run_span.set_status(Status(StatusCode.OK))

                print(f"✅ Completed in {processing_time*1000:.2f}ms")

            workflow_span.set_status(Status(StatusCode.OK))
            print("\n🎉 AI Foundry tracing demo completed!")
            print("📊 Check the AI Foundry portal tracing tab to see the traces")

        except Exception as e:
            workflow_span.set_status(Status(StatusCode.ERROR, str(e)))
            workflow_span.record_exception(e)
            print(f"❌ Error: {e}")
            raise

# Run AI Foundry tracing demo
# NOTE: uses top-level `await`, which works in a notebook cell but not in a
# plain Python script.
print("🔧 Setting up tracing for AI Foundry portal...")
await main_with_foundry_tracing()

In [None]:
# Multiple Tracing Destinations Demo
async def dual_tracing_demo():
    """Run the demo workflow while exporting spans to two destinations.

    Builds a dedicated ``TracerProvider`` with an OTLP/gRPC exporter pointed at
    ``http://localhost:4317`` and, when ``APPLICATIONINSIGHTS_CONNECTION_STRING``
    is set, an Azure Monitor exporter as well. Prompts for a message, runs the
    Start -> End workflow inside nested spans, and flushes the provider before
    returning so batched spans are not left sitting in the export queue.

    Raises:
        Exception: Any error raised while building or running the workflow is
            recorded on the outer span, printed, and re-raised.
    """
    import time
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor
    from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
    from azure.monitor.opentelemetry.exporter import AzureMonitorTraceExporter

    print("🔄 Setting up dual tracing (Azure Monitor + AI Foundry)...")

    # Create tracer provider
    tracer_provider = TracerProvider()

    # Add AI Foundry OTLP exporter
    otlp_exporter = OTLPSpanExporter(endpoint="http://localhost:4317")
    tracer_provider.add_span_processor(BatchSpanProcessor(otlp_exporter))
    print("✅ AI Foundry OTLP exporter configured")

    # Add Azure Monitor exporter if connection string is available
    app_insights_conn = os.getenv('APPLICATIONINSIGHTS_CONNECTION_STRING')
    if app_insights_conn:
        azure_exporter = AzureMonitorTraceExporter(connection_string=app_insights_conn)
        tracer_provider.add_span_processor(BatchSpanProcessor(azure_exporter))
        print("✅ Azure Monitor exporter configured")
    else:
        print("⚠️  Azure Monitor not configured (no connection string)")

    # Register the provider globally so spans created through the global API
    # (e.g. inside the executors) can use it too. The global provider can only
    # be set once per process, so in a notebook where an earlier cell already
    # configured tracing this call is ignored with a warning.
    trace.set_tracer_provider(tracer_provider)

    # Take the tracer from *this* provider rather than from the global API so
    # the spans created below always reach the exporters configured above,
    # even when the global registration was ignored.
    tracer = tracer_provider.get_tracer(__name__)

    # Demo with dual tracing
    test_message = input("\nEnter message for dual tracing demo: ") or "Dual tracing test"

    try:
        with tracer.start_as_current_span("dual_tracing_workflow") as workflow_span:
            workflow_span.set_attribute("tracing.destinations", "azure_monitor,ai_foundry")
            workflow_span.set_attribute("demo.type", "dual_tracing")
            workflow_span.set_attribute("input.message", test_message)

            try:
                workflow = (
                    WorkflowBuilder()
                    .add_edge(StartExecutor(id="start"), EndExecutor(id="end"))
                    .set_start_executor("start")
                    .build()
                )

                print(f"\n🚀 Processing '{test_message}' with dual tracing...")
                start_time = time.time()

                with tracer.start_as_current_span("dual_workflow_execution") as exec_span:
                    exec_span.set_attribute("execution.dual_trace", True)
                    await workflow.run(test_message)

                    processing_time = time.time() - start_time
                    exec_span.set_attribute("processing.time_ms", processing_time * 1000)
                    exec_span.set_status(Status(StatusCode.OK))

                workflow_span.set_status(Status(StatusCode.OK))

                print(f"✅ Completed in {processing_time*1000:.2f}ms")
                print("\n📊 Traces sent to:")
                print("   • AI Foundry portal (localhost:4317)")
                if app_insights_conn:
                    print("   • Azure Application Insights")
                print("\n🔍 Check both locations to see the traces!")

            except Exception as e:
                workflow_span.set_status(Status(StatusCode.ERROR, str(e)))
                workflow_span.record_exception(e)
                print(f"❌ Error: {e}")
                raise
    finally:
        # BatchSpanProcessor exports in the background; flush explicitly so the
        # spans (including those of a failed run) are sent before returning.
        tracer_provider.force_flush()

# Choice for dual tracing
setup_dual = input("\nWould you like to set up dual tracing (Azure + AI Foundry)? (y/n): ").lower().startswith('y')
if setup_dual:
    await dual_tracing_demo()
else:
    print("Skipping dual tracing setup. Use the cell above to run it later.")

## Why You Don't See Traces in AI Foundry Portal

### The Problem:
Your code uses **Azure Monitor tracing** (`configure_azure_monitor()`), which sends traces to Azure Application Insights. However, the **AI Foundry portal tracing tab** expects traces to be sent to its local OpenTelemetry collector.

### Two Different Tracing Systems:

1. **Azure Application Insights** (what you're currently using):
   - Endpoint: Azure cloud service
   - Configuration: `configure_azure_monitor(connection_string=...)`
   - View traces: Azure portal → Application Insights → Transaction search

2. **AI Foundry Portal** (what you need for the portal):
   - Endpoint: `http://localhost:4317` (local OTLP collector)
   - Configuration: `setup_observability(otlp_endpoint="http://localhost:4317")`
   - View traces: AI Foundry portal → Tracing tab

### Solutions:

- **Option 1**: Use the AI Foundry tracing cell above (`main_with_foundry_tracing`) for AI Foundry-only tracing
- **Option 2**: Use dual tracing to send to both destinations
- **Option 3**: Switch between configurations based on your needs

Before running the cells below, make sure the AI Toolkit tracing page is open and its local collector is running so that it is ready to receive traces.

In [None]:
# Comprehensive AI Foundry Tracing Troubleshooting
async def debug_foundry_tracing():
    """Walk through four checks to find out why traces are missing in AI Foundry.

    Steps, each printing its own progress:
      1. Probe localhost ports 4318 (OTLP/HTTP) and 4317 (OTLP/gRPC).
      2. Export a single test span through a fresh ``TracerProvider``.
      3. Configure tracing through ``agent_framework.observability``.
      4. Run the Start -> End workflow inside a span and flush.

    The function is diagnostic: failures in any step are printed and the
    function returns early instead of raising.

    Returns:
        None
    """
    import time
    import socket

    print("🔍 AI Foundry Tracing Troubleshooting")
    print("=" * 50)

    # Step 1: Check if AI Toolkit tracing collector is running
    print("1. Checking AI Toolkit tracing collector status...")

    def check_port(host, port):
        """Return True when a TCP connection to (host, port) succeeds within 2s."""
        try:
            # The context manager closes the socket even if connect_ex raises
            # (e.g. on a name-resolution failure).
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.settimeout(2)
                return sock.connect_ex((host, port)) == 0
        except OSError:
            return False

    # Check both HTTP and gRPC endpoints
    http_port_open = check_port('localhost', 4318)
    grpc_port_open = check_port('localhost', 4317)

    print(f"   • HTTP endpoint (4318): {'✅ Open' if http_port_open else '❌ Closed'}")
    print(f"   • gRPC endpoint (4317): {'✅ Open' if grpc_port_open else '❌ Closed'}")

    if not (http_port_open or grpc_port_open):
        print("⚠️  AI Toolkit collector not running!")
        print("💡 Solution: Open AI Toolkit and start the tracing collector")
        return

    # Step 2: Test basic OTLP connectivity
    print("\n2. Testing OTLP connectivity...")

    try:
        from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
        from opentelemetry.sdk.trace import TracerProvider
        from opentelemetry.sdk.trace.export import BatchSpanProcessor
        from opentelemetry import trace

        # Create a fresh tracer provider for testing
        test_provider = TracerProvider()

        # Try HTTP first, then gRPC
        if http_port_open:
            endpoint = "http://localhost:4318/v1/traces"
            print(f"   Testing HTTP endpoint: {endpoint}")
            from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter
            exporter = HTTPExporter(endpoint=endpoint)
        else:
            endpoint = "http://localhost:4317"
            print(f"   Testing gRPC endpoint: {endpoint}")
            exporter = OTLPSpanExporter(endpoint=endpoint, insecure=True)

        test_provider.add_span_processor(BatchSpanProcessor(exporter))
        trace.set_tracer_provider(test_provider)

        # Create the test span through test_provider itself. The global
        # provider can only be set once per process, so a tracer obtained from
        # the global API might belong to an earlier provider and the flush
        # below would then export nothing.
        tracer = test_provider.get_tracer("foundry_test")
        with tracer.start_as_current_span("connectivity_test") as span:
            span.set_attribute("test.type", "connectivity")
            span.set_attribute("timestamp", time.time())
            print("   ✅ Test span created successfully")

        # Force export
        test_provider.force_flush()
        print("   ✅ Traces exported to AI Foundry")

    except Exception as e:
        print(f"   ❌ OTLP connection failed: {e}")
        print("💡 Try restarting AI Toolkit or check firewall settings")
        return

    # Step 3: Test with agent framework setup
    print("\n3. Testing agent framework observability setup...")

    try:
        from agent_framework.observability import setup_observability

        # NOTE: no attempt is made to "reset" the global provider here.
        # trace.set_tracer_provider(None) does not clear an existing provider;
        # once one is registered, further calls are ignored with a warning.

        # Prefer gRPC when its port is open, otherwise fall back to HTTP.
        # At least one of the two is open, or step 1 would have returned.
        if grpc_port_open:
            otlp_endpoint, transport = "http://localhost:4317", "gRPC"
        else:
            otlp_endpoint, transport = "http://localhost:4318/v1/traces", "HTTP"

        setup_observability(
            otlp_endpoint=otlp_endpoint,
            enable_sensitive_data=True,
            resource_attributes={
                "service.name": "foundry-tracing-test",
                "service.version": "1.0.0"
            }
        )
        print(f"   ✅ Agent framework configured for {transport}")

    except Exception as e:
        print(f"   ❌ Agent framework setup failed: {e}")
        return

    # Step 4: Create workflow and test end-to-end
    print("\n4. Testing end-to-end workflow with AI Foundry tracing...")

    try:
        tracer = trace.get_tracer("foundry_workflow_test")

        with tracer.start_as_current_span("foundry_e2e_test") as main_span:
            main_span.set_attribute("test.scenario", "end_to_end")
            main_span.set_attribute("foundry.enabled", True)
            main_span.set_attribute("service.name", "foundry-tracing-test")

            # Build and run workflow
            workflow = (
                WorkflowBuilder()
                .add_edge(StartExecutor(id="start"), EndExecutor(id="end"))
                .set_start_executor("start")
                .build()
            )

            test_input = "AI Foundry debugging test"
            print(f"   Processing: '{test_input}'")

            start_time = time.time()
            await workflow.run(test_input)
            processing_time = time.time() - start_time

            main_span.set_attribute("processing.time_ms", processing_time * 1000)
            main_span.set_attribute("test.status", "success")

            print(f"   ✅ Workflow completed in {processing_time*1000:.2f}ms")

        # Force flush to ensure traces are sent. The API-level provider type
        # does not define force_flush, hence the hasattr guard.
        active_provider = trace.get_tracer_provider()
        if hasattr(active_provider, 'force_flush'):
            active_provider.force_flush()
            print("   ✅ Traces flushed to AI Foundry")

        print("\n🎉 All tests passed!")
        print("\n📊 Traces should now appear in AI Foundry portal:")
        print("   1. Open AI Foundry portal")
        print("   2. Go to the Tracing tab")
        print("   3. Look for traces with service name 'foundry-tracing-test'")
        print("   4. Refresh the page if needed (traces may take 10-30 seconds)")

    except Exception as e:
        # Deliberately not re-raised: this is a diagnostic helper that reports
        # the failure and lets the notebook continue.
        print(f"   ❌ End-to-end test failed: {e}")
        print(f"   Error details: {type(e).__name__}: {str(e)}")

# Run comprehensive debugging.
# Top-level `await` is used because this is a notebook cell; in a plain
# script the coroutine would have to be driven with asyncio.run().
print("Starting comprehensive AI Foundry tracing troubleshooting...")
await debug_foundry_tracing()

In [None]:
# Quick Fix: Simplified AI Foundry Tracing
async def simple_foundry_test():
    """Send one workflow trace to the local OTLP/HTTP endpoint.

    Builds a ``TracerProvider`` whose resource carries the service name
    ``ai-foundry-test``, attaches an OTLP/HTTP exporter for
    ``http://localhost:4318/v1/traces``, runs the Start -> End workflow inside
    a single span, and flushes the provider so the spans are exported before
    the function returns.

    Raises:
        Exception: Anything raised while building or running the workflow
            propagates to the caller (after the provider has been flushed).
    """
    # `time` is imported locally: the span timestamp below needs it and the
    # notebook does not import it at module level in its first cell.
    import time

    print("🚀 Quick AI Foundry Tracing Test")
    print("=" * 35)

    # Method 1: Manual OTLP setup (most reliable)
    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor
    from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
    from opentelemetry.sdk.resources import Resource

    # Create resource with service name
    resource = Resource.create({
        "service.name": "ai-foundry-test",
        "service.version": "1.0.0",
        "deployment.environment": "development"
    })

    # Create tracer provider
    provider = TracerProvider(resource=resource)

    # Use HTTP endpoint (more reliable than gRPC)
    http_exporter = OTLPSpanExporter(
        endpoint="http://localhost:4318/v1/traces",
        headers={}
    )

    provider.add_span_processor(BatchSpanProcessor(http_exporter))
    # The global provider can only be set once per process; if an earlier cell
    # already set one, this call is ignored with a warning.
    trace.set_tracer_provider(provider)

    print("✅ Manual OTLP configuration complete")

    # Take the tracer from this provider (not the global API) so the test span
    # always goes through the exporter configured above.
    tracer = provider.get_tracer(__name__)

    try:
        with tracer.start_as_current_span("ai_foundry_simple_test") as span:
            span.set_attribute("test.framework", "manual_otlp")
            span.set_attribute("endpoint", "localhost:4318")
            span.set_attribute("timestamp", int(time.time()))

            # Simple workflow test
            print("Running simple workflow...")
            workflow = (
                WorkflowBuilder()
                .add_edge(StartExecutor(id="start"), EndExecutor(id="end"))
                .set_start_executor("start")
                .build()
            )

            await workflow.run("Simple foundry test")
            span.set_attribute("workflow.status", "completed")
    finally:
        # Force export, also when the workflow failed, so the failing span is
        # not left in the batch queue.
        provider.force_flush()

    print("✅ Test completed and traces sent")
    print("📊 Check AI Foundry portal for 'ai-foundry-test' service")

# Run simple test.
# Top-level `await` is used because this is a notebook cell; in a plain
# script the coroutine would have to be driven with asyncio.run().
print("Running simplified AI Foundry test...")
await simple_foundry_test()

## Common Issues & Solutions for AI Foundry Tracing

### 🔍 **Most Likely Causes:**

1. **AI Toolkit Collector Not Running**
   - The tracing collector needs to be actively running
   - Check ports 4317 (gRPC) and 4318 (HTTP) are open

2. **Wrong Endpoint Configuration** 
   - AI Foundry expects: `http://localhost:4318/v1/traces` (HTTP) or `http://localhost:4317` (gRPC)
   - Your code was using Azure Monitor instead

3. **Service Name Missing**
   - AI Foundry groups traces by service name
   - Without it, traces may not appear correctly

4. **Trace Export Timing**
   - Traces are batched and may take 10-30 seconds to appear
   - Call `force_flush()` on the tracer provider when you need the traces exported immediately

### ✅ **Troubleshooting Steps:**

Run the debugging cell above to:
- ✅ Check if AI Toolkit collector is running
- ✅ Test OTLP connectivity  
- ✅ Verify trace export
- ✅ Run end-to-end workflow test

### 🚀 **Quick Test:**

The simplified test cell uses the most reliable configuration:
- HTTP endpoint (more stable than gRPC)
- Proper service name and resource attributes
- Force flush to ensure traces are sent immediately

## Environment Configuration (.env file)

Create a `.env` file in your project root with the following tracing configurations:

```bash
# AI Foundry / AI Toolkit Tracing (Local)
OTLP_ENDPOINT=http://localhost:4318/v1/traces
OTLP_GRPC_ENDPOINT=http://localhost:4317
SERVICE_NAME=ai-foundry-tracing
SERVICE_VERSION=1.0.0
ENVIRONMENT=development

# Azure Application Insights (Cloud) - Optional
APPLICATIONINSIGHTS_CONNECTION_STRING=InstrumentationKey=your-key;IngestionEndpoint=https://your-region.in.applicationinsights.azure.com/

# Tracing Configuration
ENABLE_SENSITIVE_DATA=true
TRACE_EXPORT_TIMEOUT=30
```

### Environment Variable Priority:
1. **For AI Foundry Portal**: Use `OTLP_ENDPOINT` 
2. **For Azure Monitor**: Use `APPLICATIONINSIGHTS_CONNECTION_STRING`
3. **For Both**: Configure dual tracing in code

In [None]:
# Environment-based Tracing Configuration
async def env_based_tracing():
    """Configure tracing from environment variables and run a test workflow.

    Reads ``OTLP_ENDPOINT``, ``OTLP_GRPC_ENDPOINT``, ``SERVICE_NAME``,
    ``SERVICE_VERSION``, ``ENVIRONMENT``, ``ENABLE_SENSITIVE_DATA`` and
    ``APPLICATIONINSIGHTS_CONNECTION_STRING`` (after optionally loading a
    ``.env`` file), builds a ``TracerProvider`` with an OTLP/HTTP exporter and,
    if a connection string is present, an Azure Monitor exporter. It then
    prompts for a message, runs the Start -> End workflow inside a span, and
    flushes the provider.

    Raises:
        Exception: Errors from building or running the workflow are recorded
            on the span, printed, and re-raised.
    """
    import os

    # python-dotenv is optional here: without it, fall back to whatever is
    # already in the process environment.
    try:
        from dotenv import load_dotenv
    except ImportError:
        load_dotenv = None

    print("🔧 Environment-based Tracing Configuration")
    print("=" * 50)

    # Load environment variables from .env file.
    # load_dotenv() does not raise when no file is found; it reports the
    # outcome through its boolean return value, so branch on that.
    if load_dotenv is not None and load_dotenv():
        print("✅ .env file loaded")
    else:
        print("⚠️  No .env file found, using system environment variables")

    # Get tracing configuration from environment
    otlp_endpoint = os.getenv('OTLP_ENDPOINT', 'http://localhost:4318/v1/traces')
    otlp_grpc_endpoint = os.getenv('OTLP_GRPC_ENDPOINT', 'http://localhost:4317')
    service_name = os.getenv('SERVICE_NAME', 'ai-foundry-service')
    service_version = os.getenv('SERVICE_VERSION', '1.0.0')
    environment = os.getenv('ENVIRONMENT', 'development')
    enable_sensitive = os.getenv('ENABLE_SENSITIVE_DATA', 'true').lower() == 'true'
    app_insights_conn = os.getenv('APPLICATIONINSIGHTS_CONNECTION_STRING')

    print("\n📋 Configuration from environment:")
    print(f"   • OTLP HTTP Endpoint: {otlp_endpoint}")
    print(f"   • OTLP gRPC Endpoint: {otlp_grpc_endpoint}")
    print(f"   • Service Name: {service_name}")
    print(f"   • Service Version: {service_version}")
    print(f"   • Environment: {environment}")
    print(f"   • Sensitive Data: {enable_sensitive}")
    print(f"   • Azure Monitor: {'✅ Configured' if app_insights_conn else '❌ Not configured'}")

    # Configure tracing based on environment
    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor
    from opentelemetry.sdk.resources import Resource

    # Create resource with environment variables
    resource = Resource.create({
        "service.name": service_name,
        "service.version": service_version,
        "deployment.environment": environment
    })

    # Create tracer provider
    provider = TracerProvider(resource=resource)

    # Add OTLP exporter for AI Foundry (best effort: a failure is reported and
    # the demo continues with whatever exporters could be configured).
    try:
        from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
        otlp_exporter = OTLPSpanExporter(endpoint=otlp_endpoint)
        provider.add_span_processor(BatchSpanProcessor(otlp_exporter))
        print(f"✅ OTLP exporter configured for: {otlp_endpoint}")
    except Exception as e:
        print(f"❌ OTLP configuration failed: {e}")

    # Add Azure Monitor exporter if configured
    if app_insights_conn:
        try:
            from azure.monitor.opentelemetry.exporter import AzureMonitorTraceExporter
            azure_exporter = AzureMonitorTraceExporter(connection_string=app_insights_conn)
            provider.add_span_processor(BatchSpanProcessor(azure_exporter))
            print("✅ Azure Monitor exporter configured")
        except Exception as e:
            print(f"❌ Azure Monitor configuration failed: {e}")

    # Set the tracer provider. The global provider can only be set once per
    # process; if an earlier cell already set one, this call is ignored with a
    # warning.
    trace.set_tracer_provider(provider)

    # Take the tracer from this provider (not the global API) so the span
    # below always goes through the exporters configured above.
    tracer = provider.get_tracer(__name__)

    test_message = input("\nEnter test message: ") or "Environment-based tracing test"

    try:
        with tracer.start_as_current_span("env_based_workflow") as span:
            span.set_attribute("config.source", "environment_variables")
            span.set_attribute("service.name", service_name)
            span.set_attribute("test.input", test_message)

            try:
                # Build and run workflow
                workflow = (
                    WorkflowBuilder()
                    .add_edge(StartExecutor(id="start"), EndExecutor(id="end"))
                    .set_start_executor("start")
                    .build()
                )

                print(f"\n🚀 Processing: '{test_message}'")
                await workflow.run(test_message)

                span.set_attribute("workflow.status", "success")
                print("✅ Workflow completed successfully")

            except Exception as e:
                span.set_attribute("workflow.status", "error")
                span.set_attribute("error.message", str(e))
                # Also mark the span itself as failed, matching how the other
                # demo cells in this notebook report errors.
                span.set_status(Status(StatusCode.ERROR, str(e)))
                span.record_exception(e)
                print(f"❌ Workflow failed: {e}")
                raise
    finally:
        # Force flush traces, including the span of a failed run.
        provider.force_flush()

    print("\n📊 Traces sent to:")
    print(f"   • AI Foundry: {otlp_endpoint}")
    if app_insights_conn:
        print("   • Azure Monitor: Application Insights")

    print(f"\n🔍 Check AI Foundry portal for service: '{service_name}'")

# Run environment-based configuration.
# Top-level `await` is used because this is a notebook cell; in a plain
# script the coroutine would have to be driven with asyncio.run().
print("Configuring tracing from environment variables...")
await env_based_tracing()