In [1]:
!pip install azure-monitor-opentelemetry

Collecting azure-monitor-opentelemetry
  Downloading azure_monitor_opentelemetry-1.6.5-py3-none-any.whl.metadata (21 kB)
Collecting azure-core<2.0.0,>=1.28.0 (from azure-monitor-opentelemetry)
  Using cached azure_core-1.32.0-py3-none-any.whl.metadata (39 kB)
Collecting azure-core-tracing-opentelemetry~=1.0.0b11 (from azure-monitor-opentelemetry)
  Downloading azure_core_tracing_opentelemetry-1.0.0b11-py3-none-any.whl.metadata (8.5 kB)
Collecting azure-monitor-opentelemetry-exporter~=1.0.0b31 (from azure-monitor-opentelemetry)
  Downloading azure_monitor_opentelemetry_exporter-1.0.0b34-py2.py3-none-any.whl.metadata (32 kB)
Collecting opentelemetry-instrumentation-django~=0.49b0 (from azure-monitor-opentelemetry)
  Downloading opentelemetry_instrumentation_django-0.51b0-py3-none-any.whl.metadata (2.3 kB)
Collecting opentelemetry-instrumentation-fastapi~=0.49b0 (from azure-monitor-opentelemetry)
  Downloading opentelemetry_instrumentation_fastapi-0.51b0-py3-none-any.whl.metadata (2.2 kB)

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
azureml-mlflow 1.56.0 requires azure-common<2.0.0,>=1.1, which is not installed.
azureml-mlflow 1.56.0 requires azure-identity, which is not installed.
azureml-mlflow 1.56.0 requires azure-mgmt-core<2.0.0,>=1.2.0, which is not installed.
azureml-mlflow 1.56.0 requires azure-storage-blob<=12.19.0,>=12.5.0, which is not installed.
azureml-mlflow 1.56.0 requires jsonpickle, which is not installed.
mlflow 2.14.2 requires cloudpickle<4, which is not installed.
mlflow 2.14.2 requires markdown<4,>=3.3, which is not installed.
mlflow 2.14.2 requires querystring-parser<2, which is not installed.
mlflow 2.14.2 requires scikit-learn<2, which is not installed.
mlflow 2.14.2 requires sqlparse<1,>=0.4.0, which is not installed.
mlflow 2.14.2 requires waitress<4; platform_system == "Windows", which is not installed.
mlflow-skinn

# Monitoring container health

In [None]:
import time
import os
import logging
import requests

from opentelemetry import trace, metrics
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader, ConsoleMetricExporter
from azure.monitor.opentelemetry.exporter import AzureMonitorTraceExporter, AzureMonitorMetricExporter
from azure.monitor.opentelemetry import configure_azure_monitor
from opentelemetry.instrumentation.requests import RequestsInstrumentor

# Instrument the requests library (optional, for context propagation)
RequestsInstrumentor().instrument()

# The connection string is pulled from the environment variable; the placeholder
# default must be replaced with a real value before telemetry can be exported.
connection_string = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING", "<Your_Connection_String>")

# Create a Resource to tag all telemetry with a service name.
resource = Resource.create(attributes={"service.name": "ContainerMonitor"})

# Configure Azure Monitor via the Azure Monitor OpenTelemetry Distro.
# The distro installs the global tracer and meter providers (with Azure Monitor
# exporters) itself. OpenTelemetry refuses to replace a global provider once it has
# been set, so building a second TracerProvider/MeterProvider here and calling
# set_tracer_provider()/set_meter_provider() would be ignored, and the resource and
# extra exporters attached to them would never be used. Passing the resource to the
# distro keeps a single pipeline for all telemetry.
configure_azure_monitor(connection_string=connection_string, resource=resource)

# Reuse the providers that the distro registered globally.
tracer_provider = trace.get_tracer_provider()
# Optionally add a console exporter to see traces locally.
tracer_provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
tracer = trace.get_tracer(__name__)

# Metrics are exported by the distro's own periodic reader
# (NOTE(review): export interval is expected to default to 60 s; confirm if it matters).
meter_provider = metrics.get_meter_provider()
meter = metrics.get_meter(__name__)

# Metric instruments used by the health checks:
#   - a histogram of response times (milliseconds)
#   - one counter for healthy results and one for unhealthy results
response_time_histogram = meter.create_histogram(
    "container.response.time",
    description="Response time for container health check",
    unit="ms",
)

healthy_counter = meter.create_counter(
    "container.healthy.count",
    description="Number of healthy checks",
    unit="1",
)

unhealthy_counter = meter.create_counter(
    "container.unhealthy.count",
    description="Number of unhealthy checks",
    unit="1",
)

def monitor_container(container):
    """
    Run one health check against a container and record the outcome as telemetry.

    Performs a GET request to the container's health endpoint inside a trace span
    named ``HealthCheck-<name>``. The elapsed time is recorded in
    ``response_time_histogram``; ``healthy_counter`` is incremented for an HTTP 200
    response and ``unhealthy_counter`` for any other status code or for any
    exception raised by the request. Request failures are logged and recorded on
    the span instead of being propagated to the caller.

    :param container: dict with keys 'name' and 'url' ('name' defaults to "unknown")
    :return: None
    """
    container_name = container.get("name", "unknown")
    container_url = container.get("url")
    metric_attributes = {"container.name": container_name}

    with tracer.start_as_current_span(f"HealthCheck-{container_name}") as span:
        span.set_attribute("container.name", container_name)
        # None is not a valid span attribute value, so only tag the URL when present.
        if container_url is not None:
            span.set_attribute("container.url", container_url)

        # perf_counter() is monotonic, so the measured duration cannot be skewed
        # by wall-clock adjustments the way time.time() differences can.
        start_time = time.perf_counter()
        try:
            response = requests.get(container_url, timeout=5)
        except Exception as ex:
            # Deliberately broad: any failure to reach the endpoint (including a
            # missing or malformed URL) counts as an unhealthy container.
            elapsed_ms = (time.perf_counter() - start_time) * 1000  # in milliseconds
            response_time_histogram.record(elapsed_ms, attributes=metric_attributes)
            span.record_exception(ex)
            # Mark the span as failed so the error is visible in trace tooling.
            span.set_status(trace.Status(trace.StatusCode.ERROR, str(ex)))
            span.set_attribute("health.status", "unhealthy")
            unhealthy_counter.add(1, attributes=metric_attributes)
            logging.error(f"{container_name} health check failed: {ex}. Response time: {elapsed_ms:.2f} ms")
            return

        elapsed_ms = (time.perf_counter() - start_time) * 1000  # in milliseconds
        response_time_histogram.record(elapsed_ms, attributes=metric_attributes)

        if response.status_code == 200:
            span.set_attribute("health.status", "healthy")
            healthy_counter.add(1, attributes=metric_attributes)
            logging.info(f"{container_name} is healthy. Response time: {elapsed_ms:.2f} ms")
        else:
            span.set_attribute("health.status", "unhealthy")
            unhealthy_counter.add(1, attributes=metric_attributes)
            logging.warning(f"{container_name} is unhealthy. Status code: {response.status_code}. Response time: {elapsed_ms:.2f} ms")

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    # basicConfig() does nothing when the root logger already has handlers
    # (presumably the case here once telemetry/notebook logging is set up), so the
    # level is also set explicitly; otherwise the INFO "healthy" messages are dropped.
    logging.getLogger().setLevel(logging.INFO)

    # Define the containers to monitor.
    # Adjust the list below to add more containers (e.g. other services) as needed.
    containers = [
        {"name": "Fuseki", "url": "http://fuseki-container:3030/health"},
        {"name": "Virtuoso", "url": "http://virtuoso-container:8890/health"},
    ]

    # Pause between two rounds of health checks.
    check_interval_seconds = 60

    # Run the health checks in a loop until the cell/script is interrupted.
    try:
        while True:
            for container in containers:
                monitor_container(container)
            # Wait before the next round of checks.
            time.sleep(check_interval_seconds)
    except KeyboardInterrupt:
        # Interrupting the kernel (or Ctrl+C) is the expected way to stop monitoring.
        logging.info("Container monitoring stopped by user.")


# Testing health probes

In [None]:
import requests

def check_health(endpoint_url, timeout=10):
    """
    Probe an HTTP endpoint once and report whether it responded successfully.

    A response with a status code from 200 to 399 counts as a pass. Any
    ``requests.RequestException`` (connection error, timeout, invalid URL, ...)
    is reported and counts as a failure.

    :param endpoint_url: URL of the probe endpoint to request
    :param timeout: seconds to wait for the server before giving up; without a
        timeout, requests can block indefinitely on an unresponsive endpoint
    :return: True if the check passed, False otherwise
    """
    try:
        response = requests.get(endpoint_url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Health check encountered an error for {endpoint_url}: {e}")
        return False

    # A status code between 200 and 399 indicates success
    if 200 <= response.status_code < 400:
        print(f"Health check passed for {endpoint_url} with status code {response.status_code}.")
        return True

    print(f"Health check failed for {endpoint_url} with status code {response.status_code}.")
    return False

# Probe URLs exposed by the container app: general health, startup and liveness.
endpoint_health, endpoint_startup, endpoint_liveness = (
    "https://container-app--58dpcq1.salmonbeach-9c034f96.northeurope.azurecontainerapps.io/health",
    "https://container-app--58dpcq1.salmonbeach-9c034f96.northeurope.azurecontainerapps.io/startup",
    "https://container-app--58dpcq1.salmonbeach-9c034f96.northeurope.azurecontainerapps.io/liveness",
)

# Check the general health probe; the boolean result is shown as the cell output.
check_health(endpoint_url=endpoint_health)


Health check passed for https://container-app--58dpcq1.salmonbeach-9c034f96.northeurope.azurecontainerapps.io/health with status code 200.


True