-
Notifications
You must be signed in to change notification settings - Fork 67
Adding core OTEL layer with accompanying sample and tests #342
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| ## DESIGN | ||
| # This design is similar to how error codes are implemented and maintained. | ||
| # The alternative was to inject all of this telemetry logic inline with the code it instruments. | ||
| # While some spans are simple, others require more involved mapping of attributes or | ||
| # even emitting metrics. | ||
| # | ||
| # This design hides the "mess" of telemetry to one location rather than throughout the codebase. | ||
| # | ||
| # NOTE: this module should not be auto-loaded from __init__.py in order to avoid | ||
|
|
||
| from . import attributes | ||
| from .core import ( | ||
| agents_telemetry, | ||
| SERVICE_NAME, | ||
| SERVICE_VERSION, | ||
| RESOURCE, | ||
| AttributeMap, | ||
| BaseSpanWrapper, | ||
| SimpleSpanWrapper, | ||
| ) | ||
|
|
||
| from .utils import ( | ||
| format_scopes, | ||
| get_conversation_id, | ||
| get_delivery_mode, | ||
| ) | ||
|
|
||
| __all__ = [ | ||
| "attributes", | ||
| "agents_telemetry", | ||
| "format_scopes", | ||
| "get_conversation_id", | ||
| "get_delivery_mode", | ||
| "AttributeMap", | ||
| "BaseSpanWrapper", | ||
| "SimpleSpanWrapper", | ||
| "SERVICE_NAME", | ||
| "SERVICE_VERSION", | ||
| "RESOURCE", | ||
| ] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| # Copyright (c) Microsoft Corporation. All rights reserved. | ||
| # Licensed under the MIT License. | ||
|
|
||
| ACTIVITY_DELIVERY_MODE = "activity.delivery_mode" | ||
| ACTIVITY_CHANNEL_ID = "activity.channel_id" | ||
| ACTIVITY_ID = "activity.id" | ||
| ACTIVITY_COUNT = "activities.count" | ||
| ACTIVITY_TYPE = "activity.type" | ||
|
|
||
| AGENTIC_USER_ID = "agentic.user_id" | ||
| AGENTIC_INSTANCE_ID = "agentic.instance_id" | ||
|
|
||
| APP_ID = "agent.app_id" | ||
|
|
||
| ATTACHMENT_ID = "activity.attachment.id" | ||
| ATTACHMENT_COUNT = "activity.attachments.count" | ||
|
|
||
| AUTH_HANDLER_ID = "auth.handler.id" | ||
| AUTH_METHOD = "auth.method" | ||
| AUTH_SCOPES = "auth.scopes" | ||
| AUTH_SUCCESS = "auth.success" | ||
|
|
||
| CONNECTION_NAME = "auth.connection.name" | ||
| CONVERSATION_ID = "activity.conversation.id" | ||
|
|
||
| HTTP_METHOD = "http.method" | ||
| HTTP_STATUS_CODE = "http.status_code" | ||
|
|
||
| IS_AGENTIC = "is_agentic_request" | ||
|
|
||
| KEY_COUNT = "storage.keys.count" | ||
|
|
||
| OPERATION = "operation" | ||
|
|
||
| ROUTE_AUTHORIZED = "route.authorized" | ||
| ROUTE_IS_INVOKE = "route.is_invoke" | ||
| ROUTE_IS_AGENTIC = "route.is_agentic" | ||
| ROUTE_MATCHED = "route.matched" | ||
|
|
||
| SERVICE_URL = "service_url" | ||
| STORAGE_OPERATION = "storage.operation" | ||
|
|
||
| TOKEN_SERVICE_ENDPOINT = "agents.token_service.endpoint" | ||
|
|
||
| USER_ID = "user.id" | ||
|
|
||
| VIEW_ID = "view.id" | ||
|
|
||
| # for missing values | ||
| UNKNOWN = "unknown" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| # Copyright (c) Microsoft Corporation. All rights reserved. | ||
| # Licensed under the MIT License. | ||
|
|
||
| from . import resource | ||
| from ._agents_telemetry import agents_telemetry | ||
| from .type_defs import AttributeMap, SpanCallback | ||
| from .simple_span_wrapper import SimpleSpanWrapper | ||
| from .base_span_wrapper import BaseSpanWrapper | ||
| from .resource import SERVICE_NAME, SERVICE_VERSION, RESOURCE | ||
|
|
||
| __all__ = [ | ||
| "agents_telemetry", | ||
| "resource", | ||
| "AttributeMap", | ||
| "SpanCallback", | ||
| "SimpleSpanWrapper", | ||
| "BaseSpanWrapper", | ||
| "SERVICE_NAME", | ||
| "SERVICE_VERSION", | ||
| "RESOURCE", | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,106 @@ | ||
| # Copyright (c) Microsoft Corporation. All rights reserved. | ||
| # Licensed under the MIT License. | ||
|
|
||
| import time | ||
| import logging | ||
| from collections.abc import Iterator | ||
|
|
||
| from contextlib import contextmanager | ||
|
|
||
| from opentelemetry.metrics import Meter | ||
| from opentelemetry import metrics, trace | ||
| from opentelemetry.trace import Tracer, Span | ||
|
|
||
| from microsoft_agents.activity import TurnContextProtocol | ||
|
|
||
| from .resource import SERVICE_NAME, SERVICE_VERSION | ||
| from .type_defs import SpanCallback | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| class _AgentsTelemetry: | ||
|
|
||
| def __init__(self): | ||
| """Initializes the AgentsTelemetry instance with the given tracer and meter, or creates new ones if not provided | ||
|
|
||
| :param tracer: Optional OpenTelemetry Tracer instance to use for creating spans. If not provided, a new tracer will be created with the service name and version from constants. | ||
| :param meter: Optional OpenTelemetry Meter instance to use for recording metrics. If not provided, a new meter will be created with the service name and version from constants. | ||
| """ | ||
| self._tracer = trace.get_tracer(SERVICE_NAME, SERVICE_VERSION) | ||
| self._meter = metrics.get_meter(SERVICE_NAME, SERVICE_VERSION) | ||
|
|
||
| @property | ||
| def tracer(self) -> Tracer: | ||
| """Returns the OpenTelemetry tracer instance for creating spans""" | ||
| return self._tracer | ||
|
|
||
| @property | ||
| def meter(self) -> Meter: | ||
| """Returns the OpenTelemetry meter instance for recording metrics""" | ||
| return self._meter | ||
|
|
||
| def _extract_attributes_from_context( | ||
| self, turn_context: TurnContextProtocol | ||
| ) -> dict: | ||
| """Helper method to extract common attributes from the TurnContext for span and metric recording""" | ||
|
|
||
| # This can be expanded to extract common attributes for spans and metrics from the context | ||
| attributes = {} | ||
| attributes["activity.type"] = turn_context.activity.type | ||
| attributes["agent.is_agentic"] = turn_context.activity.is_agentic_request() | ||
| if turn_context.activity.from_property: | ||
| attributes["from.id"] = turn_context.activity.from_property.id | ||
| if turn_context.activity.recipient: | ||
| attributes["recipient.id"] = turn_context.activity.recipient.id | ||
| if turn_context.activity.conversation: | ||
| attributes["conversation.id"] = turn_context.activity.conversation.id | ||
| attributes["channel_id"] = turn_context.activity.channel_id | ||
| attributes["message.text.length"] = ( | ||
| len(turn_context.activity.text) if turn_context.activity.text else 0 | ||
| ) | ||
| return attributes | ||
|
|
||
| @contextmanager | ||
| def start_as_current_span( | ||
| self, | ||
| span_name: str, | ||
| callback: SpanCallback | None = None, | ||
| ) -> Iterator[Span]: | ||
| """Context manager for starting a timed span that records duration and success/failure status, and invokes a callback with the results | ||
|
|
||
| :param span_name: The name of the span to start | ||
| :param callback: Optional callback function that will be called with the span, duration in milliseconds, and any exception that was raised (or None if successful) when the span is ended | ||
| :return: An iterator that yields the started span, which will be ended when the context manager exits | ||
| """ | ||
|
|
||
| with self._tracer.start_as_current_span(span_name) as span: | ||
|
|
||
| start = time.time() | ||
| exception: Exception | None = None | ||
|
|
||
| try: | ||
| yield span # execute the operation in the with block | ||
| except Exception as e: | ||
| exception = e | ||
| raise | ||
| finally: | ||
|
|
||
| success = exception is None | ||
|
|
||
| end = time.time() | ||
| duration = (end - start) * 1000 # milliseconds | ||
|
|
||
| if success: | ||
| span.add_event(f"{span_name} completed", {"duration_ms": duration}) | ||
| span.set_status(trace.Status(trace.StatusCode.OK)) | ||
| if callback: | ||
| callback(span, duration, None) | ||
| else: | ||
| if callback: | ||
| callback(span, duration, exception) | ||
|
|
||
| span.set_status(trace.Status(trace.StatusCode.ERROR)) | ||
|
|
||
|
|
||
| agents_telemetry = _AgentsTelemetry() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| # Copyright (c) Microsoft Corporation. All rights reserved. | ||
| # Licensed under the MIT License. | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import logging | ||
|
|
||
| from abc import ABC, abstractmethod | ||
| from contextlib import ExitStack | ||
| from typing import ContextManager | ||
|
|
||
| from opentelemetry.trace import Span | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| class BaseSpanWrapper(ABC): | ||
| """Wrapper around OTEL spans for SDK-specific telemetry""" | ||
|
|
||
| def __init__(self): | ||
| self._span: Span | None = None | ||
| self._active: bool = False | ||
|
|
||
| self._exit_stack = ExitStack() | ||
|
|
||
| @property | ||
| def otel_span(self) -> Span | None: | ||
| """Returns the underlying OTEL span if it is active, or None if the span has not been started or has already ended. This can be used to access OTEL-specific functionality or attributes of the span when needed, while still providing a higher-level abstraction through the BaseSpanWrapper class.""" | ||
| return self._span | ||
|
|
||
| @property | ||
| def active(self) -> bool: | ||
| """Indicates whether the BaseSpanWrapper is currently active. This can be used to prevent operations on an inactive BaseSpanWrapper, and to check the BaseSpanWrapper's lifecycle state.""" | ||
| return self._active | ||
|
|
||
| @abstractmethod | ||
| def _start_span(self) -> ContextManager[Span]: | ||
| """Abstract method that must be implemented by subclasses to define how the BaseSpanWrapper is started and what attributes are set on the BaseSpanWrapper. This method should return a context manager that yields the started BaseSpanWrapper, allowing the base BaseSpanWrapper class to manage the BaseSpanWrapper's lifecycle and ensure proper cleanup when the BaseSpanWrapper is ended.""" | ||
| raise NotImplementedError | ||
|
|
||
| @staticmethod | ||
| def _log_lifespan_error(desc: str) -> None: | ||
| """Helper method to log a warning when an operation is attempted on an inactive BaseSpanWrapper. This can be used in methods that require an active BaseSpanWrapper to indicate potential misuse of the BaseSpanWrapper lifecycle.""" | ||
| logger.warning( | ||
| "Attempting to perform an operation on an inactive BaseSpanWrapper. This may indicate a bug in the telemetry implementation or misuse of the BaseSpanWrapper lifecycle." | ||
| ) | ||
| logger.warning("Description: %s", desc) | ||
|
|
||
| # TODO -> Add Self annotation once 3.11 is the minimum supported version | ||
| def __enter__(self): | ||
| """Starts the BaseSpanWrapper and returns the BaseSpanWrapper instance for chaining. This method should check if the BaseSpanWrapper is already active and log a warning if an attempt is made to start an already active BaseSpanWrapper, to help identify potential issues with BaseSpanWrapper lifecycle management.""" | ||
| if self._active: | ||
| BaseSpanWrapper._log_lifespan_error( | ||
| "Attempting to start a BaseSpanWrapper that is already active." | ||
| ) | ||
|
|
||
| self._span = self._exit_stack.enter_context(self._start_span()) | ||
| self._active = True | ||
|
|
||
| return self | ||
|
|
||
| def start(self) -> BaseSpanWrapper: | ||
| """Starts the BaseSpanWrapper and returns the BaseSpanWrapper instance for chaining""" | ||
| return self.__enter__() | ||
|
|
||
| def __exit__(self, exc_type, exc_val, exc_tb): | ||
| """Stops the BaseSpanWrapper if it is active, and logs a warning if an attempt is made to stop a BaseSpanWrapper that is not active. This ensures that BaseSpanWrappers are properly cleaned up and that potential issues with BaseSpanWrapper lifecycle management are logged for debugging purposes.""" | ||
| if self._active: | ||
| self._exit_stack.__exit__(exc_type, exc_val, exc_tb) | ||
| self._span = None | ||
| self._active = False | ||
| else: | ||
| BaseSpanWrapper._log_lifespan_error( | ||
| "BaseSpanWrapper is not active and cannot be exited" | ||
| ) | ||
|
|
||
| def end(self) -> None: | ||
| """Stops the BaseSpanWrapper if it is active""" | ||
| self.__exit__(None, None, None) |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,18 @@ | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| # Copyright (c) Microsoft Corporation. All rights reserved. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| # Licensed under the MIT License. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
| import os | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
| from opentelemetry.sdk.resources import Resource | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
| SERVICE_NAME = "microsoft_agents" | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| SERVICE_VERSION = "1.0.0" | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+5
to
+10
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
| from opentelemetry.sdk.resources import Resource | |
| SERVICE_NAME = "microsoft_agents" | |
| SERVICE_VERSION = "1.0.0" | |
| from importlib import metadata as importlib_metadata | |
| from opentelemetry.sdk.resources import Resource | |
| SERVICE_NAME = "microsoft_agents" | |
| def _get_service_version() -> str: | |
| """Return the installed package version, or a safe fallback.""" | |
| try: | |
| return importlib_metadata.version("microsoft-agents-hosting-core") | |
| except importlib_metadata.PackageNotFoundError: | |
| # Package metadata not available (e.g., editable install or non-standard env). | |
| return "unknown" | |
| except Exception: | |
| # Any other unexpected error: do not break telemetry initialization. | |
| return "unknown" | |
| SERVICE_VERSION = _get_service_version() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| # Copyright (c) Microsoft Corporation. All rights reserved. | ||
| # Licensed under the MIT License. | ||
|
|
||
| from abc import ABC | ||
| from collections.abc import Iterator | ||
| from contextlib import contextmanager | ||
|
|
||
| from opentelemetry.trace import Span | ||
|
|
||
| from ._agents_telemetry import agents_telemetry | ||
| from .base_span_wrapper import BaseSpanWrapper | ||
| from .type_defs import AttributeMap | ||
|
|
||
|
|
||
| class SimpleSpanWrapper(BaseSpanWrapper, ABC): | ||
| """Simple implementation of the BaseSpanWrapper that can be used when no additional attributes or functionality are needed on the span beyond what is provided by the base BaseSpanWrapper class. This can be used as a simple wrapper around an OTEL span for cases where no SDK-specific telemetry is needed, while still providing the benefits of the BaseSpanWrapper abstraction and lifecycle management.""" | ||
|
|
||
| def __init__(self, span_name: str): | ||
| super().__init__() | ||
| self._span_name = span_name | ||
|
|
||
| def _get_attributes(self) -> AttributeMap: | ||
| """Returns a dictionary of attributes to set on the span when it is started. This can be overridden by subclasses to provide custom attributes for the span based on the context in which it is being used.""" | ||
| return {} | ||
|
|
||
| def _callback(self, span: Span, duration: float, error: Exception | None) -> None: | ||
| """Callback function that is called when the span is ended. This can be overridden by subclasses to provide custom logic for recording metrics or handling errors based on the outcome of the span.""" | ||
| pass | ||
|
|
||
| @contextmanager | ||
| def _start_span(self) -> Iterator[Span]: | ||
| """Starts a basic OTEL span with the given name and no additional attributes.""" | ||
| with agents_telemetry.start_as_current_span( | ||
| self._span_name, callback=self._callback | ||
| ) as span: | ||
| yield span | ||
| if span is not None: | ||
| attributes = self._get_attributes() | ||
| if attributes: | ||
| span.set_attributes(attributes) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| from typing import Mapping, Callable | ||
|
|
||
| from opentelemetry.util.types import AttributeValue | ||
| from opentelemetry.trace import Span | ||
|
|
||
| AttributeMap = Mapping[str, AttributeValue] | ||
| SpanCallback = Callable[[Span, float, Exception | None], None] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The module header note is incomplete (
...in order to avoid) and doesn’t explain what should be avoided. Please complete or remove the comment so future readers aren’t left with a dangling design note.