Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions helm/kagent/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -497,12 +497,12 @@ otel:
otlp:
endpoint: ""
protocol: "grpc"
timeout: 15
timeout: 15000 # milliseconds
insecure: true
logging:
enabled: false
exporter:
otlp:
endpoint: ""
timeout: 15
timeout: 15000 # milliseconds
insecure: true
43 changes: 39 additions & 4 deletions python/packages/kagent-core/src/kagent/core/tracing/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,39 @@
from ._span_processor import KagentAttributesSpanProcessor


def _resolve_otlp_timeout_seconds(signal: str) -> float:
"""
Resolve OTLP timeout env vars (milliseconds) into seconds for exporters.
By default, Python OTLP exporter reads timeout env var as seconds.
However, OTEL spec defines timeout as milliseconds.
"""
signal_timeout_env = f"OTEL_EXPORTER_OTLP_{signal}_TIMEOUT"
raw_timeout = os.getenv(signal_timeout_env) or os.getenv("OTEL_EXPORTER_OTLP_TIMEOUT")
if raw_timeout is None:
# OTEL spec default is 10000ms
return 10.0

try:
timeout_millis = float(raw_timeout)
except ValueError:
logging.warning(
"Invalid OTEL timeout value %r from %s; falling back to 10000ms",
raw_timeout,
signal_timeout_env,
)
return 10.0

if timeout_millis < 0:
logging.warning(
"Negative OTEL timeout value %r from %s; falling back to 10000ms",
raw_timeout,
signal_timeout_env,
)
return 10.0

return timeout_millis / 1000.0


def _instrument_anthropic(event_logger_provider=None):
"""Instrument Anthropic SDK if available."""
try:
Expand Down Expand Up @@ -69,11 +102,12 @@ def configure(name: str = "kagent", namespace: str = "kagent", fastapi_app: Fast
or os.getenv("OTEL_TRACING_EXPORTER_OTLP_ENDPOINT") # Backward compatibility
or os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
)
trace_timeout_seconds = _resolve_otlp_timeout_seconds("TRACES")
logging.info("Trace endpoint: %s", trace_endpoint or "<default>")
if trace_endpoint:
processor = BatchSpanProcessor(OTLPSpanExporter(endpoint=trace_endpoint))
processor = BatchSpanProcessor(OTLPSpanExporter(endpoint=trace_endpoint, timeout=trace_timeout_seconds))
else:
processor = BatchSpanProcessor(OTLPSpanExporter())
processor = BatchSpanProcessor(OTLPSpanExporter(timeout=trace_timeout_seconds))

# Check if a TracerProvider already exists (e.g., set by CrewAI)
current_provider = trace.get_tracer_provider()
Expand Down Expand Up @@ -107,13 +141,14 @@ def configure(name: str = "kagent", namespace: str = "kagent", fastapi_app: Fast
or os.getenv("OTEL_LOGGING_EXPORTER_OTLP_ENDPOINT") # Backward compatibility
or os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
)
log_timeout_seconds = _resolve_otlp_timeout_seconds("LOGS")
logging.info("Log endpoint: %s", log_endpoint or "<default>")

# Add OTLP exporter
if log_endpoint:
log_processor = BatchLogRecordProcessor(OTLPLogExporter(endpoint=log_endpoint))
log_processor = BatchLogRecordProcessor(OTLPLogExporter(endpoint=log_endpoint, timeout=log_timeout_seconds))
else:
log_processor = BatchLogRecordProcessor(OTLPLogExporter())
log_processor = BatchLogRecordProcessor(OTLPLogExporter(timeout=log_timeout_seconds))
logger_provider.add_log_record_processor(log_processor)

_logs.set_logger_provider(logger_provider)
Expand Down
26 changes: 26 additions & 0 deletions python/packages/kagent-core/tests/test_tracing_configure.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from types import SimpleNamespace

import pytest
from opentelemetry.propagate import get_global_textmap
from opentelemetry.trace import get_current_span

Expand Down Expand Up @@ -94,3 +95,28 @@ def test_otel_sdk_default_propagator_includes_w3c_tracecontext():

ctx = get_global_textmap().extract(carrier)
assert get_current_span(ctx).get_span_context().trace_id == trace_id


@pytest.mark.parametrize(
("signal", "env", "expected"),
[
("TRACES", {}, 10.0),
("TRACES", {"OTEL_EXPORTER_OTLP_TIMEOUT": "500"}, 0.5),
("TRACES", {"OTEL_EXPORTER_OTLP_TRACES_TIMEOUT": "250"}, 0.25),
(
"LOGS",
{
"OTEL_EXPORTER_OTLP_TIMEOUT": "500",
"OTEL_EXPORTER_OTLP_LOGS_TIMEOUT": "750",
},
0.75,
),
],
)
def test_resolve_otlp_timeout_seconds_uses_milliseconds(monkeypatch, signal, env, expected):
for key in ("OTEL_EXPORTER_OTLP_TIMEOUT", "OTEL_EXPORTER_OTLP_TRACES_TIMEOUT", "OTEL_EXPORTER_OTLP_LOGS_TIMEOUT"):
monkeypatch.delenv(key, raising=False)
for key, value in env.items():
monkeypatch.setenv(key, value)

assert _utils._resolve_otlp_timeout_seconds(signal) == expected
Loading