Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
)
from opentelemetry.semconv._incubating.attributes import (
gen_ai_attributes as GenAIAttributes,
openai_attributes as OpenAIAttributes,
)
from opentelemetry.semconv_ai import SpanAttributes
from opentelemetry.trace.propagation import set_span_in_context
Expand Down Expand Up @@ -141,6 +142,9 @@ def _set_request_attributes(span, kwargs, instance=None):
_set_span_attribute(
span, SpanAttributes.LLM_IS_STREAMING, kwargs.get("stream") or False
)
_set_span_attribute(
span, OpenAIAttributes.OPENAI_REQUEST_SERVICE_TIER, kwargs.get("service_tier")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should guard against None

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nirga Are you sure? It's already guarded in the setter:

def _set_span_attribute(span, name, value):
    if value is None or value == "":
        return

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You’re right! We don’t have it in all instrumentations unfortunately so I forgot about that here. Thanks!

)
if response_format := kwargs.get("response_format"):
# backward-compatible check for
# openai.types.shared_params.response_format_json_schema.ResponseFormatJSONSchema
Expand Down Expand Up @@ -210,6 +214,11 @@ def _set_response_attributes(span, response):
SpanAttributes.LLM_OPENAI_RESPONSE_SYSTEM_FINGERPRINT,
response.get("system_fingerprint"),
)
_set_span_attribute(
span,
OpenAIAttributes.OPENAI_RESPONSE_SERVICE_TIER,
response.get("service_tier"),
)
_log_prompt_filter(span, response)
usage = response.get("usage")
if not usage:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
from opentelemetry.semconv._incubating.attributes import (
gen_ai_attributes as GenAIAttributes,
openai_attributes as OpenAIAttributes,
)
from opentelemetry.semconv_ai import SpanAttributes
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
Expand Down Expand Up @@ -132,6 +133,10 @@ class TracedData(pydantic.BaseModel):
request_reasoning_effort: Optional[str] = pydantic.Field(default=None)
response_reasoning_effort: Optional[str] = pydantic.Field(default=None)

# OpenAI service tier
request_service_tier: Optional[str] = pydantic.Field(default=None)
response_service_tier: Optional[str] = pydantic.Field(default=None)


responses: dict[str, TracedData] = {}

Expand Down Expand Up @@ -189,6 +194,8 @@ def set_data_attributes(traced_response: TracedData, span: Span):
_set_span_attribute(span, GenAIAttributes.GEN_AI_REQUEST_MODEL, traced_response.request_model)
_set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_ID, traced_response.response_id)
_set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_MODEL, traced_response.response_model)
_set_span_attribute(span, OpenAIAttributes.OPENAI_REQUEST_SERVICE_TIER, traced_response.request_service_tier)
_set_span_attribute(span, OpenAIAttributes.OPENAI_RESPONSE_SERVICE_TIER, traced_response.response_service_tier)
if usage := traced_response.usage:
_set_span_attribute(span, GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS, usage.input_tokens)
_set_span_attribute(span, GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS, usage.output_tokens)
Expand Down Expand Up @@ -483,6 +490,8 @@ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwa
)
),
response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"),
request_service_tier=kwargs.get("service_tier"),
response_service_tier=existing_data.get("response_service_tier"),
)
except Exception:
traced_data = None
Expand Down Expand Up @@ -546,6 +555,8 @@ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwa
)
),
response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"),
request_service_tier=existing_data.get("request_service_tier", kwargs.get("service_tier")),
response_service_tier=existing_data.get("response_service_tier", parsed_response.service_tier),
)
responses[parsed_response.id] = traced_data
except Exception:
Expand Down Expand Up @@ -621,6 +632,8 @@ async def async_responses_get_or_create_wrapper(
)
),
response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"),
request_service_tier=kwargs.get("service_tier"),
response_service_tier=existing_data.get("response_service_tier"),
)
except Exception:
traced_data = None
Expand Down Expand Up @@ -685,6 +698,8 @@ async def async_responses_get_or_create_wrapper(
)
),
response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"),
request_service_tier=existing_data.get("request_service_tier", kwargs.get("service_tier")),
response_service_tier=existing_data.get("response_service_tier", parsed_response.service_tier),
)
responses[parsed_response.id] = traced_data
except Exception:
Expand Down Expand Up @@ -791,6 +806,8 @@ def __init__(
),
request_reasoning_effort=self._request_kwargs.get("reasoning", {}).get("effort"),
response_reasoning_effort=None,
request_service_tier=self._request_kwargs.get("service_tier"),
response_service_tier=None,
)

self._complete_response_data = None
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "Say hello"}], "model": "gpt-5",
"service_tier": "priority"}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '102'
content-type:
- application/json
host:
- api.openai.com
traceparent:
- 00-9abdb187b56105d7f694cb1bcd7d6ff7-ff31170765953060-01
user-agent:
- OpenAI/Python 1.99.7
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.99.7
x-stainless-read-timeout:
- '600'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.9.6
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAA3RSQW7bMBC86xVbnq1Cduok9iVIU6AuCgQucmsQCCy5tthSXJZcNVUC/70g7Vgy
0lx02NkZzQz3uQAQRoslCNVIVq235c1m/vh5vVp//B2fbtSnVf9tzYu72+u7p+9fb8UkMejHT1T8
wnqvqPUW2ZDbwyqgZEyq04vzs8W0OpvOMtCSRptoW8/lvJxVs3lZXZbVxYHXkFEYxRLuCwCA5/xN
Dp3Gv2IJ1eRl0mKMcotieVwCEIFsmggZo4ksHYvJACpyjC6bXqG19A5W9AhKOvgCDVoPPXXApGV/
NaYF3HRRJtOus3YESOeIZQqdDT8ckN3R4sY4E5s6oIzk0m8jkxcZ3RUADzlyd5JC+ECt55rpF2bZ
y72aGBoeYbMDyMTSDvPFoaVTsVojS2PjqDGhpGpQD8yhXtlpQyOgGEV7beZ/2vvYxm0HlfMPb+oP
gFLoGXXtA2qjThMPawHTAb61duw4OxYRwx+jsGaDIb2DD4aC4X5/dLGPjG29MW6LwQeTTyS9dbEr
/gEAAP//AwB9nhCJHwMAAA==
headers:
CF-RAY:
- 9a319038ca178687-ARN
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Sun, 23 Nov 2025 15:05:13 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=BzRM2cFojUtXKFo1i0gAAj1_aXPTRwMVXEDci5ux0eY-1763910313-1.0.1.1-Cq0yyUFvMCbE6GRA747glZtqTPRyXEkAE44E2L5S_TwRwNKZ.r26uFYeXvwQbhTSqDZ2KeyvsS693q_z9Dd9aWBj.j0eUJcBE5nNJ.pIhOg;
path=/; expires=Sun, 23-Nov-25 15:35:13 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=BoDvu6D4M8n.b5OoTqVk4rwbKfyeViFSKx_hJR4S4W0-1763910313444-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- agentpaid
openai-processing-ms:
- '968'
openai-project:
- proj_g54CtVh9wETqz1duiA1cw9Kw
openai-version:
- '2020-10-01'
x-envoy-upstream-service-time:
- '1163'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- '500'
x-ratelimit-limit-tokens:
- '500000'
x-ratelimit-remaining-requests:
- '499'
x-ratelimit-remaining-tokens:
- '499995'
x-ratelimit-reset-requests:
- 120ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_b623980c393049c6b6d99da2cc70576a
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
interactions:
- request:
body: '{"input": "Say hello", "model": "gpt-5", "service_tier": "priority"}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '68'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.99.7
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.99.7
x-stainless-read-timeout:
- '600'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.9.6
method: POST
uri: https://api.openai.com/v1/responses
response:
body:
string: !!binary |
H4sIAAAAAAAAAwAAAP//fFTLctswDLznK1ie444efki+9Jp+QyajgSjIYUMJHBJyo+n43zuibD0a
uzcJCywXWJB/noSQupJHIR16W0TbAyRJtIV9lMT17hBF+zxJk6RMsnqnsjjf51msMpWmZRKXCtOd
fB4oqPyFim801Hoc48ohMFYFDFh82Kd5HKVJEjDPwJ0fahQ11iBjNRaVoD5Ojrp20FWD8TiGtTG6
Pcmj+PMkhBDSQo9uqK/wjIYsOvkkxCUko3M0YG1nTAjo9nZKUSGDNn6NenadYk3tKt7AZ0Ed244L
pg/8CjKRKRSYNV1DFZpB2cnyZrdJomS3ibJNdLhOKzDKo3gNjYztzEb4xzakW0i2wYa6yjDKs+qA
e8gO47gDCfcWRyPAUzsMbIJ81zTg+uHgtxC7PN8T0PjTfxRkOYRFgBTzapdXaR1vY5UlXxU06D2c
FtIeOR5ARS1jO09lKWxFe/MDP3mqDgnQtsRw8/D1bQUaOllH5R0kEB2FfEFj6Jt4od9CQSt+inc0
VvTUCaYK+h9yKrpcvyYe6cgEbeC99gwtj8lDYkiSFhwYg2a9MOy6cbetw7Omzhe361MEK6aFso4a
y4UC9Y7FB/YPMYfDEDW1y4x5Faa7g3VNjkeXKt0110kuVmSonq6Thxq5L3Q1kNcaV1fLoztrhQWP
cWmdJqe5l9drTg6XvTI2Fh1wF8Lx9+gaDS5c5dXkGpj/F+6HvOXyyjO6kvxw3tzNJHwc9ztpNfrT
MckJmJdBMtlisSLRFLRLja5rFVynKyvtoTS3R6sLqz41oNvVm5E9fw0v3qGpy2BiNddFq07/fYni
3T3gHu+0AY+omRjMDCbpNMHOr91ukKEChoH+8nT5CwAA//8DAD1YGBRCBgAA
headers:
CF-RAY:
- 9a3190799a8282d6-ARN
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Sun, 23 Nov 2025 15:05:24 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=5epdftKChRBshn5d9QnvOUIH6jdbbC2Z49fpF_kViyo-1763910324-1.0.1.1-cHXuCjdspu_Vvx.3Ax1PpY9WHMivIEjCSU1BsYYaHZ_cEDxkd62QXyB39dzmzPGQ9zjTm8MVBplGzsr60dhv0gBrBaj9EQkh58nrbutTgVo;
path=/; expires=Sun, 23-Nov-25 15:35:24 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=gp4oUQF19kcdJq_p9DCnU9cTap7yKDGhmqIT54evW5M-1763910324011-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- agentpaid
openai-processing-ms:
- '1395'
openai-project:
- proj_g54CtVh9wETqz1duiA1cw9Kw
openai-version:
- '2020-10-01'
x-envoy-upstream-service-time:
- '1398'
x-ratelimit-limit-requests:
- '500'
x-ratelimit-limit-tokens:
- '500000'
x-ratelimit-remaining-requests:
- '499'
x-ratelimit-remaining-tokens:
- '500000'
x-ratelimit-reset-requests:
- 120ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_c7a7a91635ad4051892623b7a91f919f
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
Expand Up @@ -1497,6 +1497,27 @@ def test_chat_reasoning(instrument_legacy, span_exporter,
assert span.attributes["gen_ai.usage.reasoning_tokens"] > 0


@pytest.mark.vcr
def test_chat_with_service_tier(instrument_legacy, span_exporter, openai_client):
    """Chat completions: the request and response service_tier values end up on the span."""
    openai_client.chat.completions.create(
        model="gpt-5",
        messages=[{"role": "user", "content": "Say hello"}],
        service_tier="priority",
    )

    finished = span_exporter.get_finished_spans()
    assert len(finished) >= 1
    chat_span = finished[-1]

    # Both directions are captured: what we asked for and what the API granted.
    assert chat_span.attributes["openai.request.service_tier"] == "priority"
    assert chat_span.attributes["openai.response.service_tier"] == "priority"


def test_chat_exception(instrument_legacy, span_exporter, openai_client):
openai_client.api_key = "invalid"
with pytest.raises(Exception):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,23 @@ def test_responses(
# assert span.attributes["gen_ai.prompt.0.role"] == "user"


@pytest.mark.vcr
def test_responses_with_service_tier(
    instrument_legacy, span_exporter: InMemorySpanExporter, openai_client: OpenAI
):
    """Responses API: service_tier is recorded on both the request and response side."""
    openai_client.responses.create(
        model="gpt-5",
        input="Say hello",
        service_tier="priority",
    )

    finished = span_exporter.get_finished_spans()
    assert len(finished) == 1

    response_span = finished[0]
    assert response_span.name == "openai.response"
    assert response_span.attributes["openai.request.service_tier"] == "priority"
    assert response_span.attributes["openai.response.service_tier"] == "priority"


@pytest.mark.vcr
def test_responses_with_input_history(
instrument_legacy, span_exporter: InMemorySpanExporter, openai_client: OpenAI
Expand Down