Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

[project]
name = "lmnr"
version = "0.7.15"
version = "0.7.16"
description = "Python SDK for Laminar"
authors = [
{ name = "lmnr.ai", email = "founders@lmnr.ai" }
Expand Down
55 changes: 51 additions & 4 deletions src/lmnr/opentelemetry_lib/litellm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,15 @@ def _process_response_usage(self, span, usage):
details.get("cached_tokens"),
)
# TODO: add audio/image/text token details
# TODO: add completion tokens details (reasoning tokens)
if usage_dict.get("completion_tokens_details"):
details = usage_dict.get("completion_tokens_details", {})
details = model_as_dict(details)
if details.get("reasoning_tokens"):
set_span_attribute(
span,
"gen_ai.usage.reasoning_tokens",
details.get("reasoning_tokens"),
)

def _process_tool_calls(self, span, tool_calls, choice_index, is_response=True):
"""Process and set tool call attributes on the span"""
Expand Down Expand Up @@ -467,17 +475,56 @@ def _process_response_choices(self, span, choices):
content = message.get("content", "")
if content is None:
continue
reasoning_content = message.get("reasoning_content")
if reasoning_content:
if isinstance(reasoning_content, str):
reasoning_content = [
{
"type": "text",
"text": reasoning_content,
}
]
elif not isinstance(reasoning_content, list):
reasoning_content = [
{
"type": "text",
"text": str(reasoning_content),
}
]
else:
reasoning_content = []
if isinstance(content, str):
set_span_attribute(span, f"gen_ai.completion.{i}.content", content)
if reasoning_content:
set_span_attribute(
span,
f"gen_ai.completion.{i}.content",
json.dumps(
reasoning_content
+ [
{
"type": "text",
"text": content,
}
]
),
)
else:
set_span_attribute(
span,
f"gen_ai.completion.{i}.content",
content,
)
elif isinstance(content, list):
set_span_attribute(
span, f"gen_ai.completion.{i}.content", json.dumps(content)
span,
f"gen_ai.completion.{i}.content",
json.dumps(reasoning_content + content),
)
else:
set_span_attribute(
span,
f"gen_ai.completion.{i}.content",
json.dumps(model_as_dict(content)),
json.dumps(reasoning_content + [model_as_dict(content)]),
)

def _process_content_part(self, content_part: dict) -> dict:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,16 @@ def _set_response_attributes(span, response: types.GenerateContentResponse):

if response.usage_metadata:
usage_dict = to_dict(response.usage_metadata)
candidates_token_count = usage_dict.get("candidates_token_count")
# unlike OpenAI, and unlike input cached tokens, thinking tokens are
# not counted as part of candidates token count, so we need to add them
# separately for consistency with other instrumentations
thoughts_token_count = usage_dict.get("thoughts_token_count")
output_token_count = (
(candidates_token_count or 0) + (thoughts_token_count or 0)
if candidates_token_count is not None or thoughts_token_count is not None
else None
)
set_span_attribute(
span,
gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS,
Expand All @@ -280,7 +290,7 @@ def _set_response_attributes(span, response: types.GenerateContentResponse):
set_span_attribute(
span,
gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS,
usage_dict.get("candidates_token_count"),
output_token_count,
)
set_span_attribute(
span,
Expand All @@ -292,6 +302,11 @@ def _set_response_attributes(span, response: types.GenerateContentResponse):
SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS,
usage_dict.get("cached_content_token_count"),
)
set_span_attribute(
span,
SpanAttributes.LLM_USAGE_REASONING_TOKENS,
thoughts_token_count,
)

if should_send_prompts():
set_span_attribute(
Expand Down
2 changes: 1 addition & 1 deletion src/lmnr/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from packaging import version


__version__ = "0.7.15"
__version__ = "0.7.16"
PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}"


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
interactions:
- request:
body: '{"contents": [{"parts": [{"text": "How many times does the letter ''r''
appear in the word strawberry?"}], "role": "user"}], "systemInstruction": {"parts":
[{"text": "Think deep and thoroughly step by step."}], "role": "user"}, "generationConfig":
{"thinkingConfig": {"thinkingBudget": 512}}}'
headers:
accept:
- '*/*'
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '290'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
user-agent:
- google-genai-sdk/1.34.0 gl-python/3.13.5
x-goog-api-client:
- google-genai-sdk/1.34.0 gl-python/3.13.5
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent
response:
body:
string: !!binary |
H4sIAAAAAAAC/2VR32vCMBB+718R8iIUK5ubm/NtbHtwKBOtY7DuIdrTZrZJSa6oiP/7LtZqywKX
hPu++/XdwWOML4WKZSwQLB+wb/IwdjjdDtMKQSEBlYucuTB45ZbnUPsTBWHngvgIsGXZUhcKGSaQ
DSIVKcsChmS+b3yfXkG2JVuQQc1fvXsXFCbAUkAEw1qmxUSegzCWOHfEQZmBZVK5GmyrTcwibtGI
7QKM2Ue8w2vtHS//n/Z1KKNTcB1nOoa0oh8rAl9JJW0yBWG1crRZ+DHhF1SqGHbkvvGqAqfUvLBi
DWNAQfKKi4g8NzrLMdQbUC9OGkK6vTJZbRsN/P7pjKNGkTZDH7rtf3ntK1WVaX1NtQ3SkCKVuHeT
hG9fIa8Jgc22KiW8mmAcE12sE2y2eNvve2fJShU/wVhZyrWGjAQMup1esEqFTQKqDqeq3IDNtbIw
jB1xOgyFGP/O3x8LtYnnk/4I7OyZe0fvD6OyUt2sAgAA
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Encoding:
- gzip
Content-Type:
- application/json; charset=UTF-8
Date:
- Mon, 29 Sep 2025 13:06:12 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=707
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- SAMEORIGIN
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
interactions:
- request:
body: '{"contents": [{"parts": [{"text": "How many times does the letter ''r''
appear in the word strawberry?"}], "role": "user"}], "systemInstruction": {"parts":
[{"text": "Think deep and thoroughly step by step."}], "role": "user"}, "generationConfig":
{"thinkingConfig": {"thinkingBudget": 512}}}'
headers:
Content-Type:
- application/json
user-agent:
- google-genai-sdk/1.34.0 gl-python/3.13.5
x-goog-api-client:
- google-genai-sdk/1.34.0 gl-python/3.13.5
method: post
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"text\": \"Let's count them:\\n\\nThe word is
\\\"strawberry\\\".\\nThe letter is 'r'.\\n\\n* st**r**awbe**rr**y\\n\\nThe
letter 'r' appears **3** times in the word \\\"strawberry\\\".\"\n }\n
\ ],\n \"role\": \"model\"\n },\n \"finishReason\":
\"STOP\",\n \"index\": 0\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\":
25,\n \"candidatesTokenCount\": 50,\n \"totalTokenCount\": 265,\n \"promptTokensDetails\":
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 25\n
\ }\n ],\n \"thoughtsTokenCount\": 190\n },\n \"modelVersion\":
\"gemini-2.5-flash-lite\",\n \"responseId\": \"L4XaaIPGCcjtkdUPlInyiAM\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Encoding:
- gzip
Content-Type:
- application/json; charset=UTF-8
Date:
- Mon, 29 Sep 2025 13:10:07 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=926
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- SAMEORIGIN
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
interactions:
- request:
body: '{"contents": [{"parts": [{"text": "How many times does the letter ''r''
appear in the word strawberry?"}], "role": "user"}], "systemInstruction": {"parts":
[{"text": "Think deep and thoroughly step by step."}], "role": "user"}, "generationConfig":
{"thinkingConfig": {"includeThoughts": true, "thinkingBudget": 512}}}'
headers:
accept:
- '*/*'
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '315'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
user-agent:
- google-genai-sdk/1.34.0 gl-python/3.13.5
x-goog-api-client:
- google-genai-sdk/1.34.0 gl-python/3.13.5
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent
response:
body:
string: !!binary |
H4sIAAAAAAAC/4VUUW+bMBB+z6+48hIJAdrabOr6VrWbVnVrq4ZNlZY9OHAEK2Az2xFlVf/77gxJ
aDptkYLBd/7uu+/u/DQBCDKhcpkLhzY4gx+0A/Dkn2zTyqFyZNhu0WYjjNv79r+n0Tu5OHzkQ0EY
piXC3BnRLtGYDqb3U7jQG+XCcKEW6rwyclW6CCp0UwtLg2INrpQWct2qBPj0xqKBVihnwWlYK91C
Sf9aqA6crJG2yYsAHPlNzRRE06AwFqTyllabHBaB3ZFYBAnMZd1UCKj0ZlUm8EkaSyyuQCHmHOY3
Gs0AusdYYwdYYU1a2LM9qnQWqwLiA3jaIE29m20wk4XMBn4RtKXMSqAEiWkCN7qNfBIc03bWYS2c
zERVdWCpMLtQCas1d6S8VCsojK69aYkrqRRtMfeV3qqw7Ia3hJl5Qko3yF/Of1knq4r2EjgvI0Yy
SMkQrSMvecFy9AzTUnBltKLTF9QNUm18uEUgFgEvbb8s+wVJ3CThYBHsn76QBglHKM3RGO/IY1Md
11zClrhcIzaUBeETxqBgf4p7wuRHcIemwMz1aujIa8Tp73RiHTLuLy5jaZDSUtYJlVEMXRx0ykCr
0Aa9CMTOtmSS4xoflJYnQhDmq6ajCjpRcZCTvi2JZBAdjEXJ7caT4cwGR7bn6P+z9MWPiE+Og9dn
LIKNXRyGJgxjEbfxMsbhq3927JL+fTrC8CQMhwH6x6QEY5a79597voHRFTK/WudYbd13CQWFVNKW
9yisVuw2T2/vdroEUuX4SNtvJtsAHjrYWLHCr+gEXUxid/0EDTV+41K9RuVvEbIcv+vBRvfYC/vs
w2D39Tk4ehq9wrWXFFVW4wtudPdRkqKSruNM0o8P6ajCFOAF9laJyUiwbQe8pPj2dDYZJOtV/I7G
yl6uFdYkYHycvIuLStgypujoowYGbaOVxaucHe9nqRDXl/L8/cOvdf7tztx8vp2d62DyPPkDFZMx
XucFAAA=
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Encoding:
- gzip
Content-Type:
- application/json; charset=UTF-8
Date:
- Mon, 29 Sep 2025 13:06:15 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=2264
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- SAMEORIGIN
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
interactions:
- request:
body: '{"contents": [{"parts": [{"text": "How many times does the letter ''r''
appear in the word strawberry?"}], "role": "user"}], "systemInstruction": {"parts":
[{"text": "Think deep and thoroughly step by step."}], "role": "user"}, "generationConfig":
{"thinkingConfig": {"includeThoughts": true, "thinkingBudget": 512}}}'
headers:
Content-Type:
- application/json
user-agent:
- google-genai-sdk/1.34.0 gl-python/3.13.5
x-goog-api-client:
- google-genai-sdk/1.34.0 gl-python/3.13.5
method: post
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"text\": \"**Breaking Down the Letter Count in
\\\"Strawberry\\\"**\\n\\nOkay, so the challenge is clear: figure out how
many times the letter 'r' appears in the word \\\"strawberry\\\". That's
straightforward enough. Let's break it down methodically.\\n\\nFirst, I need
to isolate the target: \\\"strawberry\\\" is the word in question, and 'r'
is the character we're focusing on. Now, the simplest approach is just a
direct scan. I'll go through the word, character by character, and increment
a counter every time I hit an 'r'.\\n\\n* s... nope.\\n* t... not there.\\n*
\ **r** ... one!\\n* a... keep moving.\\n* w... no 'r' here.\\n* b...
still searching.\\n* e... almost there...\\n* **r** ... two!\\n* **r**
... three!\\n* y... and we're done.\\n\\nAlright, that's it. It's a quick,
manual count, but effective. The final, definitive answer is: The letter
'r' appears 3 times in the word \\\"strawberry\\\". Easy peasy.\\n\",\n \"thought\":
true\n },\n {\n \"text\": \"Let's count them:\\n\\ns
- t - **r** - a - w - b - e - **r** - **r** - y\\n\\nThe letter 'r' appears
**3** times in the word \\\"strawberry\\\".\"\n }\n ],\n \"role\":
\"model\"\n },\n \"finishReason\": \"STOP\",\n \"index\": 0\n
\ }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\": 25,\n \"candidatesTokenCount\":
49,\n \"totalTokenCount\": 246,\n \"promptTokensDetails\": [\n {\n
\ \"modality\": \"TEXT\",\n \"tokenCount\": 25\n }\n ],\n
\ \"thoughtsTokenCount\": 172\n },\n \"modelVersion\": \"gemini-2.5-flash-lite\",\n
\ \"responseId\": \"MoXaaLGnAaiE7M8P3q6RsQM\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Encoding:
- gzip
Content-Type:
- application/json; charset=UTF-8
Date:
- Mon, 29 Sep 2025 13:10:10 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=2777
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- SAMEORIGIN
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
version: 1
Loading