lmnr-ai · dinmukhamedm · Sep 29, 2025 · Sep 29, 2025 · Sep 29, 2025 · Sep 29, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -6,7 +6,7 @@
 
 [project]
 name = "lmnr"
-version = "0.7.15"
+version = "0.7.16"
 description = "Python SDK for Laminar"
 authors = [
   { name = "lmnr.ai", email = "founders@lmnr.ai" }

diff --git a/src/lmnr/opentelemetry_lib/litellm/__init__.py b/src/lmnr/opentelemetry_lib/litellm/__init__.py
@@ -406,7 +406,15 @@ def _process_response_usage(self, span, usage):
                         details.get("cached_tokens"),
                     )
                 # TODO: add audio/image/text token details
-            # TODO: add completion tokens details (reasoning tokens)
+            if usage_dict.get("completion_tokens_details"):
+                details = usage_dict.get("completion_tokens_details", {})
+                details = model_as_dict(details)
+                if details.get("reasoning_tokens"):
+                    set_span_attribute(
+                        span,
+                        "gen_ai.usage.reasoning_tokens",
+                        details.get("reasoning_tokens"),
+                    )
 
         def _process_tool_calls(self, span, tool_calls, choice_index, is_response=True):
             """Process and set tool call attributes on the span"""
@@ -467,17 +475,56 @@ def _process_response_choices(self, span, choices):
                 content = message.get("content", "")
                 if content is None:
                     continue
+                reasoning_content = message.get("reasoning_content")
+                if reasoning_content:
+                    if isinstance(reasoning_content, str):
+                        reasoning_content = [
+                            {
+                                "type": "text",
+                                "text": reasoning_content,
+                            }
+                        ]
+                    elif not isinstance(reasoning_content, list):
+                        reasoning_content = [
+                            {
+                                "type": "text",
+                                "text": str(reasoning_content),
+                            }
+                        ]
+                else:
+                    reasoning_content = []
                 if isinstance(content, str):
-                    set_span_attribute(span, f"gen_ai.completion.{i}.content", content)
+                    if reasoning_content:
+                        set_span_attribute(
+                            span,
+                            f"gen_ai.completion.{i}.content",
+                            json.dumps(
+                                reasoning_content
+                                + [
+                                    {
+                                        "type": "text",
+                                        "text": content,
+                                    }
+                                ]
+                            ),
+                        )
+                    else:
+                        set_span_attribute(
+                            span,
+                            f"gen_ai.completion.{i}.content",
+                            content,
+                        )
                 elif isinstance(content, list):
                     set_span_attribute(
-                        span, f"gen_ai.completion.{i}.content", json.dumps(content)
+                        span,
+                        f"gen_ai.completion.{i}.content",
+                        json.dumps(reasoning_content + content),
                     )
                 else:
                     set_span_attribute(
                         span,
                         f"gen_ai.completion.{i}.content",
-                        json.dumps(model_as_dict(content)),
+                        json.dumps(reasoning_content + [model_as_dict(content)]),
                     )
 
         def _process_content_part(self, content_part: dict) -> dict:

diff --git a/src/lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py b/src/lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py
@@ -272,6 +272,16 @@ def _set_response_attributes(span, response: types.GenerateContentResponse):
 
     if response.usage_metadata:
         usage_dict = to_dict(response.usage_metadata)
+        candidates_token_count = usage_dict.get("candidates_token_count")
+        # Unlike OpenAI's reasoning tokens (and unlike cached input tokens),
+        # Gemini's thinking tokens are not included in candidates_token_count,
+        # so add them explicitly for consistency with other instrumentations.
+        thoughts_token_count = usage_dict.get("thoughts_token_count")
+        output_token_count = (
+            (candidates_token_count or 0) + (thoughts_token_count or 0)
+            if candidates_token_count is not None or thoughts_token_count is not None
+            else None
+        )
         set_span_attribute(
             span,
             gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS,
@@ -280,7 +290,7 @@ def _set_response_attributes(span, response: types.GenerateContentResponse):
         set_span_attribute(
             span,
             gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS,
-            usage_dict.get("candidates_token_count"),
+            output_token_count,
         )
         set_span_attribute(
             span,
@@ -292,6 +302,11 @@ def _set_response_attributes(span, response: types.GenerateContentResponse):
             SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS,
             usage_dict.get("cached_content_token_count"),
         )
+        set_span_attribute(
+            span,
+            SpanAttributes.LLM_USAGE_REASONING_TOKENS,
+            thoughts_token_count,
+        )
 
     if should_send_prompts():
         set_span_attribute(

diff --git a/src/lmnr/version.py b/src/lmnr/version.py
@@ -3,7 +3,7 @@
 from packaging import version
 
 
-__version__ = "0.7.15"
+__version__ = "0.7.16"
 PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}"
 
 

diff --git a/tests/cassettes/test_google_genai/test_google_genai_reasoning_tokens.yaml b/tests/cassettes/test_google_genai/test_google_genai_reasoning_tokens.yaml
@@ -0,0 +1,64 @@
+interactions:
+- request:
+    body: '{"contents": [{"parts": [{"text": "How many times does the letter ''r''
+      appear in the word strawberry?"}], "role": "user"}], "systemInstruction": {"parts":
+      [{"text": "Think deep and thoroughly step by step."}], "role": "user"}, "generationConfig":
+      {"thinkingConfig": {"thinkingBudget": 512}}}'
+    headers:
+      accept:
+      - '*/*'
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '290'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+      user-agent:
+      - google-genai-sdk/1.34.0 gl-python/3.13.5
+      x-goog-api-client:
+      - google-genai-sdk/1.34.0 gl-python/3.13.5
+    method: POST
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAC/2VR32vCMBB+718R8iIUK5ubm/NtbHtwKBOtY7DuIdrTZrZJSa6oiP/7LtZqywKX
+        hPu++/XdwWOML4WKZSwQLB+wb/IwdjjdDtMKQSEBlYucuTB45ZbnUPsTBWHngvgIsGXZUhcKGSaQ
+        DSIVKcsChmS+b3yfXkG2JVuQQc1fvXsXFCbAUkAEw1qmxUSegzCWOHfEQZmBZVK5GmyrTcwibtGI
+        7QKM2Ue8w2vtHS//n/Z1KKNTcB1nOoa0oh8rAl9JJW0yBWG1crRZ+DHhF1SqGHbkvvGqAqfUvLBi
+        DWNAQfKKi4g8NzrLMdQbUC9OGkK6vTJZbRsN/P7pjKNGkTZDH7rtf3ntK1WVaX1NtQ3SkCKVuHeT
+        hG9fIa8Jgc22KiW8mmAcE12sE2y2eNvve2fJShU/wVhZyrWGjAQMup1esEqFTQKqDqeq3IDNtbIw
+        jB1xOgyFGP/O3x8LtYnnk/4I7OyZe0fvD6OyUt2sAgAA
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Mon, 29 Sep 2025 13:06:12 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=707
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - nosniff
+      X-Frame-Options:
+      - SAMEORIGIN
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/cassettes/test_google_genai/test_google_genai_reasoning_tokens_async.yaml b/tests/cassettes/test_google_genai/test_google_genai_reasoning_tokens_async.yaml
@@ -0,0 +1,56 @@
+interactions:
+- request:
+    body: '{"contents": [{"parts": [{"text": "How many times does the letter ''r''
+      appear in the word strawberry?"}], "role": "user"}], "systemInstruction": {"parts":
+      [{"text": "Think deep and thoroughly step by step."}], "role": "user"}, "generationConfig":
+      {"thinkingConfig": {"thinkingBudget": 512}}}'
+    headers:
+      Content-Type:
+      - application/json
+      user-agent:
+      - google-genai-sdk/1.34.0 gl-python/3.13.5
+      x-goog-api-client:
+      - google-genai-sdk/1.34.0 gl-python/3.13.5
+    method: post
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent
+  response:
+    body:
+      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
+        [\n          {\n            \"text\": \"Let's count them:\\n\\nThe word is
+        \\\"strawberry\\\".\\nThe letter is 'r'.\\n\\n*   st**r**awbe**rr**y\\n\\nThe
+        letter 'r' appears **3** times in the word \\\"strawberry\\\".\"\n          }\n
+        \       ],\n        \"role\": \"model\"\n      },\n      \"finishReason\":
+        \"STOP\",\n      \"index\": 0\n    }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\":
+        25,\n    \"candidatesTokenCount\": 50,\n    \"totalTokenCount\": 265,\n    \"promptTokensDetails\":
+        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 25\n
+        \     }\n    ],\n    \"thoughtsTokenCount\": 190\n  },\n  \"modelVersion\":
+        \"gemini-2.5-flash-lite\",\n  \"responseId\": \"L4XaaIPGCcjtkdUPlInyiAM\"\n}\n"
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Mon, 29 Sep 2025 13:10:07 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=926
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - nosniff
+      X-Frame-Options:
+      - SAMEORIGIN
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/...cassettes/test_google_genai/test_google_genai_reasoning_tokens_with_include_thoughts.yaml b/...cassettes/test_google_genai/test_google_genai_reasoning_tokens_with_include_thoughts.yaml
@@ -0,0 +1,71 @@
+interactions:
+- request:
+    body: '{"contents": [{"parts": [{"text": "How many times does the letter ''r''
+      appear in the word strawberry?"}], "role": "user"}], "systemInstruction": {"parts":
+      [{"text": "Think deep and thoroughly step by step."}], "role": "user"}, "generationConfig":
+      {"thinkingConfig": {"includeThoughts": true, "thinkingBudget": 512}}}'
+    headers:
+      accept:
+      - '*/*'
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '315'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+      user-agent:
+      - google-genai-sdk/1.34.0 gl-python/3.13.5
+      x-goog-api-client:
+      - google-genai-sdk/1.34.0 gl-python/3.13.5
+    method: POST
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAC/4VUUW+bMBB+z6+48hIJAdrabOr6VrWbVnVrq4ZNlZY9OHAEK2Az2xFlVf/77gxJ
+        aDptkYLBd/7uu+/u/DQBCDKhcpkLhzY4gx+0A/Dkn2zTyqFyZNhu0WYjjNv79r+n0Tu5OHzkQ0EY
+        piXC3BnRLtGYDqb3U7jQG+XCcKEW6rwyclW6CCp0UwtLg2INrpQWct2qBPj0xqKBVihnwWlYK91C
+        Sf9aqA6crJG2yYsAHPlNzRRE06AwFqTyllabHBaB3ZFYBAnMZd1UCKj0ZlUm8EkaSyyuQCHmHOY3
+        Gs0AusdYYwdYYU1a2LM9qnQWqwLiA3jaIE29m20wk4XMBn4RtKXMSqAEiWkCN7qNfBIc03bWYS2c
+        zERVdWCpMLtQCas1d6S8VCsojK69aYkrqRRtMfeV3qqw7Ia3hJl5Qko3yF/Of1knq4r2EjgvI0Yy
+        SMkQrSMvecFy9AzTUnBltKLTF9QNUm18uEUgFgEvbb8s+wVJ3CThYBHsn76QBglHKM3RGO/IY1Md
+        11zClrhcIzaUBeETxqBgf4p7wuRHcIemwMz1aujIa8Tp73RiHTLuLy5jaZDSUtYJlVEMXRx0ykCr
+        0Aa9CMTOtmSS4xoflJYnQhDmq6ajCjpRcZCTvi2JZBAdjEXJ7caT4cwGR7bn6P+z9MWPiE+Og9dn
+        LIKNXRyGJgxjEbfxMsbhq3927JL+fTrC8CQMhwH6x6QEY5a79597voHRFTK/WudYbd13CQWFVNKW
+        9yisVuw2T2/vdroEUuX4SNtvJtsAHjrYWLHCr+gEXUxid/0EDTV+41K9RuVvEbIcv+vBRvfYC/vs
+        w2D39Tk4ehq9wrWXFFVW4wtudPdRkqKSruNM0o8P6ajCFOAF9laJyUiwbQe8pPj2dDYZJOtV/I7G
+        yl6uFdYkYHycvIuLStgypujoowYGbaOVxaucHe9nqRDXl/L8/cOvdf7tztx8vp2d62DyPPkDFZMx
+        XucFAAA=
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Mon, 29 Sep 2025 13:06:15 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=2264
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - nosniff
+      X-Frame-Options:
+      - SAMEORIGIN
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/...tes/test_google_genai/test_google_genai_reasoning_tokens_with_include_thoughts_async.yaml b/...tes/test_google_genai/test_google_genai_reasoning_tokens_with_include_thoughts_async.yaml
@@ -0,0 +1,69 @@
+interactions:
+- request:
+    body: '{"contents": [{"parts": [{"text": "How many times does the letter ''r''
+      appear in the word strawberry?"}], "role": "user"}], "systemInstruction": {"parts":
+      [{"text": "Think deep and thoroughly step by step."}], "role": "user"}, "generationConfig":
+      {"thinkingConfig": {"includeThoughts": true, "thinkingBudget": 512}}}'
+    headers:
+      Content-Type:
+      - application/json
+      user-agent:
+      - google-genai-sdk/1.34.0 gl-python/3.13.5
+      x-goog-api-client:
+      - google-genai-sdk/1.34.0 gl-python/3.13.5
+    method: post
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent
+  response:
+    body:
+      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
+        [\n          {\n            \"text\": \"**Breaking Down the Letter Count in
+        \\\"Strawberry\\\"**\\n\\nOkay, so the challenge is clear: figure out how
+        many times the letter 'r' appears in the word \\\"strawberry\\\".  That's
+        straightforward enough. Let's break it down methodically.\\n\\nFirst, I need
+        to isolate the target: \\\"strawberry\\\" is the word in question, and 'r'
+        is the character we're focusing on.  Now, the simplest approach is just a
+        direct scan.  I'll go through the word, character by character, and increment
+        a counter every time I hit an 'r'.\\n\\n*   s... nope.\\n*   t... not there.\\n*
+        \  **r** ... one!\\n*   a... keep moving.\\n*   w... no 'r' here.\\n*   b...
+        still searching.\\n*   e... almost there...\\n*   **r** ... two!\\n*   **r**
+        ... three!\\n*   y... and we're done.\\n\\nAlright, that's it. It's a quick,
+        manual count, but effective.  The final, definitive answer is: The letter
+        'r' appears 3 times in the word \\\"strawberry\\\".  Easy peasy.\\n\",\n            \"thought\":
+        true\n          },\n          {\n            \"text\": \"Let's count them:\\n\\ns
+        - t - **r** - a - w - b - e - **r** - **r** - y\\n\\nThe letter 'r' appears
+        **3** times in the word \\\"strawberry\\\".\"\n          }\n        ],\n        \"role\":
+        \"model\"\n      },\n      \"finishReason\": \"STOP\",\n      \"index\": 0\n
+        \   }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\": 25,\n    \"candidatesTokenCount\":
+        49,\n    \"totalTokenCount\": 246,\n    \"promptTokensDetails\": [\n      {\n
+        \       \"modality\": \"TEXT\",\n        \"tokenCount\": 25\n      }\n    ],\n
+        \   \"thoughtsTokenCount\": 172\n  },\n  \"modelVersion\": \"gemini-2.5-flash-lite\",\n
+        \ \"responseId\": \"MoXaaLGnAaiE7M8P3q6RsQM\"\n}\n"
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Mon, 29 Sep 2025 13:10:10 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=2777
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - nosniff
+      X-Frame-Options:
+      - SAMEORIGIN
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
+version: 1