diff --git a/pyproject.toml b/pyproject.toml index 8f0577ec..db3b30ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ [project] name = "lmnr" -version = "0.7.15" +version = "0.7.16" description = "Python SDK for Laminar" authors = [ { name = "lmnr.ai", email = "founders@lmnr.ai" } diff --git a/src/lmnr/opentelemetry_lib/litellm/__init__.py b/src/lmnr/opentelemetry_lib/litellm/__init__.py index 8e1f7d91..9973bea4 100644 --- a/src/lmnr/opentelemetry_lib/litellm/__init__.py +++ b/src/lmnr/opentelemetry_lib/litellm/__init__.py @@ -406,7 +406,15 @@ def _process_response_usage(self, span, usage): details.get("cached_tokens"), ) # TODO: add audio/image/text token details - # TODO: add completion tokens details (reasoning tokens) + if usage_dict.get("completion_tokens_details"): + details = usage_dict.get("completion_tokens_details", {}) + details = model_as_dict(details) + if details.get("reasoning_tokens"): + set_span_attribute( + span, + "gen_ai.usage.reasoning_tokens", + details.get("reasoning_tokens"), + ) def _process_tool_calls(self, span, tool_calls, choice_index, is_response=True): """Process and set tool call attributes on the span""" @@ -467,17 +475,56 @@ def _process_response_choices(self, span, choices): content = message.get("content", "") if content is None: continue + reasoning_content = message.get("reasoning_content") + if reasoning_content: + if isinstance(reasoning_content, str): + reasoning_content = [ + { + "type": "text", + "text": reasoning_content, + } + ] + elif not isinstance(reasoning_content, list): + reasoning_content = [ + { + "type": "text", + "text": str(reasoning_content), + } + ] + else: + reasoning_content = [] if isinstance(content, str): - set_span_attribute(span, f"gen_ai.completion.{i}.content", content) + if reasoning_content: + set_span_attribute( + span, + f"gen_ai.completion.{i}.content", + json.dumps( + reasoning_content + + [ + { + "type": "text", + "text": content, + } + ] + ), + ) + else: + set_span_attribute( + span, + f"gen_ai.completion.{i}.content", + content, + ) elif isinstance(content, list): set_span_attribute( - span, f"gen_ai.completion.{i}.content", json.dumps(content) + span, + f"gen_ai.completion.{i}.content", + json.dumps(reasoning_content + content), ) else: set_span_attribute( span, f"gen_ai.completion.{i}.content", - json.dumps(model_as_dict(content)), + json.dumps(reasoning_content + [model_as_dict(content)]), ) def _process_content_part(self, content_part: dict) -> dict: diff --git a/src/lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py b/src/lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py index 5e04507d..2cf6128c 100644 --- a/src/lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py +++ b/src/lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py @@ -272,6 +272,16 @@ def _set_response_attributes(span, response: types.GenerateContentResponse): if response.usage_metadata: usage_dict = to_dict(response.usage_metadata) + candidates_token_count = usage_dict.get("candidates_token_count") + # unlike OpenAI, and unlike input cached tokens, thinking tokens are + # not counted as part of candidates token count, so we need to add them + # separately for consistency with other instrumentations + thoughts_token_count = usage_dict.get("thoughts_token_count") + output_token_count = ( + (candidates_token_count or 0) + (thoughts_token_count or 0) + if candidates_token_count is not None or thoughts_token_count is not None + else None + ) set_span_attribute( span, gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS, @@ -280,7 +290,7 @@ def _set_response_attributes(span, response: types.GenerateContentResponse): set_span_attribute( span, gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS, - usage_dict.get("candidates_token_count"), + output_token_count, ) set_span_attribute( span, @@ -292,6 +302,11 @@ def _set_response_attributes(span, response: types.GenerateContentResponse): SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS, usage_dict.get("cached_content_token_count"), ) + set_span_attribute( + span, + SpanAttributes.LLM_USAGE_REASONING_TOKENS, + thoughts_token_count, + ) if should_send_prompts(): set_span_attribute( diff --git a/src/lmnr/version.py b/src/lmnr/version.py index 61b8262e..2b560455 100644 --- a/src/lmnr/version.py +++ b/src/lmnr/version.py @@ -3,7 +3,7 @@ from packaging import version -__version__ = "0.7.15" +__version__ = "0.7.16" PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}" diff --git a/tests/cassettes/test_google_genai/test_google_genai_reasoning_tokens.yaml b/tests/cassettes/test_google_genai/test_google_genai_reasoning_tokens.yaml new file mode 100644 index 00000000..45f7cf53 --- /dev/null +++ b/tests/cassettes/test_google_genai/test_google_genai_reasoning_tokens.yaml @@ -0,0 +1,64 @@ +interactions: +- request: + body: '{"contents": [{"parts": [{"text": "How many times does the letter ''r'' + appear in the word strawberry?"}], "role": "user"}], "systemInstruction": {"parts": + [{"text": "Think deep and thoroughly step by step."}], "role": "user"}, "generationConfig": + {"thinkingConfig": {"thinkingBudget": 512}}}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '290' + content-type: + - application/json + host: + - generativelanguage.googleapis.com + user-agent: + - google-genai-sdk/1.34.0 gl-python/3.13.5 + x-goog-api-client: + - google-genai-sdk/1.34.0 gl-python/3.13.5 + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/2VR32vCMBB+718R8iIUK5ubm/NtbHtwKBOtY7DuIdrTZrZJSa6oiP/7LtZqywKX + hPu++/XdwWOML4WKZSwQLB+wb/IwdjjdDtMKQSEBlYucuTB45ZbnUPsTBWHngvgIsGXZUhcKGSaQ + DSIVKcsChmS+b3yfXkG2JVuQQc1fvXsXFCbAUkAEw1qmxUSegzCWOHfEQZmBZVK5GmyrTcwibtGI + 7QKM2Ue8w2vtHS//n/Z1KKNTcB1nOoa0oh8rAl9JJW0yBWG1crRZ+DHhF1SqGHbkvvGqAqfUvLBi + DWNAQfKKi4g8NzrLMdQbUC9OGkK6vTJZbRsN/P7pjKNGkTZDH7rtf3ntK1WVaX1NtQ3SkCKVuHeT + hG9fIa8Jgc22KiW8mmAcE12sE2y2eNvve2fJShU/wVhZyrWGjAQMup1esEqFTQKqDqeq3IDNtbIw + jB1xOgyFGP/O3x8LtYnnk/4I7OyZe0fvD6OyUt2sAgAA + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Mon, 29 Sep 2025 13:06:12 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=707 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_google_genai/test_google_genai_reasoning_tokens_async.yaml b/tests/cassettes/test_google_genai/test_google_genai_reasoning_tokens_async.yaml new file mode 100644 index 00000000..83b7df53 --- /dev/null +++ b/tests/cassettes/test_google_genai/test_google_genai_reasoning_tokens_async.yaml @@ -0,0 +1,56 @@ +interactions: +- request: + body: '{"contents": [{"parts": [{"text": "How many times does the letter ''r'' + appear in the word strawberry?"}], "role": "user"}], "systemInstruction": {"parts": + [{"text": "Think deep and thoroughly step by step."}], "role": "user"}, "generationConfig": + {"thinkingConfig": {"thinkingBudget": 512}}}' + headers: + Content-Type: + - application/json + user-agent: + - google-genai-sdk/1.34.0 gl-python/3.13.5 + x-goog-api-client: + - google-genai-sdk/1.34.0 gl-python/3.13.5 + method: post + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent + response: + body: + string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\": + [\n {\n \"text\": \"Let's count them:\\n\\nThe word is + \\\"strawberry\\\".\\nThe letter is 'r'.\\n\\n* st**r**awbe**rr**y\\n\\nThe + letter 'r' appears **3** times in the word \\\"strawberry\\\".\"\n }\n + \ ],\n \"role\": \"model\"\n },\n \"finishReason\": + \"STOP\",\n \"index\": 0\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\": + 25,\n \"candidatesTokenCount\": 50,\n \"totalTokenCount\": 265,\n \"promptTokensDetails\": + [\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 25\n + \ }\n ],\n \"thoughtsTokenCount\": 190\n },\n \"modelVersion\": + \"gemini-2.5-flash-lite\",\n \"responseId\": \"L4XaaIPGCcjtkdUPlInyiAM\"\n}\n" + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Mon, 29 Sep 2025 13:10:07 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=926 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_google_genai/test_google_genai_reasoning_tokens_with_include_thoughts.yaml b/tests/cassettes/test_google_genai/test_google_genai_reasoning_tokens_with_include_thoughts.yaml new file mode 100644 index 00000000..5c7dc66d --- /dev/null +++ b/tests/cassettes/test_google_genai/test_google_genai_reasoning_tokens_with_include_thoughts.yaml @@ -0,0 +1,71 @@ +interactions: +- request: + body: '{"contents": [{"parts": [{"text": "How many times does the letter ''r'' + appear in the word strawberry?"}], "role": "user"}], "systemInstruction": {"parts": + [{"text": "Think deep and thoroughly step by step."}], "role": "user"}, "generationConfig": + {"thinkingConfig": {"includeThoughts": true, "thinkingBudget": 512}}}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '315' + content-type: + - application/json + host: + - generativelanguage.googleapis.com + user-agent: + - google-genai-sdk/1.34.0 gl-python/3.13.5 + x-goog-api-client: + - google-genai-sdk/1.34.0 gl-python/3.13.5 + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/4VUUW+bMBB+z6+48hIJAdrabOr6VrWbVnVrq4ZNlZY9OHAEK2Az2xFlVf/77gxJ + aDptkYLBd/7uu+/u/DQBCDKhcpkLhzY4gx+0A/Dkn2zTyqFyZNhu0WYjjNv79r+n0Tu5OHzkQ0EY + piXC3BnRLtGYDqb3U7jQG+XCcKEW6rwyclW6CCp0UwtLg2INrpQWct2qBPj0xqKBVihnwWlYK91C + Sf9aqA6crJG2yYsAHPlNzRRE06AwFqTyllabHBaB3ZFYBAnMZd1UCKj0ZlUm8EkaSyyuQCHmHOY3 + Gs0AusdYYwdYYU1a2LM9qnQWqwLiA3jaIE29m20wk4XMBn4RtKXMSqAEiWkCN7qNfBIc03bWYS2c + zERVdWCpMLtQCas1d6S8VCsojK69aYkrqRRtMfeV3qqw7Ia3hJl5Qko3yF/Of1knq4r2EjgvI0Yy + SMkQrSMvecFy9AzTUnBltKLTF9QNUm18uEUgFgEvbb8s+wVJ3CThYBHsn76QBglHKM3RGO/IY1Md + 11zClrhcIzaUBeETxqBgf4p7wuRHcIemwMz1aujIa8Tp73RiHTLuLy5jaZDSUtYJlVEMXRx0ykCr + 0Aa9CMTOtmSS4xoflJYnQhDmq6ajCjpRcZCTvi2JZBAdjEXJ7caT4cwGR7bn6P+z9MWPiE+Og9dn + LIKNXRyGJgxjEbfxMsbhq3927JL+fTrC8CQMhwH6x6QEY5a79597voHRFTK/WudYbd13CQWFVNKW + 9yisVuw2T2/vdroEUuX4SNtvJtsAHjrYWLHCr+gEXUxid/0EDTV+41K9RuVvEbIcv+vBRvfYC/vs + w2D39Tk4ehq9wrWXFFVW4wtudPdRkqKSruNM0o8P6ajCFOAF9laJyUiwbQe8pPj2dDYZJOtV/I7G + yl6uFdYkYHycvIuLStgypujoowYGbaOVxaucHe9nqRDXl/L8/cOvdf7tztx8vp2d62DyPPkDFZMx + XucFAAA= + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Mon, 29 Sep 2025 13:06:15 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=2264 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_google_genai/test_google_genai_reasoning_tokens_with_include_thoughts_async.yaml b/tests/cassettes/test_google_genai/test_google_genai_reasoning_tokens_with_include_thoughts_async.yaml new file mode 100644 index 00000000..685f957e --- /dev/null +++ b/tests/cassettes/test_google_genai/test_google_genai_reasoning_tokens_with_include_thoughts_async.yaml @@ -0,0 +1,69 @@ +interactions: +- request: + body: '{"contents": [{"parts": [{"text": "How many times does the letter ''r'' + appear in the word strawberry?"}], "role": "user"}], "systemInstruction": {"parts": + [{"text": "Think deep and thoroughly step by step."}], "role": "user"}, "generationConfig": + {"thinkingConfig": {"includeThoughts": true, "thinkingBudget": 512}}}' + headers: + Content-Type: + - application/json + user-agent: + - google-genai-sdk/1.34.0 gl-python/3.13.5 + x-goog-api-client: + - google-genai-sdk/1.34.0 gl-python/3.13.5 + method: post + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent + response: + body: + string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\": + [\n {\n \"text\": \"**Breaking Down the Letter Count in + \\\"Strawberry\\\"**\\n\\nOkay, so the challenge is clear: figure out how + many times the letter 'r' appears in the word \\\"strawberry\\\". That's + straightforward enough. Let's break it down methodically.\\n\\nFirst, I need + to isolate the target: \\\"strawberry\\\" is the word in question, and 'r' + is the character we're focusing on. Now, the simplest approach is just a + direct scan. I'll go through the word, character by character, and increment + a counter every time I hit an 'r'.\\n\\n* s... nope.\\n* t... not there.\\n* + \ **r** ... one!\\n* a... keep moving.\\n* w... no 'r' here.\\n* b... + still searching.\\n* e... almost there...\\n* **r** ... two!\\n* **r** + ... three!\\n* y... and we're done.\\n\\nAlright, that's it. It's a quick, + manual count, but effective. The final, definitive answer is: The letter + 'r' appears 3 times in the word \\\"strawberry\\\". Easy peasy.\\n\",\n \"thought\": + true\n },\n {\n \"text\": \"Let's count them:\\n\\ns + - t - **r** - a - w - b - e - **r** - **r** - y\\n\\nThe letter 'r' appears + **3** times in the word \\\"strawberry\\\".\"\n }\n ],\n \"role\": + \"model\"\n },\n \"finishReason\": \"STOP\",\n \"index\": 0\n + \ }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\": 25,\n \"candidatesTokenCount\": + 49,\n \"totalTokenCount\": 246,\n \"promptTokensDetails\": [\n {\n + \ \"modality\": \"TEXT\",\n \"tokenCount\": 25\n }\n ],\n + \ \"thoughtsTokenCount\": 172\n },\n \"modelVersion\": \"gemini-2.5-flash-lite\",\n + \ \"responseId\": \"MoXaaLGnAaiE7M8P3q6RsQM\"\n}\n" + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Mon, 29 Sep 2025 13:10:10 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=2777 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_litellm_gemini/test_litellm_gemini_thinking.yaml b/tests/cassettes/test_litellm_gemini/test_litellm_gemini_thinking.yaml new file mode 100644 index 00000000..56bc20f9 --- /dev/null +++ b/tests/cassettes/test_litellm_gemini/test_litellm_gemini_thinking.yaml @@ -0,0 +1,68 @@ +interactions: +- request: + body: '{"contents":[{"role":"user","parts":[{"text":"How many times does the letter + ''r'' appear in the word strawberry?"}]}],"system_instruction":{"parts":[{"text":"Think + deep and thoroughly step by step."}]},"generationConfig":{"thinkingConfig":{"includeThoughts":true,"thinkingBudget":512}}}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '285' + content-type: + - application/json + host: + - generativelanguage.googleapis.com + user-agent: + - litellm/1.77.1 + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/4VUXU/bMBR976+4y0ulKI2mln3QPSEGG9IYDKpp0jpNbnKbeHXsYDuEDPHfd2/S + lrQghmTq+H6e4+N7PwAIEqFTmQqPLpjCTzoBuG//s81oj9qTYXNEh6Ww/tG3+7vv7cnF4x0HBWF4 + pIVq/kqdgc8RTi3eVKiTBswSBHxB79GG4VzP9cVKNBE4A2egEVPwBpYyqyyCqTzkpoZC6Aa8LNC1 + uVQbDEM7BEdmB1UJUrem2tgU5oHzVtQLtLaJ5wGc+aGjmnwos9wvja0FuZXWLBQWEVgUSjUxnErr + fARnQ6WgpISpqbusXtgM/fT5ClSAeNxrLGZgMzpaoPMgSqolkhxyJFTSQaJQWNVAKi0mHhJTaU9M + xfDVbDkwt2gTU5RKJnRFlJ8cHNTS59RwVilhAe9Ki85Jox0YSxQWCEvBJAuVGUuuxQcOdFxzIZxM + oBTcoqYUicm09BQb820Ahf6pqFdHqqAYa6osfwR8K13FJLVQV4glEP5k1cK8Nh2uGiEzU+YGPFgi + vIYFIO0sEEmta0gSGbohjECbEtfffu/b8neDLqLygq/OaIzXNtH5rr/qvcjFjhWfy+u5VZaDNrzd + 1vC1ift+jPOJT24RX629mi57C+tIWRZW1PK1lCT87kqZ9jYohhnXJeVhtC9hEgeJwcFkrfCXlHwt + SQ/YNkcPNFGVk7fMTRDtvcKcr48forcV9mwP0f+fLqmBwHYAqJNiyhAdc0crDOnVMj+0aloLWtg7 + 3/w2mwfwDNIwnJDPy2hJMEG/7+3+1yOCwBqF3HFhUlQb9y3EgG5CuvwKhTOa3a5nF5dbpgKpU7yj + 49eDTYE2dVA5keE5ekGTUWznX0AvuCj9zKxQHzM1ZBkfdMl6g3THfnC4tnvjhdoNPXwbPcnrPlJV + qfoTtjd8CaRQ0jeMZHbyY9a7cyqw09aGiUGPsI0mdlscjyeDNWUdi9/R8jzhIhkWROBoHL8ZLZVw + +YiqY1s1oKlT0tDBs5Qdjw9+C3HxefxJyJN35+8vJzdvr9y382DwMPgHAjzTXWgGAAA= + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Mon, 29 Sep 2025 13:52:11 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=3030 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_litellm_gemini/test_litellm_gemini_thinking_async.yaml b/tests/cassettes/test_litellm_gemini/test_litellm_gemini_thinking_async.yaml new file mode 100644 index 00000000..c33b546c --- /dev/null +++ b/tests/cassettes/test_litellm_gemini/test_litellm_gemini_thinking_async.yaml @@ -0,0 +1,117 @@ +interactions: +- request: + body: '{"contents":[{"role":"user","parts":[{"text":"How many times does the letter + ''r'' appear in the word strawberry?"}]}],"system_instruction":{"parts":[{"text":"Think + deep and thoroughly step by step."}]},"generationConfig":{"thinkingConfig":{"includeThoughts":true,"thinkingBudget":512}}}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '285' + content-type: + - application/json + host: + - generativelanguage.googleapis.com + user-agent: + - litellm/1.77.1 + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/4VTUW/TQAx+76/w7qUsSiO6Dib6ygYaGttYI4REefAatzma3kXnq7Kq2n/Hlyxp + OoTIQ+T4sz9/tuP9AEAt0GQ6Q0+spvBTPAD7+h0wazwZL0DrEmeJzh9im2ffsyXE01NIUlGU5gRz + 9TBX8NFujQdt5HPmHVaP5NxuLiFzMzd3a9zFwBa8xG+ZHFRoPNefBXkvjqEbwuLAwR1HIuQ35IcM + j45wLTmaIbOVSeCTduzjmqWyLgMB+pmxZEr3Ne7RrciHiOOaCdzaKobrYVEAy6w6shgKa9farGBp + HRAuchHGHs2CwC5D6hRYascQuQje+ByDRGvoNAaMQTgfY6AWrezJaRfpiE5OE0iD0ZHyC2sCApAj + qUpNa2i4ErGap4dO+21Kl2GRKEyvB1rXAq83xAnM9KYsCMjY7SpP5kbFr7aaByAs1rst9bDn+P+/ + QrOhZoEiYjMNa2cfSctR0CmGWLvgTY81YlkSOoYomkRRozX8Av9oNVF9YZ396yBROVtQkLSxGRVt + eNeDWmqjOX8gZGtC2Cy9u+9GobTJ6EncbwdtgZpabRlX9JU8yilhdzCqdHZT+tSuydQHIMjZeUPW + u7wjfNLi3nosjlPHH+K/ePlSquqif5K9a5UmsdB+FzpJr36kvaVKgSNZ7SQGvYG1Sz+WOH4/HryM + rJnid3Ksm3GtaCMDHJ0l70bLAjkfSXWqqypHXFrDdJ2FwIsvN4j3+vNteXlh+Op+Yke/Z9/U4Hnw + B15FvimYBAAA + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Mon, 29 Sep 2025 14:08:44 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=1656 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +- request: + body: '{"contents":[{"role":"user","parts":[{"text":"How many times does the letter + ''r'' appear in the word strawberry?"}]}],"system_instruction":{"parts":[{"text":"Think + deep and thoroughly step by step."}]},"generationConfig":{"thinkingConfig":{"includeThoughts":true,"thinkingBudget":512}}}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '285' + content-type: + - application/json + host: + - generativelanguage.googleapis.com + user-agent: + - litellm/1.77.1 + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent + response: + body: + string: '' + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Mon, 29 Sep 2025 14:08:44 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=1656 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_litellm_gemini/test_litellm_gemini_thinking_with_streaming.yaml b/tests/cassettes/test_litellm_gemini/test_litellm_gemini_thinking_with_streaming.yaml new file mode 100644 index 00000000..78e8b4e4 --- /dev/null +++ b/tests/cassettes/test_litellm_gemini/test_litellm_gemini_thinking_with_streaming.yaml @@ -0,0 +1,82 @@ +interactions: +- request: + body: '{"contents": [{"role": "user", "parts": [{"text": "How many times does + the letter ''r'' appear in the word strawberry?"}]}], "system_instruction": + {"parts": [{"text": "Think deep and thoroughly step by step."}]}, "generationConfig": + {"thinkingConfig": {"includeThoughts": true, "thinkingBudget": 512}}}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '300' + content-type: + - application/json + host: + - generativelanguage.googleapis.com + user-agent: + - litellm/1.77.1 + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:streamGenerateContent?alt=sse + response: + body: + string: "data: {\"candidates\": [{\"content\": {\"parts\": [{\"text\": \"**Calculating + Letter Frequency**\\n\\nI've zeroed in on the core question: figuring out + how many times the letter 'r' shows up in \\\"strawberry.\\\" The target is + clear, and I'm now moving forward to count that letter. The next step is to + examine the word letter by letter, to see the r's!\\n\\n\\n\",\"thought\": + true}],\"role\": \"model\"},\"index\": 0}],\"usageMetadata\": {\"promptTokenCount\": + 24,\"totalTokenCount\": 96,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": + 24}],\"thoughtsTokenCount\": 72},\"modelVersion\": \"gemini-2.5-flash-lite\",\"responseId\": + \"dpDaaLL7BsjtkdUPlInyiAM\"}\r\n\r\ndata: {\"candidates\": [{\"content\": + {\"parts\": [{\"text\": \"**Pinpointing The Final Count**\\n\\nI've made swift + progress. I successfully broke down \\\"strawberry\\\" letter by letter. I + identified all the instances of 'r', meticulously keeping a running tally. + Now I'm ready to declare the precise frequency of 'r' within the word.\\n\\n\\n\",\"thought\": + true}],\"role\": \"model\"},\"index\": 0}],\"usageMetadata\": {\"promptTokenCount\": + 24,\"totalTokenCount\": 211,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": + 24}],\"thoughtsTokenCount\": 187},\"modelVersion\": \"gemini-2.5-flash-lite\",\"responseId\": + \"dpDaaLL7BsjtkdUPlInyiAM\"}\r\n\r\ndata: {\"candidates\": [{\"content\": + {\"parts\": [{\"text\": \"Let's count the letter 'r' in the word \\\"strawberry\\\":\\n\\ns + - t - **r** - a - w - b - e - **r** - **r** - y\\n\\nThe letter 'r' appears + **\"}],\"role\": \"model\"},\"index\": 0}],\"usageMetadata\": {\"promptTokenCount\": + 24,\"candidatesTokenCount\": 47,\"totalTokenCount\": 258,\"promptTokensDetails\": + [{\"modality\": \"TEXT\",\"tokenCount\": 24}],\"thoughtsTokenCount\": 187},\"modelVersion\": + \"gemini-2.5-flash-lite\",\"responseId\": \"dpDaaLL7BsjtkdUPlInyiAM\"}\r\n\r\ndata: + {\"candidates\": [{\"content\": {\"parts\": [{\"text\": \"3** times.\"}],\"role\": + \"model\"},\"finishReason\": \"STOP\",\"index\": 0}],\"usageMetadata\": {\"promptTokenCount\": + 24,\"candidatesTokenCount\": 51,\"totalTokenCount\": 262,\"promptTokensDetails\": + [{\"modality\": \"TEXT\",\"tokenCount\": 24}],\"thoughtsTokenCount\": 187},\"modelVersion\": + \"gemini-2.5-flash-lite\",\"responseId\": \"dpDaaLL7BsjtkdUPlInyiAM\"}\r\n\r\n" + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Disposition: + - attachment + Content-Type: + - text/event-stream + Date: + - Mon, 29 Sep 2025 13:58:15 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=1685 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_litellm_gemini/test_litellm_gemini_thinking_with_streaming_async.yaml b/tests/cassettes/test_litellm_gemini/test_litellm_gemini_thinking_with_streaming_async.yaml new file mode 100644 index 00000000..cf38c73e --- /dev/null +++ b/tests/cassettes/test_litellm_gemini/test_litellm_gemini_thinking_with_streaming_async.yaml @@ -0,0 +1,133 @@ +interactions: +- request: + body: '{"contents": [{"role": "user", "parts": [{"text": "How many times does + the letter ''r'' appear in the word strawberry?"}]}], "system_instruction": + {"parts": [{"text": "Think deep and thoroughly step by step."}]}, "generationConfig": + {"thinkingConfig": {"includeThoughts": true, "thinkingBudget": 512}}}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '300' + content-type: + - application/json + host: + - generativelanguage.googleapis.com + user-agent: + - litellm/1.77.1 + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:streamGenerateContent?alt=sse + response: + body: + string: "data: {\"candidates\": [{\"content\": {\"parts\": [{\"text\": \"**Determining + the Approach**\\n\\nI've zeroed in on the central issue: figuring out how + many times 'r' appears in \\\"strawberry\\\". First, I need to pinpoint the + target word and letter. Now, the next logical step is to scan the word character + by character, counting each instance of the letter 'r'. This appears to be + a basic search operation that can be done using a counting variable and iteration.\\n\\n\\n\",\"thought\": + true}],\"role\": \"model\"},\"index\": 0}],\"usageMetadata\": {\"promptTokenCount\": + 24,\"totalTokenCount\": 95,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": + 24}],\"thoughtsTokenCount\": 71},\"modelVersion\": \"gemini-2.5-flash-lite\",\"responseId\": + \"7ZLaaPHtG6nr7M8Pz-j9wA4\"}\r\n\r\ndata: {\"candidates\": [{\"content\": + {\"parts\": [{\"text\": \"**Counting the Instances**\\n\\nI've determined + the answer, the character 'r' appears three times in the word \\\"strawberry\\\". + I achieved this by meticulously scanning the word, character by character, + and incrementing a counter each time I encountered the letter 'r'. I've confirmed + that counting three times provides an accurate count.\\n\\n\\n\",\"thought\": + true}],\"role\": \"model\"},\"index\": 0}],\"usageMetadata\": {\"promptTokenCount\": + 24,\"totalTokenCount\": 236,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": + 24}],\"thoughtsTokenCount\": 212},\"modelVersion\": \"gemini-2.5-flash-lite\",\"responseId\": + \"7ZLaaPHtG6nr7M8Pz-j9wA4\"}\r\n\r\ndata: {\"candidates\": [{\"content\": + {\"parts\": [{\"text\": \"Let's count them:\\n\\nS - t - **r** - a - w - b + - e - **r** - **r** - y\\n\\nThe letter 'r' appears **3** times in the word + \\\"strawberry\\\".\"}],\"role\": \"model\"},\"finishReason\": \"STOP\",\"index\": + 0}],\"usageMetadata\": {\"promptTokenCount\": 24,\"candidatesTokenCount\": + 47,\"totalTokenCount\": 283,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": + 24}],\"thoughtsTokenCount\": 212},\"modelVersion\": \"gemini-2.5-flash-lite\",\"responseId\": + \"7ZLaaPHtG6nr7M8Pz-j9wA4\"}\r\n\r\n" + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Disposition: + - attachment + Content-Type: + - text/event-stream + Date: + - Mon, 29 Sep 2025 14:08:46 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=1132 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +- request: + body: '{"contents": [{"role": "user", "parts": [{"text": "How many times does + the letter ''r'' appear in the word strawberry?"}]}], "system_instruction": + {"parts": [{"text": "Think deep and thoroughly step by step."}]}, "generationConfig": + {"thinkingConfig": {"includeThoughts": true, "thinkingBudget": 512}}}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '300' + content-type: + - application/json + host: + - generativelanguage.googleapis.com + user-agent: + - litellm/1.77.1 + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:streamGenerateContent?alt=sse + response: + body: + string: '' + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Disposition: + - attachment + Content-Type: + - text/event-stream + Date: + - Mon, 29 Sep 2025 14:08:46 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=1132 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/conftest.py b/tests/conftest.py index 9613ad1d..d99f7cfb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -99,5 +99,6 @@ def vcr_config(): "api-key", "x-goog-api-key", "x-api-key", - ] + ], + "filter_query_parameters": ["key"], } diff --git a/tests/test_google_genai.py b/tests/test_google_genai.py index d62f3ac7..1ae58052 100644 --- a/tests/test_google_genai.py +++ b/tests/test_google_genai.py @@ -572,6 +572,218 @@ def test_google_genai_output_json_schema(span_exporter: InMemorySpanExporter): ) +@pytest.mark.vcr +def test_google_genai_reasoning_tokens(span_exporter: InMemorySpanExporter): + client = Client(api_key="123") + response = client.models.generate_content( + model="gemini-2.5-flash-lite", + contents=[ + { + "role": "user", + "parts": [ + { + "text": "How many times does the letter 'r' appear in the word strawberry?" + }, + ], + } + ], + config=types.GenerateContentConfig( + system_instruction={"text": "Think deep and thoroughly step by step."}, + thinking_config=types.ThinkingConfig(thinking_budget=512), + ), + ) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "gemini.generate_content" + assert ( + spans[0].attributes["gen_ai.usage.reasoning_tokens"] + == response.usage_metadata.thoughts_token_count + ) + assert ( + spans[0].attributes["gen_ai.usage.output_tokens"] + == response.usage_metadata.candidates_token_count + + response.usage_metadata.thoughts_token_count + ) + assert ( + spans[0].attributes["gen_ai.usage.input_tokens"] + == response.usage_metadata.prompt_token_count + ) + assert ( + spans[0].attributes["llm.usage.total_tokens"] + == response.usage_metadata.total_token_count + ) + assert ( + spans[0].attributes["llm.usage.total_tokens"] + == spans[0].attributes["gen_ai.usage.input_tokens"] + + spans[0].attributes["gen_ai.usage.output_tokens"] + ) + + +@pytest.mark.vcr +def test_google_genai_reasoning_tokens_with_include_thoughts( + span_exporter: InMemorySpanExporter, +): + client = Client(api_key="123") + response = client.models.generate_content( + model="gemini-2.5-flash-lite", + contents=[ + { + "role": "user", + "parts": [ + { + "text": "How many times does the letter 'r' appear in the word strawberry?" + }, + ], + } + ], + config=types.GenerateContentConfig( + system_instruction={"text": "Think deep and thoroughly step by step."}, + thinking_config=types.ThinkingConfig( + thinking_budget=512, include_thoughts=True + ), + ), + ) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "gemini.generate_content" + assert ( + spans[0].attributes["gen_ai.usage.reasoning_tokens"] + == response.usage_metadata.thoughts_token_count + ) + assert ( + spans[0].attributes["gen_ai.usage.output_tokens"] + == response.usage_metadata.candidates_token_count + + response.usage_metadata.thoughts_token_count + ) + assert ( + spans[0].attributes["gen_ai.usage.input_tokens"] + == response.usage_metadata.prompt_token_count + ) + assert ( + spans[0].attributes["llm.usage.total_tokens"] + == response.usage_metadata.total_token_count + ) + assert ( + spans[0].attributes["llm.usage.total_tokens"] + == spans[0].attributes["gen_ai.usage.input_tokens"] + + spans[0].attributes["gen_ai.usage.output_tokens"] + ) + span_output = json.loads(spans[0].attributes["gen_ai.completion.0.content"]) + assert span_output[0]["type"] == "text" + assert span_output[0]["text"] == response.parts[0].text + assert span_output[1]["type"] == "text" + assert span_output[1]["text"] == response.text + + +@pytest.mark.vcr(record_mode="once") +@pytest.mark.asyncio +async def test_google_genai_reasoning_tokens_async(span_exporter: InMemorySpanExporter): + client = Client(api_key="123") + response = await client.aio.models.generate_content( + model="gemini-2.5-flash-lite", + contents=[ + { + "role": "user", + "parts": [ + { + "text": "How many times does the letter 'r' appear in the word strawberry?" + }, + ], + } + ], + config=types.GenerateContentConfig( + system_instruction={"text": "Think deep and thoroughly step by step."}, + thinking_config=types.ThinkingConfig(thinking_budget=512), + ), + ) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "gemini.generate_content" + assert ( + spans[0].attributes["gen_ai.usage.reasoning_tokens"] + == response.usage_metadata.thoughts_token_count + ) + assert ( + spans[0].attributes["gen_ai.usage.output_tokens"] + == response.usage_metadata.candidates_token_count + + response.usage_metadata.thoughts_token_count + ) + assert ( + spans[0].attributes["gen_ai.usage.input_tokens"] + == response.usage_metadata.prompt_token_count + ) + assert ( + spans[0].attributes["llm.usage.total_tokens"] + == response.usage_metadata.total_token_count + ) + assert ( + spans[0].attributes["llm.usage.total_tokens"] + == spans[0].attributes["gen_ai.usage.input_tokens"] + + spans[0].attributes["gen_ai.usage.output_tokens"] + ) + + +@pytest.mark.vcr(record_mode="once") +@pytest.mark.asyncio +async def test_google_genai_reasoning_tokens_with_include_thoughts_async( + span_exporter: InMemorySpanExporter, +): + client = Client(api_key="123") + response = await client.aio.models.generate_content( + model="gemini-2.5-flash-lite", + contents=[ + { + "role": "user", + "parts": [ + { + "text": "How many times does the letter 'r' appear in the word strawberry?" + }, + ], + } + ], + config=types.GenerateContentConfig( + system_instruction={"text": "Think deep and thoroughly step by step."}, + thinking_config=types.ThinkingConfig( + thinking_budget=512, include_thoughts=True + ), + ), + ) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "gemini.generate_content" + assert ( + spans[0].attributes["gen_ai.usage.reasoning_tokens"] + == response.usage_metadata.thoughts_token_count + ) + assert ( + spans[0].attributes["gen_ai.usage.output_tokens"] + == response.usage_metadata.candidates_token_count + + response.usage_metadata.thoughts_token_count + ) + assert ( + spans[0].attributes["gen_ai.usage.input_tokens"] + == response.usage_metadata.prompt_token_count + ) + assert ( + spans[0].attributes["llm.usage.total_tokens"] + == response.usage_metadata.total_token_count + ) + assert ( + spans[0].attributes["llm.usage.total_tokens"] + == spans[0].attributes["gen_ai.usage.input_tokens"] + + spans[0].attributes["gen_ai.usage.output_tokens"] + ) + span_output = json.loads(spans[0].attributes["gen_ai.completion.0.content"]) + assert span_output[0]["type"] == "text" + assert span_output[0]["text"] == response.parts[0].text + assert span_output[1]["type"] == "text" + assert span_output[1]["text"] == response.text + + def test_google_genai_error(span_exporter: InMemorySpanExporter): # Invalid key on purpose client = Client(api_key="123") diff --git a/tests/test_litellm_gemini.py b/tests/test_litellm_gemini.py new file mode 100644 index 00000000..00154188 --- /dev/null +++ b/tests/test_litellm_gemini.py @@ -0,0 +1,246 @@ +import asyncio +import json +import litellm +import os +import pytest +import time + +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter + +from lmnr.opentelemetry_lib.litellm import LaminarLiteLLMCallback +from lmnr import Laminar + +SLEEP_TO_FLUSH_SECONDS = 0.05 + + +@pytest.mark.vcr +def test_litellm_gemini_thinking( + span_exporter: InMemorySpanExporter, litellm_callback: LaminarLiteLLMCallback +): + # The actual key was used during recording and the request/response was saved + # to the VCR cassette. + os.environ["GEMINI_API_KEY"] = "test-key" + + litellm.callbacks = [litellm_callback] + response = litellm.completion( + model="gemini/gemini-2.5-flash-lite", + messages=[ + {"role": "system", "content": "Think deep and thoroughly step by step."}, + { + "role": "user", + "content": "How many times does the letter 'r' appear in the word strawberry?", + }, + ], + thinking={"type": "enabled", "budget_tokens": 512, "include_thoughts": True}, + ) + + # Wait for the callback to complete and flush the spans + time.sleep(SLEEP_TO_FLUSH_SECONDS) + Laminar.flush() + time.sleep(SLEEP_TO_FLUSH_SECONDS) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "litellm.completion" + assert spans[0].attributes["gen_ai.request.model"] == "gemini-2.5-flash-lite" + assert spans[0].attributes["gen_ai.response.model"] == "gemini-2.5-flash-lite" + assert spans[0].attributes["gen_ai.response.id"] == response.id + assert spans[0].attributes["gen_ai.usage.input_tokens"] == 24 + assert spans[0].attributes["gen_ai.usage.output_tokens"] == 272 + assert spans[0].attributes["gen_ai.usage.reasoning_tokens"] == 223 + assert spans[0].attributes["llm.usage.total_tokens"] == 296 + assert ( + spans[0].attributes["gen_ai.prompt.0.content"] + == "Think deep and thoroughly step by step." + ) + assert spans[0].attributes["gen_ai.prompt.0.role"] == "system" + assert ( + spans[0].attributes["gen_ai.prompt.1.content"] + == "How many times does the letter 'r' appear in the word strawberry?" + ) + assert spans[0].attributes["gen_ai.prompt.1.role"] == "user" + assert json.loads(spans[0].attributes["gen_ai.completion.0.content"]) == [ + {"type": "text", "text": response.choices[0].message.reasoning_content}, + {"type": "text", "text": response.choices[0].message.content}, + ] + assert spans[0].attributes["gen_ai.completion.0.role"] == "assistant" + assert spans[0].attributes["gen_ai.system"] == "gemini" + + +@pytest.mark.vcr +def test_litellm_gemini_thinking_with_streaming( + span_exporter: InMemorySpanExporter, litellm_callback: LaminarLiteLLMCallback +): + # The actual key was used during recording and the request/response was saved + # to the VCR cassette. + os.environ["GEMINI_API_KEY"] = "test-key" + + litellm.callbacks = [litellm_callback] + response = litellm.completion( + model="gemini/gemini-2.5-flash-lite", + messages=[ + {"role": "system", "content": "Think deep and thoroughly step by step."}, + { + "role": "user", + "content": "How many times does the letter 'r' appear in the word strawberry?", + }, + ], + thinking={"type": "enabled", "budget_tokens": 512, "include_thoughts": True}, + stream=True, + ) + + final_response = "" + final_reasoning_response = "" + for chunk in response: + final_response += chunk.choices[0].delta.content or "" + if hasattr(chunk.choices[0].delta, "reasoning_content"): + final_reasoning_response += chunk.choices[0].delta.reasoning_content or "" + assert final_reasoning_response + + # Wait for the callback to complete and flush the spans + time.sleep(SLEEP_TO_FLUSH_SECONDS) + Laminar.flush() + time.sleep(SLEEP_TO_FLUSH_SECONDS) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "litellm.completion" + assert spans[0].attributes["gen_ai.request.model"] == "gemini-2.5-flash-lite" + assert spans[0].attributes["gen_ai.response.model"] == "gemini-2.5-flash-lite" + assert spans[0].attributes["gen_ai.usage.input_tokens"] == 24 + assert spans[0].attributes["gen_ai.usage.output_tokens"] == 238 + assert spans[0].attributes["gen_ai.usage.reasoning_tokens"] == 187 + assert spans[0].attributes["llm.usage.total_tokens"] == 262 + assert ( + spans[0].attributes["gen_ai.prompt.0.content"] + == "Think deep and thoroughly step by step." + ) + assert spans[0].attributes["gen_ai.prompt.0.role"] == "system" + assert ( + spans[0].attributes["gen_ai.prompt.1.content"] + == "How many times does the letter 'r' appear in the word strawberry?" + ) + assert spans[0].attributes["gen_ai.prompt.1.role"] == "user" + assert json.loads(spans[0].attributes["gen_ai.completion.0.content"]) == [ + {"type": "text", "text": final_reasoning_response}, + {"type": "text", "text": final_response}, + ] + assert spans[0].attributes["gen_ai.completion.0.role"] == "assistant" + assert spans[0].attributes["gen_ai.system"] == "gemini" + + +@pytest.mark.vcr +@pytest.mark.asyncio +async def test_litellm_gemini_thinking_async( + span_exporter: InMemorySpanExporter, litellm_callback: LaminarLiteLLMCallback +): + # The actual key was used during recording and the request/response was saved + # to the VCR cassette. + os.environ["GEMINI_API_KEY"] = "test-key" + + litellm.callbacks = [litellm_callback] + response = await litellm.acompletion( + model="gemini/gemini-2.5-flash-lite", + messages=[ + {"role": "system", "content": "Think deep and thoroughly step by step."}, + { + "role": "user", + "content": "How many times does the letter 'r' appear in the word strawberry?", + }, + ], + thinking={"type": "enabled", "budget_tokens": 512, "include_thoughts": True}, + ) + + # Wait for the callback to complete and flush the spans + await asyncio.sleep(SLEEP_TO_FLUSH_SECONDS) + Laminar.flush() + await asyncio.sleep(SLEEP_TO_FLUSH_SECONDS) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "litellm.completion" + assert spans[0].attributes["gen_ai.request.model"] == "gemini-2.5-flash-lite" + assert spans[0].attributes["gen_ai.response.model"] == "gemini-2.5-flash-lite" + assert spans[0].attributes["gen_ai.response.id"] == response.id + assert spans[0].attributes["gen_ai.usage.input_tokens"] == 24 + assert spans[0].attributes["gen_ai.usage.output_tokens"] == 195 + assert spans[0].attributes["gen_ai.usage.reasoning_tokens"] == 161 + assert spans[0].attributes["llm.usage.total_tokens"] == 219 + assert ( + spans[0].attributes["gen_ai.prompt.0.content"] + == "Think deep and thoroughly step by step." + ) + assert spans[0].attributes["gen_ai.prompt.0.role"] == "system" + assert ( + spans[0].attributes["gen_ai.prompt.1.content"] + == "How many times does the letter 'r' appear in the word strawberry?" + ) + assert spans[0].attributes["gen_ai.prompt.1.role"] == "user" + assert json.loads(spans[0].attributes["gen_ai.completion.0.content"]) == [ + {"type": "text", "text": response.choices[0].message.reasoning_content}, + {"type": "text", "text": response.choices[0].message.content}, + ] + assert spans[0].attributes["gen_ai.completion.0.role"] == "assistant" + assert spans[0].attributes["gen_ai.system"] == "gemini" + + +@pytest.mark.vcr +@pytest.mark.asyncio +async def test_litellm_gemini_thinking_with_streaming_async( + span_exporter: InMemorySpanExporter, litellm_callback: LaminarLiteLLMCallback +): + # The actual key was used during recording and the request/response was saved + # to the VCR cassette. + os.environ["GEMINI_API_KEY"] = "test-key" + + litellm.callbacks = [litellm_callback] + response = await litellm.acompletion( + model="gemini/gemini-2.5-flash-lite", + messages=[ + {"role": "system", "content": "Think deep and thoroughly step by step."}, + { + "role": "user", + "content": "How many times does the letter 'r' appear in the word strawberry?", + }, + ], + thinking={"type": "enabled", "budget_tokens": 512, "include_thoughts": True}, + stream=True, + ) + + final_response = "" + final_reasoning_response = "" + async for chunk in response: + final_response += chunk.choices[0].delta.content or "" + if hasattr(chunk.choices[0].delta, "reasoning_content"): + final_reasoning_response += chunk.choices[0].delta.reasoning_content or "" + + # Wait for the callback to complete and flush the spans + await asyncio.sleep(SLEEP_TO_FLUSH_SECONDS) + Laminar.flush() + await asyncio.sleep(SLEEP_TO_FLUSH_SECONDS) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "litellm.completion" + assert spans[0].attributes["gen_ai.request.model"] == "gemini-2.5-flash-lite" + assert spans[0].attributes["gen_ai.response.model"] == "gemini-2.5-flash-lite" + assert spans[0].attributes["gen_ai.usage.input_tokens"] == 24 + assert spans[0].attributes["gen_ai.usage.output_tokens"] == 259 + assert spans[0].attributes["gen_ai.usage.reasoning_tokens"] == 212 + assert spans[0].attributes["llm.usage.total_tokens"] == 283 + assert ( + spans[0].attributes["gen_ai.prompt.0.content"] + == "Think deep and thoroughly step by step." + ) + assert spans[0].attributes["gen_ai.prompt.0.role"] == "system" + assert ( + spans[0].attributes["gen_ai.prompt.1.content"] + == "How many times does the letter 'r' appear in the word strawberry?" + ) + assert spans[0].attributes["gen_ai.prompt.1.role"] == "user" + assert json.loads(spans[0].attributes["gen_ai.completion.0.content"]) == [ + {"type": "text", "text": final_reasoning_response}, + {"type": "text", "text": final_response}, + ] + assert spans[0].attributes["gen_ai.completion.0.role"] == "assistant" + assert spans[0].attributes["gen_ai.system"] == "gemini"