Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A minor (semantic) technicality, but at least in both the OpenAI client and Anthropic, the ID is generated by the LLM API and is not created at the time of the request. It is a response ID (which can be used to track what an LLM said in response to a request). One could theoretically track a request ID separately if needed.

Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,6 @@ class CreateResult(BaseModel):
thought: Optional[str] = None
"""The reasoning text for the completion if available. Used for reasoning models
and additional text content besides function calls."""

request_id: Optional[str] = None
"""The ID of the model API request, used for tracking model usage."""
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ async def create(
finish_reason=data.get("finish_reason", "stop"),
usage=data.get("usage", RequestUsage(prompt_tokens=0, completion_tokens=0)),
cached=True,
request_id=data.get("request_id", None),
)
return result

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,7 @@ async def create(
usage=usage,
cached=False,
thought=thought,
request_id=getattr(result, "request_id", None),
)

# Update usage statistics
Expand Down Expand Up @@ -775,6 +776,7 @@ async def create_stream(
usage=usage,
cached=False,
thought=thought,
request_id=getattr(result, "request_id", None),
)

# Emit the end event.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ async def create(
usage=usage,
cached=False,
thought=thought,
request_id=getattr(result, "request_id", None),
)

self.add_usage(usage)
Expand Down Expand Up @@ -561,6 +562,7 @@ async def create_stream(
usage=usage,
cached=False,
thought=thought,
request_id=getattr(result, "request_id", None),
)

# Log the end of the stream.
Expand Down Expand Up @@ -606,3 +608,4 @@ def __del__(self) -> None:
asyncio.get_running_loop().create_task(self._client.close())
except RuntimeError:
asyncio.run(self._client.close())

Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ async def create(
if not response_tool_calls and not response_text:
logger.debug("DEBUG: No response text found. Returning empty response.")
return CreateResult(
content="", usage=RequestUsage(prompt_tokens=0, completion_tokens=0), finish_reason="stop", cached=False
content="", usage=RequestUsage(prompt_tokens=0, completion_tokens=0), finish_reason="stop", cached=False, request_id=response.get("request_id", None),
)

# Create a CreateResult object
Expand All @@ -373,6 +373,7 @@ async def create(
usage=cast(RequestUsage, response["usage"]),
finish_reason=normalize_stop_reason(finish_reason), # type: ignore
cached=False,
request_id=response.get("request_id", None),
)

# If we are running in the context of a handler we can get the agent_id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,7 @@ async def create(
usage=usage,
cached=False,
logprobs=None,
request_id=response.get("request_id", None),
)

self._total_usage = _add_usage(self._total_usage, usage)
Expand Down Expand Up @@ -737,6 +738,7 @@ async def create_stream(
usage=usage,
cached=False,
logprobs=None,
request_id=chunk.get("request_id", None),
)

# Emit the end event.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,12 +171,13 @@ async def create(
raise ValueError("No more mock responses available")

response = self.chat_completions[self._current_index]
request_id = response.get("request_id", None) if isinstance(response, dict) else None
_, prompt_token_count = self._tokenize(messages)
if isinstance(response, str):
_, output_token_count = self._tokenize(response)
self._cur_usage = RequestUsage(prompt_tokens=prompt_token_count, completion_tokens=output_token_count)
response = CreateResult(
finish_reason="stop", content=response, usage=self._cur_usage, cached=self._cached_bool_value
finish_reason="stop", content=response, usage=self._cur_usage, cached=self._cached_bool_value, request_id=request_id,
)
else:
self._cur_usage = RequestUsage(
Expand Down Expand Up @@ -210,7 +211,9 @@ async def create_stream(
raise ValueError("No more mock responses available")

response = self.chat_completions[self._current_index]
request_id = response.get("request_id", None) if isinstance(response, dict) else None
_, prompt_token_count = self._tokenize(messages)

if isinstance(response, str):
output_tokens, output_token_count = self._tokenize(response)
self._cur_usage = RequestUsage(prompt_tokens=prompt_token_count, completion_tokens=output_token_count)
Expand All @@ -221,7 +224,7 @@ async def create_stream(
else:
yield token
yield CreateResult(
finish_reason="stop", content=response, usage=self._cur_usage, cached=self._cached_bool_value
finish_reason="stop", content=response, usage=self._cur_usage, cached=self._cached_bool_value, request_id=request_id,
)
self._update_total_usage()
else:
Expand Down Expand Up @@ -310,3 +313,4 @@ def _from_config(cls, config: ReplayChatCompletionClientConfig) -> Self:
chat_completions=config.chat_completions,
model_info=config.model_info,
)

Original file line number Diff line number Diff line change
Expand Up @@ -514,13 +514,18 @@ async def create(
thought, content = parse_r1_content(content)
else:
thought = None

request_id = None
if result and hasattr(result[0], "metadata") and isinstance(result[0].metadata, dict):
request_id = result[0].metadata.get("request_id")

return CreateResult(
content=content,
finish_reason=finish_reason,
usage=RequestUsage(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens),
cached=False,
thought=thought,
request_id=request_id,
)

@staticmethod
Expand Down Expand Up @@ -671,11 +676,16 @@ async def create_stream(
)
)
# Yield all function calls in progress
request_id = None
if result and hasattr(result[0], "metadata") and isinstance(result[0].metadata, dict):
request_id = result[0].metadata.get("request_id")

yield CreateResult(
content=calls_to_yield,
finish_reason="function_calls",
usage=RequestUsage(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens),
cached=False,
request_id=request_id,
)
return

Expand All @@ -691,13 +701,18 @@ async def create_stream(
thought = None
if isinstance(accumulated_text, str) and self._model_info["family"] == ModelFamily.R1:
thought, accumulated_text = parse_r1_content(accumulated_text)

request_id = None
if result and hasattr(result[0], "metadata") and isinstance(result[0].metadata, dict):
request_id = result[0].metadata.get("request_id")

result = CreateResult(
content=accumulated_text,
finish_reason="stop",
usage=RequestUsage(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens),
cached=False,
thought=thought,
request_id=request_id,
)

# Emit the end event.
Expand Down