11 changes: 9 additions & 2 deletions src/llama_stack_client/_base_client.py
@@ -418,10 +418,17 @@ def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0
if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers:
headers[idempotency_header] = options.idempotency_key or self._idempotency_key()

# Don't set the retry count header if it was already set or removed by the caller. We check
# Don't set these headers if they were already set or removed by the caller. We check
# `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case.
if "x-stainless-retry-count" not in (header.lower() for header in custom_headers):
lower_custom_headers = [header.lower() for header in custom_headers]
if "x-stainless-retry-count" not in lower_custom_headers:
headers["x-stainless-retry-count"] = str(retries_taken)
if "x-stainless-read-timeout" not in lower_custom_headers:
timeout = self.timeout if isinstance(options.timeout, NotGiven) else options.timeout
if isinstance(timeout, Timeout):
timeout = timeout.read
if timeout is not None:
headers["x-stainless-read-timeout"] = str(timeout)

return headers
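
As a side note on the new header logic (not part of the diff): the read-timeout value comes from `Timeout.read` when a full `httpx.Timeout` is configured, and either header can be suppressed by the caller because the check inspects `custom_headers`, where an `Omit()` marker counts as "already handled". A minimal sketch, with the `Omit` import path assumed from other Stainless-generated clients:

```python
import httpx

# A Timeout object contributes its read component to the header value.
timeout = httpx.Timeout(timeout=60.0, connect=5.0)
print(timeout.read)  # 60.0 -> "x-stainless-read-timeout: 60.0"

# Hypothetical opt-out: sending Omit() for the header in extra_headers keeps it from
# being set, since the code above checks custom_headers rather than the final dict.
# from llama_stack_client._types import Omit
# client.models.list(extra_headers={"x-stainless-retry-count": Omit()})
```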

42 changes: 40 additions & 2 deletions src/llama_stack_client/_client.py
@@ -98,10 +98,12 @@ class LlamaStackClient(SyncAPIClient):
with_streaming_response: LlamaStackClientWithStreamedResponse

# client options
api_key: str | None

def __init__(
self,
*,
api_key: str | None = None,
base_url: str | httpx.URL | None = None,
api_key: str | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
@@ -123,7 +125,14 @@ def __init__(
_strict_response_validation: bool = False,
provider_data: Mapping[str, Any] | None = None,
) -> None:
"""Construct a new synchronous llama-stack-client client instance."""
"""Construct a new synchronous llama-stack-client client instance.

This automatically infers the `api_key` argument from the `LLAMA_STACK_API_KEY` environment variable if it is not provided.
"""
if api_key is None:
api_key = os.environ.get("LLAMA_STACK_API_KEY")
@vladimirivic (Contributor), Feb 6, 2025:
We should name this LLAMA_STACK_CLIENT_API_KEY so it has the same prefix as LLAMA_STACK_CLIENT_BASE_URL. The env name is defined in the Stainless config.

self.api_key = api_key

if base_url is None:
base_url = os.environ.get("LLAMA_STACK_CLIENT_BASE_URL")
if base_url is None:
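
For reference, a minimal sketch of how the new key inference would be used (not part of the diff; the base URL is a placeholder, and the variable name matches this revision rather than the rename suggested in the comment above):

```python
import os

from llama_stack_client import LlamaStackClient

# Explicit argument wins over the environment.
client = LlamaStackClient(api_key="my-api-key", base_url="http://localhost:8321")

# With no api_key argument, the constructor falls back to LLAMA_STACK_API_KEY.
os.environ["LLAMA_STACK_API_KEY"] = "my-api-key"
client = LlamaStackClient(base_url="http://localhost:8321")
```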
@@ -182,6 +191,14 @@ def __init__(
def qs(self) -> Querystring:
return Querystring(array_format="comma")

@property
Contributor:
@ehhuang how did you get these changes? This was not supposed to be here yet, because adding api_key to the Stainless config was only in a dev branch, not in main. It's OK if we leave this here, though; we will just need to remove the api key changes from the sync_stainless script.

@override
def auth_headers(self) -> dict[str, str]:
api_key = self.api_key
if api_key is None:
return {}
return {"Authorization": f"Bearer {api_key}"}

@property
@override
def default_headers(self) -> dict[str, str | Omit]:
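
A quick sketch of what the new `auth_headers` property produces (illustrative only; placeholder URL and key):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(api_key="my-api-key", base_url="http://localhost:8321")
print(client.auth_headers)  # {'Authorization': 'Bearer my-api-key'}

# Without a key (and with LLAMA_STACK_API_KEY unset) no Authorization header is sent.
anonymous = LlamaStackClient(base_url="http://localhost:8321")
print(anonymous.auth_headers)  # {}
```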
@@ -194,6 +211,7 @@ def default_headers(self) -> dict[str, str | Omit]:
def copy(
self,
*,
api_key: str | None = None,
base_url: str | httpx.URL | None = None,
api_key: str | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
@@ -228,6 +246,7 @@ def copy(

http_client = http_client or self._client
return self.__class__(
api_key=api_key or self.api_key,
base_url=base_url or self.base_url,
api_key=api_key or self.api_key,
timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
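
The widened `copy()` signature allows swapping the key on an otherwise identical client; a one-line sketch continuing the example above (illustrative):

```python
# Everything except the key is carried over from the original client.
admin_client = client.copy(api_key="admin-api-key")
```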
@@ -304,10 +323,12 @@ class AsyncLlamaStackClient(AsyncAPIClient):
with_streaming_response: AsyncLlamaStackClientWithStreamedResponse

# client options
api_key: str | None

def __init__(
self,
*,
api_key: str | None = None,
base_url: str | httpx.URL | None = None,
api_key: str | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
@@ -329,7 +350,14 @@ def __init__(
_strict_response_validation: bool = False,
provider_data: Mapping[str, Any] | None = None,
) -> None:
"""Construct a new async llama-stack-client client instance."""
"""Construct a new async llama-stack-client client instance.

This automatically infers the `api_key` argument from the `LLAMA_STACK_API_KEY` environment variable if it is not provided.
"""
if api_key is None:
api_key = os.environ.get("LLAMA_STACK_API_KEY")
self.api_key = api_key

if base_url is None:
base_url = os.environ.get("LLAMA_STACK_CLIENT_BASE_URL")
if base_url is None:
@@ -388,6 +416,14 @@ def __init__(
def qs(self) -> Querystring:
return Querystring(array_format="comma")

@property
@override
def auth_headers(self) -> dict[str, str]:
api_key = self.api_key
if api_key is None:
return {}
return {"Authorization": f"Bearer {api_key}"}

@property
@override
def default_headers(self) -> dict[str, str | Omit]:
@@ -400,6 +436,7 @@ def default_headers(self) -> dict[str, str | Omit]:
def copy(
self,
*,
api_key: str | None = None,
base_url: str | httpx.URL | None = None,
api_key: str | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
@@ -434,6 +471,7 @@ def copy(

http_client = http_client or self._client
return self.__class__(
api_key=api_key or self.api_key,
base_url=base_url or self.base_url,
api_key=api_key or self.api_key,
timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
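
The async client mirrors the same key inference and auth header behavior; a minimal sketch (illustrative, placeholder URL):

```python
import asyncio

from llama_stack_client import AsyncLlamaStackClient


async def main() -> None:
    # Explicit api_key wins; otherwise LLAMA_STACK_API_KEY is read, as in the sync client.
    client = AsyncLlamaStackClient(api_key="my-api-key", base_url="http://localhost:8321")
    print(client.auth_headers)  # {'Authorization': 'Bearer my-api-key'}


asyncio.run(main())
```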
2 changes: 1 addition & 1 deletion src/llama_stack_client/_constants.py
@@ -6,7 +6,7 @@
OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to"

# default timeout is 1 minute
DEFAULT_TIMEOUT = httpx.Timeout(timeout=60.0, connect=5.0)
DEFAULT_TIMEOUT = httpx.Timeout(timeout=60, connect=5.0)
DEFAULT_MAX_RETRIES = 2
DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=100, max_keepalive_connections=20)
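
The default only applies when no timeout is given; it can still be overridden per client, for example (illustrative, values are placeholders):

```python
import httpx

from llama_stack_client import LlamaStackClient

# Allow slower generations while keeping a short connect timeout.
client = LlamaStackClient(
    base_url="http://localhost:8321",
    timeout=httpx.Timeout(timeout=120.0, connect=5.0),
)
```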

22 changes: 22 additions & 0 deletions src/llama_stack_client/resources/agents/turn.py
@@ -59,6 +59,7 @@ def create(
messages: Iterable[turn_create_params.Message],
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
stream: Literal[False] | NotGiven = NOT_GIVEN,
tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -69,6 +70,8 @@
) -> Turn:
"""
Args:
tool_config: Configuration for tool use.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -88,6 +91,7 @@ def create(
messages: Iterable[turn_create_params.Message],
stream: Literal[True],
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -98,6 +102,8 @@
) -> Stream[AgentTurnResponseStreamChunk]:
"""
Args:
tool_config: Configuration for tool use.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -117,6 +123,7 @@ def create(
messages: Iterable[turn_create_params.Message],
stream: bool,
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -127,6 +134,8 @@
) -> Turn | Stream[AgentTurnResponseStreamChunk]:
"""
Args:
tool_config: Configuration for tool use.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -146,6 +155,7 @@ def create(
messages: Iterable[turn_create_params.Message],
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -165,6 +175,7 @@
"messages": messages,
"documents": documents,
"stream": stream,
"tool_config": tool_config,
"toolgroups": toolgroups,
},
turn_create_params.TurnCreateParams,
@@ -244,6 +255,7 @@ async def create(
messages: Iterable[turn_create_params.Message],
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
stream: Literal[False] | NotGiven = NOT_GIVEN,
tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -254,6 +266,8 @@
) -> Turn:
"""
Args:
tool_config: Configuration for tool use.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -273,6 +287,7 @@ async def create(
messages: Iterable[turn_create_params.Message],
stream: Literal[True],
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -283,6 +298,8 @@
) -> AsyncStream[AgentTurnResponseStreamChunk]:
"""
Args:
tool_config: Configuration for tool use.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -302,6 +319,7 @@ async def create(
messages: Iterable[turn_create_params.Message],
stream: bool,
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -312,6 +330,8 @@
) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
"""
Args:
tool_config: Configuration for tool use.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -331,6 +351,7 @@ async def create(
messages: Iterable[turn_create_params.Message],
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -350,6 +371,7 @@ async def create(
"messages": messages,
"documents": documents,
"stream": stream,
"tool_config": tool_config,
"toolgroups": toolgroups,
},
turn_create_params.TurnCreateParams,
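
For context, a minimal sketch of passing the new `tool_config` when creating a turn (not part of the diff; the `agent_id`/`session_id` arguments, the IDs, and the ToolConfig field shown are assumptions based on the generated `turn_create_params`):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Hypothetical IDs; in practice they come from creating an agent and a session first.
turn = client.agents.turn.create(
    agent_id="agent-123",
    session_id="session-456",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tool_config={"tool_choice": "auto"},  # assumed ToolConfig field
)
print(turn)
```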
20 changes: 20 additions & 0 deletions src/llama_stack_client/resources/batch_inference.py
@@ -72,6 +72,14 @@ def chat_completion(
) -> BatchInferenceChatCompletionResponse:
"""
Args:
response_format: Configuration for JSON schema-guided response generation.

tool_choice: Whether tool use is required or automatic. This is a hint to the model which may
not be followed. It depends on the Instruction Following capabilities of the
model.

tool_prompt_format: Prompt format for calling custom / zero shot tools.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -118,6 +126,8 @@ def completion(
) -> BatchCompletion:
"""
Args:
response_format: Configuration for JSON schema-guided response generation.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -185,6 +195,14 @@ async def chat_completion(
) -> BatchInferenceChatCompletionResponse:
"""
Args:
response_format: Configuration for JSON schema-guided response generation.

tool_choice: Whether tool use is required or automatic. This is a hint to the model which may
not be followed. It depends on the Instruction Following capabilities of the
model.

tool_prompt_format: Prompt format for calling custom / zero shot tools.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -231,6 +249,8 @@ async def completion(
) -> BatchCompletion:
"""
Args:
response_format: Configuration for JSON schema-guided response generation.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
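
To make the newly documented parameters concrete, a minimal sketch of a batch chat completion call (illustrative; the model id, the `messages_batch` shape, and the `response_format` contents are assumptions, not part of the diff):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

response = client.batch_inference.chat_completion(
    model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
    messages_batch=[
        [{"role": "user", "content": "Summarize the theory of relativity in one sentence."}],
        [{"role": "user", "content": "Name three uses of a hash map."}],
    ],
    # JSON schema-guided generation, per the response_format docstring above.
    response_format={
        "type": "json_schema",
        "json_schema": {"type": "object", "properties": {"answer": {"type": "string"}}},
    },
    tool_choice="auto",  # a hint only; depends on the model's instruction-following ability
)
print(response)
```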