2 changes: 1 addition & 1 deletion .release-please-manifest.json
@@ -1,3 +1,3 @@
{
".": "0.4.0-alpha.5"
".": "0.4.0-alpha.6"
}
8 changes: 4 additions & 4 deletions .stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 98
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-8ca5cbc3919d101274c4f94c6046257b410584281f4e05ee4dc0ebacd7adb355.yml
openapi_spec_hash: ee5e6406a8e0bfb84f810c2123e86ea5
config_hash: 99e1ad2a9fd4559a679d06f9115787a4
configured_endpoints: 89
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-af20fa1866f461e9fef4f7fd226d757b0dddee907e2a083fa582ac0580735e20.yml
openapi_spec_hash: 68caf264f8ade02c34456c526d7300b1
config_hash: e8a35d9d37cb4774b4b0fe1b167dc156
22 changes: 22 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,27 @@
# Changelog

## 0.4.0-alpha.6 (2025-11-12)

Full Changelog: [v0.4.0-alpha.5...v0.4.0-alpha.6](https://github.com/llamastack/llama-stack-client-python/compare/v0.4.0-alpha.5...v0.4.0-alpha.6)

### Features

* add new API filter for all non-deprecated APIs ([59c62f0](https://github.com/llamastack/llama-stack-client-python/commit/59c62f0b8a74b880d5841c073aa01e3d57205cdd))
* Adding option to return embeddings and metadata from `/vector_stores/*/files/*/content` and UI updates ([696ed27](https://github.com/llamastack/llama-stack-client-python/commit/696ed279d7abb9386dff490693cce8cd3ec8dbe6))
* Implement the 'max_tool_calls' parameter for the Responses API ([9f57efe](https://github.com/llamastack/llama-stack-client-python/commit/9f57efef4fcbc7fdd4e59d3f3be40192915464d9))


### Bug Fixes

* compat with Python 3.14 ([b971369](https://github.com/llamastack/llama-stack-client-python/commit/b971369a0ca187ff7641fe27365ce9abe27bb994))
* **compat:** update signatures of `model_dump` and `model_dump_json` for Pydantic v1 ([f17bdac](https://github.com/llamastack/llama-stack-client-python/commit/f17bdacb4022ae91eb8463a04592f6f0920bb719))
* **docs:** correct inconsistent python version requirement ([c4b7aaa](https://github.com/llamastack/llama-stack-client-python/commit/c4b7aaa86cb14ffef0604d7aa85a94a633f820af))


### Chores

* **package:** drop Python 3.8 support ([f6b66d4](https://github.com/llamastack/llama-stack-client-python/commit/f6b66d4a5151f667e296397289eb25197aaca517))

## 0.4.0-alpha.5 (2025-11-04)

Full Changelog: [v0.4.0-alpha.4...v0.4.0-alpha.5](https://github.com/llamastack/llama-stack-client-python/compare/v0.4.0-alpha.4...v0.4.0-alpha.5)
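The `max_tool_calls` entry above adds a cap on how many tool invocations the server may perform while answering a single Responses API request. A minimal sketch of the new parameter, assuming a locally running Llama Stack server; the base URL, model id, and the `web_search` tool are placeholders, and `print(response)` stands in for real output handling:

```python
from llama_stack_client import LlamaStackClient

# Placeholder base URL and model id; adjust for your deployment.
client = LlamaStackClient(base_url="http://localhost:8321")

response = client.responses.create(
    model="Llama-3.3-70B-Instruct",
    input="What's the weather like in Paris right now?",
    tools=[{"type": "web_search"}],
    # New in 0.4.0-alpha.6: stop after at most one tool invocation.
    max_tool_calls=1,
)
print(response)
```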
66 changes: 40 additions & 26 deletions README.md
@@ -4,7 +4,7 @@
[![PyPI version](https://img.shields.io/pypi/v/llama_stack_client.svg)](https://pypi.org/project/llama_stack_client/) [![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack-client)](https://pypi.org/project/llama-stack-client/)
[![Discord](https://img.shields.io/discord/1257833999603335178)](https://discord.gg/llama-stack)

The Llama Stack Client Python library provides convenient access to the Llama Stack Client REST API from any Python 3.7+
The Llama Stack Client Python library provides convenient access to the Llama Stack Client REST API from any Python 3.12+
application. The library includes type definitions for all request params and response fields,
and offers both synchronous and asynchronous clients powered by [httpx](https://github.com/encode/httpx).

@@ -33,10 +33,15 @@ from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

response = client.models.register(
model_id="model_id",
completion = client.chat.completions.create(
messages=[
{
"content": "string",
"role": "user",
}
],
model="model",
)
print(response.identifier)
```

While you can provide an `api_key` keyword argument, we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `LLAMA_STACK_CLIENT_API_KEY="My API Key"` to your `.env` file so that your API Key is not stored in source control.
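A minimal sketch of the python-dotenv pattern recommended above; it assumes a `.env` file containing `LLAMA_STACK_CLIENT_API_KEY="My API Key"` next to the script:

```python
from dotenv import load_dotenv  # pip install python-dotenv

from llama_stack_client import LlamaStackClient

load_dotenv()  # loads .env into the process environment

# The client reads LLAMA_STACK_CLIENT_API_KEY from the environment,
# so the key never appears in source control.
client = LlamaStackClient()
```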
@@ -48,33 +53,33 @@ llama-stack-client inference chat-completion --message "hello, what model are you?"

```python
OpenAIChatCompletion(
id='AmivnS0iMv-mmEE4_A0DK1T',
id="AmivnS0iMv-mmEE4_A0DK1T",
choices=[
OpenAIChatCompletionChoice(
finish_reason='stop',
finish_reason="stop",
index=0,
message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam(
role='assistant',
role="assistant",
content="Hello! I am an AI designed by Meta AI, and my model is a type of recurrent neural network (RNN) called a transformer. My specific architecture is based on the BERT (Bidirectional Encoder Representations from Transformers) model, which is a pre-trained language model that has been fine-tuned for a variety of natural language processing tasks.\n\nHere are some key details about my model:\n\n* **Model type:** Transformer-based language model\n* **Architecture:** BERT (Bidirectional Encoder Representations from Transformers)\n* **Training data:** A massive corpus of text data, including but not limited to:\n\t+ Web pages\n\t+ Books\n\t+ Articles\n\t+ Forums\n\t+ Social media platforms\n* **Parameters:** My model has approximately 1.5 billion parameters, which allows me to understand and generate human-like language.\n* **Capabilities:** I can perform a wide range of tasks, including but not limited to:\n\t+ Answering questions\n\t+ Generating text\n\t+ Translating languages\n\t+ Summarizing content\n\t+ Offering suggestions and ideas\n\nI'm constantly learning and improving, so please bear with me if I make any mistakes or don't quite understand what you're asking. How can I assist you today?",
name=None,
tool_calls=None,
function_call=None
function_call=None,
),
logprobs=OpenAIChatCompletionChoiceLogprobs(content=None, refusal=None)
logprobs=OpenAIChatCompletionChoiceLogprobs(content=None, refusal=None),
)
],
created=1749825661,
model='Llama-3.3-70B-Instruct',
object='chat.completion',
model="Llama-3.3-70B-Instruct",
object="chat.completion",
system_fingerprint=None,
usage={
'completion_tokens': 258,
'prompt_tokens': 16,
'total_tokens': 274,
'completion_tokens_details': None,
'prompt_tokens_details': None
"completion_tokens": 258,
"prompt_tokens": 16,
"total_tokens": 274,
"completion_tokens_details": None,
"prompt_tokens_details": None,
},
service_tier=None
service_tier=None,
)
```

@@ -93,10 +98,15 @@ client = AsyncLlamaStackClient(


async def main() -> None:
response = await client.models.register(
model_id="model_id",
completion = await client.chat.completions.create(
messages=[
{
"content": "string",
"role": "user",
}
],
model="model",
)
print(response.identifier)


asyncio.run(main())
@@ -127,10 +137,15 @@ async def main() -> None:
async with AsyncLlamaStackClient(
http_client=DefaultAioHttpClient(),
) as client:
response = await client.models.register(
model_id="model_id",
completion = await client.chat.completions.create(
messages=[
{
"content": "string",
"role": "user",
}
],
model="model",
)
print(response.identifier)


asyncio.run(main())
@@ -198,11 +213,10 @@ from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

client.toolgroups.register(
provider_id="provider_id",
toolgroup_id="toolgroup_id",
tool_defs = client.tool_runtime.list_tools(
mcp_endpoint={"uri": "uri"},
)
print(tool_defs.mcp_endpoint)
```

## File uploads
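The quickstart's `chat.completions.create` call shown throughout this README also supports server-sent streaming in OpenAI-compatible SDKs; a hedged sketch, assuming the usual `stream=True` flag and `choices[0].delta` chunk shape rather than anything confirmed by this diff:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

# Assumption: streaming follows the OpenAI chat-completions chunk format.
stream = client.chat.completions.create(
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    model="model",
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.content:  # deltas may be empty (e.g. a role-only first chunk)
        print(delta.content, end="", flush=True)
print()
```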
21 changes: 1 addition & 20 deletions api.md
@@ -2,12 +2,8 @@

```python
from llama_stack_client.types import (
Document,
InterleavedContent,
InterleavedContentItem,
ParamType,
QueryConfig,
QueryResult,
SafetyViolation,
SamplingParams,
ScoringResult,
@@ -27,8 +23,6 @@ Methods:

- <code title="get /v1/toolgroups">client.toolgroups.<a href="./src/llama_stack_client/resources/toolgroups.py">list</a>() -> <a href="./src/llama_stack_client/types/toolgroup_list_response.py">ToolgroupListResponse</a></code>
- <code title="get /v1/toolgroups/{toolgroup_id}">client.toolgroups.<a href="./src/llama_stack_client/resources/toolgroups.py">get</a>(toolgroup_id) -> <a href="./src/llama_stack_client/types/tool_group.py">ToolGroup</a></code>
- <code title="post /v1/toolgroups">client.toolgroups.<a href="./src/llama_stack_client/resources/toolgroups.py">register</a>(\*\*<a href="src/llama_stack_client/types/toolgroup_register_params.py">params</a>) -> None</code>
- <code title="delete /v1/toolgroups/{toolgroup_id}">client.toolgroups.<a href="./src/llama_stack_client/resources/toolgroups.py">unregister</a>(toolgroup_id) -> None</code>

# Tools

@@ -56,13 +50,6 @@ Methods:
- <code title="post /v1/tool-runtime/invoke">client.tool_runtime.<a href="./src/llama_stack_client/resources/tool_runtime/tool_runtime.py">invoke_tool</a>(\*\*<a href="src/llama_stack_client/types/tool_runtime_invoke_tool_params.py">params</a>) -> <a href="./src/llama_stack_client/types/tool_invocation_result.py">ToolInvocationResult</a></code>
- <code title="get /v1/tool-runtime/list-tools">client.tool_runtime.<a href="./src/llama_stack_client/resources/tool_runtime/tool_runtime.py">list_tools</a>(\*\*<a href="src/llama_stack_client/types/tool_runtime_list_tools_params.py">params</a>) -> <a href="./src/llama_stack_client/types/tool_runtime_list_tools_response.py">ToolRuntimeListToolsResponse</a></code>

## RagTool

Methods:

- <code title="post /v1/tool-runtime/rag-tool/insert">client.tool_runtime.rag_tool.<a href="./src/llama_stack_client/resources/tool_runtime/rag_tool.py">insert</a>(\*\*<a href="src/llama_stack_client/types/tool_runtime/rag_tool_insert_params.py">params</a>) -> None</code>
- <code title="post /v1/tool-runtime/rag-tool/query">client.tool_runtime.rag_tool.<a href="./src/llama_stack_client/resources/tool_runtime/rag_tool.py">query</a>(\*\*<a href="src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py">params</a>) -> <a href="./src/llama_stack_client/types/shared/query_result.py">QueryResult</a></code>

# Responses

Types:
@@ -268,7 +255,7 @@ Methods:
- <code title="post /v1/vector_stores/{vector_store_id}/files/{file_id}">client.vector_stores.files.<a href="./src/llama_stack_client/resources/vector_stores/files.py">update</a>(file_id, \*, vector_store_id, \*\*<a href="src/llama_stack_client/types/vector_stores/file_update_params.py">params</a>) -> <a href="./src/llama_stack_client/types/vector_stores/vector_store_file.py">VectorStoreFile</a></code>
- <code title="get /v1/vector_stores/{vector_store_id}/files">client.vector_stores.files.<a href="./src/llama_stack_client/resources/vector_stores/files.py">list</a>(vector_store_id, \*\*<a href="src/llama_stack_client/types/vector_stores/file_list_params.py">params</a>) -> <a href="./src/llama_stack_client/types/vector_stores/vector_store_file.py">SyncOpenAICursorPage[VectorStoreFile]</a></code>
- <code title="delete /v1/vector_stores/{vector_store_id}/files/{file_id}">client.vector_stores.files.<a href="./src/llama_stack_client/resources/vector_stores/files.py">delete</a>(file_id, \*, vector_store_id) -> <a href="./src/llama_stack_client/types/vector_stores/file_delete_response.py">FileDeleteResponse</a></code>
- <code title="get /v1/vector_stores/{vector_store_id}/files/{file_id}/content">client.vector_stores.files.<a href="./src/llama_stack_client/resources/vector_stores/files.py">content</a>(file_id, \*, vector_store_id) -> <a href="./src/llama_stack_client/types/vector_stores/file_content_response.py">FileContentResponse</a></code>
- <code title="get /v1/vector_stores/{vector_store_id}/files/{file_id}/content">client.vector_stores.files.<a href="./src/llama_stack_client/resources/vector_stores/files.py">content</a>(file_id, \*, vector_store_id, \*\*<a href="src/llama_stack_client/types/vector_stores/file_content_params.py">params</a>) -> <a href="./src/llama_stack_client/types/vector_stores/file_content_response.py">FileContentResponse</a></code>

## FileBatches

@@ -298,16 +285,13 @@ from llama_stack_client.types import (
Model,
ModelRetrieveResponse,
ModelListResponse,
ModelRegisterResponse,
)
```

Methods:

- <code title="get /v1/models/{model_id}">client.models.<a href="./src/llama_stack_client/resources/models/models.py">retrieve</a>(model_id) -> <a href="./src/llama_stack_client/types/model_retrieve_response.py">ModelRetrieveResponse</a></code>
- <code title="get /v1/models">client.models.<a href="./src/llama_stack_client/resources/models/models.py">list</a>() -> <a href="./src/llama_stack_client/types/model_list_response.py">ModelListResponse</a></code>
- <code title="post /v1/models">client.models.<a href="./src/llama_stack_client/resources/models/models.py">register</a>(\*\*<a href="src/llama_stack_client/types/model_register_params.py">params</a>) -> <a href="./src/llama_stack_client/types/model_register_response.py">ModelRegisterResponse</a></code>
- <code title="delete /v1/models/{model_id}">client.models.<a href="./src/llama_stack_client/resources/models/models.py">unregister</a>(model_id) -> None</code>

## OpenAI

@@ -376,8 +360,6 @@ Methods:

- <code title="get /v1/shields/{identifier}">client.shields.<a href="./src/llama_stack_client/resources/shields.py">retrieve</a>(identifier) -> <a href="./src/llama_stack_client/types/shield.py">Shield</a></code>
- <code title="get /v1/shields">client.shields.<a href="./src/llama_stack_client/resources/shields.py">list</a>() -> <a href="./src/llama_stack_client/types/shield_list_response.py">ShieldListResponse</a></code>
- <code title="delete /v1/shields/{identifier}">client.shields.<a href="./src/llama_stack_client/resources/shields.py">delete</a>(identifier) -> None</code>
- <code title="post /v1/shields">client.shields.<a href="./src/llama_stack_client/resources/shields.py">register</a>(\*\*<a href="src/llama_stack_client/types/shield_register_params.py">params</a>) -> <a href="./src/llama_stack_client/types/shield.py">Shield</a></code>

# Scoring

@@ -409,7 +391,6 @@ Methods:

- <code title="get /v1/scoring-functions/{scoring_fn_id}">client.scoring_functions.<a href="./src/llama_stack_client/resources/scoring_functions.py">retrieve</a>(scoring_fn_id) -> <a href="./src/llama_stack_client/types/scoring_fn.py">ScoringFn</a></code>
- <code title="get /v1/scoring-functions">client.scoring_functions.<a href="./src/llama_stack_client/resources/scoring_functions.py">list</a>() -> <a href="./src/llama_stack_client/types/scoring_function_list_response.py">ScoringFunctionListResponse</a></code>
- <code title="post /v1/scoring-functions">client.scoring_functions.<a href="./src/llama_stack_client/resources/scoring_functions.py">register</a>(\*\*<a href="src/llama_stack_client/types/scoring_function_register_params.py">params</a>) -> None</code>

# Files

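The single addition in api.md is the new `params` argument on `vector_stores.files.content`, which per the changelog lets the endpoint return embeddings and metadata alongside file content. A sketch under the assumption that the options are boolean flags; `include_embeddings` and `include_metadata` are hypothetical names, so check the generated `file_content_params.py` for the real ones:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

# Hypothetical parameter names for illustration only; the actual names are
# defined in src/llama_stack_client/types/vector_stores/file_content_params.py.
content = client.vector_stores.files.content(
    "file_abc123",  # placeholder file id
    vector_store_id="vs_abc123",  # placeholder vector store id
    include_embeddings=True,
    include_metadata=True,
)
print(content)
```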
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "llama_stack_client"
version = "0.4.0-alpha.5"
version = "0.4.0-alpha.6"
description = "The official Python library for the llama-stack-client API"
dynamic = ["readme"]
license = "MIT"