
Commit f18b5eb

fix: Avoid BadRequestError due to invalid max_tokens (#3667)
This patch ensures that if max_tokens is not defined, it is set to None instead of 0 when calling openai_chat_completion. This way, providers (such as Gemini) that cannot handle `max_tokens = 0` will not fail. Issue: #3666
1 parent 00d8414 commit f18b5eb
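To illustrate the failure mode this commit addresses, here is a minimal, hypothetical sketch (not the actual llama-stack code; the helper name and request shape are assumptions) of how a default of `0` ends up as an invalid `max_tokens` in an OpenAI-compatible request, and how a `None` default lets the parameter be omitted entirely:

```python
from dataclasses import dataclass


@dataclass
class SamplingParams:
    # Before this commit the default was 0; after the fix it is None.
    max_tokens: int | None = None


def build_chat_request(params: SamplingParams) -> dict:
    """Hypothetical helper (not llama-stack code): build kwargs for an
    OpenAI-compatible chat completion call, including max_tokens only when
    it is actually set, so providers such as Gemini never see max_tokens=0."""
    request: dict = {
        "model": "example-model",
        "messages": [{"role": "user", "content": "hello"}],
    }
    if params.max_tokens is not None:
        request["max_tokens"] = params.max_tokens
    return request


# With the old default of 0 the request carried max_tokens=0, which some
# providers reject with a BadRequestError; with None the key is simply omitted.
print(build_chat_request(SamplingParams()))                # no "max_tokens" key
print(build_chat_request(SamplingParams(max_tokens=256)))  # "max_tokens": 256
```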


171 files changed: +12728, -8 lines changed


client-sdks/stainless/openapi.yml

Lines changed: 0 additions & 1 deletion
```diff
@@ -11600,7 +11600,6 @@ components:
         description: The sampling strategy.
       max_tokens:
         type: integer
-        default: 0
         description: >-
           The maximum number of tokens that can be generated in the completion.
           The token count of your prompt plus max_tokens cannot exceed the model's
```

docs/static/deprecated-llama-stack-spec.html

Lines changed: 0 additions & 1 deletion
```diff
@@ -3901,7 +3901,6 @@
                 },
                 "max_tokens": {
                     "type": "integer",
-                    "default": 0,
                     "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
                 },
                 "repetition_penalty": {
```

docs/static/deprecated-llama-stack-spec.yaml

Lines changed: 0 additions & 1 deletion
```diff
@@ -2862,7 +2862,6 @@ components:
         description: The sampling strategy.
       max_tokens:
         type: integer
-        default: 0
         description: >-
           The maximum number of tokens that can be generated in the completion.
           The token count of your prompt plus max_tokens cannot exceed the model's
```

docs/static/experimental-llama-stack-spec.html

Lines changed: 0 additions & 1 deletion
```diff
@@ -2376,7 +2376,6 @@
                 },
                 "max_tokens": {
                     "type": "integer",
-                    "default": 0,
                     "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
                 },
                 "repetition_penalty": {
```

docs/static/experimental-llama-stack-spec.yaml

Lines changed: 0 additions & 1 deletion
```diff
@@ -1695,7 +1695,6 @@ components:
         description: The sampling strategy.
       max_tokens:
         type: integer
-        default: 0
         description: >-
           The maximum number of tokens that can be generated in the completion.
           The token count of your prompt plus max_tokens cannot exceed the model's
```

docs/static/stainless-llama-stack-spec.html

Lines changed: 0 additions & 1 deletion
```diff
@@ -15452,7 +15452,6 @@
                 },
                 "max_tokens": {
                     "type": "integer",
-                    "default": 0,
                     "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
                 },
                 "repetition_penalty": {
```

docs/static/stainless-llama-stack-spec.yaml

Lines changed: 0 additions & 1 deletion
```diff
@@ -11600,7 +11600,6 @@ components:
         description: The sampling strategy.
       max_tokens:
         type: integer
-        default: 0
         description: >-
           The maximum number of tokens that can be generated in the completion.
           The token count of your prompt plus max_tokens cannot exceed the model's
```

llama_stack/apis/inference/inference.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -97,7 +97,7 @@ class SamplingParams(BaseModel):
 
     strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
 
-    max_tokens: int | None = 0
+    max_tokens: int | None = None
     repetition_penalty: float | None = 1.0
     stop: list[str] | None = None
 
```
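In client terms, the change above means an unset `max_tokens` now stays `None` rather than defaulting to `0`. A hedged sketch of the effect, using a simplified stand-in model rather than the full `SamplingParams` definition from `llama_stack.apis.inference`:

```python
from pydantic import BaseModel


class SamplingParams(BaseModel):
    # Simplified stand-in showing only the fields visible in the hunk above;
    # the real model also defines a `strategy` field.
    max_tokens: int | None = None   # was `= 0` before this commit
    repetition_penalty: float | None = 1.0
    stop: list[str] | None = None


params = SamplingParams()
# With the new default, an unset max_tokens can simply be excluded from the
# serialized payload (e.g. exclude_none) instead of being forwarded as 0.
print(params.model_dump(exclude_none=True))  # {'repetition_penalty': 1.0}
```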

tests/integration/agents/recordings/007a9180a7aa38e17c1135ebf1f75e0d5ce1ea58e2261deba8c41e51196078ec.json

Lines changed: 59 additions & 0 deletions
Some generated files are not rendered by default.

tests/integration/agents/recordings/0275b5b0278c3188f5530957d25d7eb8ab8a9a14c0b9b31d9a70ad342b02353d.json

Lines changed: 59 additions & 0 deletions
Some generated files are not rendered by default.

0 commit comments