Merged
4 changes: 2 additions & 2 deletions clients/python/llmengine/completion.py
@@ -61,7 +61,7 @@ async def acreate(
temperature (float):
What sampling temperature to use, in the range `[0, 1]`. Higher values like 0.8 will make the output
more random, while lower values like 0.2 will make it more focused and deterministic.
- When temperature is 0 greedy sampling is used.
+ When temperature is 0 [greedy search](https://huggingface.co/docs/transformers/generation_strategies#greedy-search) is used.

stop_sequences (Optional[List[str]]):
One or more sequences where the API will stop generating tokens for the current completion.
@@ -224,7 +224,7 @@ def create(
temperature (float):
What sampling temperature to use, in the range `[0, 1]`. Higher values like 0.8 will make the output
more random, while lower values like 0.2 will make it more focused and deterministic.
- When temperature is 0 greedy sampling is used.
+ When temperature is 0 [greedy search](https://huggingface.co/docs/transformers/generation_strategies#greedy-search) is used.

stop_sequences (Optional[List[str]]):
One or more sequences where the API will stop generating tokens for the current completion.
1 change: 1 addition & 0 deletions docs/guides/completions.md
@@ -34,6 +34,7 @@ print(response.output.text)
- **max_new_tokens:** The maximum number of tokens to generate in the chat completion.
- **temperature:** The sampling temperature to use. Higher values make the output more random,
while lower values will make it more focused and deterministic.
+ When temperature is 0 [greedy search](https://huggingface.co/docs/transformers/generation_strategies#greedy-search) is used.

See the full [Completion API reference documentation](../../api/python_client/#llmengine.Completion) to learn more.
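The greedy-vs-sampling distinction these docstrings describe can be sketched in a few lines. This is a toy illustration of the decoding rule, not llm-engine's actual decoder; `pick_next_token` is a hypothetical helper operating on raw next-token logits:

```python
import math
import random

def pick_next_token(logits, temperature):
    """Choose a next-token index from raw logits.

    temperature == 0 -> greedy search: always take the argmax.
    temperature > 0  -> scale logits by 1/temperature, softmax, sample.
    """
    if temperature == 0:
        # Greedy search: deterministic, always the highest-logit token.
        return max(range(len(logits)), key=lambda i: logits[i])
    # Temperature sampling: lower temperature sharpens the distribution,
    # higher temperature flattens it toward uniform.
    scaled = [x / temperature for x in logits]
    m = max(scaled)  # subtract max for numerical stability
    exps = [math.exp(s - m) for s in scaled]
    total = sum(exps)
    probs = [e / total for e in exps]
    return random.choices(range(len(logits)), weights=probs, k=1)[0]
```

With `temperature=0` the call is fully deterministic (repeated calls return the same index), which is why the docs describe it as greedy search rather than sampling.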
