From 8b616251454acc0ba0645c95dec8088dfc0ce3ee Mon Sep 17 00:00:00 2001
From: Alex-Welsh
Date: Wed, 29 Jan 2025 16:29:27 +0000
Subject: [PATCH 1/3] Fix llmparams default values, add CI

---
 charts/azimuth-chat/ci/ui-only-values.yaml     |  9 ---------
 charts/azimuth-chat/ci/ui-param-overrides.yaml | 16 ++++++++++++++++
 web-apps/utils/utils.py                        | 12 ++++++------
 3 files changed, 22 insertions(+), 15 deletions(-)
 create mode 100644 charts/azimuth-chat/ci/ui-param-overrides.yaml

diff --git a/charts/azimuth-chat/ci/ui-only-values.yaml b/charts/azimuth-chat/ci/ui-only-values.yaml
index b66347d..bf30ede 100644
--- a/charts/azimuth-chat/ci/ui-only-values.yaml
+++ b/charts/azimuth-chat/ci/ui-only-values.yaml
@@ -5,12 +5,3 @@ azimuth-llm:
     service:
       zenith:
         enabled: false
-    appSettings:
-      # Verify that we can set non-standard LLM params
-      llm_params:
-        max_tokens: 101
-        temperature: 0.1
-        top_p: 0.15
-        top_k: 1
-        presence_penalty: 0.9
-        frequency_penalty: 1
diff --git a/charts/azimuth-chat/ci/ui-param-overrides.yaml b/charts/azimuth-chat/ci/ui-param-overrides.yaml
new file mode 100644
index 0000000..b66347d
--- /dev/null
+++ b/charts/azimuth-chat/ci/ui-param-overrides.yaml
@@ -0,0 +1,16 @@
+azimuth-llm:
+  api:
+    enabled: false
+  ui:
+    service:
+      zenith:
+        enabled: false
+    appSettings:
+      # Verify that we can set non-standard LLM params
+      llm_params:
+        max_tokens: 101
+        temperature: 0.1
+        top_p: 0.15
+        top_k: 1
+        presence_penalty: 0.9
+        frequency_penalty: 1
diff --git a/web-apps/utils/utils.py b/web-apps/utils/utils.py
index 28e7a90..60836b9 100644
--- a/web-apps/utils/utils.py
+++ b/web-apps/utils/utils.py
@@ -33,12 +33,12 @@ class LLMParams(BaseModel):
     https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#extra-parameters
     """
 
-    max_tokens: PositiveInt | None = None
-    temperature: Annotated[float, Field(ge=0, le=2)] | None = None
-    top_p: Annotated[float, Field(gt=0, le=1)] | None = None
-    top_k: Annotated[int, Field(ge=-1)] | None = None
-    frequency_penalty: Annotated[float, Field(ge=-2, le=2)] | None = None
-    presence_penalty: Annotated[float, Field(ge=0 - 2, le=2)] | None = None
+    max_tokens: PositiveInt = 1000
+    temperature: Annotated[float, Field(ge=0, le=2)] = 0
+    top_p: Annotated[float, Field(gt=0, le=1)] = 1
+    top_k: Annotated[int, Field(ge=-1)] = -1
+    frequency_penalty: Annotated[float, Field(ge=-2, le=2)] = 0
+    presence_penalty: Annotated[float, Field(ge=0 - 2, le=2)] = 0
 
     # Make sure we can't smuggle in extra request params / typos
     model_config = ConfigDict(extra="forbid")

From 570714568ed8fc14b7006b8973cf57ce8dc5b50b Mon Sep 17 00:00:00 2001
From: Alex-Welsh
Date: Thu, 13 Feb 2025 11:22:27 +0000
Subject: [PATCH 2/3] Move LLM param defaults to chat app

---
 web-apps/chat/defaults.yml | 10 +++++-----
 web-apps/utils/utils.py    | 12 ++++++------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/web-apps/chat/defaults.yml b/web-apps/chat/defaults.yml
index b0260a9..c97296d 100644
--- a/web-apps/chat/defaults.yml
+++ b/web-apps/chat/defaults.yml
@@ -14,12 +14,12 @@ page_title: Large Language Model
 # See https://platform.openai.com/docs/api-reference/chat/create
 # and https://docs.vllm.ai/en/v0.6.0/serving/openai_compatible_server.html#extra-parameters
 llm_params:
-  max_tokens:
+  max_tokens: 1000
   temperature: 0
-  top_p:
-  top_k:
-  frequency_penalty:
-  presence_penalty:
+  top_p: 1
+  top_k: -1
+  frequency_penalty: 0
+  presence_penalty: 0
 
 # Gradio theme constructor parameters (e.g. 'primary_hue')
 # See https://www.gradio.app/guides/theming-guide
diff --git a/web-apps/utils/utils.py b/web-apps/utils/utils.py
index 60836b9..28e7a90 100644
--- a/web-apps/utils/utils.py
+++ b/web-apps/utils/utils.py
@@ -33,12 +33,12 @@ class LLMParams(BaseModel):
     https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#extra-parameters
     """
 
-    max_tokens: PositiveInt = 1000
-    temperature: Annotated[float, Field(ge=0, le=2)] = 0
-    top_p: Annotated[float, Field(gt=0, le=1)] = 1
-    top_k: Annotated[int, Field(ge=-1)] = -1
-    frequency_penalty: Annotated[float, Field(ge=-2, le=2)] = 0
-    presence_penalty: Annotated[float, Field(ge=0 - 2, le=2)] = 0
+    max_tokens: PositiveInt | None = None
+    temperature: Annotated[float, Field(ge=0, le=2)] | None = None
+    top_p: Annotated[float, Field(gt=0, le=1)] | None = None
+    top_k: Annotated[int, Field(ge=-1)] | None = None
+    frequency_penalty: Annotated[float, Field(ge=-2, le=2)] | None = None
+    presence_penalty: Annotated[float, Field(ge=0 - 2, le=2)] | None = None
 
     # Make sure we can't smuggle in extra request params / typos
     model_config = ConfigDict(extra="forbid")

From 1b3211e48246afbc6f075b61bd4ab50460938db6 Mon Sep 17 00:00:00 2001
From: sd109
Date: Thu, 13 Feb 2025 16:45:00 +0000
Subject: [PATCH 3/3] Fix in-cluster backend detection

---
 web-apps/chat/app.py    | 1 -
 web-apps/utils/utils.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/web-apps/chat/app.py b/web-apps/chat/app.py
index df369ec..cee141f 100644
--- a/web-apps/chat/app.py
+++ b/web-apps/chat/app.py
@@ -61,7 +61,6 @@ class PossibleSystemPromptException(Exception):
     streaming=True,
 )
 
-
 def inference(latest_message, history):
     # Allow mutating global variable
     global BACKEND_INITIALISED
diff --git a/web-apps/utils/utils.py b/web-apps/utils/utils.py
index 28e7a90..dc1861d 100644
--- a/web-apps/utils/utils.py
+++ b/web-apps/utils/utils.py
@@ -95,7 +95,7 @@ def load_settings() -> dict:
             "Please check for typos"
         )
     settings = {**defaults, **overrides}
-    if "backend_url" not in settings or not settings["backend_url"]:
+    if "backend_url" not in settings or settings["backend_url"] == defaults["backend_url"]:
         # Try to detect in-cluster address
         in_cluster_backend = api_address_in_cluster()
         if not in_cluster_backend:
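As context for reviewers, here is a minimal standalone sketch of how the LLMParams model behaves once patch 2 reverts the field defaults to None. It assumes Pydantic v2 and simply mirrors the model shown in the diff above; the example values are the ones used in the CI override file, and the typo'd key name is hypothetical. Unset fields stay None (so only explicitly configured parameters need to be forwarded to the vLLM server), while extra="forbid" rejects unknown keys instead of silently passing them through.

from typing import Annotated

from pydantic import BaseModel, ConfigDict, Field, PositiveInt, ValidationError


class LLMParams(BaseModel):
    # Mirrors web-apps/utils/utils.py as it stands after patch 2
    max_tokens: PositiveInt | None = None
    temperature: Annotated[float, Field(ge=0, le=2)] | None = None
    top_p: Annotated[float, Field(gt=0, le=1)] | None = None
    top_k: Annotated[int, Field(ge=-1)] | None = None
    frequency_penalty: Annotated[float, Field(ge=-2, le=2)] | None = None
    presence_penalty: Annotated[float, Field(ge=-2, le=2)] | None = None

    # Make sure we can't smuggle in extra request params / typos
    model_config = ConfigDict(extra="forbid")


# Values from the CI override file validate cleanly; fields left unset stay
# None and can be dropped from the request payload.
params = LLMParams(max_tokens=101, temperature=0.1, top_p=0.15, top_k=1)
print(params.model_dump(exclude_none=True))
# {'max_tokens': 101, 'temperature': 0.1, 'top_p': 0.15, 'top_k': 1}

# A typo is rejected at validation time rather than silently ignored.
try:
    LLMParams(temprature=0.5)
except ValidationError as err:
    print(err.errors()[0]["type"])  # extra_forbidden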
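The condition changed in patch 3 is easy to misread, so the snippet below isolates the old and new guards. The placeholder default URL is purely illustrative (the real value comes from the web app's defaults.yml), and api_address_in_cluster() from the diff context is not re-implemented here; only the two boolean expressions are compared. The old guard treated any non-empty backend_url as user-provided, so a shipped non-empty default would keep in-cluster detection from running; the new guard also triggers detection when the value is still exactly the default.

# Illustrative values only; the real settings are loaded from defaults.yml
# plus user overrides.
defaults = {"backend_url": "http://localhost:8080"}  # assumed placeholder default
overrides = {}                                       # user did not override backend_url

settings = {**defaults, **overrides}

# Old guard: only a missing or empty URL triggered in-cluster detection.
old_guard = "backend_url" not in settings or not settings["backend_url"]

# New guard: a URL left at its default value also triggers detection.
new_guard = (
    "backend_url" not in settings
    or settings["backend_url"] == defaults["backend_url"]
)

print(old_guard, new_guard)  # False True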