From 8b616251454acc0ba0645c95dec8088dfc0ce3ee Mon Sep 17 00:00:00 2001
From: Alex-Welsh
Date: Wed, 29 Jan 2025 16:29:27 +0000
Subject: [PATCH 1/3] Fix llmparams default values, add CI

---
 charts/azimuth-chat/ci/ui-only-values.yaml     |  9 ---------
 charts/azimuth-chat/ci/ui-param-overrides.yaml | 16 ++++++++++++++++
 web-apps/utils/utils.py                        | 12 ++++++------
 3 files changed, 22 insertions(+), 15 deletions(-)
 create mode 100644 charts/azimuth-chat/ci/ui-param-overrides.yaml

diff --git a/charts/azimuth-chat/ci/ui-only-values.yaml b/charts/azimuth-chat/ci/ui-only-values.yaml
index b66347d..bf30ede 100644
--- a/charts/azimuth-chat/ci/ui-only-values.yaml
+++ b/charts/azimuth-chat/ci/ui-only-values.yaml
@@ -5,12 +5,3 @@ azimuth-llm:
     service:
       zenith:
         enabled: false
-    appSettings:
-      # Verify that we can set non-standard LLM params
-      llm_params:
-        max_tokens: 101
-        temperature: 0.1
-        top_p: 0.15
-        top_k: 1
-        presence_penalty: 0.9
-        frequency_penalty: 1
diff --git a/charts/azimuth-chat/ci/ui-param-overrides.yaml b/charts/azimuth-chat/ci/ui-param-overrides.yaml
new file mode 100644
index 0000000..b66347d
--- /dev/null
+++ b/charts/azimuth-chat/ci/ui-param-overrides.yaml
@@ -0,0 +1,16 @@
+azimuth-llm:
+  api:
+    enabled: false
+  ui:
+    service:
+      zenith:
+        enabled: false
+    appSettings:
+      # Verify that we can set non-standard LLM params
+      llm_params:
+        max_tokens: 101
+        temperature: 0.1
+        top_p: 0.15
+        top_k: 1
+        presence_penalty: 0.9
+        frequency_penalty: 1
diff --git a/web-apps/utils/utils.py b/web-apps/utils/utils.py
index 28e7a90..60836b9 100644
--- a/web-apps/utils/utils.py
+++ b/web-apps/utils/utils.py
@@ -33,12 +33,12 @@ class LLMParams(BaseModel):
     https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#extra-parameters
     """
 
-    max_tokens: PositiveInt | None = None
-    temperature: Annotated[float, Field(ge=0, le=2)] | None = None
-    top_p: Annotated[float, Field(gt=0, le=1)] | None = None
-    top_k: Annotated[int, Field(ge=-1)] | None = None
-    frequency_penalty: Annotated[float, Field(ge=-2, le=2)] | None = None
-    presence_penalty: Annotated[float, Field(ge=0 - 2, le=2)] | None = None
+    max_tokens: PositiveInt = 1000
+    temperature: Annotated[float, Field(ge=0, le=2)] = 0
+    top_p: Annotated[float, Field(gt=0, le=1)] = 1
+    top_k: Annotated[int, Field(ge=-1)] = -1
+    frequency_penalty: Annotated[float, Field(ge=-2, le=2)] = 0
+    presence_penalty: Annotated[float, Field(ge=0 - 2, le=2)] = 0
 
     # Make sure we can't smuggle in extra request params / typos
     model_config = ConfigDict(extra="forbid")

From 570714568ed8fc14b7006b8973cf57ce8dc5b50b Mon Sep 17 00:00:00 2001
From: Alex-Welsh
Date: Thu, 13 Feb 2025 11:22:27 +0000
Subject: [PATCH 2/3] Move LLM param defaults to chat app

---
 web-apps/chat/defaults.yml | 10 +++++-----
 web-apps/utils/utils.py    | 12 ++++++------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/web-apps/chat/defaults.yml b/web-apps/chat/defaults.yml
index b0260a9..c97296d 100644
--- a/web-apps/chat/defaults.yml
+++ b/web-apps/chat/defaults.yml
@@ -14,12 +14,12 @@ page_title: Large Language Model
 # See https://platform.openai.com/docs/api-reference/chat/create
 # and https://docs.vllm.ai/en/v0.6.0/serving/openai_compatible_server.html#extra-parameters
 llm_params:
-  max_tokens:
+  max_tokens: 1000
   temperature: 0
-  top_p:
-  top_k:
-  frequency_penalty:
-  presence_penalty:
+  top_p: 1
+  top_k: -1
+  frequency_penalty: 0
+  presence_penalty: 0
 
 # Gradio theme constructor parameters (e.g. 'primary_hue')
 # See https://www.gradio.app/guides/theming-guide
diff --git a/web-apps/utils/utils.py b/web-apps/utils/utils.py
index 60836b9..28e7a90 100644
--- a/web-apps/utils/utils.py
+++ b/web-apps/utils/utils.py
@@ -33,12 +33,12 @@ class LLMParams(BaseModel):
     https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#extra-parameters
     """
 
-    max_tokens: PositiveInt = 1000
-    temperature: Annotated[float, Field(ge=0, le=2)] = 0
-    top_p: Annotated[float, Field(gt=0, le=1)] = 1
-    top_k: Annotated[int, Field(ge=-1)] = -1
-    frequency_penalty: Annotated[float, Field(ge=-2, le=2)] = 0
-    presence_penalty: Annotated[float, Field(ge=0 - 2, le=2)] = 0
+    max_tokens: PositiveInt | None = None
+    temperature: Annotated[float, Field(ge=0, le=2)] | None = None
+    top_p: Annotated[float, Field(gt=0, le=1)] | None = None
+    top_k: Annotated[int, Field(ge=-1)] | None = None
+    frequency_penalty: Annotated[float, Field(ge=-2, le=2)] | None = None
+    presence_penalty: Annotated[float, Field(ge=0 - 2, le=2)] | None = None
 
     # Make sure we can't smuggle in extra request params / typos
     model_config = ConfigDict(extra="forbid")

From 1b3211e48246afbc6f075b61bd4ab50460938db6 Mon Sep 17 00:00:00 2001
From: sd109
Date: Thu, 13 Feb 2025 16:45:00 +0000
Subject: [PATCH 3/3] Fix in-cluster backend detection

---
 web-apps/chat/app.py    | 1 -
 web-apps/utils/utils.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/web-apps/chat/app.py b/web-apps/chat/app.py
index df369ec..cee141f 100644
--- a/web-apps/chat/app.py
+++ b/web-apps/chat/app.py
@@ -61,7 +61,6 @@ class PossibleSystemPromptException(Exception):
     streaming=True,
 )
 
-
 def inference(latest_message, history):
     # Allow mutating global variable
     global BACKEND_INITIALISED
diff --git a/web-apps/utils/utils.py b/web-apps/utils/utils.py
index 28e7a90..dc1861d 100644
--- a/web-apps/utils/utils.py
+++ b/web-apps/utils/utils.py
@@ -95,7 +95,7 @@ def load_settings() -> dict:
             "Please check for typos"
         )
     settings = {**defaults, **overrides}
-    if "backend_url" not in settings or not settings["backend_url"]:
+    if "backend_url" not in settings or settings["backend_url"] == defaults["backend_url"]:
         # Try to detect in-cluster address
         in_cluster_backend = api_address_in_cluster()
         if not in_cluster_backend:
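As context for reviewers, here is a minimal standalone sketch of how the LLMParams model behaves once patch 2 reverts the field defaults to None. It assumes Pydantic v2 and simply mirrors the model shown in the diff above; the example values are the ones used in the CI override file, and the typo'd key name is hypothetical. Unset fields stay None (so only explicitly configured parameters need to be forwarded to the vLLM server), while extra="forbid" rejects unknown keys instead of silently passing them through.

from typing import Annotated

from pydantic import BaseModel, ConfigDict, Field, PositiveInt, ValidationError


class LLMParams(BaseModel):
    # Mirrors web-apps/utils/utils.py as it stands after patch 2
    max_tokens: PositiveInt | None = None
    temperature: Annotated[float, Field(ge=0, le=2)] | None = None
    top_p: Annotated[float, Field(gt=0, le=1)] | None = None
    top_k: Annotated[int, Field(ge=-1)] | None = None
    frequency_penalty: Annotated[float, Field(ge=-2, le=2)] | None = None
    presence_penalty: Annotated[float, Field(ge=-2, le=2)] | None = None

    # Make sure we can't smuggle in extra request params / typos
    model_config = ConfigDict(extra="forbid")


# Values from the CI override file validate cleanly; fields left unset stay
# None and can be dropped from the request payload.
params = LLMParams(max_tokens=101, temperature=0.1, top_p=0.15, top_k=1)
print(params.model_dump(exclude_none=True))
# {'max_tokens': 101, 'temperature': 0.1, 'top_p': 0.15, 'top_k': 1}

# A typo is rejected at validation time rather than silently ignored.
try:
    LLMParams(temprature=0.5)
except ValidationError as err:
    print(err.errors()[0]["type"])  # extra_forbidden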
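The condition changed in patch 3 is easy to misread, so the snippet below isolates the old and new guards. The placeholder default URL is purely illustrative (the real value comes from the web app's defaults.yml), and api_address_in_cluster() from the diff context is not re-implemented here; only the two boolean expressions are compared. The old guard treated any non-empty backend_url as user-provided, so a shipped non-empty default would keep in-cluster detection from running; the new guard also triggers detection when the value is still exactly the default.

# Illustrative values only; the real settings are loaded from defaults.yml
# plus user overrides.
defaults = {"backend_url": "http://localhost:8080"}  # assumed placeholder default
overrides = {}                                       # user did not override backend_url

settings = {**defaults, **overrides}

# Old guard: only a missing or empty URL triggered in-cluster detection.
old_guard = "backend_url" not in settings or not settings["backend_url"]

# New guard: a URL left at its default value also triggers detection.
new_guard = (
    "backend_url" not in settings
    or settings["backend_url"] == defaults["backend_url"]
)

print(old_guard, new_guard)  # False True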