From 2ee635456e8354f0dd7fc1a2c885f2fc42707d31 Mon Sep 17 00:00:00 2001
From: Alex-Welsh <alex@stackhpc.com>
Date: Fri, 14 Feb 2025 11:13:05 +0000
Subject: [PATCH 01/20] Add CPU-backed vLLM test case

---
 charts/azimuth-chat/ci/vllm-cpu-values.yaml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 charts/azimuth-chat/ci/vllm-cpu-values.yaml

diff --git a/charts/azimuth-chat/ci/vllm-cpu-values.yaml b/charts/azimuth-chat/ci/vllm-cpu-values.yaml
new file mode 100644
index 0000000..071b55a
--- /dev/null
+++ b/charts/azimuth-chat/ci/vllm-cpu-values.yaml
@@ -0,0 +1,14 @@
+azimuth-llm:
+  ui:
+    service:
+      zenith:
+        enabled: false
+  api:
+    image:
+      repository: ghcr.io/stackhpc/vllm-cpu
+      version: v0.7.2
+    monitoring:
+      enabled: false
+    gpus: 0
+  huggingface:
+    model: HuggingFaceTB/SmolLM2-135M-Instruct

From 4ad5c11ec069cf4025fdbddf1cbebbdbf8570c4c Mon Sep 17 00:00:00 2001
From: Alex-Welsh <alex@stackhpc.com>
Date: Fri, 14 Feb 2025 14:09:54 +0000
Subject: [PATCH 02/20] wip: test in CI

---
 charts/azimuth-chat/ci/ui-only-values.yaml | 16 ----------------
 charts/azimuth-llm/values.yaml             |  3 +--
 web-apps/chat/defaults.yml                 |  6 ------
 3 files changed, 1 insertion(+), 24 deletions(-)
 delete mode 100644 charts/azimuth-chat/ci/ui-only-values.yaml

diff --git a/charts/azimuth-chat/ci/ui-only-values.yaml b/charts/azimuth-chat/ci/ui-only-values.yaml
deleted file mode 100644
index b66347d..0000000
--- a/charts/azimuth-chat/ci/ui-only-values.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-azimuth-llm:
-  api:
-    enabled: false
-  ui:
-    service:
-      zenith:
-        enabled: false
-    appSettings:
-      # Verify that we can set non-standard LLM params
-      llm_params:
-        max_tokens: 101
-        temperature: 0.1
-        top_p: 0.15
-        top_k: 1
-        presence_penalty: 0.9
-        frequency_penalty: 1
diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml
index 29879d4..cd483c1 100644
--- a/charts/azimuth-llm/values.yaml
+++ b/charts/azimuth-llm/values.yaml
@@ -56,8 +56,7 @@ api:
     hostPath:
       path: /tmp/llm/huggingface-cache
   # Number of gpus to requests for each api pod instance
-  # NOTE: This must be in the range 1 <= value <= N, where
-  # 'N' is the number of GPUs available in a single
+  # NOTE: This must not exceed the number of GPUs available in a single
   # worker node on the target Kubernetes cluster.
   # NOTE: According to the vLLM docs found here
   # https://docs.vllm.ai/en/latest/serving/distributed_serving.html
diff --git a/web-apps/chat/defaults.yml b/web-apps/chat/defaults.yml
index b0260a9..b9ce61f 100644
--- a/web-apps/chat/defaults.yml
+++ b/web-apps/chat/defaults.yml
@@ -1,9 +1,3 @@
-
-# Default target is a local ollama instance
-# running inside the same docker network
-model_name: smollm2:135m
-backend_url: http://ollama:11434
-
 host_address: 0.0.0.0
 
 model_instruction: "You are a helpful and cheerful AI assistant. Please respond appropriately."

From 1a309ee4eb25294d6db9e732e8ede9788be267da Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Fri, 14 Feb 2025 17:12:14 +0000
Subject: [PATCH 03/20] Pick default vLLM image based on GPU count

---
 charts/azimuth-llm/templates/api/deployment.yml | 7 ++++---
 charts/azimuth-llm/values.yaml                  | 4 +++-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/charts/azimuth-llm/templates/api/deployment.yml b/charts/azimuth-llm/templates/api/deployment.yml
index dc41a5f..e3b37fe 100644
--- a/charts/azimuth-llm/templates/api/deployment.yml
+++ b/charts/azimuth-llm/templates/api/deployment.yml
@@ -19,7 +19,8 @@ spec:
     spec:
       containers:
       - name: {{ .Release.Name }}-api
-        image: {{ printf "%s:%s" .Values.api.image.repository .Values.api.image.version }}
+        {{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" "vllm/vllm-openai" (eq (.Values.api.gpus | int) 0)) -}}
+        image: {{ printf "%s:%s" $imageRepo .Values.api.image.version }}
         ports:
         - name: api
           containerPort: 8000
@@ -29,7 +30,7 @@ spec:
         args:
           - --model
           - {{ .Values.huggingface.model }}
-          {{- include "azimuth-llm.chatTemplate" . | nindent 10 }}
+          {{- include "azimuth-llm.chatTemplate" . | nindent 10 -}}
           {{- if .Values.api.modelMaxContextLength -}}
           - --max-model-len
           - {{ .Values.api.modelMaxContextLength | quote }}
@@ -41,7 +42,7 @@ spec:
           {{- if .Values.api.extraArgs -}}
           {{- .Values.api.extraArgs | toYaml | nindent 10 }}
           {{- end -}}
-        {{- if .Values.huggingface.secretName }}
+        {{- if .Values.huggingface.secretName -}}
         envFrom:
         - secretRef:
             name: {{ .Values.huggingface.secretName }}
diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml
index cd483c1..09d5eec 100644
--- a/charts/azimuth-llm/values.yaml
+++ b/charts/azimuth-llm/values.yaml
@@ -33,7 +33,9 @@ api:
   enabled: true
   # Container image config
   image:
-    repository: vllm/vllm-openai
+    # Defaults to vllm/vllm-openai when api.gpus > 0
+    # or ghcr.io/stackhpc/vllm-cpu when api.gpus == 0
+    repository:
     version: v0.6.3
   monitoring:
     enabled: true

From 6e1fd111e1199fd94cb3aed81366a9dc29a7d101 Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Fri, 14 Feb 2025 17:20:56 +0000
Subject: [PATCH 04/20] Simplify CI test values for each chart

---
 charts/azimuth-chat/ci/test-values.yaml       | 23 +++++++++++++++++++
 charts/azimuth-chat/ci/vllm-cpu-values.yaml   | 14 -----------
 .../{ui-only-values.yaml => test-values.yaml} |  9 +++++++-
 charts/azimuth-llm/ci/default-values.yaml     | 15 ++++++++++++
 charts/azimuth-llm/ci/no-api-values.yaml      |  6 -----
 5 files changed, 46 insertions(+), 21 deletions(-)
 create mode 100644 charts/azimuth-chat/ci/test-values.yaml
 delete mode 100644 charts/azimuth-chat/ci/vllm-cpu-values.yaml
 rename charts/azimuth-image-analysis/ci/{ui-only-values.yaml => test-values.yaml} (54%)
 create mode 100644 charts/azimuth-llm/ci/default-values.yaml
 delete mode 100644 charts/azimuth-llm/ci/no-api-values.yaml

diff --git a/charts/azimuth-chat/ci/test-values.yaml b/charts/azimuth-chat/ci/test-values.yaml
new file mode 100644
index 0000000..56b1dfb
--- /dev/null
+++ b/charts/azimuth-chat/ci/test-values.yaml
@@ -0,0 +1,23 @@
+azimuth-llm:
+  huggingface:
+    # Use the smallest LLM we can find
+    model: HuggingFaceTB/SmolLM2-135M-Instruct
+  api:
+    # CI Kind cluster doesn't have kube-prometheus-stack
+    monitoring:
+      enabled: false
+    # No GPUs in CI runners
+    gpus: 0
+  ui:
+    service:
+      zenith:
+        enabled: false
+    appSettings:
+      # Verify that we can set non-standard LLM params
+      llm_params:
+        max_tokens: 101
+        temperature: 0.1
+        top_k: 2
+        top_p: 0.15
+        presence_penalty: 0.9
+        frequency_penalty: 1
diff --git a/charts/azimuth-chat/ci/vllm-cpu-values.yaml b/charts/azimuth-chat/ci/vllm-cpu-values.yaml
deleted file mode 100644
index 071b55a..0000000
--- a/charts/azimuth-chat/ci/vllm-cpu-values.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-azimuth-llm:
-  ui:
-    service:
-      zenith:
-        enabled: false
-  api:
-    image:
-      repository: ghcr.io/stackhpc/vllm-cpu
-      version: v0.7.2
-    monitoring:
-      enabled: false
-    gpus: 0
-  huggingface:
-    model: HuggingFaceTB/SmolLM2-135M-Instruct
diff --git a/charts/azimuth-image-analysis/ci/ui-only-values.yaml b/charts/azimuth-image-analysis/ci/test-values.yaml
similarity index 54%
rename from charts/azimuth-image-analysis/ci/ui-only-values.yaml
rename to charts/azimuth-image-analysis/ci/test-values.yaml
index 96f716d..72b7a5e 100644
--- a/charts/azimuth-image-analysis/ci/ui-only-values.yaml
+++ b/charts/azimuth-image-analysis/ci/test-values.yaml
@@ -1,6 +1,13 @@
 azimuth-llm:
+  huggingface:
+    # Use the smallest vision model we can find
+    model: HuggingFaceTB/SmolVLM-256M-Instruct
   api:
-    enabled: false
+    # CI Kind cluster doesn't have kube-prometheus-stack
+    monitoring:
+      enabled: false
+    # No GPUs in CI runners
+    gpus: 0
   ui:
     service:
       zenith:
diff --git a/charts/azimuth-llm/ci/default-values.yaml b/charts/azimuth-llm/ci/default-values.yaml
new file mode 100644
index 0000000..d116377
--- /dev/null
+++ b/charts/azimuth-llm/ci/default-values.yaml
@@ -0,0 +1,15 @@
+# This is intended to test the default chart values
+# as close as possible given the lack of GPU
+huggingface:
+  # Use the smallest LLM we can find
+  model: HuggingFaceTB/SmolLM2-135M-Instruct
+api:
+  # CI Kind cluster doesn't have kube-prometheus-stack
+  monitoring:
+    enabled: false
+  # No GPUs in CI runners
+  gpus: 0
+ui:
+  service:
+    zenith:
+      enabled: false
diff --git a/charts/azimuth-llm/ci/no-api-values.yaml b/charts/azimuth-llm/ci/no-api-values.yaml
deleted file mode 100644
index e455af3..0000000
--- a/charts/azimuth-llm/ci/no-api-values.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-api:
-  enabled: false
-ui:
-  service:
-    zenith:
-      enabled: false

From 781660e1a7a3af43a45eaa09dee3e61aa663ae27 Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Fri, 14 Feb 2025 17:24:33 +0000
Subject: [PATCH 05/20] Skip Gradio tests for now

---
 .github/workflows/test-pr.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml
index 2b16474..42c9b0e 100644
--- a/.github/workflows/test-pr.yml
+++ b/.github/workflows/test-pr.yml
@@ -44,10 +44,10 @@ jobs:
         with:
           ref: ${{ github.event.pull_request.head.sha }}
 
-      - name: Run Gradio app tests
-        run: |
-          ./test-images.sh $(git rev-parse --short ${{ github.event.pull_request.head.sha }})
-        working-directory: web-apps
+      # - name: Run Gradio app tests
+      #   run: |
+      #     ./test-images.sh $(git rev-parse --short ${{ github.event.pull_request.head.sha }})
+      #   working-directory: web-apps
 
       - name: Install Helm
         uses: azure/setup-helm@v4
@@ -66,7 +66,7 @@ jobs:
         with:
           cluster_name: ${{ env.CLUSTER_NAME }}
 
-      # NOTE(scott): Since the local Chart.yaml uses "appVersion: latest" and this
+      # NOTE(scott): Since the local Chart.yaml uses "appVersion: latest" and this
       # only gets overwritten to the correct commit SHA during Helm chart build,
       # we need to pull these published images and load them into the kind cluster
       # with the tag correct tag.

From 09522e8e0a50862c828635a7910b0770f2477a28 Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Fri, 14 Feb 2025 17:28:39 +0000
Subject: [PATCH 06/20] Update comment

---
 charts/azimuth-llm/ci/default-values.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/charts/azimuth-llm/ci/default-values.yaml b/charts/azimuth-llm/ci/default-values.yaml
index d116377..c610aab 100644
--- a/charts/azimuth-llm/ci/default-values.yaml
+++ b/charts/azimuth-llm/ci/default-values.yaml
@@ -1,5 +1,6 @@
 # This is intended to test the default chart values
-# as close as possible given the lack of GPU
+# as close as possible given the constraints of running
+# inside a Kind cluster within a CI runner
 huggingface:
   # Use the smallest LLM we can find
   model: HuggingFaceTB/SmolLM2-135M-Instruct

From 44d1e6bdb78bb476b5ed90f2afabb920c549eb7f Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Fri, 14 Feb 2025 17:41:34 +0000
Subject: [PATCH 07/20] Fix failing Gradio unit test

---
 .github/workflows/test-pr.yml | 8 ++++----
 web-apps/chat/defaults.yml    | 5 ++++-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml
index 42c9b0e..dec9f48 100644
--- a/.github/workflows/test-pr.yml
+++ b/.github/workflows/test-pr.yml
@@ -44,10 +44,10 @@ jobs:
         with:
           ref: ${{ github.event.pull_request.head.sha }}
 
-      # - name: Run Gradio app tests
-      #   run: |
-      #     ./test-images.sh $(git rev-parse --short ${{ github.event.pull_request.head.sha }})
-      #   working-directory: web-apps
+      - name: Run Gradio app tests
+        run: |
+          ./test-images.sh $(git rev-parse --short ${{ github.event.pull_request.head.sha }})
+        working-directory: web-apps
 
       - name: Install Helm
         uses: azure/setup-helm@v4
diff --git a/web-apps/chat/defaults.yml b/web-apps/chat/defaults.yml
index b9ce61f..80b4549 100644
--- a/web-apps/chat/defaults.yml
+++ b/web-apps/chat/defaults.yml
@@ -4,6 +4,9 @@ model_instruction: "You are a helpful and cheerful AI assistant. Please respond
 
 page_title: Large Language Model
 
+backend_url: http://ollama:11434
+model_name: smollm2:135m
+
 # LLM request parameters
 # See https://platform.openai.com/docs/api-reference/chat/create
 # and https://docs.vllm.ai/en/v0.6.0/serving/openai_compatible_server.html#extra-parameters
@@ -24,6 +27,6 @@ theme_params: {}
 theme_params_extended: {}
 
 # Additional CSS and JS overrides
-# See https://www.gradio.app/guides/custom-CSS-and-JS
+# See https://www.gradio.app/guides/custom-CSS-and-JS
 css_overrides:
 custom_javascript:

From 677df04cffb79cdb437b0a1bc53f8fbdc28c8206 Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Fri, 14 Feb 2025 17:48:55 +0000
Subject: [PATCH 08/20] Make linter happy

---
 charts/azimuth-llm/values.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml
index 7496aa3..18ac6b6 100644
--- a/charts/azimuth-llm/values.yaml
+++ b/charts/azimuth-llm/values.yaml
@@ -84,7 +84,7 @@ ui:
   # Container image config
   image:
     repository: ghcr.io/stackhpc/azimuth-llm-chat-ui
-    tag: # Defaults to chart's appVersion
+    tag:  # Defaults to chart's appVersion
     imagePullPolicy:
   # The settings to be passed to the frontend web app.
   # Format depends on the chosen UI image above. For each of the UIs

From e9e427d082ea8d28095e086c91705409b7761bd4 Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Fri, 14 Feb 2025 19:06:07 +0000
Subject: [PATCH 09/20] Bump test job time limit

---
 charts/azimuth-llm/templates/test/end-to-end.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charts/azimuth-llm/templates/test/end-to-end.yml b/charts/azimuth-llm/templates/test/end-to-end.yml
index e94d4d2..5a31f5a 100644
--- a/charts/azimuth-llm/templates/test/end-to-end.yml
+++ b/charts/azimuth-llm/templates/test/end-to-end.yml
@@ -26,5 +26,5 @@ spec:
       restartPolicy: Never
   # Allow plenty of retries since downloading
   # model weights can take a long time.
-  backoffLimit: 10
+  backoffLimit: 20
 {{- end -}}

From 873558dfd19e75c88897357d27bf790c1021129f Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Fri, 14 Feb 2025 19:30:26 +0000
Subject: [PATCH 10/20] Wait for model readiness in test script

---
 .../azimuth-llm/templates/test/end-to-end.yml |  5 ++---
 web-apps/chat/gradio-client-test.py           | 20 ++++++++++++++++---
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/charts/azimuth-llm/templates/test/end-to-end.yml b/charts/azimuth-llm/templates/test/end-to-end.yml
index 5a31f5a..de97493 100644
--- a/charts/azimuth-llm/templates/test/end-to-end.yml
+++ b/charts/azimuth-llm/templates/test/end-to-end.yml
@@ -24,7 +24,6 @@ spec:
         - http://{{ .Values.ui.service.name }}.{{ .Release.Namespace }}.svc
         {{- end }}
       restartPolicy: Never
-  # Allow plenty of retries since downloading
-  # model weights can take a long time.
-  backoffLimit: 20
+  # Handle retries within gradio-test-client script
+  backoffLimit: 1
 {{- end -}}
diff --git a/web-apps/chat/gradio-client-test.py b/web-apps/chat/gradio-client-test.py
index ddf245c..723852d 100644
--- a/web-apps/chat/gradio-client-test.py
+++ b/web-apps/chat/gradio-client-test.py
@@ -1,7 +1,21 @@
 import sys
+import time
+
 from gradio_client import Client
 
 gradio_host = sys.argv[1]
-client = Client(gradio_host)
-result = client.predict("Hi", api_name="/chat")
-print(result)
+
+retries = 60
+for n in range(1, retries+1):
+    try:
+        client = Client(gradio_host)
+        result = client.predict("Hi", api_name="/chat")
+        print(result)
+        break
+    except Exception as err:
+        msg = f"Attempt {n} / {retries} encounter error: {err}"
+        if n < retries:
+            print(msg, "- waiting 10 seconds before retrying")
+            time.sleep(10)
+        else:
+            sys.exit(f"{msg} - no more retries left")  # exit 1 so the test Job fails

From dfba4252dc851aa56e57c4a64c853187f9473e8b Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Fri, 14 Feb 2025 19:32:28 +0000
Subject: [PATCH 11/20] Add missing CI test values

---
 charts/azimuth-chat/ci/test-values.yaml           | 3 ++-
 charts/azimuth-image-analysis/ci/test-values.yaml | 3 ++-
 charts/azimuth-llm/ci/default-values.yaml         | 4 +++-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/charts/azimuth-chat/ci/test-values.yaml b/charts/azimuth-chat/ci/test-values.yaml
index 56b1dfb..98e3530 100644
--- a/charts/azimuth-chat/ci/test-values.yaml
+++ b/charts/azimuth-chat/ci/test-values.yaml
@@ -1,7 +1,7 @@
 azimuth-llm:
   huggingface:
     # Use the smallest LLM we can find
-    model: HuggingFaceTB/SmolLM2-135M-Instruct
+    model: &model HuggingFaceTB/SmolLM2-135M-Instruct
   api:
     # CI Kind cluster doesn't have kube-prometheus-stack
     monitoring:
@@ -13,6 +13,7 @@ azimuth-llm:
       zenith:
         enabled: false
     appSettings:
+      model_name: *model
       # Verify that we can set non-standard LLM params
       llm_params:
         max_tokens: 101
diff --git a/charts/azimuth-image-analysis/ci/test-values.yaml b/charts/azimuth-image-analysis/ci/test-values.yaml
index 72b7a5e..09ad8ba 100644
--- a/charts/azimuth-image-analysis/ci/test-values.yaml
+++ b/charts/azimuth-image-analysis/ci/test-values.yaml
@@ -1,7 +1,7 @@
 azimuth-llm:
   huggingface:
     # Use the smallest vision model we can find
-    model: HuggingFaceTB/SmolVLM-256M-Instruct
+    model: &model HuggingFaceTB/SmolVLM-256M-Instruct
   api:
     # CI Kind cluster doesn't have kube-prometheus-stack
     monitoring:
@@ -13,6 +13,7 @@ azimuth-llm:
       zenith:
         enabled: false
     appSettings:
+      model_name: *model
       # Verify that we can set non-standard LLM params
       llm_params:
         max_tokens: 101
diff --git a/charts/azimuth-llm/ci/default-values.yaml b/charts/azimuth-llm/ci/default-values.yaml
index c610aab..40dbbde 100644
--- a/charts/azimuth-llm/ci/default-values.yaml
+++ b/charts/azimuth-llm/ci/default-values.yaml
@@ -3,7 +3,7 @@
 # inside a Kind cluster within a CI runner
 huggingface:
   # Use the smallest LLM we can find
-  model: HuggingFaceTB/SmolLM2-135M-Instruct
+  model: &model HuggingFaceTB/SmolLM2-135M-Instruct
 api:
   # CI Kind cluster doesn't have kube-prometheus-stack
   monitoring:
@@ -14,3 +14,5 @@ ui:
   service:
     zenith:
       enabled: false
+  appSettings:
+    model_name: *model

From cdcea4ff7a348352869a716242cc1ff49cb5c357 Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Fri, 14 Feb 2025 20:02:58 +0000
Subject: [PATCH 12/20] Make python output visible in test job logs

---
 charts/azimuth-llm/templates/test/end-to-end.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/charts/azimuth-llm/templates/test/end-to-end.yml b/charts/azimuth-llm/templates/test/end-to-end.yml
index de97493..881e369 100644
--- a/charts/azimuth-llm/templates/test/end-to-end.yml
+++ b/charts/azimuth-llm/templates/test/end-to-end.yml
@@ -23,6 +23,10 @@ spec:
         {{- else }}
         - http://{{ .Values.ui.service.name }}.{{ .Release.Namespace }}.svc
         {{- end }}
+        env:
+        - name: PYTHONUNBUFFERED
+          value: "1"
+        tty: true # Make stdout from python visible in k8s logs
       restartPolicy: Never
   # Handle retries within gradio-test-client script
   backoffLimit: 1

From 2f15872d33a8c57bfe6a0e976f173823599f6db2 Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Thu, 13 Feb 2025 16:45:00 +0000
Subject: [PATCH 13/20] Fix in-cluster backend detection

---
 web-apps/chat/app.py    | 1 -
 web-apps/utils/utils.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/web-apps/chat/app.py b/web-apps/chat/app.py
index df369ec..cee141f 100644
--- a/web-apps/chat/app.py
+++ b/web-apps/chat/app.py
@@ -61,7 +61,6 @@ class PossibleSystemPromptException(Exception):
     streaming=True,
 )
 
-
 def inference(latest_message, history):
     # Allow mutating global variable
     global BACKEND_INITIALISED
diff --git a/web-apps/utils/utils.py b/web-apps/utils/utils.py
index 28e7a90..dc1861d 100644
--- a/web-apps/utils/utils.py
+++ b/web-apps/utils/utils.py
@@ -95,7 +95,7 @@ def load_settings() -> dict:
             "Please check for typos"
         )
     settings = {**defaults, **overrides}
-    if "backend_url" not in settings or not settings["backend_url"]:
+    if "backend_url" not in settings or settings["backend_url"] == defaults["backend_url"]:
         # Try to detect in-cluster address
         in_cluster_backend = api_address_in_cluster()
         if not in_cluster_backend:

From 88f957a461a5671ef95cc9bd32566f662d3d82ba Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Fri, 14 Feb 2025 20:59:45 +0000
Subject: [PATCH 14/20] Set in-cluster service addresses automatically

---
 charts/azimuth-llm/templates/api/ingress.yml  |  2 +-
 charts/azimuth-llm/templates/api/service.yml  |  2 +-
 .../templates/api/zenith-client.yml           |  2 +-
 .../azimuth-llm/templates/test/end-to-end.yml |  2 +-
 charts/azimuth-llm/templates/test/web-app.yml |  2 +-
 .../templates/ui/app-config-map.yml           |  2 ++
 charts/azimuth-llm/templates/ui/ingress.yml   |  2 +-
 charts/azimuth-llm/templates/ui/service.yml   |  2 +-
 .../templates/ui/ui-zenith-client.yml         |  2 +-
 charts/azimuth-llm/values.yaml                |  2 --
 web-apps/utils/utils.py                       | 35 +++----------------
 11 files changed, 15 insertions(+), 40 deletions(-)

diff --git a/charts/azimuth-llm/templates/api/ingress.yml b/charts/azimuth-llm/templates/api/ingress.yml
index 14c13b1..d0ba22c 100644
--- a/charts/azimuth-llm/templates/api/ingress.yml
+++ b/charts/azimuth-llm/templates/api/ingress.yml
@@ -16,7 +16,7 @@ spec:
         pathType: Prefix
         backend:
           service:
-            name: {{ .Values.api.service.name }}
+            name: {{ .Release.Name }}-api
             port:
               # Must match Service resource
               number: 80
diff --git a/charts/azimuth-llm/templates/api/service.yml b/charts/azimuth-llm/templates/api/service.yml
index e4e31c6..0400545 100644
--- a/charts/azimuth-llm/templates/api/service.yml
+++ b/charts/azimuth-llm/templates/api/service.yml
@@ -2,7 +2,7 @@
 apiVersion: v1
 kind: Service
 metadata:
-  name: {{ .Values.api.service.name }}
+  name: {{ .Release.Name }}-api
   labels:
     {{- include "azimuth-llm.api-selectorLabels" . | nindent 4 }}
 spec:
diff --git a/charts/azimuth-llm/templates/api/zenith-client.yml b/charts/azimuth-llm/templates/api/zenith-client.yml
index ae20b2a..d2832bc 100644
--- a/charts/azimuth-llm/templates/api/zenith-client.yml
+++ b/charts/azimuth-llm/templates/api/zenith-client.yml
@@ -8,7 +8,7 @@ metadata:
 spec:
   reservationName: {{ .Release.Name }}-api
   upstream:
-    serviceName: {{ .Values.api.service.name }}
+    serviceName: {{ .Release.Name }}-api
   auth:
     skip: {{ .Values.api.service.zenith.skipAuth }}
 {{- end -}}
diff --git a/charts/azimuth-llm/templates/test/end-to-end.yml b/charts/azimuth-llm/templates/test/end-to-end.yml
index 881e369..f27147e 100644
--- a/charts/azimuth-llm/templates/test/end-to-end.yml
+++ b/charts/azimuth-llm/templates/test/end-to-end.yml
@@ -21,7 +21,7 @@ spec:
         {{- if .Values.ingress.ui.enabled }}
         - {{ .Values.ingress.host }}{{ .Values.ingress.ui.path }}
         {{- else }}
-        - http://{{ .Values.ui.service.name }}.{{ .Release.Namespace }}.svc
+        - http://{{ .Release.Name }}-ui.{{ .Release.Namespace }}.svc
         {{- end }}
         env:
         - name: PYTHONUNBUFFERED
diff --git a/charts/azimuth-llm/templates/test/web-app.yml b/charts/azimuth-llm/templates/test/web-app.yml
index 65dbb90..3193fb3 100644
--- a/charts/azimuth-llm/templates/test/web-app.yml
+++ b/charts/azimuth-llm/templates/test/web-app.yml
@@ -18,7 +18,7 @@ spec:
         {{- if .Values.ingress.ui.enabled }}
         - {{ .Values.ingress.host | trimPrefix "http://" | trimPrefix "https://" }}{{ .Values.ingress.ui.path }}
         {{- else }}
-        - {{ .Values.ui.service.name }}.{{ .Release.Namespace }}.svc
+        - {{ .Release.Name }}-ui.{{ .Release.Namespace }}.svc
         {{- end }}
         - "80"
       restartPolicy: Never
diff --git a/charts/azimuth-llm/templates/ui/app-config-map.yml b/charts/azimuth-llm/templates/ui/app-config-map.yml
index e77da18..8a29584 100644
--- a/charts/azimuth-llm/templates/ui/app-config-map.yml
+++ b/charts/azimuth-llm/templates/ui/app-config-map.yml
@@ -7,5 +7,7 @@ metadata:
     {{- include "azimuth-llm.labels" . | nindent 4 }}
 data:
   overrides.yml: |
+    {{/* Set backend URL to in-cluster API address */}}
+    {{- $_ := set .Values.ui.appSettings "backend_url" (printf "http://%s-api.%s.svc" .Release.Name .Release.Namespace) }}
     {{- .Values.ui.appSettings | toYaml | nindent 4 }}
 {{- end -}}
diff --git a/charts/azimuth-llm/templates/ui/ingress.yml b/charts/azimuth-llm/templates/ui/ingress.yml
index e9e6e02..c2cc8be 100644
--- a/charts/azimuth-llm/templates/ui/ingress.yml
+++ b/charts/azimuth-llm/templates/ui/ingress.yml
@@ -16,7 +16,7 @@ spec:
         pathType: Prefix
         backend:
           service:
-            name: {{ .Values.ui.service.name }}
+            name: {{ .Release.Name }}-ui
             port:
               # Must match Service resource
               number: 80
diff --git a/charts/azimuth-llm/templates/ui/service.yml b/charts/azimuth-llm/templates/ui/service.yml
index 1de5c17..c199a1d 100644
--- a/charts/azimuth-llm/templates/ui/service.yml
+++ b/charts/azimuth-llm/templates/ui/service.yml
@@ -2,7 +2,7 @@
 apiVersion: v1
 kind: Service
 metadata:
-  name: {{ .Values.ui.service.name }}
+  name: {{ .Release.Name }}-ui
   labels:
     {{- include "azimuth-llm.labels" . | nindent 4 }}
 spec:
diff --git a/charts/azimuth-llm/templates/ui/ui-zenith-client.yml b/charts/azimuth-llm/templates/ui/ui-zenith-client.yml
index cbdbe95..ce4b5ad 100644
--- a/charts/azimuth-llm/templates/ui/ui-zenith-client.yml
+++ b/charts/azimuth-llm/templates/ui/ui-zenith-client.yml
@@ -9,7 +9,7 @@ metadata:
 spec:
   reservationName: {{ .Release.Name }}-ui
   upstream:
-    serviceName: {{ .Values.ui.service.name }}
+    serviceName: {{ .Release.Name }}-ui
   auth:
     skip: {{ .Values.ui.service.zenith.skipAuth }}
 {{- end -}}
diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml
index 18ac6b6..e693bbf 100644
--- a/charts/azimuth-llm/values.yaml
+++ b/charts/azimuth-llm/values.yaml
@@ -43,7 +43,6 @@ api:
   replicas: 1
   # Service config
   service:
-    name: llm-backend
     type: ClusterIP
     zenith:
       enabled: false
@@ -106,7 +105,6 @@ ui:
         - Arial
   # Service config
   service:
-    name: web-app
     type: ClusterIP
     zenith:
       enabled: true
diff --git a/web-apps/utils/utils.py b/web-apps/utils/utils.py
index dc1861d..cb99776 100644
--- a/web-apps/utils/utils.py
+++ b/web-apps/utils/utils.py
@@ -43,28 +43,6 @@ class LLMParams(BaseModel):
     model_config = ConfigDict(extra="forbid")
 
 
-NAMESPACE_FILE_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
-
-
-def get_k8s_namespace():
-    try:
-        current_k8s_namespace = open(NAMESPACE_FILE_PATH).read()
-        return current_k8s_namespace
-    except FileNotFoundError:
-        return None
-
-
-def api_address_in_cluster():
-    k8s_ns = get_k8s_namespace()
-    if k8s_ns:
-        return f"http://llm-backend.{k8s_ns}.svc"
-    else:
-        log.warning(
-            "Failed to determine k8s namespace from %s - assuming non-kubernetes environment.",
-            NAMESPACE_FILE_PATH,
-        )
-
-
 # Method for loading settings from files
 def load_yaml(file_path: str) -> dict:
     with open(file_path, "r") as file:
@@ -95,12 +73,9 @@ def load_settings() -> dict:
             "Please check for typos"
         )
     settings = {**defaults, **overrides}
-    if "backend_url" not in settings or settings["backend_url"] == defaults["backend_url"]:
-        # Try to detect in-cluster address
-        in_cluster_backend = api_address_in_cluster()
-        if not in_cluster_backend:
-            raise Exception(
-                "Backend URL must be provided in settings when running outside of Kubernetes."
-            )
-        settings["backend_url"] = in_cluster_backend
+
+    if "backend_url" not in settings:
+        raise Exception(
+            "Backend URL must be provided in settings when running outside of Kubernetes."
+        )
     return settings

From 3724392cae474969d5c336fc87567e5529a984c1 Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Fri, 14 Feb 2025 21:03:24 +0000
Subject: [PATCH 15/20] Remove unnecessary comment

---
 charts/azimuth-llm/templates/ui/app-config-map.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/charts/azimuth-llm/templates/ui/app-config-map.yml b/charts/azimuth-llm/templates/ui/app-config-map.yml
index 8a29584..7a7b9e6 100644
--- a/charts/azimuth-llm/templates/ui/app-config-map.yml
+++ b/charts/azimuth-llm/templates/ui/app-config-map.yml
@@ -7,7 +7,6 @@ metadata:
     {{- include "azimuth-llm.labels" . | nindent 4 }}
 data:
   overrides.yml: |
-    {{/* Set backend URL to in-cluster API address */}}
     {{- $_ := set .Values.ui.appSettings "backend_url" (printf "http://%s-api.%s.svc" .Release.Name .Release.Namespace) }}
     {{- .Values.ui.appSettings | toYaml | nindent 4 }}
 {{- end -}}

From 3e8c8deb6b3d79ac141e7eeb575e83060a5e12f4 Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Fri, 14 Feb 2025 21:44:06 +0000
Subject: [PATCH 16/20] Add separate VLM end to end test

---
 .../azimuth-llm/templates/test/end-to-end.yml |  5 +---
 web-apps/image-analysis/gradio-client-test.py | 25 +++++++++++++++++++
 2 files changed, 26 insertions(+), 4 deletions(-)
 create mode 100644 web-apps/image-analysis/gradio-client-test.py

diff --git a/charts/azimuth-llm/templates/test/end-to-end.yml b/charts/azimuth-llm/templates/test/end-to-end.yml
index f27147e..741a670 100644
--- a/charts/azimuth-llm/templates/test/end-to-end.yml
+++ b/charts/azimuth-llm/templates/test/end-to-end.yml
@@ -10,10 +10,7 @@ spec:
     spec:
       containers:
       - name: gradio-client-test
-        {{- /*
-          Use the chat image since we know this contains the gradio_client package
-        */}}
-        image: {{ printf "ghcr.io/stackhpc/azimuth-llm-chat-ui:%s" (default .Chart.AppVersion .Values.ui.image.tag) }}
+        image: {{ printf "%s:%s" .Values.ui.image.repository (default .Chart.AppVersion .Values.ui.image.tag) }}
         imagePullPolicy: IfNotPresent
         command:
         - python
diff --git a/web-apps/image-analysis/gradio-client-test.py b/web-apps/image-analysis/gradio-client-test.py
new file mode 100644
index 0000000..a675ba6
--- /dev/null
+++ b/web-apps/image-analysis/gradio-client-test.py
@@ -0,0 +1,25 @@
+import sys
+import time
+
+from gradio_client import Client
+
+gradio_host = sys.argv[1]
+
+retries = 60
+for n in range(1, retries+1):
+    try:
+        client = Client(gradio_host)
+        result = client.predict(
+    		image_url="https://miro.medium.com/v2/resize:fit:1100/format:webp/1*xDgN0L2RfkS8Kc7uTwsRqw.jpeg",
+    		prompt="Hi",
+    		api_name="/predict"
+        )
+        print(result)
+        break
+    except Exception as err:
+        msg = f"Attempt {n} / {retries} encounter error: {err}"
+        if n < retries:
+            print(msg, "- waiting 10 seconds before retrying")
+            time.sleep(10)
+        else:
+            print(msg, "- no more retries left")

From d23de83abc45083e9332abe18eb12cad2fc62f29 Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Fri, 14 Feb 2025 22:07:01 +0000
Subject: [PATCH 17/20] Bump Helm timeout to 20m

---
 ct.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ct.yaml b/ct.yaml
index f5fada9..3fa795d 100644
--- a/ct.yaml
+++ b/ct.yaml
@@ -6,3 +6,6 @@ validate-maintainers: false
 all: true
 # Split output to make it look nice in GitHub Actions tab
 github-groups: true
+# Allow for long running install and test processes
+# (e.g. downloading container images and model weights)
+helm-extra-args: --timeout 1200s

From 56564cfa47fa671ab5e88cf37abf84e72b7106c0 Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Fri, 14 Feb 2025 22:44:57 +0000
Subject: [PATCH 18/20] Try to speed up VLM test

---
 charts/azimuth-image-analysis/ci/test-values.yaml | 2 +-
 web-apps/image-analysis/gradio-client-test.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/charts/azimuth-image-analysis/ci/test-values.yaml b/charts/azimuth-image-analysis/ci/test-values.yaml
index 09ad8ba..7f57671 100644
--- a/charts/azimuth-image-analysis/ci/test-values.yaml
+++ b/charts/azimuth-image-analysis/ci/test-values.yaml
@@ -16,7 +16,7 @@ azimuth-llm:
       model_name: *model
       # Verify that we can set non-standard LLM params
       llm_params:
-        max_tokens: 101
+        max_tokens: 10 # Constrain response tokens to speed up CI test
         temperature: 0.1
         top_p: 0.15
         presence_penalty: 0.9
diff --git a/web-apps/image-analysis/gradio-client-test.py b/web-apps/image-analysis/gradio-client-test.py
index a675ba6..56e333e 100644
--- a/web-apps/image-analysis/gradio-client-test.py
+++ b/web-apps/image-analysis/gradio-client-test.py
@@ -10,7 +10,7 @@
     try:
         client = Client(gradio_host)
         result = client.predict(
-    		image_url="https://miro.medium.com/v2/resize:fit:1100/format:webp/1*xDgN0L2RfkS8Kc7uTwsRqw.jpeg",
+            image_url="https://media.licdn.com/dms/image/v2/D4D0BAQHyxNra6_PoUQ/company-logo_200_200/company-logo_200_200/0/1704365018113/stackhpc_ltd_logo?e=1747872000&v=beta&t=Ed3-KZS-sHlg-ne1KC0YjI4Ez7yVvJzWr103nm5eVK0",  # NOTE(review): the `e=`/`t=` query params look like a signed, expiring link - confirm this URL stays valid for CI
     		prompt="Hi",
     		api_name="/predict"
         )

From 2c19a83aba0c4b190f007c4572f06dff03804f63 Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Sat, 15 Feb 2025 10:55:54 +0000
Subject: [PATCH 19/20] Appease chart linter

---
 charts/azimuth-image-analysis/ci/test-values.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charts/azimuth-image-analysis/ci/test-values.yaml b/charts/azimuth-image-analysis/ci/test-values.yaml
index 7f57671..a1ca32f 100644
--- a/charts/azimuth-image-analysis/ci/test-values.yaml
+++ b/charts/azimuth-image-analysis/ci/test-values.yaml
@@ -16,7 +16,7 @@ azimuth-llm:
       model_name: *model
       # Verify that we can set non-standard LLM params
       llm_params:
-        max_tokens: 10 # Constrain response tokens to speed up CI test
+        max_tokens: 10  # Constrain response tokens to speed up CI test
         temperature: 0.1
         top_p: 0.15
         presence_penalty: 0.9

From a508241a0bd36ce93c993b49d1b507c62754719a Mon Sep 17 00:00:00 2001
From: sd109 <sdavidson327@gmail.com>
Date: Tue, 18 Feb 2025 13:45:25 +0000
Subject: [PATCH 20/20] Add explanatory comments

---
 charts/azimuth-llm/templates/test/end-to-end.yml | 2 ++
 web-apps/chat/defaults.yml                       | 9 ++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/charts/azimuth-llm/templates/test/end-to-end.yml b/charts/azimuth-llm/templates/test/end-to-end.yml
index 741a670..6aecf38 100644
--- a/charts/azimuth-llm/templates/test/end-to-end.yml
+++ b/charts/azimuth-llm/templates/test/end-to-end.yml
@@ -10,6 +10,8 @@ spec:
     spec:
       containers:
       - name: gradio-client-test
+        # Assumes that one of the in-repo Gradio apps is used and that
+        # the app includes a `gradio-client-test.py` script.
         image: {{ printf "%s:%s" .Values.ui.image.repository (default .Chart.AppVersion .Values.ui.image.tag) }}
         imagePullPolicy: IfNotPresent
         command:
diff --git a/web-apps/chat/defaults.yml b/web-apps/chat/defaults.yml
index 80b4549..8a18976 100644
--- a/web-apps/chat/defaults.yml
+++ b/web-apps/chat/defaults.yml
@@ -1,12 +1,15 @@
+
+# Default target is a local Ollama instance
+# running inside the same Docker network
+model_name: smollm2:135m
+backend_url: http://ollama:11434
+
 host_address: 0.0.0.0
 
 model_instruction: "You are a helpful and cheerful AI assistant. Please respond appropriately."
 
 page_title: Large Language Model
 
-backend_url: http://ollama:11434
-model_name: smollm2:135m
-
 # LLM request parameters
 # See https://platform.openai.com/docs/api-reference/chat/create
 # and https://docs.vllm.ai/en/v0.6.0/serving/openai_compatible_server.html#extra-parameters