From 2ee635456e8354f0dd7fc1a2c885f2fc42707d31 Mon Sep 17 00:00:00 2001 From: Alex-Welsh Date: Fri, 14 Feb 2025 11:13:05 +0000 Subject: [PATCH 01/20] Add CPU-backed vLLM test case --- charts/azimuth-chat/ci/vllm-cpu-values.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 charts/azimuth-chat/ci/vllm-cpu-values.yaml diff --git a/charts/azimuth-chat/ci/vllm-cpu-values.yaml b/charts/azimuth-chat/ci/vllm-cpu-values.yaml new file mode 100644 index 0000000..071b55a --- /dev/null +++ b/charts/azimuth-chat/ci/vllm-cpu-values.yaml @@ -0,0 +1,14 @@ +azimuth-llm: + ui: + service: + zenith: + enabled: false + api: + image: + repository: ghcr.io/stackhpc/vllm-cpu + version: v0.7.2 + monitoring: + enabled: false + gpus: 0 + huggingface: + model: HuggingFaceTB/SmolLM2-135M-Instruct From 4ad5c11ec069cf4025fdbddf1cbebbdbf8570c4c Mon Sep 17 00:00:00 2001 From: Alex-Welsh Date: Fri, 14 Feb 2025 14:09:54 +0000 Subject: [PATCH 02/20] wip: test in CI --- charts/azimuth-chat/ci/ui-only-values.yaml | 16 ---------------- charts/azimuth-llm/values.yaml | 3 +-- web-apps/chat/defaults.yml | 6 ------ 3 files changed, 1 insertion(+), 24 deletions(-) delete mode 100644 charts/azimuth-chat/ci/ui-only-values.yaml diff --git a/charts/azimuth-chat/ci/ui-only-values.yaml b/charts/azimuth-chat/ci/ui-only-values.yaml deleted file mode 100644 index b66347d..0000000 --- a/charts/azimuth-chat/ci/ui-only-values.yaml +++ /dev/null @@ -1,16 +0,0 @@ -azimuth-llm: - api: - enabled: false - ui: - service: - zenith: - enabled: false - appSettings: - # Verify that we can set non-standard LLM params - llm_params: - max_tokens: 101 - temperature: 0.1 - top_p: 0.15 - top_k: 1 - presence_penalty: 0.9 - frequency_penalty: 1 diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml index 29879d4..cd483c1 100644 --- a/charts/azimuth-llm/values.yaml +++ b/charts/azimuth-llm/values.yaml @@ -56,8 +56,7 @@ api: hostPath: path: /tmp/llm/huggingface-cache # Number of gpus to 
requests for each api pod instance - # NOTE: This must be in the range 1 <= value <= N, where - # 'N' is the number of GPUs available in a single + # NOTE: This must not exceed the number of GPUs available in a single # worker node on the target Kubernetes cluster. # NOTE: According to the vLLM docs found here # https://docs.vllm.ai/en/latest/serving/distributed_serving.html diff --git a/web-apps/chat/defaults.yml b/web-apps/chat/defaults.yml index b0260a9..b9ce61f 100644 --- a/web-apps/chat/defaults.yml +++ b/web-apps/chat/defaults.yml @@ -1,9 +1,3 @@ - -# Default target is a local ollama instance -# running inside the same docker network -model_name: smollm2:135m -backend_url: http://ollama:11434 - host_address: 0.0.0.0 model_instruction: "You are a helpful and cheerful AI assistant. Please respond appropriately." From 1a309ee4eb25294d6db9e732e8ede9788be267da Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 14 Feb 2025 17:12:14 +0000 Subject: [PATCH 03/20] Pick default vLLM image based on GPU count --- charts/azimuth-llm/templates/api/deployment.yml | 7 ++++--- charts/azimuth-llm/values.yaml | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/charts/azimuth-llm/templates/api/deployment.yml b/charts/azimuth-llm/templates/api/deployment.yml index dc41a5f..e3b37fe 100644 --- a/charts/azimuth-llm/templates/api/deployment.yml +++ b/charts/azimuth-llm/templates/api/deployment.yml @@ -19,7 +19,8 @@ spec: spec: containers: - name: {{ .Release.Name }}-api - image: {{ printf "%s:%s" .Values.api.image.repository .Values.api.image.version }} + {{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" "vllm/vllm-openai" (eq (.Values.api.gpus | int) 0)) -}} + image: {{ printf "%s:%s" $imageRepo .Values.api.image.version }} ports: - name: api containerPort: 8000 @@ -29,7 +30,7 @@ spec: args: - --model - {{ .Values.huggingface.model }} - {{- include "azimuth-llm.chatTemplate" . 
| nindent 10 }} + {{- include "azimuth-llm.chatTemplate" . | nindent 10 -}} {{- if .Values.api.modelMaxContextLength -}} - --max-model-len - {{ .Values.api.modelMaxContextLength | quote }} @@ -41,7 +42,7 @@ spec: {{- if .Values.api.extraArgs -}} {{- .Values.api.extraArgs | toYaml | nindent 10 }} {{- end -}} - {{- if .Values.huggingface.secretName }} + {{- if .Values.huggingface.secretName -}} envFrom: - secretRef: name: {{ .Values.huggingface.secretName }} diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml index cd483c1..09d5eec 100644 --- a/charts/azimuth-llm/values.yaml +++ b/charts/azimuth-llm/values.yaml @@ -33,7 +33,9 @@ api: enabled: true # Container image config image: - repository: vllm/vllm-openai + # Defaults to vllm/vllm-openai when api.gpus > 0 + # or ghcr.io/stackhpc/vllm-cpu when api.gpus == 0 + repository: version: v0.6.3 monitoring: enabled: true From 6e1fd111e1199fd94cb3aed81366a9dc29a7d101 Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 14 Feb 2025 17:20:56 +0000 Subject: [PATCH 04/20] Simplify CI test values for each chart --- charts/azimuth-chat/ci/test-values.yaml | 23 +++++++++++++++++++ charts/azimuth-chat/ci/vllm-cpu-values.yaml | 14 ----------- .../{ui-only-values.yaml => test-values.yaml} | 9 +++++++- charts/azimuth-llm/ci/default-values.yaml | 15 ++++++++++++ charts/azimuth-llm/ci/no-api-values.yaml | 6 ----- 5 files changed, 46 insertions(+), 21 deletions(-) create mode 100644 charts/azimuth-chat/ci/test-values.yaml delete mode 100644 charts/azimuth-chat/ci/vllm-cpu-values.yaml rename charts/azimuth-image-analysis/ci/{ui-only-values.yaml => test-values.yaml} (54%) create mode 100644 charts/azimuth-llm/ci/default-values.yaml delete mode 100644 charts/azimuth-llm/ci/no-api-values.yaml diff --git a/charts/azimuth-chat/ci/test-values.yaml b/charts/azimuth-chat/ci/test-values.yaml new file mode 100644 index 0000000..56b1dfb --- /dev/null +++ b/charts/azimuth-chat/ci/test-values.yaml @@ -0,0 +1,23 @@ +azimuth-llm: + 
huggingface: + # Use the smallest LLM we can find + model: HuggingFaceTB/SmolLM2-135M-Instruct + api: + # CI Kind cluster doesn't have kube-prometheus-stack + monitoring: + enabled: false + # No GPUs in CI runners + gpus: 0 + ui: + service: + zenith: + enabled: false + appSettings: + # Verify that we can set non-standard LLM params + llm_params: + max_tokens: 101 + temperature: 0.1 + top_k: 2 + top_p: 0.15 + presence_penalty: 0.9 + frequency_penalty: 1 diff --git a/charts/azimuth-chat/ci/vllm-cpu-values.yaml b/charts/azimuth-chat/ci/vllm-cpu-values.yaml deleted file mode 100644 index 071b55a..0000000 --- a/charts/azimuth-chat/ci/vllm-cpu-values.yaml +++ /dev/null @@ -1,14 +0,0 @@ -azimuth-llm: - ui: - service: - zenith: - enabled: false - api: - image: - repository: ghcr.io/stackhpc/vllm-cpu - version: v0.7.2 - monitoring: - enabled: false - gpus: 0 - huggingface: - model: HuggingFaceTB/SmolLM2-135M-Instruct diff --git a/charts/azimuth-image-analysis/ci/ui-only-values.yaml b/charts/azimuth-image-analysis/ci/test-values.yaml similarity index 54% rename from charts/azimuth-image-analysis/ci/ui-only-values.yaml rename to charts/azimuth-image-analysis/ci/test-values.yaml index 96f716d..72b7a5e 100644 --- a/charts/azimuth-image-analysis/ci/ui-only-values.yaml +++ b/charts/azimuth-image-analysis/ci/test-values.yaml @@ -1,6 +1,13 @@ azimuth-llm: + huggingface: + # Use the smallest vision model we can find + model: HuggingFaceTB/SmolVLM-256M-Instruct api: - enabled: false + # CI Kind cluster doesn't have kube-prometheus-stack + monitoring: + enabled: false + # No GPUs in CI runners + gpus: 0 ui: service: zenith: diff --git a/charts/azimuth-llm/ci/default-values.yaml b/charts/azimuth-llm/ci/default-values.yaml new file mode 100644 index 0000000..d116377 --- /dev/null +++ b/charts/azimuth-llm/ci/default-values.yaml @@ -0,0 +1,15 @@ +# This is intended to test the default chart values +# as close as possible given the lack of GPU +huggingface: + # Use the smallest LLM we can 
find + model: HuggingFaceTB/SmolLM2-135M-Instruct +api: + # CI Kind cluster doesn't have kube-prometheus-stack + monitoring: + enabled: false + # No GPUs in CI runners + gpus: 0 +ui: + service: + zenith: + enabled: false diff --git a/charts/azimuth-llm/ci/no-api-values.yaml b/charts/azimuth-llm/ci/no-api-values.yaml deleted file mode 100644 index e455af3..0000000 --- a/charts/azimuth-llm/ci/no-api-values.yaml +++ /dev/null @@ -1,6 +0,0 @@ -api: - enabled: false -ui: - service: - zenith: - enabled: false From 781660e1a7a3af43a45eaa09dee3e61aa663ae27 Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 14 Feb 2025 17:24:33 +0000 Subject: [PATCH 05/20] Skip Gradio tests for now --- .github/workflows/test-pr.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml index 2b16474..42c9b0e 100644 --- a/.github/workflows/test-pr.yml +++ b/.github/workflows/test-pr.yml @@ -44,10 +44,10 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} - - name: Run Gradio app tests - run: | - ./test-images.sh $(git rev-parse --short ${{ github.event.pull_request.head.sha }}) - working-directory: web-apps + # - name: Run Gradio app tests + # run: | + # ./test-images.sh $(git rev-parse --short ${{ github.event.pull_request.head.sha }}) + # working-directory: web-apps - name: Install Helm uses: azure/setup-helm@v4 @@ -66,7 +66,7 @@ jobs: with: cluster_name: ${{ env.CLUSTER_NAME }} - # NOTE(scott): Since the local Chart.yaml uses "appVersion: latest" and this + # NOTE(scott): Since the local Chart.yaml uses "appVersion: latest" and this # only gets overwritten to the correct commit SHA during Helm chart build, # we need to pull these published images and load them into the kind cluster # with the tag correct tag. 
From 09522e8e0a50862c828635a7910b0770f2477a28 Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 14 Feb 2025 17:28:39 +0000 Subject: [PATCH 06/20] Update comment --- charts/azimuth-llm/ci/default-values.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/charts/azimuth-llm/ci/default-values.yaml b/charts/azimuth-llm/ci/default-values.yaml index d116377..c610aab 100644 --- a/charts/azimuth-llm/ci/default-values.yaml +++ b/charts/azimuth-llm/ci/default-values.yaml @@ -1,5 +1,6 @@ # This is intended to test the default chart values -# as close as possible given the lack of GPU +# as close as possible given the constraints of running +# inside a Kind cluster within a CI runner huggingface: # Use the smallest LLM we can find model: HuggingFaceTB/SmolLM2-135M-Instruct From 44d1e6bdb78bb476b5ed90f2afabb920c549eb7f Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 14 Feb 2025 17:41:34 +0000 Subject: [PATCH 07/20] Fix failing Gradio unit test --- .github/workflows/test-pr.yml | 8 ++++---- web-apps/chat/defaults.yml | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml index 42c9b0e..dec9f48 100644 --- a/.github/workflows/test-pr.yml +++ b/.github/workflows/test-pr.yml @@ -44,10 +44,10 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} - # - name: Run Gradio app tests - # run: | - # ./test-images.sh $(git rev-parse --short ${{ github.event.pull_request.head.sha }}) - # working-directory: web-apps + - name: Run Gradio app tests + run: | + ./test-images.sh $(git rev-parse --short ${{ github.event.pull_request.head.sha }}) + working-directory: web-apps - name: Install Helm uses: azure/setup-helm@v4 diff --git a/web-apps/chat/defaults.yml b/web-apps/chat/defaults.yml index b9ce61f..80b4549 100644 --- a/web-apps/chat/defaults.yml +++ b/web-apps/chat/defaults.yml @@ -4,6 +4,9 @@ model_instruction: "You are a helpful and cheerful AI assistant. 
Please respond page_title: Large Language Model +backend_url: http://ollama:11434 +model_name: smollm2:135m + # LLM request parameters # See https://platform.openai.com/docs/api-reference/chat/create # and https://docs.vllm.ai/en/v0.6.0/serving/openai_compatible_server.html#extra-parameters @@ -24,6 +27,6 @@ theme_params: {} theme_params_extended: {} # Additional CSS and JS overrides -# See https://www.gradio.app/guides/custom-CSS-and-JS +# See https://www.gradio.app/guides/custom-CSS-and-JS css_overrides: custom_javascript: From 677df04cffb79cdb437b0a1bc53f8fbdc28c8206 Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 14 Feb 2025 17:48:55 +0000 Subject: [PATCH 08/20] Make linter happy --- charts/azimuth-llm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml index 7496aa3..18ac6b6 100644 --- a/charts/azimuth-llm/values.yaml +++ b/charts/azimuth-llm/values.yaml @@ -84,7 +84,7 @@ ui: # Container image config image: repository: ghcr.io/stackhpc/azimuth-llm-chat-ui - tag: # Defaults to chart's appVersion + tag: # Defaults to chart's appVersion imagePullPolicy: # The settings to be passed to the frontend web app. # Format depends on the chosen UI image above. For each of the UIs From e9e427d082ea8d28095e086c91705409b7761bd4 Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 14 Feb 2025 19:06:07 +0000 Subject: [PATCH 09/20] Bump test job time limit --- charts/azimuth-llm/templates/test/end-to-end.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/azimuth-llm/templates/test/end-to-end.yml b/charts/azimuth-llm/templates/test/end-to-end.yml index e94d4d2..5a31f5a 100644 --- a/charts/azimuth-llm/templates/test/end-to-end.yml +++ b/charts/azimuth-llm/templates/test/end-to-end.yml @@ -26,5 +26,5 @@ spec: restartPolicy: Never # Allow plenty of retries since downloading # model weights can take a long time. 
- backoffLimit: 10 + backoffLimit: 20 {{- end -}} From 873558dfd19e75c88897357d27bf790c1021129f Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 14 Feb 2025 19:30:26 +0000 Subject: [PATCH 10/20] Wait for model readiness in test script --- .../azimuth-llm/templates/test/end-to-end.yml | 5 ++--- web-apps/chat/gradio-client-test.py | 20 ++++++++++++++++--- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/charts/azimuth-llm/templates/test/end-to-end.yml b/charts/azimuth-llm/templates/test/end-to-end.yml index 5a31f5a..de97493 100644 --- a/charts/azimuth-llm/templates/test/end-to-end.yml +++ b/charts/azimuth-llm/templates/test/end-to-end.yml @@ -24,7 +24,6 @@ spec: - http://{{ .Values.ui.service.name }}.{{ .Release.Namespace }}.svc {{- end }} restartPolicy: Never - # Allow plenty of retries since downloading - # model weights can take a long time. - backoffLimit: 20 + # Handle retries within gradio-test-client script + backoffLimit: 1 {{- end -}} diff --git a/web-apps/chat/gradio-client-test.py b/web-apps/chat/gradio-client-test.py index ddf245c..723852d 100644 --- a/web-apps/chat/gradio-client-test.py +++ b/web-apps/chat/gradio-client-test.py @@ -1,7 +1,21 @@ import sys +import time + from gradio_client import Client gradio_host = sys.argv[1] -client = Client(gradio_host) -result = client.predict("Hi", api_name="/chat") -print(result) + +retries = 60 +for n in range(1, retries+1): + try: + client = Client(gradio_host) + result = client.predict("Hi", api_name="/chat") + print(result) + break + except Exception as err: + msg = f"Attempt {n} / {retries} encounter error: {err}" + if n < retries: + print(msg, "- waiting 10 seconds before retrying") + time.sleep(10) + else: + sys.exit(f"{msg} - no more retries left") # exit non-zero so the test Job is marked failed From dfba4252dc851aa56e57c4a64c853187f9473e8b Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 14 Feb 2025 19:32:28 +0000 Subject: [PATCH 11/20] Add missing CI test values --- 
charts/azimuth-chat/ci/test-values.yaml | 3 ++- 
charts/azimuth-image-analysis/ci/test-values.yaml | 3 ++- charts/azimuth-llm/ci/default-values.yaml | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/charts/azimuth-chat/ci/test-values.yaml b/charts/azimuth-chat/ci/test-values.yaml index 56b1dfb..98e3530 100644 --- a/charts/azimuth-chat/ci/test-values.yaml +++ b/charts/azimuth-chat/ci/test-values.yaml @@ -1,7 +1,7 @@ azimuth-llm: huggingface: # Use the smallest LLM we can find - model: HuggingFaceTB/SmolLM2-135M-Instruct + model: &model HuggingFaceTB/SmolLM2-135M-Instruct api: # CI Kind cluster doesn't have kube-prometheus-stack monitoring: @@ -13,6 +13,7 @@ azimuth-llm: zenith: enabled: false appSettings: + model_name: *model # Verify that we can set non-standard LLM params llm_params: max_tokens: 101 diff --git a/charts/azimuth-image-analysis/ci/test-values.yaml b/charts/azimuth-image-analysis/ci/test-values.yaml index 72b7a5e..09ad8ba 100644 --- a/charts/azimuth-image-analysis/ci/test-values.yaml +++ b/charts/azimuth-image-analysis/ci/test-values.yaml @@ -1,7 +1,7 @@ azimuth-llm: huggingface: # Use the smallest vision model we can find - model: HuggingFaceTB/SmolVLM-256M-Instruct + model: &model HuggingFaceTB/SmolVLM-256M-Instruct api: # CI Kind cluster doesn't have kube-prometheus-stack monitoring: @@ -13,6 +13,7 @@ azimuth-llm: zenith: enabled: false appSettings: + model_name: *model # Verify that we can set non-standard LLM params llm_params: max_tokens: 101 diff --git a/charts/azimuth-llm/ci/default-values.yaml b/charts/azimuth-llm/ci/default-values.yaml index c610aab..40dbbde 100644 --- a/charts/azimuth-llm/ci/default-values.yaml +++ b/charts/azimuth-llm/ci/default-values.yaml @@ -3,7 +3,7 @@ # inside a Kind cluster within a CI runner huggingface: # Use the smallest LLM we can find - model: HuggingFaceTB/SmolLM2-135M-Instruct + model: &model HuggingFaceTB/SmolLM2-135M-Instruct api: # CI Kind cluster doesn't have kube-prometheus-stack monitoring: @@ -14,3 +14,5 @@ ui: service: zenith: 
enabled: false + appSettings: + model_name: *model From cdcea4ff7a348352869a716242cc1ff49cb5c357 Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 14 Feb 2025 20:02:58 +0000 Subject: [PATCH 12/20] Make python output visible in test job logs --- charts/azimuth-llm/templates/test/end-to-end.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/charts/azimuth-llm/templates/test/end-to-end.yml b/charts/azimuth-llm/templates/test/end-to-end.yml index de97493..881e369 100644 --- a/charts/azimuth-llm/templates/test/end-to-end.yml +++ b/charts/azimuth-llm/templates/test/end-to-end.yml @@ -23,6 +23,10 @@ spec: {{- else }} - http://{{ .Values.ui.service.name }}.{{ .Release.Namespace }}.svc {{- end }} + env: + - name: PYTHONUNBUFFERED + value: "1" + tty: true # Make stdout from python visible in k8s logs restartPolicy: Never # Handle retries within gradio-test-client script backoffLimit: 1 From 2f15872d33a8c57bfe6a0e976f173823599f6db2 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 13 Feb 2025 16:45:00 +0000 Subject: [PATCH 13/20] Fix in-cluster backend detection --- web-apps/chat/app.py | 1 - web-apps/utils/utils.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/web-apps/chat/app.py b/web-apps/chat/app.py index df369ec..cee141f 100644 --- a/web-apps/chat/app.py +++ b/web-apps/chat/app.py @@ -61,7 +61,6 @@ class PossibleSystemPromptException(Exception): streaming=True, ) - def inference(latest_message, history): # Allow mutating global variable global BACKEND_INITIALISED diff --git a/web-apps/utils/utils.py b/web-apps/utils/utils.py index 28e7a90..dc1861d 100644 --- a/web-apps/utils/utils.py +++ b/web-apps/utils/utils.py @@ -95,7 +95,7 @@ def load_settings() -> dict: "Please check for typos" ) settings = {**defaults, **overrides} - if "backend_url" not in settings or not settings["backend_url"]: + if "backend_url" not in settings or settings["backend_url"] == defaults["backend_url"]: # Try to detect in-cluster address in_cluster_backend = 
api_address_in_cluster() if not in_cluster_backend: From 88f957a461a5671ef95cc9bd32566f662d3d82ba Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 14 Feb 2025 20:59:45 +0000 Subject: [PATCH 14/20] Set in-cluster service addresses automatically --- charts/azimuth-llm/templates/api/ingress.yml | 2 +- charts/azimuth-llm/templates/api/service.yml | 2 +- .../templates/api/zenith-client.yml | 2 +- .../azimuth-llm/templates/test/end-to-end.yml | 2 +- charts/azimuth-llm/templates/test/web-app.yml | 2 +- .../templates/ui/app-config-map.yml | 2 ++ charts/azimuth-llm/templates/ui/ingress.yml | 2 +- charts/azimuth-llm/templates/ui/service.yml | 2 +- .../templates/ui/ui-zenith-client.yml | 2 +- charts/azimuth-llm/values.yaml | 2 -- web-apps/utils/utils.py | 35 +++---------------- 11 files changed, 15 insertions(+), 40 deletions(-) diff --git a/charts/azimuth-llm/templates/api/ingress.yml b/charts/azimuth-llm/templates/api/ingress.yml index 14c13b1..d0ba22c 100644 --- a/charts/azimuth-llm/templates/api/ingress.yml +++ b/charts/azimuth-llm/templates/api/ingress.yml @@ -16,7 +16,7 @@ spec: pathType: Prefix backend: service: - name: {{ .Values.api.service.name }} + name: {{ .Release.Name }}-api port: # Must match Service resource number: 80 diff --git a/charts/azimuth-llm/templates/api/service.yml b/charts/azimuth-llm/templates/api/service.yml index e4e31c6..0400545 100644 --- a/charts/azimuth-llm/templates/api/service.yml +++ b/charts/azimuth-llm/templates/api/service.yml @@ -2,7 +2,7 @@ apiVersion: v1 kind: Service metadata: - name: {{ .Values.api.service.name }} + name: {{ .Release.Name }}-api labels: {{- include "azimuth-llm.api-selectorLabels" . 
| nindent 4 }} spec: diff --git a/charts/azimuth-llm/templates/api/zenith-client.yml b/charts/azimuth-llm/templates/api/zenith-client.yml index ae20b2a..d2832bc 100644 --- a/charts/azimuth-llm/templates/api/zenith-client.yml +++ b/charts/azimuth-llm/templates/api/zenith-client.yml @@ -8,7 +8,7 @@ metadata: spec: reservationName: {{ .Release.Name }}-api upstream: - serviceName: {{ .Values.api.service.name }} + serviceName: {{ .Release.Name }}-api auth: skip: {{ .Values.api.service.zenith.skipAuth }} {{- end -}} diff --git a/charts/azimuth-llm/templates/test/end-to-end.yml b/charts/azimuth-llm/templates/test/end-to-end.yml index 881e369..f27147e 100644 --- a/charts/azimuth-llm/templates/test/end-to-end.yml +++ b/charts/azimuth-llm/templates/test/end-to-end.yml @@ -21,7 +21,7 @@ spec: {{- if .Values.ingress.ui.enabled }} - {{ .Values.ingress.host }}{{ .Values.ingress.ui.path }} {{- else }} - - http://{{ .Values.ui.service.name }}.{{ .Release.Namespace }}.svc + - http://{{ .Release.Name }}-ui.{{ .Release.Namespace }}.svc {{- end }} env: - name: PYTHONUNBUFFERED diff --git a/charts/azimuth-llm/templates/test/web-app.yml b/charts/azimuth-llm/templates/test/web-app.yml index 65dbb90..3193fb3 100644 --- a/charts/azimuth-llm/templates/test/web-app.yml +++ b/charts/azimuth-llm/templates/test/web-app.yml @@ -18,7 +18,7 @@ spec: {{- if .Values.ingress.ui.enabled }} - {{ .Values.ingress.host | trimPrefix "http://" | trimPrefix "https://" }}{{ .Values.ingress.ui.path }} {{- else }} - - {{ .Values.ui.service.name }}.{{ .Release.Namespace }}.svc + - {{ .Release.Name }}-ui.{{ .Release.Namespace }}.svc {{- end }} - "80" restartPolicy: Never diff --git a/charts/azimuth-llm/templates/ui/app-config-map.yml b/charts/azimuth-llm/templates/ui/app-config-map.yml index e77da18..8a29584 100644 --- a/charts/azimuth-llm/templates/ui/app-config-map.yml +++ b/charts/azimuth-llm/templates/ui/app-config-map.yml @@ -7,5 +7,7 @@ metadata: {{- include "azimuth-llm.labels" . 
| nindent 4 }} data: overrides.yml: | + {{/* Set backend URL to in-cluster API address */}} + {{- $_ := set .Values.ui.appSettings "backend_url" (printf "http://%s-api.%s.svc" .Release.Name .Release.Namespace) }} {{- .Values.ui.appSettings | toYaml | nindent 4 }} {{- end -}} diff --git a/charts/azimuth-llm/templates/ui/ingress.yml b/charts/azimuth-llm/templates/ui/ingress.yml index e9e6e02..c2cc8be 100644 --- a/charts/azimuth-llm/templates/ui/ingress.yml +++ b/charts/azimuth-llm/templates/ui/ingress.yml @@ -16,7 +16,7 @@ spec: pathType: Prefix backend: service: - name: {{ .Values.ui.service.name }} + name: {{ .Release.Name }}-ui port: # Must match Service resource number: 80 diff --git a/charts/azimuth-llm/templates/ui/service.yml b/charts/azimuth-llm/templates/ui/service.yml index 1de5c17..c199a1d 100644 --- a/charts/azimuth-llm/templates/ui/service.yml +++ b/charts/azimuth-llm/templates/ui/service.yml @@ -2,7 +2,7 @@ apiVersion: v1 kind: Service metadata: - name: {{ .Values.ui.service.name }} + name: {{ .Release.Name }}-ui labels: {{- include "azimuth-llm.labels" . 
| nindent 4 }} spec: diff --git a/charts/azimuth-llm/templates/ui/ui-zenith-client.yml b/charts/azimuth-llm/templates/ui/ui-zenith-client.yml index cbdbe95..ce4b5ad 100644 --- a/charts/azimuth-llm/templates/ui/ui-zenith-client.yml +++ b/charts/azimuth-llm/templates/ui/ui-zenith-client.yml @@ -9,7 +9,7 @@ metadata: spec: reservationName: {{ .Release.Name }}-ui upstream: - serviceName: {{ .Values.ui.service.name }} + serviceName: {{ .Release.Name }}-ui auth: skip: {{ .Values.ui.service.zenith.skipAuth }} {{- end -}} diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml index 18ac6b6..e693bbf 100644 --- a/charts/azimuth-llm/values.yaml +++ b/charts/azimuth-llm/values.yaml @@ -43,7 +43,6 @@ api: replicas: 1 # Service config service: - name: llm-backend type: ClusterIP zenith: enabled: false @@ -106,7 +105,6 @@ ui: - Arial # Service config service: - name: web-app type: ClusterIP zenith: enabled: true diff --git a/web-apps/utils/utils.py b/web-apps/utils/utils.py index dc1861d..cb99776 100644 --- a/web-apps/utils/utils.py +++ b/web-apps/utils/utils.py @@ -43,28 +43,6 @@ class LLMParams(BaseModel): model_config = ConfigDict(extra="forbid") -NAMESPACE_FILE_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" - - -def get_k8s_namespace(): - try: - current_k8s_namespace = open(NAMESPACE_FILE_PATH).read() - return current_k8s_namespace - except FileNotFoundError: - return None - - -def api_address_in_cluster(): - k8s_ns = get_k8s_namespace() - if k8s_ns: - return f"http://llm-backend.{k8s_ns}.svc" - else: - log.warning( - "Failed to determine k8s namespace from %s - assuming non-kubernetes environment.", - NAMESPACE_FILE_PATH, - ) - - # Method for loading settings from files def load_yaml(file_path: str) -> dict: with open(file_path, "r") as file: @@ -95,12 +73,9 @@ def load_settings() -> dict: "Please check for typos" ) settings = {**defaults, **overrides} - if "backend_url" not in settings or settings["backend_url"] == 
defaults["backend_url"]: - # Try to detect in-cluster address - in_cluster_backend = api_address_in_cluster() - if not in_cluster_backend: - raise Exception( - "Backend URL must be provided in settings when running outside of Kubernetes." - ) - settings["backend_url"] = in_cluster_backend + + if "backend_url" not in settings: + raise Exception( + "Backend URL must be provided in settings when running outside of Kubernetes." + ) return settings From 3724392cae474969d5c336fc87567e5529a984c1 Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 14 Feb 2025 21:03:24 +0000 Subject: [PATCH 15/20] Remove unnecessary comment --- charts/azimuth-llm/templates/ui/app-config-map.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/charts/azimuth-llm/templates/ui/app-config-map.yml b/charts/azimuth-llm/templates/ui/app-config-map.yml index 8a29584..7a7b9e6 100644 --- a/charts/azimuth-llm/templates/ui/app-config-map.yml +++ b/charts/azimuth-llm/templates/ui/app-config-map.yml @@ -7,7 +7,6 @@ metadata: {{- include "azimuth-llm.labels" . 
| nindent 4 }} data: overrides.yml: | - {{/* Set backend URL to in-cluster API address */}} {{- $_ := set .Values.ui.appSettings "backend_url" (printf "http://%s-api.%s.svc" .Release.Name .Release.Namespace) }} {{- .Values.ui.appSettings | toYaml | nindent 4 }} {{- end -}} From 3e8c8deb6b3d79ac141e7eeb575e83060a5e12f4 Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 14 Feb 2025 21:44:06 +0000 Subject: [PATCH 16/20] Add separate VLM end to end test --- .../azimuth-llm/templates/test/end-to-end.yml | 5 +--- web-apps/image-analysis/gradio-client-test.py | 25 +++++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 web-apps/image-analysis/gradio-client-test.py diff --git a/charts/azimuth-llm/templates/test/end-to-end.yml b/charts/azimuth-llm/templates/test/end-to-end.yml index f27147e..741a670 100644 --- a/charts/azimuth-llm/templates/test/end-to-end.yml +++ b/charts/azimuth-llm/templates/test/end-to-end.yml @@ -10,10 +10,7 @@ spec: spec: containers: - name: gradio-client-test - {{- /* - Use the chat image since we know this contains the gradio_client package - */}} - image: {{ printf "ghcr.io/stackhpc/azimuth-llm-chat-ui:%s" (default .Chart.AppVersion .Values.ui.image.tag) }} + image: {{ printf "%s:%s" .Values.ui.image.repository (default .Chart.AppVersion .Values.ui.image.tag) }} imagePullPolicy: IfNotPresent command: - python diff --git a/web-apps/image-analysis/gradio-client-test.py b/web-apps/image-analysis/gradio-client-test.py new file mode 100644 index 0000000..a675ba6 --- /dev/null +++ b/web-apps/image-analysis/gradio-client-test.py @@ -0,0 +1,25 @@ +import sys +import time + +from gradio_client import Client + +gradio_host = sys.argv[1] + +retries = 60 +for n in range(1, retries+1): + try: + client = Client(gradio_host) + result = client.predict( + image_url="https://miro.medium.com/v2/resize:fit:1100/format:webp/1*xDgN0L2RfkS8Kc7uTwsRqw.jpeg", + prompt="Hi", + api_name="/predict" + ) + print(result) + break + except Exception 
as err: + msg = f"Attempt {n} / {retries} encounter error: {err}" + if n < retries: + print(msg, "- waiting 10 seconds before retrying") + time.sleep(10) + else: + sys.exit(f"{msg} - no more retries left") # exit non-zero so the test Job is marked failed From d23de83abc45083e9332abe18eb12cad2fc62f29 Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 14 Feb 2025 22:07:01 +0000 Subject: [PATCH 17/20] Bump Helm timeout to 20m --- ct.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ct.yaml b/ct.yaml index f5fada9..3fa795d 100644 --- a/ct.yaml +++ b/ct.yaml @@ -6,3 +6,6 @@ validate-maintainers: false all: true # Split output to make it look nice in GitHub Actions tab github-groups: true +# Allow for long running install and test processes +# (e.g. downloading container images and model weights) +helm-extra-args: --timeout 1200s From 56564cfa47fa671ab5e88cf37abf84e72b7106c0 Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 14 Feb 2025 22:44:57 +0000 Subject: [PATCH 18/20] Try to speed up VLM test --- charts/azimuth-image-analysis/ci/test-values.yaml | 2 +- web-apps/image-analysis/gradio-client-test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/azimuth-image-analysis/ci/test-values.yaml b/charts/azimuth-image-analysis/ci/test-values.yaml index 09ad8ba..7f57671 100644 --- a/charts/azimuth-image-analysis/ci/test-values.yaml +++ b/charts/azimuth-image-analysis/ci/test-values.yaml @@ -16,7 +16,7 @@ azimuth-llm: model_name: *model # Verify that we can set non-standard LLM params llm_params: - max_tokens: 101 + max_tokens: 10 # Constrain response tokens to speed up CI test temperature: 0.1 top_p: 0.15 presence_penalty: 0.9 diff --git a/web-apps/image-analysis/gradio-client-test.py b/web-apps/image-analysis/gradio-client-test.py index a675ba6..56e333e 100644 --- a/web-apps/image-analysis/gradio-client-test.py +++ b/web-apps/image-analysis/gradio-client-test.py @@ -10,7 +10,7 @@ try: client = Client(gradio_host) result = 
client.predict( - 
image_url="https://miro.medium.com/v2/resize:fit:1100/format:webp/1*xDgN0L2RfkS8Kc7uTwsRqw.jpeg", + image_url="https://media.licdn.com/dms/image/v2/D4D0BAQHyxNra6_PoUQ/company-logo_200_200/company-logo_200_200/0/1704365018113/stackhpc_ltd_logo?e=1747872000&v=beta&t=Ed3-KZS-sHlg-ne1KC0YjI4Ez7yVvJzWr103nm5eVK0", prompt="Hi", api_name="/predict" ) From 2c19a83aba0c4b190f007c4572f06dff03804f63 Mon Sep 17 00:00:00 2001 From: sd109 Date: Sat, 15 Feb 2025 10:55:54 +0000 Subject: [PATCH 19/20] Appease chart linter --- charts/azimuth-image-analysis/ci/test-values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/azimuth-image-analysis/ci/test-values.yaml b/charts/azimuth-image-analysis/ci/test-values.yaml index 7f57671..a1ca32f 100644 --- a/charts/azimuth-image-analysis/ci/test-values.yaml +++ b/charts/azimuth-image-analysis/ci/test-values.yaml @@ -16,7 +16,7 @@ azimuth-llm: model_name: *model # Verify that we can set non-standard LLM params llm_params: - max_tokens: 10 # Constrain response tokens to speed up CI test + max_tokens: 10 # Constrain response tokens to speed up CI test temperature: 0.1 top_p: 0.15 presence_penalty: 0.9 From a508241a0bd36ce93c993b49d1b507c62754719a Mon Sep 17 00:00:00 2001 From: sd109 Date: Tue, 18 Feb 2025 13:45:25 +0000 Subject: [PATCH 20/20] Add explanatory comments --- charts/azimuth-llm/templates/test/end-to-end.yml | 2 ++ web-apps/chat/defaults.yml | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/charts/azimuth-llm/templates/test/end-to-end.yml b/charts/azimuth-llm/templates/test/end-to-end.yml index 741a670..6aecf38 100644 --- a/charts/azimuth-llm/templates/test/end-to-end.yml +++ b/charts/azimuth-llm/templates/test/end-to-end.yml @@ -10,6 +10,8 @@ spec: spec: containers: - name: gradio-client-test + # Assumes that one of the in-repo Gradio apps is used and that + # the app includes a `gradio-client-test.py` script. 
image: {{ printf "%s:%s" .Values.ui.image.repository (default .Chart.AppVersion .Values.ui.image.tag) }} imagePullPolicy: IfNotPresent command: diff --git a/web-apps/chat/defaults.yml b/web-apps/chat/defaults.yml index 80b4549..8a18976 100644 --- a/web-apps/chat/defaults.yml +++ b/web-apps/chat/defaults.yml @@ -1,12 +1,15 @@ + +# Default target is a local ollama instance +# running inside the same docker network +model_name: smollm2:135m +backend_url: http://ollama:11434 + host_address: 0.0.0.0 model_instruction: "You are a helpful and cheerful AI assistant. Please respond appropriately." page_title: Large Language Model -backend_url: http://ollama:11434 -model_name: smollm2:135m - # LLM request parameters # See https://platform.openai.com/docs/api-reference/chat/create # and https://docs.vllm.ai/en/v0.6.0/serving/openai_compatible_server.html#extra-parameters