From 272157792d78b2f7aedfc83bc0953b06bf84ce7c Mon Sep 17 00:00:00 2001
From: sd109
Date: Mon, 18 Mar 2024 14:23:08 +0000
Subject: [PATCH 1/5] Add tox formatter config

---
 .gitignore |  1 +
 tox.ini    | 10 ++++++++++
 2 files changed, 11 insertions(+)
 create mode 100644 tox.ini

diff --git a/.gitignore b/.gitignore
index a0b78a6..e916d76 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
 __pycache__/
 **/*.secret
 .DS_Store
+.tox/
 
 # Ignore local dev helpers
 test-values.y[a]ml

diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..d813a5d
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,10 @@
+[tox]
+env_list =
+    format
+minversion = 4.11.3
+
+[testenv:format]
+description = run code formatter on web-app
+deps = black==23.12.1
+skip_install = true
+commands = black chart/web-app
\ No newline at end of file

From 630aa008d447f44b77b48d2c74204c8b0b89c7e0 Mon Sep 17 00:00:00 2001
From: sd109
Date: Mon, 18 Mar 2024 14:27:51 +0000
Subject: [PATCH 2/5] Ensure venv is activated correctly on tilt up

---
 Tiltfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Tiltfile b/Tiltfile
index abb119e..4d54978 100644
--- a/Tiltfile
+++ b/Tiltfile
@@ -56,7 +56,8 @@ if run_ui_locally:
         deps=["chart/web-app/"],
         resource_deps=["gradio-app-venv"],
         serve_cmd=" && ".join([
+            "source {}/bin/activate".format(venv_name),
             "cd chart/web-app",
-            "python app.py {}".format(hf_model),
+            "python3 app.py {}".format(hf_model),
         ])
     )
\ No newline at end of file

From 89c0079eb79c505f9eb901f0763aee4e9ca7ab80 Mon Sep 17 00:00:00 2001
From: sd109
Date: Mon, 18 Mar 2024 14:31:19 +0000
Subject: [PATCH 3/5] Punctuation

---
 chart/web-app/app.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chart/web-app/app.py b/chart/web-app/app.py
index ad43b1a..5a5785a 100644
--- a/chart/web-app/app.py
+++ b/chart/web-app/app.py
@@ -100,11 +100,11 @@ def inference(latest_message, history):
         if not BACKEND_INITIALISED:
             logger.info("Backend API not yet ready")
             gr.Info(
-                "Backend not ready - model may still be initialising - please try again later"
+                "Backend not ready - model may still be initialising - please try again later."
             )
         else:
             logger.error("Failed to connect to backend API: %s", err)
-            gr.Warning("Failed to connect to backend API")
+            gr.Warning("Failed to connect to backend API.")
 
     except openai.InternalServerError as err:
         gr.Warning(

From 5ec91aed42597e65770c79909a6de644b026643a Mon Sep 17 00:00:00 2001
From: sd109
Date: Mon, 18 Mar 2024 15:20:26 +0000
Subject: [PATCH 4/5] Add chatTemplate config value

Also fix missed renames of model_ -> hf_model_ config options.
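
For models whose HuggingFace repo ships no chat template (e.g.
TheBloke/WizardCoder-Python-34B-V1.0-AWQ, noted as such in the Tiltfile),
a template can now be supplied through the chart values. A rough sketch of
such an override - the template shown is illustrative only, the correct
Jinja template is model-specific:

    huggingface:
      chatTemplate: |
        {% for message in messages %}
        {{ message['role'] }}: {{ message['content'] }}
        {% endfor %}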
---
 chart/azimuth-ui.schema.yaml       |  2 +-
 chart/templates/api/deployment.yml |  4 ++++
 chart/values.schema.json           |  6 +++---
 chart/values.yaml                  | 13 +++++++++++--
 4 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/chart/azimuth-ui.schema.yaml b/chart/azimuth-ui.schema.yaml
index e601d3d..5cd2d8a 100644
--- a/chart/azimuth-ui.schema.yaml
+++ b/chart/azimuth-ui.schema.yaml
@@ -5,7 +5,7 @@ controls:
   /huggingface/token:
     type: TextControl
     secret: true
-  /ui/appSettings/model_instruction:
+  /ui/appSettings/hf_model_instruction:
     type: TextControl
   /ui/appSettings/page_title:
     type: TextControl

diff --git a/chart/templates/api/deployment.yml b/chart/templates/api/deployment.yml
index f6e3ea5..33954f2 100644
--- a/chart/templates/api/deployment.yml
+++ b/chart/templates/api/deployment.yml
@@ -28,6 +28,10 @@ spec:
         args:
           - --model
           - {{ .Values.huggingface.model }}
+          {{- if .Values.huggingface.chatTemplate }}
+          - --chat-template
+          - {{ quote .Values.huggingface.chatTemplate }}
+          {{- end -}}
          {{- if .Values.api.extraArgs -}}
          {{- .Values.api.extraArgs | toYaml | nindent 10 }}
          {{- end -}}

diff --git a/chart/values.schema.json b/chart/values.schema.json
index b9f56e4..c4082b3 100644
--- a/chart/values.schema.json
+++ b/chart/values.schema.json
@@ -25,13 +25,13 @@
         "appSettings": {
             "type": "object",
             "properties": {
-                "model_name": {
+                "hf_model_name": {
                     "type": "string",
                     "title": "Model Name",
                     "description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above.",
                     "default": "mistralai/Mistral-7B-Instruct-v0.2"
                 },
-                "model_instruction": {
+                "hf_model_instruction": {
                     "type": "string",
                     "title": "Instruction",
                     "description": "The initial model prompt (i.e. the hidden instructions) to use when generating responses.",
@@ -75,7 +75,7 @@
                 }
             },
 
-            "required": ["model_name", "model_instruction"]
+            "required": ["hf_model_name", "hf_model_instruction"]
         }
     }
 }

diff --git a/chart/values.yaml b/chart/values.yaml
index 3e016a8..e338b45 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -6,6 +6,15 @@ huggingface:
   # The name of the HuggingFace model to use
   # Use a yaml anchor to avoid duplication elsewhere
   model: &model-name ise-uiuc/Magicoder-S-DS-6.7B
+  # A Jinja formatted chat template to provide to the language model.
+  # See https://huggingface.co/blog/chat-templates for background info.
+  # If not provided, the default template specified in the HuggingFace
+  # model repository's tokenizer_config.json file is used. As explained
+  # in the above blog post, the HF template key in tokenizer_config.json
+  # is relatively new and not all HF models include a template in their
+  # repo files yet. This chart value provides a hook to manually apply the
+  # correct chat template for such models.
+  chatTemplate:
 
   # For private/gated huggingface models (e.g. Meta's Llama models)
   # you must provide your own huggingface token, for details see:
@@ -71,8 +80,8 @@ ui:
   # The values to be written to settings.yml for parsing as frontend app setting
   # (see example_app.py and config.py for example using pydantic-settings to configure app)
   appSettings:
-    model_name: *model-name
-    model_instruction: "You are a helpful AI assistant. Please respond appropriately."
+    hf_model_name: *model-name
+    hf_model_instruction: "You are a helpful AI assistant. Please respond appropriately."
   # Container image config
   image:
     repository: ghcr.io/stackhpc/azimuth-llm-ui-base

From 0381fdf2b7c8fe4568836848030175b21f06482f Mon Sep 17 00:00:00 2001
From: sd109
Date: Mon, 18 Mar 2024 15:39:10 +0000
Subject: [PATCH 5/5] Move test model name out of Tiltfile

---
 Tiltfile | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/Tiltfile b/Tiltfile
index 4d54978..73adff2 100644
--- a/Tiltfile
+++ b/Tiltfile
@@ -1,11 +1,3 @@
-# The HuggingFace model to use for testing
-# hf_model = "ise-uiuc/Magicoder-S-DS-6.7B" # Good lightweight model for testing
-# hf_model = "TheBloke/WizardCoder-Python-34B-V1.0-AWQ" # Poor performance, missing chat_template in repo
-hf_model = "TheBloke/SauerkrautLM-70B-v1-AWQ"
-# hf_model = "TheBloke/SauerkrautLM-Mixtral-8x7B-Instruct-AWQ" # Works well
-# hf_model = "abacusai/Smaug-Mixtral-v0.1" # GPU OOM
-# hf_model = "LoneStriker/Smaug-72B-v0.1-AWQ" # Works but produces nonsense responses
-
 # Toggles whether UI should be run locally using gradio hot-reloading
 # or should be included in the remote Helm install
 run_ui_locally = True
@@ -19,15 +11,18 @@ allow_k8s_contexts('production-llm-service-admin@production-llm-service')
 
 chart_yaml = helm(
     "chart/",
-    values="hu-dev-values.yml",
+    values="dev-values.yml",
     # Enable/disable remote UI install depending on if we're running it locally
     set=[
-        "huggingface.model={}".format(hf_model),
         "ui.enabled={}".format(str(not run_ui_locally).lower())
     ],
 )
 k8s_yaml(chart_yaml)
 
+# Parse LLM name from templated deployment
+api_deployment, _ = filter_yaml(chart_yaml, kind='Deployment', name='chart-api')
+hf_model = decode_yaml(api_deployment)['spec']['template']['spec']['containers'][0]['args'][1]
+
 if not run_ui_locally:
     # Port-forward web app to localhost:8080
     k8s_resource("chart-ui", port_forwards="8080:7680")
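
Note: after this patch the test model is selected via the local dev-values.yml
Helm values file rather than a hard-coded Tiltfile variable. A minimal sketch
of such a file, assuming the chart's default model (any HuggingFace model ID
accepted by the chart works here):

    huggingface:
      model: ise-uiuc/Magicoder-S-DS-6.7B

The Tiltfile then reads the model name back out of the rendered Deployment's
container args, so the locally run Gradio app always targets whichever model
the chart actually deploys.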