From 272157792d78b2f7aedfc83bc0953b06bf84ce7c Mon Sep 17 00:00:00 2001
From: sd109
Date: Mon, 18 Mar 2024 14:23:08 +0000
Subject: [PATCH 1/5] Add tox formatter config

---
 .gitignore |  1 +
 tox.ini    | 10 ++++++++++
 2 files changed, 11 insertions(+)
 create mode 100644 tox.ini

diff --git a/.gitignore b/.gitignore
index a0b78a6..e916d76 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
 __pycache__/
 **/*.secret
 .DS_Store
+.tox/
 
 # Ignore local dev helpers
 test-values.y[a]ml

diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..d813a5d
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,10 @@
+[tox]
+env_list =
+    format
+minversion = 4.11.3
+
+[testenv:format]
+description = run code formatter on web-app
+deps = black==23.12.1
+skip_install = true
+commands = black chart/web-app
\ No newline at end of file

From 630aa008d447f44b77b48d2c74204c8b0b89c7e0 Mon Sep 17 00:00:00 2001
From: sd109
Date: Mon, 18 Mar 2024 14:27:51 +0000
Subject: [PATCH 2/5] Ensure venv is activated correctly on tilt up

---
 Tiltfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Tiltfile b/Tiltfile
index abb119e..4d54978 100644
--- a/Tiltfile
+++ b/Tiltfile
@@ -56,7 +56,8 @@ if run_ui_locally:
         deps=["chart/web-app/"],
         resource_deps=["gradio-app-venv"],
         serve_cmd=" && ".join([
+            "source {}/bin/activate".format(venv_name),
             "cd chart/web-app",
-            "python app.py {}".format(hf_model),
+            "python3 app.py {}".format(hf_model),
         ])
     )
\ No newline at end of file

From 89c0079eb79c505f9eb901f0763aee4e9ca7ab80 Mon Sep 17 00:00:00 2001
From: sd109
Date: Mon, 18 Mar 2024 14:31:19 +0000
Subject: [PATCH 3/5] Punctuation

---
 chart/web-app/app.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chart/web-app/app.py b/chart/web-app/app.py
index ad43b1a..5a5785a 100644
--- a/chart/web-app/app.py
+++ b/chart/web-app/app.py
@@ -100,11 +100,11 @@ def inference(latest_message, history):
         if not BACKEND_INITIALISED:
             logger.info("Backend API not yet ready")
             gr.Info(
-                "Backend not ready - model may still be initialising - please try again later"
+                "Backend not ready - model may still be initialising - please try again later."
             )
         else:
             logger.error("Failed to connect to backend API: %s", err)
-            gr.Warning("Failed to connect to backend API")
+            gr.Warning("Failed to connect to backend API.")
 
     except openai.InternalServerError as err:
         gr.Warning(

From 5ec91aed42597e65770c79909a6de644b026643a Mon Sep 17 00:00:00 2001
From: sd109
Date: Mon, 18 Mar 2024 15:20:26 +0000
Subject: [PATCH 4/5] Add chatTemplate config value

Also fix missed renames of model_ -> hf_model_ config options.
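
For models whose HuggingFace repo ships no chat template (e.g.
TheBloke/WizardCoder-Python-34B-V1.0-AWQ, noted as such in the Tiltfile),
a template can now be supplied through the chart values. A rough sketch of
such an override - the template shown is illustrative only, the correct
Jinja template is model-specific:

    huggingface:
      chatTemplate: |
        {% for message in messages %}
        {{ message['role'] }}: {{ message['content'] }}
        {% endfor %}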
---
 chart/azimuth-ui.schema.yaml       |  2 +-
 chart/templates/api/deployment.yml |  4 ++++
 chart/values.schema.json           |  6 +++---
 chart/values.yaml                  | 13 +++++++++++--
 4 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/chart/azimuth-ui.schema.yaml b/chart/azimuth-ui.schema.yaml
index e601d3d..5cd2d8a 100644
--- a/chart/azimuth-ui.schema.yaml
+++ b/chart/azimuth-ui.schema.yaml
@@ -5,7 +5,7 @@ controls:
   /huggingface/token:
     type: TextControl
     secret: true
-  /ui/appSettings/model_instruction:
+  /ui/appSettings/hf_model_instruction:
     type: TextControl
   /ui/appSettings/page_title:
     type: TextControl

diff --git a/chart/templates/api/deployment.yml b/chart/templates/api/deployment.yml
index f6e3ea5..33954f2 100644
--- a/chart/templates/api/deployment.yml
+++ b/chart/templates/api/deployment.yml
@@ -28,6 +28,10 @@ spec:
         args:
           - --model
           - {{ .Values.huggingface.model }}
+          {{- if .Values.huggingface.chatTemplate }}
+          - --chat-template
+          - {{ quote .Values.huggingface.chatTemplate }}
+          {{- end -}}
          {{- if .Values.api.extraArgs -}}
          {{- .Values.api.extraArgs | toYaml | nindent 10 }}
          {{- end -}}

diff --git a/chart/values.schema.json b/chart/values.schema.json
index b9f56e4..c4082b3 100644
--- a/chart/values.schema.json
+++ b/chart/values.schema.json
@@ -25,13 +25,13 @@
         "appSettings": {
             "type": "object",
             "properties": {
-                "model_name": {
+                "hf_model_name": {
                     "type": "string",
                     "title": "Model Name",
                     "description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above.",
                     "default": "mistralai/Mistral-7B-Instruct-v0.2"
                 },
-                "model_instruction": {
+                "hf_model_instruction": {
                     "type": "string",
                     "title": "Instruction",
                     "description": "The initial model prompt (i.e. the hidden instructions) to use when generating responses.",
@@ -75,7 +75,7 @@
                 }
             },
 
-            "required": ["model_name", "model_instruction"]
+            "required": ["hf_model_name", "hf_model_instruction"]
         }
     }
 }

diff --git a/chart/values.yaml b/chart/values.yaml
index 3e016a8..e338b45 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -6,6 +6,15 @@ huggingface:
   # The name of the HuggingFace model to use
   # Use a yaml anchor to avoid duplication elsewhere
   model: &model-name ise-uiuc/Magicoder-S-DS-6.7B
+  # A Jinja formatted chat template to provide to the language model.
+  # See https://huggingface.co/blog/chat-templates for background info.
+  # If not provided, the default template specified in the HuggingFace
+  # model repository's tokenizer_config.json file is used. As explained
+  # in the above blog post, the HF template key in tokenizer_config.json
+  # is relatively new and not all HF models include a template in their
+  # repo files yet. This chart value provides a hook to manually apply the
+  # correct chat template for such models.
+  chatTemplate:
 
   # For private/gated huggingface models (e.g. Meta's Llama models)
   # you must provide your own huggingface token, for details see:
@@ -71,8 +80,8 @@ ui:
   # The values to be written to settings.yml for parsing as frontend app setting
   # (see example_app.py and config.py for example using pydantic-settings to configure app)
   appSettings:
-    model_name: *model-name
-    model_instruction: "You are a helpful AI assistant. Please respond appropriately."
+    hf_model_name: *model-name
+    hf_model_instruction: "You are a helpful AI assistant. Please respond appropriately."
   # Container image config
   image:
     repository: ghcr.io/stackhpc/azimuth-llm-ui-base

From 0381fdf2b7c8fe4568836848030175b21f06482f Mon Sep 17 00:00:00 2001
From: sd109
Date: Mon, 18 Mar 2024 15:39:10 +0000
Subject: [PATCH 5/5] Move test model name out of Tiltfile

---
 Tiltfile | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/Tiltfile b/Tiltfile
index 4d54978..73adff2 100644
--- a/Tiltfile
+++ b/Tiltfile
@@ -1,11 +1,3 @@
-# The HuggingFace model to use for testing
-# hf_model = "ise-uiuc/Magicoder-S-DS-6.7B" # Good lightweight model for testing
-# hf_model = "TheBloke/WizardCoder-Python-34B-V1.0-AWQ" # Poor performance, missing chat_template in repo
-hf_model = "TheBloke/SauerkrautLM-70B-v1-AWQ"
-# hf_model = "TheBloke/SauerkrautLM-Mixtral-8x7B-Instruct-AWQ" # Works well
-# hf_model = "abacusai/Smaug-Mixtral-v0.1" # GPU OOM
-# hf_model = "LoneStriker/Smaug-72B-v0.1-AWQ" # Works but produces nonsense responses
-
 # Toggles whether UI should be run locally using gradio hot-reloading
 # or should be included in the remote Helm install
 run_ui_locally = True
@@ -19,15 +11,18 @@ allow_k8s_contexts('production-llm-service-admin@production-llm-service')
 
 chart_yaml = helm(
     "chart/",
-    values="hu-dev-values.yml",
+    values="dev-values.yml",
     # Enable/disable remote UI install depending on if we're running it locally
     set=[
-        "huggingface.model={}".format(hf_model),
         "ui.enabled={}".format(str(not run_ui_locally).lower())
     ],
 )
 k8s_yaml(chart_yaml)
 
+# Parse LLM name from templated deployment
+api_deployment, _ = filter_yaml(chart_yaml, kind='Deployment', name='chart-api')
+hf_model = decode_yaml(api_deployment)['spec']['template']['spec']['containers'][0]['args'][1]
+
 if not run_ui_locally:
     # Port-forward web app to localhost:8080
     k8s_resource("chart-ui", port_forwards="8080:7680")
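
Note: after this patch the test model is selected via the local dev-values.yml
Helm values file rather than a hard-coded Tiltfile variable. A minimal sketch
of such a file, assuming the chart's default model (any HuggingFace model ID
accepted by the chart works here):

    huggingface:
      model: ise-uiuc/Magicoder-S-DS-6.7B

The Tiltfile then reads the model name back out of the rendered Deployment's
container args, so the locally run Gradio app always targets whichever model
the chart actually deploys.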