Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
__pycache__/
**/*.secret
.DS_Store
.tox/

# Ignore local dev helpers
test-values.y[a]ml
Expand Down
18 changes: 7 additions & 11 deletions Tiltfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
# The HuggingFace model to use for testing
# hf_model = "ise-uiuc/Magicoder-S-DS-6.7B" # Good lightweight model for testing
# hf_model = "TheBloke/WizardCoder-Python-34B-V1.0-AWQ" # Poor performance, missing chat_template in repo
hf_model = "TheBloke/SauerkrautLM-70B-v1-AWQ"
# hf_model = "TheBloke/SauerkrautLM-Mixtral-8x7B-Instruct-AWQ" # Works well
# hf_model = "abacusai/Smaug-Mixtral-v0.1" # GPU OOM
# hf_model = "LoneStriker/Smaug-72B-v0.1-AWQ" # Works but produces nonsense responses

# Toggles whether UI should be run locally using gradio hot-reloading
# or should be included in the remote Helm install
run_ui_locally = True
Expand All @@ -19,15 +11,18 @@ allow_k8s_contexts('production-llm-service-admin@production-llm-service')

# Template the local chart with the dev values file; entries in `set`
# override individual chart values on top of that file.
chart_yaml = helm(
"chart/",
values="hu-dev-values.yml",
values="dev-values.yml",
# Enable/disable remote UI install depending on if we're running it locally
set=[
"huggingface.model={}".format(hf_model),
# Negate the flag *before* converting it to a string: `not "<non-empty str>"`
# is always False, so the previous expression passed `ui.enabled=False`
# regardless of run_ui_locally. This yields lowercase "true"/"false".
"ui.enabled={}".format(str(not run_ui_locally).lower())
],
)
k8s_yaml(chart_yaml)

# Recover the model name from the rendered API Deployment rather than
# hard-coding it here: the deployment template passes `--model` as the first
# container argument and the model name as the second.
api_deployment, _ = filter_yaml(chart_yaml, kind='Deployment', name='chart-api')
api_container = decode_yaml(api_deployment)['spec']['template']['spec']['containers'][0]
hf_model = api_container['args'][1]

if not run_ui_locally:
# Port-forward web app to localhost:8080
k8s_resource("chart-ui", port_forwards="8080:7680")
Expand Down Expand Up @@ -56,7 +51,8 @@ if run_ui_locally:
deps=["chart/web-app/"],
resource_deps=["gradio-app-venv"],
serve_cmd=" && ".join([
"source {}/bin/activate".format(venv_name),
"cd chart/web-app",
"python app.py {}".format(hf_model),
"python3 app.py {}".format(hf_model),
])
)
2 changes: 1 addition & 1 deletion chart/azimuth-ui.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ controls:
/huggingface/token:
type: TextControl
secret: true
/ui/appSettings/model_instruction:
/ui/appSettings/hf_model_instruction:
type: TextControl
/ui/appSettings/page_title:
type: TextControl
Expand Down
4 changes: 4 additions & 0 deletions chart/templates/api/deployment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ spec:
args:
- --model
- {{ .Values.huggingface.model }}
{{- if .Values.huggingface.chatTemplate }}
- --chat-template
- {{ quote .Values.huggingface.chatTemplate }}
{{- end -}}
{{- if .Values.api.extraArgs -}}
{{- .Values.api.extraArgs | toYaml | nindent 10 }}
{{- end -}}
Expand Down
6 changes: 3 additions & 3 deletions chart/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@
"appSettings": {
"type": "object",
"properties": {
"model_name": {
"hf_model_name": {
"type": "string",
"title": "Model Name",
"description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above.",
"default": "mistralai/Mistral-7B-Instruct-v0.2"
},
"model_instruction": {
"hf_model_instruction": {
"type": "string",
"title": "Instruction",
"description": "The initial model prompt (i.e. the hidden instructions) to use when generating responses.",
Expand Down Expand Up @@ -75,7 +75,7 @@
}

},
"required": ["model_name", "model_instruction"]
"required": ["hf_model_name", "hf_model_instruction"]
}
}
}
Expand Down
13 changes: 11 additions & 2 deletions chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@ huggingface:
# The name of the HuggingFace model to use
# Use a yaml anchor to avoid duplication elsewhere
model: &model-name ise-uiuc/Magicoder-S-DS-6.7B
# A Jinja formatted chat template to provide to the language model.
# See https://huggingface.co/blog/chat-templates for background info.
# If not provided, the default template specified in the HuggingFace
# model repository's tokenizer_config.json file is used. As explained
# in the above blog post, the HF template key in tokenizer_config.json
# is relatively new and not all HF models include a template in their
# repo files yet. This chart value provides a hook to manually apply the
# correct chat template for such models.
chatTemplate:

# For private/gated huggingface models (e.g. Meta's Llama models)
# you must provide your own huggingface token, for details see:
Expand Down Expand Up @@ -71,8 +80,8 @@ ui:
# The values to be written to settings.yml for parsing as frontend app settings
# (see example_app.py and config.py for an example of using pydantic-settings to configure the app)
appSettings:
model_name: *model-name
model_instruction: "You are a helpful AI assistant. Please response appropriately."
hf_model_name: *model-name
hf_model_instruction: "You are a helpful AI assistant. Please response appropriately."
# Container image config
image:
repository: ghcr.io/stackhpc/azimuth-llm-ui-base
Expand Down
4 changes: 2 additions & 2 deletions chart/web-app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,11 @@ def inference(latest_message, history):
if not BACKEND_INITIALISED:
logger.info("Backend API not yet ready")
gr.Info(
"Backend not ready - model may still be initialising - please try again later"
"Backend not ready - model may still be initialising - please try again later."
)
else:
logger.error("Failed to connect to backend API: %s", err)
gr.Warning("Failed to connect to backend API")
gr.Warning("Failed to connect to backend API.")

except openai.InternalServerError as err:
gr.Warning(
Expand Down
10 changes: 10 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[tox]
env_list =
format
minversion = 4.11.3

[testenv:format]
description = run code formatter on web-app
deps = black==23.12.1
skip_install = true
commands = black chart/web-app