From 41ecada1b51ce3a46bbc3190a36ed7890db370d3 Mon Sep 17 00:00:00 2001 From: Jui-Tse Hung Date: Wed, 19 Jul 2023 03:01:33 +0000 Subject: [PATCH 1/6] Add missing a100 and t4 devices in values_sample.yaml --- charts/llm-engine/values_sample.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/charts/llm-engine/values_sample.yaml b/charts/llm-engine/values_sample.yaml index 7b2cbbf0..7af4780a 100644 --- a/charts/llm-engine/values_sample.yaml +++ b/charts/llm-engine/values_sample.yaml @@ -171,6 +171,20 @@ imageCache: - key: "nvidia.com/gpu" operator: "Exists" effect: "NoSchedule" + - name: a100 + nodeSelector: + k8s.amazonaws.com/accelerator: nvidia-ampere-a100 + tolerations: + - key: "nvidia.com/gpu" + operator: "Exists" + effect: "NoSchedule" + - name: t4 + nodeSelector: + k8s.amazonaws.com/accelerator: nvidia-tesla-t4 + tolerations: + - key: "nvidia.com/gpu" + operator: "Exists" + effect: "NoSchedule" # celeryBrokerType specifies the celery broker type for async endpoints (coming soon) celeryBrokerType: sqs From 5dbe7dd90dd463a16d9d9c724e16b65ec11fbacb Mon Sep 17 00:00:00 2001 From: Jui-Tse Hung Date: Wed, 19 Jul 2023 04:23:45 +0000 Subject: [PATCH 2/6] Update http-forwarder python command for streaming endpoint --- charts/llm-engine/templates/service_template_config_map.yaml | 2 +- .../templates/service_template_config_map_circleci.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/charts/llm-engine/templates/service_template_config_map.yaml b/charts/llm-engine/templates/service_template_config_map.yaml index 87b992cf..1c1d16b2 100644 --- a/charts/llm-engine/templates/service_template_config_map.yaml +++ b/charts/llm-engine/templates/service_template_config_map.yaml @@ -221,7 +221,7 @@ data: - ddtrace-run - python - -m - - llm_engine.inference.forwarding.http_forwarder + - server.llm_engine_server.inference.forwarding.http_forwarder - --config - /workspace/llm_engine/llm_engine/inference/configs/service--http_forwarder.yaml - --port diff --git a/server/llm_engine_server/infra/gateways/resources/templates/service_template_config_map_circleci.yaml b/server/llm_engine_server/infra/gateways/resources/templates/service_template_config_map_circleci.yaml index 41ffe75b..36aca8ea 100644 --- a/server/llm_engine_server/infra/gateways/resources/templates/service_template_config_map_circleci.yaml +++ b/server/llm_engine_server/infra/gateways/resources/templates/service_template_config_map_circleci.yaml @@ -1473,7 +1473,7 @@ data: - ddtrace-run - python - -m - - llm_engine.inference.forwarding.http_forwarder + - server.llm_engine_server.inference.forwarding.http_forwarder - --config - /workspace/llm_engine/llm_engine/inference/configs/service--http_forwarder.yaml - --port @@ -3107,7 +3107,7 @@ data: - ddtrace-run - python - -m - - llm_engine.inference.forwarding.http_forwarder + - server.llm_engine_server.inference.forwarding.http_forwarder - --config - /workspace/llm_engine/llm_engine/inference/configs/service--http_forwarder.yaml - --port From 1370d791a0056194617665fa6c76afebd5ae677b Mon Sep 17 00:00:00 2001 From: Jui-Tse Hung Date: Wed, 19 Jul 2023 04:24:45 +0000 Subject: [PATCH 3/6] Fix tgi repo name --- .../domain/use_cases/llm_model_endpoint_use_cases.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/llm_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py b/server/llm_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py index 482a4519..8de7ef72 100644 --- a/server/llm_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py +++ b/server/llm_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py @@ -227,7 +227,7 @@ async def create_text_generation_inference_bundle( schema_location="TBA", flavor=StreamingEnhancedRunnableImageFlavor( flavor=ModelBundleFlavorType.STREAMING_ENHANCED_RUNNABLE_IMAGE, - repository="text-generation-inference", # TODO: let user choose repo + repository="ghcr.io/huggingface/text-generation-inference", # TODO: let user choose repo tag=framework_image_tag, command=command, streaming_command=command, From 1a9207ab4aac663e5af759c8c0543bcdec244df9 Mon Sep 17 00:00:00 2001 From: Jui-Tse Hung Date: Wed, 19 Jul 2023 04:24:45 +0000 Subject: [PATCH 4/6] Fix tgi repo name --- charts/llm-engine/values_sample.yaml | 2 +- .../domain/use_cases/llm_model_endpoint_use_cases.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/llm-engine/values_sample.yaml b/charts/llm-engine/values_sample.yaml index 7af4780a..06d70362 100644 --- a/charts/llm-engine/values_sample.yaml +++ b/charts/llm-engine/values_sample.yaml @@ -1,7 +1,7 @@ # This is a YAML-formatted file. # tag [required] is the LLM Engine docker image tag -tag: 1defd4f9c5376149e27673e154731a0c7820fe5d +tag: 41ecada1b51ce3a46bbc3190a36ed7890db370d3 # context is a user-specified deployment tag. Can be used to context: production image: diff --git a/server/llm_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py b/server/llm_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py index 482a4519..8de7ef72 100644 --- a/server/llm_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py +++ b/server/llm_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py @@ -227,7 +227,7 @@ async def create_text_generation_inference_bundle( schema_location="TBA", flavor=StreamingEnhancedRunnableImageFlavor( flavor=ModelBundleFlavorType.STREAMING_ENHANCED_RUNNABLE_IMAGE, - repository="text-generation-inference", # TODO: let user choose repo + repository="ghcr.io/huggingface/text-generation-inference", # TODO: let user choose repo tag=framework_image_tag, command=command, streaming_command=command, From b9bae4e115ddeef0be9dd90f9eee3200b110d668 Mon Sep 17 00:00:00 2001 From: Jui-Tse Hung Date: Wed, 19 Jul 2023 21:55:40 +0000 Subject: [PATCH 5/6] Fix TGI http forwarder config path --- .../service_template_config_map.yaml | 6 +++--- .../service_template_config_map_circleci.yaml | 20 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/charts/llm-engine/templates/service_template_config_map.yaml b/charts/llm-engine/templates/service_template_config_map.yaml index 1c1d16b2..08ce1424 100644 --- a/charts/llm-engine/templates/service_template_config_map.yaml +++ b/charts/llm-engine/templates/service_template_config_map.yaml @@ -180,7 +180,7 @@ data: - ddtrace-run - run-service - --config - - /workspace/llm_engine/llm_engine/inference/configs/${FORWARDER_CONFIG_FILE_NAME} + - /workspace/server/llm_engine_server/inference/configs/${FORWARDER_CONFIG_FILE_NAME} - --http - production_threads - --port @@ -223,7 +223,7 @@ data: - -m - server.llm_engine_server.inference.forwarding.http_forwarder - --config - - /workspace/llm_engine/llm_engine/inference/configs/service--http_forwarder.yaml + - /workspace/server/llm_engine_server/inference/configs/service--http_forwarder.yaml - --port - "${FORWARDER_PORT}" - --num-workers @@ -266,7 +266,7 @@ data: - ddtrace-run - run-service - --config - - /workspace/llm_engine/llm_engine/inference/configs/${FORWARDER_CONFIG_FILE_NAME} + - /workspace/server/llm_engine_server/inference/configs/${FORWARDER_CONFIG_FILE_NAME} - --queue - "${QUEUE}" - --task-visibility diff --git a/server/llm_engine_server/infra/gateways/resources/templates/service_template_config_map_circleci.yaml b/server/llm_engine_server/infra/gateways/resources/templates/service_template_config_map_circleci.yaml index 36aca8ea..3f2e519f 100644 --- a/server/llm_engine_server/infra/gateways/resources/templates/service_template_config_map_circleci.yaml +++ b/server/llm_engine_server/infra/gateways/resources/templates/service_template_config_map_circleci.yaml @@ -114,7 +114,7 @@ data: - ddtrace-run - run-service - --config - - /workspace/llm_engine/llm_engine/inference/configs/${FORWARDER_CONFIG_FILE_NAME} + - /workspace/server/llm_engine_server/inference/configs/${FORWARDER_CONFIG_FILE_NAME} - --queue - "${QUEUE}" - --task-visibility @@ -383,7 +383,7 @@ data: - ddtrace-run - run-service - --config - - /workspace/llm_engine/llm_engine/inference/configs/${FORWARDER_CONFIG_FILE_NAME} + - /workspace/server/llm_engine_server/inference/configs/${FORWARDER_CONFIG_FILE_NAME} - --queue - "${QUEUE}" - --task-visibility @@ -805,7 +805,7 @@ data: - ddtrace-run - run-service - --config - - /workspace/llm_engine/llm_engine/inference/configs/${FORWARDER_CONFIG_FILE_NAME} + - /workspace/server/llm_engine_server/inference/configs/${FORWARDER_CONFIG_FILE_NAME} - --http - production_threads - --port @@ -1071,7 +1071,7 @@ data: - ddtrace-run - run-service - --config - - /workspace/llm_engine/llm_engine/inference/configs/${FORWARDER_CONFIG_FILE_NAME} + - /workspace/server/llm_engine_server/inference/configs/${FORWARDER_CONFIG_FILE_NAME} - --http - production_threads - --port @@ -1475,7 +1475,7 @@ data: - -m - server.llm_engine_server.inference.forwarding.http_forwarder - --config - - /workspace/llm_engine/llm_engine/inference/configs/service--http_forwarder.yaml + - /workspace/server/llm_engine_server/inference/configs/service--http_forwarder.yaml - --port - "${FORWARDER_PORT}" - --num-workers @@ -1712,7 +1712,7 @@ data: - ddtrace-run - run-service - --config - - /workspace/llm_engine/llm_engine/inference/configs/${FORWARDER_CONFIG_FILE_NAME} + - /workspace/server/llm_engine_server/inference/configs/${FORWARDER_CONFIG_FILE_NAME} - --queue - "${QUEUE}" - --task-visibility @@ -1987,7 +1987,7 @@ data: - ddtrace-run - run-service - --config - - /workspace/llm_engine/llm_engine/inference/configs/${FORWARDER_CONFIG_FILE_NAME} + - /workspace/server/llm_engine_server/inference/configs/${FORWARDER_CONFIG_FILE_NAME} - --queue - "${QUEUE}" - --task-visibility @@ -2421,7 +2421,7 @@ data: - ddtrace-run - run-service - --config - - /workspace/llm_engine/llm_engine/inference/configs/${FORWARDER_CONFIG_FILE_NAME} + - /workspace/server/llm_engine_server/inference/configs/${FORWARDER_CONFIG_FILE_NAME} - --http - production_threads - --port @@ -2693,7 +2693,7 @@ data: - ddtrace-run - run-service - --config - - /workspace/llm_engine/llm_engine/inference/configs/${FORWARDER_CONFIG_FILE_NAME} + - /workspace/server/llm_engine_server/inference/configs/${FORWARDER_CONFIG_FILE_NAME} - --http - production_threads - --port @@ -3109,7 +3109,7 @@ data: - -m - server.llm_engine_server.inference.forwarding.http_forwarder - --config - - /workspace/llm_engine/llm_engine/inference/configs/service--http_forwarder.yaml + - /workspace/server/llm_engine_server/inference/configs/service--http_forwarder.yaml - --port - "${FORWARDER_PORT}" - --num-workers From 09dab97f6208e4da7c1e36415b9ac994557c5acc Mon Sep 17 00:00:00 2001 From: Jui-Tse Hung Date: Wed, 19 Jul 2023 22:33:54 +0000 Subject: [PATCH 6/6] Fix forwarder aws config mount path --- charts/llm-engine/templates/_helpers.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/llm-engine/templates/_helpers.tpl b/charts/llm-engine/templates/_helpers.tpl index 04c8168f..08af45f4 100644 --- a/charts/llm-engine/templates/_helpers.tpl +++ b/charts/llm-engine/templates/_helpers.tpl @@ -344,7 +344,7 @@ volumeMounts: {{- define "llmEngine.forwarderVolumeMounts" }} volumeMounts: - name: config-volume - mountPath: /root/.aws/config + mountPath: /home/user/.aws/config subPath: config - name: user-config mountPath: /workspace/user_config