From 77d78f5a890fdba24dd6ae106f357b90ecee2663 Mon Sep 17 00:00:00 2001 From: Ian Macleod Date: Mon, 7 Aug 2023 21:51:17 +0000 Subject: [PATCH 1/3] adding status field to model get response --- clients/python/llmengine/data_types.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clients/python/llmengine/data_types.py b/clients/python/llmengine/data_types.py index baa9e087..7de40998 100644 --- a/clients/python/llmengine/data_types.py +++ b/clients/python/llmengine/data_types.py @@ -201,6 +201,9 @@ class GetLLMEndpointResponse(BaseModel): source: LLMSource = Field(description="The source of the model, e.g. Hugging Face.") """The source of the model, e.g. Hugging Face.""" + status: ModelEndpointStatus = Field(description="The status of the model.") + """The status of the model.""" + inference_framework: LLMInferenceFramework = Field( description="The inference framework used by the model." ) From 1b91d24028170c0e85592ac3ab30d4f03e6a7ab6 Mon Sep 17 00:00:00 2001 From: Ian Macleod Date: Mon, 7 Aug 2023 23:28:14 +0000 Subject: [PATCH 2/3] adding docs --- clients/python/llmengine/model.py | 1 + docs/api/data_types.md | 1 + docs/guides/endpoint_creation.md | 15 +++++++++++++++ 3 files changed, 17 insertions(+) create mode 100644 docs/guides/endpoint_creation.md diff --git a/clients/python/llmengine/model.py b/clients/python/llmengine/model.py index 1c854eba..b96afa1b 100644 --- a/clients/python/llmengine/model.py +++ b/clients/python/llmengine/model.py @@ -242,6 +242,7 @@ def get( "name": "llama-2-7b.suffix.2023-07-18-12-00-00", "model_name": null, "source": "hugging_face", + "status": "READY", "inference_framework": "text_generation_inference", "inference_framework_tag": null, "num_shards": null, diff --git a/docs/api/data_types.md b/docs/api/data_types.md index 2d53a3bf..b932fa70 100644 --- a/docs/api/data_types.md +++ b/docs/api/data_types.md @@ -35,6 +35,7 @@ - inference_framework - id - model_name + - status - inference_framework_tag - num_shards - quantize diff 
--git a/docs/guides/endpoint_creation.md b/docs/guides/endpoint_creation.md new file mode 100644 index 00000000..2a51d0bc --- /dev/null +++ b/docs/guides/endpoint_creation.md @@ -0,0 +1,15 @@ +When creating a model endpoint, you can periodically poll the model status field to +track the status of your model endpoint. In general, after creating the model, you'll +need to wait for the model endpoint to become ready and available for use. +An example is provided below: + +*Assuming the user has imported `time` and `Model` (from `llmengine`) and has created a model named "llama-2-7b.suffix.2023-07-18-12-00-00"* +```python +model_name = "llama-2-7b.suffix.2023-07-18-12-00-00" +response = Model.get(model_name) +while response.status != "READY": + time.sleep(60) + response = Model.get(model_name) +``` + +Once the endpoint status is READY, you can use your newly created model for inference. \ No newline at end of file From c4e8987d44f96d36738a0dce97230648c63465a1 Mon Sep 17 00:00:00 2001 From: Ian Macleod Date: Mon, 7 Aug 2023 23:31:08 +0000 Subject: [PATCH 3/3] adding enum values --- clients/python/llmengine/data_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/python/llmengine/data_types.py b/clients/python/llmengine/data_types.py index 7de40998..26e5d996 100644 --- a/clients/python/llmengine/data_types.py +++ b/clients/python/llmengine/data_types.py @@ -202,7 +202,7 @@ class GetLLMEndpointResponse(BaseModel): """The source of the model, e.g. Hugging Face.""" status: ModelEndpointStatus = Field(description="The status of the model.") - """The status of the model.""" + """The status of the model (can be one of "READY", "UPDATE_PENDING", "UPDATE_IN_PROGRESS", "UPDATE_FAILED", "DELETE_IN_PROGRESS").""" inference_framework: LLMInferenceFramework = Field( description="The inference framework used by the model."