diff --git a/clients/python/llmengine/data_types.py b/clients/python/llmengine/data_types.py index e17e755a..c0dc185f 100644 --- a/clients/python/llmengine/data_types.py +++ b/clients/python/llmengine/data_types.py @@ -201,6 +201,9 @@ class GetLLMEndpointResponse(BaseModel): source: LLMSource = Field(description="The source of the model, e.g. Hugging Face.") """The source of the model, e.g. Hugging Face.""" + status: ModelEndpointStatus = Field(description="The status of the model.") + """The status of the model (can be one of "READY", "UPDATE_PENDING", "UPDATE_IN_PROGRESS", "UPDATE_FAILED", "DELETE_IN_PROGRESS").""" + inference_framework: LLMInferenceFramework = Field( description="The inference framework used by the model." ) diff --git a/clients/python/llmengine/model.py b/clients/python/llmengine/model.py index 1c854eba..b96afa1b 100644 --- a/clients/python/llmengine/model.py +++ b/clients/python/llmengine/model.py @@ -242,6 +242,7 @@ def get( "name": "llama-2-7b.suffix.2023-07-18-12-00-00", "model_name": null, "source": "hugging_face", + "status": "READY", "inference_framework": "text_generation_inference", "inference_framework_tag": null, "num_shards": null, diff --git a/docs/api/data_types.md b/docs/api/data_types.md index 2d53a3bf..b932fa70 100644 --- a/docs/api/data_types.md +++ b/docs/api/data_types.md @@ -35,6 +35,7 @@ - inference_framework - id - model_name + - status - inference_framework_tag - num_shards - quantize diff --git a/docs/guides/endpoint_creation.md b/docs/guides/endpoint_creation.md new file mode 100644 index 00000000..2a51d0bc --- /dev/null +++ b/docs/guides/endpoint_creation.md @@ -0,0 +1,15 @@ +When creating a model endpoint, you can periodically poll the model status field to +track the status of your model endpoint. In general, you'll need to wait after the +model creation step for the model endpoint to be ready and available for use. 
+An example is provided below (note that it requires `import time` and `from llmengine import Model`): + +*Assuming the user has created a model named "llama-2-7b.suffix.2023-07-18-12-00-00"* +```python +model_name = "llama-2-7b.suffix.2023-07-18-12-00-00" +response = Model.get(model_name) +while response.status != "READY": +    time.sleep(60) +    response = Model.get(model_name) +``` + +Once the endpoint status is `"READY"`, you can use your newly created model for inference. \ No newline at end of file