From e0ce655ce306e0295403f4635534fbe4b831621b Mon Sep 17 00:00:00 2001 From: Yunfeng Bai Date: Fri, 1 Sep 2023 22:52:59 +0000 Subject: [PATCH] Add comments --- model-engine/model_engine_server/api/worker.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/model-engine/model_engine_server/api/worker.py b/model-engine/model_engine_server/api/worker.py index 945614da..d08113b5 100644 --- a/model-engine/model_engine_server/api/worker.py +++ b/model-engine/model_engine_server/api/worker.py @@ -1,5 +1,7 @@ from uvicorn.workers import UvicornWorker +# Gunicorn returns 503 instead of 429 when concurrency exceeds the limit; until rate limiting is added, just increase the concurrency. +# We'll autoscale at a much lower target concurrency (around 50); this limit just makes sure we don't return 503s under bursty traffic. CONCURRENCY_LIMIT = 1000