From b173b8d480a5ba213fa11dffb0d4f52bb0e77ae2 Mon Sep 17 00:00:00 2001 From: meher-m Date: Tue, 23 Sep 2025 21:59:17 +0000 Subject: [PATCH 01/13] initial code not clean --- .../domain/entities/model_bundle_entity.py | 1 + .../configs/service--http_forwarder.yaml | 6 +++-- .../inference/forwarding/http_forwarder.py | 26 ++++++++++++++++++- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/model-engine/model_engine_server/domain/entities/model_bundle_entity.py b/model-engine/model_engine_server/domain/entities/model_bundle_entity.py index 172f84f2..a6eff6f1 100644 --- a/model-engine/model_engine_server/domain/entities/model_bundle_entity.py +++ b/model-engine/model_engine_server/domain/entities/model_bundle_entity.py @@ -163,6 +163,7 @@ class RunnableImageLike(BaseModel, ABC): protocol: Literal["http"] # TODO: add support for other protocols (e.g. grpc) readiness_initial_delay_seconds: int = 120 extra_routes: List[str] = Field(default_factory=list) + routes: Optional[List[str]] = None forwarder_type: Optional[str] = ForwarderType.DEFAULT.value worker_command: Optional[List[str]] = None worker_env: Optional[Dict[str, str]] = None diff --git a/model-engine/model_engine_server/inference/configs/service--http_forwarder.yaml b/model-engine/model_engine_server/inference/configs/service--http_forwarder.yaml index 56641fa0..94ad314e 100644 --- a/model-engine/model_engine_server/inference/configs/service--http_forwarder.yaml +++ b/model-engine/model_engine_server/inference/configs/service--http_forwarder.yaml @@ -9,7 +9,8 @@ forwarder: model_engine_unwrap: true serialize_results_as_string: true forward_http_status: true - extra_routes: [] + extra_routes: [] # Legacy field - still supported for backwards compatibility + # routes: [] # New field - can be used alongside or instead of extra_routes stream: user_port: 5005 user_hostname: "localhost" @@ -18,6 +19,7 @@ forwarder: batch_route: null model_engine_unwrap: true serialize_results_as_string: false - extra_routes: [] + extra_routes: [] # Legacy field - still supported for backwards compatibility + # routes: [] # New field - can be used alongside or instead of extra_routes max_concurrency: 100 diff --git a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py index d0d9a1bd..8cdc81e6 100644 --- a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py +++ b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py @@ -277,15 +277,25 @@ def healthcheck(): return "OK" def add_extra_sync_or_stream_routes(app: FastAPI): - """Read extra_routes from config and dynamically add routes to app""" + """Read routes from config (both old extra_routes and new routes field) and dynamically add routes to app""" config = get_config() sync_forwarders: Dict[str, Forwarder] = dict() stream_forwarders: Dict[str, StreamingForwarder] = dict() + + # Handle legacy extra_routes configuration (backwards compatibility) for route in config.get("sync", {}).get("extra_routes", []): sync_forwarders[route] = load_forwarder(route) for route in config.get("stream", {}).get("extra_routes", []): stream_forwarders[route] = load_streaming_forwarder(route) + # Handle new routes field configuration + for route in config.get("sync", {}).get("routes", []): + if route not in sync_forwarders: # Avoid duplicates + sync_forwarders[route] = load_forwarder(route) + for route in config.get("stream", {}).get("routes", []): + if route not in stream_forwarders: # Avoid duplicates + stream_forwarders[route] = load_streaming_forwarder(route) + all_routes = set(list(sync_forwarders.keys()) + list(stream_forwarders.keys())) for route in all_routes: @@ -327,9 +337,16 @@ def add_stream_passthrough_routes(app: FastAPI): config = get_config() passthrough_forwarders: Dict[str, PassthroughForwarder] = dict() + + # Handle legacy extra_routes configuration (backwards compatibility) for route in config.get("stream", {}).get("extra_routes", []): passthrough_forwarders[route] = load_stream_passthrough_forwarder(route) + # Handle new routes field configuration + for route in config.get("stream", {}).get("routes", []): + if route not in passthrough_forwarders: # Avoid duplicates + passthrough_forwarders[route] = load_stream_passthrough_forwarder(route) + for route in passthrough_forwarders: def get_passthrough_forwarder(route=route): @@ -352,9 +369,16 @@ def add_sync_passthrough_routes(app: FastAPI): config = get_config() passthrough_forwarders: Dict[str, PassthroughForwarder] = dict() + + # Handle legacy extra_routes configuration (backwards compatibility) for route in config.get("sync", {}).get("extra_routes", []): passthrough_forwarders[route] = load_sync_passthrough_forwarder(route) + # Handle new routes field configuration + for route in config.get("sync", {}).get("routes", []): + if route not in passthrough_forwarders: # Avoid duplicates + passthrough_forwarders[route] = load_sync_passthrough_forwarder(route) + for route in passthrough_forwarders: def get_passthrough_forwarder(route=route): From 657d5586d65a18c2d036a728c54f4cd895233bbd Mon Sep 17 00:00:00 2001 From: meher-m Date: Tue, 23 Sep 2025 22:12:15 +0000 Subject: [PATCH 02/13] adding /predict and /stream like the other routes? --- .../inference/forwarding/http_forwarder.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py index 8cdc81e6..752a17ca 100644 --- a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py +++ b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py @@ -276,7 +276,7 @@ async def init_app(): def healthcheck(): return "OK" - def add_extra_sync_or_stream_routes(app: FastAPI): + def add_sync_or_stream_routes(app: FastAPI): """Read routes from config (both old extra_routes and new routes field) and dynamically add routes to app""" config = get_config() sync_forwarders: Dict[str, Forwarder] = dict() @@ -296,6 +296,10 @@ def add_extra_sync_or_stream_routes(app: FastAPI): if route not in stream_forwarders: # Avoid duplicates stream_forwarders[route] = load_streaming_forwarder(route) + # Add hardcoded routes to forwarders so they get handled consistently + sync_forwarders["/predict"] = load_forwarder(None) + stream_forwarders["/stream"] = load_streaming_forwarder(None) + all_routes = set(list(sync_forwarders.keys()) + list(stream_forwarders.keys())) for route in all_routes: @@ -408,8 +412,8 @@ def add_extra_routes(app: FastAPI): app.add_api_route(path="/healthz", endpoint=healthcheck, methods=["GET"]) app.add_api_route(path="/readyz", endpoint=healthcheck, methods=["GET"]) - app.add_api_route(path="/predict", endpoint=predict, methods=["POST"]) - app.add_api_route(path="/stream", endpoint=stream, methods=["POST"]) + # app.add_api_route(path="/predict", endpoint=predict, methods=["POST"]) + # app.add_api_route(path="/stream", endpoint=stream, methods=["POST"]) add_extra_routes(app) return app From d0ae24b6adbb0a05d5d629938e00a2ca2a218076 Mon Sep 17 00:00:00 2001 From: meher-m Date: Tue, 23 Sep 2025 22:34:33 +0000 Subject: [PATCH 03/13] revisions --- .../domain/entities/model_bundle_entity.py | 2 +- .../configs/service--http_forwarder.yaml | 6 +-- .../inference/forwarding/http_forwarder.py | 49 ++++++++++--------- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/model-engine/model_engine_server/domain/entities/model_bundle_entity.py b/model-engine/model_engine_server/domain/entities/model_bundle_entity.py index a6eff6f1..66128471 100644 --- a/model-engine/model_engine_server/domain/entities/model_bundle_entity.py +++ b/model-engine/model_engine_server/domain/entities/model_bundle_entity.py @@ -163,7 +163,7 @@ class RunnableImageLike(BaseModel, ABC): protocol: Literal["http"] # TODO: add support for other protocols (e.g. grpc) readiness_initial_delay_seconds: int = 120 extra_routes: List[str] = Field(default_factory=list) - routes: Optional[List[str]] = None + routes: Optional[List[str]] = Field(default_factory=list) forwarder_type: Optional[str] = ForwarderType.DEFAULT.value worker_command: Optional[List[str]] = None worker_env: Optional[Dict[str, str]] = None diff --git a/model-engine/model_engine_server/inference/configs/service--http_forwarder.yaml b/model-engine/model_engine_server/inference/configs/service--http_forwarder.yaml index 94ad314e..56641fa0 100644 --- a/model-engine/model_engine_server/inference/configs/service--http_forwarder.yaml +++ b/model-engine/model_engine_server/inference/configs/service--http_forwarder.yaml @@ -9,8 +9,7 @@ forwarder: model_engine_unwrap: true serialize_results_as_string: true forward_http_status: true - extra_routes: [] # Legacy field - still supported for backwards compatibility - # routes: [] # New field - can be used alongside or instead of extra_routes + extra_routes: [] stream: user_port: 5005 user_hostname: "localhost" @@ -19,7 +18,6 @@ forwarder: batch_route: null model_engine_unwrap: true serialize_results_as_string: false - extra_routes: [] # Legacy field - still supported for backwards compatibility - # routes: [] # New field - can be used alongside or instead of extra_routes + extra_routes: [] max_concurrency: 100 diff --git a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py index 752a17ca..250d5f40 100644 --- a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py +++ b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py @@ -283,19 +283,24 @@ def add_sync_or_stream_routes(app: FastAPI): stream_forwarders: Dict[str, StreamingForwarder] = dict() # Handle legacy extra_routes configuration (backwards compatibility) - for route in config.get("sync", {}).get("extra_routes", []): + sync_routes_to_add = set() + stream_routes_to_add = set() + sync_routes_to_add.update(config.get("sync", {}).get("extra_routes", [])) + sync_routes_to_add.update(config.get("sync", {}).get("routes", [])) + + if config.get("sync", {}).get("predict_route", None) is None: + sync_routes_to_add.add("/predict") + + stream_routes_to_add.update(config.get("stream", {}).get("extra_routes", [])) + stream_routes_to_add.update(config.get("stream", {}).get("routes", [])) + if config.get("stream", {}).get("predict_route", None) is None: + stream_routes_to_add.add("/stream") + + for route in sync_routes_to_add: sync_forwarders[route] = load_forwarder(route) - for route in config.get("stream", {}).get("extra_routes", []): + for route in stream_routes_to_add: stream_forwarders[route] = load_streaming_forwarder(route) - # Handle new routes field configuration - for route in config.get("sync", {}).get("routes", []): - if route not in sync_forwarders: # Avoid duplicates - sync_forwarders[route] = load_forwarder(route) - for route in config.get("stream", {}).get("routes", []): - if route not in stream_forwarders: # Avoid duplicates - stream_forwarders[route] = load_streaming_forwarder(route) - # Add hardcoded routes to forwarders so they get handled consistently sync_forwarders["/predict"] = load_forwarder(None) stream_forwarders["/stream"] = load_streaming_forwarder(None) @@ -343,13 +348,12 @@ def add_stream_passthrough_routes(app: FastAPI): passthrough_forwarders: Dict[str, PassthroughForwarder] = dict() # Handle legacy extra_routes configuration (backwards compatibility) - for route in config.get("stream", {}).get("extra_routes", []): - passthrough_forwarders[route] = load_stream_passthrough_forwarder(route) + stream_passthrough_routes_to_add = set() + stream_passthrough_routes_to_add.update(config.get("stream", {}).get("extra_routes", [])) + stream_passthrough_routes_to_add.update(config.get("stream", {}).get("routes", [])) - # Handle new routes field configuration - for route in config.get("stream", {}).get("routes", []): - if route not in passthrough_forwarders: # Avoid duplicates - passthrough_forwarders[route] = load_stream_passthrough_forwarder(route) + for route in stream_passthrough_routes_to_add: + passthrough_forwarders[route] = load_stream_passthrough_forwarder(route) for route in passthrough_forwarders: @@ -375,13 +379,12 @@ def add_sync_passthrough_routes(app: FastAPI): passthrough_forwarders: Dict[str, PassthroughForwarder] = dict() # Handle legacy extra_routes configuration (backwards compatibility) - for route in config.get("sync", {}).get("extra_routes", []): - passthrough_forwarders[route] = load_sync_passthrough_forwarder(route) + sync_passthrough_routes_to_add = set() + sync_passthrough_routes_to_add.update(config.get("sync", {}).get("extra_routes", [])) + sync_passthrough_routes_to_add.update(config.get("sync", {}).get("routes", [])) - # Handle new routes field configuration - for route in config.get("sync", {}).get("routes", []): - if route not in passthrough_forwarders: # Avoid duplicates - passthrough_forwarders[route] = load_sync_passthrough_forwarder(route) + for route in sync_passthrough_routes_to_add: + passthrough_forwarders[route] = load_sync_passthrough_forwarder(route) for route in passthrough_forwarders: @@ -408,7 +411,7 @@ def add_extra_routes(app: FastAPI): elif config.get("sync", {}).get("forwarder_type") == "passthrough": add_sync_passthrough_routes(app) else: - add_extra_sync_or_stream_routes(app) + add_sync_or_stream_routes(app) app.add_api_route(path="/healthz", endpoint=healthcheck, methods=["GET"]) app.add_api_route(path="/readyz", endpoint=healthcheck, methods=["GET"]) From 9841d338dae087a35dd06bb576aa9face058bf41 Mon Sep 17 00:00:00 2001 From: meher-m Date: Tue, 23 Sep 2025 22:36:08 +0000 Subject: [PATCH 04/13] fix --- .../model_engine_server/inference/forwarding/http_forwarder.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py index 250d5f40..5bb493b3 100644 --- a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py +++ b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py @@ -301,9 +301,6 @@ def add_sync_or_stream_routes(app: FastAPI): for route in stream_routes_to_add: stream_forwarders[route] = load_streaming_forwarder(route) - # Add hardcoded routes to forwarders so they get handled consistently - sync_forwarders["/predict"] = load_forwarder(None) - stream_forwarders["/stream"] = load_streaming_forwarder(None) all_routes = set(list(sync_forwarders.keys()) + list(stream_forwarders.keys())) From df4a53d0cc6e9bb295aa0ea53cd189118d5258c0 Mon Sep 17 00:00:00 2001 From: meher-m Date: Tue, 23 Sep 2025 22:38:15 +0000 Subject: [PATCH 05/13] comments --- .../inference/forwarding/http_forwarder.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py index 5bb493b3..b313206b 100644 --- a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py +++ b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py @@ -282,20 +282,22 @@ def add_sync_or_stream_routes(app: FastAPI): sync_forwarders: Dict[str, Forwarder] = dict() stream_forwarders: Dict[str, StreamingForwarder] = dict() - # Handle legacy extra_routes configuration (backwards compatibility) + # Gather all sync routes from extra_routes and routes fields sync_routes_to_add = set() - stream_routes_to_add = set() sync_routes_to_add.update(config.get("sync", {}).get("extra_routes", [])) sync_routes_to_add.update(config.get("sync", {}).get("routes", [])) if config.get("sync", {}).get("predict_route", None) is None: sync_routes_to_add.add("/predict") + # Gather all stream routes from extra_routes and routes fields + stream_routes_to_add = set() stream_routes_to_add.update(config.get("stream", {}).get("extra_routes", [])) stream_routes_to_add.update(config.get("stream", {}).get("routes", [])) if config.get("stream", {}).get("predict_route", None) is None: stream_routes_to_add.add("/stream") + # Load forwarders for all routes for route in sync_routes_to_add: sync_forwarders[route] = load_forwarder(route) for route in stream_routes_to_add: @@ -344,11 +346,12 @@ def add_stream_passthrough_routes(app: FastAPI): passthrough_forwarders: Dict[str, PassthroughForwarder] = dict() - # Handle legacy extra_routes configuration (backwards compatibility) + # Gather all routes from extra_routes and routes fields stream_passthrough_routes_to_add = set() stream_passthrough_routes_to_add.update(config.get("stream", {}).get("extra_routes", [])) stream_passthrough_routes_to_add.update(config.get("stream", {}).get("routes", [])) + # Load passthrough forwarders for all routes for route in stream_passthrough_routes_to_add: passthrough_forwarders[route] = load_stream_passthrough_forwarder(route) From 4fbc8229f5ab096a6c3e9f892914bfc47737a05c Mon Sep 17 00:00:00 2001 From: meher-m Date: Tue, 23 Sep 2025 22:44:01 +0000 Subject: [PATCH 06/13] add default paths in passthrough also --- .../inference/forwarding/http_forwarder.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py index b313206b..5f6a2503 100644 --- a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py +++ b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py @@ -287,14 +287,14 @@ def add_sync_or_stream_routes(app: FastAPI): sync_routes_to_add.update(config.get("sync", {}).get("extra_routes", [])) sync_routes_to_add.update(config.get("sync", {}).get("routes", [])) - if config.get("sync", {}).get("predict_route", None) is None: + if config.get("sync", {}).get("predict_route", None) != "/predict": sync_routes_to_add.add("/predict") # Gather all stream routes from extra_routes and routes fields stream_routes_to_add = set() stream_routes_to_add.update(config.get("stream", {}).get("extra_routes", [])) stream_routes_to_add.update(config.get("stream", {}).get("routes", [])) - if config.get("stream", {}).get("predict_route", None) is None: + if config.get("stream", {}).get("predict_route", None) != "/stream": stream_routes_to_add.add("/stream") # Load forwarders for all routes @@ -350,6 +350,8 @@ def add_stream_passthrough_routes(app: FastAPI): stream_passthrough_routes_to_add = set() stream_passthrough_routes_to_add.update(config.get("stream", {}).get("extra_routes", [])) stream_passthrough_routes_to_add.update(config.get("stream", {}).get("routes", [])) + if config.get("stream", {}).get("predict_route", None) != "/stream": + stream_passthrough_routes_to_add.add("/stream") # Load passthrough forwarders for all routes for route in stream_passthrough_routes_to_add: @@ -382,6 +384,8 @@ def add_sync_passthrough_routes(app: FastAPI): sync_passthrough_routes_to_add = set() sync_passthrough_routes_to_add.update(config.get("sync", {}).get("extra_routes", [])) sync_passthrough_routes_to_add.update(config.get("sync", {}).get("routes", [])) + if config.get("sync", {}).get("predict_route", None) != "/predict": + sync_passthrough_routes_to_add.add("/predict") for route in sync_passthrough_routes_to_add: passthrough_forwarders[route] = load_sync_passthrough_forwarder(route) From 87c5f9f14da10e18635374892aeaf5989affda3d Mon Sep 17 00:00:00 2001 From: meher-m Date: Wed, 24 Sep 2025 16:26:42 +0000 Subject: [PATCH 07/13] fix and tested --- model-engine/README.md | 35 +++++++++++++++++++ .../inference/forwarding/http_forwarder.py | 4 +++ 2 files changed, 39 insertions(+) diff --git a/model-engine/README.md b/model-engine/README.md index 3f6b9579..385afd9a 100644 --- a/model-engine/README.md +++ b/model-engine/README.md @@ -42,6 +42,41 @@ Run `mypy . --install-types` to set up mypy. Most of the business logic in Model Engine should contain unit tests, located in [`tests/unit`](./tests/unit). To run the tests, run `pytest`. +### Testing the http_forwarder + +First have some endpoint running on port 5005 +```sh +(llm-engine-vllm) ➜ vllm git:(dmchoi/vllm_batch_upgrade) ✗ export IMAGE=692474966980.dkr.ecr.us-west-2.amazonaws.com/vllm:0.10.1.1-rc2 +(llm-engine-vllm) ➜ vllm git:(dmchoi/vllm_batch_upgrade) ✗ export MODEL=meta-llama/Meta-Llama-3.1-8B-Instruct && export MODEL_PATH=/data/model_files/$MODEL +(llm-engine-vllm) ➜ vllm git:(dmchoi/vllm_batch_upgrade) ✗ export REPO_PATH=/mnt/home/dmchoi/repos/scale +(llm-engine-vllm) ➜ vllm git:(dmchoi/vllm_batch_upgrade) ✗ docker kill vlll; docker rm vllm; docker run \ + --runtime nvidia \ + --shm-size=16gb \ + --gpus '"device=0,1,2,3"' \ + -v $MODEL_PATH:/workspace/model_files:ro \ + -v ${REPO_PATH}/llm-engine/model-engine/model_engine_server/inference/vllm/vllm_server.py:/workspace/vllm_server.py \ + -p 5005:5005 \ + --name vllm \ + ${IMAGE} \ + python -m vllm_server --model model_files --port 5005 --disable-log-requests --max-model-len 4096 --max-num-seqs 16 --enforce-eager +``` + +Then you can run the forwarder locally like this +```sh +GIT_TAG=test python model_engine_server/inference/forwarding/http_forwarder.py \ + --config model_engine_server/inference/configs/service--http_forwarder.yaml \ + --num-workers 1 \ + --set "forwarder.sync.extra_routes=['/v1/chat/completions','/v1/completions']" \ + --set "forwarder.stream.extra_routes=['/v1/chat/completions','/v1/completions']" \ + --set "forwarder.sync.healthcheck_route=/health" \ + --set "forwarder.stream.healthcheck_route=/health" +``` + +Then you can hit the forwarder like this +```sh + curl -X POST localhost:5000/v1/chat/completions -H "Content-Type: application/json" -d "{\"args\": {\"model\":\"$MODEL\", \"messages\":[{\"role\": \"systemr\", \"content\": \"Hey, what's the temperature in Paris right now?\"}],\"max_tokens\":100,\"temperature\":0.2,\"guided_regex\":\"Sean.*\"}}" +``` + ## Generating OpenAI types We've decided to make our V2 APIs OpenAI compatible. We generate the corresponding Pydantic models: diff --git a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py index 5f6a2503..0cac9de2 100644 --- a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py +++ b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py @@ -41,6 +41,8 @@ def get_forwarder_loader(destination_path: Optional[str] = None) -> LoadForwarde config = get_config()["sync"] if "extra_routes" in config: del config["extra_routes"] + if "routes" in config: + del config["routes"] if destination_path: config["predict_route"] = destination_path if "forwarder_type" in config: @@ -55,6 +57,8 @@ def get_streaming_forwarder_loader( config = get_config()["stream"] if "extra_routes" in config: del config["extra_routes"] + if "routes" in config: + del config["routes"] if destination_path: config["predict_route"] = destination_path if "forwarder_type" in config: From 9733132447f01d53e62b6fc64389c3df4ac2274d Mon Sep 17 00:00:00 2001 From: meher-m Date: Wed, 24 Sep 2025 16:29:48 +0000 Subject: [PATCH 08/13] revisions --- .../domain/entities/model_bundle_entity.py | 2 +- .../inference/forwarding/http_forwarder.py | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/model-engine/model_engine_server/domain/entities/model_bundle_entity.py b/model-engine/model_engine_server/domain/entities/model_bundle_entity.py index 66128471..2a5a4863 100644 --- a/model-engine/model_engine_server/domain/entities/model_bundle_entity.py +++ b/model-engine/model_engine_server/domain/entities/model_bundle_entity.py @@ -163,7 +163,7 @@ class RunnableImageLike(BaseModel, ABC): protocol: Literal["http"] # TODO: add support for other protocols (e.g. grpc) readiness_initial_delay_seconds: int = 120 extra_routes: List[str] = Field(default_factory=list) - routes: Optional[List[str]] = Field(default_factory=list) + routes: List[str] = Field(default_factory=list) forwarder_type: Optional[str] = ForwarderType.DEFAULT.value worker_command: Optional[List[str]] = None worker_env: Optional[Dict[str, str]] = None diff --git a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py index 0cac9de2..53dbf971 100644 --- a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py +++ b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py @@ -354,8 +354,6 @@ def add_stream_passthrough_routes(app: FastAPI): stream_passthrough_routes_to_add = set() stream_passthrough_routes_to_add.update(config.get("stream", {}).get("extra_routes", [])) stream_passthrough_routes_to_add.update(config.get("stream", {}).get("routes", [])) - if config.get("stream", {}).get("predict_route", None) != "/stream": - stream_passthrough_routes_to_add.add("/stream") # Load passthrough forwarders for all routes for route in stream_passthrough_routes_to_add: @@ -388,8 +386,6 @@ def add_sync_passthrough_routes(app: FastAPI): sync_passthrough_routes_to_add = set() sync_passthrough_routes_to_add.update(config.get("sync", {}).get("extra_routes", [])) sync_passthrough_routes_to_add.update(config.get("sync", {}).get("routes", [])) - if config.get("sync", {}).get("predict_route", None) != "/predict": - sync_passthrough_routes_to_add.add("/predict") for route in sync_passthrough_routes_to_add: passthrough_forwarders[route] = load_sync_passthrough_forwarder(route) From 977dff634c4d58bac93ff9c3ab6d6057447ff97e Mon Sep 17 00:00:00 2001 From: meher-m Date: Wed, 24 Sep 2025 16:40:10 +0000 Subject: [PATCH 09/13] reformat --- .../model_engine_server/inference/forwarding/http_forwarder.py | 1 - 1 file changed, 1 deletion(-) diff --git a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py index 53dbf971..04e9abd0 100644 --- a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py +++ b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py @@ -307,7 +307,6 @@ def add_sync_or_stream_routes(app: FastAPI): for route in stream_routes_to_add: stream_forwarders[route] = load_streaming_forwarder(route) - all_routes = set(list(sync_forwarders.keys()) + list(stream_forwarders.keys())) for route in all_routes: From 1b129b4c24915415bd15089468dd18b9b6262eda Mon Sep 17 00:00:00 2001 From: meher-m Date: Wed, 24 Sep 2025 17:13:44 +0000 Subject: [PATCH 10/13] fix for unit test --- .../inference/forwarding/http_forwarder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py index 04e9abd0..eaf1e6bb 100644 --- a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py +++ b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py @@ -291,14 +291,14 @@ def add_sync_or_stream_routes(app: FastAPI): sync_routes_to_add.update(config.get("sync", {}).get("extra_routes", [])) sync_routes_to_add.update(config.get("sync", {}).get("routes", [])) - if config.get("sync", {}).get("predict_route", None) != "/predict": + if config.get("sync", {}).get("predict_route", None) == "/predict": sync_routes_to_add.add("/predict") # Gather all stream routes from extra_routes and routes fields stream_routes_to_add = set() stream_routes_to_add.update(config.get("stream", {}).get("extra_routes", [])) stream_routes_to_add.update(config.get("stream", {}).get("routes", [])) - if config.get("stream", {}).get("predict_route", None) != "/stream": + if config.get("stream", {}).get("predict_route", None) == "/stream": stream_routes_to_add.add("/stream") # Load forwarders for all routes From 3002c02f33e6b8240f4e73ae22206a05dd9309c7 Mon Sep 17 00:00:00 2001 From: meher-m Date: Wed, 24 Sep 2025 18:11:01 +0000 Subject: [PATCH 11/13] remove comments --- .../model_engine_server/inference/forwarding/http_forwarder.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py index eaf1e6bb..70bb69ba 100644 --- a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py +++ b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py @@ -418,8 +418,6 @@ def add_extra_routes(app: FastAPI): app.add_api_route(path="/healthz", endpoint=healthcheck, methods=["GET"]) app.add_api_route(path="/readyz", endpoint=healthcheck, methods=["GET"]) - # app.add_api_route(path="/predict", endpoint=predict, methods=["POST"]) - # app.add_api_route(path="/stream", endpoint=stream, methods=["POST"]) add_extra_routes(app) return app From 081be7cc1e283a52efba9369d0a246dafb9a241c Mon Sep 17 00:00:00 2001 From: meher-m Date: Wed, 24 Sep 2025 18:15:38 +0000 Subject: [PATCH 12/13] revise --- .../inference/forwarding/http_forwarder.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py index 70bb69ba..07304023 100644 --- a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py +++ b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py @@ -291,15 +291,18 @@ def add_sync_or_stream_routes(app: FastAPI): sync_routes_to_add.update(config.get("sync", {}).get("extra_routes", [])) sync_routes_to_add.update(config.get("sync", {}).get("routes", [])) - if config.get("sync", {}).get("predict_route", None) == "/predict": - sync_routes_to_add.add("/predict") + predict_route = config.get("sync", {}).get("predict_route", None) + if predict_route: + sync_routes_to_add.add(predict_route) # Gather all stream routes from extra_routes and routes fields stream_routes_to_add = set() stream_routes_to_add.update(config.get("stream", {}).get("extra_routes", [])) stream_routes_to_add.update(config.get("stream", {}).get("routes", [])) - if config.get("stream", {}).get("predict_route", None) == "/stream": - stream_routes_to_add.add("/stream") + + stream_predict_route = config.get("stream", {}).get("predict_route", None) + if stream_predict_route: + stream_routes_to_add.add(stream_predict_route) # Load forwarders for all routes for route in sync_routes_to_add: From 3748c750da1f76c1e12950661664649eaaf521b7 Mon Sep 17 00:00:00 2001 From: meher-m Date: Wed, 24 Sep 2025 19:35:54 +0000 Subject: [PATCH 13/13] test update --- .../inference/forwarding/http_forwarder.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py index 07304023..eb32b180 100644 --- a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py +++ b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py @@ -291,18 +291,18 @@ def add_sync_or_stream_routes(app: FastAPI): sync_routes_to_add.update(config.get("sync", {}).get("extra_routes", [])) sync_routes_to_add.update(config.get("sync", {}).get("routes", [])) - predict_route = config.get("sync", {}).get("predict_route", None) - if predict_route: - sync_routes_to_add.add(predict_route) + # predict_route = config.get("sync", {}).get("predict_route", None) + # if predict_route: + # sync_routes_to_add.add(predict_route) # Gather all stream routes from extra_routes and routes fields stream_routes_to_add = set() stream_routes_to_add.update(config.get("stream", {}).get("extra_routes", [])) stream_routes_to_add.update(config.get("stream", {}).get("routes", [])) - stream_predict_route = config.get("stream", {}).get("predict_route", None) - if stream_predict_route: - stream_routes_to_add.add(stream_predict_route) + # stream_predict_route = config.get("stream", {}).get("predict_route", None) + # if stream_predict_route: + # stream_routes_to_add.add(stream_predict_route) # Load forwarders for all routes for route in sync_routes_to_add: @@ -421,6 +421,8 @@ def add_extra_routes(app: FastAPI): app.add_api_route(path="/healthz", endpoint=healthcheck, methods=["GET"]) app.add_api_route(path="/readyz", endpoint=healthcheck, methods=["GET"]) + app.add_api_route(path="/predict", endpoint=predict, methods=["POST"]) + app.add_api_route(path="/stream", endpoint=stream, methods=["POST"]) add_extra_routes(app) return app