From 8905cc7cbdacc2108bd75e2cbf95585af18c48f1 Mon Sep 17 00:00:00 2001 From: Katie Wu Date: Tue, 26 Sep 2023 00:20:21 +0000 Subject: [PATCH 1/3] Propagate extra server args to the gunicorn command --- .../entrypoints/start_fastapi_server.py | 7 ++++--- .../inference/forwarding/echo_server.py | 3 ++- .../inference/forwarding/http_forwarder.py | 3 ++- .../inference/sync_inference/start_fastapi_server.py | 5 +++++ 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/model-engine/model_engine_server/entrypoints/start_fastapi_server.py b/model-engine/model_engine_server/entrypoints/start_fastapi_server.py index 119935ff..650e3d74 100644 --- a/model-engine/model_engine_server/entrypoints/start_fastapi_server.py +++ b/model-engine/model_engine_server/entrypoints/start_fastapi_server.py @@ -8,7 +8,7 @@ from typing import List -def start_gunicorn_server(port: int, num_workers: int, debug: bool) -> None: +def start_gunicorn_server(port: int, num_workers: int, debug: bool, extra_args: List[str]) -> None: """Starts a GUnicorn server locally.""" additional_args: List[str] = [] if debug: @@ -27,6 +27,7 @@ def start_gunicorn_server(port: int, num_workers: int, debug: bool) -> None: f"{num_workers}", *additional_args, "model_engine_server.api.app:app", + *extra_args, ] subprocess.run(command, check=True) @@ -40,9 +41,9 @@ def entrypoint(): parser.add_argument("--port", type=int, default=5000) parser.add_argument("--num-workers", type=int, default=4) parser.add_argument("--debug", "-d", action="store_true") - args = parser.parse_args() + args, extra_args = parser.parse_known_args() - start_gunicorn_server(args.port, args.num_workers, args.debug) + start_gunicorn_server(args.port, args.num_workers, args.debug, extra_args) if __name__ == "__main__": diff --git a/model-engine/model_engine_server/inference/forwarding/echo_server.py b/model-engine/model_engine_server/inference/forwarding/echo_server.py index 12470cfc..0a44b832 100644 --- a/model-engine/model_engine_server/inference/forwarding/echo_server.py +++ b/model-engine/model_engine_server/inference/forwarding/echo_server.py @@ -33,7 +33,7 @@ def entrypoint(): parser.add_argument("--host", type=str, default="[::]") parser.add_argument("--port", type=int, default=5009) - args = parser.parse_args() + args, extra_args = parser.parse_known_args() command = [ "gunicorn", @@ -48,6 +48,7 @@ def entrypoint(): "--workers", str(args.num_workers), "model_engine_server.inference.forwarding.echo_server:app", + *extra_args, ] subprocess.run(command) diff --git a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py index 85de6ded..e3508aac 100644 --- a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py +++ b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py @@ -139,7 +139,7 @@ def entrypoint(): parser.add_argument("--port", type=int, default=5000) parser.add_argument("--set", type=str, action="append") - args = parser.parse_args() + args, extra_args = parser.parse_known_args() values = [f"CONFIG_FILE={args.config}"] if args.set is not None: @@ -162,6 +162,7 @@ def entrypoint(): str(args.num_workers), *envs, "model_engine_server.inference.forwarding.http_forwarder:app", + *extra_args, ] subprocess.run(command) diff --git a/model-engine/model_engine_server/inference/sync_inference/start_fastapi_server.py b/model-engine/model_engine_server/inference/sync_inference/start_fastapi_server.py index 97aea0ed..bc15ddc7 100644 --- a/model-engine/model_engine_server/inference/sync_inference/start_fastapi_server.py +++ b/model-engine/model_engine_server/inference/sync_inference/start_fastapi_server.py @@ -1,3 +1,4 @@ +import argparse import os import subprocess @@ -8,6 +9,9 @@ def start_server(): + parser = argparse.ArgumentParser() + _, extra_args = parser.parse_known_args() + # TODO: HTTPS command = [ "gunicorn", @@ -22,6 +26,7 @@ def start_server(): "--workers", str(NUM_PROCESSES), "model_engine_server.inference.sync_inference.fastapi_server:app", + *extra_args, ] unset_sensitive_envvars() subprocess.run(command) From 44971803d9e06b2b4594f1855933dd94a29b7b86 Mon Sep 17 00:00:00 2001 From: Katie Wu Date: Tue, 26 Sep 2023 00:41:50 +0000 Subject: [PATCH 2/3] default graceful-timeout to 600 --- .../entrypoints/start_fastapi_server.py | 11 +++++++++-- .../inference/forwarding/http_forwarder.py | 3 +++ .../inference/sync_inference/start_fastapi_server.py | 5 ++++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/model-engine/model_engine_server/entrypoints/start_fastapi_server.py b/model-engine/model_engine_server/entrypoints/start_fastapi_server.py index 650e3d74..6ea2687e 100644 --- a/model-engine/model_engine_server/entrypoints/start_fastapi_server.py +++ b/model-engine/model_engine_server/entrypoints/start_fastapi_server.py @@ -8,7 +8,9 @@ from typing import List -def start_gunicorn_server(port: int, num_workers: int, debug: bool, extra_args: List[str]) -> None: +def start_gunicorn_server( + port: int, num_workers: int, debug: bool, graceful_timeout: int, extra_args: List[str] +) -> None: """Starts a GUnicorn server locally.""" additional_args: List[str] = [] if debug: @@ -25,6 +27,8 @@ def start_gunicorn_server(port: int, num_workers: int, debug: bool, extra_args: "model_engine_server.api.worker.LaunchWorker", "--workers", f"{num_workers}", + "--graceful-timeout", + f"{graceful_timeout}", *additional_args, "model_engine_server.api.app:app", *extra_args, @@ -41,9 +45,12 @@ def entrypoint(): parser.add_argument("--port", type=int, default=5000) parser.add_argument("--num-workers", type=int, default=4) parser.add_argument("--debug", "-d", action="store_true") + parser.add_argument("--graceful-timeout", type=int, default=600) args, extra_args = parser.parse_known_args() - start_gunicorn_server(args.port, args.num_workers, args.debug, extra_args) + start_gunicorn_server( + args.port, args.num_workers, args.debug, args.graceful_timeout, extra_args + ) if __name__ == "__main__": diff --git a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py index e3508aac..5943bc50 100644 --- a/model-engine/model_engine_server/inference/forwarding/http_forwarder.py +++ b/model-engine/model_engine_server/inference/forwarding/http_forwarder.py @@ -138,6 +138,7 @@ def entrypoint(): parser.add_argument("--host", type=str, default="[::]") parser.add_argument("--port", type=int, default=5000) parser.add_argument("--set", type=str, action="append") + parser.add_argument("--graceful-timeout", type=int, default=600) args, extra_args = parser.parse_known_args() @@ -160,6 +161,8 @@ def entrypoint(): "uvicorn.workers.UvicornWorker", "--workers", str(args.num_workers), + "--graceful-timeout", + str(args.graceful_timeout), *envs, "model_engine_server.inference.forwarding.http_forwarder:app", *extra_args, diff --git a/model-engine/model_engine_server/inference/sync_inference/start_fastapi_server.py b/model-engine/model_engine_server/inference/sync_inference/start_fastapi_server.py index bc15ddc7..2b3aef79 100644 --- a/model-engine/model_engine_server/inference/sync_inference/start_fastapi_server.py +++ b/model-engine/model_engine_server/inference/sync_inference/start_fastapi_server.py @@ -10,7 +10,8 @@ def start_server(): parser = argparse.ArgumentParser() - _, extra_args = parser.parse_known_args() + parser.add_argument("--graceful-timeout", type=int, default=600) + args, extra_args = parser.parse_known_args() # TODO: HTTPS command = [ @@ -25,6 +26,8 @@ def start_server(): "uvicorn.workers.UvicornWorker", "--workers", str(NUM_PROCESSES), + "--graceful-timeout", + str(args.graceful_timeout), "model_engine_server.inference.sync_inference.fastapi_server:app", *extra_args, ] From 49a56d4027c47891a579eef0316c272e392a48ef Mon Sep 17 00:00:00 2001 From: Katie Wu Date: Tue, 26 Sep 2023 00:51:02 +0000 Subject: [PATCH 3/3] undo changes for gateway --- .../entrypoints/start_fastapi_server.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/model-engine/model_engine_server/entrypoints/start_fastapi_server.py b/model-engine/model_engine_server/entrypoints/start_fastapi_server.py index 6ea2687e..119935ff 100644 --- a/model-engine/model_engine_server/entrypoints/start_fastapi_server.py +++ b/model-engine/model_engine_server/entrypoints/start_fastapi_server.py @@ -8,9 +8,7 @@ from typing import List -def start_gunicorn_server( - port: int, num_workers: int, debug: bool, graceful_timeout: int, extra_args: List[str] -) -> None: +def start_gunicorn_server(port: int, num_workers: int, debug: bool) -> None: """Starts a GUnicorn server locally.""" additional_args: List[str] = [] if debug: @@ -27,11 +25,8 @@ def start_gunicorn_server( "model_engine_server.api.worker.LaunchWorker", "--workers", f"{num_workers}", - "--graceful-timeout", - f"{graceful_timeout}", *additional_args, "model_engine_server.api.app:app", - *extra_args, ] subprocess.run(command, check=True) @@ -45,12 +40,9 @@ def entrypoint(): parser.add_argument("--port", type=int, default=5000) parser.add_argument("--num-workers", type=int, default=4) parser.add_argument("--debug", "-d", action="store_true") - parser.add_argument("--graceful-timeout", type=int, default=600) - args, extra_args = parser.parse_known_args() + args = parser.parse_args() - start_gunicorn_server( - args.port, args.num_workers, args.debug, args.graceful_timeout, extra_args - ) + start_gunicorn_server(args.port, args.num_workers, args.debug) if __name__ == "__main__":