From aa21e5aadb3947da537844aa13f920cbde7300b4 Mon Sep 17 00:00:00 2001 From: Gene Su Date: Tue, 5 Mar 2024 13:04:51 -0800 Subject: [PATCH 1/6] [Serve] Add actor id and worker id to Serve structured logs Signed-off-by: Gene Su --- python/ray/serve/_private/constants.py | 2 ++ python/ray/serve/_private/logging_utils.py | 32 +++++++++++++--------- python/ray/serve/tests/test_logging.py | 15 ++++++++-- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/python/ray/serve/_private/constants.py b/python/ray/serve/_private/constants.py index f38a7d0d587dd..9b20c2ecf3860 100644 --- a/python/ray/serve/_private/constants.py +++ b/python/ray/serve/_private/constants.py @@ -181,6 +181,8 @@ SERVE_LOG_COMPONENT = "component_name" SERVE_LOG_COMPONENT_ID = "component_id" SERVE_LOG_MESSAGE = "message" +SERVE_LOG_ACTOR_ID = "actor_id" +SERVE_LOG_WORKER_ID = "worker_id" # This is a reserved for python logging module attribute, it should not be changed. SERVE_LOG_LEVEL_NAME = "levelname" SERVE_LOG_TIME = "asctime" diff --git a/python/ray/serve/_private/logging_utils.py b/python/ray/serve/_private/logging_utils.py index 51aa635c333ae..0784c5ca4d161 100644 --- a/python/ray/serve/_private/logging_utils.py +++ b/python/ray/serve/_private/logging_utils.py @@ -14,6 +14,7 @@ RAY_SERVE_ENABLE_JSON_LOGGING, RAY_SERVE_ENABLE_MEMORY_PROFILING, RAY_SERVE_LOG_TO_STDERR, + SERVE_LOG_ACTOR_ID, SERVE_LOG_APPLICATION, SERVE_LOG_COMPONENT, SERVE_LOG_COMPONENT_ID, @@ -26,6 +27,7 @@ SERVE_LOG_REQUEST_ID, SERVE_LOG_ROUTE, SERVE_LOG_TIME, + SERVE_LOG_WORKER_ID, SERVE_LOGGER_NAME, ) from ray.serve.schema import EncodingType, LoggingConfig @@ -47,6 +49,14 @@ class ServeJSONFormatter(logging.Formatter): based on the field of record. """ + ADD_IF_EXIST_FIELDS = [ + SERVE_LOG_REQUEST_ID, + SERVE_LOG_ROUTE, + SERVE_LOG_APPLICATION, + SERVE_LOG_ACTOR_ID, + SERVE_LOG_WORKER_ID, + ] + def __init__( self, component_name: str, @@ -82,21 +92,12 @@ def format(self, record: logging.LogRecord) -> str: record_attributes = copy.deepcopy(record.__dict__) record_format[SERVE_LOG_LEVEL_NAME] = record.levelname record_format[SERVE_LOG_TIME] = self.asctime_formatter.format(record) - - if SERVE_LOG_REQUEST_ID in record_attributes: - record_format[SERVE_LOG_REQUEST_ID] = record_attributes[ - SERVE_LOG_REQUEST_ID - ] - if SERVE_LOG_ROUTE in record_attributes: - record_format[SERVE_LOG_ROUTE] = record_attributes[SERVE_LOG_ROUTE] - - if SERVE_LOG_APPLICATION in record_attributes: - record_format[SERVE_LOG_APPLICATION] = record_attributes[ - SERVE_LOG_APPLICATION - ] - record_format[SERVE_LOG_MESSAGE] = self.message_formatter.format(record) + for field in ServeJSONFormatter.ADD_IF_EXIST_FIELDS: + if field in record_attributes: + record_format[field] = record_attributes[field] + if SERVE_LOG_EXTRA_FIELDS in record_attributes: if not isinstance(record_attributes[SERVE_LOG_EXTRA_FIELDS], dict): raise ValueError( @@ -298,6 +299,11 @@ def record_factory(*args, **kwargs): setattr(record, SERVE_LOG_REQUEST_ID, request_context.request_id) if request_context.app_name: setattr(record, SERVE_LOG_APPLICATION, request_context.app_name) + + runtime_context = ray.get_runtime_context() + setattr(record, SERVE_LOG_ACTOR_ID, runtime_context.get_actor_id()) + setattr(record, SERVE_LOG_WORKER_ID, runtime_context.get_worker_id()) + return record logging.setLogRecordFactory(record_factory) diff --git a/python/ray/serve/tests/test_logging.py b/python/ray/serve/tests/test_logging.py index ccd1606695450..b6c81ba9636d9 100644 --- a/python/ray/serve/tests/test_logging.py +++ b/python/ray/serve/tests/test_logging.py @@ -248,6 +248,8 @@ def fn(*args): "app_name": request_context.app_name, "log_file": logger.handlers[1].baseFilename, "replica": serve.get_replica_context().replica_tag, + "actor_id": ray.get_runtime_context().get_actor_id(), + "worker_id": ray.get_runtime_context().get_worker_id(), } @serve.deployment( @@ -263,6 +265,8 @@ def __call__(self, req: starlette.requests.Request): "app_name": request_context.app_name, "log_file": logger.handlers[1].baseFilename, "replica": serve.get_replica_context().replica_tag, + "actor_id": ray.get_runtime_context().get_actor_id(), + "worker_id": ray.get_runtime_context().get_worker_id(), } serve.run(fn.bind(), name="app1", route_prefix="/fn") @@ -309,18 +313,23 @@ def check_log(): f'.*"deployment": "{resp["app_name"]}_fn", ' f'"replica": "{method_replica_id}", ' f'"component_name": "replica", ' + '"message":.* user func.*, ' f'"request_id": "{resp["request_id"]}", ' f'"route": "{resp["route"]}", ' - f'"application": "{resp["app_name"]}", "message":.* user func.*' + f'"application": "{resp["app_name"]}", ' + f'"actor_id": "{resp["actor_id"]}", ' + f'"worker_id": "{resp["worker_id"]}"' ) user_class_method_log_regex = ( f'.*"deployment": "{resp2["app_name"]}_Model", ' f'"replica": "{class_method_replica_id}", ' f'"component_name": "replica", ' + '"message":.* user log message from class method.*' f'"request_id": "{resp2["request_id"]}", ' f'"route": "{resp2["route"]}", ' - f'"application": "{resp2["app_name"]}", "message":.* user log ' - "message from class method.*" + f'"application": "{resp2["app_name"]}", ' + f'"actor_id": "{resp2["actor_id"]}", ' + f'"worker_id": "{resp2["worker_id"]}"' ) else: user_method_log_regex = ( From 0ab01ce8bf82ec2300ed14190a37cf474ff3c6a0 Mon Sep 17 00:00:00 2001 From: Gene Su Date: Tue, 5 Mar 2024 13:27:33 -0800 Subject: [PATCH 2/6] move actor and worker id to constructor Signed-off-by: Gene Su --- python/ray/serve/_private/logging_utils.py | 23 ++++++++++++++-------- python/ray/serve/tests/test_logging.py | 18 +++++++++-------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/python/ray/serve/_private/logging_utils.py b/python/ray/serve/_private/logging_utils.py index 0784c5ca4d161..76140545e11b5 100644 --- a/python/ray/serve/_private/logging_utils.py +++ b/python/ray/serve/_private/logging_utils.py @@ -53,19 +53,21 @@ class ServeJSONFormatter(logging.Formatter): SERVE_LOG_REQUEST_ID, SERVE_LOG_ROUTE, SERVE_LOG_APPLICATION, - SERVE_LOG_ACTOR_ID, - SERVE_LOG_WORKER_ID, ] def __init__( self, component_name: str, component_id: str, + actor_id: str, + worker_id: str, component_type: Optional[ServeComponentType] = None, ): self.component_log_fmt = { SERVE_LOG_LEVEL_NAME: SERVE_LOG_RECORD_FORMAT[SERVE_LOG_LEVEL_NAME], SERVE_LOG_TIME: SERVE_LOG_RECORD_FORMAT[SERVE_LOG_TIME], + SERVE_LOG_ACTOR_ID: actor_id, + SERVE_LOG_WORKER_ID: worker_id, } if component_type and component_type == ServeComponentType.REPLICA: self.component_log_fmt[SERVE_LOG_DEPLOYMENT] = component_name @@ -299,11 +301,6 @@ def record_factory(*args, **kwargs): setattr(record, SERVE_LOG_REQUEST_ID, request_context.request_id) if request_context.app_name: setattr(record, SERVE_LOG_APPLICATION, request_context.app_name) - - runtime_context = ray.get_runtime_context() - setattr(record, SERVE_LOG_ACTOR_ID, runtime_context.get_actor_id()) - setattr(record, SERVE_LOG_WORKER_ID, runtime_context.get_worker_id()) - return record logging.setLogRecordFactory(record_factory) @@ -344,8 +341,18 @@ def record_factory(*args, **kwargs): "'LoggingConfig' to enable json format." ) if RAY_SERVE_ENABLE_JSON_LOGGING or logging_config.encoding == EncodingType.JSON: + runtime_context = ray.get_runtime_context() + actor_id = runtime_context.get_actor_id() + worker_id = runtime_context.get_worker_id() + file_handler.setFormatter( - ServeJSONFormatter(component_name, component_id, component_type) + ServeJSONFormatter( + component_name=component_name, + component_id=component_id, + component_type=component_type, + actor_id=actor_id, + worker_id=worker_id, + ) ) else: file_handler.setFormatter(ServeFormatter(component_name, component_id)) diff --git a/python/ray/serve/tests/test_logging.py b/python/ray/serve/tests/test_logging.py index b6c81ba9636d9..bdc017903d7f0 100644 --- a/python/ray/serve/tests/test_logging.py +++ b/python/ray/serve/tests/test_logging.py @@ -310,26 +310,28 @@ def check_log(): class_method_replica_id = resp2["replica"].split("#")[-1] if json_log_format: user_method_log_regex = ( - f'.*"deployment": "{resp["app_name"]}_fn", ' + ".*" + f'"actor_id": "{resp["actor_id"]}", ' + f'"worker_id": "{resp["worker_id"]}", ' + f'"deployment": "{resp["app_name"]}_fn", ' f'"replica": "{method_replica_id}", ' f'"component_name": "replica", ' '"message":.* user func.*, ' f'"request_id": "{resp["request_id"]}", ' f'"route": "{resp["route"]}", ' - f'"application": "{resp["app_name"]}", ' - f'"actor_id": "{resp["actor_id"]}", ' - f'"worker_id": "{resp["worker_id"]}"' + f'"application": "{resp["app_name"]}"' ) user_class_method_log_regex = ( - f'.*"deployment": "{resp2["app_name"]}_Model", ' + ".*" + f'"actor_id": "{resp2["actor_id"]}", ' + f'"worker_id": "{resp2["worker_id"]}", ' + f'"deployment": "{resp2["app_name"]}_Model", ' f'"replica": "{class_method_replica_id}", ' f'"component_name": "replica", ' '"message":.* user log message from class method.*' f'"request_id": "{resp2["request_id"]}", ' f'"route": "{resp2["route"]}", ' - f'"application": "{resp2["app_name"]}", ' - f'"actor_id": "{resp2["actor_id"]}", ' - f'"worker_id": "{resp2["worker_id"]}"' + f'"application": "{resp2["app_name"]}"' ) else: user_method_log_regex = ( From ad62669ba7085a9359712738b6311b097faa5614 Mon Sep 17 00:00:00 2001 From: Gene Su Date: Tue, 5 Mar 2024 13:48:03 -0800 Subject: [PATCH 3/6] move get actor id and get worker id into json formatter constructor Signed-off-by: Gene Su --- python/ray/serve/_private/logging_utils.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/python/ray/serve/_private/logging_utils.py b/python/ray/serve/_private/logging_utils.py index 76140545e11b5..8b0c0adf0c3c0 100644 --- a/python/ray/serve/_private/logging_utils.py +++ b/python/ray/serve/_private/logging_utils.py @@ -59,15 +59,14 @@ def __init__( self, component_name: str, component_id: str, - actor_id: str, - worker_id: str, component_type: Optional[ServeComponentType] = None, ): + runtime_context = ray.get_runtime_context() self.component_log_fmt = { SERVE_LOG_LEVEL_NAME: SERVE_LOG_RECORD_FORMAT[SERVE_LOG_LEVEL_NAME], SERVE_LOG_TIME: SERVE_LOG_RECORD_FORMAT[SERVE_LOG_TIME], - SERVE_LOG_ACTOR_ID: actor_id, - SERVE_LOG_WORKER_ID: worker_id, + SERVE_LOG_ACTOR_ID: runtime_context.get_actor_id(), + SERVE_LOG_WORKER_ID: runtime_context.get_worker_id(), } if component_type and component_type == ServeComponentType.REPLICA: self.component_log_fmt[SERVE_LOG_DEPLOYMENT] = component_name @@ -341,18 +340,8 @@ def record_factory(*args, **kwargs): "'LoggingConfig' to enable json format." ) if RAY_SERVE_ENABLE_JSON_LOGGING or logging_config.encoding == EncodingType.JSON: - runtime_context = ray.get_runtime_context() - actor_id = runtime_context.get_actor_id() - worker_id = runtime_context.get_worker_id() - file_handler.setFormatter( - ServeJSONFormatter( - component_name=component_name, - component_id=component_id, - component_type=component_type, - actor_id=actor_id, - worker_id=worker_id, - ) + ServeJSONFormatter(component_name, component_id, component_type) ) else: file_handler.setFormatter(ServeFormatter(component_name, component_id)) From 074c511474e0bbf9b92f619d9a63895d3ffa5674 Mon Sep 17 00:00:00 2001 From: Gene Su Date: Tue, 5 Mar 2024 14:15:53 -0800 Subject: [PATCH 4/6] add try-except block around calling get_runtime_context() and get_actor_id() and get_worker_id() in case if they fails Signed-off-by: Gene Su --- python/ray/serve/_private/logging_utils.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/python/ray/serve/_private/logging_utils.py b/python/ray/serve/_private/logging_utils.py index 8b0c0adf0c3c0..ef1c454971ebd 100644 --- a/python/ray/serve/_private/logging_utils.py +++ b/python/ray/serve/_private/logging_utils.py @@ -68,6 +68,19 @@ def __init__( SERVE_LOG_ACTOR_ID: runtime_context.get_actor_id(), SERVE_LOG_WORKER_ID: runtime_context.get_worker_id(), } + try: + runtime_context = ray.get_runtime_context() + actor_id = runtime_context.get_actor_id() + if actor_id: + self.component_log_fmt[SERVE_LOG_ACTOR_ID] = actor_id + worker_id = runtime_context.get_worker_id() + if worker_id: + self.component_log_fmt[SERVE_LOG_WORKER_ID] = worker_id + except Exception: + # If get_runtime_context() fails for any reason, do nothing (no adding + # actor_id and/or worker_id to the fmt) + pass + if component_type and component_type == ServeComponentType.REPLICA: self.component_log_fmt[SERVE_LOG_DEPLOYMENT] = component_name self.component_log_fmt[SERVE_LOG_REPLICA] = component_id From 356d5441864952d5a00de6c39b9d01fb446f3bdd Mon Sep 17 00:00:00 2001 From: Gene Su Date: Tue, 5 Mar 2024 14:47:58 -0800 Subject: [PATCH 5/6] fix, remove old code Signed-off-by: Gene Su --- python/ray/serve/_private/logging_utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/ray/serve/_private/logging_utils.py b/python/ray/serve/_private/logging_utils.py index ef1c454971ebd..d26a7c7af5d15 100644 --- a/python/ray/serve/_private/logging_utils.py +++ b/python/ray/serve/_private/logging_utils.py @@ -61,12 +61,9 @@ def __init__( component_id: str, component_type: Optional[ServeComponentType] = None, ): - runtime_context = ray.get_runtime_context() self.component_log_fmt = { SERVE_LOG_LEVEL_NAME: SERVE_LOG_RECORD_FORMAT[SERVE_LOG_LEVEL_NAME], SERVE_LOG_TIME: SERVE_LOG_RECORD_FORMAT[SERVE_LOG_TIME], - SERVE_LOG_ACTOR_ID: runtime_context.get_actor_id(), - SERVE_LOG_WORKER_ID: runtime_context.get_worker_id(), } try: runtime_context = ray.get_runtime_context() From 59ef04f95431cf32a86e70f7d15c9fe0478f6657 Mon Sep 17 00:00:00 2001 From: Gene Su Date: Tue, 5 Mar 2024 15:20:02 -0800 Subject: [PATCH 6/6] move message to the last field Signed-off-by: Gene Su --- python/ray/serve/_private/logging_utils.py | 3 ++- python/ray/serve/tests/test_logging.py | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/python/ray/serve/_private/logging_utils.py b/python/ray/serve/_private/logging_utils.py index d26a7c7af5d15..898a37b94e19c 100644 --- a/python/ray/serve/_private/logging_utils.py +++ b/python/ray/serve/_private/logging_utils.py @@ -103,12 +103,13 @@ def format(self, record: logging.LogRecord) -> str: record_attributes = copy.deepcopy(record.__dict__) record_format[SERVE_LOG_LEVEL_NAME] = record.levelname record_format[SERVE_LOG_TIME] = self.asctime_formatter.format(record) - record_format[SERVE_LOG_MESSAGE] = self.message_formatter.format(record) for field in ServeJSONFormatter.ADD_IF_EXIST_FIELDS: if field in record_attributes: record_format[field] = record_attributes[field] + record_format[SERVE_LOG_MESSAGE] = self.message_formatter.format(record) + if SERVE_LOG_EXTRA_FIELDS in record_attributes: if not isinstance(record_attributes[SERVE_LOG_EXTRA_FIELDS], dict): raise ValueError( diff --git a/python/ray/serve/tests/test_logging.py b/python/ray/serve/tests/test_logging.py index bdc017903d7f0..033ece69165dc 100644 --- a/python/ray/serve/tests/test_logging.py +++ b/python/ray/serve/tests/test_logging.py @@ -316,10 +316,10 @@ def check_log(): f'"deployment": "{resp["app_name"]}_fn", ' f'"replica": "{method_replica_id}", ' f'"component_name": "replica", ' - '"message":.* user func.*, ' f'"request_id": "{resp["request_id"]}", ' f'"route": "{resp["route"]}", ' - f'"application": "{resp["app_name"]}"' + f'"application": "{resp["app_name"]}", ' + '"message":.* user func.*' ) user_class_method_log_regex = ( ".*" @@ -328,10 +328,10 @@ def check_log(): f'"deployment": "{resp2["app_name"]}_Model", ' f'"replica": "{class_method_replica_id}", ' f'"component_name": "replica", ' - '"message":.* user log message from class method.*' f'"request_id": "{resp2["request_id"]}", ' f'"route": "{resp2["route"]}", ' - f'"application": "{resp2["app_name"]}"' + f'"application": "{resp2["app_name"]}", ' + '"message":.* user log message from class method.*' ) else: user_method_log_regex = (