Skip to content

Commit

Permalink
chore: Wrap callosum.AuthenticationError to custom error (#1970)
Browse files Browse the repository at this point in the history
  • Loading branch information
fregataa committed Apr 8, 2024
1 parent 84cec61 commit fc70792
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 3 deletions.
1 change: 1 addition & 0 deletions changes/1970.misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Wrap RPC authentication errors in a custom error for better logging.
16 changes: 13 additions & 3 deletions src/ai/backend/manager/agent_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import sqlalchemy as sa
import zmq
from callosum.exceptions import AuthenticationError
from callosum.lower.zeromq import ZeroMQAddress, ZeroMQRPCTransport
from callosum.rpc import Peer, RPCUserError
from sqlalchemy.engine.row import Row
Expand All @@ -18,9 +19,7 @@
from ai.backend.common.logging import BraceStyleAdapter
from ai.backend.common.types import AgentId

from .api.exceptions import (
AgentError,
)
from .exceptions import AgentError, RPCError
from .models.agent import agents
from .models.utils import ExtendedAsyncSAEngine, execute_with_retry

Expand Down Expand Up @@ -164,5 +163,16 @@ async def rpc_context(
peer.call.order_key.reset(okey_token)
except RPCUserError as orig_exc:
raise AgentError(agent_id, orig_exc.name, orig_exc.repr, orig_exc.args)
except AuthenticationError as orig_exc:
detail = (
"Fail to initate RPC connection. "
"This could be caused by a connection delay or an attempt to connect to an invalid address. "
f"(repr: {repr(orig_exc)})."
)
raise RPCError(
agent_id,
agent_addr,
detail,
)
except Exception:
raise
23 changes: 23 additions & 0 deletions src/ai/backend/manager/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,29 @@ class InvalidArgument(Exception):
pass


class RPCError(RuntimeError):
    """
    An exception class to represent any error caused in RPC functions.

    All three constructor arguments are forwarded to ``RuntimeError``, so
    they are available positionally through ``args``, and are also stored
    as named attributes on the instance.

    :param agent_id: Identifier of the agent associated with the failed RPC.
    :param agent_addr: Address of that agent, as a string.
    :param extra_msg: Free-form detail text describing the failure.
    """

    # Names of the attributes assigned in ``__init__``.
    __slots__ = (
        "agent_id",
        "agent_addr",
        "extra_msg",
    )

    def __init__(
        self,
        agent_id: AgentId,
        agent_addr: str,
        extra_msg: str,
    ) -> None:
        # Pass everything to the base class so that ``args`` (and therefore
        # the default ``str()``/``repr()`` output) carries the full context.
        super().__init__(agent_id, agent_addr, extra_msg)
        self.agent_id = agent_id
        self.agent_addr = agent_addr
        self.extra_msg = extra_msg


class AgentError(RuntimeError):
"""
A dummy exception class to distinguish agent-side errors passed via
Expand Down

0 comments on commit fc70792

Please sign in to comment.