From be827b84b4705aa29b85f0f69948a6e0a1c407c7 Mon Sep 17 00:00:00 2001 From: Thomas Parrott Date: Tue, 14 Sep 2021 17:27:18 +0100 Subject: [PATCH 1/7] lxd/cluster/heartbeat: Adds Name field to APIHeartbeatMember Signed-off-by: Thomas Parrott --- lxd/cluster/heartbeat.go | 1 + 1 file changed, 1 insertion(+) diff --git a/lxd/cluster/heartbeat.go b/lxd/cluster/heartbeat.go index d8aa30d9d3..0e634b26c6 100644 --- a/lxd/cluster/heartbeat.go +++ b/lxd/cluster/heartbeat.go @@ -34,6 +34,7 @@ const ( type APIHeartbeatMember struct { ID int64 // ID field value in nodes table. Address string // Host and Port of node. + Name string // Name of cluster member. RaftID uint64 // ID field value in raft_nodes table, zero if non-raft node. RaftRole int // Node role in the raft cluster, from the raft_nodes table Raft bool // Deprecated, use non-zero RaftID instead to indicate raft node. From 1b3658340443249bf5f25fc99a875092173ab8e5 Mon Sep 17 00:00:00 2001 From: Thomas Parrott Date: Tue, 14 Sep 2021 17:27:34 +0100 Subject: [PATCH 2/7] lxd/cluster/heartbeat: Preallocate raftNodeMap in Update For efficiency. Signed-off-by: Thomas Parrott --- lxd/cluster/heartbeat.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lxd/cluster/heartbeat.go b/lxd/cluster/heartbeat.go index 0e634b26c6..5b3beb9950 100644 --- a/lxd/cluster/heartbeat.go +++ b/lxd/cluster/heartbeat.go @@ -75,9 +75,8 @@ func (hbState *APIHeartbeat) Update(fullStateList bool, raftNodes []db.RaftNode, // If we've been supplied a fresh set of node states, this is a full state list. hbState.FullStateList = fullStateList - raftNodeMap := make(map[string]db.RaftNode) - // Convert raftNodes to a map keyed on address for lookups later. 
+ raftNodeMap := make(map[string]db.RaftNode, len(raftNodes)) for _, raftNode := range raftNodes { raftNodeMap[raftNode.Address] = raftNode } From 0a4a19be2b1697ca5e8c1882183223f477205b38 Mon Sep 17 00:00:00 2001 From: Thomas Parrott Date: Tue, 14 Sep 2021 17:28:01 +0100 Subject: [PATCH 3/7] lxd/cluster/heartbeat: Populate Name in Update Signed-off-by: Thomas Parrott --- lxd/cluster/heartbeat.go | 1 + 1 file changed, 1 insertion(+) diff --git a/lxd/cluster/heartbeat.go b/lxd/cluster/heartbeat.go index 5b3beb9950..a88578644a 100644 --- a/lxd/cluster/heartbeat.go +++ b/lxd/cluster/heartbeat.go @@ -86,6 +86,7 @@ func (hbState *APIHeartbeat) Update(fullStateList bool, raftNodes []db.RaftNode, member := APIHeartbeatMember{ ID: node.ID, Address: node.Address, + Name: node.Name, LastHeartbeat: node.Heartbeat, Online: !node.Heartbeat.Before(time.Now().Add(-offlineThreshold)), } From b06a5dc06cb0e34a943c37580407e555b7547b2f Mon Sep 17 00:00:00 2001 From: Thomas Parrott Date: Tue, 14 Sep 2021 17:00:40 +0100 Subject: [PATCH 4/7] lxd/cluster/gateway: Update currentRaftNodes to use a single query to get cluster member info Signed-off-by: Thomas Parrott --- lxd/cluster/gateway.go | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/lxd/cluster/gateway.go b/lxd/cluster/gateway.go index efaa37180f..2b4ef957d6 100644 --- a/lxd/cluster/gateway.go +++ b/lxd/cluster/gateway.go @@ -958,19 +958,29 @@ func (g *Gateway) currentRaftNodes() ([]db.RaftNode, error) { // Get the names of the raft nodes from the global database. 
if g.Cluster != nil { err = g.Cluster.Transaction(func(tx *db.ClusterTx) error { + nodes, err := tx.GetNodes() + if err != nil { + return fmt.Errorf("Failed loading cluster members: %w", err) + } + + nodesByAddress := make(map[string]db.NodeInfo, len(nodes)) + for _, node := range nodes { + nodesByAddress[node.Address] = node + } + for i, server := range servers { - node, err := tx.GetNodeByAddress(server.Address) - if err != nil { - return err + node, found := nodesByAddress[server.Address] + if !found { + return fmt.Errorf("Cluster member info not found for %q", server.Address) } raftNodes[i].Name = node.Name - } + return nil }) if err != nil { - logger.Warn("Failed to retrieve cluster member", log.Ctx{"err": err}) + logger.Warn("Failed getting raft nodes", log.Ctx{"err": err}) } } From 0835e76801dbb73dad837a4d7242d304823ab959 Mon Sep 17 00:00:00 2001 From: Thomas Parrott Date: Tue, 14 Sep 2021 17:14:45 +0100 Subject: [PATCH 5/7] lxd/cluster/gateway: Preallocate raftNodes slice for efficiency Signed-off-by: Thomas Parrott --- lxd/cluster/gateway.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lxd/cluster/gateway.go b/lxd/cluster/gateway.go index 2b4ef957d6..9b17cde362 100644 --- a/lxd/cluster/gateway.go +++ b/lxd/cluster/gateway.go @@ -943,7 +943,7 @@ func (g *Gateway) currentRaftNodes() ([]db.RaftNode, error) { return nil, err } - raftNodes := []db.RaftNode{} + raftNodes := make([]db.RaftNode, 0, len(servers)) for i, server := range servers { address, err := g.nodeAddress(server.Address) if err != nil { From 85c3799f50f0652cb408c7e6120fc35dceb6e4af Mon Sep 17 00:00:00 2001 From: Thomas Parrott Date: Tue, 14 Sep 2021 17:25:26 +0100 Subject: [PATCH 6/7] lxd/cluster/gateway: Do not query leader cluster DB to enrich raft member name in HandlerFuncs Currently this is causing a remote transaction and query for each raft node in the heartbeat payload. 
This makes the heartbeat handler take too long (>1s for larger clusters or multi-DC clusters), which in turn leads the leader to consider some members offline. Signed-off-by: Thomas Parrott --- lxd/cluster/gateway.go | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/lxd/cluster/gateway.go b/lxd/cluster/gateway.go index 9b17cde362..7abe1551f9 100644 --- a/lxd/cluster/gateway.go +++ b/lxd/cluster/gateway.go @@ -229,25 +229,13 @@ func (g *Gateway) HandlerFuncs(nodeRefreshTask func(*APIHeartbeat), trustedCerts raftNodes := make([]db.RaftNode, 0) for _, node := range heartbeatData.Members { if node.RaftID > 0 { - nodeInfo := db.NodeInfo{} - if g.Cluster != nil { - err = g.Cluster.Transaction(func(tx *db.ClusterTx) error { - var err error - nodeInfo, err = tx.GetNodeByAddress(node.Address) - return err - }) - if err != nil { - logger.Warn("Failed to retrieve cluster member", log.Ctx{"err": err}) - } - } - raftNodes = append(raftNodes, db.RaftNode{ NodeInfo: client.NodeInfo{ ID: node.RaftID, Address: node.Address, Role: db.RaftRole(node.RaftRole), }, - Name: nodeInfo.Name, + Name: node.Name, }) } } From 39b8e36b73204c8e68dcf8f147f1b27bde59c878 Mon Sep 17 00:00:00 2001 From: Thomas Parrott Date: Tue, 14 Sep 2021 17:44:43 +0100 Subject: [PATCH 7/7] lxd/cluster/recover: Preallocate nodes in Reconfigure Signed-off-by: Thomas Parrott --- lxd/cluster/recover.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lxd/cluster/recover.go b/lxd/cluster/recover.go index a5f4de359f..bf08b074a2 100644 --- a/lxd/cluster/recover.go +++ b/lxd/cluster/recover.go @@ -139,8 +139,8 @@ func Reconfigure(database *db.Node, raftNodes []db.RaftNode) error { } localAddress := info.Address - nodes := []client.NodeInfo{} + nodes := make([]client.NodeInfo, 0, len(raftNodes)) for _, raftNode := range raftNodes { nodes = append(nodes, raftNode.NodeInfo)