Skip to content

Commit

Permalink
satellite/nodeselection: new node selection for balancing between sma…
Browse files Browse the repository at this point in the history
…ll groups

We have a node selection strategy to choose randomly from different groups (like last_net).

But we have a specific case for the Storj Select program (and private installs),
 where the groups are smaller (like different providers) and the number of groups might be limited (let's say a few dozen).

The proposed algorithm is the following.

 * let's iterate over the groups in random order
 * from each group, select one node randomly
 * nodes which are already selected will be ignored
 * at the end of the iteration we will start to check each group again (if not enough nodes have been selected yet)

Change-Id: I207ccb6e70122fd4e7b9d9acc0667a809492372e
  • Loading branch information
elek authored and Storj Robot committed Feb 7, 2024
1 parent 1ee23fb commit 66d6b2e
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 0 deletions.
7 changes: 7 additions & 0 deletions satellite/nodeselection/config.go
Expand Up @@ -190,6 +190,13 @@ func selectorFromString(expr string) (NodeSelectorInit, error) {
},
"nodelist": AllowedNodesFromFile,
"filter": FilterSelector,
"balanced": func(attribute string) (NodeSelectorInit, error) {
attr, err := CreateNodeAttribute(attribute)
if err != nil {
return nil, err
}
return BalancedGroupBasedSelector(attr), nil
},
}
for k, v := range supportedFilters {
env[k] = v
Expand Down
68 changes: 68 additions & 0 deletions satellite/nodeselection/selector.go
Expand Up @@ -4,6 +4,8 @@
package nodeselection

import (
mathrand "math/rand"

"storj.io/common/storj"
)

Expand Down Expand Up @@ -111,6 +113,17 @@ func included(alreadySelected []storj.NodeID, nodes ...*SelectedNode) bool {
return false
}

// includedInNodes reports whether at least one of the given nodes has the same
// ID as any node in alreadySelected.
func includedInNodes(alreadySelected []*SelectedNode, nodes ...*SelectedNode) bool {
	for _, candidate := range nodes {
		for i := range alreadySelected {
			if alreadySelected[i].ID == candidate.ID {
				return true
			}
		}
	}
	return false
}

// RandomSelector selects any nodes with equal chance.
func RandomSelector() NodeSelectorInit {
return func(nodes []*SelectedNode, filter NodeFilter) NodeSelector {
Expand Down Expand Up @@ -158,3 +171,58 @@ func FilterSelector(loadTimeFilter NodeFilter, init NodeSelectorInit) NodeSelect
return init(filtered, selectionFilter)
}
}

// BalancedGroupBasedSelector picks nodes by visiting the attribute groups (like last_net) in
// random order and taking one random node from each visited group.
//
// Nodes are grouped by the value of attribute when the selector is initialized; nodes rejected
// by the (optional) filter are dropped at that point. On each call the groups are visited in a
// fresh random order, for at most n rounds. One group can be tried multiple times; if the node
// picked from it is excluded or was already chosen, it is ignored and the group has to wait for
// the next round.
//
// The selection is best-effort: if n distinct nodes were not found within n rounds, the nodes
// found so far are returned without an error, so the result can be shorter than n.
func BalancedGroupBasedSelector(attribute NodeAttribute) NodeSelectorInit {
	return func(nodes []*SelectedNode, filter NodeFilter) NodeSelector {
		nodeByAttribute := make(map[string][]*SelectedNode)
		for _, node := range nodes {
			if filter != nil && !filter.Match(node) {
				continue
			}
			// append works on the nil slice of a missing key, no need to pre-create it.
			a := attribute(*node)
			nodeByAttribute[a] = append(nodeByAttribute[a], node)
		}

		groupedNodes := make([][]*SelectedNode, 0, len(nodeByAttribute))
		for _, nodeList := range nodeByAttribute {
			groupedNodes = append(groupedNodes, nodeList)
		}

		return func(n int, alreadySelected []storj.NodeID) (selected []*SelectedNode, err error) {
			if n <= 0 || len(groupedNodes) == 0 {
				return nil, nil
			}

			// upper limit: ideally every full loop over the groups yields one node per group,
			// so we stop much earlier. A loop can also yield nothing (every random pick was a
			// duplicate), which is why the number of loops has to be bounded.
			for i := 0; i < n; i++ {
				r := NewRandomOrder(len(groupedNodes))

				// check all the groups in random order
				for r.Next() {
					group := groupedNodes[r.At()]

					// this group has one chance to give a candidate.
					// The package-level math/rand functions are used on purpose: they are safe
					// for concurrent use, while a shared *rand.Rand is not.
					candidate := group[mathrand.Intn(len(group))]
					if included(alreadySelected, candidate) || includedInNodes(selected, candidate) {
						continue
					}

					// clone only the accepted nodes, rejected candidates need no copy.
					selected = append(selected, candidate.Clone())
					if len(selected) >= n {
						return selected, nil
					}
				}
			}

			// not enough nodes were found: return what we have instead of dropping it,
			// the caller can decide whether a shorter selection is acceptable.
			return selected, nil
		}
	}
}
48 changes: 48 additions & 0 deletions satellite/nodeselection/selector_test.go
Expand Up @@ -326,3 +326,51 @@ func TestFilterSelector(t *testing.T) {
}
}
}

// TestBalancedSelector builds nodes tagged with four owners of very different sizes and checks
// that repeated selections of 10 nodes (almost) never take more than 5 nodes from one owner.
func TestBalancedSelector(t *testing.T) {
	attribute, err := nodeselection.CreateNodeAttribute("tag:owner")
	require.NoError(t, err)

	// number of nodes to create per owner
	ownerCounts := map[string]int{"A": 3, "B": 30, "C": 30, "D": 5}

	var nodes []*nodeselection.SelectedNode
	for owner, count := range ownerCounts {
		for i := 0; i < count; i++ {
			// the number of nodes created so far doubles as the index of the next identity
			identity := testidentity.MustPregeneratedIdentity(len(nodes), storj.LatestIDVersion())
			nodes = append(nodes, &nodeselection.SelectedNode{
				ID: identity.ID,
				Tags: nodeselection.NodeTags{
					{
						Name:  "owner",
						Value: []byte(owner),
					},
				},
			})
		}
	}

	selector := nodeselection.BalancedGroupBasedSelector(attribute)(nodes, nil)

	badSelection := 0
	for round := 0; round < 1000; round++ {
		selectedNodes, err := selector(10, nil)
		require.NoError(t, err)
		require.Len(t, selectedNodes, 10)

		// count the selected nodes per owner, and remember if any owner got more than 5
		perOwner := make(map[string]int)
		unbalanced := false
		for _, node := range selectedNodes {
			owner := attribute(*node)
			perOwner[owner]++
			if perOwner[owner] > 5 {
				unbalanced = true
			}
		}
		if unbalanced {
			badSelection++
		}
	}

	// a wrong selection is possible, but the chance is very-very low: it needs a pick from A
	// followed by random picks which select the very same node again and again
	require.True(t, badSelection < 5)
}

0 comments on commit 66d6b2e

Please sign in to comment.