-
Notifications
You must be signed in to change notification settings - Fork 604
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
picker: increase range of peer observation #1267
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -106,7 +106,7 @@ func TestRemotesExclude(t *testing.T) { | |
// value. | ||
func TestRemotesConvergence(t *testing.T) { | ||
remotes := NewRemotes() | ||
remotes.Observe(api.Peer{Addr: "one"}, 1) | ||
remotes.Observe(api.Peer{Addr: "one"}, DefaultObservationWeight) | ||
|
||
// zero weighted against 1 | ||
if float64(remotes.Weights()[api.Peer{Addr: "one"}]) < remoteWeightSmoothingFactor { | ||
|
@@ -115,7 +115,7 @@ func TestRemotesConvergence(t *testing.T) { | |
|
||
// crank it up | ||
for i := 0; i < 10; i++ { | ||
remotes.Observe(api.Peer{Addr: "one"}, 1) | ||
remotes.Observe(api.Peer{Addr: "one"}, DefaultObservationWeight) | ||
} | ||
|
||
if float64(remotes.Weights()[api.Peer{Addr: "one"}]) < remoteWeightSmoothingFactor { | ||
|
@@ -127,7 +127,7 @@ func TestRemotesConvergence(t *testing.T) { | |
} | ||
|
||
// provided a poor review | ||
remotes.Observe(api.Peer{Addr: "one"}, -1) | ||
remotes.Observe(api.Peer{Addr: "one"}, -DefaultObservationWeight) | ||
|
||
if remotes.Weights()[api.Peer{Addr: "one"}] > 0 { | ||
t.Fatalf("should be below zero: %v", remotes.Weights()[api.Peer{Addr: "one"}]) | ||
|
@@ -149,7 +149,7 @@ func TestRemotesZeroWeights(t *testing.T) { | |
} | ||
|
||
seen := map[api.Peer]struct{}{} | ||
for i := 0; i < 25; i++ { | ||
for i := 0; i < 1000; i++ { | ||
peer, err := remotes.Select() | ||
if err != nil { | ||
t.Fatalf("unexpected error from Select: %v", err) | ||
|
@@ -165,7 +165,7 @@ func TestRemotesZeroWeights(t *testing.T) { | |
} | ||
|
||
// Pump up number 3! | ||
remotes.Observe(api.Peer{Addr: "three"}, 10) | ||
remotes.Observe(api.Peer{Addr: "three"}, DefaultObservationWeight) | ||
|
||
count := map[api.Peer]int{} | ||
for i := 0; i < 100; i++ { | ||
|
@@ -178,7 +178,7 @@ func TestRemotesZeroWeights(t *testing.T) { | |
count[peer]++ | ||
|
||
// keep observing three | ||
remotes.Observe(api.Peer{Addr: "three"}, 10) | ||
remotes.Observe(api.Peer{Addr: "three"}, DefaultObservationWeight) | ||
} | ||
|
||
// here, we ensure that three is at least three times more likely to be | ||
|
@@ -238,10 +238,10 @@ func TestRemotesDownweight(t *testing.T) { | |
} | ||
|
||
for _, p := range peers { | ||
remotes.Observe(p, 1) | ||
remotes.Observe(p, DefaultObservationWeight) | ||
} | ||
|
||
remotes.Observe(peers[0], -1) | ||
remotes.Observe(peers[0], -DefaultObservationWeight) | ||
|
||
samples := 100000 | ||
choosen := 0 | ||
|
@@ -262,6 +262,67 @@ func TestRemotesDownweight(t *testing.T) { | |
} | ||
} | ||
|
||
// TestRemotesPractical ensures that under a single poor observation, such as | ||
// an error, the likelihood of selecting the node dramatically decreases. | ||
func TestRemotesPractical(t *testing.T) { | ||
peers := []api.Peer{{Addr: "one"}, {Addr: "two"}, {Addr: "three"}} | ||
remotes := NewRemotes(peers...) | ||
seen := map[api.Peer]int{} | ||
selections := 1000 | ||
tolerance := 0.20 // allow 20% delta to reduce test failure probability | ||
|
||
// set a baseline, where selections should be even | ||
for i := 0; i < selections; i++ { | ||
peer, err := remotes.Select() | ||
if err != nil { | ||
t.Fatalf("error selecting peer: %v", err) | ||
} | ||
|
||
remotes.Observe(peer, DefaultObservationWeight) | ||
seen[peer]++ | ||
} | ||
|
||
expected, delta := selections/len(peers), int(tolerance*float64(selections)) | ||
low, high := expected-delta, expected+delta | ||
for peer, count := range seen { | ||
if !(count >= low && count <= high) { | ||
t.Fatalf("weighted selection not balanced: %v selected %v/%v, expected range %v, %v", peer, count, selections, low, high) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since the selection is randomized, isn't it possible for you to get unlucky and have one address be returned much more often than others? Wouldn't 1000 runs that return the same address be a valid, if improbable, result? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's extremely unlikely. |
||
} | ||
} | ||
|
||
// one bad observation should mark the node as bad | ||
remotes.Observe(peers[0], -DefaultObservationWeight) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we observe a few times with a positive weight before this downweighting to make sure that we're still unlikely to select this node even if its weight started as > 0 (as it will in practice)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can either have balanced downweighting have failures immediately reduce selection probability or have less negative than positive to require several observations before it is fully downweighted. In this case, initial condition is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I missed the initial condition. Where is it set up? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The initial loop converges it to |
||
|
||
seen = map[api.Peer]int{} // resut | ||
for i := 0; i < selections; i++ { | ||
peer, err := remotes.Select() | ||
if err != nil { | ||
t.Fatalf("error selecting peer: %v", err) | ||
} | ||
|
||
seen[peer]++ | ||
} | ||
|
||
tolerance = 0.10 // switch to 10% tolerance for two peers | ||
// same check as above, with only 2 peers, the bad peer should be unseen | ||
expected, delta = selections/(len(peers)-1), int(tolerance*float64(selections)) | ||
low, high = expected-delta, expected+delta | ||
for peer, count := range seen { | ||
if peer == peers[0] { | ||
// we have an *extremely* low probability of selecting this node | ||
// (like 0.005%) once. Selecting this more than a few times will | ||
// fail the test. | ||
if count > 3 { | ||
t.Fatalf("downweighted peer should not be selected, selected %v times", count) | ||
} | ||
} | ||
|
||
if !(count >= low && count <= high) { | ||
t.Fatalf("weighted selection not balanced: %v selected %v/%v, expected range %v, %v", peer, count, selections, low, high) | ||
} | ||
} | ||
} | ||
|
||
var peers = []api.Peer{ | ||
{Addr: "one"}, {Addr: "two"}, {Addr: "three"}, | ||
{Addr: "four"}, {Addr: "five"}, {Addr: "six"}, | ||
|
@@ -320,6 +381,6 @@ func benchmarkRemotesObserve(b *testing.B, peers ...api.Peer) { | |
remotes := NewRemotes(peers...) | ||
|
||
for i := 0; i < b.N; i++ { | ||
remotes.Observe(peers[i%len(peers)], 1.0) | ||
remotes.Observe(peers[i%len(peers)], DefaultObservationWeight) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe remove the TODO now that we are taking a closer look at this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is still questionable... and we're probably getting rid of this after the move to grpc.LoadBalancer.