Skip to content

Commit

Permalink
kv: try next replica on RangeNotFoundError
Browse files Browse the repository at this point in the history
Previously, if a Batch RPC came back with a RangeNotFoundError,
we would immediately stop trying to send to more replicas, evict the
range descriptor, and start a new attempt after a back-off.

This new attempt could end up using the same replica, so if the
RangeNotFoundError persisted for some amount of time, so would the
unsuccessful retries for requests to it as DistSender doesn't
aggressively shuffle the replicas.

It turns out that there are such situations, and the
election-after-restart roachtest spuriously hit one of them:

1. new replica receives a preemptive snapshot and the ConfChange
2. cluster restarts
3. now the new replica is in this state until the range wakes
   up, which may not happen for some time.
4. the first request to the range runs into the above problem

@nvanbenschoten: I think there is an issue to be filed about the
tendency of DistSender to get stuck in unfortunate configurations.

Fixes cockroachdb#30613.

Release note (bug fix): Avoid repeatedly trying a replica that was found
to be in the process of being added.
  • Loading branch information
tbg committed Oct 12, 2018
1 parent 9998c7a commit 6aef2e4
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 11 deletions.
35 changes: 29 additions & 6 deletions pkg/kv/dist_sender.go
Original file line number Diff line number Diff line change
Expand Up @@ -1109,7 +1109,7 @@ func (ds *DistSender) sendPartialBatch(
// they're all down, or we're using an out-of-date range
// descriptor. Invalidate the cache and try again with the new
// metadata.
log.Eventf(ctx, "evicting range descriptor on %T and backoff for re-lookup: %+v", tErr, desc)
log.VEventf(ctx, 1, "evicting range descriptor on %T and backoff for re-lookup: %+v", tErr, desc)
if err := evictToken.Evict(ctx); err != nil {
return response{pErr: roachpb.NewError(err)}
}
Expand Down Expand Up @@ -1341,25 +1341,48 @@ func (ds *DistSender) sendToReplicas(
}
}
} else {
// NB: This section of code may have unfortunate performance implications. If we
// exit the below type switch with propagateError remaining at `false`, we'll try
// more replicas. That may succeed and future requests might do the same thing over
// and over again, adding needless round-trips to the earlier replicas.
propagateError := false
switch tErr := br.Error.GetDetail().(type) {
case nil:
return br, nil
case *roachpb.StoreNotFoundError, *roachpb.NodeUnavailableError:
// These errors are likely to be unique to the replica that reported
// them, so no action is required before the next retry.
case *roachpb.RangeNotFoundError:
// The store we routed to doesn't have this replica. This can happen when
// our descriptor is outright outdated, but it can also be caused by a
// replica that has just been added but needs a snapshot to be caught up.
//
// Evict this replica from the lease holder cache, if applicable, and try
// the next replica. It is important that we do the latter, for the next
// retry might otherwise try the same replica again (assuming the replica is
// still in the descriptor), looping endlessly until the replica is caught
// up (which may never happen if the target range is dormant).
if tErr.StoreID != 0 {
cachedStoreID, found := ds.leaseHolderCache.Lookup(ctx, tErr.RangeID)
evicting := found && cachedStoreID == tErr.StoreID
if evicting {
log.Eventf(ctx, "evicting leaseholder s%d for r%d after RangeNotFoundError", tErr.StoreID, tErr.RangeID)
ds.leaseHolderCache.Update(ctx, tErr.RangeID, 0 /* evict */)
}

}
case *roachpb.NotLeaseHolderError:
ds.metrics.NotLeaseHolderErrCount.Inc(1)
if lh := tErr.LeaseHolder; lh != nil {
// If the replica we contacted knows the new lease holder, update the cache.
ds.leaseHolderCache.Update(ctx, rangeID, lh.StoreID)

// If the implicated leaseholder is not a known replica,
// return a RangeNotFoundError to signal eviction of the
// cached RangeDescriptor and re-send.
// If the implicated leaseholder is not a known replica, return a SendError
// to signal eviction of the cached RangeDescriptor and re-send.
if replicas.FindReplica(lh.StoreID) == -1 {
// Replace NotLeaseHolderError with RangeNotFoundError.
br.Error = roachpb.NewError(roachpb.NewRangeNotFoundError(rangeID, curReplica.StoreID))
br.Error = roachpb.NewError(roachpb.NewSendError(fmt.Sprintf(
"leaseholder s%d (via %+v) not in cached replicas %v", lh.StoreID, curReplica, replicas,
)))
propagateError = true
} else {
// Move the new lease holder to the head of the queue for the next retry.
Expand Down
84 changes: 84 additions & 0 deletions pkg/kv/dist_sender_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1273,6 +1273,90 @@ func TestSendRPCRetry(t *testing.T) {
}
}

// This test reproduces the main problem in:
// https://github.com/cockroachdb/cockroach/issues/30613.
// by verifying that if a RangeNotFoundError is returned from a Replica,
// the next Replica is tried.
func TestSendRPCRangeNotFoundError(t *testing.T) {
defer leaktest.AfterTest(t)()
stopper := stop.NewStopper()
defer stopper.Stop(context.TODO())

g, clock := makeGossip(t, stopper)
if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{NodeID: 1}); err != nil {
t.Fatal(err)
}

// Fill RangeDescriptor with three replicas.
var descriptor = roachpb.RangeDescriptor{
RangeID: 1,
StartKey: roachpb.RKey("a"),
EndKey: roachpb.RKey("z"),
}
for i := 1; i <= 3; i++ {
addr := util.MakeUnresolvedAddr("tcp", fmt.Sprintf("node%d", i))
nd := &roachpb.NodeDescriptor{
NodeID: roachpb.NodeID(i),
Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
}
if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(i)), nd, time.Hour); err != nil {
t.Fatal(err)
}

descriptor.Replicas = append(descriptor.Replicas, roachpb.ReplicaDescriptor{
NodeID: roachpb.NodeID(i),
StoreID: roachpb.StoreID(i),
ReplicaID: roachpb.ReplicaID(i),
})
}
descDB := mockRangeDescriptorDBForDescs(
testMetaRangeDescriptor,
descriptor,
)

seen := map[roachpb.ReplicaID]struct{}{}
var ds *DistSender
var testFn simpleSendFn = func(
_ context.Context,
_ SendOptions,
_ ReplicaSlice,
ba roachpb.BatchRequest,
) (*roachpb.BatchResponse, error) {
br := ba.CreateReply()
if _, ok := seen[ba.Replica.ReplicaID]; ok {
br.Error = roachpb.NewErrorf("visited replica %+v twice", ba.Replica)
return br, nil
}
seen[ba.Replica.ReplicaID] = struct{}{}
if len(seen) <= 2 {
if len(seen) == 1 {
// Add to the leaseholder cache to verify that the response evicts it.
ds.leaseHolderCache.Update(context.Background(), ba.RangeID, ba.Replica.StoreID)
}
br.Error = roachpb.NewError(roachpb.NewRangeNotFoundError(ba.RangeID, ba.Replica.StoreID))
return br, nil
}
return br, nil
}
cfg := DistSenderConfig{
AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
Clock: clock,
TestingKnobs: ClientTestingKnobs{
TransportFactory: adaptSimpleTransport(testFn),
},
RangeDescriptorDB: descDB,
}
ds = NewDistSender(cfg, g)
get := roachpb.NewGet(roachpb.Key("b"))
_, err := client.SendWrapped(context.Background(), ds, get)
if err != nil {
t.Fatal(err)
}
if storeID, found := ds.leaseHolderCache.Lookup(context.Background(), roachpb.RangeID(1)); found {
t.Fatalf("unexpected cached leaseholder s%d", storeID)
}
}

// TestGetNodeDescriptor checks that the Node descriptor automatically gets
// looked up from Gossip.
func TestGetNodeDescriptor(t *testing.T) {
Expand Down
5 changes: 0 additions & 5 deletions pkg/storage/store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -382,11 +382,6 @@ func TestStoreInitAndBootstrap(t *testing.T) {
t.Fatalf("unable to read store ident: %s", err)
}

// Try to get 1st range--non-existent.
if _, err := store.GetReplica(1); err == nil {
t.Error("expected error fetching non-existent range")
}

// Bootstrap first range.
if err := store.BootstrapRange(nil, cfg.Settings.Version.ServerVersion); err != nil {
t.Errorf("failure to create first range: %s", err)
Expand Down

0 comments on commit 6aef2e4

Please sign in to comment.