storage: trigger GC based on SysCount/SysBytes
If there is "a lot" of data in Sys{Bytes,Count}, then we are likely
experiencing a large abort span. The abort span is not supposed to
become that large, but it does happen and causes stability fallout,
usually due to a combination of shortcomings:

1. there's no trigger for GC based on abort span size alone (before
   this commit)
2. transaction aborts tended to create unnecessary abort span entries,
   fixed (and 19.2-backported) in:
   cockroachdb#42765
3. aborting transactions in a busy loop:
   cockroachdb#38088
   (and we suspect this also happens in user apps occasionally)
4. large snapshots would never complete due to the queue time limits
   (addressed in cockroachdb#44952).

In an ideal world, we would factor the abort span into this method
directly, but until then the condition guarding this block will do.
At worst, there is some other reason for SysBytes to become that large
while also incurring a large SysCount, but I'm not sure how this
would happen. The only other things in this span are the versioned
range descriptors (which follow regular GC and only ever add a
SysCount of one) and transaction records (which expire like abort
span records).
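
For illustration, a minimal self-contained sketch of the trigger as
described above. The threshold constants mirror
probablyLargeAbortSpanSysCountThreshold and
probablyLargeAbortSpanSysBytesThreshold from the diff below; the
stand-in types, variable values, and the txnCleanupThreshold value are
assumptions made for the sketch, not the actual storage package code:

    // Minimal sketch (not the real implementation): approximates the
    // new trigger using plain ints and durations instead of
    // enginepb.MVCCStats and gcQueueScore.
    package main

    import (
        "fmt"
        "time"
    )

    const (
        // Mirror probablyLargeAbortSpanSysCountThreshold and
        // probablyLargeAbortSpanSysBytesThreshold from the diff below.
        sysCountThreshold = 10000
        sysBytesThreshold = 16 * (1 << 20) // 16mb

        // Stand-in for storagebase.TxnCleanupThreshold; the exact value
        // is an assumption, the sketch only needs a positive duration.
        txnCleanupThreshold = time.Hour
    )

    // probablyLargeAbortSpan reports whether the system key stats are
    // large enough that a bloated abort span is the likely culprit.
    func probablyLargeAbortSpan(sysCount, sysBytes int64) bool {
        return sysCount >= sysCountThreshold && sysBytes >= sysBytesThreshold
    }

    func main() {
        var (
            sysCount     int64 = 50000         // hypothetical SysCount
            sysBytes     int64 = 32 << 20      // hypothetical SysBytes
            shouldQueue        = false         // verdict of the regular key/intent scoring
            finalScore         = 0.0
            likelyLastGC       = 2 * time.Hour // zero would mean "unknown"
        )

        // Queue the range if the abort span is probably large, regular
        // scoring did not already queue it, and the last GC (if known)
        // is long enough ago that abort span entries would have expired.
        if probablyLargeAbortSpan(sysCount, sysBytes) && !shouldQueue &&
            (likelyLastGC == 0 || likelyLastGC > txnCleanupThreshold) {
            shouldQueue = true
            finalScore++
        }

        fmt.Println(shouldQueue, finalScore) // true 1
    }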

Release note (bug fix): Range garbage collection will now trigger
based on a large abort span, adding defense-in-depth against ranges
growing large (and eventually unstable).
tbg committed Mar 4, 2020
1 parent 860c137 commit 7bff2f5
Showing 2 changed files with 72 additions and 7 deletions.
43 changes: 39 additions & 4 deletions pkg/storage/gc_queue.go
@@ -24,6 +24,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage/engine/enginepb"
"github.com/cockroachdb/cockroach/pkg/storage/gc"
"github.com/cockroachdb/cockroach/pkg/storage/storagebase"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
"github.com/cockroachdb/cockroach/pkg/util/log"
@@ -42,8 +43,34 @@ const (
// on keys and intents.
gcKeyScoreThreshold = 2
gcIntentScoreThreshold = 10

probablyLargeAbortSpanSysCountThreshold = 10000
probablyLargeAbortSpanSysBytesThreshold = 16 * (1 << 20) // 16mb
)

func probablyLargeAbortSpan(ms enginepb.MVCCStats) bool {
// If there is "a lot" of data in Sys{Bytes,Count}, then we are likely
// experiencing a large abort span. The abort span is not supposed to
// become that large, but it does happen and causes stability fallout,
// usually due to a combination of shortcomings:
//
// 1. there's no trigger for GC based on abort span size alone (before
// this code block here was written)
// 2. transaction aborts tended to create unnecessary abort span entries,
// fixed (and 19.2-backported) in:
// https://github.com/cockroachdb/cockroach/pull/42765
// 3. aborting transactions in a busy loop:
// https://github.com/cockroachdb/cockroach/issues/38088
// (and we suspect this also happens in user apps occasionally)
// 4. large snapshots would never complete due to the queue time limits
// (addressed in https://github.com/cockroachdb/cockroach/pull/44952).
//
// In an ideal world, we would factor the abort span into this method
// directly, but until then the condition guarding this block will do.
return ms.SysCount >= probablyLargeAbortSpanSysCountThreshold &&
ms.SysBytes >= probablyLargeAbortSpanSysBytesThreshold
}

// gcQueue manages a queue of replicas slated to be scanned in their
// entirety using the MVCC versions iterator. The gc queue manages the
// following tasks:
@@ -147,11 +174,8 @@ func makeGCQueueScore(
// have slightly different priorities and even symmetrical workloads don't
// trigger GC at the same time.
r := makeGCQueueScoreImpl(
ctx, int64(repl.RangeID), now, ms, policy,
ctx, int64(repl.RangeID), now, ms, policy, gcThreshold,
)
if (gcThreshold != hlc.Timestamp{}) {
r.LikelyLastGC = time.Duration(now.WallTime - gcThreshold.Add(r.TTL.Nanoseconds(), 0).WallTime)
}
return r
}

@@ -249,9 +273,14 @@ func makeGCQueueScoreImpl(
now hlc.Timestamp,
ms enginepb.MVCCStats,
policy zonepb.GCPolicy,
gcThreshold hlc.Timestamp,
) gcQueueScore {
ms.Forward(now.WallTime)
var r gcQueueScore
if (gcThreshold != hlc.Timestamp{}) {
r.LikelyLastGC = time.Duration(now.WallTime - gcThreshold.Add(r.TTL.Nanoseconds(), 0).WallTime)
}

r.TTL = policy.TTL()

// Treat a zero TTL as a one-second TTL, which avoids a priority of infinity
@@ -314,6 +343,12 @@ func makeGCQueueScoreImpl(
r.ShouldQueue = r.FuzzFactor*valScore > gcKeyScoreThreshold || r.FuzzFactor*r.IntentScore > gcIntentScoreThreshold
r.FinalScore = r.FuzzFactor * (valScore + r.IntentScore)

if probablyLargeAbortSpan(ms) && !r.ShouldQueue &&
(r.LikelyLastGC == 0 || r.LikelyLastGC > storagebase.TxnCleanupThreshold) {
r.ShouldQueue = true
r.FinalScore++
}

return r
}

36 changes: 33 additions & 3 deletions pkg/storage/gc_queue_test.go
@@ -38,6 +38,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/kr/pretty"
"github.com/pkg/errors"
"github.com/stretchr/testify/require"
"golang.org/x/sync/syncmap"
)

@@ -105,7 +106,7 @@ func TestGCQueueMakeGCScoreInvariantQuick(t *testing.T) {
GCBytesAge: gcByteAge,
}
now := initialNow.Add(timePassed.Nanoseconds(), 0)
r := makeGCQueueScoreImpl(ctx, int64(seed), now, ms, zonepb.GCPolicy{TTLSeconds: ttlSec})
r := makeGCQueueScoreImpl(ctx, int64(seed), now, ms, zonepb.GCPolicy{TTLSeconds: ttlSec}, hlc.Timestamp{})
wouldHaveToDeleteSomething := gcBytes*int64(ttlSec) < ms.GCByteAge(now.WallTime)
result := !r.ShouldQueue || wouldHaveToDeleteSomething
if !result {
@@ -126,13 +127,42 @@ func TestGCQueueMakeGCScoreAnomalousStats(t *testing.T) {
LiveBytes: int64(liveBytes),
ValBytes: int64(valBytes),
KeyBytes: int64(keyBytes),
}, zonepb.GCPolicy{TTLSeconds: 60})
}, zonepb.GCPolicy{TTLSeconds: 60}, hlc.Timestamp{})
return r.DeadFraction >= 0 && r.DeadFraction <= 1
}, &quick.Config{MaxCount: 1000}); err != nil {
t.Fatal(err)
}
}

func TestGCQueueMakeGCScoreLargeAbortSpan(t *testing.T) {
defer leaktest.AfterTest(t)()
const seed = 1
var ms enginepb.MVCCStats
ms.SysCount += probablyLargeAbortSpanSysCountThreshold
ms.SysBytes += probablyLargeAbortSpanSysBytesThreshold

thresh := storagebase.TxnCleanupThreshold.Nanoseconds()

// GC triggered if abort span should all be gc'able and it's likely large.
{
r := makeGCQueueScoreImpl(
context.Background(), seed,
hlc.Timestamp{WallTime: thresh + 1},
ms, zonepb.GCPolicy{TTLSeconds: 10000},
hlc.Timestamp{WallTime: 1},
)
require.True(t, r.ShouldQueue)
require.NotZero(t, r.FinalScore)
}

// Heuristic doesn't fire if likely last GC within TxnCleanupThreshold.
{
r := makeGCQueueScoreImpl(context.Background(), seed, hlc.Timestamp{WallTime: storagebase.TxnCleanupThreshold.Nanoseconds()}, ms, zonepb.GCPolicy{TTLSeconds: 10000}, hlc.Timestamp{WallTime: 1})
require.False(t, r.ShouldQueue)
require.Zero(t, r.FinalScore)
}
}

const cacheFirstLen = 3

type gcTestCacheKey struct {
@@ -248,7 +278,7 @@ func (cws *cachedWriteSimulator) shouldQueue(
ts := hlc.Timestamp{}.Add(ms.LastUpdateNanos+after.Nanoseconds(), 0)
r := makeGCQueueScoreImpl(context.Background(), 0 /* seed */, ts, ms, zonepb.GCPolicy{
TTLSeconds: int32(ttl.Seconds()),
})
}, hlc.Timestamp{})
if fmt.Sprintf("%.2f", r.FinalScore) != fmt.Sprintf("%.2f", prio) || b != r.ShouldQueue {
cws.t.Errorf("expected queued=%t (is %t), prio=%.2f, got %.2f: after=%s, ttl=%s:\nms: %+v\nscore: %s",
b, r.ShouldQueue, prio, r.FinalScore, after, ttl, ms, r)
