kvserver: observability for estimated MVCC stats during splits

This patch adds logging and two new metrics to track estimated MVCC stats computed during splits:

- `kv.split.estimated_stats`: the number of splits that computed estimated MVCC stats, as opposed to 100% accurate ones.
- `kv.split.total_bytes_estimates`: the total number of bytes of estimates introduced by splits. This is calculated as the absolute difference in total bytes between the pre-computed stats before the split and the stored stats during the split (while holding latches).

Fixes: cockroachdb#119516

Release note (ops change): Two new metrics (kv.split.estimated_stats and kv.split.total_bytes_estimates) were added to track the number of splits that produce MVCC stats estimates, and the total bytes of estimates produced.
msbutler · Mar 19, 2024 · af328f4 · af328f4
1 parent 6b4d481
commit af328f4
Show file tree

Hide file tree

Showing 6 changed files with 69 additions and 12 deletions.
diff --git a/docs/generated/metrics/metrics.html b/docs/generated/metrics/metrics.html
@@ -247,6 +247,8 @@
 <tr><td>STORAGE</td><td>kv.replica_read_batch_evaluate.latency</td><td>Execution duration for evaluating a BatchRequest on the read-only path after latches have been acquired.<br/><br/>A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately.</td><td>Nanoseconds</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
 <tr><td>STORAGE</td><td>kv.replica_read_batch_evaluate.without_interleaving_iter</td><td>Number of read-only batches evaluated without an intent interleaving iter.</td><td>Batches</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
 <tr><td>STORAGE</td><td>kv.replica_write_batch_evaluate.latency</td><td>Execution duration for evaluating a BatchRequest on the read-write path after latches have been acquired.<br/><br/>A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately.<br/>Note that the measurement does not include the duration for replicating the evaluated command.</td><td>Nanoseconds</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
+<tr><td>STORAGE</td><td>kv.split.estimated_stats</td><td>Number of splits that computed estimated MVCC stats.</td><td>Events</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
+<tr><td>STORAGE</td><td>kv.split.total_bytes_estimates</td><td>Number of total bytes difference between the pre-split and post-split MVCC stats.</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
 <tr><td>STORAGE</td><td>kv.tenant_rate_limit.current_blocked</td><td>Number of requests currently blocked by the rate limiter</td><td>Requests</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
 <tr><td>STORAGE</td><td>kv.tenant_rate_limit.num_tenants</td><td>Number of tenants currently being tracked</td><td>Tenants</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
 <tr><td>STORAGE</td><td>kv.tenant_rate_limit.read_batches_admitted</td><td>Number of read batches admitted by the rate limiter</td><td>Requests</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>

diff --git a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go
@@ -1199,6 +1199,17 @@ func splitTriggerHelper(
 		statsInput.PreSplitStats, statsInput.MaxCountDiff, statsInput.MaxBytesDiff)
 
 	if noPreComputedStats || emptyLeftOrRight || preComputedStatsDiff {
+		var reason redact.RedactableString
+		if noPreComputedStats {
+			reason = "there are no pre-split LHS stats (or they're empty)"
+		} else if emptyLeftOrRight {
+			reason = "the in-split LHS or RHS is empty"
+		} else {
+			reason = redact.Sprintf("the pre-split user stats differ too much "+
+				"from the in-split stats; pre-split: %+v, in-split: %+v",
+				statsInput.PreSplitStats, statsInput.AbsPreSplitBothStored)
+		}
+		log.Infof(ctx, "falling back to accurate stats computation because %v", reason)
 		h, err = makeSplitStatsHelper(statsInput)
 	} else {
 		h, err = makeEstimatedSplitStatsHelper(statsInput)
@@ -1321,6 +1332,10 @@ func splitTriggerHelper(
 		RHSDelta: *h.AbsPostSplitRight(),
 	}
 
+	pd.Local.Metrics = &result.Metrics{
+		SplitsWithEstimatedStats:     h.splitsWithEstimates,
+		SplitEstimatedTotalBytesDiff: h.estimatedTotalBytesDiff,
+	}
 	deltaPostSplitLeft := h.DeltaPostSplitLeft()
 	return deltaPostSplitLeft, pd, nil
 }

diff --git a/pkg/kv/kvserver/batcheval/result/metrics.go b/pkg/kv/kvserver/batcheval/result/metrics.go
@@ -13,14 +13,16 @@ package result
 // Metrics tracks various counters related to command applications and
 // their outcomes.
 type Metrics struct {
-	LeaseRequestSuccess  int // lease request evaluated successfully
-	LeaseRequestError    int // lease request error at evaluation time
-	LeaseTransferSuccess int // lease transfer evaluated successfully
-	LeaseTransferError   int // lease transfer error at evaluation time
-	ResolveCommit        int // intent commit evaluated successfully
-	ResolveAbort         int // non-poisoning intent abort evaluated successfully
-	ResolvePoison        int // poisoning intent abort evaluated successfully
-	AddSSTableAsWrites   int // AddSSTable requests with IngestAsWrites set
+	LeaseRequestSuccess          int // lease request evaluated successfully
+	LeaseRequestError            int // lease request error at evaluation time
+	LeaseTransferSuccess         int // lease transfer evaluated successfully
+	LeaseTransferError           int // lease transfer error at evaluation time
+	ResolveCommit                int // intent commit evaluated successfully
+	ResolveAbort                 int // non-poisoning intent abort evaluated successfully
+	ResolvePoison                int // poisoning intent abort evaluated successfully
+	AddSSTableAsWrites           int // AddSSTable requests with IngestAsWrites set
+	SplitsWithEstimatedStats     int // Splits that computed stats estimates
+	SplitEstimatedTotalBytesDiff int // Difference between pre- and post-split total bytes.
 }
 
 // Add absorbs the supplied Metrics into the receiver.
@@ -33,4 +35,6 @@ func (mt *Metrics) Add(o Metrics) {
 	mt.ResolveAbort += o.ResolveAbort
 	mt.ResolvePoison += o.ResolvePoison
 	mt.AddSSTableAsWrites += o.AddSSTableAsWrites
+	mt.SplitsWithEstimatedStats += o.SplitsWithEstimatedStats
+	mt.SplitEstimatedTotalBytesDiff += o.SplitEstimatedTotalBytesDiff
 }
diff --git a/pkg/kv/kvserver/batcheval/split_stats_helper.go b/pkg/kv/kvserver/batcheval/split_stats_helper.go
@@ -10,7 +10,11 @@
 
 package batcheval
 
-import "github.com/cockroachdb/cockroach/pkg/storage/enginepb"
+import (
+	"math"
+
+	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
+)
 
 // splitStatsHelper codifies and explains the stats computations related to a
 // split. The quantities known during a split (i.e. while the split trigger
@@ -109,8 +113,10 @@ import "github.com/cockroachdb/cockroach/pkg/storage/enginepb"
 type splitStatsHelper struct {
 	in splitStatsHelperInput
 
-	absPostSplitLeft  *enginepb.MVCCStats
-	absPostSplitRight *enginepb.MVCCStats
+	absPostSplitLeft        *enginepb.MVCCStats
+	absPostSplitRight       *enginepb.MVCCStats
+	splitsWithEstimates     int
+	estimatedTotalBytesDiff int
 }
 
 // splitStatsScanFn scans a post-split keyspace to compute its stats. The
@@ -277,6 +283,9 @@ func makeEstimatedSplitStatsHelper(input splitStatsHelperInput) (splitStatsHelpe
 	h.absPostSplitLeft.ContainsEstimates++
 	h.absPostSplitRight.ContainsEstimates++
 
+	h.splitsWithEstimates = 1
+	h.estimatedTotalBytesDiff = int(math.Abs(
+		float64(h.in.AbsPreSplitBothStored.Total()) - float64(h.in.PreSplitStats.Total())))
 	return h, nil
 }
 

diff --git a/pkg/kv/kvserver/client_metrics_test.go b/pkg/kv/kvserver/client_metrics_test.go
@@ -160,7 +160,7 @@ func TestStoreResolveMetrics(t *testing.T) {
 	// them everywhere.
 	{
 		act := fmt.Sprintf("%+v", result.Metrics{})
-		exp := "{LeaseRequestSuccess:0 LeaseRequestError:0 LeaseTransferSuccess:0 LeaseTransferError:0 ResolveCommit:0 ResolveAbort:0 ResolvePoison:0 AddSSTableAsWrites:0}"
+		exp := "{LeaseRequestSuccess:0 LeaseRequestError:0 LeaseTransferSuccess:0 LeaseTransferError:0 ResolveCommit:0 ResolveAbort:0 ResolvePoison:0 AddSSTableAsWrites:0 SplitsWithEstimatedStats:0 SplitEstimatedTotalBytesDiff:0}"
 		if act != exp {
 			t.Errorf("need to update this test due to added fields: %v", act)
 		}

diff --git a/pkg/kv/kvserver/metrics.go b/pkg/kv/kvserver/metrics.go
@@ -2299,6 +2299,20 @@ Note that the measurement does not include the duration for replicating the eval
 		Unit:        metric.Unit_COUNT,
 	}
 
+	metaSplitEstimatedStats = metric.Metadata{
+		Name:        "kv.split.estimated_stats",
+		Help:        "Number of splits that computed estimated MVCC stats.",
+		Measurement: "Events",
+		Unit:        metric.Unit_COUNT,
+	}
+
+	metaSplitEstimatedTotalBytesDiff = metric.Metadata{
+		Name:        "kv.split.total_bytes_estimates",
+		Help:        "Number of total bytes difference between the pre-split and post-split MVCC stats.",
+		Measurement: "Bytes",
+		Unit:        metric.Unit_BYTES,
+	}
+
 	metaStorageFlushUtilization = metric.Metadata{
 		Name:        "storage.flush.utilization",
 		Help:        "The percentage of time the storage engine is actively flushing memtables to disk.",
@@ -2803,6 +2817,9 @@ type StoreMetrics struct {
 	ReplicaReadBatchDroppedLatchesBeforeEval *metric.Counter
 	ReplicaReadBatchWithoutInterleavingIter  *metric.Counter
 
+	SplitsWithEstimatedStats     *metric.Counter
+	SplitEstimatedTotalBytesDiff *metric.Counter
+
 	FlushUtilization *metric.GaugeFloat64
 	FsyncLatency     *metric.ManualWindowHistogram
 
@@ -3567,6 +3584,10 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
 		DiskIOTime:         metric.NewGauge(metaDiskIOTime),
 		DiskWeightedIOTime: metric.NewGauge(metaDiskWeightedIOTime),
 		IopsInProgress:     metric.NewGauge(metaIopsInProgress),
+
+		// Estimated MVCC stats in split.
+		SplitsWithEstimatedStats:     metric.NewCounter(metaSplitEstimatedStats),
+		SplitEstimatedTotalBytesDiff: metric.NewCounter(metaSplitEstimatedTotalBytesDiff),
 	}
 
 	storeRegistry.AddMetricStruct(sm)
@@ -3808,6 +3829,12 @@ func (sm *StoreMetrics) handleMetricsResult(ctx context.Context, metric result.M
 	sm.AddSSTableAsWrites.Inc(int64(metric.AddSSTableAsWrites))
 	metric.AddSSTableAsWrites = 0
 
+	sm.SplitsWithEstimatedStats.Inc(int64(metric.SplitsWithEstimatedStats))
+	metric.SplitsWithEstimatedStats = 0
+
+	sm.SplitEstimatedTotalBytesDiff.Inc(int64(metric.SplitEstimatedTotalBytesDiff))
+	metric.SplitEstimatedTotalBytesDiff = 0
+
 	if metric != (result.Metrics{}) {
 		log.Fatalf(ctx, "unhandled fields in metrics result: %+v", metric)
 	}