From 3c53046fb148157e2797435a26e7dd8fca564f52 Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Thu, 19 Aug 2021 13:36:07 +0200 Subject: [PATCH] Fix deadlock in recursive metric locks RecordMetrics() grabs a mutex and calls recordCancelMetric(), which wants to grab the same mutex. Go mutexes are not recursive, so recordCancelMetric blocks forever. recordCancelMetric should not grab the mutex, it can be sure that the caller did it already. --- pkg/metrics/metrics.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index c7f5f8950..82188e450 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -231,7 +231,7 @@ func (opMgr *operationMetricsManager) RecordMetrics(opKey OperationKey, opStatus obj, exists := opMgr.cache[createKey] if exists { // record a cancel metric if found - opMgr.recordCancelMetric(obj, createKey, operationDuration) + opMgr.recordCancelMetricLocked(obj, createKey, operationDuration) } // check if we have a CreateSnapshotAndReady operation pending for this @@ -239,7 +239,7 @@ func (opMgr *operationMetricsManager) RecordMetrics(opKey OperationKey, opStatus obj, exists = opMgr.cache[createAndReadyKey] if exists { // record a cancel metric if found - opMgr.recordCancelMetric(obj, createAndReadyKey, operationDuration) + opMgr.recordCancelMetricLocked(obj, createAndReadyKey, operationDuration) } } @@ -248,9 +248,8 @@ func (opMgr *operationMetricsManager) RecordMetrics(opKey OperationKey, opStatus } // recordCancelMetric records a metric for a create operation that hasn't finished -func (opMgr *operationMetricsManager) recordCancelMetric(val OperationValue, key OperationKey, duration float64) { - opMgr.mu.Lock() - defer opMgr.mu.Unlock() +// This function must be called with opMgr mutex locked (to prevent recursive locks). +func (opMgr *operationMetricsManager) recordCancelMetricLocked(val OperationValue, key OperationKey, duration float64) { // record a cancel metric if found opMgr.opLatencyMetrics.WithLabelValues(