Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automated cherry pick of #54377 #54702

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
76 changes: 48 additions & 28 deletions test/e2e/storage/volume_metrics.go
Expand Up @@ -97,33 +97,9 @@ var _ = SIGDescribe("[Serial] Volume metrics", func() {
framework.Logf("Deleting pod %q/%q", pod.Namespace, pod.Name)
framework.ExpectNoError(framework.DeletePodWithWait(f, c, pod))

backoff := wait.Backoff{
Duration: 10 * time.Second,
Factor: 1.2,
Steps: 21,
}

updatedStorageMetrics := make(map[string]int64)

waitErr := wait.ExponentialBackoff(backoff, func() (bool, error) {
updatedMetrics, err := metricsGrabber.GrabFromControllerManager()
updatedStorageMetrics := waitForDetachAndGrabMetrics(storageOpMetrics, metricsGrabber)

if err != nil {
framework.Logf("Error fetching controller-manager metrics")
return false, err
}
updatedStorageMetrics = getControllerStorageMetrics(updatedMetrics)
metricCount := len(updatedStorageMetrics)
// Usually a pod deletion does not mean immediate volume detach
// we will have to retry to verify volume_detach metrics
_, detachMetricFound := updatedStorageMetrics["volume_detach"]
if metricCount < 3 || !detachMetricFound {
framework.Logf("Volume metrics not collected yet, going to retry")
return false, nil
}
return true, nil
})
Expect(waitErr).NotTo(HaveOccurred(), "Error fetching storage c-m metrics : %v", waitErr)
Expect(len(updatedStorageMetrics)).ToNot(Equal(0), "Error fetching c-m updated storage metrics")

volumeOperations := []string{"volume_provision", "volume_detach", "volume_attach"}

Expand Down Expand Up @@ -176,6 +152,48 @@ var _ = SIGDescribe("[Serial] Volume metrics", func() {
})
})

func waitForDetachAndGrabMetrics(oldMetrics map[string]int64, metricsGrabber *metrics.MetricsGrabber) map[string]int64 {
backoff := wait.Backoff{
Duration: 10 * time.Second,
Factor: 1.2,
Steps: 21,
}

updatedStorageMetrics := make(map[string]int64)
oldDetachCount, ok := oldMetrics["volume_detach"]
if !ok {
oldDetachCount = 0
}

verifyMetricFunc := func() (bool, error) {
updatedMetrics, err := metricsGrabber.GrabFromControllerManager()

if err != nil {
framework.Logf("Error fetching controller-manager metrics")
return false, err
}

updatedStorageMetrics = getControllerStorageMetrics(updatedMetrics)
newDetachCount, ok := updatedStorageMetrics["volume_detach"]

// if detach metrics are not yet there, we need to retry
if !ok {
return false, nil
}

// if old Detach count is more or equal to new detach count, that means detach
// event has not been observed yet.
if oldDetachCount >= newDetachCount {
return false, nil
}
return true, nil
}

waitErr := wait.ExponentialBackoff(backoff, verifyMetricFunc)
Expect(waitErr).NotTo(HaveOccurred(), "Timeout error fetching storage c-m metrics : %v", waitErr)
return updatedStorageMetrics
}

func verifyMetricCount(oldMetrics map[string]int64, newMetrics map[string]int64, metricName string) {
oldCount, ok := oldMetrics[metricName]
// if metric does not exist in oldMap, it probably hasn't been emitted yet.
Expand All @@ -185,8 +203,10 @@ func verifyMetricCount(oldMetrics map[string]int64, newMetrics map[string]int64,

newCount, ok := newMetrics[metricName]
Expect(ok).To(BeTrue(), "Error getting updated metrics for %s", metricName)

Expect(oldCount + 1).To(Equal(newCount))
// It appears that in a busy cluster some spurious detaches are unavoidable
// even if the test is run serially. We really just verify if new count
// is greater than old count
Expect(newCount).To(BeNumerically(">", oldCount), "New count %d should be more than old count %d for action %s", newCount, oldCount, metricName)
}

func getControllerStorageMetrics(ms metrics.ControllerManagerMetrics) map[string]int64 {
Expand Down