From b2a926541119df9019f73cf313c90338f16c1edf Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 27 Sep 2023 12:06:36 -0700 Subject: [PATCH] pkg/clusterconditions/cache: Avoid panic on all-fresh-cache evaluation When: * It has been longer than MinBetweenMatches since our last wrapped evaluation, * There is no alternative key stale enough (MinForCondition) to steal the evaluation, and * The currently requested key is not cached yet, e.g. because it was recently declared as a new risk to a cluster-version operator which had recently run all the other relevent risks from the current Cincinnati response. avoid crashing with: $ grep -B1 -A15 'too fresh' previous.log I0927 12:07:55.594222 1 cincinnati.go:114] Using a root CA pool with 0 root CA subjects to request updates from https://raw.githubusercontent.com/shellyyang1989/upgrade-cincy/master/cincy-conditional-edge-invalid-promql.json?arch=amd64&channel=stable-4.15&id=dc628f75-7778-457a-bb69-6a31a243c3a9&version=4.15.0-0.test-2023-09-27-091926-ci-ln-01zw7kk-latest I0927 12:07:55.726463 1 cache.go:118] {"type":"PromQL","promql":{"promql":"0 * group(cluster_version)"}} is the most stale cached cluster-condition match entry, but it is too fresh (last evaluated on 2023-09-27 11:37:25.876804482 +0000 UTC m=+175.082381015). However, we don't have a cached evaluation for {"type":"PromQL","promql":{"promql":"group(cluster_version_available_updates{channel=buggy})"}}, so attempt to evaluate that now. I0927 12:07:55.726602 1 cache.go:129] {"type":"PromQL","promql":{"promql":"0 * group(cluster_version)"}} is stealing this cluster-condition match call for {"type":"PromQL","promql":{"promql":"group(cluster_version_available_updates{channel=buggy})"}}, because its last evaluation completed 30m29.849594461s ago I0927 12:07:55.758573 1 cvo.go:703] Finished syncing available updates "openshift-cluster-version/version" (170.074319ms) E0927 12:07:55.758847 1 runtime.go:79] Observed a panic: "invalid memory address or nil pointer dereference" (runtime error: invalid memory address or nil pointer dereference) goroutine 194 [running]: k8s.io/apimachinery/pkg/util/runtime.logPanic({0x1c4df00?, 0x32abc60}) /go/src/github.com/openshift/cluster-version-operator/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:75 +0x99 k8s.io/apimachinery/pkg/util/runtime.HandleCrash({0x0, 0x0, 0xc001489d40?}) /go/src/github.com/openshift/cluster-version-operator/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:49 +0x75 panic({0x1c4df00, 0x32abc60}) /usr/lib/golang/src/runtime/panic.go:884 +0x213 github.com/openshift/cluster-version-operator/pkg/clusterconditions/promql.(*PromQL).Match(0xc0004860e0, {0x220ded8, 0xc00041e550}, 0x0) /go/src/github.com/openshift/cluster-version-operator/pkg/clusterconditions/promql/promql.go:134 +0x419 github.com/openshift/cluster-version-operator/pkg/clusterconditions/cache.(*Cache).Match(0xc0002d3ae0, {0x220ded8, 0xc00041e550}, 0xc0033948d0) /go/src/github.com/openshift/cluster-version-operator/pkg/clusterconditions/cache/cache.go:132 +0x982 github.com/openshift/cluster-version-operator/pkg/clusterconditions.(*conditionRegistry).Match(0xc000016760, {0x220ded8, 0xc00041e550}, {0xc0033948a0, 0x1, 0x0?}) --- pkg/clusterconditions/cache/cache.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/clusterconditions/cache/cache.go b/pkg/clusterconditions/cache/cache.go index 5d5802b16..71e9b3927 100644 --- a/pkg/clusterconditions/cache/cache.go +++ b/pkg/clusterconditions/cache/cache.go @@ -116,6 +116,8 @@ func (c *Cache) Match(ctx context.Context, condition *configv1.ClusterCondition) detail = fmt.Sprintf(" (last evaluated on %s)", thiefResult.When) } klog.V(2).Infof("%s is the most stale cached cluster-condition match entry, but it is too fresh%s. However, we don't have a cached evaluation for %s, so attempt to evaluate that now.", thiefKey, detail, key) + thiefKey = key + targetCondition = condition } // if we ended up stealing this Match call, log that, to make contention more clear