From 01622a4b648cf95aae51f279515c2b58d09793a1 Mon Sep 17 00:00:00 2001 From: Ben Pickard Date: Fri, 14 Oct 2022 11:51:06 -0400 Subject: [PATCH] refactor retry to close watchFactory leverage stop channel on watch factory when periodic retry stopchannel is called Problem: Hybrid test was creating creating an ovn-controller for some tests. The tests could not use the waitGroup in the test so the afterEach function (cleanup function after each test) could execute while leaving controllers up. In the tests, we were leveraging the code in ovn.go to run the controllers and creating a watcher pod on them. This issue was just a symptom of the main issue which was that we were using the incorrect stopChannel in the retry logic. Solution: The stopChannel that we use in the watchFactory is private, so we created a function that when called will close the watcher stopChannel, closing the watcher. We then call this function in the select (basically a switch statement in go but for channels) to close the watcher, because when we stop watching an object, that means we no longer care about it and should stop retrying it. Signed-off-by: Ben Pickard Closes: #3203 --- go-controller/pkg/factory/factory.go | 5 +++++ go-controller/pkg/retry/obj_retry.go | 6 +++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index b35c1db1679..d5209c5d63d 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -346,6 +346,11 @@ func NewNodeWatchFactory(ovnClientset *util.OVNClientset, nodeName string) (*Wat return wf, nil } +func (wf *WatchFactory) WaitForWatchFactoryStopChannel(stopChan chan struct{}) { + <-wf.stopChan + close(stopChan) +} + func (wf *WatchFactory) Shutdown() { close(wf.stopChan) diff --git a/go-controller/pkg/retry/obj_retry.go b/go-controller/pkg/retry/obj_retry.go index ddc6f4db810..7275d9c3f02 100644 --- a/go-controller/pkg/retry/obj_retry.go +++ b/go-controller/pkg/retry/obj_retry.go @@ -364,6 +364,10 @@ func (r *RetryFramework) iterateRetryResources() { // RetryObjInterval seconds or when requested through retryChan. func (r *RetryFramework) periodicallyRetryResources() { timer := time.NewTicker(RetryObjInterval) + waitCh := make(chan struct{}) + go func() { + r.watchFactory.WaitForWatchFactoryStopChannel(waitCh) + }() defer timer.Stop() for { select { @@ -375,7 +379,7 @@ func (r *RetryFramework) periodicallyRetryResources() { r.iterateRetryResources() timer.Reset(RetryObjInterval) - case <-r.StopChan: + case <-waitCh: klog.V(5).Infof("Stop channel got triggered: will stop retrying failed objects of type %s", r.ResourceHandler.ObjType) return }