Skip to content

Commit

Permalink
delete all active jobs during restart (#920)
Browse files Browse the repository at this point in the history
  • Loading branch information
tremes committed Mar 26, 2024
1 parent 636bb48 commit 2bb7e46
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 0 deletions.
26 changes: 26 additions & 0 deletions pkg/controller/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"os"
"strings"
"time"

v1 "github.com/openshift/api/config/v1"
Expand Down Expand Up @@ -141,6 +142,7 @@ func (s *Operator) Run(ctx context.Context, controller *controllercmd.Controller
var insightsDataGatherObserver configobserver.InsightsDataGatherObserver
var dgInformer periodic.DataGatherInformer
if insightsConfigAPIEnabled {
deleteAllRunningGatheringsPods(ctx, kubeClient)
configInformersForTechPreview := configv1informers.NewSharedInformerFactory(configClient, 10*time.Minute)
insightsDataGatherObserver, err = configobserver.NewInsightsDataGatherObserver(gatherKubeConfig,
controller.EventRecorder, configInformersForTechPreview)
Expand Down Expand Up @@ -316,3 +318,27 @@ func isRunning(kubeConfig *rest.Config) wait.ConditionWithContextFunc {
return true, nil
}
}

// deleteAllRunningGatheringsPods deletes all the active jobs (and their Pods) with the "periodic-gathering-"
// prefix in the openshift-insights namespace
func deleteAllRunningGatheringsPods(ctx context.Context, cli kubernetes.Interface) {
jobList, err := cli.BatchV1().Jobs("openshift-insights").List(ctx, metav1.ListOptions{})
if err != nil {
klog.Warningf("Failed to list jobs in the Insights namespace: %v ", err)
}

orphan := metav1.DeletePropagationBackground
for i := range jobList.Items {
j := jobList.Items[i]
if j.Status.Active > 0 && strings.HasPrefix(j.Name, "periodic-gathering-") {
err := cli.BatchV1().Jobs("openshift-insights").Delete(ctx, j.Name, metav1.DeleteOptions{
PropagationPolicy: &orphan,
})
if err != nil {
klog.Warningf("Failed to delete job %s: %v", j.Name, err)
} else {
klog.Infof("Job %s was deleted due to container restart", j.Name)
}
}
}
}
4 changes: 4 additions & 0 deletions pkg/controller/periodic/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ func (j *JobController) WaitForJobCompletion(ctx context.Context, job *batchv1.J
return fmt.Errorf("watcher channel was closed unexpectedly")
}

if event.Type == apiWatch.Deleted {
return nil
}

if event.Type != apiWatch.Modified {
continue
}
Expand Down

0 comments on commit 2bb7e46

Please sign in to comment.