Skip to content

Commit

Permalink
Merge pull request #2321 from sbueringer/pr-fix-deadlock
Browse files Browse the repository at this point in the history
馃悰 Revert: move health probes to runnable
  • Loading branch information
k8s-ci-robot committed May 11, 2023
2 parents 7415ca7 + dd0cb45 commit 92646a5
Showing 1 changed file with 40 additions and 11 deletions.
51 changes: 40 additions & 11 deletions pkg/manager/internal.go
Expand Up @@ -317,9 +317,9 @@ func (cm *controllerManager) addMetricsServer() error {
})
}

func (cm *controllerManager) addHealthProbeServer() error {
func (cm *controllerManager) serveHealthProbes() {
mux := http.NewServeMux()
srv := httpserver.New(mux)
server := httpserver.New(mux)

if cm.readyzHandler != nil {
mux.Handle(cm.readinessEndpointName, http.StripPrefix(cm.readinessEndpointName, cm.readyzHandler))
Expand All @@ -332,12 +332,7 @@ func (cm *controllerManager) addHealthProbeServer() error {
mux.Handle(cm.livenessEndpointName+"/", http.StripPrefix(cm.livenessEndpointName, cm.healthzHandler))
}

return cm.add(&server{
Kind: "health probe",
Log: cm.logger,
Server: srv,
Listener: cm.healthProbeListener,
})
go cm.httpServe("health probe", cm.logger, server, cm.healthProbeListener)
}

func (cm *controllerManager) addPprofServer() error {
Expand All @@ -358,6 +353,42 @@ func (cm *controllerManager) addPprofServer() error {
})
}

func (cm *controllerManager) httpServe(kind string, log logr.Logger, server *http.Server, ln net.Listener) {
log = log.WithValues("kind", kind, "addr", ln.Addr())

go func() {
log.Info("Starting server")
if err := server.Serve(ln); err != nil {
if errors.Is(err, http.ErrServerClosed) {
return
}
if atomic.LoadInt64(cm.stopProcedureEngaged) > 0 {
// There might be cases where connections are still open and we try to shutdown
// but not having enough time to close the connection causes an error in Serve
//
// In that case we want to avoid returning an error to the main error channel.
log.Error(err, "error on Serve after stop has been engaged")
return
}
cm.errChan <- err
}
}()

// Shutdown the server when stop is closed.
<-cm.internalProceduresStop
if err := server.Shutdown(cm.shutdownCtx); err != nil {
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
// Avoid logging context related errors.
return
}
if atomic.LoadInt64(cm.stopProcedureEngaged) > 0 {
cm.logger.Error(err, "error on Shutdown after stop has been engaged")
return
}
cm.errChan <- err
}
}

// Start starts the manager and waits indefinitely.
// There is only two ways to have start return:
// An error has occurred during in one of the internal operations,
Expand Down Expand Up @@ -418,9 +449,7 @@ func (cm *controllerManager) Start(ctx context.Context) (err error) {

// Serve health probes.
if cm.healthProbeListener != nil {
if err := cm.addHealthProbeServer(); err != nil {
return fmt.Errorf("failed to add health probe server: %w", err)
}
cm.serveHealthProbes()
}

// Add pprof server
Expand Down

0 comments on commit 92646a5

Please sign in to comment.