Skip to content

Commit

Permalink
Merge pull request kubernetes-csi#240 from jsafrane/dont-crash
Browse files Browse the repository at this point in the history
Don't exit the probe on connection issues
  • Loading branch information
k8s-ci-robot committed Jan 5, 2024
2 parents 9cad16c + 4335b7e commit 4ed8c89
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 37 deletions.
44 changes: 8 additions & 36 deletions cmd/livenessprobe/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,8 @@ import (
"net"
"net/http"
"os"
"sync"
"time"

"google.golang.org/grpc"
"k8s.io/klog/v2"

"k8s.io/component-base/featuregate"
Expand Down Expand Up @@ -62,7 +60,7 @@ func (h *healthProbe) checkProbe(w http.ResponseWriter, req *http.Request) {
ctx, cancel := context.WithTimeout(req.Context(), *probeTimeout)
defer cancel()

conn, err := acquireConnection(ctx, h.metricsManager)
conn, err := connlib.Connect(*csiAddress, h.metricsManager, connlib.WithTimeout(*probeTimeout))
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte(err.Error()))
Expand Down Expand Up @@ -92,37 +90,6 @@ func (h *healthProbe) checkProbe(w http.ResponseWriter, req *http.Request) {
klog.V(5).InfoS("Health check succeeded")
}

// acquireConnection wraps the connlib.Connect but adding support to context
// cancelation.
func acquireConnection(ctx context.Context, metricsManager metrics.CSIMetricsManager) (conn *grpc.ClientConn, err error) {

var m sync.Mutex
var canceled bool
ready := make(chan bool)
go func() {
conn, err = connlib.Connect(*csiAddress, metricsManager)

m.Lock()
defer m.Unlock()
if err != nil && canceled && conn != nil {
conn.Close()
}

close(ready)
}()

select {
case <-ctx.Done():
m.Lock()
defer m.Unlock()
canceled = true
return nil, ctx.Err()

case <-ready:
return conn, err
}
}

func main() {
fg := featuregate.NewFeatureGate()
logsapi.AddFeatureGates(fg)
Expand Down Expand Up @@ -151,10 +118,14 @@ func main() {
}

metricsManager := metrics.NewCSIMetricsManager("" /* driverName */)
csiConn, err := acquireConnection(context.Background(), metricsManager)
// Connect to the CSI driver without any timeout to avoid crashing the probe when the driver is not ready yet.
// Goal: liveness probe never crashes, it only fails the probe when the driver is not available (yet).
// Since a http server for the probe is not running at this point, Kubernetes liveness probe will fail immediately
// with "connection refused", which is good enough to fail the probe.
csiConn, err := connlib.Connect(*csiAddress, metricsManager, connlib.WithTimeout(0))
if err != nil {
// connlib should retry forever so a returned error should mean
// the grpc client is misconfigured rather than an error on the network
// the grpc client is misconfigured rather than an error on the network or CSI driver.
klog.ErrorS(err, "Failed to establish connection to CSI driver")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
Expand All @@ -163,6 +134,7 @@ func main() {
csiDriverName, err := rpc.GetDriverName(context.Background(), csiConn)
csiConn.Close()
if err != nil {
// The CSI driver does not support GetDriverName, which is serious enough to crash the probe.
klog.ErrorS(err, "Failed to get CSI driver name")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ require (
github.com/golang/mock v1.6.0
github.com/kubernetes-csi/csi-lib-utils v0.17.0
github.com/kubernetes-csi/csi-test/v5 v5.2.0
google.golang.org/grpc v1.60.1
k8s.io/component-base v0.29.0
k8s.io/klog/v2 v2.110.1
)
Expand Down Expand Up @@ -46,6 +45,7 @@ require (
golang.org/x/sys v0.14.0 // indirect
golang.org/x/text v0.14.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 // indirect
google.golang.org/grpc v1.60.1 // indirect
google.golang.org/protobuf v1.31.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
Expand Down

0 comments on commit 4ed8c89

Please sign in to comment.