diff --git a/go.mod b/go.mod index 45dfcf6f12..23f3b35587 100644 --- a/go.mod +++ b/go.mod @@ -16,7 +16,7 @@ require ( github.com/openshift/api v0.0.0-20200929165121-b7210d15c07d github.com/openshift/build-machinery-go v0.0.0-20200819073603-48aa266c95f7 github.com/openshift/client-go v0.0.0-20200827190008-3062137373b5 - github.com/openshift/library-go v0.0.0-20201026152857-8d497f8f24b5 + github.com/openshift/library-go v0.0.0-20201126123001-e59ac21aada3 github.com/pkg/profile v1.5.0 // indirect github.com/prometheus/client_golang v1.7.1 github.com/spf13/cobra v1.0.0 diff --git a/go.sum b/go.sum index a2726aae77..f2f69322b7 100644 --- a/go.sum +++ b/go.sum @@ -383,8 +383,8 @@ github.com/openshift/build-machinery-go v0.0.0-20200819073603-48aa266c95f7 h1:mO github.com/openshift/build-machinery-go v0.0.0-20200819073603-48aa266c95f7/go.mod h1:b1BuldmJlbA/xYtdZvKi+7j5YGB44qJUJDZ9zwiNCfE= github.com/openshift/client-go v0.0.0-20200827190008-3062137373b5 h1:E6WhVL5p3rfjtc+o+jVG/29Aclnf3XIF7akxXvadwR0= github.com/openshift/client-go v0.0.0-20200827190008-3062137373b5/go.mod h1:5rGmrkQ8DJEUXA+AR3rEjfH+HFyg4/apY9iCQFgvPfE= -github.com/openshift/library-go v0.0.0-20201026152857-8d497f8f24b5 h1:v325FMTjp6yxw9Tm4dYGb8x/mapqcH/XecnVaWjBnBo= -github.com/openshift/library-go v0.0.0-20201026152857-8d497f8f24b5/go.mod h1:NI6xOQGuTnLXeHW8Z2glKSFhF7X+YxlAlqlBMaK0zEM= +github.com/openshift/library-go v0.0.0-20201126123001-e59ac21aada3 h1:sk60hcNw2Ih082dOCODekuP8lA96uYr2wT4bFsKpd/A= +github.com/openshift/library-go v0.0.0-20201126123001-e59ac21aada3/go.mod h1:KNfLGf4dIRJ+QB2aGy67AOy1k+DV783cMCuJf0d4Zik= github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= diff --git a/vendor/github.com/openshift/library-go/pkg/config/client/client_config.go 
b/vendor/github.com/openshift/library-go/pkg/config/client/client_config.go index a247311057..e2b90ca531 100644 --- a/vendor/github.com/openshift/library-go/pkg/config/client/client_config.go +++ b/vendor/github.com/openshift/library-go/pkg/config/client/client_config.go @@ -2,14 +2,12 @@ package client import ( "io/ioutil" - "net" - "net/http" - "time" - "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" + "net/http" configv1 "github.com/openshift/api/config/v1" + "github.com/openshift/library-go/pkg/network" ) // GetKubeConfigOrInClusterConfig loads in-cluster config if kubeConfigFile is empty or the file if not, @@ -101,10 +99,7 @@ func (c ClientTransportOverrides) DefaultClientTransport(rt http.RoundTripper) h return rt } - transport.DialContext = (&net.Dialer{ - Timeout: 30 * time.Second, - KeepAlive: 30 * time.Second, - }).DialContext + transport.DialContext = network.DefaultClientDialContext() // Hold open more internal idle connections transport.MaxIdleConnsPerHost = 100 diff --git a/vendor/github.com/openshift/library-go/pkg/network/dialer.go b/vendor/github.com/openshift/library-go/pkg/network/dialer.go new file mode 100644 index 0000000000..f19be44a3e --- /dev/null +++ b/vendor/github.com/openshift/library-go/pkg/network/dialer.go @@ -0,0 +1,13 @@ +package network + +import ( + "context" + "net" +) + +type DialContext func(ctx context.Context, network, address string) (net.Conn, error) + +// DefaultClientDialContext returns a DialContext function from a network dialer with the default options set. 
+func DefaultClientDialContext() DialContext { + return dialerWithDefaultOptions() +} diff --git a/vendor/github.com/openshift/library-go/pkg/network/dialer_linux.go b/vendor/github.com/openshift/library-go/pkg/network/dialer_linux.go new file mode 100644 index 0000000000..b8ff8db85e --- /dev/null +++ b/vendor/github.com/openshift/library-go/pkg/network/dialer_linux.go @@ -0,0 +1,93 @@ +// +build linux + +package network + +import ( + "net" + "os" + "syscall" + "time" + + "golang.org/x/sys/unix" + + utilerrors "k8s.io/apimachinery/pkg/util/errors" +) + +func dialerWithDefaultOptions() DialContext { + nd := &net.Dialer{ + // TCP_USER_TIMEOUT does affect the behaviour of connect() which is controlled by this field so we set it to the same value + Timeout: 25 * time.Second, + // KeepAlive must be set to a negative value to stop the std library from applying the default values + // by doing so we ensure that the options we are interested in won't be overwritten + KeepAlive: time.Duration(-1), + Control: func(network, address string, con syscall.RawConn) error { + var errs []error + err := con.Control(func(fd uintptr) { + optionsErr := setDefaultSocketOptions(int(fd)) + if optionsErr != nil { + errs = append(errs, optionsErr) + } + }) + if err != nil { + errs = append(errs, err) + } + return utilerrors.NewAggregate(errs) + }, + } + return nd.DialContext +} + +// setDefaultSocketOptions sets custom socket options so that we can detect connections to an unhealthy (dead) peer quickly. +// In particular we set TCP_USER_TIMEOUT that specifies the maximum amount of time that transmitted data may remain +// unacknowledged before TCP will forcibly close the connection. +// +// Note +// TCP_USER_TIMEOUT can't be too low because a single dropped packet might drop the entire connection. 
+// Ideally it should be set to: TCP_KEEPIDLE + TCP_KEEPINTVL * TCP_KEEPCNT +func setDefaultSocketOptions(fd int) error { + // specifies the maximum amount of time in milliseconds that transmitted data may remain + // unacknowledged before TCP will forcibly close the corresponding connection and return ETIMEDOUT to the application + tcpUserTimeoutInMilliSeconds := int(25 * time.Second / time.Millisecond) + + // specifies the interval at which probes are sent in seconds + tcpKeepIntvl := int(roundDuration(5*time.Second, time.Second)) + + // specifies the threshold for sending the first KEEP ALIVE probe in seconds + tcpKeepIdle := int(roundDuration(2*time.Second, time.Second)) + + // enable keep-alive probes + if err := syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_KEEPALIVE, 1); err != nil { + return wrapSyscallError("setsockopt", err) + } + + if err := syscall.SetsockoptInt(int(fd), syscall.IPPROTO_TCP, unix.TCP_USER_TIMEOUT, tcpUserTimeoutInMilliSeconds); err != nil { + return wrapSyscallError("setsockopt", err) + } + + if err := syscall.SetsockoptInt(int(fd), syscall.IPPROTO_TCP, syscall.TCP_KEEPINTVL, tcpKeepIntvl); err != nil { + return wrapSyscallError("setsockopt", err) + } + + if err := syscall.SetsockoptInt(int(fd), syscall.IPPROTO_TCP, syscall.TCP_KEEPIDLE, tcpKeepIdle); err != nil { + return wrapSyscallError("setsockopt", err) + } + return nil +} + +// roundDuration returns d expressed as a count of to units, rounded up. +// +// note that it was copied from the std library +func roundDuration(d time.Duration, to time.Duration) time.Duration { + return (d + to - 1) / to +} + +// wrapSyscallError takes an error and a syscall name. If the error is +// a syscall.Errno, it wraps it in an os.SyscallError using the syscall name. 
+// +// note that it was copied from the std library +func wrapSyscallError(name string, err error) error { + if _, ok := err.(syscall.Errno); ok { + err = os.NewSyscallError(name, err) + } + return err +} diff --git a/vendor/github.com/openshift/library-go/pkg/network/dialer_others.go b/vendor/github.com/openshift/library-go/pkg/network/dialer_others.go new file mode 100644 index 0000000000..6519b0986d --- /dev/null +++ b/vendor/github.com/openshift/library-go/pkg/network/dialer_others.go @@ -0,0 +1,19 @@ +// +build !linux + +package network + +import ( + "net" + "time" + + "k8s.io/klog/v2" +) + +func dialerWithDefaultOptions() DialContext { + klog.V(2).Info("Creating the default network Dialer (unsupported platform). It may take up to 15 minutes to detect broken connections and establish a new one") + nd := &net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + } + return nd.DialContext +} diff --git a/vendor/github.com/openshift/library-go/pkg/operator/staticpod/controller/staticpodstate/staticpodstate_controller.go b/vendor/github.com/openshift/library-go/pkg/operator/staticpod/controller/staticpodstate/staticpodstate_controller.go index 4b7e93d378..e39f03da93 100644 --- a/vendor/github.com/openshift/library-go/pkg/operator/staticpod/controller/staticpodstate/staticpodstate_controller.go +++ b/vendor/github.com/openshift/library-go/pkg/operator/staticpod/controller/staticpodstate/staticpodstate_controller.go @@ -91,24 +91,31 @@ func (c *StaticPodStateController) sync(ctx context.Context, syncCtx factory.Syn } images.Insert(pod.Spec.Containers[0].Image) - for _, containerStatus := range pod.Status.ContainerStatuses { - if !containerStatus.Ready { - // When container is not ready, we can't determine whether the operator is failing or not and every container will become not - // ready when created, so do not blip the failing state for it. 
- // We will still reflect the container not ready state in error conditions, but we don't set the operator as failed. - running := "" - if containerStatus.State.Running != nil { - running = fmt.Sprintf(" running since %s but", containerStatus.State.Running.StartedAt.Time) - } - errs = append(errs, fmt.Errorf("pod/%s container %q is%s not ready: %s", pod.Name, containerStatus.Name, running, describeWaitingContainerState(containerStatus.State.Waiting))) - } - // if container status is waiting, but not initializing pod, increase the failing error counter - // this usually means the container is stucked on initializing network - if containerStatus.State.Waiting != nil && containerStatus.State.Waiting.Reason != "PodInitializing" { + for i, containerStatus := range pod.Status.ContainerStatuses { + switch { + case containerStatus.State.Waiting != nil && containerStatus.State.Waiting.Reason != "PodInitializing": + // if container status is waiting, but not initializing pod, increase the failing error counter + // this usually means the container is stuck on initializing network errs = append(errs, fmt.Errorf("pod/%s container %q is waiting: %s", pod.Name, containerStatus.Name, describeWaitingContainerState(containerStatus.State.Waiting))) failingErrorCount++ - } - if containerStatus.State.Terminated != nil { + case containerStatus.State.Running != nil: + maxNormalStartupDuration := 30 * time.Second // assume 30s for containers without probes + if i < len(pod.Spec.Containers) { // should always happen + spec := pod.Spec.Containers[i] + if spec.LivenessProbe != nil { + maxNormalStartupDuration = maxFailureDuration(spec.LivenessProbe) + } + grace := 10 * time.Second + maxNormalStartupDuration = max(maxNormalStartupDuration, maxFailureDuration(spec.ReadinessProbe)) + maxFailureDuration(spec.StartupProbe) + grace + } + + if !containerStatus.Ready && time.Now().After(containerStatus.State.Running.StartedAt.Add(maxNormalStartupDuration)) { + // When container is not ready, we 
can't determine whether the operator is failing or not and every container will become not + // ready when created, so do not blip the failing state for it. + // We will still reflect the container not ready state in error conditions, but we don't set the operator as failed. + errs = append(errs, fmt.Errorf("pod/%s container %q started at %s is still not ready", pod.Name, containerStatus.Name, containerStatus.State.Running.StartedAt.Time)) + } + case containerStatus.State.Terminated != nil: // Containers can be terminated gracefully to trigger certificate reload, do not report these as failures. errs = append(errs, fmt.Errorf("pod/%s container %q is terminated: %s: %s", pod.Name, containerStatus.Name, containerStatus.State.Terminated.Reason, containerStatus.State.Terminated.Message)) @@ -116,7 +123,6 @@ func (c *StaticPodStateController) sync(ctx context.Context, syncCtx factory.Syn if containerStatus.State.Terminated.ExitCode != 0 { failingErrorCount++ } - } } } @@ -168,6 +174,21 @@ func (c *StaticPodStateController) sync(ctx context.Context, syncCtx factory.Syn return err } +func maxFailureDuration(p *v1.Probe) time.Duration { + if p == nil { + return 0 + } + + return time.Duration(p.InitialDelaySeconds)*time.Second + time.Duration(p.FailureThreshold*p.PeriodSeconds)*time.Second +} + +func max(x, y time.Duration) time.Duration { + if x > y { + return x + } + return y +} + func mirrorPodNameForNode(staticPodName, nodeName string) string { return staticPodName + "-" + nodeName } diff --git a/vendor/modules.txt b/vendor/modules.txt index 4d14539381..99996e20ba 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -209,7 +209,7 @@ github.com/openshift/client-go/operatorcontrolplane/informers/externalversions/o github.com/openshift/client-go/operatorcontrolplane/listers/operatorcontrolplane/v1alpha1 github.com/openshift/client-go/user/clientset/versioned/scheme github.com/openshift/client-go/user/clientset/versioned/typed/user/v1 -# 
github.com/openshift/library-go v0.0.0-20201026152857-8d497f8f24b5 +# github.com/openshift/library-go v0.0.0-20201126123001-e59ac21aada3 github.com/openshift/library-go/pkg/assets github.com/openshift/library-go/pkg/certs github.com/openshift/library-go/pkg/config/client @@ -223,6 +223,7 @@ github.com/openshift/library-go/pkg/controller/fileobserver github.com/openshift/library-go/pkg/controller/manager github.com/openshift/library-go/pkg/controller/metrics github.com/openshift/library-go/pkg/crypto +github.com/openshift/library-go/pkg/network github.com/openshift/library-go/pkg/operator/certrotation github.com/openshift/library-go/pkg/operator/condition github.com/openshift/library-go/pkg/operator/configobserver