Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kubelet: Make probe to be on for time drift #123898

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 22 additions & 2 deletions pkg/kubelet/prober/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@ type worker struct {
// If set, skip probing.
onHold bool

// Set to true when time drift is detected for the pod, will be unset
// once container's elapsed start time reaches +ve value.
timeDrift bool
// timeDriftStartTimeInSec contains container's elapsed start time at
// the time of first occurance of time drift.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// the time of first occurance of time drift.
// the time of first occurrence of time drift.

timeDriftStartTimeInSec float64

// proberResultsMetricLabels holds the labels attached to this worker
// for the ProberResults metric by result.
proberResultsSuccessfulMetricLabels metrics.Labels
Expand Down Expand Up @@ -265,8 +272,21 @@ func (w *worker) doProbe(ctx context.Context) (keepGoing bool) {
return false
}

// Probe disabled for InitialDelaySeconds.
if int32(time.Since(c.State.Running.StartedAt.Time).Seconds()) < w.spec.InitialDelaySeconds {
// Probe disabled for InitialDelaySeconds. This also considers
// time drift scenario (in that case container's started time is
// later than system current time).
startedAtInSecs := time.Since(c.State.Running.StartedAt.Time).Seconds()
if startedAtInSecs <= 0 && !w.timeDrift {
w.timeDrift = true
w.timeDriftStartTimeInSec = startedAtInSecs
} else if startedAtInSecs > 0 {
w.timeDrift = false
}
if startedAtInSecs <= 0 && w.timeDrift &&
int32(startedAtInSecs-w.timeDriftStartTimeInSec) < w.spec.InitialDelaySeconds {
return true
}
if startedAtInSecs > 0 && int32(startedAtInSecs) < w.spec.InitialDelaySeconds {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to wait InitialDelaySeconds since doProbe called first time if startedAtInSecs <= 0.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bart0sh good point, but would that happen (i.e. pod startup followed by immediate time drift event before InitialDelaySeconds) ? In our case, time drift just happened during node maintenance on a existing pod.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My point was that if startedAtInSecs <= 0 probe would run immediately, which is not desirable. It's better to wait for InitialDelaySeconds just to be on the safe side.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok @bart0sh , added code to wait for InitialDelaySeconds when time drift happens.

return true
}

Expand Down
59 changes: 59 additions & 0 deletions pkg/kubelet/prober/worker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,65 @@ func TestInitialDelay(t *testing.T) {
}
}

func TestInitialDelayForTimeDrift(t *testing.T) {
ctx := context.Background()
m := newTestManager()

for _, probeType := range [...]probeType{liveness, readiness, startup} {
w := newTestWorker(m, probeType, v1.Probe{
InitialDelaySeconds: 10,
})
m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(probeType != startup))

expectContinue(t, w, w.doProbe(ctx), "during initial delay")
// Default value depends on probe, Success for liveness, Failure for readiness, Unknown for startup
switch probeType {
case liveness:
expectResult(t, w, results.Success, "during initial delay")
case readiness:
expectResult(t, w, results.Failure, "during initial delay")
case startup:
expectResult(t, w, results.Unknown, "during initial delay")
}

laterStatus := getTestRunningStatusWithStarted(probeType != startup)
// Set StartedAt to a future time to test time drift.
laterStatus.ContainerStatuses[0].State.Running.StartedAt.Time =
time.Now().Add(600 * time.Second)
m.statusManager.SetPodStatus(w.pod, laterStatus)

// The doProbe call should fail immediately after time drift.
expectContinue(t, w, w.doProbe(ctx), "during initial delay")
switch probeType {
case liveness:
expectResult(t, w, results.Success, "during initial delay")
case readiness:
expectResult(t, w, results.Failure, "during initial delay")
case startup:
expectResult(t, w, results.Unknown, "during initial delay")
}

// The doProbe call would still fail when it called during specified
// initial delay time.
time.Sleep(5 * time.Second)
expectContinue(t, w, w.doProbe(ctx), "during initial delay")
switch probeType {
case liveness:
expectResult(t, w, results.Success, "during initial delay")
case readiness:
expectResult(t, w, results.Failure, "during initial delay")
case startup:
expectResult(t, w, results.Unknown, "during initial delay")
}

// Sleep until initial delay time elapses and probe must
// succeed now.
time.Sleep(5 * time.Second)
expectContinue(t, w, w.doProbe(ctx), "during initial delay")
expectResult(t, w, results.Success, "during initial delay")
}
}

func TestFailureThreshold(t *testing.T) {
ctx := context.Background()
m := newTestManager()
Expand Down