Skip to content

Commit

Permalink
fix(pipelines): Fixed interpretation of missing startTime during inst… (
Browse files Browse the repository at this point in the history
#2820)

* fix(pipelines): Fixed interpretation of missing startTime during instrumentation

Fixes spinnaker/spinnaker#4244

There are conditions in which CompleteStageHandler can be called without
having a startTime set. Often this is because an exception was thrown
before the startTime was set, but there are various guards within
StartStageHandler as well.

Previously when this happened, a StartTime of 0 was used. This is
particularly bad because the 40-years worth of nanoseconds that is
spit out of spectator is beyond the resolution of "double" for metric
stores that use double for time. This fix changes the assumption
to be the end time, resulting in assuming 0 time. Not that this could
possibly be a different type of issue if the underlying problem was
something else, such as StartStageHandler not even being called for some
reason.

In addition I set the startTime at the beginning of the try block where
exceptions may be thrown in order to ensure there is a startTime when
going through that control flow. I think this might be messy depending
on how time and state are used because it has a startTime but is not in
the RUNNING state. There does not seem to be an appropriate state definition
for this particular phase and am not sure the implications of adding one.
I also dont know if moving the startTime so it includes this extra check
has other implications. If so then the startTime could be reset where it
is currently set.

* defer RUNNING status until after the extra initial store.

Being in the RUNNING state affected the handler guard checking
for NOT_STARTED (line 71) causing it to be ignored. I dont
really understand the control flow here, and could not follow
along in a debugger because kotlin/mockito jumps all over the
place making it impossible for me to grok that way. I suspect
there is a better fix and could involve adding another status
between NOT_STARTED and RUNNING but that is beyond my level
of intimacy with the model here.

fixed tests to accomodate the second storeStage call.
  • Loading branch information
Eric Wiseblatt authored and robzienert committed Oct 8, 2019
1 parent 72b08f2 commit e872ce8
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,17 @@ class CompleteStageHandler(
} ?: id
}

// If startTime was not set, then assume this was instantaneous.
// In practice 0 startTimes can happen if there is
// an exception in StartStageHandler or guard causing skipping.
// Without a startTime, we cannot record a meaningful time,
// and assuming a start of 0 makes the values ridiculously large.
val endTime = stage.endTime ?: clock.millis()
val startTime = stage.startTime ?: endTime

PercentileTimer
.get(registry, id)
.record((stage.endTime ?: clock.millis()) - (stage.startTime ?: 0), TimeUnit.MILLISECONDS)
.record(endTime - startTime, TimeUnit.MILLISECONDS)
}

override val messageType = CompleteStage::class.java
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,14 @@ class StartStageHandler(
queue.push(SkipStage(stage))
} else {
try {
// Set the startTime in case we throw an exception.
stage.startTime = clock.millis()
repository.storeStage(stage)

stage.withAuth {
stage.plan()
}

stage.status = RUNNING
stage.startTime = clock.millis()
repository.storeStage(stage)

stage.start()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,14 +172,14 @@ object StartStageHandlerTest : SubjectSpek<StartStageHandler>({
}

it("updates the stage status") {
verify(repository).storeStage(check {
verify(repository, times(2)).storeStage(check {
assertThat(it.status).isEqualTo(RUNNING)
assertThat(it.startTime).isEqualTo(clock.millis())
})
}

it("attaches tasks to the stage") {
verify(repository).storeStage(check {
verify(repository, times(2)).storeStage(check {
assertThat(it.tasks.size).isEqualTo(1)
it.tasks.first().apply {
assertThat(id).isEqualTo("1")
Expand Down Expand Up @@ -226,7 +226,7 @@ object StartStageHandlerTest : SubjectSpek<StartStageHandler>({
}

it("updates the stage status") {
verify(repository).storeStage(check {
verify(repository, times(2)).storeStage(check {
assertThat(it.status).isEqualTo(RUNNING)
assertThat(it.startTime).isEqualTo(clock.millis())
})
Expand Down Expand Up @@ -267,7 +267,7 @@ object StartStageHandlerTest : SubjectSpek<StartStageHandler>({
}

it("updates the stage status") {
verify(repository).storeStage(check {
verify(repository, times(2)).storeStage(check {
assertThat(it.status).isEqualTo(RUNNING)
assertThat(it.startTime).isEqualTo(clock.millis())
})
Expand Down Expand Up @@ -308,7 +308,7 @@ object StartStageHandlerTest : SubjectSpek<StartStageHandler>({
afterGroup(::resetMocks)

it("attaches tasks to the stage") {
verify(repository).storeStage(check {
verify(repository, times(2)).storeStage(check {
assertThat(it.tasks.size).isEqualTo(3)
it.tasks[0].apply {
assertThat(id).isEqualTo("1")
Expand Down Expand Up @@ -646,15 +646,15 @@ object StartStageHandlerTest : SubjectSpek<StartStageHandler>({
}

it("starts the stage") {
verify(repository).storeStage(check {
verify(repository, times(2)).storeStage(check {
assertThat(it.type).isEqualTo("bar")
assertThat(it.status).isEqualTo(RUNNING)
assertThat(it.startTime).isEqualTo(clock.millis())
})
}

it("attaches a task to the stage") {
verify(repository).storeStage(check {
verify(repository, times(2)).storeStage(check {
assertThat(it.tasks.size).isEqualTo(1)
it.tasks.first().apply {
assertThat(id).isEqualTo("1")
Expand Down Expand Up @@ -731,7 +731,7 @@ object StartStageHandlerTest : SubjectSpek<StartStageHandler>({
}

it("attaches the exception to the stage context") {
verify(repository).storeStage(check {
verify(repository, times(2)).storeStage(check {
assertThat(it.context["exception"]).isEqualTo(exceptionDetails)
})
}
Expand Down Expand Up @@ -761,13 +761,13 @@ object StartStageHandlerTest : SubjectSpek<StartStageHandler>({
}

it("attaches the exception to the stage context") {
verify(repository).storeStage(check {
verify(repository, times(2)).storeStage(check {
assertThat(it.context["exception"]).isEqualTo(exceptionDetails)
})
}

it("attaches flag to the stage context to indicate that before stage planning failed") {
verify(repository).storeStage(check {
verify(repository, times(2)).storeStage(check {
assertThat(it.context["beforeStagePlanningFailed"]).isEqualTo(true)
})
}
Expand Down Expand Up @@ -797,13 +797,13 @@ object StartStageHandlerTest : SubjectSpek<StartStageHandler>({
}

it("attaches the exception to the stage context") {
verify(repository).storeStage(check {
verify(repository, times(2)).storeStage(check {
assertThat(it.context["exception"]).isEqualTo(exceptionDetails)
})
}

it("attaches flag to the stage context to indicate that before stage planning failed") {
verify(repository).storeStage(check {
verify(repository, times(2)).storeStage(check {
assertThat(it.context["beforeStagePlanningFailed"]).isEqualTo(true)
})
}
Expand Down

0 comments on commit e872ce8

Please sign in to comment.