Skip to content

Commit

Permalink
fix(zombie-check): make zombie check run regardless of instance status (#2962)
Browse files Browse the repository at this point in the history

Remove check for all activators being up before running a zombie check.
It doesn't really buy us anything other than confusion.
E.g., if there is a dedicated instance that runs zombie checks but has
the queue disabled, the zombie check will not run.
Even if down instances run a zombie check, that should be OK, since their
view of the queue should still be accurate.
  • Loading branch information
marchello2000 committed Jun 5, 2019
1 parent 897ae9f commit 5808bf8
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import com.netflix.spinnaker.orca.pipeline.model.Execution.ExecutionType.PIPELIN
import com.netflix.spinnaker.orca.pipeline.persistence.ExecutionRepository
import com.netflix.spinnaker.orca.q.ApplicationAware
import com.netflix.spinnaker.orca.q.ExecutionLevel
import com.netflix.spinnaker.q.Activator
import com.netflix.spinnaker.q.metrics.LockFailed
import com.netflix.spinnaker.q.metrics.MessageAcknowledged
import com.netflix.spinnaker.q.metrics.MessageDead
Expand Down Expand Up @@ -78,7 +77,6 @@ class AtlasQueueMonitor
private val registry: Registry,
private val repository: ExecutionRepository,
private val clock: Clock,
private val activators: List<Activator>,
private val conch: NotificationClusterLock,
@Value("\${queue.zombie-check.enabled:false}")private val zombieCheckEnabled: Boolean,
@Qualifier("scheduler") private val zombieCheckScheduler: Optional<Scheduler>,
Expand Down Expand Up @@ -113,7 +111,12 @@ class AtlasQueueMonitor

@Scheduled(fixedDelayString = "\${queue.zombie-check.interval-ms:3600000}")
fun checkForZombies() {
if (!zombieCheckEnabled || activators.none { it.enabled } || !conch.tryAcquireLock("zombie", TimeUnit.MINUTES.toSeconds(5))) return
val lockAcquired = conch.tryAcquireLock("zombie", TimeUnit.MINUTES.toSeconds(5))

if (!zombieCheckEnabled || !lockAcquired) {
log.info("Not running zombie check: checkEnabled: $zombieCheckEnabled, lockAcquired: $lockAcquired")
return
}

try {
MDC.put(AGENT_MDC_KEY, this.javaClass.simpleName)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ object AtlasQueueMonitorTest : SubjectSpek<AtlasQueueMonitor>({
registry,
repository,
clock,
listOf(activator),
conch,
true,
Optional.of(Schedulers.immediate()),
Expand Down

0 comments on commit 5808bf8

Please sign in to comment.