Skip to content

Commit

Permalink
fix(provider/aws): Avoid being marked unhealthy after initialization
Browse files Browse the repository at this point in the history
Once connectivity to all accounts has been verified at least once, avoid
being marked unhealthy if there is a temporary blip in connectivity to
one or more accounts.

Such a blip can occur as a result of hitting an AWS rate limit.

The `health.amazon.errors` metric can be used to alert on errors that
would previously have caused an instance to go unhealthy.
  • Loading branch information
ajordens committed Dec 21, 2017
1 parent fea18b2 commit 09dc4e2
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 8 deletions.
Expand Up @@ -18,6 +18,9 @@ package com.netflix.spinnaker.clouddriver.aws.health

import com.amazonaws.AmazonClientException
import com.amazonaws.AmazonServiceException
import com.amazonaws.services.ec2.model.AmazonEC2Exception
import com.netflix.spectator.api.Counter
import com.netflix.spectator.api.Registry
import com.netflix.spinnaker.clouddriver.aws.security.AmazonClientProvider
import com.netflix.spinnaker.clouddriver.aws.security.NetflixAmazonCredentials
import com.netflix.spinnaker.clouddriver.security.AccountCredentialsProvider
Expand All @@ -32,29 +35,45 @@ import org.springframework.scheduling.annotation.Scheduled
import org.springframework.stereotype.Component
import org.springframework.web.bind.annotation.ResponseStatus

import java.util.concurrent.atomic.AtomicLong
import java.util.concurrent.atomic.AtomicReference

@Component
class AmazonHealthIndicator implements HealthIndicator {

private static final Logger LOG = LoggerFactory.getLogger(AmazonHealthIndicator)

@Autowired
AccountCredentialsProvider accountCredentialsProvider
private final AccountCredentialsProvider accountCredentialsProvider
private final AmazonClientProvider amazonClientProvider

private final AtomicReference<Exception> lastException = new AtomicReference<>(null)
private final AtomicReference<Boolean> hasInitialized = new AtomicReference<>(null)

private final AtomicLong errors;

@Autowired
AmazonClientProvider amazonClientProvider
AmazonHealthIndicator(AccountCredentialsProvider accountCredentialsProvider,
AmazonClientProvider amazonClientProvider,
Registry registry) {
this.accountCredentialsProvider = accountCredentialsProvider
this.amazonClientProvider = amazonClientProvider

private final AtomicReference<Exception> lastException = new AtomicReference<>(null)
this.errors = registry.gauge("health.amazon.errors", new AtomicLong(0))
}

/**
 * Implements {@code HealthIndicator.health()} for the Amazon provider.
 *
 * Once {@code hasInitialized} holds TRUE, always reports UP. Until then,
 * rethrows the exception stored in {@code lastException} if there is one,
 * and otherwise reports UNKNOWN.
 */
@Override
Health health() {
if (hasInitialized.get() == Boolean.TRUE) {
// avoid being marked unhealthy once connectivity to all accounts has been verified at least once
return new Health.Builder().up().build()
}

// hasInitialized is not TRUE yet: surface the stored exception, if any
def ex = lastException.get()
if (ex) {
throw ex
}

// NOTE(review): the value built on the next line is discarded, because the explicit
// return below decides the status. It looks like the pre-change return that the
// flattened diff view left in place — confirm against the repository.
new Health.Builder().up().build()
return new Health.Builder().unknown().build()
}

@Scheduled(fixedDelay = 120000L)
Expand All @@ -71,13 +90,16 @@ class AmazonHealthIndicator implements HealthIndicator {
}
ec2.describeAccountAttributes()
} catch (AmazonServiceException e) {
throw new AmazonUnreachableException(e)
throw new AmazonUnreachableException("Failed to describe account attributes for '${credentials.name}'", e)
}
}
hasInitialized.set(Boolean.TRUE)
lastException.set(null)
errors.set(0)
} catch (Exception ex) {
LOG.error "Unhealthy", ex
lastException.set(ex)
errors.set(1)
}
}

Expand Down
Expand Up @@ -19,12 +19,16 @@ package com.netflix.spinnaker.clouddriver.aws.health
import com.amazonaws.AmazonServiceException
import com.amazonaws.services.ec2.AmazonEC2
import com.amazonaws.services.ec2.model.DescribeAccountAttributesResult
import com.netflix.spectator.api.Counter
import com.netflix.spectator.api.Registry
import com.netflix.spinnaker.clouddriver.aws.TestCredential
import com.netflix.spinnaker.clouddriver.aws.security.AmazonClientProvider
import com.netflix.spinnaker.clouddriver.security.AccountCredentialsProvider
import org.springframework.boot.actuate.health.Status
import spock.lang.Specification

import java.util.concurrent.atomic.AtomicLong

class AmazonHealthIndicatorSpec extends Specification {

def "health fails when amazon appears unreachable"() {
Expand All @@ -40,14 +44,20 @@ class AmazonHealthIndicatorSpec extends Specification {
def mockAmazonClientProvider = Stub(AmazonClientProvider) {
getAmazonEC2(*_) >> mockEc2
}
def indicator = new AmazonHealthIndicator(accountCredentialsProvider: holder, amazonClientProvider: mockAmazonClientProvider)
def counter = new AtomicLong(0)
def mockRegistry = Stub(Registry) {
gauge(_, _) >> counter
}

def indicator = new AmazonHealthIndicator(holder, mockAmazonClientProvider, mockRegistry)

when:
indicator.checkHealth()
indicator.health()

then:
thrown AmazonHealthIndicator.AmazonUnreachableException
counter.get() == 1
}

def "health succeeds when amazon is reachable"() {
Expand All @@ -63,13 +73,20 @@ class AmazonHealthIndicatorSpec extends Specification {
def mockAmazonClientProvider = Stub(AmazonClientProvider) {
getAmazonEC2(*_) >> mockEc2
}
def indicator = new AmazonHealthIndicator(accountCredentialsProvider: holder, amazonClientProvider: mockAmazonClientProvider)

def counter = new AtomicLong(0)
def mockRegistry = Stub(Registry) {
gauge(_, _) >> counter
}

def indicator = new AmazonHealthIndicator(holder, mockAmazonClientProvider, mockRegistry)

when:
indicator.checkHealth()
def health = indicator.health()

then:
health.status == Status.UP
counter.get() == 0
}
}

0 comments on commit 09dc4e2

Please sign in to comment.