Skip to content

Commit

Permalink
fix(provider/google): Avoids batch failure in LB caching agents. (#1582)
Browse files Browse the repository at this point in the history
Previously, the LB caching agents would fail a batch if any
of the nested GCP calls failed. This change makes it so that
the agents will write any LB it successfully entirely reads to the cache
and avoids writing any it fails to entirely read to the cache.
  • Loading branch information
jtk54 committed Apr 19, 2017
1 parent 7d1b547 commit 78bfbe2
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 63 deletions.
@@ -0,0 +1,50 @@
/*
* Copyright 2017 Google, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License")
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.netflix.spinnaker.clouddriver.google.provider.agent

import com.google.api.client.googleapis.json.GoogleJsonError
import com.google.api.client.http.HttpHeaders
import groovy.util.logging.Slf4j

/**
* Since pieces of GCP infrastructure (subjects) are often made of many components, read calls nested deep in
* the object model might fail. This provides facilities to record failed subjects on GCP reads during a caching
* agent execution.
*/
@Slf4j
trait FailedSubjectChronicler {

/**
* A list of subjects the caching agent failed to read from the platform.
*
* This should be a list available to the caching agent to manipulate the failed subjects as it wants
* after the agent runs.
*/
List<String> failedSubjects
/**
* The subject this particular GCP operation is executed on behalf of,
* e.g. The name of the L7 LB we are attempting to read a healthcheck for.
*/
String subject

void onFailure(GoogleJsonError e, HttpHeaders responseHeaders) throws IOException {
log.warn("Failed to read a component of subject ${subject}. The platform error message was:\n ${e.getMessage()}. \nReporting it as 'Failed' to the caching agent. ")
if (failedSubjects != null) {
failedSubjects << subject
}
}
}
Expand Up @@ -57,6 +57,7 @@ class GoogleHttpLoadBalancerCachingAgent extends AbstractGoogleCachingAgent impl
String agentType = "${accountName}/global/${GoogleHttpLoadBalancerCachingAgent.simpleName}"
String onDemandAgentType = "${agentType}-OnDemand"
final OnDemandMetricsSupport metricsSupport
List<String> failedLoadBalancers

GoogleHttpLoadBalancerCachingAgent(String clouddriverUserAgentApplicationName,
GoogleNamedAccountCredentials credentials,
Expand All @@ -68,6 +69,7 @@ class GoogleHttpLoadBalancerCachingAgent extends AbstractGoogleCachingAgent impl
this,
"${GoogleCloudProvider.ID}:${OnDemandAgent.OnDemandType.LoadBalancer}"
)
this.failedLoadBalancers = []
}

@Override
Expand All @@ -77,6 +79,8 @@ class GoogleHttpLoadBalancerCachingAgent extends AbstractGoogleCachingAgent impl
}

List<GoogleHttpLoadBalancer> getHttpLoadBalancers() {
// Reset failed load balancers on each caching agent execution.
failedLoadBalancers = []
List<GoogleHttpLoadBalancer> loadBalancers = []

BatchRequest forwardingRulesRequest = buildBatchRequest()
Expand All @@ -103,7 +107,7 @@ class GoogleHttpLoadBalancerCachingAgent extends AbstractGoogleCachingAgent impl
executeIfRequestsAreQueued(httpHealthCheckRequest, "HttpLoadBalancerCaching.httpHealthCheck")
executeIfRequestsAreQueued(groupHealthRequest, "HttpLoadBalancerCaching.groupHealth")

return loadBalancers
return loadBalancers.findAll {!(it.name in failedLoadBalancers)}
}

CacheResult buildCacheResult(ProviderCache _, List<GoogleHttpLoadBalancer> googleLoadBalancers) {
Expand Down Expand Up @@ -217,13 +221,17 @@ class GoogleHttpLoadBalancerCachingAgent extends AbstractGoogleCachingAgent impl
backendServiceRequest: backendServiceRequest,
httpHealthCheckRequest: httpHealthCheckRequest,
groupHealthRequest: groupHealthRequest,
subject: newLoadBalancer.name,
failedSubjects: failedLoadBalancers,
)
def targetHttpsProxyCallback = new TargetHttpsProxyCallback(
googleLoadBalancer: newLoadBalancer,
urlMapRequest: urlMapRequest,
backendServiceRequest: backendServiceRequest,
httpHealthCheckRequest: httpHealthCheckRequest,
groupHealthRequest: groupHealthRequest,
subject: newLoadBalancer.name,
failedSubjects: failedLoadBalancers,
)

switch (Utils.getTargetProxyType(forwardingRule.target)) {
Expand All @@ -243,7 +251,7 @@ class GoogleHttpLoadBalancerCachingAgent extends AbstractGoogleCachingAgent impl
}

// Note: The TargetProxyCallbacks assume that each proxy points to a unique urlMap.
class TargetHttpsProxyCallback<TargetHttpsProxy> extends JsonBatchCallback<TargetHttpsProxy> implements PlatformErrorPropagator {
class TargetHttpsProxyCallback<TargetHttpsProxy> extends JsonBatchCallback<TargetHttpsProxy> implements FailedSubjectChronicler {
GoogleHttpLoadBalancer googleLoadBalancer
BatchRequest urlMapRequest

Expand Down Expand Up @@ -272,7 +280,7 @@ class GoogleHttpLoadBalancerCachingAgent extends AbstractGoogleCachingAgent impl
}

// Note: The TargetProxyCallbacks assume that each proxy points to a unique urlMap.
class TargetProxyCallback<TargetHttpProxy> extends JsonBatchCallback<TargetHttpProxy> implements PlatformErrorPropagator {
class TargetProxyCallback<TargetHttpProxy> extends JsonBatchCallback<TargetHttpProxy> implements FailedSubjectChronicler {
GoogleHttpLoadBalancer googleLoadBalancer
BatchRequest urlMapRequest

Expand All @@ -291,13 +299,15 @@ class GoogleHttpLoadBalancerCachingAgent extends AbstractGoogleCachingAgent impl
backendServiceRequest: backendServiceRequest,
httpHealthCheckRequest: httpHealthCheckRequest,
groupHealthRequest: groupHealthRequest,
subject: googleLoadBalancer.name,
failedSubjects: failedLoadBalancers
)
compute.urlMaps().get(project, urlMapName).queue(urlMapRequest, urlMapCallback)
}
}
}

class UrlMapCallback<UrlMap> extends JsonBatchCallback<UrlMap> implements PlatformErrorPropagator {
class UrlMapCallback<UrlMap> extends JsonBatchCallback<UrlMap> implements FailedSubjectChronicler {
GoogleHttpLoadBalancer googleLoadBalancer
BatchRequest backendServiceRequest

Expand Down Expand Up @@ -384,6 +394,8 @@ class GoogleHttpLoadBalancerCachingAgent extends AbstractGoogleCachingAgent impl
@Override
void onSuccess(BackendService backendService, HttpHeaders responseHeaders) throws IOException {
def groupHealthCallback = new GroupHealthCallback(
subject: googleLoadBalancer.name,
failedSubjects: failedLoadBalancers,
googleLoadBalancer: googleLoadBalancer,
)
Boolean isHttps = backendService.protocol == 'HTTPS'
Expand Down Expand Up @@ -417,11 +429,15 @@ class GoogleHttpLoadBalancerCachingAgent extends AbstractGoogleCachingAgent impl
def healthCheckName = Utils.getLocalName(healthCheckURL)
if (isHttps) {
def healthCheckCallback = new HttpsHealthCheckCallback(
subject: googleLoadBalancer.name,
failedSubjects: failedLoadBalancers,
googleBackendServices: backendServicesToUpdate
)
compute.httpsHealthChecks().get(project, healthCheckName).queue(httpHealthCheckRequest, healthCheckCallback)
} else {
def healthCheckCallback = new HttpHealthCheckCallback(
subject: googleLoadBalancer.name,
failedSubjects: failedLoadBalancers,
googleBackendServices: backendServicesToUpdate
)
compute.httpHealthChecks().get(project, healthCheckName).queue(httpHealthCheckRequest, healthCheckCallback)
Expand All @@ -430,7 +446,7 @@ class GoogleHttpLoadBalancerCachingAgent extends AbstractGoogleCachingAgent impl
}
}

class HttpsHealthCheckCallback<HttpsHealthCheck> extends JsonBatchCallback<HttpsHealthCheck> implements PlatformErrorPropagator {
class HttpsHealthCheckCallback<HttpsHealthCheck> extends JsonBatchCallback<HttpsHealthCheck> implements FailedSubjectChronicler {
List<GoogleBackendService> googleBackendServices

@Override
Expand All @@ -450,7 +466,7 @@ class GoogleHttpLoadBalancerCachingAgent extends AbstractGoogleCachingAgent impl
}
}

class HttpHealthCheckCallback<HttpHealthCheck> extends JsonBatchCallback<HttpHealthCheck> implements PlatformErrorPropagator {
class HttpHealthCheckCallback<HttpHealthCheck> extends JsonBatchCallback<HttpHealthCheck> implements FailedSubjectChronicler {
List<GoogleBackendService> googleBackendServices

@Override
Expand All @@ -470,7 +486,7 @@ class GoogleHttpLoadBalancerCachingAgent extends AbstractGoogleCachingAgent impl
}
}

class GroupHealthCallback<BackendServiceGroupHealth> extends JsonBatchCallback<BackendServiceGroupHealth> implements PlatformErrorPropagator {
class GroupHealthCallback<BackendServiceGroupHealth> extends JsonBatchCallback<BackendServiceGroupHealth> implements FailedSubjectChronicler {
GoogleHttpLoadBalancer googleLoadBalancer

@Override
Expand Down
Expand Up @@ -58,6 +58,7 @@ class GoogleInternalLoadBalancerCachingAgent extends AbstractGoogleCachingAgent
String agentType = "${accountName}/${region}/${GoogleInternalLoadBalancerCachingAgent.simpleName}"
String onDemandAgentType = "${agentType}-OnDemand"
final OnDemandMetricsSupport metricsSupport
List<String> failedLoadBalancers

GoogleInternalLoadBalancerCachingAgent(String clouddriverUserAgentApplicationName,
GoogleNamedAccountCredentials credentials,
Expand All @@ -67,6 +68,7 @@ class GoogleInternalLoadBalancerCachingAgent extends AbstractGoogleCachingAgent
super(clouddriverUserAgentApplicationName, credentials, objectMapper, registry)
this.region = region
this.metricsSupport = new OnDemandMetricsSupport(registry, this, "${GoogleCloudProvider.ID}:${OnDemandAgent.OnDemandType.LoadBalancer}")
failedLoadBalancers = []
}

@Override
Expand All @@ -76,6 +78,8 @@ class GoogleInternalLoadBalancerCachingAgent extends AbstractGoogleCachingAgent
}

List<GoogleInternalLoadBalancer> getInternalLoadBalancers() {
// Reset failed load balancers on each caching agent execution.
failedLoadBalancers = []
List<GoogleInternalLoadBalancer> loadBalancers = []

BatchRequest forwardingRulesRequest = buildBatchRequest()
Expand All @@ -96,7 +100,7 @@ class GoogleInternalLoadBalancerCachingAgent extends AbstractGoogleCachingAgent
executeIfRequestsAreQueued(healthCheckRequest, "InternalLoadBalancerCaching.healthCheck")
executeIfRequestsAreQueued(groupHealthRequest, "InternalLoadBalancerCaching.groupHealth")

return loadBalancers
return loadBalancers.findAll {!(it.name in failedLoadBalancers)}
}

CacheResult buildCacheResult(ProviderCache _, List<GoogleInternalLoadBalancer> googleLoadBalancers) {
Expand Down Expand Up @@ -202,6 +206,8 @@ class GoogleInternalLoadBalancerCachingAgent extends AbstractGoogleCachingAgent
googleLoadBalancer: newLoadBalancer,
healthCheckRequest: healthCheckRequest,
groupHealthRequest: groupHealthRequest,
subject: newLoadBalancer.name,
failedSubjects: failedLoadBalancers
)
compute.regionBackendServices()
.get(project, region, backendServiceName)
Expand All @@ -211,15 +217,17 @@ class GoogleInternalLoadBalancerCachingAgent extends AbstractGoogleCachingAgent
}
}

class BackendServiceCallback<BackendService> extends JsonBatchCallback<BackendService> implements PlatformErrorPropagator {
class BackendServiceCallback<BackendService> extends JsonBatchCallback<BackendService> implements FailedSubjectChronicler {
GoogleInternalLoadBalancer googleLoadBalancer
BatchRequest healthCheckRequest
BatchRequest groupHealthRequest

@Override
void onSuccess(BackendService backendService, HttpHeaders responseHeaders) throws IOException {
def groupHealthCallback = new GroupHealthCallback(
googleLoadBalancer: googleLoadBalancer
googleLoadBalancer: googleLoadBalancer,
subject: googleLoadBalancer.name,
failedSubjects: failedLoadBalancers
)

GoogleBackendService newService = new GoogleBackendService(
Expand Down Expand Up @@ -249,19 +257,25 @@ class GoogleInternalLoadBalancerCachingAgent extends AbstractGoogleCachingAgent
switch (healthCheckType) {
case "httpHealthChecks":
def healthCheckCallback = new HttpHealthCheckCallback(
googleBackendService: googleLoadBalancer.backendService
googleBackendService: googleLoadBalancer.backendService,
subject: googleLoadBalancer.name,
failedSubjects: failedLoadBalancers
)
compute.httpHealthChecks().get(project, healthCheckName).queue(healthCheckRequest, healthCheckCallback)
break
case "httpsHealthChecks":
def healthCheckCallback = new HttpsHealthCheckCallback(
googleBackendService: googleLoadBalancer.backendService
googleBackendService: googleLoadBalancer.backendService,
subject: googleLoadBalancer.name,
failedSubjects: failedLoadBalancers
)
compute.httpsHealthChecks().get(project, healthCheckName).queue(healthCheckRequest, healthCheckCallback)
break
case "healthChecks":
def healthCheckCallback = new HealthCheckCallback(
googleBackendService: googleLoadBalancer.backendService
googleBackendService: googleLoadBalancer.backendService,
subject: googleLoadBalancer.name,
failedSubjects: failedLoadBalancers
)
compute.healthChecks().get(project, healthCheckName).queue(healthCheckRequest, healthCheckCallback)
break
Expand All @@ -273,7 +287,7 @@ class GoogleInternalLoadBalancerCachingAgent extends AbstractGoogleCachingAgent
}
}

class HealthCheckCallback<HealthCheck> extends JsonBatchCallback<HealthCheck> implements PlatformErrorPropagator {
class HealthCheckCallback<HealthCheck> extends JsonBatchCallback<HealthCheck> implements FailedSubjectChronicler {
GoogleBackendService googleBackendService

@Override
Expand Down Expand Up @@ -315,7 +329,7 @@ class GoogleInternalLoadBalancerCachingAgent extends AbstractGoogleCachingAgent
}
}

class HttpsHealthCheckCallback<HttpsHealthCheck> extends JsonBatchCallback<HttpsHealthCheck> implements PlatformErrorPropagator {
class HttpsHealthCheckCallback<HttpsHealthCheck> extends JsonBatchCallback<HttpsHealthCheck> implements FailedSubjectChronicler {
GoogleBackendService googleBackendService

@Override
Expand All @@ -333,7 +347,7 @@ class GoogleInternalLoadBalancerCachingAgent extends AbstractGoogleCachingAgent
}
}

class HttpHealthCheckCallback<HttpHealthCheck> extends JsonBatchCallback<HttpHealthCheck> implements PlatformErrorPropagator {
class HttpHealthCheckCallback<HttpHealthCheck> extends JsonBatchCallback<HttpHealthCheck> implements FailedSubjectChronicler {
GoogleBackendService googleBackendService

@Override
Expand All @@ -351,7 +365,7 @@ class GoogleInternalLoadBalancerCachingAgent extends AbstractGoogleCachingAgent
}
}

class GroupHealthCallback<BackendServiceGroupHealth> extends JsonBatchCallback<BackendServiceGroupHealth> implements PlatformErrorPropagator {
class GroupHealthCallback<BackendServiceGroupHealth> extends JsonBatchCallback<BackendServiceGroupHealth> implements FailedSubjectChronicler {
GoogleInternalLoadBalancer googleLoadBalancer

@Override
Expand Down

0 comments on commit 78bfbe2

Please sign in to comment.