Skip to content

Commit

Permalink
feat(atlas): Add support for retries with exponential backoff on call…
Browse files Browse the repository at this point in the history
…s to atlas. (#83)
  • Loading branch information
Matt Duftler committed Sep 15, 2017
1 parent eeb6f53 commit 49c47e0
Show file tree
Hide file tree
Showing 9 changed files with 97 additions and 8 deletions.
2 changes: 1 addition & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ springVersion=4.3.10.RELEASE
springBootVersion=1.5.4.RELEASE
kotlinVersion=1.1.2
korkVersion=1.97.0-rc.1-springBoot154
orcaVersion=3.0.8
orcaVersion=3.11.8
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ public class AtlasConfigurationProperties {
@Setter
private RemoteService endpoint;

/**
 * Upper bound, in seconds, applied to the exponentially growing backoff period
 * computed by {@code AtlasFetchTask.getDynamicBackoffPeriod}. Defaults to 32.
 */
@Getter
@Setter
private int maxBackoffPeriodSeconds = 32;

/**
 * Timeout, in minutes, reported by {@code AtlasFetchTask.getTimeout}. Defaults to 3.
 *
 * NOTE(review): the name is misspelt ("Timout"). It is left as-is because renaming it
 * would change the Lombok-generated accessors ({@code getStageTimoutMinutes}) that
 * callers already use, and presumably the bound configuration key as well.
 */
@Getter
@Setter
private int stageTimoutMinutes = 3;

@Getter
private List<AtlasManagedAccount> accounts = new ArrayList<>();
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import com.fasterxml.jackson.databind.ObjectMapper;
import com.netflix.kayenta.atlas.canary.AtlasCanaryScope;
import com.netflix.kayenta.atlas.config.AtlasConfigurationProperties;
import com.netflix.kayenta.canary.CanaryConfig;
import com.netflix.kayenta.metrics.SynchronousQueryProcessor;
import com.netflix.kayenta.security.AccountCredentials;
Expand Down Expand Up @@ -54,16 +55,28 @@ public class AtlasFetchTask implements RetryableTask {
@Autowired
SynchronousQueryProcessor synchronousQueryProcessor;

@Autowired
AtlasConfigurationProperties atlasConfigurationProperties;

/**
 * Returns the fixed backoff period, in milliseconds (currently two seconds).
 */
@Override
public long getBackoffPeriod() {
  // TODO(duftler): Externalize this configuration.
  final Duration fixedBackoff = Duration.ofSeconds(2);
  return fixedBackoff.toMillis();
}

/**
 * Returns the timeout for this task, in milliseconds.
 *
 * The value comes from {@code AtlasConfigurationProperties.getStageTimoutMinutes()}
 * (minutes) rather than a hard-coded constant, so it can be tuned via configuration.
 *
 * @return the configured timeout converted from minutes to milliseconds
 */
@Override
public long getTimeout() {
  // The previous hard-coded two-minute return was left in front of this statement,
  // which made the configurable value unreachable (a compile error in Java).
  return Duration.ofMinutes(atlasConfigurationProperties.getStageTimoutMinutes()).toMillis();
}

/**
 * Computes an exponentially growing backoff period from how long the task has been running.
 *
 * The backoff is 2^floor(log2(elapsed seconds)) seconds, never less than one second,
 * and never more than the configured {@code maxBackoffPeriodSeconds}.
 *
 * @param taskDuration how long the task has been running so far
 * @return the backoff period in milliseconds
 */
@Override
public long getDynamicBackoffPeriod(Duration taskDuration) {
  long elapsedSeconds = taskDuration.getSeconds();

  // Position of the highest set bit == floor(log2(elapsedSeconds)); this is -1 when elapsedSeconds is 0.
  int highestBit = (Long.SIZE - 1) - Long.numberOfLeadingZeros(elapsedSeconds);

  // A task that fails within the first second still gets a one second (2^0) backoff.
  int power = highestBit < 0 ? 0 : highestBit;

  int cap = atlasConfigurationProperties.getMaxBackoffPeriodSeconds();
  // The int cast saturates at Integer.MAX_VALUE for large powers, so the cap below still applies.
  int uncapped = (int) Math.pow(2, power);
  int seconds = cap <= uncapped ? cap : uncapped;

  return Duration.ofSeconds(seconds).toMillis();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Copyright 2017 Google, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License")
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.netflix.kayenta.atlas.orca

import com.netflix.kayenta.atlas.config.AtlasConfigurationProperties
import spock.lang.Specification
import spock.lang.Unroll

import java.time.Duration

/**
 * Spock specification for AtlasFetchTask#getDynamicBackoffPeriod.
 *
 * Uses a default-constructed AtlasConfigurationProperties, so the expectations below
 * reflect the default maximum backoff period (32 seconds).
 */
class AtlasFetchTaskSpec extends Specification {

// Data-driven: one iteration is reported per row of the where: table.
@Unroll
void "dynamic backoff period is exponential when durationMS is #durationMS, while respecting mins/maxes"() {
given:
// The task is built by hand (no Spring context), so the properties field is assigned directly.
AtlasFetchTask atlasFetchTask = new AtlasFetchTask()
atlasFetchTask.atlasConfigurationProperties = new AtlasConfigurationProperties()

expect:
atlasFetchTask.getDynamicBackoffPeriod(Duration.ofMillis(durationMS)) == backoffPeriodMS

where:
// Expected backoff is the largest power of two (in whole seconds) not exceeding the
// elapsed duration, with a floor of 1000 ms (sub-second durations) and a ceiling of
// 32000 ms (durations of 32 seconds and beyond).
durationMS || backoffPeriodMS
250 || 1000
500 || 1000
1000 || 1000
1500 || 1000
2000 || 2000
2500 || 2000
3000 || 2000
3500 || 2000
4000 || 4000
6000 || 4000
8000 || 8000
16000 || 16000
30000 || 16000
32000 || 32000
60000 || 32000
120000 || 32000
240000 || 32000
}
}
1 change: 1 addition & 0 deletions kayenta-core/kayenta-core.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies {
compile "org.projectlombok:lombok:1.16.10"

compile "com.netflix.spinnaker.orca:orca-core:$orcaVersion"
compile "com.netflix.spinnaker.orca:orca-retrofit:$orcaVersion"
compile "net.lariverosc:jesque-spring:1.0.1"
compile "net.greghaines:jesque:1.3.1"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,16 @@
package com.netflix.kayenta.retrofit.config;

import com.netflix.spinnaker.config.OkHttpClientConfiguration;
import com.netflix.spinnaker.orca.retrofit.exceptions.RetrofitExceptionHandler;
import com.squareup.okhttp.ConnectionPool;
import com.squareup.okhttp.OkHttpClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.beans.factory.config.ConfigurableBeanFactory;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Scope;
import org.springframework.core.Ordered;
import org.springframework.core.annotation.Order;

@Configuration
public class RetrofitClientConfiguration {
Expand All @@ -45,4 +48,9 @@ OkHttpClient okHttpClient(OkHttpClientConfiguration okHttpClientConfig) {
okHttpClient.setRetryOnConnectionFailure(retryOnConnectionFailure);
return okHttpClient;
}

/**
 * Registers a {@link RetrofitExceptionHandler} bean, declared with
 * {@code Ordered.HIGHEST_PRECEDENCE}.
 *
 * NOTE(review): presumably the ordering makes this handler get consulted before other
 * exception handler beans (e.g. a default handler) — confirm against how the handlers
 * are injected and iterated.
 */
@Bean
@Order(Ordered.HIGHEST_PRECEDENCE)
RetrofitExceptionHandler retrofitExceptionHandler() {
  RetrofitExceptionHandler handler = new RetrofitExceptionHandler();
  return handler;
}
}
Binary file modified kayenta-orca/libs/orca-queue-1.369.0-SNAPSHOT-test.jar
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package com.netflix.kayenta.orca.lib;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.netflix.discovery.StatusChangeEvent;
import com.netflix.spectator.api.NoopRegistry;
import com.netflix.spectator.api.Registry;
Expand Down Expand Up @@ -188,4 +189,9 @@ ContextParameterProcessor contextParameterProcessor() {
DefaultExceptionHandler defaultExceptionHandler() {
return new DefaultExceptionHandler();
}

/**
 * Registers a default-constructed Jackson {@link ObjectMapper} bean.
 */
@Bean
ObjectMapper objectMapper() {
  ObjectMapper mapper = new ObjectMapper();
  return mapper;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -203,17 +203,14 @@ public String initiateCanary(@RequestParam(required = false) final String metric

Pipeline pipeline =
Pipeline
.builder()
.withApplication("kayenta-" + currentInstanceId)
.builder("kayenta-" + currentInstanceId)
.withName("Standard Canary Pipeline")
.withPipelineConfigId(UUID.randomUUID() + "")
.withStage(serviceType + "Fetch", "Fetch Control from " + serviceType, fetchControlContext)
.withStage(serviceType + "Fetch", "Fetch Experiment from " + serviceType, fetchExperimentContext)
.withStage("metricSetMixer", "Mix Control and Experiment Results", mixMetricSetsContext)
.withStage("canaryJudge", "Perform Analysis", canaryJudgeContext)
.withParallel(true)
.withLimitConcurrent(false)
.withExecutingInstance(currentInstanceId)
.withExecutionEngine(Execution.ExecutionEngine.v3)
.build();

Expand Down

0 comments on commit 49c47e0

Please sign in to comment.