Skip to content

Commit

Permalink
Add OTel span events for commit retry sleeps (#7509)
Browse files Browse the repository at this point in the history
Useful for analysing Nessie behaviour under load.
  • Loading branch information
dimas-b committed Sep 18, 2023
1 parent b9143ec commit 3d43c93
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 0 deletions.
4 changes: 4 additions & 0 deletions versioned/storage/common/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ dependencies {
testImplementation(platform(libs.junit.bom))
testImplementation(libs.bundles.junit.testing)

implementation(platform(libs.opentelemetry.bom))
implementation("io.opentelemetry:opentelemetry-api")
testImplementation("io.opentelemetry:opentelemetry-sdk-testing")

testCompileOnly(libs.immutables.builder)
testCompileOnly(libs.immutables.value.annotations)
testAnnotationProcessor(libs.immutables.value.processor)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
import static org.projectnessie.versioned.storage.common.logic.CommitRetry.TryLoopState.newTryLoopState;

import com.google.common.annotations.VisibleForTesting;
import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.trace.Span;
import java.util.Optional;
import java.util.concurrent.ThreadLocalRandom;
import javax.annotation.Nonnull;
Expand All @@ -31,6 +34,10 @@

public class CommitRetry {

private static final String OTEL_SLEEP_EVENT_NAME = "nessie.commit-retry.sleep";
private static final AttributeKey<Long> OTEL_SLEEP_EVENT_TIME_KEY =
AttributeKey.longKey("nessie.commit-retry.sleep.duration-millis");

private CommitRetry() {}

public static <T> T commitRetry(Persist persist, CommitAttempt<T> attempt)
Expand Down Expand Up @@ -168,6 +175,9 @@ private void sleepAndBackoff(long totalElapsed, long attemptElapsed) {
// consider the already elapsed time of the last attempt
sleepMillis = Math.max(1L, sleepMillis - NANOSECONDS.toMillis(attemptElapsed));

Span.current()
.addEvent(OTEL_SLEEP_EVENT_NAME, Attributes.of(OTEL_SLEEP_EVENT_TIME_KEY, sleepMillis));

monotonicClock.sleepMillis(sleepMillis);

upper = upper * 2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static org.assertj.core.api.InstanceOfAssertFactories.type;
import static org.junit.jupiter.api.AssertionFailureBuilder.assertionFailure;
import static org.mockito.ArgumentMatchers.anyLong;
import static org.mockito.ArgumentMatchers.longThat;
import static org.mockito.Mockito.clearInvocations;
Expand All @@ -29,14 +30,24 @@
import static org.mockito.Mockito.when;
import static org.projectnessie.versioned.storage.common.logic.CommitRetry.commitRetry;

import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.context.Scope;
import io.opentelemetry.sdk.testing.junit5.OpenTelemetryExtension;
import io.opentelemetry.sdk.trace.data.EventData;
import io.opentelemetry.sdk.trace.data.SpanData;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import org.assertj.core.api.SoftAssertions;
import org.assertj.core.api.junit.jupiter.InjectSoftAssertions;
import org.assertj.core.api.junit.jupiter.SoftAssertionsExtension;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.extension.RegisterExtension;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.mockito.ArgumentMatcher;
Expand Down Expand Up @@ -347,6 +358,45 @@ public void retriesOutOfTime(int retries) {
soft.assertThat(tryLoopState.retry(0L)).isFalse();
}

@Nested
class Telemetry {
@RegisterExtension public final OpenTelemetryExtension otel = OpenTelemetryExtension.create();

private List<EventData> eventsFrom(Runnable action) {
Span span = otel.getOpenTelemetry().getTracer("test").spanBuilder("test").startSpan();
try (Scope ignored = span.makeCurrent()) {
action.run();
}
span.end();

String spanId = span.getSpanContext().getSpanId();
Optional<SpanData> spanData =
otel.getSpans().stream().filter(s -> s.getSpanId().equals(spanId)).findFirst();

return spanData
.orElseThrow(
() ->
assertionFailure()
.actual(otel.getSpans())
.message("Expected Span not found: " + spanId)
.build())
.getEvents();
}

@Test
void commitRetryEvents() {
List<EventData> events = eventsFrom(TestCommitRetry.this::commitRetrySuccessAfterRetry);
// 1 sleep due to failure induced by commitRetrySuccessAfterRetry()
soft.assertThat(events).hasSize(1);
soft.assertThat(events.get(0).getName()).isEqualTo("nessie.commit-retry.sleep");
soft.assertThat(events)
.extracting(
e ->
e.getAttributes().asMap().keySet().stream().map(AttributeKey::getKey).findFirst())
.containsExactly(Optional.of("nessie.commit-retry.sleep.duration-millis"));
}
}

MonotonicClock mockedClock(int retries) {
Long[] times = new Long[retries];
Arrays.fill(times, 0L);
Expand Down

0 comments on commit 3d43c93

Please sign in to comment.