Skip to content

Commit

Permalink
change duration to be indexed as milliseconds
Browse files Browse the repository at this point in the history
this will make the SASI on this column contain fewer distinct values, improving write performance.
users won't be discerning within milliseconds about searches they perform in the UI, in regards to the duration field.

duration is always rounded up, so the result list would not skip any results. and duration in the span table would remain accurate to microseconds.

ref: #1758 (comment)
  • Loading branch information
michaelsembwever authored and Adrian Cole committed Nov 12, 2017
1 parent 2cdc7dc commit 12adf1a
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 7 deletions.
7 changes: 7 additions & 0 deletions zipkin-storage/zipkin2_cassandra/README.md
Expand Up @@ -66,6 +66,13 @@ Note: annotations with values longer than 256 characters
are not written to the `annotation_query` SASI, as they aren't intended
for use in user queries.

The `trace_by_service_span` index is only used by query apis, and notably supports millisecond
resolution duration. In other words, query inputs are truncated to whole milliseconds. For
example, a call to GET /api/v2/traces?minDuration=12345 will return traces that include a span that
has at least 12 milliseconds duration. This resolution only affects the query: original duration data
remains at microsecond granularity. Meanwhile, write performance is dramatically better than indexing
microsecond values, because the index then holds far fewer distinct values.

### Time-To-Live
Time-To-Live is now set by default at the table level. It cannot be overridden in write requests.

Expand Down
Expand Up @@ -192,7 +192,9 @@ void storeTraceServiceSpanName(
.setString("trace_id", traceId);

if (null != duration) {
bound = bound.setLong("duration", duration);
// stored as milliseconds, not microseconds
long durationMillis = TimeUnit.MICROSECONDS.toMillis(duration);
bound.setLong("duration", durationMillis);
}
session.executeAsync(bound);
} catch (RuntimeException ignore) {
Expand Down
Expand Up @@ -63,6 +63,7 @@
import static com.google.common.util.concurrent.Futures.immediateFailedFuture;
import static com.google.common.util.concurrent.Futures.immediateFuture;
import static com.google.common.util.concurrent.Futures.transformAsync;
import java.util.concurrent.TimeUnit;
import static zipkin2.storage.cassandra.Schema.TABLE_DEPENDENCY;
import static zipkin2.storage.cassandra.Schema.TABLE_SERVICE_SPANS;
import static zipkin2.storage.cassandra.Schema.TABLE_SPAN;
Expand Down Expand Up @@ -438,9 +439,13 @@ ListenableFuture<Map<String, Long>> getTraceIdsByServiceNames(QueryRequest reque
.setInt("limit_", request.limit());

if (withDuration) {
bound = bound
.setLong("start_duration", null != request.minDuration() ? request.minDuration() : 0)
.setLong("end_duration", null != request.maxDuration() ? request.maxDuration() : Long.MAX_VALUE);
long minDuration = TimeUnit.MICROSECONDS
.toMillis(null != request.minDuration() ? request.minDuration() : 0);

long maxDuration = TimeUnit.MICROSECONDS
.toMillis(null != request.maxDuration() ? request.maxDuration() : Long.MAX_VALUE);

bound = bound.setLong("start_duration", minDuration).setLong("end_duration", maxDuration);
}
bound.setFetchSize(request.limit());

Expand Down
Expand Up @@ -52,7 +52,7 @@ final class CassandraUtil {

/**
 * Computes the time bucket for a timestamp by dividing it by the bucket window.
 *
 * @param ts_micro timestamp, in microseconds going by the parameter name
 * @return {@code ts_micro} divided by the window (seconds scaled by one million), narrowed to
 *     {@code int}
 */
public static int durationIndexBucket(long ts_micro) {
// if the window constant has microsecond precision, the division produces negative values
// NOTE(review): this listing is a commit diff without +/- markers; the next line appears to be
// the removed (pre-change) form and the one after it the added (post-change) form — confirm.
return (int) ((ts_micro / DURATION_INDEX_BUCKET_WINDOW_SECONDS) / 1000000);
// NOTE(review): if DURATION_INDEX_BUCKET_WINDOW_SECONDS is declared as an int, the product with
// 1_000_000 is computed in int arithmetic and overflows for windows above ~2147 seconds —
// confirm the constant is a long.
return (int) (ts_micro / (DURATION_INDEX_BUCKET_WINDOW_SECONDS * 1_000_000));
}

/**
Expand Down
Expand Up @@ -47,7 +47,7 @@ CREATE TABLE IF NOT EXISTS zipkin2_cassandra.trace_by_service_span (
bucket int, //-- time bucket, calculated as ts/interval (in microseconds), for some pre-configured interval like 1 day.
ts timeuuid, //-- start timestamp of the span, truncated to millisecond precision
trace_id text, //-- trace ID
duration bigint, //-- span duration, in microseconds
duration bigint, //-- span duration, in milliseconds
PRIMARY KEY ((service, span, bucket), ts)
)
WITH CLUSTERING ORDER BY (ts DESC)
Expand All @@ -57,7 +57,7 @@ CREATE TABLE IF NOT EXISTS zipkin2_cassandra.trace_by_service_span (
AND read_repair_chance = 0
AND dclocal_read_repair_chance = 0
AND speculative_retry = '95percentile'
AND comment = 'Secondary table for looking up a trace by a service, or service and span. span column may be blank (when only looking up by service). bucket column adds time bucketing to the partition key, values are microseconds rounded to a pre-configured interval (typically one day)';
AND comment = 'Secondary table for looking up a trace by a service, or service and span. span column may be blank (when only looking up by service). bucket column adds time bucketing to the partition key, values are microseconds rounded to a pre-configured interval (typically one day). ts column is start timestamp of the span as time-uuid, truncated to millisecond precision. duration column is span duration, rounded up to tens of milliseconds (or hundredths of seconds)';

CREATE TABLE IF NOT EXISTS zipkin2_cassandra.span_by_service (
service text,
Expand Down
Expand Up @@ -13,6 +13,7 @@
*/
package zipkin2.storage.cassandra;

import java.util.concurrent.TimeUnit;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
Expand All @@ -22,6 +23,7 @@
import zipkin2.storage.QueryRequest;

import static org.assertj.core.api.Assertions.assertThat;
import static zipkin2.TestObjects.TODAY;

public class CassandraUtilTest {

Expand Down Expand Up @@ -82,4 +84,14 @@ public void annotationKeys_skipsTagsLongerThan256chars() throws Exception {
.contains("aws.arn", "aws.arn:" + arn)
.doesNotContain(TraceKeys.HTTP_URL, TraceKeys.HTTP_URL + ':' + url);
}

/** Guards the bucketing arithmetic against numeric overflow, which would show up as a negative bucket. */
@Test public void durationIndexBucket_notNegative() {
  long todayMicros = TODAY * 1000L;
  long tenYearsOutMicros = (TODAY + TimeUnit.DAYS.toMillis(3654)) * 1000L;

  // a present-day timestamp lands in a non-negative bucket
  assertThat(CassandraUtil.durationIndexBucket(todayMicros)).isNotNegative();
  // and so does one roughly ten years (3654 days) ahead
  assertThat(CassandraUtil.durationIndexBucket(tenYearsOutMicros)).isNotNegative();
}
}

0 comments on commit 12adf1a

Please sign in to comment.