Add invert match on tail sampling string attribute

open-telemetry · Aug 6, 2021 · 899589d · 899589d
1 parent 9a8b636
commit 899589d
Show file tree

Hide file tree

Showing 8 changed files with 368 additions and 28 deletions.
diff --git a/processor/tailsamplingprocessor/README.md b/processor/tailsamplingprocessor/README.md
@@ -67,6 +67,11 @@ processors:
             name: test-policy-7,
             type: rate_limiting,
             rate_limiting: {spans_per_second: 35}
+          },
+          {
+            name: test-policy-8,
+            type: string_attribute,
+            string_attribute: {key: http.url, values: [\/health, \/metrics], enabled_regex_matching: true, invert_match: true}
          }
       ]
 ```

diff --git a/processor/tailsamplingprocessor/config.go b/processor/tailsamplingprocessor/config.go
@@ -96,6 +96,10 @@ type StringAttributeCfg struct {
 	// from the regular expressions defined in Values.
 	// CacheMaxSize will not be used if EnabledRegexMatching is set to false.
 	CacheMaxSize int `mapstructure:"cache_max_size"`
+	// InvertMatch indicates that values or regular expressions must not match against attribute values.
+	// If InvertMatch is true and Values contains only 'acme', every value except 'acme' will be sampled.
+	// Also, if the specified Key is not present on any resource or span attributes, data will be sampled.
+	InvertMatch bool `mapstructure:"invert_match"`
 }
 
 // RateLimitingCfg holds the configurable settings to create a rate limiting

diff --git a/processor/tailsamplingprocessor/internal/sampling/policy.go b/processor/tailsamplingprocessor/internal/sampling/policy.go
@@ -53,6 +53,14 @@ const (
 	// Dropped is used when data needs to be purged before the sampling policy
 	// had a chance to evaluate it.
 	Dropped
+	// Error is used to indicate that policy evaluation did not succeed.
+	Error
+	// InvertSampled is used on the invert match flow and indicates to sample
+	// the data.
+	InvertSampled
+	// InvertNotSampled is used on the invert match flow and indicates to not
+	// sample the data.
+	InvertNotSampled
 )
 
 // PolicyEvaluator implements a tail-based sampling policy evaluator,

diff --git a/processor/tailsamplingprocessor/internal/sampling/string_tag_filter.go b/processor/tailsamplingprocessor/internal/sampling/string_tag_filter.go
@@ -29,7 +29,8 @@ type stringAttributeFilter struct {
 	logger *zap.Logger
 	// matcher defines the func to match the attribute values in strict string
 	// or in regular expression
-	matcher func(string) bool
+	matcher     func(string) bool
+	invertMatch bool
 }
 
 type regexStrSetting struct {
@@ -41,7 +42,7 @@ var _ PolicyEvaluator = (*stringAttributeFilter)(nil)
 
 // NewStringAttributeFilter creates a policy evaluator that samples all traces with
 // the given attribute matching one of the given values (exact strings or regular expressions).
-func NewStringAttributeFilter(logger *zap.Logger, key string, values []string, regexMatchEnabled bool, evictSize int) PolicyEvaluator {
+func NewStringAttributeFilter(logger *zap.Logger, key string, values []string, regexMatchEnabled bool, evictSize int, invertMatch bool) PolicyEvaluator {
 	// initialize regex filter rules and LRU cache for matched results
 	if regexMatchEnabled {
 		if evictSize <= 0 {
@@ -72,6 +73,7 @@ func NewStringAttributeFilter(logger *zap.Logger, key string, values []string, r
 				regexStrSetting.matchedAttrs.Add(toMatch, false)
 				return false
 			},
+			invertMatch: invertMatch,
 		}
 	}
 
@@ -90,6 +92,7 @@ func NewStringAttributeFilter(logger *zap.Logger, key string, values []string, r
 			_, matched := valuesMap[toMatch]
 			return matched
 		},
+		invertMatch: invertMatch,
 	}
 }
 
@@ -111,6 +114,32 @@ func (saf *stringAttributeFilter) Evaluate(_ pdata.TraceID, trace *TraceData) (D
 	batches := trace.ReceivedBatches
 	trace.Unlock()
 
+	if saf.invertMatch {
+		// Invert Match returns true by default, except when key and value are matched
+		return invertHasResourceOrSpanWithCondition(
+			batches,
+			func(resource pdata.Resource) bool {
+				if v, ok := resource.Attributes().Get(saf.key); ok {
+					if ok := saf.matcher(v.StringVal()); ok {
+						return false
+					}
+				}
+				return true
+			},
+			func(span pdata.Span) bool {
+				if v, ok := span.Attributes().Get(saf.key); ok {
+					truncableStr := v.StringVal()
+					if len(truncableStr) > 0 {
+						if ok := saf.matcher(v.StringVal()); ok {
+							return false
+						}
+					}
+				}
+				return true
+			},
+		), nil
+	}
+
 	return hasResourceOrSpanWithCondition(
 		batches,
 		func(resource pdata.Resource) bool {

diff --git a/processor/tailsamplingprocessor/internal/sampling/string_tag_filter_test.go b/processor/tailsamplingprocessor/internal/sampling/string_tag_filter_test.go
@@ -28,6 +28,7 @@ type TestStringAttributeCfg struct {
 	Values               []string
 	EnabledRegexMatching bool
 	CacheMaxSize         int
+	InvertMatch          bool
 }
 
 func TestStringTagFilter(t *testing.T) {
@@ -106,11 +107,113 @@ func TestStringTagFilter(t *testing.T) {
 			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{}, EnabledRegexMatching: true},
 			Decision:  NotSampled,
 		},
+		{
+			Desc:      "invert nonmatching node attribute key",
+			Trace:     newTraceStringAttrs(map[string]pdata.AttributeValue{"non_matching": pdata.NewAttributeValueString("value")}, "", ""),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
+			Decision:  InvertSampled,
+		},
+		{
+			Desc:      "invert nonmatching node attribute value",
+			Trace:     newTraceStringAttrs(map[string]pdata.AttributeValue{"example": pdata.NewAttributeValueString("non_matching")}, "", ""),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
+			Decision:  InvertSampled,
+		},
+		{
+			Desc:      "invert nonmatching node attribute list",
+			Trace:     newTraceStringAttrs(map[string]pdata.AttributeValue{"example": pdata.NewAttributeValueString("non_matching")}, "", ""),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"first_value", "value", "last_value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
+			Decision:  InvertSampled,
+		},
+		{
+			Desc:      "invert matching node attribute",
+			Trace:     newTraceStringAttrs(map[string]pdata.AttributeValue{"example": pdata.NewAttributeValueString("value")}, "", ""),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
+			Decision:  InvertNotSampled,
+		},
+		{
+			Desc:      "invert matching node attribute list",
+			Trace:     newTraceStringAttrs(map[string]pdata.AttributeValue{"example": pdata.NewAttributeValueString("value")}, "", ""),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"first_value", "value", "last_value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
+			Decision:  InvertNotSampled,
+		},
+		{
+			Desc:      "invert nonmatching span attribute key",
+			Trace:     newTraceStringAttrs(empty, "nonmatching", "value"),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
+			Decision:  InvertSampled,
+		},
+		{
+			Desc:      "invert nonmatching span attribute value",
+			Trace:     newTraceStringAttrs(empty, "example", "nonmatching"),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
+			Decision:  InvertSampled,
+		},
+		{
+			Desc:      "invert nonmatching span attribute list",
+			Trace:     newTraceStringAttrs(empty, "example", "nonmatching"),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"first_value", "value", "last_value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
+			Decision:  InvertSampled,
+		},
+		{
+			Desc:      "invert matching span attribute",
+			Trace:     newTraceStringAttrs(empty, "example", "value"),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
+			Decision:  InvertNotSampled,
+		},
+		{
+			Desc:      "invert matching span attribute list",
+			Trace:     newTraceStringAttrs(empty, "example", "value"),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"first_value", "value", "last_value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
+			Decision:  InvertNotSampled,
+		},
+		{
+			Desc:      "invert matching span attribute with regex",
+			Trace:     newTraceStringAttrs(empty, "example", "grpc.health.v1.HealthCheck"),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"v[0-9]+.HealthCheck$"}, EnabledRegexMatching: true, CacheMaxSize: defaultCacheSize, InvertMatch: true},
+			Decision:  InvertNotSampled,
+		},
+		{
+			Desc:      "invert matching span attribute with regex list",
+			Trace:     newTraceStringAttrs(empty, "example", "grpc.health.v1.HealthCheck"),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"^http", "v[0-9]+.HealthCheck$", "metrics$"}, EnabledRegexMatching: true, CacheMaxSize: defaultCacheSize, InvertMatch: true},
+			Decision:  InvertNotSampled,
+		},
+		{
+			Desc:      "invert nonmatching span attribute with regex",
+			Trace:     newTraceStringAttrs(empty, "example", "grpc.health.v1.HealthCheck"),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"v[a-z]+.HealthCheck$"}, EnabledRegexMatching: true, CacheMaxSize: defaultCacheSize, InvertMatch: true},
+			Decision:  InvertSampled,
+		},
+		{
+			Desc:      "invert nonmatching span attribute with regex list",
+			Trace:     newTraceStringAttrs(empty, "example", "grpc.health.v1.HealthCheck"),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"^http", "v[a-z]+.HealthCheck$", "metrics$"}, EnabledRegexMatching: true, CacheMaxSize: defaultCacheSize, InvertMatch: true},
+			Decision:  InvertSampled,
+		},
+		{
+			Desc:      "invert matching plain text node attribute in regex",
+			Trace:     newTraceStringAttrs(map[string]pdata.AttributeValue{"example": pdata.NewAttributeValueString("value")}, "", ""),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"value"}, EnabledRegexMatching: true, CacheMaxSize: defaultCacheSize, InvertMatch: true},
+			Decision:  InvertNotSampled,
+		},
+		{
+			Desc:      "invert matching plain text node attribute in regex list",
+			Trace:     newTraceStringAttrs(map[string]pdata.AttributeValue{"example": pdata.NewAttributeValueString("value")}, "", ""),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"first_value", "value", "last_value"}, EnabledRegexMatching: true, CacheMaxSize: defaultCacheSize, InvertMatch: true},
+			Decision:  InvertNotSampled,
+		},
+		{
+			Desc:      "invert nonmatching span attribute on empty filter list",
+			Trace:     newTraceStringAttrs(empty, "example", "grpc.health.v1.HealthCheck"),
+			filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{}, EnabledRegexMatching: true, InvertMatch: true},
+			Decision:  InvertSampled,
+		},
 	}
 
 	for _, c := range cases {
 		t.Run(c.Desc, func(t *testing.T) {
-			filter := NewStringAttributeFilter(zap.NewNop(), c.filterCfg.Key, c.filterCfg.Values, c.filterCfg.EnabledRegexMatching, c.filterCfg.CacheMaxSize)
+			filter := NewStringAttributeFilter(zap.NewNop(), c.filterCfg.Key, c.filterCfg.Values, c.filterCfg.EnabledRegexMatching, c.filterCfg.CacheMaxSize, c.filterCfg.InvertMatch)
 			decision, err := filter.Evaluate(pdata.NewTraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), c.Trace)
 			assert.NoError(t, err)
 			assert.Equal(t, decision, c.Decision)
@@ -120,7 +223,7 @@ func TestStringTagFilter(t *testing.T) {
 
 func BenchmarkStringTagFilterEvaluatePlainText(b *testing.B) {
 	trace := newTraceStringAttrs(map[string]pdata.AttributeValue{"example": pdata.NewAttributeValueString("value")}, "", "")
-	filter := NewStringAttributeFilter(zap.NewNop(), "example", []string{"value"}, false, 0)
+	filter := NewStringAttributeFilter(zap.NewNop(), "example", []string{"value"}, false, 0, false)
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		filter.Evaluate(pdata.NewTraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), trace)
@@ -129,7 +232,7 @@ func BenchmarkStringTagFilterEvaluatePlainText(b *testing.B) {
 
 func BenchmarkStringTagFilterEvaluateRegex(b *testing.B) {
 	trace := newTraceStringAttrs(map[string]pdata.AttributeValue{"example": pdata.NewAttributeValueString("grpc.health.v1.HealthCheck")}, "", "")
-	filter := NewStringAttributeFilter(zap.NewNop(), "example", []string{"v[0-9]+.HealthCheck$"}, true, 0)
+	filter := NewStringAttributeFilter(zap.NewNop(), "example", []string{"v[0-9]+.HealthCheck$"}, true, 0, false)
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		filter.Evaluate(pdata.NewTraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), trace)
@@ -155,7 +258,7 @@ func newTraceStringAttrs(nodeAttrs map[string]pdata.AttributeValue, spanAttrKey
 }
 
 func TestOnLateArrivingSpans_StringAttribute(t *testing.T) {
-	filter := NewStringAttributeFilter(zap.NewNop(), "example", []string{"value"}, false, defaultCacheSize)
+	filter := NewStringAttributeFilter(zap.NewNop(), "example", []string{"value"}, false, defaultCacheSize, false)
 	err := filter.OnLateArrivingSpans(NotSampled, nil)
 	assert.Nil(t, err)
 }
diff --git a/processor/tailsamplingprocessor/internal/sampling/util.go b/processor/tailsamplingprocessor/internal/sampling/util.go
@@ -42,6 +42,32 @@ func hasResourceOrSpanWithCondition(
 	return NotSampled
 }
 
+// invertHasResourceOrSpanWithCondition iterates through all the resources and instrumentation library spans until any
+// callback returns false. It returns InvertNotSampled as soon as shouldSampleResource or shouldSampleSpan returns
+// false, and InvertSampled when every callback returns true (including when batches holds no resource spans).
+func invertHasResourceOrSpanWithCondition(
+	batches []pdata.Traces,
+	shouldSampleResource func(resource pdata.Resource) bool,
+	shouldSampleSpan func(span pdata.Span) bool,
+) Decision {
+	for _, batch := range batches {
+		rspans := batch.ResourceSpans()
+
+		for i := 0; i < rspans.Len(); i++ {
+			rs := rspans.At(i)
+
+			resource := rs.Resource()
+			// A single rejected resource decides the result; later batches are not inspected.
+			if !shouldSampleResource(resource) {
+				return InvertNotSampled
+			}
+
+			// The helper returns false on the first span that shouldSampleSpan rejects.
+			if !invertHasInstrumentationLibrarySpanWithCondition(rs.InstrumentationLibrarySpans(), shouldSampleSpan) {
+				return InvertNotSampled
+			}
+		}
+	}
+	return InvertSampled
+}
+
 // hasSpanWithCondition iterates through all the instrumentation library spans until any callback returns true.
 func hasSpanWithCondition(batches []pdata.Traces, shouldSample func(span pdata.Span) bool) Decision {
 	for _, batch := range batches {
@@ -72,3 +98,18 @@ func hasInstrumentationLibrarySpanWithCondition(ilss pdata.InstrumentationLibrar
 	}
 	return false
 }
+
+// invertHasInstrumentationLibrarySpanWithCondition reports whether check returns true for every span in ilss.
+// It returns false at the first span for which check returns false, and true when no span is rejected
+// (including when ilss contains no spans).
+func invertHasInstrumentationLibrarySpanWithCondition(ilss pdata.InstrumentationLibrarySpansSlice, check func(span pdata.Span) bool) bool {
+	for i := 0; i < ilss.Len(); i++ {
+		ils := ilss.At(i)
+
+		for j := 0; j < ils.Spans().Len(); j++ {
+			span := ils.Spans().At(j)
+
+			// Stop at the first rejected span; remaining spans are not checked.
+			if !check(span) {
+				return false
+			}
+		}
+	}
+	return true
+}