Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add invert match on tail sampling string attribute #4393

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions processor/tailsamplingprocessor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ processors:
name: test-policy-8,
type: rate_limiting,
rate_limiting: {spans_per_second: 35}
},
{
name: test-policy-9,
type: string_attribute,
string_attribute: {key: http.url, values: [\/health, \/metrics], enabled_regex_matching: true, invert_match: true}
}
]
```
Expand Down
4 changes: 4 additions & 0 deletions processor/tailsamplingprocessor/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ type StringAttributeCfg struct {
// from the regular expressions defined in Values.
// CacheMaxSize will not be used if EnabledRegexMatching is set to false.
CacheMaxSize int `mapstructure:"cache_max_size"`
// InvertMatch indicates that values or regular expressions must not match against attribute values.
// If InvertMatch is true and Values is equal to 'acme', all other values will be sampled except 'acme'.
// Also, if the specified Key does not match on any resource or span attributes, data will be sampled.
InvertMatch bool `mapstructure:"invert_match"`
}

// RateLimitingCfg holds the configurable settings to create a rate limiting
Expand Down
8 changes: 8 additions & 0 deletions processor/tailsamplingprocessor/internal/sampling/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ const (
// Dropped is used when data needs to be purged before the sampling policy
// had a chance to evaluate it.
Dropped
// Error is used to indicate that policy evaluation did not succeed.
Error
// InvertSampled is used on the invert match flow and indicates to sample
// the data.
InvertSampled
// InvertNotSampled is used on the invert match flow and indicates to not
// sample the data.
InvertNotSampled
)

// PolicyEvaluator implements a tail-based sampling policy evaluator,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ type stringAttributeFilter struct {
logger *zap.Logger
// matcher defines the func to match the attribute values in strict string
// or in regular expression
matcher func(string) bool
matcher func(string) bool
invertMatch bool
}

type regexStrSetting struct {
Expand All @@ -41,7 +42,7 @@ var _ PolicyEvaluator = (*stringAttributeFilter)(nil)

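// NewStringAttributeFilter creates a policy evaluator that samples all traces with
// the given string attribute matching one of the given values (exactly, or as a regular
// expression when regexMatchEnabled is true). When invertMatch is true, traces are sampled unless the attribute matches.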
func NewStringAttributeFilter(logger *zap.Logger, key string, values []string, regexMatchEnabled bool, evictSize int) PolicyEvaluator {
func NewStringAttributeFilter(logger *zap.Logger, key string, values []string, regexMatchEnabled bool, evictSize int, invertMatch bool) PolicyEvaluator {
// initialize regex filter rules and LRU cache for matched results
if regexMatchEnabled {
if evictSize <= 0 {
Expand Down Expand Up @@ -72,6 +73,7 @@ func NewStringAttributeFilter(logger *zap.Logger, key string, values []string, r
regexStrSetting.matchedAttrs.Add(toMatch, false)
return false
},
invertMatch: invertMatch,
}
}

Expand All @@ -90,6 +92,7 @@ func NewStringAttributeFilter(logger *zap.Logger, key string, values []string, r
_, matched := valuesMap[toMatch]
return matched
},
invertMatch: invertMatch,
}
}

Expand All @@ -111,6 +114,32 @@ func (saf *stringAttributeFilter) Evaluate(_ pdata.TraceID, trace *TraceData) (D
batches := trace.ReceivedBatches
trace.Unlock()

if saf.invertMatch {
// Invert Match returns true by default, except when key and value are matched
return invertHasResourceOrSpanWithCondition(
batches,
func(resource pdata.Resource) bool {
if v, ok := resource.Attributes().Get(saf.key); ok {
if ok := saf.matcher(v.StringVal()); ok {
return false
}
}
return true
},
func(span pdata.Span) bool {
if v, ok := span.Attributes().Get(saf.key); ok {
truncableStr := v.StringVal()
if len(truncableStr) > 0 {
if ok := saf.matcher(v.StringVal()); ok {
return false
}
}
}
return true
},
), nil
}

return hasResourceOrSpanWithCondition(
batches,
func(resource pdata.Resource) bool {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ type TestStringAttributeCfg struct {
Values []string
EnabledRegexMatching bool
CacheMaxSize int
InvertMatch bool
}

func TestStringTagFilter(t *testing.T) {
Expand Down Expand Up @@ -106,11 +107,113 @@ func TestStringTagFilter(t *testing.T) {
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{}, EnabledRegexMatching: true},
Decision: NotSampled,
},
{
Desc: "invert nonmatching node attribute key",
Trace: newTraceStringAttrs(map[string]pdata.AttributeValue{"non_matching": pdata.NewAttributeValueString("value")}, "", ""),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the regex setting here decisive for the test? If not, leave it out. Same for the cache max size, and same for the other assertions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regex setting is necessary on this case. I duplicated all tests to ensure backward compatibility.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it, looks good then.

Decision: InvertSampled,
},
{
Desc: "invert nonmatching node attribute value",
Trace: newTraceStringAttrs(map[string]pdata.AttributeValue{"example": pdata.NewAttributeValueString("non_matching")}, "", ""),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
Decision: InvertSampled,
},
{
Desc: "invert nonmatching node attribute list",
Trace: newTraceStringAttrs(map[string]pdata.AttributeValue{"example": pdata.NewAttributeValueString("non_matching")}, "", ""),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"first_value", "value", "last_value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
Decision: InvertSampled,
},
{
Desc: "invert matching node attribute",
Trace: newTraceStringAttrs(map[string]pdata.AttributeValue{"example": pdata.NewAttributeValueString("value")}, "", ""),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
Decision: InvertNotSampled,
},
{
Desc: "invert matching node attribute list",
Trace: newTraceStringAttrs(map[string]pdata.AttributeValue{"example": pdata.NewAttributeValueString("value")}, "", ""),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"first_value", "value", "last_value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
Decision: InvertNotSampled,
},
{
Desc: "invert nonmatching span attribute key",
Trace: newTraceStringAttrs(empty, "nonmatching", "value"),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
Decision: InvertSampled,
},
{
Desc: "invert nonmatching span attribute value",
Trace: newTraceStringAttrs(empty, "example", "nonmatching"),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
Decision: InvertSampled,
},
{
Desc: "invert nonmatching span attribute list",
Trace: newTraceStringAttrs(empty, "example", "nonmatching"),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"first_value", "value", "last_value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
Decision: InvertSampled,
},
{
Desc: "invert matching span attribute",
Trace: newTraceStringAttrs(empty, "example", "value"),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
Decision: InvertNotSampled,
},
{
Desc: "invert matching span attribute list",
Trace: newTraceStringAttrs(empty, "example", "value"),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"first_value", "value", "last_value"}, EnabledRegexMatching: false, CacheMaxSize: defaultCacheSize, InvertMatch: true},
Decision: InvertNotSampled,
},
{
Desc: "invert matching span attribute with regex",
Trace: newTraceStringAttrs(empty, "example", "grpc.health.v1.HealthCheck"),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"v[0-9]+.HealthCheck$"}, EnabledRegexMatching: true, CacheMaxSize: defaultCacheSize, InvertMatch: true},
Decision: InvertNotSampled,
},
{
Desc: "invert matching span attribute with regex list",
Trace: newTraceStringAttrs(empty, "example", "grpc.health.v1.HealthCheck"),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"^http", "v[0-9]+.HealthCheck$", "metrics$"}, EnabledRegexMatching: true, CacheMaxSize: defaultCacheSize, InvertMatch: true},
Decision: InvertNotSampled,
},
{
Desc: "invert nonmatching span attribute with regex",
Trace: newTraceStringAttrs(empty, "example", "grpc.health.v1.HealthCheck"),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"v[a-z]+.HealthCheck$"}, EnabledRegexMatching: true, CacheMaxSize: defaultCacheSize, InvertMatch: true},
Decision: InvertSampled,
},
{
Desc: "invert nonmatching span attribute with regex list",
Trace: newTraceStringAttrs(empty, "example", "grpc.health.v1.HealthCheck"),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"^http", "v[a-z]+.HealthCheck$", "metrics$"}, EnabledRegexMatching: true, CacheMaxSize: defaultCacheSize, InvertMatch: true},
Decision: InvertSampled,
},
{
Desc: "invert matching plain text node attribute in regex",
Trace: newTraceStringAttrs(map[string]pdata.AttributeValue{"example": pdata.NewAttributeValueString("value")}, "", ""),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"value"}, EnabledRegexMatching: true, CacheMaxSize: defaultCacheSize, InvertMatch: true},
Decision: InvertNotSampled,
},
{
Desc: "invert matching plain text node attribute in regex list",
Trace: newTraceStringAttrs(map[string]pdata.AttributeValue{"example": pdata.NewAttributeValueString("value")}, "", ""),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{"first_value", "value", "last_value"}, EnabledRegexMatching: true, CacheMaxSize: defaultCacheSize, InvertMatch: true},
Decision: InvertNotSampled,
},
{
Desc: "invert nonmatching span attribute on empty filter list",
Trace: newTraceStringAttrs(empty, "example", "grpc.health.v1.HealthCheck"),
filterCfg: &TestStringAttributeCfg{Key: "example", Values: []string{}, EnabledRegexMatching: true, InvertMatch: true},
Decision: InvertSampled,
},
}

for _, c := range cases {
t.Run(c.Desc, func(t *testing.T) {
filter := NewStringAttributeFilter(zap.NewNop(), c.filterCfg.Key, c.filterCfg.Values, c.filterCfg.EnabledRegexMatching, c.filterCfg.CacheMaxSize)
filter := NewStringAttributeFilter(zap.NewNop(), c.filterCfg.Key, c.filterCfg.Values, c.filterCfg.EnabledRegexMatching, c.filterCfg.CacheMaxSize, c.filterCfg.InvertMatch)
decision, err := filter.Evaluate(pdata.NewTraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), c.Trace)
assert.NoError(t, err)
assert.Equal(t, decision, c.Decision)
Expand All @@ -120,7 +223,7 @@ func TestStringTagFilter(t *testing.T) {

func BenchmarkStringTagFilterEvaluatePlainText(b *testing.B) {
trace := newTraceStringAttrs(map[string]pdata.AttributeValue{"example": pdata.NewAttributeValueString("value")}, "", "")
filter := NewStringAttributeFilter(zap.NewNop(), "example", []string{"value"}, false, 0)
filter := NewStringAttributeFilter(zap.NewNop(), "example", []string{"value"}, false, 0, false)
b.ResetTimer()
for i := 0; i < b.N; i++ {
filter.Evaluate(pdata.NewTraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), trace)
Expand All @@ -129,7 +232,7 @@ func BenchmarkStringTagFilterEvaluatePlainText(b *testing.B) {

func BenchmarkStringTagFilterEvaluateRegex(b *testing.B) {
trace := newTraceStringAttrs(map[string]pdata.AttributeValue{"example": pdata.NewAttributeValueString("grpc.health.v1.HealthCheck")}, "", "")
filter := NewStringAttributeFilter(zap.NewNop(), "example", []string{"v[0-9]+.HealthCheck$"}, true, 0)
filter := NewStringAttributeFilter(zap.NewNop(), "example", []string{"v[0-9]+.HealthCheck$"}, true, 0, false)
b.ResetTimer()
for i := 0; i < b.N; i++ {
filter.Evaluate(pdata.NewTraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), trace)
Expand All @@ -155,7 +258,7 @@ func newTraceStringAttrs(nodeAttrs map[string]pdata.AttributeValue, spanAttrKey
}

func TestOnLateArrivingSpans_StringAttribute(t *testing.T) {
filter := NewStringAttributeFilter(zap.NewNop(), "example", []string{"value"}, false, defaultCacheSize)
filter := NewStringAttributeFilter(zap.NewNop(), "example", []string{"value"}, false, defaultCacheSize, false)
err := filter.OnLateArrivingSpans(NotSampled, nil)
assert.Nil(t, err)
}
41 changes: 41 additions & 0 deletions processor/tailsamplingprocessor/internal/sampling/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,32 @@ func hasResourceOrSpanWithCondition(
return NotSampled
}

// invertHasResourceOrSpanWithCondition walks every resource in every batch,
// together with the spans grouped under it, and stops at the first callback
// that returns false.
//
// It returns InvertNotSampled as soon as shouldSampleResource rejects a
// resource or shouldSampleSpan rejects any span under it, and InvertSampled
// when no callback rejected anything (including when batches is empty).
func invertHasResourceOrSpanWithCondition(
	batches []pdata.Traces,
	shouldSampleResource func(resource pdata.Resource) bool,
	shouldSampleSpan func(span pdata.Span) bool,
) Decision {
	for _, traces := range batches {
		resourceSpans := traces.ResourceSpans()

		for idx := 0; idx < resourceSpans.Len(); idx++ {
			entry := resourceSpans.At(idx)

			// The resource is checked first; its spans are only visited when
			// the resource itself was accepted (short-circuit evaluation).
			accepted := shouldSampleResource(entry.Resource()) &&
				invertHasInstrumentationLibrarySpanWithCondition(entry.InstrumentationLibrarySpans(), shouldSampleSpan)
			if !accepted {
				return InvertNotSampled
			}
		}
	}

	return InvertSampled
}

// hasSpanWithCondition iterates through all the instrumentation library spans until any callback returns true.
func hasSpanWithCondition(batches []pdata.Traces, shouldSample func(span pdata.Span) bool) Decision {
for _, batch := range batches {
Expand Down Expand Up @@ -72,3 +98,18 @@ func hasInstrumentationLibrarySpanWithCondition(ilss pdata.InstrumentationLibrar
}
return false
}

// invertHasInstrumentationLibrarySpanWithCondition reports whether check
// returns true for every span in ilss. It stops and returns false at the
// first span for which check returns false; an empty slice yields true.
func invertHasInstrumentationLibrarySpanWithCondition(ilss pdata.InstrumentationLibrarySpansSlice, check func(span pdata.Span) bool) bool {
	for libIdx := 0; libIdx < ilss.Len(); libIdx++ {
		spans := ilss.At(libIdx).Spans()

		for spanIdx := 0; spanIdx < spans.Len(); spanIdx++ {
			if ok := check(spans.At(spanIdx)); !ok {
				return false
			}
		}
	}

	return true
}
Loading