Skip to content

Commit

Permalink
store: Added regex-set optimization to ExpandedPostings (#2450)
Browse files Browse the repository at this point in the history
* Added regex-set optimization to ExpandedPostings

Signed-off-by: Peter Štibraný <peter.stibrany@grafana.com>

* Fixed capitalization.

Signed-off-by: Peter Štibraný <peter.stibrany@grafana.com>

* CHANGELOG.md

Signed-off-by: Peter Štibraný <peter.stibrany@grafana.com>

* Removed unnecessary change.

Signed-off-by: Peter Štibraný <peter.stibrany@grafana.com>

* Remove whitespace

Signed-off-by: Peter Štibraný <peter.stibrany@grafana.com>

* Use testutil instead of testify.

Signed-off-by: Peter Štibraný <peter.stibrany@grafana.com>

* Added copyright header, from original Prometheus querier.go

Signed-off-by: Peter Štibraný <peter.stibrany@grafana.com>

* Use Thanos copyright header. 🤦

Signed-off-by: Peter Štibraný <peter.stibrany@grafana.com>

* Added · at the end of the sentence. 🤯.

I will randomly add emojis and GitHub emoji markup to commit messages that fix frustrating checks like this one. And intentionally not break the line. Let's see how lint deals with that! Ha.

Signed-off-by: Peter Štibraný <peter.stibrany@grafana.com>
  • Loading branch information
pstibrany committed Apr 17, 2020
1 parent b391ca1 commit 6452f5d
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -19,6 +19,7 @@ We use *breaking* word for marking changes that are not backward compatible (rel
### Added

### Changed
- [#2450](https://github.com/thanos-io/thanos/pull/2450) Store: regex-set optimization for `label=~"a|b|c"` matchers.

## [v0.12.0](https://github.com/thanos-io/thanos/releases/tag/v0.12.0) - 2020.04.15

Expand Down
9 changes: 9 additions & 0 deletions pkg/store/bucket.go
Expand Up @@ -1502,6 +1502,15 @@ func toPostingGroup(lvalsFn func(name string) ([]string, error), m *labels.Match
return emptyPostingsGroup, nil
}

// Regex-set optimization: when the regexp is a plain alternation of literal
// values (e.g. `a|b|c`), build the posting group directly from those values.
// The pattern is parsed once and the result reused.
if m.Type == labels.MatchRegexp {
	if vals := findSetMatches(m.Value); len(vals) > 0 {
		toAdd := make([]labels.Label, 0, len(vals))
		for _, val := range vals {
			toAdd = append(toAdd, labels.Label{Name: m.Name, Value: val})
		}
		return newPostingGroup(false, toAdd, nil), nil
	}
}

// If the matcher selects an empty value, it selects all the series which don't
// have the label name set too. See: https://github.com/prometheus/prometheus/issues/3575
// and https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555.
Expand Down
2 changes: 2 additions & 0 deletions pkg/store/bucket_test.go
Expand Up @@ -1005,6 +1005,7 @@ func benchmarkExpandedPostings(
iNotEmpty := labels.MustNewMatcher(labels.MatchNotEqual, "i", "")
iNot2 := labels.MustNewMatcher(labels.MatchNotEqual, "n", "2"+postingsBenchSuffix)
iNot2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^2.*$")
iRegexSet := labels.MustNewMatcher(labels.MatchRegexp, "i", "0"+postingsBenchSuffix+"|1"+postingsBenchSuffix+"|2"+postingsBenchSuffix)

series = series / 5
cases := []struct {
Expand All @@ -1029,6 +1030,7 @@ func benchmarkExpandedPostings(
{`n="1",i=~"1.+",j="foo"`, []*labels.Matcher{n1, i1Plus, jFoo}, int(float64(series) * 0.011111)},
{`n="1",i=~".+",i!="2",j="foo"`, []*labels.Matcher{n1, iPlus, iNot2, jFoo}, int(float64(series) * 0.1)},
{`n="1",i=~".+",i!~"2.*",j="foo"`, []*labels.Matcher{n1, iPlus, iNot2Star, jFoo}, int(1 + float64(series)*0.088888)},
{`i=~"0|1|2"`, []*labels.Matcher{iRegexSet}, 150}, // 50 series for "0", 50 for "1" and 50 for "2".
}

for _, c := range cases {
Expand Down
63 changes: 63 additions & 0 deletions pkg/store/opts.go
@@ -0,0 +1,63 @@
// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package store

import (
"strings"
"unicode/utf8"
)

// regexMetaCharacterBytes is a 128-bit bitmap of the ASCII regex metacharacters,
// consulted by isRegexMetaCharacter. Byte b is marked by setting bit b/16 of
// element b%16. It is populated once in init.
var regexMetaCharacterBytes [16]byte

func init() {
	// The backslash is deliberately not listed here; only these characters
	// are marked in the bitmap.
	const metaCharacters = `.+*?()|[]{}^$`
	for i := 0; i < len(metaCharacters); i++ {
		c := metaCharacters[i]
		regexMetaCharacterBytes[c%16] |= 1 << (c / 16)
	}
}

// isRegexMetaCharacter reports whether byte b is a regex metacharacter, i.e.
// one that needs to be escaped to be matched literally.
func isRegexMetaCharacter(b byte) bool {
	// Bytes outside the ASCII range are never metacharacters, and shifting by
	// b/16 would overflow the 8 bits of a bitmap element for them.
	if b >= utf8.RuneSelf {
		return false
	}
	mask := byte(1) << (b / 16)
	return regexMetaCharacterBytes[b%16]&mask != 0
}

// findSetMatches returns the literal values matched by pattern when pattern is
// a plain alternation of literals, such as `foo|bar|baz`.
//
// Copied from Prometheus querier.go, removed check for Prometheus wrapper.
//
// Escaped regex metacharacters and escaped backslashes are unescaped in the
// returned values (`fo\.o` yields `fo.o`). It returns nil when the pattern
// cannot be expressed as a complete list of non-empty literal values:
//   - it contains an unescaped metacharacter other than '|';
//   - it contains an escape of anything but a metacharacter or a backslash
//     (e.g. `\d`);
//   - it ends with a dangling backslash;
//   - any alternative is empty (e.g. `foo|` or an empty pattern), because such
//     a pattern also matches the empty string, which the result cannot carry.
func findSetMatches(pattern string) []string {
	escaped := false
	sets := []*strings.Builder{{}}
	for i := 0; i < len(pattern); i++ {
		c := pattern[i]
		current := sets[len(sets)-1]
		switch {
		case escaped:
			// Only metacharacters and the backslash itself may be escaped;
			// anything else (`\d`, `\w`, ...) is a character class or similar.
			if !isRegexMetaCharacter(c) && c != '\\' {
				return nil
			}
			current.WriteByte(c)
			escaped = false
		case c == '\\':
			escaped = true
		case c == '|':
			// An unescaped '|' starts the next alternative.
			sets = append(sets, &strings.Builder{})
		case isRegexMetaCharacter(c):
			return nil
		default:
			current.WriteByte(c)
		}
	}
	if escaped {
		// Dangling backslash at the end of the pattern.
		return nil
	}

	matches := make([]string, 0, len(sets))
	for _, s := range sets {
		if s.Len() == 0 {
			// An empty alternative means the pattern also matches "", so the
			// set of literals would be incomplete. Give up on the optimization.
			return nil
		}
		matches = append(matches, s.String())
	}
	return matches
}
53 changes: 53 additions & 0 deletions pkg/store/opts_test.go
@@ -0,0 +1,53 @@
// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package store

import (
"testing"

"github.com/thanos-io/thanos/pkg/testutil"
)

// TestFindSetMatches checks which regex patterns findSetMatches turns into a
// list of literal values, and which ones it rejects.
// Refer to https://github.com/prometheus/prometheus/issues/2651.
func TestFindSetMatches(t *testing.T) {
	type testCase struct {
		pattern string
		exp     []string
	}

	testCases := []testCase{
		// Simple sets.
		{pattern: "foo|bar|baz", exp: []string{"foo", "bar", "baz"}},
		// Simple sets containing escaped characters.
		{pattern: "fo\\.o|bar\\?|\\^baz", exp: []string{"fo.o", "bar?", "^baz"}},
		// Simple sets containing special characters without escaping.
		{pattern: "fo.o|bar?|^baz", exp: nil},
		// Escaped alternation characters are kept as part of a single literal value.
		{pattern: "foo\\|bar\\|baz", exp: []string{"foo|bar|baz"}},
	}

	for i := range testCases {
		tc := testCases[i]
		got := findSetMatches(tc.pattern)
		testutil.Equals(t, tc.exp, got)
	}
}

0 comments on commit 6452f5d

Please sign in to comment.