Skip to content

Commit

Permalink
Merge pull request FeatureBaseDB#69 from seebs/ctbench
Browse files Browse the repository at this point in the history
Container op benchmarks
  • Loading branch information
seebs committed Oct 22, 2020
2 parents d12639b + 8095455 commit 9eb251e
Show file tree
Hide file tree
Showing 7 changed files with 443 additions and 11 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ require (
github.com/hashicorp/memberlist v0.1.3
github.com/improbable-eng/grpc-web v0.13.0
github.com/lib/pq v1.8.0
github.com/molecula/apophenia v0.0.0-20190827192002-68b7a14a478b
github.com/opentracing/opentracing-go v1.1.0
github.com/pelletier/go-toml v1.2.0
github.com/pkg/errors v0.9.1
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQz
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/molecula/apophenia v0.0.0-20190827192002-68b7a14a478b h1:cZADDaNYM7xn/nklO3g198JerGQjadFuA0ofxBJgK0Y=
github.com/molecula/apophenia v0.0.0-20190827192002-68b7a14a478b/go.mod h1:uXd1BiH7xLmgkhVmspdJLENv6uGWrTL/MQX2TN7Yz9s=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223 h1:F9x/1yl3T2AeKLr2AMdilSD8+f9bvMnNN8VS5iDtovc=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
Expand Down
1 change: 1 addition & 0 deletions license.exceptions
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
./lru/lru.go
./roaring/btree.go
./roaring/btree_test.go
./roaring/containerarchetype_string.go
./proto/pilosa.pb.go
./logger/filewriter.go
./logger/filewriter_test.go
Expand Down
128 changes: 128 additions & 0 deletions roaring/benchpretty/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// Copyright 2019 Pilosa Corp.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"bufio"
"fmt"
"log"
"os"
"regexp"
"sort"
"strconv"
"strings"

"github.com/pilosa/pilosa/v2/roaring"
)

// pattern matches one line of benchmark output for the BenchmarkCtOps
// benchmarks. The capture groups are, in order: the first archetype, the
// second archetype, the operation name, the numeric suffix that follows the
// benchmark name (presumably the GOMAXPROCS value appended by go test), the
// iteration count, and the ns/op figure.
var pattern = regexp.MustCompile(`^BenchmarkCtOps/([^/]+)/([^/]+)/([^-]+)-([0-9]+)\s*([0-9]+)\s*([0-9.]+) ns/op`)

// parseFile reads the benchmark output stored at path and adds the ns/op
// value of every BenchmarkCtOps line to benchmarks[op][first][second]; values
// for a repeated (op, first, second) triple are summed. Lines that do not
// start with "BenchmarkCtOps/" are ignored.
//
// known must contain every valid archetype name as a key. All of its values
// are reset to false on entry and set to true for each archetype that shows
// up in the file; archetypes that never appear are reported with a warning on
// standard output (in sorted order).
//
// An error is returned if the file cannot be opened or read, if a
// BenchmarkCtOps line does not match the expected format, if a line names an
// archetype that is not in known, or if the ns/op value is not a valid float.
func parseFile(path string, benchmarks map[string]map[string]map[string]float64, known map[string]bool) error {
	// Unset all the seen flags in the known map. If we end up with any
	// unseen, the benchmarks are incomplete.
	for k := range known {
		known[k] = false
	}
	file, err := os.Open(path)
	if err != nil {
		return err
	}
	// The file is only read from, so a failure to close it cannot lose data.
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, "BenchmarkCtOps/") {
			continue
		}
		matches := pattern.FindStringSubmatch(line)
		if matches == nil {
			return fmt.Errorf("can't parse line: '%s'", line)
		}
		t1, t2, op := matches[1], matches[2], matches[3]
		if _, ok := known[t1]; !ok {
			return fmt.Errorf("unknown archetype '%s'", t1)
		}
		if _, ok := known[t2]; !ok {
			return fmt.Errorf("unknown archetype '%s'", t2)
		}
		known[t1] = true
		known[t2] = true
		nsPerOp, err := strconv.ParseFloat(matches[6], 64)
		if err != nil {
			return fmt.Errorf("parsing float [%s]: %v", matches[6], err)
		}
		if benchmarks[op] == nil {
			benchmarks[op] = make(map[string]map[string]float64, 16)
		}
		if benchmarks[op][t1] == nil {
			benchmarks[op][t1] = make(map[string]float64, 16)
		}
		benchmarks[op][t1][t2] += nsPerOp
	}
	// Scan returns false both at end of input and on failure (for example a
	// line longer than the scanner's buffer); without this check a failure
	// would silently yield partial results.
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("reading benchmark output: %v", err)
	}

	// Report missing archetypes in a stable order rather than map order.
	missing := make([]string, 0, len(known))
	for k, seen := range known {
		if !seen {
			missing = append(missing, k)
		}
	}
	sort.Strings(missing)
	for _, k := range missing {
		fmt.Printf("warning: container archetype '%s' missing in benchmarks\n", k)
	}
	return nil
}

// main parses every benchmark output file named on the command line and
// prints, for each operation found (in sorted order), a table with one row
// and one column per container archetype. A cell shows the value accumulated
// for that (row, column) pair, or "--" when no benchmark covered it. The
// program exits with a fatal log message if a file cannot be parsed or if no
// benchmarks were found at all.
func main() {
	names := roaring.ContainerArchetypeNames

	// Every column is as wide as the longest archetype name.
	width := 0
	seen := make(map[string]bool, len(names))
	for i := range names {
		seen[names[i]] = false
		if n := len(names[i]); n > width {
			width = n
		}
	}

	results := make(map[string]map[string]map[string]float64, 8)
	for _, path := range os.Args[1:] {
		if err := parseFile(path, results, seen); err != nil {
			log.Fatalf("parsing '%s': %v", path, err)
		}
	}
	if len(results) == 0 {
		log.Fatalf("no benchmarks parsed?")
	}

	// Map iteration order is random; sort so the report is stable.
	opNames := make([]string, 0, 8)
	for opName := range results {
		opNames = append(opNames, opName)
	}
	sort.Strings(opNames)

	for _, op := range opNames {
		table := results[op]

		// Header: a blank cell over the row labels, then the column names.
		fmt.Printf("%s:\n", op)
		fmt.Printf("%*s ", width, "")
		for _, col := range names {
			fmt.Printf(" %*s", width, col)
		}
		fmt.Print("\n")

		for _, row := range names {
			fmt.Printf("%*s ", width, row)
			// Indexing a missing row yields a nil map, whose lookups
			// simply report "not found".
			cells := table[row]
			for _, col := range names {
				if ns, found := cells[col]; found {
					fmt.Printf(" %*.1f", width, ns)
				} else {
					fmt.Printf(" %*s", width, "--")
				}
			}
			fmt.Print("\n")
		}
		fmt.Print("\n")
	}
}
205 changes: 205 additions & 0 deletions roaring/container_archetypes.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
// Copyright 2019 Pilosa Corp.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package roaring

import (
"fmt"
"math/rand"
"sort"
"strconv"
"strings"
"sync"

"github.com/molecula/apophenia"
)

// ContainerArchetypeNames is the list of supported container archetypes
// used in some testing. This is exported for use in a benchmark-analysis
// tool.
//
// Each name is interpreted by makeArchetypalContainer:
//   - "Empty" is an array container with no values.
//   - "Ary<N>" is an array container holding N values.
//   - "RunFull" is a single run covering 0 through 65535.
//   - "RunSplit" is two runs separated by a gap near the middle of the range.
//   - "Run<N>" is a run container with N runs; a "Small" suffix selects
//     shorter runs.
//   - "BM<N>" is a bitmap container with N bits set.
//
// The order matters: createContainerArchetypes stores the containers for
// the i-th name at index i of its result.
var ContainerArchetypeNames = []string{
"Empty",
"Ary1",
"Ary16",
"Ary256",
"Ary512",
"Ary1024",
"Ary4096",
"RunFull",
"RunSplit",
"Run16",
"Run16Small",
"Run256",
"Run256Small",
"Run1024",
"BM512",
"BM1024",
"BM4096",
"BM4097",
"BM32768",
"BM65000",
}

// Package-level cache behind InitContainerArchetypes.
var (
	// containerArchetypes holds the generated containers; the outer index
	// matches the index of the archetype in ContainerArchetypeNames.
	containerArchetypes [][]*Container
	// containerArchetypesErr is the error, if any, from generating them.
	containerArchetypesErr error
	// initContainerArchetypes ensures the containers are generated only once.
	initContainerArchetypes sync.Once
)

// makeArchetypalContainer builds one container of the archetype identified
// by name, drawing all randomness from rng so that a fixed seed reproduces
// the same containers.
//
// Recognized names:
//   - "Empty": an array container with no values.
//   - "Ary<N>": an array container with N values taken from an apophenia
//     permutation over 65536 (presumably distinct), sorted ascending.
//   - "RunFull": a run container with the single run [0, 65535].
//   - "RunSplit": two runs, one starting at 0 and one ending at 65535, with
//     a randomly sized gap around the middle of the range.
//   - "Run<N>" / "Run<N>Small": a run container with N runs spread across
//     the range; the "Small" suffix selects much shorter runs.
//   - "BM<N>": a bitmap container with exactly N randomly chosen bits set.
//
// It returns an error when the numeric part of a name cannot be parsed or is
// out of range, when the generated runs or bit count are inconsistent, or
// when name matches no known archetype.
func makeArchetypalContainer(rng *rand.Rand, name string) (*Container, error) {
	var c *Container
	switch {
	case name == "Empty":
		c = NewContainerArray(nil)
	case strings.HasPrefix(name, "Ary"):
		size, err := strconv.Atoi(name[3:])
		if err != nil {
			return nil, fmt.Errorf("can't parse array size: %v", err)
		}
		// A container covers 65536 values; anything else cannot be built
		// (and a negative size would panic in make).
		if size < 0 || size > 65536 {
			return nil, fmt.Errorf("array size %d out of range in '%s'", size, name)
		}
		array := make([]uint16, size)
		seq := apophenia.NewSequence(rng.Int63())
		perm, err := apophenia.NewPermutation(65536, 0, seq)
		if err != nil {
			return nil, err
		}
		for i := 0; i < size; i++ {
			array[i] = uint16(perm.Next())
		}
		sort.Slice(array, func(a, b int) bool { return array[a] < array[b] })
		c = NewContainerArray(array)
	case name == "RunFull":
		c = NewContainerRun([]Interval16{{Start: 0, Last: 65535}})
	case name == "RunSplit":
		runs := []Interval16{
			{Start: 0, Last: 32700 + uint16(rng.Intn(30))},
			{Start: 32768 + uint16(rng.Intn(30)), Last: 65535},
		}
		c = NewContainerRun(runs)
	case strings.HasPrefix(name, "Run"):
		countEnd := len(name)
		small := strings.HasSuffix(name, "Small")
		if small {
			countEnd -= 5
		}
		count, err := strconv.Atoi(name[3:countEnd])
		if err != nil {
			return nil, fmt.Errorf("can't parse run count in '%s': %v", name, err)
		}
		if count < 0 || count > 65535 {
			return nil, fmt.Errorf("run count %d out of range in '%s'", count, name)
		}
		runs := make([]Interval16, count)
		// To size the runs, divide the total space into count+1 strides and
		// place, around the middle of each of the first count strides, a
		// run that is shorter than a stride.
		stride := int32(65535 / (count + 1))
		upper := stride - 10
		lower := stride / 10
		if small {
			lower = 3
			upper = lower + (stride / 20)
		}
		variance := upper - lower
		// rng.Int31n panics for a non-positive argument, which happens when
		// there are too many runs for the strides to leave any slack.
		if count > 0 && variance <= 0 {
			return nil, fmt.Errorf("run count %d too large in '%s'", count, name)
		}
		next := int32(0)
		prev := int32(0)
		for i := 0; i < count; i++ {
			next += stride
			middle := (prev + next) / 2
			// The order of these two draws is part of the reproducible
			// output for a given seed; do not reorder them.
			runSize := rng.Int31n(variance) + lower
			offset := rng.Int31n(variance)
			runs[i].Start = uint16(middle + offset - (runSize / 2))
			runs[i].Last = runs[i].Start + uint16(runSize)
			if runs[i].Last < runs[i].Start {
				return nil, fmt.Errorf("fatal, run %d starts at %d, tries to end at %d", i, runs[i].Start, runs[i].Last)
			}
			prev = next
			if i > 0 && runs[i].Start <= runs[i-1].Last {
				// This run overlaps the previous one; move its start past
				// the previous run, leaving a one-value gap, unless there
				// is no room left to do so.
				if runs[i-1].Last > 65533 {
					return nil, fmt.Errorf("fatal, run %d starts at %d, previous run ended at %d",
						i, runs[i].Start, runs[i-1].Last)
				}
				runs[i].Start = runs[i-1].Last + 2
				if runs[i].Last < runs[i].Start {
					runs[i].Last = runs[i].Start
				}
			}
		}
		c = NewContainerRun(runs)
	case strings.HasPrefix(name, "BM"):
		size, err := strconv.Atoi(name[2:])
		if err != nil {
			return nil, fmt.Errorf("can't parse bitmap size: %v", err)
		}
		// Out-of-range sizes would otherwise silently produce an empty or
		// completely full bitmap instead of the requested bit count.
		if size < 0 || size > 65536 {
			return nil, fmt.Errorf("bitmap size %d out of range in '%s'", size, name)
		}
		bitmap := make([]uint64, bitmapN)
		n := int32(0)
		flip := false
		// Picking random bits sometimes overlaps, so we want to
		// keep trying until we get the requested number. But that's
		// really slow for N close to the maximum, so if we want more
		// than half the bits set, we'll do it backwards and then
		// invert the bits.
		bits := int32(size)
		if size > 32768 {
			flip = true
			bits = 65536 - bits
		}
		for n < bits {
			pos := (rng.Uint64() & 65535)
			bit := uint64(1 << (pos & 63))
			if bitmap[pos/64]&bit == 0 {
				n++
				bitmap[pos/64] |= bit
			}
		}
		if flip {
			for i := range bitmap {
				bitmap[i] = ^bitmap[i]
			}
			n = 65536 - n
		}
		c = NewContainerBitmap(int(n), bitmap)
		// Cross-check the bookkeeping against the container's own count.
		count := c.count()
		if count != n {
			return nil, fmt.Errorf("bitmap should have %d bits, has %d", n, count)
		}
	default:
		// Without this, an unrecognized name would yield a nil container
		// and a nil error.
		return nil, fmt.Errorf("unknown container archetype '%s'", name)
	}
	return c, nil
}

// InitContainerArchetypes generates the archetype containers the first time
// it is called, using createContainerArchetypes with 8 containers per
// archetype, and on every call returns the containers and error recorded by
// that single generation.
func InitContainerArchetypes() ([][]*Container, error) {
	build := func() {
		cats, err := createContainerArchetypes(8)
		containerArchetypes, containerArchetypesErr = cats, err
	}
	initContainerArchetypes.Do(build)
	return containerArchetypes, containerArchetypesErr
}

// createContainerArchetypes generates count containers for every name in
// ContainerArchetypeNames. The result's outer index matches the name's index
// in that list. If any container cannot be generated, it returns nil and
// that error.
func createContainerArchetypes(count int) (cats [][]*Container, err error) {
	// The seed value is arbitrary; fixing it keeps the generated containers
	// the same from one run to the next.
	rng := rand.New(rand.NewSource(23))

	cats = make([][]*Container, len(ContainerArchetypeNames))
	for i := range cats {
		name := ContainerArchetypeNames[i]
		group := make([]*Container, count)
		for j := range group {
			c, genErr := makeArchetypalContainer(rng, name)
			if genErr != nil {
				return nil, genErr
			}
			group[j] = c
		}
		cats[i] = group
	}
	return cats, nil
}
23 changes: 12 additions & 11 deletions roaring/roaring.go
Original file line number Diff line number Diff line change
Expand Up @@ -4238,21 +4238,22 @@ func intersectBitmapRun(a, b *Container) *Container {
statsHit("intersect/BitmapRun")
var output *Container
runs := b.runs()
if b.N() <= ArrayMaxSize || a.N() <= ArrayMaxSize {
// output is array container
array := make([]uint16, 0, b.N())
// Intersection will be array-sized for sure if either of the inputs
// is array-sized.
if b.N() <= ArrayMaxSize {
var scratch [ArrayMaxSize]uint16
n := 0
for _, iv := range runs {
for i := iv.Start; i <= iv.Last; i++ {
if a.bitmapContains(i) {
array = append(array, i)
}
// If the run ends the container, break to avoid an infinite loop.
if i == 65535 {
break
for i := int(iv.Start); i <= int(iv.Last); i++ {
if a.bitmapContains(uint16(i)) {
scratch[n] = uint16(i)
n++
}
}
}

// output is array container
array := make([]uint16, n)
copy(array, scratch[:])
output = NewContainerArray(array)
} else {
// right now this iterates through the runs and sets integers in the
Expand Down
Loading

0 comments on commit 9eb251e

Please sign in to comment.