Skip to content

Commit

Permalink
types: New algorithm for (Any).Union + new benchmarks (#6228)
Browse files Browse the repository at this point in the history
This commit swaps out the old algorithm in the `(Any).Union` method, which
would explode on some inputs, for one that uses dramatically less memory
and runs in nearly linear time and memory usage.

This commit also includes improvements to the
`BenchmarkCompileDynamicPolicy` benchmark, as well as new benchmarks for
the `Any` type's `Merge` and `Union` methods.

Signed-off-by: Philip Conrad <philipaconrad@gmail.com>
  • Loading branch information
philipaconrad committed Sep 20, 2023
1 parent 38733ed commit 434d324
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 17 deletions.
9 changes: 5 additions & 4 deletions compile/compile_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package compile
import (
"context"
"fmt"
"io/fs"
"strings"
"testing"

Expand All @@ -22,11 +23,11 @@ func BenchmarkCompileDynamicPolicy(b *testing.B) {

for _, n := range numPolicies {
testcase := generateDynamicPolicyBenchmarkData(n)
b.Run(fmt.Sprintf("%d", n), func(b *testing.B) {
test.WithTempFS(testcase, func(root string) {
b.ResetTimer()

test.WithTestFS(testcase, true, func(root string, fileSys fs.FS) {
b.ResetTimer()
b.Run(fmt.Sprintf("%d", n), func(b *testing.B) {
compiler := New().
WithFS(fileSys).
WithPaths(root)

err := compiler.Build(context.Background())
Expand Down
75 changes: 62 additions & 13 deletions types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ func (t *Array) String() string {
for _, tpe := range t.static {
buf = append(buf, Sprint(tpe))
}
var repr = prefix
repr := prefix
if len(buf) > 0 {
repr += "<" + strings.Join(buf, ", ") + ">"
}
Expand Down Expand Up @@ -355,7 +355,7 @@ func (t *Object) String() string {
for _, p := range t.static {
buf = append(buf, fmt.Sprintf("%v: %v", p.Key, Sprint(p.Value)))
}
var repr = prefix
repr := prefix
if len(buf) > 0 {
repr += "<" + strings.Join(buf, ", ") + ">"
}
Expand Down Expand Up @@ -412,7 +412,6 @@ func (t *Object) toMap() map[string]interface{} {

// Select returns the type of the named property.
func (t *Object) Select(name interface{}) Type {

pos := sort.Search(len(t.static), func(x int) bool {
return util.Compare(t.static[x].Key, name) >= 0
})
Expand Down Expand Up @@ -566,22 +565,73 @@ func (t Any) Merge(other Type) Any {
}

// Union returns a new Any type that is the union of the two Any types.
// If either the receiver or other is empty, that empty Any is returned as-is,
// because the blank Any is the more general type.
// Note(philipc): The two Any slices MUST be sorted before running Union,
// or else this method will fail to merge the two slices correctly.
func (t Any) Union(other Any) Any {
// NOTE(review): this view appears to interleave removed and added diff lines
// (the len(t)/len(other) checks, the cpy-based loop, and the trailing
// sort.Sort/return cpy look like the previous implementation) — confirm
// against the committed file.
if len(t) == 0 {
lenT := len(t)
lenOther := len(other)
// Return the more general (blank) Any type if present.
if lenT == 0 {
return t
}
if len(other) == 0 {
if lenOther == 0 {
return other
}
cpy := make(Any, len(t))
copy(cpy, t)
for i := range other {
if !cpy.Contains(other[i]) {
cpy = append(cpy, other[i])
// Prealloc the output list. The capacity is the longer of the two input
// lengths, which is only a lower bound on the merged length; append grows
// the slice when the shorter input contributes elements of its own.
maxLen := lenT
if lenT < lenOther {
maxLen = lenOther
}
merged := make(Any, 0, maxLen)
// Note(philipc): Create a merged slice, doing the minimum number of
// comparisons along the way. We treat this as a problem of merging two
// sorted lists that might have duplicates. This specifically saves us
// from cases where one list might be *much* longer than the other.
// Algorithm:
// Assume:
// - List A
// - List B
// - List Output
// - Idx_a, Idx_b
// Procedure:
// - While Idx_a < len(A) or Idx_b < len(B)
// - If A is exhausted: append the rest of B to Output, stop
// - If B is exhausted: append the rest of A to Output, stop
// - Otherwise compare head(A) and head(B)
// - Cases:
// - A < B: Append head(A) to Output, advance Idx_a
// - A == B: Append head(A) to Output, advance Idx_a, Idx_b
// - A > B: Append head(B) to Output, advance Idx_b
// - Return output
idxA := 0
idxB := 0
for idxA < lenT || idxB < lenOther {
// Early-exit cases:
if idxA == lenT {
// Ran out of elements in t. Copy over what's left from other.
merged = append(merged, other[idxB:]...)
break
} else if idxB == lenOther {
// Ran out of elements in other. Copy over what's left from t.
merged = append(merged, t[idxA:]...)
break
}
// Normal selection of next element to merge:
// NOTE(review): only the results -1, 0 and 1 are handled; any other value
// from Compare would advance neither index — confirm Compare's range.
switch Compare(t[idxA], other[idxB]) {
// A < B:
case -1:
merged = append(merged, t[idxA])
idxA++
// A == B: keep a single copy and skip the duplicate in other.
case 0:
merged = append(merged, t[idxA])
idxA++
idxB++
// A > B:
case 1:
merged = append(merged, other[idxB])
idxB++
}
}
sort.Sort(typeSlice(cpy))
return cpy
return merged
}

func (t Any) String() string {
Expand Down Expand Up @@ -706,7 +756,6 @@ func (t *Function) MarshalJSON() ([]byte, error) {

// UnmarshalJSON decodes the JSON serialized function declaration.
func (t *Function) UnmarshalJSON(bs []byte) error {

tpe, err := Unmarshal(bs)
if err != nil {
return err
Expand Down
54 changes: 54 additions & 0 deletions types/types_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,57 @@ func generateType(n int) Type {
}
return NewObject(static, nil)
}

// generateTypeWithPrefix builds an object type with n static properties and
// no dynamic property. The i-th property is keyed by prefix followed by the
// decimal index i and is paired with S.
func generateTypeWithPrefix(n int, prefix string) Type {
	props := make([]*StaticProperty, 0, n)
	for idx := 0; idx < n; idx++ {
		key := fmt.Sprintf("%s%d", prefix, idx)
		props = append(props, NewStaticProperty(key, S))
	}
	return NewObject(props, nil)
}

// BenchmarkAnyMergeOne measures (Any).Merge when the single type N is merged
// into Any lists of increasing size. Each sub-benchmark is named after the
// size of the list being merged into.
func BenchmarkAnyMergeOne(b *testing.B) {
	sizes := []int{100, 500, 1000, 5000, 10000}
	for _, size := range sizes {
		// Build the input outside b.Run so the setup cost is never part of
		// the sub-benchmark's timed region.
		anyA := Any(make([]Type, 0, size))
		for i := 0; i < size; i++ {
			anyA = append(anyA, generateType(i))
		}
		tpeB := N
		want := len(anyA) + 1
		b.Run(fmt.Sprint(size), func(b *testing.B) {
			// Reset on the sub-benchmark's own b, and run the operation b.N
			// times so the reported ns/op reflects the cost of one Merge.
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				result := anyA.Merge(tpeB)
				if len(result) != want {
					b.Fatalf("Expected length of merged result to be: %d, got: %d", want, len(result))
				}
			}
		})
	}
}

// BenchmarkAnyUnionAllUniqueTypes builds two Any type lists of unique and
// different types for every pairing of the sizes below, then measures
// (Any).Union on them. Sub-benchmarks are named "<sizeA>x<sizeB>".
func BenchmarkAnyUnionAllUniqueTypes(b *testing.B) {
	sizes := []int{100, 250, 500, 1000, 2500}
	for _, sizeA := range sizes {
		for _, sizeB := range sizes {
			// Build both inputs outside b.Run so the setup cost is never
			// part of the sub-benchmark's timed region.
			anyA := Any(make([]Type, 0, sizeA))
			for i := 0; i < sizeA; i++ {
				anyA = append(anyA, generateType(i))
			}
			anyB := Any(make([]Type, 0, sizeB))
			for i := 0; i < sizeB; i++ {
				anyB = append(anyB, generateTypeWithPrefix(i, "B-"))
			}
			// Expect length to be A + B - 1, because the `object` type is
			// present in both Any type sets.
			want := len(anyA) + len(anyB) - 1
			b.Run(fmt.Sprintf("%dx%d", sizeA, sizeB), func(b *testing.B) {
				// Reset on the sub-benchmark's own b, and run the operation
				// b.N times so the reported ns/op reflects one Union call.
				b.ResetTimer()
				for i := 0; i < b.N; i++ {
					resultA2B := anyA.Union(anyB)
					if len(resultA2B) != want {
						// Report the same expected value the check uses.
						b.Fatalf("Expected length of unioned result to be: %d, got: %d", want, len(resultA2B))
					}
				}
			})
		}
	}
}

0 comments on commit 434d324

Please sign in to comment.