Skip to content

Commit

Permalink
add pkg
Browse files Browse the repository at this point in the history
  • Loading branch information
wasaga committed Dec 1, 2023
1 parent 7e1c167 commit 2071140
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 1 deletion.
2 changes: 1 addition & 1 deletion go.mod
Expand Up @@ -13,6 +13,7 @@ require (
github.com/aws/aws-sdk-go-v2 v1.22.2
github.com/aws/aws-sdk-go-v2/config v1.18.42
github.com/aws/aws-sdk-go-v2/service/s3 v1.42.1
github.com/bits-and-blooms/bitset v1.11.0
github.com/caddyserver/certmagic v0.19.2
github.com/cenkalti/backoff/v4 v4.2.1
github.com/cespare/xxhash/v2 v2.2.0
Expand Down Expand Up @@ -108,7 +109,6 @@ require (
github.com/aws/aws-sdk-go-v2/service/sts v1.22.0 // indirect
github.com/aws/smithy-go v1.16.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bits-and-blooms/bitset v1.11.0 // indirect
github.com/bufbuild/buf v1.26.1 // indirect
github.com/bufbuild/connect-go v1.9.0 // indirect
github.com/bufbuild/connect-opentelemetry-go v0.4.0 // indirect
Expand Down
64 changes: 64 additions & 0 deletions pkg/counter/counter.go
@@ -0,0 +1,64 @@
// Package counter implements a linear probabilistic counter that estimates the number of distinct elements.
package counter

import (
"hash/crc32"
"math"

"github.com/bits-and-blooms/bitset"
)

const (
// DefaultCap is the default maximum capacity (number of unique elements)
// for the counter: 1 << 19 = 524288.
DefaultCap = 1 << 19
// loadFactor is the divisor New applies to the requested capacity to get
// the bitset length, i.e. the ratio of expected unique elements to bits.
loadFactor = 4
)

// Counter implements a simple probabilistic counter estimator with 1% estimation accuracy
// as described in https://www.waitingforcode.com/big-data-algorithms/cardinality-estimation-linear-probabilistic-counting/read
//
// Keys are recorded with Mark, which sets one bit per hashed key; Count turns
// the ratio of unset bits into an estimate of the number of distinct keys.
type Counter struct {
// Bits is the bitset backing the counter. It is exported (with the JSON
// name "bits") and is also what ToBinary and FromBinary serialize.
Bits *bitset.BitSet `json:"bits"`
}

// New creates a counter sized for at most cap unique elements.
//
// The backing bitset holds cap/loadFactor bits: linear counting remains usable
// at load factors (unique values / table size) above 1.0, so the table can be
// smaller than the expected cardinality. At least one bit is always allocated,
// because a zero-length bitset would make Mark compute a modulo by zero and
// would leave Count without a defined result.
func New(cap uint) *Counter {
	size := cap / loadFactor
	if size == 0 {
		// cap < loadFactor: integer division truncated to zero.
		size = 1
	}
	return &Counter{Bits: bitset.New(size)}
}

// FromBinary rebuilds a counter from the bytes produced by ToBinary.
// It returns a nil counter and the decoding error if data cannot be
// unmarshaled into a bitset.
func FromBinary(data []byte) (*Counter, error) {
	bits := new(bitset.BitSet)
	err := bits.UnmarshalBinary(data)
	if err != nil {
		return nil, err
	}
	return &Counter{Bits: bits}, nil
}

// ToBinary serializes the counter state by marshaling its bitset.
// The result can be turned back into a counter with FromBinary.
func (c *Counter) ToBinary() ([]byte, error) {
	data, err := c.Bits.MarshalBinary()
	return data, err
}

// Reset clears every bit of the counter, discarding all marked keys.
func (c *Counter) Reset() {
	bits := c.Bits
	bits.ClearAll()
}

// Mark marks key as present in the set.
//
// The key is hashed with CRC-32 (IEEE) and the hash, reduced modulo the bitset
// length, selects the bit to set. A counter whose bitset has zero length
// cannot record anything, so Mark returns without doing anything instead of
// panicking on a modulo by zero.
func (c *Counter) Mark(key string) {
	size := c.Bits.Len()
	if size == 0 {
		return
	}
	hash := crc32.ChecksumIEEE([]byte(key))
	c.Bits.Set(uint(hash) % size)
}

// Count returns an estimate of distinct elements in the set.
//
// The estimate is the linear counting formula -m * ln(z/m), where m is the
// bitset length and z the number of unset bits, truncated to an integer.
//
// Edge cases:
//   - a zero-length bitset yields 0 (the formula would otherwise be 0/0);
//   - a saturated bitset (no unset bits) is treated as having one unset bit,
//     which gives the largest finite estimate the counter can express rather
//     than converting +Inf to an integer, which is implementation-defined;
//   - the result is never smaller than the number of set bits, since each set
//     bit corresponds to at least one distinct key.
func (c *Counter) Count() uint {
	size := float64(c.Bits.Len())
	if size == 0 {
		return 0
	}

	set := c.Bits.Count()
	zeros := size - float64(set)
	if zeros < 1 {
		// Saturated: ln(0) is -Inf, so clamp to the last representable state.
		zeros = 1
	}

	estimate := uint(-size * math.Log(zeros/size))
	if estimate < set {
		// Only reachable for tiny saturated bitsets (length 1 or 2).
		return set
	}
	return estimate
}
77 changes: 77 additions & 0 deletions pkg/counter/counter_test.go
@@ -0,0 +1,77 @@
package counter_test

import (
"fmt"
"math"
"math/rand"
"testing"

"github.com/google/uuid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/pomerium/pomerium/pkg/counter"
)

// stableRandomUUIDs returns n UUID strings drawn from a pseudo-random source
// with a fixed seed, so every call yields the same sequence.
func stableRandomUUIDs(n int) []string {
	src := rand.New(rand.NewSource(1234567890))
	ids := make([]string, n)
	for i := range ids {
		// The error is ignored: (*rand.Rand).Read never returns a non-nil error.
		id, _ := uuid.NewRandomFromReader(src)
		ids[i] = id.String()
	}
	return ids
}

func TestStableRandomUUIDs(t *testing.T) {
t.Parallel()

assert.Equal(t, stableRandomUUIDs(20), stableRandomUUIDs(20))
}

// TestCounter fills a counter created for limit elements to 80% of that limit
// and checks that the estimate is within 1% (rounded up) of the true count.
//
// NOTE(review): stableRandomUUIDs uses a fixed seed, so all 20 subtests mark
// the same ids; the loop repeats one scenario rather than varying the input.
func TestCounter(t *testing.T) {
	t.Parallel()

	limit := 1000
	n := (limit * 8) / 10
	tolerance := math.Ceil(float64(n) * 0.01)

	for j := 0; j < 20; j++ {
		name := fmt.Sprint(j)
		t.Run(name, func(t *testing.T) {
			c := counter.New(uint(limit))
			ids := stableRandomUUIDs(n)
			for i := range ids {
				c.Mark(ids[i])
			}
			diff := math.Abs(float64(n) - float64(c.Count()))
			assert.LessOrEqual(t, diff, tolerance)
		})
	}
}

// TestSerialize verifies that a counter survives a ToBinary/FromBinary round
// trip with its estimate unchanged.
func TestSerialize(t *testing.T) {
	t.Parallel()

	const want = 20

	original := counter.New(counter.DefaultCap)
	for _, id := range stableRandomUUIDs(want) {
		original.Mark(id)
	}
	assert.EqualValues(t, want, original.Count())

	encoded, err := original.ToBinary()
	require.NoError(t, err)

	restored, err := counter.FromBinary(encoded)
	require.NoError(t, err)

	assert.EqualValues(t, want, restored.Count())
}

// TestReset verifies that Reset brings a populated counter back to an
// estimate of zero.
func TestReset(t *testing.T) {
	t.Parallel()

	const marked = 20

	c := counter.New(counter.DefaultCap)
	ids := stableRandomUUIDs(marked)
	for i := range ids {
		c.Mark(ids[i])
	}
	assert.EqualValues(t, marked, c.Count())

	c.Reset()
	assert.EqualValues(t, 0, c.Count())
}

0 comments on commit 2071140

Please sign in to comment.