From 7e849b7b1a26989c959fbf5ae955c66d29135f7a Mon Sep 17 00:00:00 2001 From: Roman Atachiants Date: Sat, 11 Sep 2021 00:01:02 +0400 Subject: [PATCH] Fixed benchmark example --- examples/bench/README.md | 89 +++++++++++++++++++++------------- examples/bench/bench.go | 100 ++++++++++++++++++++++++--------------- go.mod | 1 - go.sum | 2 - 4 files changed, 117 insertions(+), 75 deletions(-) diff --git a/examples/bench/README.md b/examples/bench/README.md index ad8961f..4ba06e0 100644 --- a/examples/bench/README.md +++ b/examples/bench/README.md @@ -6,39 +6,60 @@ This is an example benchmark with various workloads (90% read / 10% write, etc) Note that the goal of this benchmark is to validate concurrency, not throughput this represents the current "best" case scenario when the updates are random and do less likely to incur contention. Reads, however quite often would hit the same chunks as only the index itself is randomized. +## Results + +Below are some results from running on my 8-core machine (Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz). 
+ ``` - WORK PROCS READS WRITES -90%-10% 1 procs 51,642 txn/s 5,884 txn/s -90%-10% 8 procs 195,201 txn/s 21,803 txn/s -90%-10% 16 procs 311,078 txn/s 34,519 txn/s -90%-10% 32 procs 370,100 txn/s 41,225 txn/s -90%-10% 64 procs 374,964 txn/s 41,582 txn/s -90%-10% 128 procs 347,933 txn/s 38,589 txn/s -90%-10% 256 procs 337,840 txn/s 37,329 txn/s -90%-10% 512 procs 342,272 txn/s 37,692 txn/s -90%-10% 1024 procs 339,367 txn/s 37,049 txn/s -90%-10% 2048 procs 327,060 txn/s 35,568 txn/s -90%-10% 4096 procs 314,160 txn/s 32,818 txn/s -50%-50% 1 procs 28,944 txn/s 29,054 txn/s -50%-50% 8 procs 59,487 txn/s 59,342 txn/s -50%-50% 16 procs 70,271 txn/s 70,276 txn/s -50%-50% 32 procs 70,067 txn/s 69,796 txn/s -50%-50% 64 procs 61,443 txn/s 61,559 txn/s -50%-50% 128 procs 54,985 txn/s 54,760 txn/s -50%-50% 256 procs 53,684 txn/s 53,465 txn/s -50%-50% 512 procs 62,488 txn/s 61,967 txn/s -50%-50% 1024 procs 69,211 txn/s 68,090 txn/s -50%-50% 2048 procs 74,262 txn/s 73,639 txn/s -50%-50% 4096 procs 77,700 txn/s 75,452 txn/s -10%-90% 1 procs 4,811 txn/s 43,825 txn/s -10%-90% 8 procs 8,585 txn/s 77,136 txn/s -10%-90% 16 procs 8,582 txn/s 77,260 txn/s -10%-90% 32 procs 8,866 txn/s 79,127 txn/s -10%-90% 64 procs 8,090 txn/s 73,265 txn/s -10%-90% 128 procs 7,412 txn/s 67,985 txn/s -10%-90% 256 procs 6,473 txn/s 58,903 txn/s -10%-90% 512 procs 6,916 txn/s 61,835 txn/s -10%-90% 1024 procs 7,989 txn/s 71,794 txn/s -10%-90% 2048 procs 8,930 txn/s 78,657 txn/s -10%-90% 4096 procs 9,231 txn/s 81,465 txn/s + WORK PROCS READ RATE WRITE RATE +100%-0% 1 8,149,482 txn/s 0 txn/s +100%-0% 2 12,622,747 txn/s 0 txn/s +100%-0% 4 14,378,647 txn/s 0 txn/s +100%-0% 8 16,298,860 txn/s 0 txn/s +100%-0% 16 16,276,835 txn/s 0 txn/s +100%-0% 32 16,297,247 txn/s 0 txn/s +100%-0% 64 16,214,731 txn/s 0 txn/s +100%-0% 128 16,185,721 txn/s 0 txn/s +100%-0% 256 16,171,638 txn/s 0 txn/s +100%-0% 512 16,237,574 txn/s 0 txn/s +90%-10% 1 2,248,513 txn/s 239,309 txn/s +90%-10% 2 2,297,998 txn/s 226,016 txn/s +90%-10% 4 
1,432,691 txn/s 184,189 txn/s +90%-10% 8 1,112,076 txn/s 153,934 txn/s +90%-10% 16 1,432,723 txn/s 147,244 txn/s +90%-10% 32 1,375,383 txn/s 161,755 txn/s +90%-10% 64 1,441,755 txn/s 144,570 txn/s +90%-10% 128 1,272,174 txn/s 140,107 txn/s +90%-10% 256 925,191 txn/s 105,999 txn/s +90%-10% 512 858,555 txn/s 89,202 txn/s +50%-50% 1 305,245 txn/s 320,159 txn/s +50%-50% 2 262,496 txn/s 250,654 txn/s +50%-50% 4 255,906 txn/s 262,823 txn/s +50%-50% 8 238,096 txn/s 225,565 txn/s +50%-50% 16 236,144 txn/s 240,810 txn/s +50%-50% 32 250,954 txn/s 237,928 txn/s +50%-50% 64 214,474 txn/s 220,495 txn/s +50%-50% 128 156,660 txn/s 162,219 txn/s +50%-50% 256 125,956 txn/s 120,344 txn/s +50%-50% 512 103,619 txn/s 98,510 txn/s +10%-90% 1 40,723 txn/s 339,694 txn/s +10%-90% 2 24,746 txn/s 298,934 txn/s +10%-90% 4 35,483 txn/s 290,769 txn/s +10%-90% 8 34,265 txn/s 279,838 txn/s +10%-90% 16 28,678 txn/s 274,759 txn/s +10%-90% 32 23,662 txn/s 227,651 txn/s +10%-90% 64 36,056 txn/s 208,993 txn/s +10%-90% 128 17,463 txn/s 149,558 txn/s +10%-90% 256 14,125 txn/s 113,701 txn/s +10%-90% 512 11,435 txn/s 96,999 txn/s +0%-100% 1 0 txn/s 345,335 txn/s +0%-100% 2 0 txn/s 297,386 txn/s +0%-100% 4 0 txn/s 300,023 txn/s +0%-100% 8 0 txn/s 276,361 txn/s +0%-100% 16 0 txn/s 243,448 txn/s +0%-100% 32 0 txn/s 208,523 txn/s +0%-100% 64 0 txn/s 195,732 txn/s +0%-100% 128 0 txn/s 145,990 txn/s +0%-100% 256 0 txn/s 110,786 txn/s +0%-100% 512 0 txn/s 94,313 txn/s ``` \ No newline at end of file diff --git a/examples/bench/bench.go b/examples/bench/bench.go index f6399ee..4b1e2f5 100644 --- a/examples/bench/bench.go +++ b/examples/bench/bench.go @@ -7,6 +7,7 @@ import ( "context" "encoding/json" "fmt" + "hash/crc32" "os" "sync" "sync/atomic" @@ -15,7 +16,6 @@ import ( "github.com/dustin/go-humanize" "github.com/kelindar/async" "github.com/kelindar/column" - "github.com/kelindar/rand" ) var ( @@ -28,63 +28,68 @@ func main() { players := column.NewCollection(column.Options{ Capacity: amount, }) - - // insert 
the data first createCollection(players, amount) - // Iterate over various workloads - fmt.Printf(" WORK PROCS READS WRITES\n") - for _, w := range []int{10, 50, 90} { + // This runs point query benchmarks + runBenchmark("Point Reads/Writes", func(writeTxn bool) (reads int, writes int) { + + // To avoid task granularity problem, load up a bit more work on each + // of the goroutines, a few hundred reads should be enough to amortize + // the cost of scheduling goroutines, so we can actually test our code. + for i := 0; i < 1000; i++ { + offset := randN(amount - 1) + if writeTxn { + players.UpdateAt(offset, "balance", func(v column.Cursor) error { + v.SetFloat64(0) + return nil + }) + writes++ + } else { + players.SelectAt(offset, func(v column.Selector) { + _ = v.FloatAt("balance") // Read + }) + reads++ + } + } + return + }) +} - // Iterate over various concurrency levels - for _, n := range []int{1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096} { +// runBenchmark runs a benchmark +func runBenchmark(name string, fn func(bool) (int, int)) { + fmt.Printf("Benchmarking %v ...\n", name) + fmt.Printf("%7v\t%6v\t%17v\t%13v\n", "WORK", "PROCS", "READ RATE", "WRITE RATE") + for _, workload := range []int{0, 10, 50, 90, 100} { - // Create a pool of N goroutines + // Iterate over various concurrency levels + for _, n := range []int{1, 2, 4, 8, 16, 32, 64, 128, 256, 512} { work := make(chan async.Task, n) pool := async.Consume(context.Background(), n, work) - //run(fmt.Sprintf("(%v/%v)-%v", 100-w, w, n), func(b *testing.B) { - var reads int64 - var writes int64 - + var reads, writes int64 var wg sync.WaitGroup - start := time.Now() - for time.Since(start) < 2*time.Second { + for time.Since(start) < time.Second { wg.Add(1) work <- async.NewTask(func(ctx context.Context) (interface{}, error) { defer wg.Done() - offset := uint32(rand.Uint32n(uint32(amount - 1))) - - // Given our write probabiliy, randomly read/write at an offset - if rand.Uint32n(100) < uint32(w) { - 
players.UpdateAt(offset, "balance", func(v column.Cursor) error { - v.SetFloat64(0) - return nil - }) - atomic.AddInt64(&writes, 1) - } else { - players.SelectAt(offset, func(v column.Selector) { - _ = v.FloatAt("balance") // Read - }) - atomic.AddInt64(&reads, 1) - } + + r, w := fn(chanceOf(workload)) + atomic.AddInt64(&reads, int64(r)) + atomic.AddInt64(&writes, int64(w)) return nil, nil }) } - elapsed := time.Since(start) - readsPerSec := int64(float64(reads) / elapsed.Seconds()) - writesPerSec := int64(float64(writes) / elapsed.Seconds()) - wg.Wait() pool.Cancel() - fmt.Printf("%v%%-%v%% %4v procs %15v %15v\n", 100-w, w, n, - humanize.Comma(readsPerSec)+" txn/s", - humanize.Comma(writesPerSec)+" txn/s", + + elapsed := time.Since(start) + fmt.Printf("%v%%-%v%%\t%6v\t%17v\t%13v\n", 100-workload, workload, n, + humanize.Comma(int64(float64(reads)/elapsed.Seconds()))+" txn/s", + humanize.Comma(int64(float64(writes)/elapsed.Seconds()))+" txn/s", + ) } - } } @@ -139,3 +144,22 @@ func createCollection(out *column.Collection, amount int) *column.Collection { return out } + +var epoch uint32 + +// This random number generator is not the most amazing one, but much better +// than using math/rand for our benchmarks, since it would create a lock +// contention and bias the results. 
+func randN(n int) uint32 { + v := atomic.AddUint32(&epoch, 1) + return crc32.ChecksumIEEE([]byte{ + byte(v >> 24), + byte(v >> 16), + byte(v >> 8), + byte(v), + }) % uint32(n) +} + +func chanceOf(chance int) bool { + return randN(100) < uint32(chance) +} diff --git a/go.mod b/go.mod index 0798dc4..32176f8 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,6 @@ require ( github.com/dustin/go-humanize v1.0.0 github.com/kelindar/async v1.0.0 github.com/kelindar/bitmap v1.1.1 - github.com/kelindar/rand v1.0.2 github.com/kelindar/smutex v1.0.0 github.com/stretchr/testify v1.7.0 ) diff --git a/go.sum b/go.sum index 7149ef2..73e7acf 100644 --- a/go.sum +++ b/go.sum @@ -7,8 +7,6 @@ github.com/kelindar/async v1.0.0 h1:oJiFAt3fVB/b5zVZKPBU+pP9lR3JVyeox9pYlpdnIK8= github.com/kelindar/async v1.0.0/go.mod h1:bJRlwaRiqdHi+4dpVDNHdwgyRyk6TxpA21fByLf7hIY= github.com/kelindar/bitmap v1.1.1 h1:qgoVt+3r7RpvCQDXGOovDS/GrFVkFxSO5mbAMbEELKk= github.com/kelindar/bitmap v1.1.1/go.mod h1:shAFyS8BOif+pvJ05GqxnCM0SdohHQjKvDetqI/9z6M= -github.com/kelindar/rand v1.0.2 h1:PKVCNdVENEb6/h8ZXWa56NDJX8r7zwXoYPgzGbT+7yA= -github.com/kelindar/rand v1.0.2/go.mod h1:kEcA6wZSY1uBzo9j2BCH811NzngM0yRsCkF5GzY/cg8= github.com/kelindar/smutex v1.0.0 h1:+LIZYwPz+v3IWPOse764fNaVQGMVxKV6mbD6OWjQV3o= github.com/kelindar/smutex v1.0.0/go.mod h1:nMbCZeAHWCsY9Kt4JqX7ETd+NJeR6Swy9im+Th+qUZQ= github.com/klauspost/cpuid/v2 v2.0.6 h1:dQ5ueTiftKxp0gyjKSx5+8BtPWkyQbd95m8Gys/RarI=