From 7e849b7b1a26989c959fbf5ae955c66d29135f7a Mon Sep 17 00:00:00 2001 From: Roman Atachiants Date: Sat, 11 Sep 2021 00:01:02 +0400 Subject: [PATCH] Fixed benchmark example --- examples/bench/README.md | 89 +++++++++++++++++++++------------- examples/bench/bench.go | 100 ++++++++++++++++++++++++--------------- go.mod | 1 - go.sum | 2 - 4 files changed, 117 insertions(+), 75 deletions(-) diff --git a/examples/bench/README.md b/examples/bench/README.md index ad8961f..4ba06e0 100644 --- a/examples/bench/README.md +++ b/examples/bench/README.md @@ -6,39 +6,60 @@ This is an example benchmark with various workloads (90% read / 10% write, etc) Note that the goal of this benchmark is to validate concurrency, not throughput this represents the current "best" case scenario when the updates are random and do less likely to incur contention. Reads, however quite often would hit the same chunks as only the index itself is randomized. +## Results + +Below are some results from running on my 8-core machine (Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz). 
+ ``` - WORK PROCS READS WRITES -90%-10% 1 procs 51,642 txn/s 5,884 txn/s -90%-10% 8 procs 195,201 txn/s 21,803 txn/s -90%-10% 16 procs 311,078 txn/s 34,519 txn/s -90%-10% 32 procs 370,100 txn/s 41,225 txn/s -90%-10% 64 procs 374,964 txn/s 41,582 txn/s -90%-10% 128 procs 347,933 txn/s 38,589 txn/s -90%-10% 256 procs 337,840 txn/s 37,329 txn/s -90%-10% 512 procs 342,272 txn/s 37,692 txn/s -90%-10% 1024 procs 339,367 txn/s 37,049 txn/s -90%-10% 2048 procs 327,060 txn/s 35,568 txn/s -90%-10% 4096 procs 314,160 txn/s 32,818 txn/s -50%-50% 1 procs 28,944 txn/s 29,054 txn/s -50%-50% 8 procs 59,487 txn/s 59,342 txn/s -50%-50% 16 procs 70,271 txn/s 70,276 txn/s -50%-50% 32 procs 70,067 txn/s 69,796 txn/s -50%-50% 64 procs 61,443 txn/s 61,559 txn/s -50%-50% 128 procs 54,985 txn/s 54,760 txn/s -50%-50% 256 procs 53,684 txn/s 53,465 txn/s -50%-50% 512 procs 62,488 txn/s 61,967 txn/s -50%-50% 1024 procs 69,211 txn/s 68,090 txn/s -50%-50% 2048 procs 74,262 txn/s 73,639 txn/s -50%-50% 4096 procs 77,700 txn/s 75,452 txn/s -10%-90% 1 procs 4,811 txn/s 43,825 txn/s -10%-90% 8 procs 8,585 txn/s 77,136 txn/s -10%-90% 16 procs 8,582 txn/s 77,260 txn/s -10%-90% 32 procs 8,866 txn/s 79,127 txn/s -10%-90% 64 procs 8,090 txn/s 73,265 txn/s -10%-90% 128 procs 7,412 txn/s 67,985 txn/s -10%-90% 256 procs 6,473 txn/s 58,903 txn/s -10%-90% 512 procs 6,916 txn/s 61,835 txn/s -10%-90% 1024 procs 7,989 txn/s 71,794 txn/s -10%-90% 2048 procs 8,930 txn/s 78,657 txn/s -10%-90% 4096 procs 9,231 txn/s 81,465 txn/s + WORK PROCS READ RATE WRITE RATE +100%-0% 1 8,149,482 txn/s 0 txn/s +100%-0% 2 12,622,747 txn/s 0 txn/s +100%-0% 4 14,378,647 txn/s 0 txn/s +100%-0% 8 16,298,860 txn/s 0 txn/s +100%-0% 16 16,276,835 txn/s 0 txn/s +100%-0% 32 16,297,247 txn/s 0 txn/s +100%-0% 64 16,214,731 txn/s 0 txn/s +100%-0% 128 16,185,721 txn/s 0 txn/s +100%-0% 256 16,171,638 txn/s 0 txn/s +100%-0% 512 16,237,574 txn/s 0 txn/s +90%-10% 1 2,248,513 txn/s 239,309 txn/s +90%-10% 2 2,297,998 txn/s 226,016 txn/s +90%-10% 4 
1,432,691 txn/s 184,189 txn/s +90%-10% 8 1,112,076 txn/s 153,934 txn/s +90%-10% 16 1,432,723 txn/s 147,244 txn/s +90%-10% 32 1,375,383 txn/s 161,755 txn/s +90%-10% 64 1,441,755 txn/s 144,570 txn/s +90%-10% 128 1,272,174 txn/s 140,107 txn/s +90%-10% 256 925,191 txn/s 105,999 txn/s +90%-10% 512 858,555 txn/s 89,202 txn/s +50%-50% 1 305,245 txn/s 320,159 txn/s +50%-50% 2 262,496 txn/s 250,654 txn/s +50%-50% 4 255,906 txn/s 262,823 txn/s +50%-50% 8 238,096 txn/s 225,565 txn/s +50%-50% 16 236,144 txn/s 240,810 txn/s +50%-50% 32 250,954 txn/s 237,928 txn/s +50%-50% 64 214,474 txn/s 220,495 txn/s +50%-50% 128 156,660 txn/s 162,219 txn/s +50%-50% 256 125,956 txn/s 120,344 txn/s +50%-50% 512 103,619 txn/s 98,510 txn/s +10%-90% 1 40,723 txn/s 339,694 txn/s +10%-90% 2 24,746 txn/s 298,934 txn/s +10%-90% 4 35,483 txn/s 290,769 txn/s +10%-90% 8 34,265 txn/s 279,838 txn/s +10%-90% 16 28,678 txn/s 274,759 txn/s +10%-90% 32 23,662 txn/s 227,651 txn/s +10%-90% 64 36,056 txn/s 208,993 txn/s +10%-90% 128 17,463 txn/s 149,558 txn/s +10%-90% 256 14,125 txn/s 113,701 txn/s +10%-90% 512 11,435 txn/s 96,999 txn/s +0%-100% 1 0 txn/s 345,335 txn/s +0%-100% 2 0 txn/s 297,386 txn/s +0%-100% 4 0 txn/s 300,023 txn/s +0%-100% 8 0 txn/s 276,361 txn/s +0%-100% 16 0 txn/s 243,448 txn/s +0%-100% 32 0 txn/s 208,523 txn/s +0%-100% 64 0 txn/s 195,732 txn/s +0%-100% 128 0 txn/s 145,990 txn/s +0%-100% 256 0 txn/s 110,786 txn/s +0%-100% 512 0 txn/s 94,313 txn/s ``` \ No newline at end of file diff --git a/examples/bench/bench.go b/examples/bench/bench.go index f6399ee..4b1e2f5 100644 --- a/examples/bench/bench.go +++ b/examples/bench/bench.go @@ -7,6 +7,7 @@ import ( "context" "encoding/json" "fmt" + "hash/crc32" "os" "sync" "sync/atomic" @@ -15,7 +16,6 @@ import ( "github.com/dustin/go-humanize" "github.com/kelindar/async" "github.com/kelindar/column" - "github.com/kelindar/rand" ) var ( @@ -28,63 +28,68 @@ func main() { players := column.NewCollection(column.Options{ Capacity: amount, }) - - // insert 
the data first createCollection(players, amount) - // Iterate over various workloads - fmt.Printf(" WORK PROCS READS WRITES\n") - for _, w := range []int{10, 50, 90} { + // This runs point query benchmarks + runBenchmark("Point Reads/Writes", func(writeTxn bool) (reads int, writes int) { + + // To avoid task granularity problem, load up a bit more work on each + // of the goroutines, a few hundred reads should be enough to amortize + // the cost of scheduling goroutines, so we can actually test our code. + for i := 0; i < 1000; i++ { + offset := randN(amount - 1) + if writeTxn { + players.UpdateAt(offset, "balance", func(v column.Cursor) error { + v.SetFloat64(0) + return nil + }) + writes++ + } else { + players.SelectAt(offset, func(v column.Selector) { + _ = v.FloatAt("balance") // Read + }) + reads++ + } + } + return + }) +} - // Iterate over various concurrency levels - for _, n := range []int{1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096} { +// runBenchmark runs a benchmark +func runBenchmark(name string, fn func(bool) (int, int)) { + fmt.Printf("Benchmarking %v ...\n", name) + fmt.Printf("%7v\t%6v\t%17v\t%13v\n", "WORK", "PROCS", "READ RATE", "WRITE RATE") + for _, workload := range []int{0, 10, 50, 90, 100} { - // Create a pool of N goroutines + // Iterate over various concurrency levels + for _, n := range []int{1, 2, 4, 8, 16, 32, 64, 128, 256, 512} { work := make(chan async.Task, n) pool := async.Consume(context.Background(), n, work) - //run(fmt.Sprintf("(%v/%v)-%v", 100-w, w, n), func(b *testing.B) { - var reads int64 - var writes int64 - + var reads, writes int64 var wg sync.WaitGroup - start := time.Now() - for time.Since(start) < 2*time.Second { + for time.Since(start) < time.Second { wg.Add(1) work <- async.NewTask(func(ctx context.Context) (interface{}, error) { defer wg.Done() - offset := uint32(rand.Uint32n(uint32(amount - 1))) - - // Given our write probabiliy, randomly read/write at an offset - if rand.Uint32n(100) < uint32(w) { - 
players.UpdateAt(offset, "balance", func(v column.Cursor) error { - v.SetFloat64(0) - return nil - }) - atomic.AddInt64(&writes, 1) - } else { - players.SelectAt(offset, func(v column.Selector) { - _ = v.FloatAt("balance") // Read - }) - atomic.AddInt64(&reads, 1) - } + + r, w := fn(chanceOf(workload)) + atomic.AddInt64(&reads, int64(r)) + atomic.AddInt64(&writes, int64(w)) return nil, nil }) } - elapsed := time.Since(start) - readsPerSec := int64(float64(reads) / elapsed.Seconds()) - writesPerSec := int64(float64(writes) / elapsed.Seconds()) - wg.Wait() pool.Cancel() - fmt.Printf("%v%%-%v%% %4v procs %15v %15v\n", 100-w, w, n, - humanize.Comma(readsPerSec)+" txn/s", - humanize.Comma(writesPerSec)+" txn/s", + + elapsed := time.Since(start) + fmt.Printf("%v%%-%v%%\t%6v\t%17v\t%13v\n", 100-workload, workload, n, + humanize.Comma(int64(float64(reads)/elapsed.Seconds()))+" txn/s", + humanize.Comma(int64(float64(writes)/elapsed.Seconds()))+" txn/s", + ) } - } } @@ -139,3 +144,22 @@ func createCollection(out *column.Collection, amount int) *column.Collection { return out } + +var epoch uint32 + +// This random number generator is not the most amazing one, but much better +// than using math/rand for our benchmarks, since it would create a lock +// contention and bias the results. 
+func randN(n int) uint32 { + v := atomic.AddUint32(&epoch, 1) + return crc32.ChecksumIEEE([]byte{ + byte(v >> 24), + byte(v >> 16), + byte(v >> 8), + byte(v), + }) % uint32(n) +} + +func chanceOf(chance int) bool { + return randN(100) < uint32(chance) +} diff --git a/go.mod b/go.mod index 0798dc4..32176f8 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,6 @@ require ( github.com/dustin/go-humanize v1.0.0 github.com/kelindar/async v1.0.0 github.com/kelindar/bitmap v1.1.1 - github.com/kelindar/rand v1.0.2 github.com/kelindar/smutex v1.0.0 github.com/stretchr/testify v1.7.0 ) diff --git a/go.sum b/go.sum index 7149ef2..73e7acf 100644 --- a/go.sum +++ b/go.sum @@ -7,8 +7,6 @@ github.com/kelindar/async v1.0.0 h1:oJiFAt3fVB/b5zVZKPBU+pP9lR3JVyeox9pYlpdnIK8= github.com/kelindar/async v1.0.0/go.mod h1:bJRlwaRiqdHi+4dpVDNHdwgyRyk6TxpA21fByLf7hIY= github.com/kelindar/bitmap v1.1.1 h1:qgoVt+3r7RpvCQDXGOovDS/GrFVkFxSO5mbAMbEELKk= github.com/kelindar/bitmap v1.1.1/go.mod h1:shAFyS8BOif+pvJ05GqxnCM0SdohHQjKvDetqI/9z6M= -github.com/kelindar/rand v1.0.2 h1:PKVCNdVENEb6/h8ZXWa56NDJX8r7zwXoYPgzGbT+7yA= -github.com/kelindar/rand v1.0.2/go.mod h1:kEcA6wZSY1uBzo9j2BCH811NzngM0yRsCkF5GzY/cg8= github.com/kelindar/smutex v1.0.0 h1:+LIZYwPz+v3IWPOse764fNaVQGMVxKV6mbD6OWjQV3o= github.com/kelindar/smutex v1.0.0/go.mod h1:nMbCZeAHWCsY9Kt4JqX7ETd+NJeR6Swy9im+Th+qUZQ= github.com/klauspost/cpuid/v2 v2.0.6 h1:dQ5ueTiftKxp0gyjKSx5+8BtPWkyQbd95m8Gys/RarI=