cmd/compile: add signed divisibility by power of 2 rules

For powers of two (c=1<<k), the divisibility check x%c == 0 can be made just by checking the trailing zeroes via a mask x&(c-1) == 0 even for signed integers. This avoids division fix-ups when just divisibility check is needed. To apply this rule, we match on the fixed-up version of the division. This is neccessary because the mod and division rewrite rules are already applied during the initial opt pass. The speed up on amd64 due to elimination of unneccessary fix-up code is ~55%: name old time/op new time/op delta DivconstI64-4 2.08ns ± 0% 2.09ns ± 1% ~ (p=0.730 n=5+5) DivisiblePow2constI64-4 1.78ns ± 1% 0.81ns ± 1% -54.66% (p=0.008 n=5+5) DivconstU64-4 2.08ns ± 0% 2.08ns ± 0% ~ (p=0.683 n=5+5) DivconstI32-4 1.53ns ± 0% 1.53ns ± 1% ~ (p=0.968 n=4+5) DivisiblePow2constI32-4 1.79ns ± 1% 0.81ns ± 1% -54.97% (p=0.008 n=5+5) DivconstU32-4 1.78ns ± 1% 1.80ns ± 2% ~ (p=0.206 n=5+5) DivconstI16-4 1.54ns ± 2% 1.54ns ± 0% ~ (p=0.238 n=5+4) DivisiblePow2constI16-4 1.78ns ± 0% 0.81ns ± 1% -54.72% (p=0.000 n=4+5) DivconstU16-4 1.00ns ± 5% 1.01ns ± 1% ~ (p=0.119 n=5+5) DivconstI8-4 1.54ns ± 0% 1.54ns ± 2% ~ (p=0.571 n=4+5) DivisiblePow2constI8-4 1.78ns ± 0% 0.82ns ± 8% -53.71% (p=0.008 n=5+5) DivconstU8-4 0.93ns ± 1% 0.93ns ± 1% ~ (p=0.643 n=5+5) A follow-up CL will address the general case of x%c == 0 for signed integers. Updates golang#15806 Change-Id: Iabadbbe369b6e0998c8ce85d038ebc236142e42a Reviewed-on: https://go-review.googlesource.com/c/go/+/173557 Run-TryBot: Brian Kessler <brian.m.kessler@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
saschagrunert · Apr 25, 2019 · 44343c7 · 44343c7
1 parent 2693b42
commit 44343c7
Show file tree

Hide file tree

Showing 5 changed files with 1,910 additions and 9 deletions.
diff --git a/src/cmd/compile/internal/gc/testdata/arith_test.go b/src/cmd/compile/internal/gc/testdata/arith_test.go
@@ -7,6 +7,7 @@
 package main
 
 import (
+	"math"
 	"runtime"
 	"testing"
 )
@@ -924,6 +925,7 @@ func TestArithmetic(t *testing.T) {
 	testShiftRemoval(t)
 	testShiftedOps(t)
 	testDivFixUp(t)
+	testDivisibleSignedPow2(t)
 }
 
 // testDivFixUp ensures that signed division fix-ups are being generated.
@@ -952,3 +954,292 @@ func testDivFixUp(t *testing.T) {
 		g64 = z % int64(i)
 	}
 }
+
+//go:noinline
+func divisible_int8_2to1(x int8) bool {
+	return x%(1<<1) == 0
+}
+
+//go:noinline
+func divisible_int8_2to2(x int8) bool {
+	return x%(1<<2) == 0
+}
+
+//go:noinline
+func divisible_int8_2to3(x int8) bool {
+	return x%(1<<3) == 0
+}
+
+//go:noinline
+func divisible_int8_2to4(x int8) bool {
+	return x%(1<<4) == 0
+}
+
+//go:noinline
+func divisible_int8_2to5(x int8) bool {
+	return x%(1<<5) == 0
+}
+
+//go:noinline
+func divisible_int8_2to6(x int8) bool {
+	return x%(1<<6) == 0
+}
+
+//go:noinline
+func divisible_int16_2to1(x int16) bool {
+	return x%(1<<1) == 0
+}
+
+//go:noinline
+func divisible_int16_2to2(x int16) bool {
+	return x%(1<<2) == 0
+}
+
+//go:noinline
+func divisible_int16_2to3(x int16) bool {
+	return x%(1<<3) == 0
+}
+
+//go:noinline
+func divisible_int16_2to4(x int16) bool {
+	return x%(1<<4) == 0
+}
+
+//go:noinline
+func divisible_int16_2to5(x int16) bool {
+	return x%(1<<5) == 0
+}
+
+//go:noinline
+func divisible_int16_2to6(x int16) bool {
+	return x%(1<<6) == 0
+}
+
+//go:noinline
+func divisible_int16_2to7(x int16) bool {
+	return x%(1<<7) == 0
+}
+
+//go:noinline
+func divisible_int16_2to8(x int16) bool {
+	return x%(1<<8) == 0
+}
+
+//go:noinline
+func divisible_int16_2to9(x int16) bool {
+	return x%(1<<9) == 0
+}
+
+//go:noinline
+func divisible_int16_2to10(x int16) bool {
+	return x%(1<<10) == 0
+}
+
+//go:noinline
+func divisible_int16_2to11(x int16) bool {
+	return x%(1<<11) == 0
+}
+
+//go:noinline
+func divisible_int16_2to12(x int16) bool {
+	return x%(1<<12) == 0
+}
+
+//go:noinline
+func divisible_int16_2to13(x int16) bool {
+	return x%(1<<13) == 0
+}
+
+//go:noinline
+func divisible_int16_2to14(x int16) bool {
+	return x%(1<<14) == 0
+}
+
+//go:noinline
+func divisible_int32_2to4(x int32) bool {
+	return x%(1<<4) == 0
+}
+
+//go:noinline
+func divisible_int32_2to15(x int32) bool {
+	return x%(1<<15) == 0
+}
+
+//go:noinline
+func divisible_int32_2to26(x int32) bool {
+	return x%(1<<26) == 0
+}
+
+//go:noinline
+func divisible_int64_2to4(x int64) bool {
+	return x%(1<<4) == 0
+}
+
+//go:noinline
+func divisible_int64_2to15(x int64) bool {
+	return x%(1<<15) == 0
+}
+
+//go:noinline
+func divisible_int64_2to26(x int64) bool {
+	return x%(1<<26) == 0
+}
+
+//go:noinline
+func divisible_int64_2to34(x int64) bool {
+	return x%(1<<34) == 0
+}
+
+//go:noinline
+func divisible_int64_2to48(x int64) bool {
+	return x%(1<<48) == 0
+}
+
+//go:noinline
+func divisible_int64_2to57(x int64) bool {
+	return x%(1<<57) == 0
+}
+
+// testDivisibleSignedPow2 confirms that x%(1<<k)==0 is rewritten correctly
+func testDivisibleSignedPow2(t *testing.T) {
+	var i int64
+	var pow2 = []int64{
+		1,
+		1 << 1,
+		1 << 2,
+		1 << 3,
+		1 << 4,
+		1 << 5,
+		1 << 6,
+		1 << 7,
+		1 << 8,
+		1 << 9,
+		1 << 10,
+		1 << 11,
+		1 << 12,
+		1 << 13,
+		1 << 14,
+	}
+	// exhaustive test for int8
+	for i = math.MinInt8; i <= math.MaxInt8; i++ {
+		if want, got := int8(i)%int8(pow2[1]) == 0, divisible_int8_2to1(int8(i)); got != want {
+			t.Errorf("divisible_int8_2to1(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int8(i)%int8(pow2[2]) == 0, divisible_int8_2to2(int8(i)); got != want {
+			t.Errorf("divisible_int8_2to2(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int8(i)%int8(pow2[3]) == 0, divisible_int8_2to3(int8(i)); got != want {
+			t.Errorf("divisible_int8_2to3(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int8(i)%int8(pow2[4]) == 0, divisible_int8_2to4(int8(i)); got != want {
+			t.Errorf("divisible_int8_2to4(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int8(i)%int8(pow2[5]) == 0, divisible_int8_2to5(int8(i)); got != want {
+			t.Errorf("divisible_int8_2to5(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int8(i)%int8(pow2[6]) == 0, divisible_int8_2to6(int8(i)); got != want {
+			t.Errorf("divisible_int8_2to6(%d) = %v want %v", i, got, want)
+		}
+	}
+	// exhaustive test for int16
+	for i = math.MinInt16; i <= math.MaxInt16; i++ {
+		if want, got := int16(i)%int16(pow2[1]) == 0, divisible_int16_2to1(int16(i)); got != want {
+			t.Errorf("divisible_int16_2to1(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int16(i)%int16(pow2[2]) == 0, divisible_int16_2to2(int16(i)); got != want {
+			t.Errorf("divisible_int16_2to2(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int16(i)%int16(pow2[3]) == 0, divisible_int16_2to3(int16(i)); got != want {
+			t.Errorf("divisible_int16_2to3(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int16(i)%int16(pow2[4]) == 0, divisible_int16_2to4(int16(i)); got != want {
+			t.Errorf("divisible_int16_2to4(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int16(i)%int16(pow2[5]) == 0, divisible_int16_2to5(int16(i)); got != want {
+			t.Errorf("divisible_int16_2to5(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int16(i)%int16(pow2[6]) == 0, divisible_int16_2to6(int16(i)); got != want {
+			t.Errorf("divisible_int16_2to6(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int16(i)%int16(pow2[7]) == 0, divisible_int16_2to7(int16(i)); got != want {
+			t.Errorf("divisible_int16_2to7(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int16(i)%int16(pow2[8]) == 0, divisible_int16_2to8(int16(i)); got != want {
+			t.Errorf("divisible_int16_2to8(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int16(i)%int16(pow2[9]) == 0, divisible_int16_2to9(int16(i)); got != want {
+			t.Errorf("divisible_int16_2to9(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int16(i)%int16(pow2[10]) == 0, divisible_int16_2to10(int16(i)); got != want {
+			t.Errorf("divisible_int16_2to10(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int16(i)%int16(pow2[11]) == 0, divisible_int16_2to11(int16(i)); got != want {
+			t.Errorf("divisible_int16_2to11(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int16(i)%int16(pow2[12]) == 0, divisible_int16_2to12(int16(i)); got != want {
+			t.Errorf("divisible_int16_2to12(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int16(i)%int16(pow2[13]) == 0, divisible_int16_2to13(int16(i)); got != want {
+			t.Errorf("divisible_int16_2to13(%d) = %v want %v", i, got, want)
+		}
+		if want, got := int16(i)%int16(pow2[14]) == 0, divisible_int16_2to14(int16(i)); got != want {
+			t.Errorf("divisible_int16_2to14(%d) = %v want %v", i, got, want)
+		}
+	}
+	// spot check for int32 and int64
+	var (
+		two4  int64 = 1 << 4
+		two15 int64 = 1 << 15
+		two26 int64 = 1 << 26
+		two34 int64 = 1 << 34
+		two48 int64 = 1 << 48
+		two57 int64 = 1 << 57
+	)
+	var xs = []int64{two4, two4 + 3, -3 * two4, -3*two4 + 1,
+		two15, two15 + 3, -3 * two15, -3*two15 + 1,
+		two26, two26 + 37, -5 * two26, -5*two26 + 2,
+		two34, two34 + 356, -7 * two34, -7*two34 + 13,
+		two48, two48 + 3000, -12 * two48, -12*two48 + 1111,
+		two57, two57 + 397654, -15 * two57, -15*two57 + 11234,
+	}
+	for _, x := range xs {
+		if int64(int32(x)) == x {
+			if want, got := int32(x)%int32(two4) == 0, divisible_int32_2to4(int32(x)); got != want {
+				t.Errorf("divisible_int32_2to4(%d) = %v want %v", x, got, want)
+			}
+
+			if want, got := int32(x)%int32(two15) == 0, divisible_int32_2to15(int32(x)); got != want {
+				t.Errorf("divisible_int32_2to15(%d) = %v want %v", x, got, want)
+			}
+
+			if want, got := int32(x)%int32(two26) == 0, divisible_int32_2to26(int32(x)); got != want {
+				t.Errorf("divisible_int32_2to26(%d) = %v want %v", x, got, want)
+			}
+		}
+		// spot check for int64
+		if want, got := x%two4 == 0, divisible_int64_2to4(x); got != want {
+			t.Errorf("divisible_int64_2to4(%d) = %v want %v", x, got, want)
+		}
+
+		if want, got := x%two15 == 0, divisible_int64_2to15(x); got != want {
+			t.Errorf("divisible_int64_2to15(%d) = %v want %v", x, got, want)
+		}
+
+		if want, got := x%two26 == 0, divisible_int64_2to26(x); got != want {
+			t.Errorf("divisible_int64_2to26(%d) = %v want %v", x, got, want)
+		}
+
+		if want, got := x%two34 == 0, divisible_int64_2to34(x); got != want {
+			t.Errorf("divisible_int64_2to34(%d) = %v want %v", x, got, want)
+		}
+
+		if want, got := x%two48 == 0, divisible_int64_2to48(x); got != want {
+			t.Errorf("divisible_int64_2to48(%d) = %v want %v", x, got, want)
+		}
+
+		if want, got := x%two57 == 0, divisible_int64_2to57(x); got != want {
+			t.Errorf("divisible_int64_2to57(%d) = %v want %v", x, got, want)
+		}
+
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules
@@ -1163,6 +1163,41 @@
 (Mod64u <t> x (Const64 [c])) && x.Op != OpConst64 && c > 0 && umagicOK(64,c)
   -> (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
 
+// Divisibility check for signed integers for power of two constant are simple mask.
+// However, we must match against the rewritten n%c == 0 -> n - c*(n/c) == 0 -> n == c *(n/c)
+// where n/c contains fixup code to handle signed n.
+(Eq8 n (Lsh8x64
+  (Rsh8x64
+    (Add8  <t> n (Rsh8Ux64  <t> (Rsh8x64  <t> n (Const64 <typ.UInt64> [ 7])) (Const64 <typ.UInt64> [kbar])))
+    (Const64 <typ.UInt64> [k]))
+	(Const64 <typ.UInt64> [k]))
+) && k > 0 && k < 7 && kbar == 8 - k
+  -> (Eq8 (And8 <t> n (Const8 <t> [int64(1<<uint(k)-1)])) (Const8 <t> [0]))
+
+(Eq16 n (Lsh16x64
+  (Rsh16x64
+    (Add16 <t> n (Rsh16Ux64 <t> (Rsh16x64 <t> n (Const64 <typ.UInt64> [15])) (Const64 <typ.UInt64> [kbar])))
+    (Const64 <typ.UInt64> [k]))
+	(Const64 <typ.UInt64> [k]))
+) && k > 0 && k < 15 && kbar == 16 - k
+  -> (Eq16 (And16 <t> n (Const16 <t> [int64(1<<uint(k)-1)])) (Const16 <t> [0]))
+
+(Eq32 n (Lsh32x64
+  (Rsh32x64
+    (Add32 <t> n (Rsh32Ux64 <t> (Rsh32x64 <t> n (Const64 <typ.UInt64> [31])) (Const64 <typ.UInt64> [kbar])))
+    (Const64 <typ.UInt64> [k]))
+	(Const64 <typ.UInt64> [k]))
+) && k > 0 && k < 31 && kbar == 32 - k
+  -> (Eq32 (And32 <t> n (Const32 <t> [int64(1<<uint(k)-1)])) (Const32 <t> [0]))
+
+(Eq64 n (Lsh64x64
+  (Rsh64x64
+    (Add64 <t> n (Rsh64Ux64 <t> (Rsh64x64 <t> n (Const64 <typ.UInt64> [63])) (Const64 <typ.UInt64> [kbar])))
+    (Const64 <typ.UInt64> [k]))
+	(Const64 <typ.UInt64> [k]))
+) && k > 0 && k < 63 && kbar == 64 - k
+  -> (Eq64 (And64 <t> n (Const64 <t> [int64(1<<uint(k)-1)])) (Const64 <t> [0]))
+
 (Eq(8|16|32|64)  s:(Sub(8|16|32|64) x y) (Const(8|16|32|64) [0])) && s.Uses == 1 -> (Eq(8|16|32|64)  x y)
 (Neq(8|16|32|64) s:(Sub(8|16|32|64) x y) (Const(8|16|32|64) [0])) && s.Uses == 1 -> (Neq(8|16|32|64) x y)