Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8266054: VectorAPI rotate operation optimization #3720

Closed
wants to merge 19 commits into from
Closed
Changes from 2 commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
@@ -655,9 +655,9 @@ ByteVector lanewiseTemplate(VectorOperators.Binary op,
case VECTOR_OP_URSHIFT: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> (byte)((a & LSHR_SETUP_MASK) >>> n));
case VECTOR_OP_LROTATE: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> (byte)((a << (n & (Byte.SIZE-1))) | (a >>> (Byte.SIZE - (n & (Byte.SIZE-1))))));
v0.bOp(v1, (i, a, n) ->(byte)(((((byte)a) & Byte.toUnsignedInt((byte)-1)) << (n & Byte.SIZE-1)) | ((((byte)a) & Byte.toUnsignedInt((byte)-1)) >>> (Byte.SIZE - (n & Byte.SIZE-1)))));
case VECTOR_OP_RROTATE: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> (byte)((a >>> (n & (Byte.SIZE-1))) | (a << (Byte.SIZE - (n & (Byte.SIZE-1))))));
v0.bOp(v1, (i, a, n) ->(byte)(((((byte)a) & Byte.toUnsignedInt((byte)-1)) >>> (n & Byte.SIZE-1)) | ((((byte)a) & Byte.toUnsignedInt((byte)-1)) << (Byte.SIZE - (n & Byte.SIZE-1)))));
default: return null;
}}));
}
@@ -817,9 +817,9 @@ ByteVector lanewise(VectorOperators.Binary op,
case VECTOR_OP_URSHIFT: return (v, n) ->
v.uOp((i, a) -> (byte)((a & LSHR_SETUP_MASK) >>> n));
case VECTOR_OP_LROTATE: return (v, n) ->
v.uOp((i, a) -> (byte)((a << (n & (Byte.SIZE-1))) | (a >>> (Byte.SIZE - (n & (Byte.SIZE-1))))));
v.uOp((i, a) ->(byte)(((((byte)a) & Byte.toUnsignedInt((byte)-1)) << (n & Byte.SIZE-1)) | ((((byte)a) & Byte.toUnsignedInt((byte)-1)) >>> (Byte.SIZE - (n & Byte.SIZE-1)))));
case VECTOR_OP_RROTATE: return (v, n) ->
v.uOp((i, a) -> (byte)((a >>> (n & (Byte.SIZE-1))) | (a << (Byte.SIZE - (n & (Byte.SIZE-1))))));
v.uOp((i, a) ->(byte)(((((byte)a) & Byte.toUnsignedInt((byte)-1)) >>> (n & Byte.SIZE-1)) | ((((byte)a) & Byte.toUnsignedInt((byte)-1)) << (Byte.SIZE - (n & Byte.SIZE-1)))));
default: return null;
}}));
}
@@ -655,9 +655,9 @@ IntVector lanewiseTemplate(VectorOperators.Binary op,
case VECTOR_OP_URSHIFT: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> (int)((a & LSHR_SETUP_MASK) >>> n));
case VECTOR_OP_LROTATE: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> (int)((a << (n & (Integer.SIZE-1))) | (a >>> (Integer.SIZE - (n & (Integer.SIZE-1))))));
v0.bOp(v1, (i, a, n) -> Integer.rotateLeft(a, n));
case VECTOR_OP_RROTATE: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> (int)((a >>> (n & (Integer.SIZE-1))) | (a << (Integer.SIZE - (n & (Integer.SIZE-1))))));
v0.bOp(v1, (i, a, n) -> Integer.rotateRight(a, n));
default: return null;
}}));
}
@@ -817,9 +817,9 @@ IntVector lanewise(VectorOperators.Binary op,
case VECTOR_OP_URSHIFT: return (v, n) ->
v.uOp((i, a) -> (int)((a & LSHR_SETUP_MASK) >>> n));
case VECTOR_OP_LROTATE: return (v, n) ->
v.uOp((i, a) -> (int)((a << (n & (Integer.SIZE-1))) | (a >>> (Integer.SIZE - (n & (Integer.SIZE-1))))));
v.uOp((i, a) -> Integer.rotateLeft(a, n));
case VECTOR_OP_RROTATE: return (v, n) ->
v.uOp((i, a) -> (int)((a >>> (n & (Integer.SIZE-1))) | (a << (Integer.SIZE - (n & (Integer.SIZE-1))))));
v.uOp((i, a) -> Integer.rotateRight(a, n));
default: return null;
}}));
}
@@ -613,9 +613,9 @@ LongVector lanewiseTemplate(VectorOperators.Binary op,
case VECTOR_OP_URSHIFT: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> (long)((a & LSHR_SETUP_MASK) >>> n));
case VECTOR_OP_LROTATE: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> (long)((a << (n & (Long.SIZE-1))) | (a >>> (Long.SIZE - (n & (Long.SIZE-1))))));
v0.bOp(v1, (i, a, n) -> Long.rotateLeft(a, (int)n));
case VECTOR_OP_RROTATE: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> (long)((a >>> (n & (Long.SIZE-1))) | (a << (Long.SIZE - (n & (Long.SIZE-1))))));
v0.bOp(v1, (i, a, n) -> Long.rotateRight(a, (int)n));
default: return null;
}}));
}
@@ -735,9 +735,9 @@ LongVector lanewise(VectorOperators.Binary op,
case VECTOR_OP_URSHIFT: return (v, n) ->
v.uOp((i, a) -> (long)((a & LSHR_SETUP_MASK) >>> n));
case VECTOR_OP_LROTATE: return (v, n) ->
v.uOp((i, a) -> (long)((a << (n & (Long.SIZE-1))) | (a >>> (Long.SIZE - (n & (Long.SIZE-1))))));
v.uOp((i, a) -> Long.rotateLeft(a, (int)n));
case VECTOR_OP_RROTATE: return (v, n) ->
v.uOp((i, a) -> (long)((a >>> (n & (Long.SIZE-1))) | (a << (Long.SIZE - (n & (Long.SIZE-1))))));
v.uOp((i, a) -> Long.rotateRight(a, (int)n));
default: return null;
}}));
}
@@ -655,9 +655,9 @@ ShortVector lanewiseTemplate(VectorOperators.Binary op,
case VECTOR_OP_URSHIFT: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> (short)((a & LSHR_SETUP_MASK) >>> n));
case VECTOR_OP_LROTATE: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> (short)((a << (n & (Short.SIZE-1))) | (a >>> (Short.SIZE - (n & (Short.SIZE-1))))));
v0.bOp(v1, (i, a, n) ->(short)(((((short)a) & Short.toUnsignedInt((short)-1)) << (n & Short.SIZE-1)) | ((((short)a) & Short.toUnsignedInt((short)-1)) >>> (Short.SIZE - (n & Short.SIZE-1)))));
case VECTOR_OP_RROTATE: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> (short)((a >>> (n & (Short.SIZE-1))) | (a << (Short.SIZE - (n & (Short.SIZE-1))))));
v0.bOp(v1, (i, a, n) ->(short)(((((short)a) & Short.toUnsignedInt((short)-1)) >>> (n & Short.SIZE-1)) | ((((short)a) & Short.toUnsignedInt((short)-1)) << (Short.SIZE - (n & Short.SIZE-1)))));
default: return null;
}}));
}
@@ -817,9 +817,9 @@ ShortVector lanewise(VectorOperators.Binary op,
case VECTOR_OP_URSHIFT: return (v, n) ->
v.uOp((i, a) -> (short)((a & LSHR_SETUP_MASK) >>> n));
case VECTOR_OP_LROTATE: return (v, n) ->
v.uOp((i, a) -> (short)((a << (n & (Short.SIZE-1))) | (a >>> (Short.SIZE - (n & (Short.SIZE-1))))));
v.uOp((i, a) ->(short)(((((short)a) & Short.toUnsignedInt((short)-1)) << (n & Short.SIZE-1)) | ((((short)a) & Short.toUnsignedInt((short)-1)) >>> (Short.SIZE - (n & Short.SIZE-1)))));
case VECTOR_OP_RROTATE: return (v, n) ->
v.uOp((i, a) -> (short)((a >>> (n & (Short.SIZE-1))) | (a << (Short.SIZE - (n & (Short.SIZE-1))))));
v.uOp((i, a) ->(short)(((((short)a) & Short.toUnsignedInt((short)-1)) >>> (n & Short.SIZE-1)) | ((((short)a) & Short.toUnsignedInt((short)-1)) << (Short.SIZE - (n & Short.SIZE-1)))));
default: return null;
}}));
}
@@ -725,10 +725,24 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
v0.bOp(v1, (i, a, n) -> ($type$)(a >> n));
case VECTOR_OP_URSHIFT: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> ($type$)((a & LSHR_SETUP_MASK) >>> n));
#if[long]

This comment has been minimized.

Loading
@PaulSandoz

PaulSandoz Apr 30, 2021
Member Outdated

I recommend you create new methods in IntVector, etc., called rotateLeft and rotateRight that do what is in the lambda expressions. Then you can collapse this to non-conditional cases calling those methods.

Do the same for the tests (like I did with the unsigned support), see

and

gen_compare_op "UNSIGNED_LT" "ult" "BITWISE"

That will avoid the embedding of complex expressions.

case VECTOR_OP_LROTATE: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> Long.rotateLeft(a, (int)n));
case VECTOR_OP_RROTATE: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> Long.rotateRight(a, (int)n));
#else[long]
#if[int]
case VECTOR_OP_LROTATE: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> ($type$)((a << (n & ($Boxtype$.SIZE-1))) | (a >>> ($Boxtype$.SIZE - (n & ($Boxtype$.SIZE-1))))));
v0.bOp(v1, (i, a, n) -> Integer.rotateLeft(a, n));
case VECTOR_OP_RROTATE: return (v0, v1) ->
v0.bOp(v1, (i, a, n) -> ($type$)((a >>> (n & ($Boxtype$.SIZE-1))) | (a << ($Boxtype$.SIZE - (n & ($Boxtype$.SIZE-1))))));
v0.bOp(v1, (i, a, n) -> Integer.rotateRight(a, n));
#else[int]
case VECTOR_OP_LROTATE: return (v0, v1) ->
v0.bOp(v1, (i, a, n) ->($type$)((((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) << (n & $Boxtype$.SIZE-1)) | (((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) >>> ($Boxtype$.SIZE - (n & $Boxtype$.SIZE-1)))));
case VECTOR_OP_RROTATE: return (v0, v1) ->
v0.bOp(v1, (i, a, n) ->($type$)((((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) >>> (n & $Boxtype$.SIZE-1)) | (((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) << ($Boxtype$.SIZE - (n & $Boxtype$.SIZE-1)))));
#end[int]
#end[long]
#end[BITWISE]
default: return null;
}}));
@@ -897,10 +911,24 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
v.uOp((i, a) -> ($type$)(a >> n));
case VECTOR_OP_URSHIFT: return (v, n) ->
v.uOp((i, a) -> ($type$)((a & LSHR_SETUP_MASK) >>> n));
#if[long]
case VECTOR_OP_LROTATE: return (v, n) ->
v.uOp((i, a) -> Long.rotateLeft(a, (int)n));
case VECTOR_OP_RROTATE: return (v, n) ->
v.uOp((i, a) -> Long.rotateRight(a, (int)n));
#else[long]
#if[int]
case VECTOR_OP_LROTATE: return (v, n) ->
v.uOp((i, a) -> ($type$)((a << (n & ($Boxtype$.SIZE-1))) | (a >>> ($Boxtype$.SIZE - (n & ($Boxtype$.SIZE-1))))));
v.uOp((i, a) -> Integer.rotateLeft(a, n));
case VECTOR_OP_RROTATE: return (v, n) ->
v.uOp((i, a) -> ($type$)((a >>> (n & ($Boxtype$.SIZE-1))) | (a << ($Boxtype$.SIZE - (n & ($Boxtype$.SIZE-1))))));
v.uOp((i, a) -> Integer.rotateRight(a, n));
#else[int]
case VECTOR_OP_LROTATE: return (v, n) ->
v.uOp((i, a) ->($type$)((((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) << (n & $Boxtype$.SIZE-1)) | (((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) >>> ($Boxtype$.SIZE - (n & $Boxtype$.SIZE-1)))));
case VECTOR_OP_RROTATE: return (v, n) ->
v.uOp((i, a) ->($type$)((((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) >>> (n & $Boxtype$.SIZE-1)) | (((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) << ($Boxtype$.SIZE - (n & $Boxtype$.SIZE-1)))));
#end[int]
#end[long]
default: return null;
}}));
}
@@ -2427,6 +2427,94 @@ static void ASHRByte128VectorTestsShift(IntFunction<byte[]> fa, IntFunction<byte



/**
 * Scalar reference for an 8-bit rotate-left, used as the expected-value oracle.
 *
 * @param a value whose low 8 bits are rotated
 * @param b rotate count; only its low 3 bits are used
 * @return {@code a} rotated left by {@code b & 7} bit positions
 */
static byte ROL_unary(byte a, byte b) {
    // Zero-extend so that the unsigned right shift does not pull in sign bits.
    int bits = a & 0xFF;
    int count = b & 7;
    // The complementary shift is masked so that a count of 0 shifts by 0, not 8.
    int wrapped = (8 - count) & 7;
    return (byte) ((bits << count) | (bits >>> wrapped));
}

/**
 * Applies {@code VectorOperators.ROL} with a scalar count to every
 * species-sized chunk of the input and checks the result against
 * {@code ROL_unary}.
 *
 * @param fa supplier of the array of values to rotate
 * @param fb supplier of the array from which rotate counts are read
 */
@Test(dataProvider = "byteBinaryOpProvider")
static void ROLByte128VectorTestsShift(IntFunction<byte[]> fa, IntFunction<byte[]> fb) {
    final int lanes = SPECIES.length();
    byte[] a = fa.apply(lanes);
    byte[] b = fb.apply(lanes);
    byte[] r = fr.apply(lanes);

    for (int round = 0; round < INVOC_COUNT; round++) {
        int offset = 0;
        while (offset < a.length) {
            // Kept int-typed on purpose: the argument type selects the lanewise overload.
            int count = b[offset];
            ByteVector.fromArray(SPECIES, a, offset)
                    .lanewise(VectorOperators.ROL, count)
                    .intoArray(r, offset);
            offset += lanes;
        }
    }

    assertShiftArraysEquals(r, a, b, Byte128VectorTests::ROL_unary);
}



/**
 * Masked variant: applies {@code VectorOperators.ROL} with a scalar count
 * under a vector mask to every species-sized chunk of the input and checks
 * the result against {@code ROL_unary}.
 *
 * @param fa supplier of the array of values to rotate
 * @param fb supplier of the array from which rotate counts are read
 * @param fm supplier of the boolean array the vector mask is built from
 */
@Test(dataProvider = "byteBinaryOpMaskProvider")
static void ROLByte128VectorTestsShift(IntFunction<byte[]> fa, IntFunction<byte[]> fb,
                                       IntFunction<boolean[]> fm) {
    final int lanes = SPECIES.length();
    byte[] a = fa.apply(lanes);
    byte[] b = fb.apply(lanes);
    byte[] r = fr.apply(lanes);
    boolean[] mask = fm.apply(lanes);
    // The mask is built once from the start of the boolean array and reused for every chunk.
    VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);

    for (int round = 0; round < INVOC_COUNT; round++) {
        int offset = 0;
        while (offset < a.length) {
            // Kept int-typed on purpose: the argument type selects the lanewise overload.
            int count = b[offset];
            ByteVector.fromArray(SPECIES, a, offset)
                    .lanewise(VectorOperators.ROL, count, vmask)
                    .intoArray(r, offset);
            offset += lanes;
        }
    }

    assertShiftArraysEquals(r, a, b, mask, Byte128VectorTests::ROL_unary);
}






/**
 * Scalar reference for an 8-bit rotate-right, used as the expected-value oracle.
 *
 * @param a value whose low 8 bits are rotated
 * @param b rotate count; only its low 3 bits are used
 * @return {@code a} rotated right by {@code b & 7} bit positions
 */
static byte ROR_unary(byte a, byte b) {
    // Zero-extend so that the unsigned right shift does not pull in sign bits.
    int bits = a & 0xFF;
    int count = b & 7;
    // The complementary shift is masked so that a count of 0 shifts by 0, not 8.
    int wrapped = (8 - count) & 7;
    return (byte) ((bits >>> count) | (bits << wrapped));
}

/**
 * Applies {@code VectorOperators.ROR} with a scalar count to every
 * species-sized chunk of the input and checks the result against
 * {@code ROR_unary}.
 *
 * @param fa supplier of the array of values to rotate
 * @param fb supplier of the array from which rotate counts are read
 */
@Test(dataProvider = "byteBinaryOpProvider")
static void RORByte128VectorTestsShift(IntFunction<byte[]> fa, IntFunction<byte[]> fb) {
    final int lanes = SPECIES.length();
    byte[] a = fa.apply(lanes);
    byte[] b = fb.apply(lanes);
    byte[] r = fr.apply(lanes);

    for (int round = 0; round < INVOC_COUNT; round++) {
        int offset = 0;
        while (offset < a.length) {
            // Kept int-typed on purpose: the argument type selects the lanewise overload.
            int count = b[offset];
            ByteVector.fromArray(SPECIES, a, offset)
                    .lanewise(VectorOperators.ROR, count)
                    .intoArray(r, offset);
            offset += lanes;
        }
    }

    assertShiftArraysEquals(r, a, b, Byte128VectorTests::ROR_unary);
}



/**
 * Masked variant: applies {@code VectorOperators.ROR} with a scalar count
 * under a vector mask to every species-sized chunk of the input and checks
 * the result against {@code ROR_unary}.
 *
 * @param fa supplier of the array of values to rotate
 * @param fb supplier of the array from which rotate counts are read
 * @param fm supplier of the boolean array the vector mask is built from
 */
@Test(dataProvider = "byteBinaryOpMaskProvider")
static void RORByte128VectorTestsShift(IntFunction<byte[]> fa, IntFunction<byte[]> fb,
                                       IntFunction<boolean[]> fm) {
    final int lanes = SPECIES.length();
    byte[] a = fa.apply(lanes);
    byte[] b = fb.apply(lanes);
    byte[] r = fr.apply(lanes);
    boolean[] mask = fm.apply(lanes);
    // The mask is built once from the start of the boolean array and reused for every chunk.
    VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);

    for (int round = 0; round < INVOC_COUNT; round++) {
        int offset = 0;
        while (offset < a.length) {
            // Kept int-typed on purpose: the argument type selects the lanewise overload.
            int count = b[offset];
            ByteVector.fromArray(SPECIES, a, offset)
                    .lanewise(VectorOperators.ROR, count, vmask)
                    .intoArray(r, offset);
            offset += lanes;
        }
    }

    assertShiftArraysEquals(r, a, b, mask, Byte128VectorTests::ROR_unary);
}



static byte MIN(byte a, byte b) {
return (byte)(Math.min(a, b));
@@ -2427,6 +2427,94 @@ static void ASHRByte256VectorTestsShift(IntFunction<byte[]> fa, IntFunction<byte



/**
 * Scalar reference for an 8-bit rotate-left, used as the expected-value oracle.
 *
 * @param a value whose low 8 bits are rotated
 * @param b rotate count; only its low 3 bits are used
 * @return {@code a} rotated left by {@code b & 7} bit positions
 */
static byte ROL_unary(byte a, byte b) {
    // Zero-extend so that the unsigned right shift does not pull in sign bits.
    int bits = a & 0xFF;
    int count = b & 7;
    // The complementary shift is masked so that a count of 0 shifts by 0, not 8.
    int wrapped = (8 - count) & 7;
    return (byte) ((bits << count) | (bits >>> wrapped));
}

/**
 * Applies {@code VectorOperators.ROL} with a scalar count to every
 * species-sized chunk of the input and checks the result against
 * {@code ROL_unary}.
 *
 * @param fa supplier of the array of values to rotate
 * @param fb supplier of the array from which rotate counts are read
 */
@Test(dataProvider = "byteBinaryOpProvider")
static void ROLByte256VectorTestsShift(IntFunction<byte[]> fa, IntFunction<byte[]> fb) {
    final int lanes = SPECIES.length();
    byte[] a = fa.apply(lanes);
    byte[] b = fb.apply(lanes);
    byte[] r = fr.apply(lanes);

    for (int round = 0; round < INVOC_COUNT; round++) {
        int offset = 0;
        while (offset < a.length) {
            // Kept int-typed on purpose: the argument type selects the lanewise overload.
            int count = b[offset];
            ByteVector.fromArray(SPECIES, a, offset)
                    .lanewise(VectorOperators.ROL, count)
                    .intoArray(r, offset);
            offset += lanes;
        }
    }

    assertShiftArraysEquals(r, a, b, Byte256VectorTests::ROL_unary);
}



/**
 * Masked variant: applies {@code VectorOperators.ROL} with a scalar count
 * under a vector mask to every species-sized chunk of the input and checks
 * the result against {@code ROL_unary}.
 *
 * @param fa supplier of the array of values to rotate
 * @param fb supplier of the array from which rotate counts are read
 * @param fm supplier of the boolean array the vector mask is built from
 */
@Test(dataProvider = "byteBinaryOpMaskProvider")
static void ROLByte256VectorTestsShift(IntFunction<byte[]> fa, IntFunction<byte[]> fb,
                                       IntFunction<boolean[]> fm) {
    final int lanes = SPECIES.length();
    byte[] a = fa.apply(lanes);
    byte[] b = fb.apply(lanes);
    byte[] r = fr.apply(lanes);
    boolean[] mask = fm.apply(lanes);
    // The mask is built once from the start of the boolean array and reused for every chunk.
    VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);

    for (int round = 0; round < INVOC_COUNT; round++) {
        int offset = 0;
        while (offset < a.length) {
            // Kept int-typed on purpose: the argument type selects the lanewise overload.
            int count = b[offset];
            ByteVector.fromArray(SPECIES, a, offset)
                    .lanewise(VectorOperators.ROL, count, vmask)
                    .intoArray(r, offset);
            offset += lanes;
        }
    }

    assertShiftArraysEquals(r, a, b, mask, Byte256VectorTests::ROL_unary);
}






/**
 * Scalar reference for an 8-bit rotate-right, used as the expected-value oracle.
 *
 * @param a value whose low 8 bits are rotated
 * @param b rotate count; only its low 3 bits are used
 * @return {@code a} rotated right by {@code b & 7} bit positions
 */
static byte ROR_unary(byte a, byte b) {
    // Zero-extend so that the unsigned right shift does not pull in sign bits.
    int bits = a & 0xFF;
    int count = b & 7;
    // The complementary shift is masked so that a count of 0 shifts by 0, not 8.
    int wrapped = (8 - count) & 7;
    return (byte) ((bits >>> count) | (bits << wrapped));
}

/**
 * Applies {@code VectorOperators.ROR} with a scalar count to every
 * species-sized chunk of the input and checks the result against
 * {@code ROR_unary}.
 *
 * @param fa supplier of the array of values to rotate
 * @param fb supplier of the array from which rotate counts are read
 */
@Test(dataProvider = "byteBinaryOpProvider")
static void RORByte256VectorTestsShift(IntFunction<byte[]> fa, IntFunction<byte[]> fb) {
    final int lanes = SPECIES.length();
    byte[] a = fa.apply(lanes);
    byte[] b = fb.apply(lanes);
    byte[] r = fr.apply(lanes);

    for (int round = 0; round < INVOC_COUNT; round++) {
        int offset = 0;
        while (offset < a.length) {
            // Kept int-typed on purpose: the argument type selects the lanewise overload.
            int count = b[offset];
            ByteVector.fromArray(SPECIES, a, offset)
                    .lanewise(VectorOperators.ROR, count)
                    .intoArray(r, offset);
            offset += lanes;
        }
    }

    assertShiftArraysEquals(r, a, b, Byte256VectorTests::ROR_unary);
}



/**
 * Masked variant: applies {@code VectorOperators.ROR} with a scalar count
 * under a vector mask to every species-sized chunk of the input and checks
 * the result against {@code ROR_unary}.
 *
 * @param fa supplier of the array of values to rotate
 * @param fb supplier of the array from which rotate counts are read
 * @param fm supplier of the boolean array the vector mask is built from
 */
@Test(dataProvider = "byteBinaryOpMaskProvider")
static void RORByte256VectorTestsShift(IntFunction<byte[]> fa, IntFunction<byte[]> fb,
                                       IntFunction<boolean[]> fm) {
    final int lanes = SPECIES.length();
    byte[] a = fa.apply(lanes);
    byte[] b = fb.apply(lanes);
    byte[] r = fr.apply(lanes);
    boolean[] mask = fm.apply(lanes);
    // The mask is built once from the start of the boolean array and reused for every chunk.
    VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);

    for (int round = 0; round < INVOC_COUNT; round++) {
        int offset = 0;
        while (offset < a.length) {
            // Kept int-typed on purpose: the argument type selects the lanewise overload.
            int count = b[offset];
            ByteVector.fromArray(SPECIES, a, offset)
                    .lanewise(VectorOperators.ROR, count, vmask)
                    .intoArray(r, offset);
            offset += lanes;
        }
    }

    assertShiftArraysEquals(r, a, b, mask, Byte256VectorTests::ROR_unary);
}



static byte MIN(byte a, byte b) {
return (byte)(Math.min(a, b));
Loading