Arm64/Sve: Add SignExtend* and ZeroExtend* math APIs (dotnet#101702)
* Add [Sign|Zero]Extend[8|16|32] APIs (usage sketch below)

* Add API to instruction mapping

* Eliminate extra movprfx for AllBitsSetMask

* Add test cases
kunalspathak authored and matouskozak committed Apr 30, 2024
1 parent 7795c02 commit 0d25772
Showing 7 changed files with 386 additions and 10 deletions.
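
For orientation before the diffs, a hedged usage sketch of the six new unary APIs (lane-type-preserving; the IsSupported gate and lane values are illustrative, not taken from this commit):

    using System.Numerics;
    using System.Runtime.Intrinsics.Arm;

    if (Sve.IsSupported)
    {
        // Each API sign- or zero-extends the low 8/16/32 bits of every lane in place.
        Vector<int>   s16 = Sve.SignExtend16(new Vector<int>(0x8001));       // each lane -> -32767
        Vector<long>  s32 = Sve.SignExtend32(new Vector<long>(0x8000_0001)); // each lane -> -2147483647
        Vector<ulong> z8  = Sve.ZeroExtend8(new Vector<ulong>(0x1FF));       // each lane -> 0xFF
    }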
3 changes: 1 addition & 2 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -448,14 +448,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
 if (intrin.op3->isContained())
 {
     assert(intrin.op3->IsVectorZero());
-    if (intrin.op1->isContained())
+    if (intrin.op1->isContained() || intrin.op1->IsMaskAllBitsSet())
     {
         // We already skip importing ConditionalSelect if op1 == trueAll, however
         // if we still see it here, it is because we wrapped the predicated instruction
         // inside ConditionalSelect.
         // As such, no need to move the `falseReg` to `targetReg`
         // because the predicated instruction will eventually set it.
-        assert(intrin.op1->IsMaskAllBitsSet());
     }
     else
     {
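
The "eliminate extra movprfx" note in the commit message refers to this path: the JIT wraps embedded-masked SVE intrinsics in a ConditionalSelect, and when the governing mask (op1) is all-true and the false operand (op3) is a contained zero, the predicated instruction writes every lane, so moving falseReg into targetReg first would be dead code. A hedged C# sketch of a pattern that should reach this path (the mask-creation helper and its default all-true pattern are my assumption about the .NET Sve surface):

    using System.Numerics;
    using System.Runtime.Intrinsics.Arm;

    Vector<int> values = new Vector<int>(0x8001);

    // op1: all-bits-set mask; op2: predicated SXTH; op3: contained vector zero.
    // With an all-true predicate every lane is written, so no movprfx or
    // falseReg-to-targetReg move is needed before the sxth.
    Vector<int> result = Sve.ConditionalSelect(
        Sve.CreateTrueMaskInt32(),   // IsMaskAllBitsSet()
        Sve.SignExtend16(values),    // the predicated instruction
        Vector<int>.Zero);           // IsVectorZero()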
7 changes: 6 additions & 1 deletion src/coreclr/jit/hwintrinsiclistarm64sve.h
@@ -69,10 +69,15 @@ HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt64,
 HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
 HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
 HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics)
+HARDWARE_INTRINSIC(Sve, SignExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxth, INS_invalid, INS_sve_sxth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
+HARDWARE_INTRINSIC(Sve, SignExtend32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxtw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
+HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
 HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics)

 HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Sve, UnzipOdd, -1, 2, true, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(Sve, ZeroExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxth, INS_invalid, INS_sve_uxth, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
+HARDWARE_INTRINSIC(Sve, ZeroExtend32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
+HARDWARE_INTRINSIC(Sve, ZeroExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
 HARDWARE_INTRINSIC(Sve, ZipHigh, -1, 2, true, {INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Sve, ZipLow, -1, 2, true, {INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)

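Reading the table: the per-type instruction array in each entry follows the JIT's SIMD base type column order (sbyte, byte, short, ushort, int, uint, long, ulong, float, double — consistent with the rows above), so ZeroExtend8, for example, carries INS_sve_uxtb only in the ushort, uint, and ulong columns, matching the three managed overloads added below. A hedged sketch of what one entry means in practice (lane values illustrative):

    using System.Numerics;
    using System.Runtime.Intrinsics.Arm;

    // Per the ZeroExtend8 row, this overload is valid for ushort lanes and
    // should lower to a predicated uxtb on SVE hardware.
    Vector<ushort> low = Sve.ZeroExtend8(new Vector<ushort>(0xABCD)); // each lane -> 0x00CD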
@@ -1010,6 +1010,86 @@ public new abstract class Arm64 : AdvSimd.Arm64
/// </summary>
public static unsafe Vector<double> Multiply(Vector<double> left, Vector<double> right) { throw new PlatformNotSupportedException(); }

/// SignExtend16 : Sign-extend the low 16 bits

/// <summary>
/// svint32_t svexth[_s32]_m(svint32_t inactive, svbool_t pg, svint32_t op)
/// SXTH Ztied.S, Pg/M, Zop.S
/// MOVPRFX Zresult, Zinactive; SXTH Zresult.S, Pg/M, Zop.S
/// svint32_t svexth[_s32]_x(svbool_t pg, svint32_t op)
/// SXTH Ztied.S, Pg/M, Ztied.S
/// MOVPRFX Zresult, Zop; SXTH Zresult.S, Pg/M, Zop.S
/// svint32_t svexth[_s32]_z(svbool_t pg, svint32_t op)
/// MOVPRFX Zresult.S, Pg/Z, Zop.S; SXTH Zresult.S, Pg/M, Zop.S
/// </summary>
public static unsafe Vector<int> SignExtend16(Vector<int> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svexth[_s64]_m(svint64_t inactive, svbool_t pg, svint64_t op)
/// SXTH Ztied.D, Pg/M, Zop.D
/// MOVPRFX Zresult, Zinactive; SXTH Zresult.D, Pg/M, Zop.D
/// svint64_t svexth[_s64]_x(svbool_t pg, svint64_t op)
/// SXTH Ztied.D, Pg/M, Ztied.D
/// MOVPRFX Zresult, Zop; SXTH Zresult.D, Pg/M, Zop.D
/// svint64_t svexth[_s64]_z(svbool_t pg, svint64_t op)
/// MOVPRFX Zresult.D, Pg/Z, Zop.D; SXTH Zresult.D, Pg/M, Zop.D
/// </summary>
public static unsafe Vector<long> SignExtend16(Vector<long> value) { throw new PlatformNotSupportedException(); }


/// SignExtend32 : Sign-extend the low 32 bits

/// <summary>
/// svint64_t svextw[_s64]_m(svint64_t inactive, svbool_t pg, svint64_t op)
/// SXTW Ztied.D, Pg/M, Zop.D
/// MOVPRFX Zresult, Zinactive; SXTW Zresult.D, Pg/M, Zop.D
/// svint64_t svextw[_s64]_x(svbool_t pg, svint64_t op)
/// SXTW Ztied.D, Pg/M, Ztied.D
/// MOVPRFX Zresult, Zop; SXTW Zresult.D, Pg/M, Zop.D
/// svint64_t svextw[_s64]_z(svbool_t pg, svint64_t op)
/// MOVPRFX Zresult.D, Pg/Z, Zop.D; SXTW Zresult.D, Pg/M, Zop.D
/// </summary>
public static unsafe Vector<long> SignExtend32(Vector<long> value) { throw new PlatformNotSupportedException(); }


/// SignExtend8 : Sign-extend the low 8 bits

/// <summary>
/// svint16_t svextb[_s16]_m(svint16_t inactive, svbool_t pg, svint16_t op)
/// SXTB Ztied.H, Pg/M, Zop.H
/// MOVPRFX Zresult, Zinactive; SXTB Zresult.H, Pg/M, Zop.H
/// svint16_t svextb[_s16]_x(svbool_t pg, svint16_t op)
/// SXTB Ztied.H, Pg/M, Ztied.H
/// MOVPRFX Zresult, Zop; SXTB Zresult.H, Pg/M, Zop.H
/// svint16_t svextb[_s16]_z(svbool_t pg, svint16_t op)
/// MOVPRFX Zresult.H, Pg/Z, Zop.H; SXTB Zresult.H, Pg/M, Zop.H
/// </summary>
public static unsafe Vector<short> SignExtend8(Vector<short> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svextb[_s32]_m(svint32_t inactive, svbool_t pg, svint32_t op)
/// SXTB Ztied.S, Pg/M, Zop.S
/// MOVPRFX Zresult, Zinactive; SXTB Zresult.S, Pg/M, Zop.S
/// svint32_t svextb[_s32]_x(svbool_t pg, svint32_t op)
/// SXTB Ztied.S, Pg/M, Ztied.S
/// MOVPRFX Zresult, Zop; SXTB Zresult.S, Pg/M, Zop.S
/// svint32_t svextb[_s32]_z(svbool_t pg, svint32_t op)
/// MOVPRFX Zresult.S, Pg/Z, Zop.S; SXTB Zresult.S, Pg/M, Zop.S
/// </summary>
public static unsafe Vector<int> SignExtend8(Vector<int> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svextb[_s64]_m(svint64_t inactive, svbool_t pg, svint64_t op)
/// SXTB Ztied.D, Pg/M, Zop.D
/// MOVPRFX Zresult, Zinactive; SXTB Zresult.D, Pg/M, Zop.D
/// svint64_t svextb[_s64]_x(svbool_t pg, svint64_t op)
/// SXTB Ztied.D, Pg/M, Ztied.D
/// MOVPRFX Zresult, Zop; SXTB Zresult.D, Pg/M, Zop.D
/// svint64_t svextb[_s64]_z(svbool_t pg, svint64_t op)
/// MOVPRFX Zresult.D, Pg/Z, Zop.D; SXTB Zresult.D, Pg/M, Zop.D
/// </summary>
public static unsafe Vector<long> SignExtend8(Vector<long> value) { throw new PlatformNotSupportedException(); }

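// Illustrative note (not part of the original file): SignExtend* reads only
// the low 8/16/32 bits of each lane and replicates that field's sign bit
// upward, ignoring the rest of the input lane. For example:
//   Sve.SignExtend16(new Vector<int>(0x0000_8001))  // each lane -> 0xFFFF_8001 (-32767)
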
/// Subtract : Subtract

/// <summary>
@@ -1248,6 +1328,84 @@ public new abstract class Arm64 : AdvSimd.Arm64
/// </summary>
public static unsafe Vector<ulong> UnzipOdd(Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }

/// ZeroExtend16 : Zero-extend the low 16 bits

/// <summary>
/// svuint32_t svexth[_u32]_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
/// UXTH Ztied.S, Pg/M, Zop.S
/// MOVPRFX Zresult, Zinactive; UXTH Zresult.S, Pg/M, Zop.S
/// svuint32_t svexth[_u32]_x(svbool_t pg, svuint32_t op)
/// UXTH Ztied.S, Pg/M, Ztied.S
/// AND Ztied.S, Ztied.S, #65535
/// svuint32_t svexth[_u32]_z(svbool_t pg, svuint32_t op)
/// MOVPRFX Zresult.S, Pg/Z, Zop.S; UXTH Zresult.S, Pg/M, Zop.S
/// </summary>
public static unsafe Vector<uint> ZeroExtend16(Vector<uint> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svexth[_u64]_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
/// UXTH Ztied.D, Pg/M, Zop.D
/// MOVPRFX Zresult, Zinactive; UXTH Zresult.D, Pg/M, Zop.D
/// svuint64_t svexth[_u64]_x(svbool_t pg, svuint64_t op)
/// UXTH Ztied.D, Pg/M, Ztied.D
/// AND Ztied.D, Ztied.D, #65535
/// svuint64_t svexth[_u64]_z(svbool_t pg, svuint64_t op)
/// MOVPRFX Zresult.D, Pg/Z, Zop.D; UXTH Zresult.D, Pg/M, Zop.D
/// </summary>
public static unsafe Vector<ulong> ZeroExtend16(Vector<ulong> value) { throw new PlatformNotSupportedException(); }


/// ZeroExtend32 : Zero-extend the low 32 bits

/// <summary>
/// svuint64_t svextw[_u64]_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
/// UXTW Ztied.D, Pg/M, Zop.D
/// MOVPRFX Zresult, Zinactive; UXTW Zresult.D, Pg/M, Zop.D
/// svuint64_t svextw[_u64]_x(svbool_t pg, svuint64_t op)
/// UXTW Ztied.D, Pg/M, Ztied.D
/// AND Ztied.D, Ztied.D, #4294967295
/// svuint64_t svextw[_u64]_z(svbool_t pg, svuint64_t op)
/// MOVPRFX Zresult.D, Pg/Z, Zop.D; UXTW Zresult.D, Pg/M, Zop.D
/// </summary>
public static unsafe Vector<ulong> ZeroExtend32(Vector<ulong> value) { throw new PlatformNotSupportedException(); }

/// ZeroExtend8 : Zero-extend the low 8 bits

/// <summary>
/// svuint16_t svextb[_u16]_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
/// UXTB Ztied.H, Pg/M, Zop.H
/// MOVPRFX Zresult, Zinactive; UXTB Zresult.H, Pg/M, Zop.H
/// svuint16_t svextb[_u16]_x(svbool_t pg, svuint16_t op)
/// UXTB Ztied.H, Pg/M, Ztied.H
/// AND Ztied.H, Ztied.H, #255
/// svuint16_t svextb[_u16]_z(svbool_t pg, svuint16_t op)
/// MOVPRFX Zresult.H, Pg/Z, Zop.H; UXTB Zresult.H, Pg/M, Zop.H
/// </summary>
public static unsafe Vector<ushort> ZeroExtend8(Vector<ushort> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svextb[_u32]_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
/// UXTB Ztied.S, Pg/M, Zop.S
/// MOVPRFX Zresult, Zinactive; UXTB Zresult.S, Pg/M, Zop.S
/// svuint32_t svextb[_u32]_x(svbool_t pg, svuint32_t op)
/// UXTB Ztied.S, Pg/M, Ztied.S
/// AND Ztied.S, Ztied.S, #255
/// svuint32_t svextb[_u32]_z(svbool_t pg, svuint32_t op)
/// MOVPRFX Zresult.S, Pg/Z, Zop.S; UXTB Zresult.S, Pg/M, Zop.S
/// </summary>
public static unsafe Vector<uint> ZeroExtend8(Vector<uint> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svextb[_u64]_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
/// UXTB Ztied.D, Pg/M, Zop.D
/// MOVPRFX Zresult, Zinactive; UXTB Zresult.D, Pg/M, Zop.D
/// svuint64_t svextb[_u64]_x(svbool_t pg, svuint64_t op)
/// UXTB Ztied.D, Pg/M, Ztied.D
/// AND Ztied.D, Ztied.D, #255
/// svuint64_t svextb[_u64]_z(svbool_t pg, svuint64_t op)
/// MOVPRFX Zresult.D, Pg/Z, Zop.D; UXTB Zresult.D, Pg/M, Zop.D
/// </summary>
public static unsafe Vector<ulong> ZeroExtend8(Vector<ulong> value) { throw new PlatformNotSupportedException(); }

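// Illustrative note (not part of the original file): ZeroExtend* clears all
// bits above the low 8/16/32-bit field, equivalent to the unpredicated AND
// encodings listed above (e.g. AND with #4294967295 for ZeroExtend32):
//   Sve.ZeroExtend32(new Vector<ulong>(0xFFFF_FFFF_0000_00FF))  // each lane -> 0xFF
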
/// ZipHigh : Interleave elements from high halves of two inputs

(4 more changed files not shown.)
