Skip to content

Commit

Permalink
[ARM][AArch64] Introduce qrdmlah and qrdmlsh intrinsics
Browse files Browse the repository at this point in the history
Since it's introduction, the qrdmlah has been represented as a qrdmulh
and a sadd_sat. This doesn't produce the same result for all input
values though. This patch fixes that by introducing a qrdmlah (and
qrdmlsh) intrinsic specifically for the vqrdmlah and sqrdmlah
instructions. The old test cases will now produce a qrdmulh and sqadd,
as expected.

Fixes #53120 and #50905 and #51761.

Differential Revision: https://reviews.llvm.org/D117592
  • Loading branch information
davemgreen committed Jan 27, 2022
1 parent b75bdff commit 82973ed
Show file tree
Hide file tree
Showing 11 changed files with 842 additions and 450 deletions.
22 changes: 10 additions & 12 deletions clang/include/clang/Basic/arm_neon.td
Expand Up @@ -80,10 +80,8 @@ def OP_QDMULH_N : Op<(call "vqdmulh", $p0, (dup $p1))>;
def OP_QDMULH_LN : Op<(call "vqdmulh", $p0, (call_mangled "splat_lane", $p1, $p2))>;
def OP_QRDMULH_LN : Op<(call "vqrdmulh", $p0, (call_mangled "splat_lane", $p1, $p2))>;
def OP_QRDMULH_N : Op<(call "vqrdmulh", $p0, (dup $p1))>;
def OP_QRDMLAH : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, $p2))>;
def OP_QRDMLSH : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, $p2))>;
def OP_QRDMLAH_LN : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
def OP_QRDMLSH_LN : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
def OP_QRDMLAH_LN : Op<(call "vqrdmlah", $p0, $p1, (call_mangled "splat_lane", $p2, $p3))>;
def OP_QRDMLSH_LN : Op<(call "vqrdmlsh", $p0, $p1, (call_mangled "splat_lane", $p2, $p3))>;
def OP_FMS_LN : Op<(call "vfma_lane", $p0, (op "-", $p1), $p2, $p3)>;
def OP_FMS_LNQ : Op<(call "vfma_laneq", $p0, (op "-", $p1), $p2, $p3)>;
def OP_TRN1 : Op<(shuffle $p0, $p1, (interleave (decimate mask0, 2),
Expand Down Expand Up @@ -185,10 +183,10 @@ def OP_SCALAR_QDMULL_LN : ScalarMulOp<"vqdmull">;
def OP_SCALAR_QDMULH_LN : ScalarMulOp<"vqdmulh">;
def OP_SCALAR_QRDMULH_LN : ScalarMulOp<"vqrdmulh">;

def OP_SCALAR_QRDMLAH_LN : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1,
(call "vget_lane", $p2, $p3)))>;
def OP_SCALAR_QRDMLSH_LN : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1,
(call "vget_lane", $p2, $p3)))>;
def OP_SCALAR_QRDMLAH_LN : Op<(call "vqrdmlah", $p0, $p1,
(call "vget_lane", $p2, $p3))>;
def OP_SCALAR_QRDMLSH_LN : Op<(call "vqrdmlsh", $p0, $p1,
(call "vget_lane", $p2, $p3))>;

def OP_SCALAR_HALF_GET_LN : Op<(bitcast "float16_t",
(call "vget_lane",
Expand Down Expand Up @@ -326,8 +324,8 @@ def VQDMULH : SInst<"vqdmulh", "...", "siQsQi">;
def VQRDMULH : SInst<"vqrdmulh", "...", "siQsQi">;

let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in {
def VQRDMLAH : SOpInst<"vqrdmlah", "....", "siQsQi", OP_QRDMLAH>;
def VQRDMLSH : SOpInst<"vqrdmlsh", "....", "siQsQi", OP_QRDMLSH>;
def VQRDMLAH : SInst<"vqrdmlah", "....", "siQsQi">;
def VQRDMLSH : SInst<"vqrdmlsh", "....", "siQsQi">;
}

def VQDMLAL : SInst<"vqdmlal", "(>Q)(>Q)..", "si">;
Expand Down Expand Up @@ -1400,11 +1398,11 @@ def SCALAR_SQRDMULH : SInst<"vqrdmulh", "111", "SsSi">;
let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
////////////////////////////////////////////////////////////////////////////////
// Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
def SCALAR_SQRDMLAH : SOpInst<"vqrdmlah", "1111", "SsSi", OP_QRDMLAH>;
def SCALAR_SQRDMLAH : SInst<"vqrdmlah", "1111", "SsSi">;

////////////////////////////////////////////////////////////////////////////////
// Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
def SCALAR_SQRDMLSH : SOpInst<"vqrdmlsh", "1111", "SsSi", OP_QRDMLSH>;
def SCALAR_SQRDMLSH : SInst<"vqrdmlsh", "1111", "SsSi">;
}

////////////////////////////////////////////////////////////////////////////////
Expand Down
12 changes: 12 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Expand Up @@ -5874,6 +5874,10 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
NEONMAP1(vqrdmlah_v, arm_neon_vqrdmlah, Add1ArgType),
NEONMAP1(vqrdmlahq_v, arm_neon_vqrdmlah, Add1ArgType),
NEONMAP1(vqrdmlsh_v, arm_neon_vqrdmlsh, Add1ArgType),
NEONMAP1(vqrdmlshq_v, arm_neon_vqrdmlsh, Add1ArgType),
NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
Expand Down Expand Up @@ -6099,6 +6103,10 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
NEONMAP1(vqrdmlah_v, aarch64_neon_sqrdmlah, Add1ArgType),
NEONMAP1(vqrdmlahq_v, aarch64_neon_sqrdmlah, Add1ArgType),
NEONMAP1(vqrdmlsh_v, aarch64_neon_sqrdmlsh, Add1ArgType),
NEONMAP1(vqrdmlshq_v, aarch64_neon_sqrdmlsh, Add1ArgType),
NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
Expand Down Expand Up @@ -6301,6 +6309,10 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
Expand Down

0 comments on commit 82973ed

Please sign in to comment.