[SelectionDAG][Mips][PowerPC][RISCV][WebAssembly] Teach computeKnownBits/ComputeNumSignBits about atomics

Unlike normal loads, these don't have an extension field, but we know
from TargetLowering whether they are sign-extending or zero-extending,
and so can optimise away unnecessary extensions.
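
As an illustration only, here is a small standalone C++ model of that
reasoning; it is not LLVM code, and the names (AtomicExt,
knownZeroHighBits) are invented for this sketch:

// Standalone model of the computeKnownBits change: if the target
// guarantees zero-extending atomics, every bit above the memory width
// of the atomic result is known to be zero, so a later explicit zext
// (or masking 'and') is redundant.
#include <cassert>
#include <cstdio>

enum class AtomicExt { ZeroExtend, SignExtend, AnyExtend }; // invented

// How many high bits of a VTBits-wide atomic result are known zero.
unsigned knownZeroHighBits(unsigned MemBits, unsigned VTBits, AtomicExt Ext) {
  assert(MemBits <= VTBits && "memory type wider than result type?");
  return Ext == AtomicExt::ZeroExtend ? VTBits - MemBits : 0;
}

int main() {
  // An 8-bit atomic load whose i64 result the target zero-extends:
  // the top 56 bits are known zero, so (and x, 255) can be dropped.
  std::printf("%u\n", knownZeroHighBits(8, 64, AtomicExt::ZeroExtend)); // 56
}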

This was noticed on RISC-V, where sign extensions in the calling
convention would result in unnecessary explicit extension instructions,
but this also fixes some Mips inefficiencies. PowerPC sees churn in the
tests as all the zero extensions are only for promoting 32-bit to
64-bit, but these zero extensions are still not optimised away as they
should be, likely due to i32 being a legal type.

This also simplifies the WebAssembly backend somewhat; it currently
works around the lack of target-independent combines with some ugly
patterns that break once the redundant extensions are optimised away.

Re-landed with correct handling in ComputeNumSignBits for Tmp == VTBits,
where zero-extending atomics were incorrectly returning 0 rather than
the (slightly confusing) required return value of 1.
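
To make the Tmp == VTBits corner case concrete, here is a minimal
standalone sketch (invented names again, not the LLVM implementation):
ComputeNumSignBits must always report at least one sign bit, so when
the memory width equals the value width a zero-extending atomic can
only claim 1, not VTBits - MemBits = 0.

// Standalone model of the corrected sign-bit computation for atomics.
#include <cassert>
#include <cstdio>

enum class AtomicExt { ZeroExtend, SignExtend, AnyExtend }; // invented

// Minimum number of known sign bits of a VTBits-wide atomic result
// that accesses MemBits bits of memory; the contract requires >= 1.
unsigned atomicNumSignBits(unsigned MemBits, unsigned VTBits, AtomicExt Ext) {
  assert(MemBits <= VTBits && "memory type wider than result type?");
  if (MemBits == VTBits)
    return 1;                    // early-out: nothing known beyond the value
  if (Ext == AtomicExt::SignExtend)
    return VTBits - MemBits + 1; // the sign bit plus its copies above it
  if (Ext == AtomicExt::ZeroExtend)
    return VTBits - MemBits;     // the known-zero high bits all agree
  return 1;                      // any-extend: only the trivial guarantee
}

int main() {
  // The previously broken case: memory width == value width on a
  // zero-extending target; VTBits - MemBits would be 0, which violates
  // the contract, so the fix returns 1 instead.
  std::printf("%u\n", atomicNumSignBits(64, 64, AtomicExt::ZeroExtend)); // 1
  std::printf("%u\n", atomicNumSignBits(32, 64, AtomicExt::SignExtend)); // 33
}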

Re-landed again after D102819 fixed PowerPC to correctly zero-extend all
of its atomics as it claimed to do, since the combination of that bug
and this optimisation caused buildbot regressions.

Reviewed By: RKSimon, atanasyan

Differential Revision: https://reviews.llvm.org/D101342
jrtc27 committed May 20, 2021
1 parent 68b88ae commit e10958c
Showing 9 changed files with 1,050 additions and 1,096 deletions.
64 changes: 63 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3074,7 +3074,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
case ISD::SMULO:
case ISD::UMULO:
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
if (Op.getResNo() != 1)
break;
// The boolean result conforms to getBooleanContents.
@@ -3529,6 +3528,42 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::smin(Known, Known2);
break;
}
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
if (Op.getResNo() == 1) {
// The boolean result conforms to getBooleanContents.
// If we know the result of a setcc has the top bits zero, use this info.
// We know that we have an integer-based boolean since these operations
// are only available for integer.
if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
}
LLVM_FALLTHROUGH;
case ISD::ATOMIC_CMP_SWAP:
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_CLR:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_LOAD: {
unsigned MemBits =
cast<AtomicSDNode>(Op)->getMemoryVT().getScalarSizeInBits();
// If we are looking at the loaded value.
if (Op.getResNo() == 0) {
if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
Known.Zero.setBitsFrom(MemBits);
}
break;
}
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
TLI->computeKnownBitsForFrameIndex(cast<FrameIndexSDNode>(Op)->getIndex(),
@@ -4109,6 +4144,33 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
case ISD::ATOMIC_CMP_SWAP:
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_CLR:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_LOAD: {
Tmp = cast<AtomicSDNode>(Op)->getMemoryVT().getScalarSizeInBits();
// If we are looking at the loaded value.
if (Op.getResNo() == 0) {
if (Tmp == VTBits)
return 1; // early-out
if (TLI->getExtendForAtomicOps() == ISD::SIGN_EXTEND)
return VTBits - Tmp + 1;
if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
return VTBits - Tmp;
}
break;
}
}

// If we are looking at the loaded value of the SDNode.
49 changes: 11 additions & 38 deletions llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -259,26 +259,20 @@ defm ATOMIC_LOAD32_U_I64 : AtomicLoad<I64, "i64.atomic.load32_u", 0x16>;
// therefore don't have the extension type field. So instead of matching that,
// we match the patterns that the type legalizer expands them to.

// We directly match zext patterns and select the zext atomic loads.
// i32 (zext (i8 (atomic_load_8))) gets legalized to
// i32 (and (i32 (atomic_load_8)), 255)
// These can be selected to a single zero-extending atomic load instruction.
def zext_aload_8_32 :
PatFrag<(ops node:$addr), (and (i32 (atomic_load_8 node:$addr)), 255)>;
def zext_aload_16_32 :
PatFrag<(ops node:$addr), (and (i32 (atomic_load_16 node:$addr)), 65535)>;
// Unlike regular loads, extension to i64 is handled differently than i32.
// i64 (zext (i8 (atomic_load_8))) gets legalized to
// i64 (and (i64 (anyext (i32 (atomic_load_8)))), 255)
// Extension to i32 is elided by SelectionDAG as our atomic loads are
// zero-extending.
def zext_aload_8_64 :
PatFrag<(ops node:$addr),
(and (i64 (anyext (i32 (atomic_load_8 node:$addr)))), 255)>;
(i64 (zext (i32 (atomic_load_8 node:$addr))))>;
def zext_aload_16_64 :
PatFrag<(ops node:$addr),
(and (i64 (anyext (i32 (atomic_load_16 node:$addr)))), 65535)>;
(i64 (zext (i32 (atomic_load_16 node:$addr))))>;
def zext_aload_32_64 :
PatFrag<(ops node:$addr),
(zext (i32 (atomic_load node:$addr)))>;
(i64 (zext (i32 (atomic_load_32 node:$addr))))>;

// We don't have single sext atomic load instructions. So for sext loads, we
// match bare subword loads (for 32-bit results) and anyext loads (for 64-bit
@@ -290,8 +284,6 @@ def sext_aload_16_64 :
PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_16 node:$addr)))>;

// Select zero-extending loads with no constant offset.
defm : LoadPatNoOffset<i32, zext_aload_8_32, "ATOMIC_LOAD8_U_I32">;
defm : LoadPatNoOffset<i32, zext_aload_16_32, "ATOMIC_LOAD16_U_I32">;
defm : LoadPatNoOffset<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
defm : LoadPatNoOffset<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
defm : LoadPatNoOffset<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
@@ -304,10 +296,6 @@ defm : LoadPatNoOffset<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
// 32->64 sext load gets selected as i32.atomic.load, i64.extend_i32_s

// Zero-extending loads with constant offset
defm : LoadPatImmOff<i32, zext_aload_8_32, regPlusImm, "ATOMIC_LOAD8_U_I32">;
defm : LoadPatImmOff<i32, zext_aload_16_32, regPlusImm, "ATOMIC_LOAD16_U_I32">;
defm : LoadPatImmOff<i32, zext_aload_8_32, or_is_add, "ATOMIC_LOAD8_U_I32">;
defm : LoadPatImmOff<i32, zext_aload_16_32, or_is_add, "ATOMIC_LOAD16_U_I32">;
defm : LoadPatImmOff<i64, zext_aload_8_64, regPlusImm, "ATOMIC_LOAD8_U_I64">;
defm : LoadPatImmOff<i64, zext_aload_16_64, regPlusImm, "ATOMIC_LOAD16_U_I64">;
defm : LoadPatImmOff<i64, zext_aload_32_64, regPlusImm, "ATOMIC_LOAD32_U_I64">;
@@ -327,8 +315,6 @@ defm : LoadPatImmOff<i64, sext_aload_16_64, or_is_add, "ATOMIC_LOAD16_U_I64">;
// No 32->64 patterns, just use i32.atomic.load and i64.extend_s/i64

// Extending loads with just a constant offset
defm : LoadPatOffsetOnly<i32, zext_aload_8_32, "ATOMIC_LOAD8_U_I32">;
defm : LoadPatOffsetOnly<i32, zext_aload_16_32, "ATOMIC_LOAD16_U_I32">;
defm : LoadPatOffsetOnly<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
defm : LoadPatOffsetOnly<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
defm : LoadPatOffsetOnly<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
@@ -337,8 +323,6 @@ defm : LoadPatOffsetOnly<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
defm : LoadPatOffsetOnly<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
defm : LoadPatOffsetOnly<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;

defm : LoadPatGlobalAddrOffOnly<i32, zext_aload_8_32, "ATOMIC_LOAD8_U_I32">;
defm : LoadPatGlobalAddrOffOnly<i32, zext_aload_16_32, "ATOMIC_LOAD16_U_I32">;
defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
@@ -651,22 +635,13 @@ defm : BinRMWPattern<atomic_swap_32, atomic_swap_64,
// These are combined patterns of truncating store patterns and zero-extending
// load patterns above.
class zext_bin_rmw_8_32<PatFrag kind> :
PatFrag<(ops node:$addr, node:$val),
(and (i32 (kind node:$addr, node:$val)), 255)>;
class zext_bin_rmw_16_32<PatFrag kind> :
PatFrag<(ops node:$addr, node:$val),
(and (i32 (kind node:$addr, node:$val)), 65535)>;
PatFrag<(ops node:$addr, node:$val), (i32 (kind node:$addr, node:$val))>;
class zext_bin_rmw_16_32<PatFrag kind> : zext_bin_rmw_8_32<kind>;
class zext_bin_rmw_8_64<PatFrag kind> :
PatFrag<(ops node:$addr, node:$val),
(and (i64 (anyext (i32 (kind node:$addr,
(i32 (trunc (i64 node:$val))))))), 255)>;
class zext_bin_rmw_16_64<PatFrag kind> :
PatFrag<(ops node:$addr, node:$val),
(and (i64 (anyext (i32 (kind node:$addr,
(i32 (trunc (i64 node:$val))))))), 65535)>;
class zext_bin_rmw_32_64<PatFrag kind> :
PatFrag<(ops node:$addr, node:$val),
(zext (i32 (kind node:$addr, (i32 (trunc (i64 node:$val))))))>;
class zext_bin_rmw_16_64<PatFrag kind> : zext_bin_rmw_8_64<kind>;
class zext_bin_rmw_32_64<PatFrag kind> : zext_bin_rmw_8_64<kind>;

// Truncating & sign-extending binary RMW patterns.
// These are combined patterns of truncating store patterns and sign-extending
@@ -887,10 +862,8 @@ defm : TerRMWPattern<atomic_cmp_swap_32, atomic_cmp_swap_64,
// additional nodes such as anyext or assertzext depending on operand types.
class zext_ter_rmw_8_32<PatFrag kind> :
PatFrag<(ops node:$addr, node:$exp, node:$new),
(and (i32 (kind node:$addr, node:$exp, node:$new)), 255)>;
class zext_ter_rmw_16_32<PatFrag kind> :
PatFrag<(ops node:$addr, node:$exp, node:$new),
(and (i32 (kind node:$addr, node:$exp, node:$new)), 65535)>;
(i32 (kind node:$addr, node:$exp, node:$new))>;
class zext_ter_rmw_16_32<PatFrag kind> : zext_ter_rmw_8_32<kind>;
class zext_ter_rmw_8_64<PatFrag kind> :
PatFrag<(ops node:$addr, node:$exp, node:$new),
(zext (i32 (assertzext (i32 (kind node:$addr,