diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst
index fdf0b7b4732a15..3c07a85b949654 100644
--- a/llvm/docs/GlobalISel/GenericOpcode.rst
+++ b/llvm/docs/GlobalISel/GenericOpcode.rst
@@ -684,6 +684,10 @@ Only G_LOAD is valid if the result is a vector type.
 If the result is larger than the memory size, the high elements are undefined
 (i.e. this is not a per-element, vector anyextload)
 
+Unlike in SelectionDAG, atomic loads are expressed with the same
+opcodes as regular loads. G_LOAD, G_SEXTLOAD and G_ZEXTLOAD may all
+have atomic memory operands.
+
 G_INDEXED_LOAD
 ^^^^^^^^^^^^^^
 
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index b1a07e8738789e..ef4fc85b245de7 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -184,6 +184,8 @@ def : GINodeEquiv<G_LOAD, ld> { let CheckMMOIsNonAtomic = true; }
 def : GINodeEquiv<G_LOAD, atomic_load> {
   let CheckMMOIsNonAtomic = false;
   let CheckMMOIsAtomic = true;
+  let IfSignExtend = G_SEXTLOAD;
+  let IfZeroExtend = G_ZEXTLOAD;
 }
 
 // Operands are swapped for atomic_store vs. regular store
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 0f796a30d57105..171fdb1b98e006 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -1627,18 +1627,34 @@ defm atomic_load_umax : binary_atomic_op<atomic_load_umax>;
 defm atomic_store     : binary_atomic_op<atomic_store>;
 defm atomic_cmp_swap  : ternary_atomic_op<atomic_cmp_swap>;
 
+/// Atomic load which zeroes the excess high bits.
+def atomic_load_zext :
+  PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> {
+  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
+  let IsZeroExtLoad = true;
+}
+
+/// Atomic load which sign extends the excess high bits.
+def atomic_load_sext :
+  PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> {
+  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
+  let IsSignExtLoad = true;
+}
+
 def atomic_load_8 :
   PatFrag<(ops node:$ptr),
           (atomic_load node:$ptr)> {
   let IsAtomic = true;
   let MemoryVT = i8;
 }
+
 def atomic_load_16 :
   PatFrag<(ops node:$ptr),
           (atomic_load node:$ptr)> {
   let IsAtomic = true;
   let MemoryVT = i16;
 }
+
 def atomic_load_32 :
@@ -1652,6 +1668,40 @@ def atomic_load_64 :
   let MemoryVT = i64;
 }
 
+def atomic_load_zext_8 :
+  PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> {
+  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
+  let MemoryVT = i8;
+}
+
+def atomic_load_zext_16 :
+  PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> {
+  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
+  let MemoryVT = i16;
+}
+
+def atomic_load_sext_8 :
+  PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> {
+  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
+  let MemoryVT = i8;
+}
+
+def atomic_load_sext_16 :
+  PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> {
+  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
+  let MemoryVT = i16;
+}
+
+// Atomic load which zeroes or anyextends the high bits.
+def atomic_load_az_8 : PatFrags<(ops node:$op),
+                                [(atomic_load_8 node:$op),
+                                 (atomic_load_zext_8 node:$op)]>;
+
+// Atomic load which zeroes or anyextends the high bits.
+def atomic_load_az_16 : PatFrags<(ops node:$op),
+                                 [(atomic_load_16 node:$op),
+                                  (atomic_load_zext_16 node:$op)]>;
+
 def nonext_masked_gather :
   PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx),
           (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 2c94f87804ac93..8a0579d25b163e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -698,13 +698,13 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
 
   Register SrcReg = MI.getOperand(1).getReg();
   GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI);
-  if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) ||
-      !LoadMI->isSimple())
+  if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
     return false;
 
   Register LoadReg = LoadMI->getDstReg();
-  LLT LoadTy = MRI.getType(LoadReg);
+  LLT RegTy = MRI.getType(LoadReg);
   Register PtrReg = LoadMI->getPointerReg();
+  unsigned RegSize = RegTy.getSizeInBits();
   uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
   unsigned MaskSizeBits = MaskVal.countTrailingOnes();
 
@@ -715,7 +715,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
 
   // If the mask covers the whole destination register, there's nothing to
   // extend
-  if (MaskSizeBits >= LoadTy.getSizeInBits())
+  if (MaskSizeBits >= RegSize)
     return false;
 
   // Most targets cannot deal with loads of size < 8 and need to re-legalize to
@@ -725,17 +725,25 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
 
   const MachineMemOperand &MMO = LoadMI->getMMO();
   LegalityQuery::MemDesc MemDesc(MMO);
-  MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
+
+  // Don't modify the memory access size if this is atomic/volatile, but we can
+  // still adjust the opcode to indicate the high bit behavior.
+  if (LoadMI->isSimple())
+    MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
+  else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize)
+    return false;
+
   if (!isLegalOrBeforeLegalizer(
-          {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}}))
+          {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
     return false;
 
   MatchInfo = [=](MachineIRBuilder &B) {
     B.setInstrAndDebugLoc(*LoadMI);
     auto &MF = B.getMF();
     auto PtrInfo = MMO.getPointerInfo();
-    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8);
+    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
     B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
+    LoadMI->eraseFromParent();
   };
   return true;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index c477a44b13b2a0..6839e73796a642 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -29,21 +29,21 @@ def : Pat<(atomic_fence (timm), (timm)), (DMB (i32 0xb))>;
 
 // An atomic load operation that does not need either acquire or release
 // semantics.
-class relaxed_load<PatFrag base>
+class relaxed_load<PatFrags base>
   : PatFrag<(ops node:$ptr), (base node:$ptr)> {
   let IsAtomic = 1;
   let IsAtomicOrderingAcquireOrStronger = 0;
 }
 
 // A atomic load operation that actually needs acquire semantics.
-class acquiring_load<PatFrag base>
+class acquiring_load<PatFrags base>
   : PatFrag<(ops node:$ptr), (base node:$ptr)> {
   let IsAtomic = 1;
   let IsAtomicOrderingAcquire = 1;
 }
 
 // An atomic load operation that needs sequential consistency.
-class seq_cst_load<PatFrag base>
+class seq_cst_load<PatFrags base>
   : PatFrag<(ops node:$ptr), (base node:$ptr)> {
   let IsAtomic = 1;
   let IsAtomicOrderingSequentiallyConsistent = 1;
@@ -63,34 +63,34 @@ let Predicates = [HasLDAPR] in {
 }
 
 // 8-bit loads
-def : Pat<(seq_cst_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
-def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
-def : Pat<(relaxed_load<atomic_load_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
+def : Pat<(seq_cst_load<atomic_load_az_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
+def : Pat<(acquiring_load<atomic_load_az_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_az_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
                                            ro_Wextend8:$offset)),
           (LDRBBroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)>;
-def : Pat<(relaxed_load<atomic_load_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
-                                        ro_Xextend8:$offset)),
+def : Pat<(relaxed_load<atomic_load_az_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
+                                           ro_Xextend8:$offset)),
          (LDRBBroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$offset)>;
-def : Pat<(relaxed_load<atomic_load_8> (am_indexed8 GPR64sp:$Rn,
-                                        uimm12s1:$offset)),
+def : Pat<(relaxed_load<atomic_load_az_8> (am_indexed8 GPR64sp:$Rn,
+                                           uimm12s1:$offset)),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
-def : Pat<(relaxed_load<atomic_load_8>
+def : Pat<(relaxed_load<atomic_load_az_8>
                (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
 
 // 16-bit loads
-def : Pat<(seq_cst_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
-def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
-def : Pat<(relaxed_load<atomic_load_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+def : Pat<(seq_cst_load<atomic_load_az_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
+def : Pat<(acquiring_load<atomic_load_az_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_az_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
                                             ro_Wextend16:$extend)),
           (LDRHHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>;
-def : Pat<(relaxed_load<atomic_load_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
-                                         ro_Xextend16:$extend)),
+def : Pat<(relaxed_load<atomic_load_az_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+                                            ro_Xextend16:$extend)),
          (LDRHHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>;
-def : Pat<(relaxed_load<atomic_load_16> (am_indexed16 GPR64sp:$Rn,
-                                         uimm12s2:$offset)),
+def : Pat<(relaxed_load<atomic_load_az_16> (am_indexed16 GPR64sp:$Rn,
+                                            uimm12s2:$offset)),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
-def : Pat<(relaxed_load<atomic_load_16>
+def : Pat<(relaxed_load<atomic_load_az_16>
                (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 74ec9373ce9ef8..10f85103e468c5 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -260,8 +260,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
       .maxScalarIf(typeInSet(1, {s128}), 0, s64);
 
-  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
-    .lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
+
+  for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
+    auto &Actions = getActionDefinitionsBuilder(Op);
+
+    if (Op == G_SEXTLOAD)
+      Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
+
+    // Atomics have zero extending behavior.
+ Actions .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8}, @@ -278,6 +285,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .unsupportedIfMemSizeNotPow2() // Lower anything left over into G_*EXT and G_LOAD .lower(); + } auto IsPtrVecPred = [=](const LegalityQuery &Query) { const LLT &ValTy = Query.Types[0]; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 8d5fde6927ed0f..db5a4c0a684c54 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -161,8 +161,7 @@ # DEBUG-NEXT: G_SEXTLOAD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: G_ZEXTLOAD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} +# DEBUG-NEXT: G_ZEXTLOAD (opcode 80): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_INDEXED_LOAD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-and-mask.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-and-mask.mir index e7fef25465aff7..a284483765ad15 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-and-mask.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-and-mask.mir @@ -88,12 +88,12 @@ body: | ... --- -name: test_load_s32_atomic +name: test_load_mask_s8_s32_atomic tracksRegLiveness: true body: | bb.0: liveins: $x0 - ; CHECK-LABEL: name: test_load_s32_atomic + ; CHECK-LABEL: name: test_load_mask_s8_s32_atomic ; CHECK: liveins: $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 @@ -108,6 +108,49 @@ body: | $w0 = COPY %3 ... +# The mask is equal to the memory size. +--- +name: test_load_mask_s16_s16_atomic +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: test_load_mask_s16_s16_atomic + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load seq_cst (s16)) + ; CHECK-NEXT: $w0 = COPY [[ZEXTLOAD]](s32) + %0:_(p0) = COPY $x0 + %1:_(s32) = G_CONSTANT i32 65535 + %2:_(s32) = G_LOAD %0 :: (load seq_cst (s16)) + %3:_(s32) = G_AND %2, %1 + $w0 = COPY %3 +... + +# The mask is smaller than the memory size which must be preserved, so +# there's little point to folding. +--- +name: test_load_mask_s8_s16_atomic +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: test_load_mask_s8_s16_atomic + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load seq_cst (s16)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] + ; CHECK-NEXT: $w0 = COPY [[AND]](s32) + %0:_(p0) = COPY $x0 + %1:_(s32) = G_CONSTANT i32 255 + %2:_(s32) = G_LOAD %0 :: (load seq_cst (s16)) + %3:_(s32) = G_AND %2, %1 + $w0 = COPY %3 +... 
+ --- name: test_load_mask_size_equals_dst_size tracksRegLiveness: true @@ -272,13 +315,32 @@ body: | ; CHECK: liveins: $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (volatile load (s8)) + ; CHECK-NEXT: $w0 = COPY [[ZEXTLOAD]](s32) + %0:_(p0) = COPY $x0 + %1:_(s32) = G_CONSTANT i32 255 + %2:_(s32) = G_LOAD %0 :: (volatile load (s8)) + %3:_(s32) = G_AND %2, %1 + $w0 = COPY %3 +... + +--- +name: test_volatile_mask_smaller_mem +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: test_volatile_mask_smaller_mem + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile load (s8)) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile load (s16)) ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CHECK-NEXT: $w0 = COPY [[AND]](s32) %0:_(p0) = COPY $x0 %1:_(s32) = G_CONSTANT i32 255 - %2:_(s32) = G_LOAD %0 :: (volatile load (s8)) + %2:_(s32) = G_LOAD %0 :: (volatile load (s16)) %3:_(s32) = G_AND %2, %1 $w0 = COPY %3 ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-zextload.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-zextload.mir index 4e746ed08e7f84..5b36d7ae5c91c6 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-zextload.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-zextload.mir @@ -133,3 +133,163 @@ body: | RET_ReallyLR implicit $w0 ... + +--- +name: zextload_s32_from_s8_atomic_unordered +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s8_atomic_unordered + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load unordered (s8)) + ; CHECK-NEXT: $w0 = COPY [[LDRBBui]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load unordered (s8)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... + +--- +name: zextload_s32_from_s8_atomic_monotonic +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s8_atomic_monotonic + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load monotonic (s8)) + ; CHECK-NEXT: $w0 = COPY [[LDRBBui]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load monotonic (s8)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... + +--- +name: zextload_s32_from_s8_atomic_acquire +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s8_atomic_acquire + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDARB:%[0-9]+]]:gpr32 = LDARB [[COPY]] :: (load acquire (s8)) + ; CHECK-NEXT: $w0 = COPY [[LDARB]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load acquire (s8)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... 
+ +--- +name: zextload_s32_from_s8_atomic_seq_cst +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s8_atomic_seq_cst + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDARB:%[0-9]+]]:gpr32 = LDARB [[COPY]] :: (load seq_cst (s8)) + ; CHECK-NEXT: $w0 = COPY [[LDARB]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load seq_cst (s8)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... + +--- +name: zextload_s32_from_s16_atomic_unordered +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s16_atomic_unordered + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load unordered (s16)) + ; CHECK-NEXT: $w0 = COPY [[LDRHHui]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load unordered (s16)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... + +--- +name: zextload_s32_from_s16_atomic_monotonic +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s16_atomic_monotonic + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load monotonic (s16)) + ; CHECK-NEXT: $w0 = COPY [[LDRHHui]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load monotonic (s16)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... + +--- +name: zextload_s32_from_s16_atomic_acquire +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s16_atomic_acquire + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDARH:%[0-9]+]]:gpr32 = LDARH [[COPY]] :: (load acquire (s16)) + ; CHECK-NEXT: $w0 = COPY [[LDARH]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load acquire (s16)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... + +--- +name: zextload_s32_from_s16_atomic_seq_cst +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s16_atomic_seq_cst + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[LDARH:%[0-9]+]]:gpr32 = LDARH [[COPY]] :: (load seq_cst (s16)) + ; CHECK-NEXT: $w0 = COPY [[LDARH]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load seq_cst (s16)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-zextload-from-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-zextload-from-and.mir index 0b55aec8bef93d..73e06de1923dfe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-zextload-from-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-zextload-from-and.mir @@ -141,10 +141,8 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1) - ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], %k - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (volatile load (s8), align 1, addrspace 1) %k:_(s32) = G_CONSTANT i32 255 @@ -183,10 +181,8 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1) - ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], %k - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (volatile load (s16), align 2, addrspace 1) %k:_(s32) = G_CONSTANT i32 65535 diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp index 0f37875a319632..c15728ac7d2344 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp @@ -977,12 +977,15 @@ std::string TreePredicateFn::getPredCode() const { if (isAnyExtLoad()) PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), "IsAnyExtLoad requires IsLoad"); - if (isSignExtLoad()) - PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), - "IsSignExtLoad requires IsLoad"); - if (isZeroExtLoad()) - PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), - "IsZeroExtLoad requires IsLoad"); + + if (!isAtomic()) { + if (isSignExtLoad()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsSignExtLoad requires IsLoad or IsAtomic"); + if (isZeroExtLoad()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsZeroExtLoad requires IsLoad or IsAtomic"); + } } if (isStore()) { @@ -1003,8 +1006,9 @@ std::string TreePredicateFn::getPredCode() const { if (isAtomic()) { if (getMemoryVT() == nullptr && !isAtomicOrderingMonotonic() && getAddressSpaces() == nullptr && - !isAtomicOrderingAcquire() && !isAtomicOrderingRelease() && - !isAtomicOrderingAcquireRelease() && + // FIXME: Should atomic loads be IsLoad, IsAtomic, or both? 
+        !isZeroExtLoad() && !isSignExtLoad() && !isAtomicOrderingAcquire() &&
+        !isAtomicOrderingRelease() && !isAtomicOrderingAcquireRelease() &&
         !isAtomicOrderingSequentiallyConsistent() &&
         !isAtomicOrderingAcquireOrStronger() &&
         !isAtomicOrderingReleaseOrStronger() &&
@@ -1105,6 +1109,10 @@ std::string TreePredicateFn::getPredCode() const {
     Code += "if (isReleaseOrStronger(cast<AtomicSDNode>(N)->getMergedOrdering())) "
             "return false;\n";
 
+  // TODO: Handle atomic sextload/zextload normally when ATOMIC_LOAD is removed.
+  if (isAtomic() && (isZeroExtLoad() || isSignExtLoad()))
+    Code += "return false;\n";
+
   if (isLoad() || isStore()) {
     StringRef SDNodeName = isLoad() ? "LoadSDNode" : "StoreSDNode";
 
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index f92e1973629579..123db21af8d4b7 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -3785,10 +3785,12 @@ GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const {
   for (const TreePredicateCall &Call : N->getPredicateCalls()) {
     const TreePredicateFn &Predicate = Call.Fn;
 
-    if (!Equiv.isValueUnset("IfSignExtend") && Predicate.isLoad() &&
+    if (!Equiv.isValueUnset("IfSignExtend") &&
+        (Predicate.isLoad() || Predicate.isAtomic()) &&
         Predicate.isSignExtLoad())
       return &Target.getInstruction(Equiv.getValueAsDef("IfSignExtend"));
-    if (!Equiv.isValueUnset("IfZeroExtend") && Predicate.isLoad() &&
+    if (!Equiv.isValueUnset("IfZeroExtend") &&
+        (Predicate.isLoad() || Predicate.isAtomic()) &&
         Predicate.isZeroExtLoad())
       return &Target.getInstruction(Equiv.getValueAsDef("IfZeroExtend"));
   }
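For completeness, here is a sketch of an acquire variant of the new prelegalizer-combiner
tests (illustrative only, not part of this patch; the test name below is made up). Since
the combine only distinguishes simple from non-simple loads and then compares the mask,
memory, and register sizes, an acquire load with the mask width equal to the (s16) memory
size should take the same path as test_load_mask_s16_s16_atomic: the G_AND is expected to
fold into a G_ZEXTLOAD while the acquire memory operand is left untouched.

---
name: test_load_mask_s16_s16_acquire
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 65535
    %2:_(s32) = G_LOAD %0 :: (load acquire (s16))
    %3:_(s32) = G_AND %2, %1
    $w0 = COPY %3
...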