diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9c72b90522a047..3ae7cb8ea67c4b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -486,6 +486,9 @@ class CombinerHelper {
   bool applyLoadOrCombine(MachineInstr &MI,
                           std::function<void(MachineIRBuilder &)> &MatchInfo);
 
+  bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
+  bool applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 8db4e2c5b78707..bbd1ca35af3c38 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1929,6 +1929,14 @@ class TargetInstrInfo : public MCInstrInfo {
   virtual Optional<ParamLoadedValue>
   describeLoadedValue(const MachineInstr &MI, Register Reg) const;
 
+  /// Given the generic extension instruction \p ExtMI, returns true if this
+  /// extension is a likely candidate for being folded into another
+  /// instruction.
+  virtual bool isExtendLikelyToBeFolded(MachineInstr &ExtMI,
+                                        MachineRegisterInfo &MRI) const {
+    return false;
+  }
+
   /// Return MIR formatter to format/parse MIR operands. Target can override
   /// this virtual function and return target specific MIR formatter.
   virtual const MIRFormatter *getMIRFormatter() const {
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index e2c7a90a1b16a5..07b331d713570e 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -553,6 +553,13 @@
     [{ return Helper.matchLoadOrCombine(*${root}, ${info}); }]),
   (apply [{ return Helper.applyLoadOrCombine(*${root}, ${info}); }])>;
 
+def extend_through_phis_matchdata: GIDefMatchData<"MachineInstr*">;
+def extend_through_phis : GICombineRule<
+  (defs root:$root, extend_through_phis_matchdata:$matchinfo),
+  (match (wip_match_opcode G_PHI):$root,
+    [{ return Helper.matchExtendThroughPhis(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyExtendThroughPhis(*${root}, ${matchinfo}); }])>;
+
 // Currently only the one combine above.
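+// As an illustration (a sketch, with invented virtual register names), the
+// extend_through_phis rule above rewrites
+//   %phi:_(s32) = G_PHI %a(s32), %bb.1, %b(s32), %bb.2
+//   %ext:_(s64) = G_SEXT %phi(s32)
+// into one extend per unique incoming value, feeding a wider phi:
+//   %exta:_(s64) = G_SEXT %a(s32)    (in %bb.1)
+//   %extb:_(s64) = G_SEXT %b(s32)    (in %bb.2)
+//   %ext:_(s64) = G_PHI %exta(s64), %bb.1, %extb(s64), %bb.2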
 def insert_vec_elt_combines : GICombineGroup<
                               [combine_insert_vec_elts_build_vector]>;
@@ -579,6 +586,8 @@ def known_bits_simplifications : GICombineGroup<[
 def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>;
 
+def phi_combines : GICombineGroup<[extend_through_phis]>;
+
 def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>;
 
 def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
@@ -586,7 +595,7 @@ def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
 def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     ptr_add_immed_chain, combines_for_extload, combine_indexed_load_store,
-    undef_combines, identity_combines, simplify_add_to_sub,
+    undef_combines, identity_combines, phi_combines, simplify_add_to_sub,
     hoist_logic_op_with_same_opcode_hands,
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
     width_reduction_combines, select_combines,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 29b617d027a361..2a2c505fdfedd2 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@@ -13,6 +14,7 @@
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -3560,6 +3562,108 @@ bool CombinerHelper::matchLoadOrCombine(
   return true;
 }
 
+bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
+                                            MachineInstr *&ExtMI) {
+  assert(MI.getOpcode() == TargetOpcode::G_PHI);
+
+  Register DstReg = MI.getOperand(0).getReg();
+
+  // TODO: Extending a vector may be expensive, don't do this until heuristics
+  // are better.
+  if (MRI.getType(DstReg).isVector())
+    return false;
+
+  // Try to match a phi whose only use is an extend.
+  if (!MRI.hasOneNonDBGUse(DstReg))
+    return false;
+  ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
+  switch (ExtMI->getOpcode()) {
+  case TargetOpcode::G_ANYEXT:
+    return true; // G_ANYEXT is usually free.
+  case TargetOpcode::G_ZEXT:
+  case TargetOpcode::G_SEXT:
+    break;
+  default:
+    return false;
+  }
+
+  // If the target is likely to fold this extend away, don't propagate.
+  if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
+    return false;
+
+  // We don't want to propagate the extends unless there's a good chance that
+  // they'll be optimized in some way.
+  // Collect the unique incoming values.
+  SmallPtrSet<MachineInstr *, 4> InSrcs;
+  for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
+    auto *DefMI = getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI);
+    switch (DefMI->getOpcode()) {
+    case TargetOpcode::G_LOAD:
+    case TargetOpcode::G_TRUNC:
+    case TargetOpcode::G_SEXT:
+    case TargetOpcode::G_ZEXT:
+    case TargetOpcode::G_ANYEXT:
+    case TargetOpcode::G_CONSTANT:
+      InSrcs.insert(getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI));
+      // Don't try to propagate if there are too many places to create new
+      // extends; chances are it'll increase code size.
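+      // With at most two unique incoming defs, the original extend is traded
+      // for at most two new ones, so any code growth stays bounded; the limit
+      // of 2 is a heuristic rather than a target query.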
+      if (InSrcs.size() > 2)
+        return false;
+      break;
+    default:
+      return false;
+    }
+  }
+  return true;
+}
+
+bool CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
+                                            MachineInstr *&ExtMI) {
+  assert(MI.getOpcode() == TargetOpcode::G_PHI);
+  Register DstReg = ExtMI->getOperand(0).getReg();
+  LLT ExtTy = MRI.getType(DstReg);
+
+  // Propagate the extension into each incoming reg's block.
+  // Use a SetVector here because PHIs can have duplicate edges, and we want
+  // deterministic iteration order.
+  SmallSetVector<MachineInstr *, 8> SrcMIs;
+  SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
+  for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); SrcIdx += 2) {
+    auto *SrcMI = MRI.getVRegDef(MI.getOperand(SrcIdx).getReg());
+    if (!SrcMIs.insert(SrcMI))
+      continue;
+
+    // Build an extend after each src inst.
+    auto *MBB = SrcMI->getParent();
+    MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
+    if (InsertPt != MBB->end() && InsertPt->isPHI())
+      InsertPt = MBB->getFirstNonPHI();
+
+    Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
+    Builder.setDebugLoc(MI.getDebugLoc());
+    auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy,
+                                          SrcMI->getOperand(0).getReg());
+    OldToNewSrcMap[SrcMI] = NewExt;
+  }
+
+  // Create a new phi with the extended inputs.
+  Builder.setInstrAndDebugLoc(MI);
+  auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
+  NewPhi.addDef(DstReg);
+  for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) {
+    auto &MO = MI.getOperand(SrcIdx);
+    if (!MO.isReg()) {
+      NewPhi.addMBB(MO.getMBB());
+      continue;
+    }
+    auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
+    NewPhi.addUse(NewSrc->getOperand(0).getReg());
+  }
+  Builder.insertInstr(NewPhi);
+  ExtMI->eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::applyLoadOrCombine(
     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
   Builder.setInstrAndDebugLoc(MI);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 0e60795caf47c5..6fd0dc58a47002 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -7171,6 +7171,26 @@ AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
   return TargetInstrInfo::describeLoadedValue(MI, Reg);
 }
 
+bool AArch64InstrInfo::isExtendLikelyToBeFolded(
+    MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
+  assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
+         ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
+         ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
+
+  // Anyexts are nops.
+  if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
+    return true;
+
+  Register DefReg = ExtMI.getOperand(0).getReg();
+  if (!MRI.hasOneNonDBGUse(DefReg))
+    return false;
+
+  // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
+  // addressing mode.
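+  // For example (sketch), an offset computed as
+  //   %off:_(s64) = G_SEXT %idx(s32)
+  //   %addr:_(p0) = G_PTR_ADD %base, %off(s64)
+  // can usually be selected with an extended-register addressing mode
+  // (e.g. [base, Wn, sxtw]), making the extend effectively free.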
+ auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg); + return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD; +} + uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const { return get(Opc).TSFlags & AArch64::ElementSizeMask; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 3406bc175132f3..ed38dda208c852 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -301,6 +301,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { unsigned int getTailDuplicateSize(CodeGenOpt::Level OptLevel) const override; + bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, + MachineRegisterInfo &MRI) const override; + static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors, diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir new file mode 100644 index 00000000000000..1653d9157732f0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir @@ -0,0 +1,448 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64-apple-darwin -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -global-isel -verify-machineinstrs %s -o - | FileCheck %s + +# Check that we propagate the G_SEXT to the sources of the phi operand. +--- +name: sext_icst_through_phi +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: sext_icst_through_phi + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: %one:_(s32) = G_CONSTANT i32 2 + ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one + ; CHECK: G_BRCOND %cmp(s1), %bb.2 + ; CHECK: G_BR %bb.1 + ; CHECK: bb.1: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT %cst32_4(s32) + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10 + ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT %cst32_10(s32) + ; CHECK: bb.3: + ; CHECK: %ext:_(s64) = G_PHI [[SEXT]](s64), %bb.1, [[SEXT1]](s64), %bb.2 + ; CHECK: $x0 = COPY %ext(s64) + ; CHECK: RET_ReallyLR implicit $x0 + bb.1.entry: + liveins: $w0, $w1 + + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %zero:_(s32) = G_CONSTANT i32 0 + %one:_(s32) = G_CONSTANT i32 2 + %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one + G_BRCOND %cmp(s1), %bb.2 + G_BR %bb.3 + + bb.2: + %cst32_4:_(s32) = G_CONSTANT i32 4 + G_BR %bb.4 + + bb.3: + %cst32_10:_(s32) = G_CONSTANT i32 10 + + bb.4: + %phi:_(s32) = G_PHI %cst32_4(s32), %bb.2, %cst32_10(s32), %bb.3 + %ext:_(s64) = G_SEXT %phi + $x0 = COPY %ext(s64) + RET_ReallyLR implicit $x0 + +... + +# Check that we propagate the G_ZEXT to the sources of the phi operand. 
+--- +name: zext_icst_through_phi +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: zext_icst_through_phi + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: %one:_(s32) = G_CONSTANT i32 2 + ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one + ; CHECK: G_BRCOND %cmp(s1), %bb.2 + ; CHECK: G_BR %bb.1 + ; CHECK: bb.1: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %cst32_4(s32) + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10 + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT %cst32_10(s32) + ; CHECK: bb.3: + ; CHECK: %ext:_(s64) = G_PHI [[ZEXT]](s64), %bb.1, [[ZEXT1]](s64), %bb.2 + ; CHECK: $x0 = COPY %ext(s64) + ; CHECK: RET_ReallyLR implicit $x0 + bb.1.entry: + liveins: $w0, $w1 + + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %zero:_(s32) = G_CONSTANT i32 0 + %one:_(s32) = G_CONSTANT i32 2 + %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one + G_BRCOND %cmp(s1), %bb.2 + G_BR %bb.3 + + bb.2: + %cst32_4:_(s32) = G_CONSTANT i32 4 + G_BR %bb.4 + + bb.3: + %cst32_10:_(s32) = G_CONSTANT i32 10 + + bb.4: + %phi:_(s32) = G_PHI %cst32_4(s32), %bb.2, %cst32_10(s32), %bb.3 + %ext:_(s64) = G_ZEXT %phi + $x0 = COPY %ext(s64) + RET_ReallyLR implicit $x0 + +... + +# Don't handle vectors because of potential cost issues. +--- +name: sext_load_through_phi_vector +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: sext_load_through_phi_vector + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $x0, $q0, $q1 + ; CHECK: %ptr:_(p0) = COPY $x0 + ; CHECK: %cmp:_(s1) = G_IMPLICIT_DEF + ; CHECK: G_BRCOND %cmp(s1), %bb.2 + ; CHECK: G_BR %bb.1 + ; CHECK: bb.1: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: %ld1:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16) + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: %ld2:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16) + ; CHECK: bb.3: + ; CHECK: %phi:_(<4 x s32>) = G_PHI %ld1(<4 x s32>), %bb.1, %ld2(<4 x s32>), %bb.2 + ; CHECK: %ext:_(<4 x s64>) = G_SEXT %phi(<4 x s32>) + ; CHECK: G_STORE %ext(<4 x s64>), %ptr(p0) :: (store 16) + ; CHECK: RET_ReallyLR + bb.1.entry: + liveins: $x0, $q0, $q1 + + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %ptr:_(p0) = COPY $x0 + %cmp:_(s1) = G_IMPLICIT_DEF + G_BRCOND %cmp(s1), %bb.2 + G_BR %bb.3 + + bb.2: + %ld1:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16) + G_BR %bb.4 + + bb.3: + %ld2:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16) + + bb.4: + %phi:_(<4 x s32>) = G_PHI %ld1(<4 x s32>), %bb.2, %ld2(<4 x s32>), %bb.3 + %ext:_(<4 x s64>) = G_SEXT %phi + G_STORE %ext(<4 x s64>), %ptr(p0) :: (store 16) + RET_ReallyLR + +... + + +# Check that we don't propagate if the extend is used by a G_PTR_ADD, which on +# AArch64 has a good chance of folding in the extend. 
+---
+name:            sext_icst_through_phi_used_by_ptradd
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: sext_icst_through_phi_used_by_ptradd
+  ; CHECK: bb.0.entry:
+  ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK: liveins: $w0, $w1, $x2
+  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK: %base:_(p0) = COPY $x2
+  ; CHECK: %one:_(s32) = G_CONSTANT i32 2
+  ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+  ; CHECK: G_BRCOND %cmp(s1), %bb.2
+  ; CHECK: G_BR %bb.1
+  ; CHECK: bb.1:
+  ; CHECK: successors: %bb.3(0x80000000)
+  ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4
+  ; CHECK: G_BR %bb.3
+  ; CHECK: bb.2:
+  ; CHECK: successors: %bb.3(0x80000000)
+  ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10
+  ; CHECK: bb.3:
+  ; CHECK: %phi:_(s32) = G_PHI %cst32_4(s32), %bb.1, %cst32_10(s32), %bb.2
+  ; CHECK: %ext:_(s64) = G_SEXT %phi(s32)
+  ; CHECK: %ptr:_(p0) = G_PTR_ADD %base, %ext(s64)
+  ; CHECK: $x0 = COPY %ptr(p0)
+  ; CHECK: RET_ReallyLR implicit $x0
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %base:_(p0) = COPY $x2
+    %zero:_(s32) = G_CONSTANT i32 0
+    %one:_(s32) = G_CONSTANT i32 2
+    %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+
+  bb.2:
+    %cst32_4:_(s32) = G_CONSTANT i32 4
+    G_BR %bb.4
+
+  bb.3:
+    %cst32_10:_(s32) = G_CONSTANT i32 10
+
+  bb.4:
+    %phi:_(s32) = G_PHI %cst32_4(s32), %bb.2, %cst32_10(s32), %bb.3
+    %ext:_(s64) = G_SEXT %phi
+    %ptr:_(p0) = G_PTR_ADD %base, %ext
+    $x0 = COPY %ptr(p0)
+    RET_ReallyLR implicit $x0
+
+...
+
+# Same as above, but here we do propagate because the extend has multiple users,
+# so it probably won't cost extra instructions if we remove it.
+---
+name:            sext_icst_through_phi_used_by_ptradd_multiuse
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: sext_icst_through_phi_used_by_ptradd_multiuse
+  ; CHECK: bb.0.entry:
+  ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK: liveins: $w0, $w1, $x2
+  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK: %base:_(p0) = COPY $x2
+  ; CHECK: %one:_(s32) = G_CONSTANT i32 2
+  ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+  ; CHECK: G_BRCOND %cmp(s1), %bb.2
+  ; CHECK: G_BR %bb.1
+  ; CHECK: bb.1:
+  ; CHECK: successors: %bb.3(0x80000000)
+  ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4
+  ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT %cst32_4(s32)
+  ; CHECK: G_BR %bb.3
+  ; CHECK: bb.2:
+  ; CHECK: successors: %bb.3(0x80000000)
+  ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10
+  ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT %cst32_10(s32)
+  ; CHECK: bb.3:
+  ; CHECK: %ext:_(s64) = G_PHI [[SEXT]](s64), %bb.1, [[SEXT1]](s64), %bb.2
+  ; CHECK: %ptr:_(p0) = G_PTR_ADD %base, %ext(s64)
+  ; CHECK: $x0 = COPY %ptr(p0)
+  ; CHECK: $x1 = COPY %ext(s64)
+  ; CHECK: RET_ReallyLR implicit $x0
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %base:_(p0) = COPY $x2
+    %zero:_(s32) = G_CONSTANT i32 0
+    %one:_(s32) = G_CONSTANT i32 2
+    %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+
+  bb.2:
+    %cst32_4:_(s32) = G_CONSTANT i32 4
+    G_BR %bb.4
+
+  bb.3:
+    %cst32_10:_(s32) = G_CONSTANT i32 10
+
+  bb.4:
+    %phi:_(s32) = G_PHI %cst32_4(s32), %bb.2, %cst32_10(s32), %bb.3
+    %ext:_(s64) = G_SEXT %phi
+    %ptr:_(p0) = G_PTR_ADD %base, %ext
+    $x0 = COPY %ptr(p0)
+    $x1 = COPY %ext(s64)
+    RET_ReallyLR implicit $x0
+
+...
+
+# Check we don't propagate if there are more than 2 unique incoming values in the phi.
+# Doing so might cause too much code bloat. +--- +name: zext_icst_through_phi_too_many_incoming +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: zext_icst_through_phi_too_many_incoming + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: %one:_(s32) = G_CONSTANT i32 2 + ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one + ; CHECK: G_BRCOND %cmp(s1), %bb.2 + ; CHECK: G_BR %bb.1 + ; CHECK: bb.1: + ; CHECK: successors: %bb.3(0x40000000), %bb.4(0x40000000) + ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4 + ; CHECK: %cond:_(s1) = G_IMPLICIT_DEF + ; CHECK: G_BRCOND %cond(s1), %bb.3 + ; CHECK: G_BR %bb.4 + ; CHECK: bb.2: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10 + ; CHECK: G_BR %bb.4 + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: %cst32_42:_(s32) = G_CONSTANT i32 42 + ; CHECK: bb.4: + ; CHECK: %phi:_(s32) = G_PHI %cst32_4(s32), %bb.1, %cst32_10(s32), %bb.2, %cst32_42(s32), %bb.3 + ; CHECK: %ext:_(s64) = G_ZEXT %phi(s32) + ; CHECK: $x0 = COPY %ext(s64) + ; CHECK: RET_ReallyLR implicit $x0 + bb.1.entry: + liveins: $w0, $w1 + + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %zero:_(s32) = G_CONSTANT i32 0 + %one:_(s32) = G_CONSTANT i32 2 + %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one + G_BRCOND %cmp(s1), %bb.2 + G_BR %bb.3 + + bb.2: + %cst32_4:_(s32) = G_CONSTANT i32 4 + %cond:_(s1) = G_IMPLICIT_DEF + G_BRCOND %cond, %bb.5 + G_BR %bb.4 + + bb.3: + %cst32_10:_(s32) = G_CONSTANT i32 10 + G_BR %bb.4 + + bb.5: + %cst32_42:_(s32) = G_CONSTANT i32 42 + + bb.4: + %phi:_(s32) = G_PHI %cst32_4(s32), %bb.2, %cst32_10(s32), %bb.3, %cst32_42(s32), %bb.5 + %ext:_(s64) = G_ZEXT %phi + $x0 = COPY %ext(s64) + RET_ReallyLR implicit $x0 + +... + +# Check that we don't propagate if the extension would be of a non-allowed inst. +--- +name: sext_add_through_phi +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: sext_add_through_phi + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: %one:_(s32) = G_CONSTANT i32 2 + ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one + ; CHECK: G_BRCOND %cmp(s1), %bb.2 + ; CHECK: G_BR %bb.1 + ; CHECK: bb.1: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: %add:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10 + ; CHECK: bb.3: + ; CHECK: %phi:_(s32) = G_PHI %add(s32), %bb.1, %cst32_10(s32), %bb.2 + ; CHECK: %ext:_(s64) = G_SEXT %phi(s32) + ; CHECK: $x0 = COPY %ext(s64) + ; CHECK: RET_ReallyLR implicit $x0 + bb.1.entry: + liveins: $w0, $w1 + + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %zero:_(s32) = G_CONSTANT i32 0 + %one:_(s32) = G_CONSTANT i32 2 + %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one + G_BRCOND %cmp(s1), %bb.2 + G_BR %bb.3 + + bb.2: + %add:_(s32) = G_ADD %0, %1 + G_BR %bb.4 + + bb.3: + %cst32_10:_(s32) = G_CONSTANT i32 10 + + bb.4: + %phi:_(s32) = G_PHI %add(s32), %bb.2, %cst32_10(s32), %bb.3 + %ext:_(s64) = G_SEXT %phi + $x0 = COPY %ext(s64) + RET_ReallyLR implicit $x0 + +... + +# Same as above but allowed with a G_ANYEXT. 
+--- +name: anyext_add_through_phi +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: anyext_add_through_phi + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: %one:_(s32) = G_CONSTANT i32 2 + ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one + ; CHECK: G_BRCOND %cmp(s1), %bb.2 + ; CHECK: G_BR %bb.1 + ; CHECK: bb.1: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: %add:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %add(s32) + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10 + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %cst32_10(s32) + ; CHECK: bb.3: + ; CHECK: %ext:_(s64) = G_PHI [[ANYEXT]](s64), %bb.1, [[ANYEXT1]](s64), %bb.2 + ; CHECK: $x0 = COPY %ext(s64) + ; CHECK: RET_ReallyLR implicit $x0 + bb.1.entry: + liveins: $w0, $w1 + + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %zero:_(s32) = G_CONSTANT i32 0 + %one:_(s32) = G_CONSTANT i32 2 + %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one + G_BRCOND %cmp(s1), %bb.2 + G_BR %bb.3 + + bb.2: + %add:_(s32) = G_ADD %0, %1 + G_BR %bb.4 + + bb.3: + %cst32_10:_(s32) = G_CONSTANT i32 10 + + bb.4: + %phi:_(s32) = G_PHI %add(s32), %bb.2, %cst32_10(s32), %bb.3 + %ext:_(s64) = G_ANYEXT %phi + $x0 = COPY %ext(s64) + RET_ReallyLR implicit $x0 + +...