diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index e7debc652a0a8..dcc1a4580b14a 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -769,9 +769,6 @@ class CombinerHelper { bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo); - /// Fold boolean selects to logical operations. - bool matchSelectToLogical(MachineInstr &MI, BuildFnTy &MatchInfo); - bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info); /// Transform G_ADD(x, G_SUB(y, x)) to y. @@ -814,6 +811,9 @@ class CombinerHelper { // Given a binop \p MI, commute operands 1 and 2. void applyCommuteBinOpOperands(MachineInstr &MI); + /// Combine selects. + bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo); + private: /// Checks for legality of an indexed variant of \p LdSt. bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const; @@ -904,6 +904,18 @@ class CombinerHelper { /// select (fcmp uge x, 1.0) 1.0, x -> fminnm x, 1.0 bool matchFPSelectToMinMax(Register Dst, Register Cond, Register TrueVal, Register FalseVal, BuildFnTy &MatchInfo); + + /// Try to fold selects to logical operations. + bool tryFoldBoolSelectToLogic(GSelect *Select, BuildFnTy &MatchInfo); + + bool tryFoldSelectOfConstants(GSelect *Select, BuildFnTy &MatchInfo); + + bool isOneOrOneSplat(Register Src, bool AllowUndefs); + bool isZeroOrZeroSplat(Register Src, bool AllowUndefs); + bool isConstantSplatVector(Register Src, int64_t SplatValue, + bool AllowUndefs); + + std::optional<APInt> getConstantOrConstantSplatVector(Register Src); }; } // namespace llvm diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 77db371adaf77..6bda80681432a 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -437,13 +437,6 @@ def select_constant_cmp: GICombineRule< (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, ${matchinfo}); }]) >; -def select_to_logical : GICombineRule< - (defs root:$root, build_fn_matchinfo:$matchinfo), - (match (wip_match_opcode G_SELECT):$root, - [{ return Helper.matchSelectToLogical(*${root}, ${matchinfo}); }]), - (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }]) ->; - // Fold (C op x) -> (x op C) // TODO: handle more isCommutable opcodes // TODO: handle compares (currently not marked as isCommutable) @@ -1242,6 +1235,12 @@ def select_to_minmax: GICombineRule< [{ return Helper.matchSimplifySelectToMinMax(*${root}, ${info}); }]), (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; +def match_selects : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_SELECT):$root, + [{ return Helper.matchSelect(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + // FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -1282,7 +1281,7 @@ def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend, def phi_combines : GICombineGroup<[extend_through_phis]>; def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp, - select_to_logical]>; + match_selects]>; def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd, mul_by_neg_one, idempotent_prop]>; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 91a64d59e154d..8b15bdb0aca30 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -5940,62 +5940,6 @@ bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA( return false; } -bool CombinerHelper::matchSelectToLogical(MachineInstr &MI, - BuildFnTy &MatchInfo) { - GSelect &Sel = cast<GSelect>(MI); - Register DstReg = Sel.getReg(0); - Register Cond = Sel.getCondReg(); - Register TrueReg = Sel.getTrueReg(); - Register FalseReg = Sel.getFalseReg(); - - auto *TrueDef = getDefIgnoringCopies(TrueReg, MRI); - auto *FalseDef = getDefIgnoringCopies(FalseReg, MRI); - - const LLT CondTy = MRI.getType(Cond); - const LLT OpTy = MRI.getType(TrueReg); - if (CondTy != OpTy || OpTy.getScalarSizeInBits() != 1) - return false; - - // We have a boolean select. - - // select Cond, Cond, F --> or Cond, F - // select Cond, 1, F --> or Cond, F - auto MaybeCstTrue = isConstantOrConstantSplatVector(*TrueDef, MRI); - if (Cond == TrueReg || (MaybeCstTrue && MaybeCstTrue->isOne())) { - MatchInfo = [=](MachineIRBuilder &MIB) { - MIB.buildOr(DstReg, Cond, FalseReg); - }; - return true; - } - - // select Cond, T, Cond --> and Cond, T - // select Cond, T, 0 --> and Cond, T - auto MaybeCstFalse = isConstantOrConstantSplatVector(*FalseDef, MRI); - if (Cond == FalseReg || (MaybeCstFalse && MaybeCstFalse->isZero())) { - MatchInfo = [=](MachineIRBuilder &MIB) { - MIB.buildAnd(DstReg, Cond, TrueReg); - }; - return true; - } - - // select Cond, T, 1 --> or (not Cond), T - if (MaybeCstFalse && MaybeCstFalse->isOne()) { - MatchInfo = [=](MachineIRBuilder &MIB) { - MIB.buildOr(DstReg, MIB.buildNot(OpTy, Cond), TrueReg); - }; - return true; - } - - // select Cond, 0, F --> and (not Cond), F - if (MaybeCstTrue && MaybeCstTrue->isZero()) { - MatchInfo = [=](MachineIRBuilder &MIB) { - MIB.buildAnd(DstReg, MIB.buildNot(OpTy, Cond), FalseReg); - }; - return true; - } - return false; -} - bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &IdxToPropagate) { bool PropagateNaN; @@ -6318,3 +6262,300 @@ void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) { MI.getOperand(2).setReg(LHSReg); Observer.changedInstr(MI); } + +bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) { + LLT SrcTy = MRI.getType(Src); + if (SrcTy.isFixedVector()) + return isConstantSplatVector(Src, 1, AllowUndefs); + if (SrcTy.isScalar()) { + if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr) + return true; + auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI); + return IConstant && IConstant->Value == 1; + } + return false; // scalable vector +} + +bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) { + LLT SrcTy = MRI.getType(Src); + if (SrcTy.isFixedVector()) + return isConstantSplatVector(Src, 0, AllowUndefs); + if (SrcTy.isScalar()) { + if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr) + return true; + auto IConstant =
getIConstantVRegValWithLookThrough(Src, MRI); + return IConstant && IConstant->Value == 0; + } + return false; // scalable vector +} + +// Ignores COPYs during conformance checks. +// FIXME scalable vectors. +bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue, + bool AllowUndefs) { + GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI); + if (!BuildVector) + return false; + unsigned NumSources = BuildVector->getNumSources(); + + for (unsigned I = 0; I < NumSources; ++I) { + GImplicitDef *ImplicitDef = + getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI); + if (ImplicitDef && AllowUndefs) + continue; + if (ImplicitDef && !AllowUndefs) + return false; + std::optional<ValueAndVReg> IConstant = + getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI); + if (IConstant && IConstant->Value == SplatValue) + continue; + return false; + } + return true; +} + +// Ignores COPYs during lookups. +// FIXME scalable vectors +std::optional<APInt> +CombinerHelper::getConstantOrConstantSplatVector(Register Src) { + auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI); + if (IConstant) + return IConstant->Value; + + GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI); + if (!BuildVector) + return std::nullopt; + unsigned NumSources = BuildVector->getNumSources(); + + std::optional<APInt> Value = std::nullopt; + for (unsigned I = 0; I < NumSources; ++I) { + std::optional<ValueAndVReg> IConstant = + getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI); + if (!IConstant) + return std::nullopt; + if (!Value) + Value = IConstant->Value; + else if (*Value != IConstant->Value) + return std::nullopt; + } + return Value; +} + +// TODO: use knownbits to determine zeros +bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select, + BuildFnTy &MatchInfo) { + uint32_t Flags = Select->getFlags(); + Register Dest = Select->getReg(0); + Register Cond = Select->getCondReg(); + Register True = Select->getTrueReg(); + Register False = Select->getFalseReg(); + LLT CondTy = MRI.getType(Select->getCondReg()); + LLT TrueTy = MRI.getType(Select->getTrueReg()); + + // We only do this combine for scalar boolean conditions. + if (CondTy != LLT::scalar(1)) + return false; + + // Both are scalars.
+ std::optional<ValueAndVReg> TrueOpt = + getIConstantVRegValWithLookThrough(True, MRI); + std::optional<ValueAndVReg> FalseOpt = + getIConstantVRegValWithLookThrough(False, MRI); + + if (!TrueOpt || !FalseOpt) + return false; + + APInt TrueValue = TrueOpt->Value; + APInt FalseValue = FalseOpt->Value; + + // select Cond, 1, 0 --> zext (Cond) + if (TrueValue.isOne() && FalseValue.isZero()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + B.buildZExtOrTrunc(Dest, Cond); + }; + return true; + } + + // select Cond, -1, 0 --> sext (Cond) + if (TrueValue.isAllOnes() && FalseValue.isZero()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + B.buildSExtOrTrunc(Dest, Cond); + }; + return true; + } + + // select Cond, 0, 1 --> zext (!Cond) + if (TrueValue.isZero() && FalseValue.isOne()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Inner, Cond); + B.buildZExtOrTrunc(Dest, Inner); + }; + return true; + } + + // select Cond, 0, -1 --> sext (!Cond) + if (TrueValue.isZero() && FalseValue.isAllOnes()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Inner, Cond); + B.buildSExtOrTrunc(Dest, Inner); + }; + return true; + } + + // select Cond, C1, C1-1 --> add (zext Cond), C1-1 + if (TrueValue - 1 == FalseValue) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Inner, Cond); + B.buildAdd(Dest, Inner, False); + }; + return true; + } + + // select Cond, C1, C1+1 --> add (sext Cond), C1+1 + if (TrueValue + 1 == FalseValue) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildSExtOrTrunc(Inner, Cond); + B.buildAdd(Dest, Inner, False); + }; + return true; + } + + // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) + if (TrueValue.isPowerOf2() && FalseValue.isZero()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Inner, Cond); + // The shift amount must be scalar. + LLT ShiftTy = TrueTy.isVector() ?
TrueTy.getElementType() : TrueTy; + auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2()); + B.buildShl(Dest, Inner, ShAmtC, Flags); + }; + return true; + } + // select Cond, -1, C --> or (sext Cond), C + if (TrueValue.isAllOnes()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildSExtOrTrunc(Inner, Cond); + B.buildOr(Dest, Inner, False, Flags); + }; + return true; + } + + // select Cond, C, -1 --> or (sext (not Cond)), C + if (FalseValue.isAllOnes()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Not = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Not, Cond); + Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildSExtOrTrunc(Inner, Not); + B.buildOr(Dest, Inner, True, Flags); + }; + return true; + } + + return false; +} + +// TODO: use knownbits to determine zeros +bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select, + BuildFnTy &MatchInfo) { + uint32_t Flags = Select->getFlags(); + Register DstReg = Select->getReg(0); + Register Cond = Select->getCondReg(); + Register True = Select->getTrueReg(); + Register False = Select->getFalseReg(); + LLT CondTy = MRI.getType(Select->getCondReg()); + LLT TrueTy = MRI.getType(Select->getTrueReg()); + + // Boolean or fixed vector of booleans. + if (CondTy.isScalableVector() || + (CondTy.isFixedVector() && + CondTy.getElementType().getScalarSizeInBits() != 1) || + CondTy.getScalarSizeInBits() != 1) + return false; + + if (CondTy != TrueTy) + return false; + + // select Cond, Cond, F --> or Cond, F + // select Cond, 1, F --> or Cond, F + if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Ext = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Ext, Cond); + B.buildOr(DstReg, Ext, False, Flags); + }; + return true; + } + + // select Cond, T, Cond --> and Cond, T + // select Cond, T, 0 --> and Cond, T + if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Ext = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Ext, Cond); + B.buildAnd(DstReg, Ext, True); + }; + return true; + } + + // select Cond, T, 1 --> or (not Cond), T + if (isOneOrOneSplat(False, /* AllowUndefs */ true)) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + // First the not. + Register Inner = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Inner, Cond); + // Then an ext to match the destination register. + Register Ext = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Ext, Inner); + B.buildOr(DstReg, Ext, True, Flags); + }; + return true; + } + + // select Cond, 0, F --> and (not Cond), F + if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + // First the not. + Register Inner = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Inner, Cond); + // Then an ext to match the destination register. 
+ Register Ext = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Ext, Inner); + B.buildAnd(DstReg, Ext, False); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) { + GSelect *Select = cast<GSelect>(&MI); + + if (tryFoldSelectOfConstants(Select, MatchInfo)) + return true; + + if (tryFoldBoolSelectToLogic(Select, MatchInfo)) + return true; + + return false; +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir index 81d38a5b08047..be2de620fa456 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir @@ -298,3 +298,249 @@ body: | %ext:_(s32) = G_ANYEXT %sel $w0 = COPY %ext(s32) ... +--- +# select cond, 1, 0 --> zext(Cond) +name: select_cond_1_0_to_zext_cond +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_cond_1_0_to_zext_cond + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %c(s1) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %c:_(s1) = G_TRUNC %0 + %t:_(s1) = G_TRUNC %1 + %f:_(s1) = G_TRUNC %2 + %zero:_(s1) = G_CONSTANT i1 0 + %one:_(s1) = G_CONSTANT i1 1 + %sel:_(s1) = G_SELECT %c, %one, %zero + %ext:_(s32) = G_ANYEXT %sel + $w0 = COPY %ext(s32) +... +--- +# select cond, 0, 1 --> zext(!Cond) +name: select_cond_0_1_to_sext_not_cond +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_cond_0_1_to_sext_not_cond + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %one:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR %c, %one + ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %c:_(s1) = G_TRUNC %0 + %t:_(s1) = G_TRUNC %1 + %f:_(s1) = G_TRUNC %2 + %zero:_(s1) = G_CONSTANT i1 0 + %one:_(s1) = G_CONSTANT i1 1 + %sel:_(s1) = G_SELECT %c, %zero, %one + %ext:_(s32) = G_ANYEXT %sel + $w0 = COPY %ext(s32) +... +--- +# select cond, 102, 101 --> add (zext Cond), 101 +name: select_cond_2_1_to_and_zext_cond_false +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_cond_2_1_to_and_zext_cond_false + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %one:_(s8) = G_CONSTANT i8 101 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT %c(s1) + ; CHECK-NEXT: %sel:_(s8) = G_ADD [[ZEXT]], %one + ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s8) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %c:_(s1) = G_TRUNC %0 + %t:_(s1) = G_TRUNC %1 + %f:_(s1) = G_TRUNC %2 + %two:_(s8) = G_CONSTANT i8 102 + %one:_(s8) = G_CONSTANT i8 101 + %sel:_(s8) = G_SELECT %c, %two, %one + %ext:_(s32) = G_ANYEXT %sel + $w0 = COPY %ext(s32) +...
+--- +# select cond, 101, 102 --> add (sext Cond), 102 +name: select_cond_1_2_to_and_sext_cond_false +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_cond_1_2_to_and_sext_cond_false + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %one:_(s8) = G_CONSTANT i8 102 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s8) = G_SEXT %c(s1) + ; CHECK-NEXT: %sel:_(s8) = G_ADD [[SEXT]], %one + ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s8) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %c:_(s1) = G_TRUNC %0 + %t:_(s1) = G_TRUNC %1 + %f:_(s1) = G_TRUNC %2 + %two:_(s8) = G_CONSTANT i8 101 + %one:_(s8) = G_CONSTANT i8 102 + %sel:_(s8) = G_SELECT %c, %two, %one + %ext:_(s32) = G_ANYEXT %sel + $w0 = COPY %ext(s32) +... +--- +# select cond, 64, 0 --> (zext Cond) << log2(Pow2) +name: select_cond_64_0_to_shift +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_cond_64_0_to_shift + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT %c(s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 6 + ; CHECK-NEXT: %sel:_(s8) = G_SHL [[ZEXT]], [[C]](s8) + ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s8) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %c:_(s1) = G_TRUNC %0 + %t:_(s1) = G_TRUNC %1 + %f:_(s1) = G_TRUNC %2 + %two:_(s8) = G_CONSTANT i8 64 + %one:_(s8) = G_CONSTANT i8 0 + %sel:_(s8) = G_SELECT %c, %two, %one + %ext:_(s32) = G_ANYEXT %sel + $w0 = COPY %ext(s32) +... +--- +# select cond, -1, 0 --> sext Cond +name: select_cond_minus_1_0_to_sext_cond +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_cond_minus_1_0_to_sext_cond + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %ext:_(s32) = G_SEXT %c(s1) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %c:_(s1) = G_TRUNC %0 + %t:_(s1) = G_TRUNC %1 + %f:_(s1) = G_TRUNC %2 + %two:_(s8) = G_CONSTANT i8 255 + %one:_(s8) = G_CONSTANT i8 0 + %sel:_(s8) = G_SELECT %c, %two, %one + %ext:_(s32) = G_ANYEXT %sel + $w0 = COPY %ext(s32) +... +--- +# select cond, 0, -1 --> sext (!Cond) +name: select_cond_0_minus_1_to_sext_not_cond +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_cond_0_minus_1_to_sext_not_cond + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR %c, [[C]] + ; CHECK-NEXT: %ext:_(s32) = G_SEXT [[XOR]](s1) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %c:_(s1) = G_TRUNC %0 + %t:_(s1) = G_TRUNC %1 + %f:_(s1) = G_TRUNC %2 + %two:_(s8) = G_CONSTANT i8 0 + %one:_(s8) = G_CONSTANT i8 255 + %sel:_(s8) = G_SELECT %c, %two, %one + %ext:_(s32) = G_ANYEXT %sel + $w0 = COPY %ext(s32) +...
+--- +# select cond, -1, 101 --> or (sext Cond), 101 +name: select_cond_minus_1_101_to_or_sext_cond_101 +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_cond_minus_1_101_to_or_sext_cond_101 + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %one:_(s8) = G_CONSTANT i8 101 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s8) = G_SEXT %c(s1) + ; CHECK-NEXT: %sel:_(s8) = G_OR [[SEXT]], %one + ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s8) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %c:_(s1) = G_TRUNC %0 + %t:_(s1) = G_TRUNC %1 + %f:_(s1) = G_TRUNC %2 + %two:_(s8) = G_CONSTANT i8 255 + %one:_(s8) = G_CONSTANT i8 101 + %sel:_(s8) = G_SELECT %c, %two, %one + %ext:_(s32) = G_ANYEXT %sel + $w0 = COPY %ext(s32) +... +--- +# select cond, 101, -1 --> or (sext (not Cond)), 101 +name: select_cond_101_minus_1_to_or_sext_not_cond_101 +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_cond_101_minus_1_to_or_sext_not_cond_101 + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %two:_(s8) = G_CONSTANT i8 101 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR %c, [[C]] + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s8) = G_SEXT [[XOR]](s1) + ; CHECK-NEXT: %sel:_(s8) = G_OR [[SEXT]], %two + ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s8) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %c:_(s1) = G_TRUNC %0 + %t:_(s1) = G_TRUNC %1 + %f:_(s1) = G_TRUNC %2 + %two:_(s8) = G_CONSTANT i8 101 + %one:_(s8) = G_CONSTANT i8 255 + %sel:_(s8) = G_SELECT %c, %two, %one + %ext:_(s32) = G_ANYEXT %sel + $w0 = COPY %ext(s32) +...
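Note: the s8 tests above all reduce to a handful of wraparound arithmetic identities used by tryFoldSelectOfConstants. As a quick cross-check, they can be brute-forced over all 8-bit constants and both condition values. This is a minimal standalone sketch (not part of the patch and not LLVM API; all names in it are hypothetical), assuming G_ADD/G_OR/G_SHL two's-complement semantics on s8:

// Standalone sanity check for the integer identities behind
// tryFoldSelectOfConstants, in 8-bit two's-complement arithmetic.
#include <cassert>
#include <cstdint>

int main() {
  for (int C = 0; C <= 1; ++C) {
    uint8_t ZExt = C ? 1 : 0;       // zext i1 C to i8
    uint8_t SExt = C ? 0xff : 0;    // sext i1 C to i8
    uint8_t NotSExt = C ? 0 : 0xff; // sext (not C) to i8
    for (unsigned V = 0; V < 256; ++V) {
      uint8_t C1 = V;
      // select C, C1, C1-1 --> add (zext C), C1-1
      assert(uint8_t(C ? C1 : C1 - 1) == uint8_t(ZExt + uint8_t(C1 - 1)));
      // select C, C1, C1+1 --> add (sext C), C1+1
      assert(uint8_t(C ? C1 : C1 + 1) == uint8_t(SExt + uint8_t(C1 + 1)));
      // select C, -1, C1 --> or (sext C), C1
      assert(uint8_t(C ? 0xff : C1) == uint8_t(SExt | C1));
      // select C, C1, -1 --> or (sext (not C)), C1
      assert(uint8_t(C ? C1 : 0xff) == uint8_t(NotSExt | C1));
    }
    // select C, 64, 0 --> (zext C) << log2(64)
    assert(uint8_t(C ? 64 : 0) == uint8_t(ZExt << 6));
  }
  return 0;
}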
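The same style of exhaustive check covers the boolean identities behind tryFoldBoolSelectToLogic. Again a standalone sketch under the assumption that i1 values are modeled as 0/1 ints, enumerating every condition/operand combination:

// Standalone sanity check for the i1 select-to-logic identities.
#include <cassert>

int main() {
  for (int C = 0; C <= 1; ++C)
    for (int T = 0; T <= 1; ++T)
      for (int F = 0; F <= 1; ++F) {
        int Sel = C ? T : F;          // select C, T, F on i1
        if (C == T || T == 1)         // select C, C, F / select C, 1, F
          assert(Sel == (C | F));     // --> or C, F
        if (C == F || F == 0)         // select C, T, C / select C, T, 0
          assert(Sel == (C & T));     // --> and C, T
        if (F == 1)                   // select C, T, 1
          assert(Sel == ((C ^ 1) | T)); // --> or (not C), T
        if (T == 0)                   // select C, 0, F
          assert(Sel == ((C ^ 1) & F)); // --> and (not C), F
      }
  return 0;
}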
diff --git a/llvm/test/CodeGen/AArch64/andcompare.ll b/llvm/test/CodeGen/AArch64/andcompare.ll index 9a7fa04982990..cbacd17c846d4 100644 --- a/llvm/test/CodeGen/AArch64/andcompare.ll +++ b/llvm/test/CodeGen/AArch64/andcompare.ll @@ -2451,7 +2451,7 @@ define i32 @cmp_to_ands3(i32 %num, i32 %a) { ; ; GISEL-LABEL: cmp_to_ands3: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #23 +; GISEL-NEXT: mov w8, #23 // =0x17 ; GISEL-NEXT: and w8, w0, w8 ; GISEL-NEXT: cmp w8, #7 ; GISEL-NEXT: csel w0, w1, wzr, hi diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll index 821f6e403a271..446526986b883 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll @@ -14,7 +14,7 @@ define i32 @single_same(i32 %a, i32 %b) nounwind ssp { ; CHECK-NEXT: bl _foo ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; CHECK-NEXT: LBB0_2: ; %if.end -; CHECK-NEXT: mov w0, #7 +; CHECK-NEXT: mov w0, #7 ; =0x7 ; CHECK-NEXT: ret entry: %cmp = icmp eq i32 %a, 5 @@ -42,7 +42,7 @@ define i32 @single_different(i32 %a, i32 %b) nounwind ssp { ; SDISEL-NEXT: bl _foo ; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; SDISEL-NEXT: LBB1_2: ; %if.end -; SDISEL-NEXT: mov w0, #7 +; SDISEL-NEXT: mov w0, #7 ; =0x7 ; SDISEL-NEXT: ret ; ; GISEL-LABEL: single_different: @@ -55,7 +55,7 @@ define i32 @single_different(i32 %a, i32 %b) nounwind ssp { ; GISEL-NEXT: bl _foo ; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; GISEL-NEXT: LBB1_2: ; %if.end -; GISEL-NEXT: mov w0, #7 +; GISEL-NEXT: mov w0, #7 ; =0x7 ; GISEL-NEXT: ret entry: %cmp = icmp sle i32 %a, 5 @@ -88,7 +88,7 @@ define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp { ; SDISEL-NEXT: bl _foo ; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; SDISEL-NEXT: LBB2_3: ; %if.end -; SDISEL-NEXT: mov w0, #7 +; SDISEL-NEXT: mov w0, #7 ; =0x7 ; SDISEL-NEXT: ret ; ; GISEL-LABEL: single_flagclobber: @@ -106,7 +106,7 @@ define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp { ; GISEL-NEXT: bl _foo ; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; GISEL-NEXT: LBB2_3: ; %if.end -; GISEL-NEXT: mov w0, #7 +; GISEL-NEXT: mov w0, #7 ; =0x7 ; GISEL-NEXT: ret entry: %cmp = icmp eq i32 %a, 5 @@ -144,7 +144,7 @@ define i32 @single_flagclobber_tbz(i32 %a, i32 %b) nounwind ssp { ; CHECK-NEXT: bl _foo ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; CHECK-NEXT: LBB3_3: ; %if.end -; CHECK-NEXT: mov w0, #7 +; CHECK-NEXT: mov w0, #7 ; =0x7 ; CHECK-NEXT: ret entry: %cmp = icmp eq i32 %a, 5 @@ -178,13 +178,13 @@ define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp { ; SDISEL-NEXT: ccmp w8, #16, #0, ge ; SDISEL-NEXT: b.le LBB4_2 ; SDISEL-NEXT: ; %bb.1: ; %if.end -; SDISEL-NEXT: mov w0, #7 +; SDISEL-NEXT: mov w0, #7 ; =0x7 ; SDISEL-NEXT: ret ; SDISEL-NEXT: LBB4_2: ; %if.then ; SDISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill ; SDISEL-NEXT: bl _foo ; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; SDISEL-NEXT: mov w0, #7 +; SDISEL-NEXT: mov w0, #7 ; =0x7 ; SDISEL-NEXT: ret ; ; GISEL-LABEL: speculate_division: @@ -194,13 +194,13 @@ define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp { ; GISEL-NEXT: ccmp w8, #17, #0, gt ; GISEL-NEXT: b.lt LBB4_2 ; GISEL-NEXT: ; %bb.1: ; %if.end -; GISEL-NEXT: mov w0, #7 +; GISEL-NEXT: mov w0, #7 ; =0x7 ; GISEL-NEXT: ret ; GISEL-NEXT: LBB4_2: ; %if.then ; GISEL-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill ; GISEL-NEXT: bl _foo ; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; GISEL-NEXT: mov w0, #7 +; GISEL-NEXT: mov w0, #7 ; =0x7 ; GISEL-NEXT: ret entry: %cmp = icmp sgt i32 %a, 0 @@ -230,13 +230,13 @@ define i32 @single_fcmp(i32 %a, float %b) nounwind ssp { ; SDISEL-NEXT: fccmp s0, s1, #8, ge ; SDISEL-NEXT: b.ge LBB5_2 ; SDISEL-NEXT: ; %bb.1: ; %if.end -; SDISEL-NEXT: mov w0, #7 +; SDISEL-NEXT: mov w0, #7 ; =0x7 ; SDISEL-NEXT: ret ; SDISEL-NEXT: LBB5_2: ; %if.then ; SDISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill ; SDISEL-NEXT: bl _foo ; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; SDISEL-NEXT: mov w0, #7 +; SDISEL-NEXT: mov w0, #7 ; =0x7 ; SDISEL-NEXT: ret ; ; GISEL-LABEL: single_fcmp: @@ -248,13 +248,13 @@ define i32 @single_fcmp(i32 %a, float %b) nounwind ssp { ; GISEL-NEXT: fccmp s0, s1, #8, gt ; GISEL-NEXT: b.ge LBB5_2 ; GISEL-NEXT: ; %bb.1: ; %if.end -; GISEL-NEXT: mov w0, #7 +; GISEL-NEXT: mov w0, #7 ; =0x7 ; GISEL-NEXT: ret ; GISEL-NEXT: LBB5_2: ; %if.then ; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill ; GISEL-NEXT: bl _foo ; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; GISEL-NEXT: mov w0, #7 +; GISEL-NEXT: mov w0, #7 ; =0x7 ; GISEL-NEXT: ret entry: %cmp = icmp sgt i32 %a, 0 @@ -318,7 +318,7 @@ define i32 @cbz_head(i32 %a, i32 %b) nounwind ssp { ; CHECK-NEXT: bl _foo ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; CHECK-NEXT: LBB7_2: ; %if.end -; CHECK-NEXT: mov w0, #7 +; CHECK-NEXT: mov w0, #7 ; =0x7 ; CHECK-NEXT: ret entry: %cmp = icmp eq i32 %a, 0 @@ -346,13 +346,13 @@ define i32 @immediate_range(i32 %a, i32 %b) nounwind ssp { ; CHECK-NEXT: cmp w1, #32 ; CHECK-NEXT: b.eq LBB8_3 ; CHECK-NEXT: ; %bb.2: ; %if.end -; CHECK-NEXT: mov w0, #7 +; CHECK-NEXT: mov w0, #7 ; =0x7 ; CHECK-NEXT: ret ; CHECK-NEXT: LBB8_3: ; %if.then ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill ; CHECK-NEXT: bl _foo ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; CHECK-NEXT: mov w0, #7 +; CHECK-NEXT: mov w0, #7 ; =0x7 ; CHECK-NEXT: ret entry: %cmp = icmp eq i32 %a, 5 @@ -380,7 +380,7 @@ define i32 @cbz_second(i32 %a, i32 %b) nounwind ssp { ; CHECK-NEXT: bl _foo ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; CHECK-NEXT: LBB9_2: ; %if.end -; CHECK-NEXT: mov w0, #7 +; CHECK-NEXT: mov w0, #7 ; =0x7 ; CHECK-NEXT: ret entry: %cmp = icmp eq i32 %a, 0 @@ -408,7 +408,7 @@ define i32 @cbnz_second(i32 %a, i32 %b) nounwind ssp { ; CHECK-NEXT: bl _foo ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; CHECK-NEXT: LBB10_2: ; %if.end -; CHECK-NEXT: mov w0, #7 +; CHECK-NEXT: mov w0, #7 ; =0x7 ; CHECK-NEXT: ret entry: %cmp = icmp eq i32 %a, 0 @@ -466,7 +466,7 @@ define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { ; ; GISEL-LABEL: select_and: ; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 +; GISEL-NEXT: mov w8, #5 ; =0x5 ; GISEL-NEXT: cmp w8, w1 ; GISEL-NEXT: ccmp w0, w1, #0, ne ; GISEL-NEXT: csel x0, x2, x3, lt @@ -488,7 +488,7 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { ; ; GISEL-LABEL: select_or: ; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 +; GISEL-NEXT: mov w8, #5 ; =0x5 ; GISEL-NEXT: cmp w8, w1 ; GISEL-NEXT: ccmp w0, w1, #8, eq ; GISEL-NEXT: csel x0, x2, x3, lt @@ -510,7 +510,7 @@ define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) { ; ; GISEL-LABEL: select_or_float: ; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 +; GISEL-NEXT: mov w8, #5 ; =0x5 ; GISEL-NEXT: cmp w8, w1 ; GISEL-NEXT: ccmp w0, w1, #8, eq ; GISEL-NEXT: fcsel s0, s0, s1, lt @@ -528,17 +528,22 @@ define i64 @gccbug(i64 %x0, i64 %x1) { ; SDISEL-NEXT: cmp x0, #2 ; SDISEL-NEXT: ccmp x0, #4, #4, ne ; SDISEL-NEXT: ccmp x1, #0, #0, eq -; SDISEL-NEXT: mov w8, #1 +; SDISEL-NEXT: mov w8, #1 ; =0x1 ; SDISEL-NEXT: cinc x0, x8, eq ; SDISEL-NEXT: ret ; ; GISEL-LABEL: gccbug: ; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #2 +; GISEL-NEXT: cmp x1, #0 +; GISEL-NEXT: cset w8, eq ; GISEL-NEXT: cmp x0, #2 -; GISEL-NEXT: ccmp x0, #4, #4, ne -; GISEL-NEXT: ccmp x1, #0, #0, eq -; GISEL-NEXT: csinc x0, x8, xzr, eq +; GISEL-NEXT: cset w9, eq +; GISEL-NEXT: cmp x0, #4 +; GISEL-NEXT: cset w10, eq +; GISEL-NEXT: orr w9, w10, w9 +; GISEL-NEXT: and w8, w9, w8 +; GISEL-NEXT: and x8, x8, #0x1 +; GISEL-NEXT: add x0, x8, #1 ; GISEL-NEXT: ret %cmp0 = icmp eq i64 %x1, 0 %cmp1 = icmp eq i64 %x0, 2 @@ -592,7 +597,7 @@ define i32 @select_andor32(i32 %v1, i32 %v2, i32 %v3) { ; SDISEL-LABEL: select_andor32: ; SDISEL: ; %bb.0: ; SDISEL-NEXT: cmp w1, w2 -; SDISEL-NEXT: mov w8, #32 +; SDISEL-NEXT: mov w8, #32 ; =0x20 ; SDISEL-NEXT: ccmp w0, w8, #4, lt ; SDISEL-NEXT: ccmp w0, w1, #0, eq ; SDISEL-NEXT: csel w0, w0, w1, eq @@ -600,7 +605,7 @@ define i32 @select_andor32(i32 %v1, i32 %v2, i32 %v3) { ; ; GISEL-LABEL: select_andor32: ; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #32 +; GISEL-NEXT: mov w8, #32 ; =0x20 ; GISEL-NEXT: cmp w1, w2 ; GISEL-NEXT: ccmp w0, w8, #4, lt ; GISEL-NEXT: ccmp w0, w1, #0, eq @@ -701,11 +706,11 @@ define i32 @select_noccmp3(i32 %v0, i32 %v1, i32 %v2) { ; SDISEL-NEXT: ccmp w0, #13, #0, ge ; SDISEL-NEXT: cset w8, gt ; SDISEL-NEXT: cmp w0, #22 -; SDISEL-NEXT: mov w9, #44 +; SDISEL-NEXT: mov w9, #44 ; =0x2c ; SDISEL-NEXT: ccmp w0, w9, #0, ge ; SDISEL-NEXT: csel w8, wzr, w8, le ; SDISEL-NEXT: cmp w0, #99 -; SDISEL-NEXT: mov w9, #77 +; SDISEL-NEXT: mov w9, #77 ; =0x4d ; SDISEL-NEXT: ccmp w0, w9, #4, ne ; SDISEL-NEXT: cset w9, eq ; SDISEL-NEXT: tst w8, w9 
diff --git a/llvm/test/CodeGen/AArch64/call-rv-marker.ll b/llvm/test/CodeGen/AArch64/call-rv-marker.ll index fc06809ad09fb..de8f5bbfb484d 100644 --- a/llvm/test/CodeGen/AArch64/call-rv-marker.ll +++ b/llvm/test/CodeGen/AArch64/call-rv-marker.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -o - %s | FileCheck --check-prefix=SELDAG --check-prefix=CHECK %s ; RUN: llc -global-isel -o - %s | FileCheck --check-prefix=GISEL --check-prefix=CHECK %s @@ -25,37 +26,93 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) @fptr = dso_local global ptr null, align 8 define dso_local ptr @rv_marker_1_retain() { -; CHECK-LABEL: _rv_marker_1_retain: -; CHECK: bl _foo1 -; CHECK-NEXT: mov x29, x29 -; CHECK-NEXT: bl _objc_retainAutoreleasedReturnValue +; SELDAG-LABEL: rv_marker_1_retain: +; SELDAG: ; %bb.0: ; %entry +; SELDAG-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; SELDAG-NEXT: .cfi_def_cfa_offset 16 +; SELDAG-NEXT: .cfi_offset w30, -8 +; SELDAG-NEXT: .cfi_offset w29, -16 +; SELDAG-NEXT: bl _foo1 +; SELDAG-NEXT: mov x29, x29 +; SELDAG-NEXT: bl _objc_retainAutoreleasedReturnValue +; SELDAG-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; SELDAG-NEXT: ret ; +; GISEL-LABEL: rv_marker_1_retain: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; GISEL-NEXT: .cfi_def_cfa_offset 16 +; GISEL-NEXT: .cfi_offset w30, -8 +; GISEL-NEXT: .cfi_offset w29, -16 +; GISEL-NEXT: bl _foo1 +; GISEL-NEXT: mov x29, x29 +; GISEL-NEXT: bl _objc_retainAutoreleasedReturnValue +; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; GISEL-NEXT: ret entry: %call = call ptr @foo1() [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ] ret ptr %call } define dso_local ptr @rv_marker_1_unsafeClaim() { -; CHECK-LABEL: _rv_marker_1_unsafeClaim: -; CHECK: bl _foo1 -; CHECK-NEXT: mov x29, x29 -; CHECK-NEXT: bl _objc_unsafeClaimAutoreleasedReturnValue +; SELDAG-LABEL: rv_marker_1_unsafeClaim: +; SELDAG: ; %bb.0: ; %entry +; SELDAG-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; SELDAG-NEXT: .cfi_def_cfa_offset 16 +; SELDAG-NEXT: .cfi_offset w30, -8 +; SELDAG-NEXT: .cfi_offset w29, -16 +; SELDAG-NEXT: bl _foo1 +; SELDAG-NEXT: mov x29, x29 +; SELDAG-NEXT: bl _objc_unsafeClaimAutoreleasedReturnValue +; SELDAG-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; SELDAG-NEXT: ret ; +; GISEL-LABEL: rv_marker_1_unsafeClaim: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; GISEL-NEXT: .cfi_def_cfa_offset 16 +; GISEL-NEXT: .cfi_offset w30, -8 +; GISEL-NEXT: .cfi_offset w29, -16 +; GISEL-NEXT: bl _foo1 +; GISEL-NEXT: mov x29, x29 +; GISEL-NEXT: bl _objc_unsafeClaimAutoreleasedReturnValue +; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; GISEL-NEXT: ret entry: %call = call ptr @foo1() [ "clang.arc.attachedcall"(ptr @objc_unsafeClaimAutoreleasedReturnValue) ] ret ptr %call } define dso_local void @rv_marker_2_select(i32 %c) { -; CHECK-LABEL: _rv_marker_2_select: -; SELDAG: cinc w0, w8, eq -; GISEL: csinc w0, w8, wzr, eq -; CHECK-NEXT: bl _foo0 -; CHECK-NEXT: mov x29, x29 -; CHECK-NEXT: bl _objc_retainAutoreleasedReturnValue -; CHECK-NEXT: ldp x29, x30, [sp], #16 -; CHECK-NEXT: b _foo2 +; SELDAG-LABEL: rv_marker_2_select: +; SELDAG: ; %bb.0: ; %entry +; SELDAG-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill +; SELDAG-NEXT: .cfi_def_cfa_offset 16 +; SELDAG-NEXT: .cfi_offset w30, -8 +; SELDAG-NEXT: .cfi_offset w29, -16 +; SELDAG-NEXT: mov w8, #1 ; =0x1 +; SELDAG-NEXT: cmp w0, #0 +; SELDAG-NEXT: cinc w0, w8, eq +; SELDAG-NEXT: bl _foo0 +; SELDAG-NEXT: mov x29, x29 +; SELDAG-NEXT: bl _objc_retainAutoreleasedReturnValue +; SELDAG-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; SELDAG-NEXT: b _foo2 ; +; GISEL-LABEL: rv_marker_2_select: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; GISEL-NEXT: .cfi_def_cfa_offset 16 +; GISEL-NEXT: .cfi_offset w30, -8 +; GISEL-NEXT: .cfi_offset w29, -16 +; GISEL-NEXT: mov w8, #1 ; =0x1 +; GISEL-NEXT: cmp w0, #0 +; GISEL-NEXT: cinc w0, w8, eq +; GISEL-NEXT: bl _foo0 +; GISEL-NEXT: mov x29, x29 +; GISEL-NEXT: bl _objc_retainAutoreleasedReturnValue +; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; GISEL-NEXT: b _foo2 entry: %tobool.not = icmp eq i32 %c, 0 %.sink = select i1 %tobool.not, i32 2, i32 1 @@ -65,11 +122,121 @@ entry: } define dso_local void @rv_marker_3() personality ptr @__gxx_personality_v0 { -; CHECK-LABEL: _rv_marker_3: -; CHECK: bl _foo1 -; CHECK-NEXT: mov x29, x29 -; CHECK-NEXT: bl _objc_retainAutoreleasedReturnValue +; SELDAG-LABEL: rv_marker_3: +; SELDAG: Lfunc_begin0: +; SELDAG-NEXT: .cfi_startproc +; SELDAG-NEXT: .cfi_personality 155, ___gxx_personality_v0 +; SELDAG-NEXT: .cfi_lsda 16, Lexception0 +; SELDAG-NEXT: ; %bb.0: ; %entry +; SELDAG-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; SELDAG-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; SELDAG-NEXT: .cfi_def_cfa_offset 32 +; SELDAG-NEXT: .cfi_offset w30, -8 +; SELDAG-NEXT: .cfi_offset w29, -16 +; SELDAG-NEXT: .cfi_offset w19, -24 +; SELDAG-NEXT: .cfi_offset w20, -32 +; SELDAG-NEXT: bl _foo1 +; SELDAG-NEXT: mov x29, x29 +; SELDAG-NEXT: bl _objc_retainAutoreleasedReturnValue +; SELDAG-NEXT: mov x19, x0 +; SELDAG-NEXT: Ltmp0: +; SELDAG-NEXT: bl _objc_object +; SELDAG-NEXT: Ltmp1: +; SELDAG-NEXT: ; %bb.1: ; %invoke.cont +; SELDAG-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; SELDAG-NEXT: mov x0, x19 +; SELDAG-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; SELDAG-NEXT: b _objc_release +; SELDAG-NEXT: LBB3_2: ; %lpad +; SELDAG-NEXT: Ltmp2: +; SELDAG-NEXT: mov x20, x0 +; SELDAG-NEXT: mov x0, x19 +; SELDAG-NEXT: bl _objc_release +; SELDAG-NEXT: mov x0, x20 +; SELDAG-NEXT: bl __Unwind_Resume +; SELDAG-NEXT: Lfunc_end0: +; SELDAG-NEXT: .cfi_endproc +; SELDAG-NEXT: .section __TEXT,__gcc_except_tab +; SELDAG-NEXT: .p2align 2, 0x0 +; SELDAG-NEXT: GCC_except_table3: +; SELDAG-NEXT: Lexception0: +; SELDAG-NEXT: .byte 255 ; @LPStart Encoding = omit +; SELDAG-NEXT: .byte 255 ; @TType Encoding = omit +; SELDAG-NEXT: .byte 1 ; Call site Encoding = uleb128 +; SELDAG-NEXT: .uleb128 Lcst_end0-Lcst_begin0 +; SELDAG-NEXT: Lcst_begin0: +; SELDAG-NEXT: .uleb128 Lfunc_begin0-Lfunc_begin0 ; >> Call Site 1 << +; SELDAG-NEXT: .uleb128 Ltmp0-Lfunc_begin0 ; Call between Lfunc_begin0 and Ltmp0 +; SELDAG-NEXT: .byte 0 ; has no landing pad +; SELDAG-NEXT: .byte 0 ; On action: cleanup +; SELDAG-NEXT: .uleb128 Ltmp0-Lfunc_begin0 ; >> Call Site 2 << +; SELDAG-NEXT: .uleb128 Ltmp1-Ltmp0 ; Call between Ltmp0 and Ltmp1 +; SELDAG-NEXT: .uleb128 Ltmp2-Lfunc_begin0 ; jumps to Ltmp2 +; SELDAG-NEXT: .byte 0 ; On action: cleanup +; SELDAG-NEXT: .uleb128 Ltmp1-Lfunc_begin0 ; >> Call Site 3 << +; SELDAG-NEXT: .uleb128 Lfunc_end0-Ltmp1 ; Call between Ltmp1 and Lfunc_end0 +; SELDAG-NEXT: .byte 0 ; has no 
landing pad +; SELDAG-NEXT: .byte 0 ; On action: cleanup +; SELDAG-NEXT: Lcst_end0: +; SELDAG-NEXT: .p2align 2, 0x0 ; +; GISEL-LABEL: rv_marker_3: +; GISEL: Lfunc_begin0: +; GISEL-NEXT: .cfi_startproc +; GISEL-NEXT: .cfi_personality 155, ___gxx_personality_v0 +; GISEL-NEXT: .cfi_lsda 16, Lexception0 +; GISEL-NEXT: ; %bb.0: ; %entry +; GISEL-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; GISEL-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; GISEL-NEXT: .cfi_def_cfa_offset 32 +; GISEL-NEXT: .cfi_offset w30, -8 +; GISEL-NEXT: .cfi_offset w29, -16 +; GISEL-NEXT: .cfi_offset w19, -24 +; GISEL-NEXT: .cfi_offset w20, -32 +; GISEL-NEXT: bl _foo1 +; GISEL-NEXT: mov x29, x29 +; GISEL-NEXT: bl _objc_retainAutoreleasedReturnValue +; GISEL-NEXT: mov x19, x0 +; GISEL-NEXT: Ltmp0: +; GISEL-NEXT: bl _objc_object +; GISEL-NEXT: Ltmp1: +; GISEL-NEXT: ; %bb.1: ; %invoke.cont +; GISEL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; GISEL-NEXT: mov x0, x19 +; GISEL-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; GISEL-NEXT: b _objc_release +; GISEL-NEXT: LBB3_2: ; %lpad +; GISEL-NEXT: Ltmp2: +; GISEL-NEXT: mov x20, x0 +; GISEL-NEXT: mov x0, x19 +; GISEL-NEXT: bl _objc_release +; GISEL-NEXT: mov x0, x20 +; GISEL-NEXT: bl __Unwind_Resume +; GISEL-NEXT: Lfunc_end0: +; GISEL-NEXT: .cfi_endproc +; GISEL-NEXT: .section __TEXT,__gcc_except_tab +; GISEL-NEXT: .p2align 2, 0x0 +; GISEL-NEXT: GCC_except_table3: +; GISEL-NEXT: Lexception0: +; GISEL-NEXT: .byte 255 ; @LPStart Encoding = omit +; GISEL-NEXT: .byte 255 ; @TType Encoding = omit +; GISEL-NEXT: .byte 1 ; Call site Encoding = uleb128 +; GISEL-NEXT: .uleb128 Lcst_end0-Lcst_begin0 +; GISEL-NEXT: Lcst_begin0: +; GISEL-NEXT: .uleb128 Lfunc_begin0-Lfunc_begin0 ; >> Call Site 1 << +; GISEL-NEXT: .uleb128 Ltmp0-Lfunc_begin0 ; Call between Lfunc_begin0 and Ltmp0 +; GISEL-NEXT: .byte 0 ; has no landing pad +; GISEL-NEXT: .byte 0 ; On action: cleanup +; GISEL-NEXT: .uleb128 Ltmp0-Lfunc_begin0 ; >> Call Site 2 << +; GISEL-NEXT: .uleb128 Ltmp1-Ltmp0 ; Call between Ltmp0 and Ltmp1 +; GISEL-NEXT: .uleb128 Ltmp2-Lfunc_begin0 ; jumps to Ltmp2 +; GISEL-NEXT: .byte 0 ; On action: cleanup +; GISEL-NEXT: .uleb128 Ltmp1-Lfunc_begin0 ; >> Call Site 3 << +; GISEL-NEXT: .uleb128 Lfunc_end0-Ltmp1 ; Call between Ltmp1 and Lfunc_end0 +; GISEL-NEXT: .byte 0 ; has no landing pad +; GISEL-NEXT: .byte 0 ; On action: cleanup +; GISEL-NEXT: Lcst_end0: +; GISEL-NEXT: .p2align 2, 0x0 entry: %call = call ptr @foo1() [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ] invoke void @objc_object(ptr %call) #5 @@ -87,13 +254,151 @@ lpad: ; preds = %entry } define dso_local void @rv_marker_4() personality ptr @__gxx_personality_v0 { -; CHECK-LABEL: _rv_marker_4: -; CHECK: Ltmp3: -; CHECK-NEXT: bl _foo1 -; CHECK-NEXT: mov x29, x29 -; CHECK-NEXT: bl _objc_retainAutoreleasedReturnValue -; CHECK-NEXT: Ltmp4: +; SELDAG-LABEL: rv_marker_4: +; SELDAG: Lfunc_begin1: +; SELDAG-NEXT: .cfi_startproc +; SELDAG-NEXT: .cfi_personality 155, ___gxx_personality_v0 +; SELDAG-NEXT: .cfi_lsda 16, Lexception1 +; SELDAG-NEXT: ; %bb.0: ; %entry +; SELDAG-NEXT: sub sp, sp, #48 +; SELDAG-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; SELDAG-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; SELDAG-NEXT: .cfi_def_cfa_offset 48 +; SELDAG-NEXT: .cfi_offset w30, -8 +; SELDAG-NEXT: .cfi_offset w29, -16 +; SELDAG-NEXT: .cfi_offset w19, -24 +; SELDAG-NEXT: .cfi_offset w20, -32 +; SELDAG-NEXT: Ltmp3: +; SELDAG-NEXT: bl _foo1 +; SELDAG-NEXT: mov x29, x29 +; SELDAG-NEXT: 
bl _objc_retainAutoreleasedReturnValue +; SELDAG-NEXT: Ltmp4: +; SELDAG-NEXT: ; %bb.1: ; %invoke.cont +; SELDAG-NEXT: Ltmp6: +; SELDAG-NEXT: mov x19, x0 +; SELDAG-NEXT: bl _objc_object +; SELDAG-NEXT: Ltmp7: +; SELDAG-NEXT: ; %bb.2: ; %invoke.cont2 +; SELDAG-NEXT: mov x0, x19 +; SELDAG-NEXT: bl _objc_release +; SELDAG-NEXT: add x0, sp, #15 +; SELDAG-NEXT: bl __ZN1SD1Ev +; SELDAG-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload +; SELDAG-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; SELDAG-NEXT: add sp, sp, #48 +; SELDAG-NEXT: ret +; SELDAG-NEXT: LBB4_3: ; %lpad1 +; SELDAG-NEXT: Ltmp8: +; SELDAG-NEXT: mov x20, x0 +; SELDAG-NEXT: mov x0, x19 +; SELDAG-NEXT: bl _objc_release +; SELDAG-NEXT: b LBB4_5 +; SELDAG-NEXT: LBB4_4: ; %lpad +; SELDAG-NEXT: Ltmp5: +; SELDAG-NEXT: mov x20, x0 +; SELDAG-NEXT: LBB4_5: ; %ehcleanup +; SELDAG-NEXT: add x0, sp, #15 +; SELDAG-NEXT: bl __ZN1SD1Ev +; SELDAG-NEXT: mov x0, x20 +; SELDAG-NEXT: bl __Unwind_Resume +; SELDAG-NEXT: Lfunc_end1: +; SELDAG-NEXT: .cfi_endproc +; SELDAG-NEXT: .section __TEXT,__gcc_except_tab +; SELDAG-NEXT: .p2align 2, 0x0 +; SELDAG-NEXT: GCC_except_table4: +; SELDAG-NEXT: Lexception1: +; SELDAG-NEXT: .byte 255 ; @LPStart Encoding = omit +; SELDAG-NEXT: .byte 255 ; @TType Encoding = omit +; SELDAG-NEXT: .byte 1 ; Call site Encoding = uleb128 +; SELDAG-NEXT: .uleb128 Lcst_end1-Lcst_begin1 +; SELDAG-NEXT: Lcst_begin1: +; SELDAG-NEXT: .uleb128 Ltmp3-Lfunc_begin1 ; >> Call Site 1 << +; SELDAG-NEXT: .uleb128 Ltmp4-Ltmp3 ; Call between Ltmp3 and Ltmp4 +; SELDAG-NEXT: .uleb128 Ltmp5-Lfunc_begin1 ; jumps to Ltmp5 +; SELDAG-NEXT: .byte 0 ; On action: cleanup +; SELDAG-NEXT: .uleb128 Ltmp6-Lfunc_begin1 ; >> Call Site 2 << +; SELDAG-NEXT: .uleb128 Ltmp7-Ltmp6 ; Call between Ltmp6 and Ltmp7 +; SELDAG-NEXT: .uleb128 Ltmp8-Lfunc_begin1 ; jumps to Ltmp8 +; SELDAG-NEXT: .byte 0 ; On action: cleanup +; SELDAG-NEXT: .uleb128 Ltmp7-Lfunc_begin1 ; >> Call Site 3 << +; SELDAG-NEXT: .uleb128 Lfunc_end1-Ltmp7 ; Call between Ltmp7 and Lfunc_end1 +; SELDAG-NEXT: .byte 0 ; has no landing pad +; SELDAG-NEXT: .byte 0 ; On action: cleanup +; SELDAG-NEXT: Lcst_end1: +; SELDAG-NEXT: .p2align 2, 0x0 ; +; GISEL-LABEL: rv_marker_4: +; GISEL: Lfunc_begin1: +; GISEL-NEXT: .cfi_startproc +; GISEL-NEXT: .cfi_personality 155, ___gxx_personality_v0 +; GISEL-NEXT: .cfi_lsda 16, Lexception1 +; GISEL-NEXT: ; %bb.0: ; %entry +; GISEL-NEXT: sub sp, sp, #48 +; GISEL-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; GISEL-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; GISEL-NEXT: .cfi_def_cfa_offset 48 +; GISEL-NEXT: .cfi_offset w30, -8 +; GISEL-NEXT: .cfi_offset w29, -16 +; GISEL-NEXT: .cfi_offset w19, -24 +; GISEL-NEXT: .cfi_offset w20, -32 +; GISEL-NEXT: Ltmp3: +; GISEL-NEXT: bl _foo1 +; GISEL-NEXT: mov x29, x29 +; GISEL-NEXT: bl _objc_retainAutoreleasedReturnValue +; GISEL-NEXT: Ltmp4: +; GISEL-NEXT: ; %bb.1: ; %invoke.cont +; GISEL-NEXT: Ltmp6: +; GISEL-NEXT: mov x19, x0 +; GISEL-NEXT: bl _objc_object +; GISEL-NEXT: Ltmp7: +; GISEL-NEXT: ; %bb.2: ; %invoke.cont2 +; GISEL-NEXT: mov x0, x19 +; GISEL-NEXT: bl _objc_release +; GISEL-NEXT: add x0, sp, #15 +; GISEL-NEXT: bl __ZN1SD1Ev +; GISEL-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload +; GISEL-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; GISEL-NEXT: add sp, sp, #48 +; GISEL-NEXT: ret +; GISEL-NEXT: LBB4_3: ; %lpad1 +; GISEL-NEXT: Ltmp8: +; GISEL-NEXT: mov x20, x0 +; GISEL-NEXT: mov x0, x19 +; GISEL-NEXT: bl _objc_release +; GISEL-NEXT: b LBB4_5 +; GISEL-NEXT: LBB4_4: ; %lpad +; 
GISEL-NEXT: Ltmp5: +; GISEL-NEXT: mov x20, x0 +; GISEL-NEXT: LBB4_5: ; %ehcleanup +; GISEL-NEXT: add x0, sp, #15 +; GISEL-NEXT: bl __ZN1SD1Ev +; GISEL-NEXT: mov x0, x20 +; GISEL-NEXT: bl __Unwind_Resume +; GISEL-NEXT: Lfunc_end1: +; GISEL-NEXT: .cfi_endproc +; GISEL-NEXT: .section __TEXT,__gcc_except_tab +; GISEL-NEXT: .p2align 2, 0x0 +; GISEL-NEXT: GCC_except_table4: +; GISEL-NEXT: Lexception1: +; GISEL-NEXT: .byte 255 ; @LPStart Encoding = omit +; GISEL-NEXT: .byte 255 ; @TType Encoding = omit +; GISEL-NEXT: .byte 1 ; Call site Encoding = uleb128 +; GISEL-NEXT: .uleb128 Lcst_end1-Lcst_begin1 +; GISEL-NEXT: Lcst_begin1: +; GISEL-NEXT: .uleb128 Ltmp3-Lfunc_begin1 ; >> Call Site 1 << +; GISEL-NEXT: .uleb128 Ltmp4-Ltmp3 ; Call between Ltmp3 and Ltmp4 +; GISEL-NEXT: .uleb128 Ltmp5-Lfunc_begin1 ; jumps to Ltmp5 +; GISEL-NEXT: .byte 0 ; On action: cleanup +; GISEL-NEXT: .uleb128 Ltmp6-Lfunc_begin1 ; >> Call Site 2 << +; GISEL-NEXT: .uleb128 Ltmp7-Ltmp6 ; Call between Ltmp6 and Ltmp7 +; GISEL-NEXT: .uleb128 Ltmp8-Lfunc_begin1 ; jumps to Ltmp8 +; GISEL-NEXT: .byte 0 ; On action: cleanup +; GISEL-NEXT: .uleb128 Ltmp7-Lfunc_begin1 ; >> Call Site 3 << +; GISEL-NEXT: .uleb128 Lfunc_end1-Ltmp7 ; Call between Ltmp7 and Lfunc_end1 +; GISEL-NEXT: .byte 0 ; has no landing pad +; GISEL-NEXT: .byte 0 ; On action: cleanup +; GISEL-NEXT: Lcst_end1: +; GISEL-NEXT: .p2align 2, 0x0 entry: %s = alloca %struct.S, align 1 call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %s) #2 @@ -129,11 +434,53 @@ ehcleanup: ; preds = %lpad1, %lpad } define dso_local ptr @rv_marker_5_indirect_call() { -; CHECK-LABEL: _rv_marker_5_indirect_call: -; CHECK: ldr [[ADDR:x[0-9]+]], [ -; CHECK-NEXT: blr [[ADDR]] -; CHECK-NEXT: mov x29, x29 -; CHECK-NEXT: bl _objc_retainAutoreleasedReturnValue +; SELDAG-LABEL: rv_marker_5_indirect_call: +; SELDAG: ; %bb.0: ; %entry +; SELDAG-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; SELDAG-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; SELDAG-NEXT: .cfi_def_cfa_offset 32 +; SELDAG-NEXT: .cfi_offset w30, -8 +; SELDAG-NEXT: .cfi_offset w29, -16 +; SELDAG-NEXT: .cfi_offset w19, -24 +; SELDAG-NEXT: .cfi_offset w20, -32 +; SELDAG-NEXT: Lloh0: +; SELDAG-NEXT: adrp x8, _fptr@PAGE +; SELDAG-NEXT: Lloh1: +; SELDAG-NEXT: ldr x8, [x8, _fptr@PAGEOFF] +; SELDAG-NEXT: blr x8 +; SELDAG-NEXT: mov x29, x29 +; SELDAG-NEXT: bl _objc_retainAutoreleasedReturnValue +; SELDAG-NEXT: mov x19, x0 +; SELDAG-NEXT: bl _foo2 +; SELDAG-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; SELDAG-NEXT: mov x0, x19 +; SELDAG-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; SELDAG-NEXT: ret +; SELDAG-NEXT: .loh AdrpLdr Lloh0, Lloh1 +; +; GISEL-LABEL: rv_marker_5_indirect_call: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: stp x20, x19, [sp, #-32]! 
; 16-byte Folded Spill +; GISEL-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; GISEL-NEXT: .cfi_def_cfa_offset 32 +; GISEL-NEXT: .cfi_offset w30, -8 +; GISEL-NEXT: .cfi_offset w29, -16 +; GISEL-NEXT: .cfi_offset w19, -24 +; GISEL-NEXT: .cfi_offset w20, -32 +; GISEL-NEXT: Lloh0: +; GISEL-NEXT: adrp x8, _fptr@PAGE +; GISEL-NEXT: Lloh1: +; GISEL-NEXT: ldr x8, [x8, _fptr@PAGEOFF] +; GISEL-NEXT: blr x8 +; GISEL-NEXT: mov x29, x29 +; GISEL-NEXT: bl _objc_retainAutoreleasedReturnValue +; GISEL-NEXT: mov x19, x0 +; GISEL-NEXT: bl _foo2 +; GISEL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; GISEL-NEXT: mov x0, x19 +; GISEL-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; GISEL-NEXT: ret +; GISEL-NEXT: .loh AdrpLdr Lloh0, Lloh1 entry: %0 = load ptr, ptr @fptr, align 8 %call = call ptr %0() [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ] @@ -144,13 +491,35 @@ entry: declare ptr @foo(i64, i64, i64) define dso_local void @rv_marker_multiarg(i64 %a, i64 %b, i64 %c) { -; CHECK-LABEL: _rv_marker_multiarg: -; CHECK: mov [[TMP:x[0-9]+]], x0 -; CHECK-NEXT: mov x0, x2 -; CHECK-NEXT: mov x2, [[TMP]] -; CHECK-NEXT: bl _foo -; CHECK-NEXT: mov x29, x29 -; CHECK-NEXT: bl _objc_retainAutoreleasedReturnValue +; SELDAG-LABEL: rv_marker_multiarg: +; SELDAG: ; %bb.0: +; SELDAG-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; SELDAG-NEXT: .cfi_def_cfa_offset 16 +; SELDAG-NEXT: .cfi_offset w30, -8 +; SELDAG-NEXT: .cfi_offset w29, -16 +; SELDAG-NEXT: mov x8, x0 +; SELDAG-NEXT: mov x0, x2 +; SELDAG-NEXT: mov x2, x8 +; SELDAG-NEXT: bl _foo +; SELDAG-NEXT: mov x29, x29 +; SELDAG-NEXT: bl _objc_retainAutoreleasedReturnValue +; SELDAG-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; SELDAG-NEXT: ret +; +; GISEL-LABEL: rv_marker_multiarg: +; GISEL: ; %bb.0: +; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; GISEL-NEXT: .cfi_def_cfa_offset 16 +; GISEL-NEXT: .cfi_offset w30, -8 +; GISEL-NEXT: .cfi_offset w29, -16 +; GISEL-NEXT: mov x3, x0 +; GISEL-NEXT: mov x0, x2 +; GISEL-NEXT: mov x2, x3 +; GISEL-NEXT: bl _foo +; GISEL-NEXT: mov x29, x29 +; GISEL-NEXT: bl _objc_retainAutoreleasedReturnValue +; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; GISEL-NEXT: ret call ptr @foo(i64 %c, i64 %b, i64 %a) [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ] ret void } @@ -158,3 +527,5 @@ define dso_local void @rv_marker_multiarg(i64 %a, i64 %b, i64 %c) { declare ptr @objc_retainAutoreleasedReturnValue(ptr) declare ptr @objc_unsafeClaimAutoreleasedReturnValue(ptr) declare i32 @__gxx_personality_v0(...) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fold-binop-into-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fold-binop-into-select.mir index 9f3ad8b444446..96a776f6fbb69 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fold-binop-into-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fold-binop-into-select.mir @@ -450,8 +450,9 @@ body: | ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 ; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0 ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %and:_(s32) = G_SELECT %cond(s1), %zero, %variable + ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(ne), %reg(s32), %zero + ; CHECK-NEXT: %select:_(s32) = G_SEXT %cond(s1) + ; CHECK-NEXT: %and:_(s32) = G_AND %select, %variable ; CHECK-NEXT: S_ENDPGM 0, implicit %and(s32) %reg:_(s32) = COPY $vgpr0 %variable:_(s32) = COPY $vgpr0 @@ -476,7 +477,8 @@ body: | ; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0 ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %and:_(s32) = G_SELECT %cond(s1), %variable, %zero + ; CHECK-NEXT: %select:_(s32) = G_SEXT %cond(s1) + ; CHECK-NEXT: %and:_(s32) = G_AND %select, %variable ; CHECK-NEXT: S_ENDPGM 0, implicit %and(s32) %reg:_(s32) = COPY $vgpr0 %variable:_(s32) = COPY $vgpr0 @@ -500,9 +502,9 @@ body: | ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 ; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0 ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %neg1:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: %or:_(s32) = G_SELECT %cond(s1), %variable, %neg1 + ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(ne), %reg(s32), %zero + ; CHECK-NEXT: %select:_(s32) = G_SEXT %cond(s1) + ; CHECK-NEXT: %or:_(s32) = G_OR %select, %variable ; CHECK-NEXT: S_ENDPGM 0, implicit %or(s32) %reg:_(s32) = COPY $vgpr0 %variable:_(s32) = COPY $vgpr0 @@ -527,8 +529,8 @@ body: | ; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0 ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %neg1:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: %or:_(s32) = G_SELECT %cond(s1), %neg1, %variable + ; CHECK-NEXT: %select:_(s32) = G_SEXT %cond(s1) + ; CHECK-NEXT: %or:_(s32) = G_OR %select, %variable ; CHECK-NEXT: S_ENDPGM 0, implicit %or(s32) %reg:_(s32) = COPY $vgpr0 %variable:_(s32) = COPY $vgpr0 @@ -667,9 +669,9 @@ body: | ; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0 ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %neg1:_(s32) = G_CONSTANT i32 -1 ; CHECK-NEXT: %otherconst:_(s32) = G_CONSTANT i32 123 - ; CHECK-NEXT: %select:_(s32) = G_SELECT %cond(s1), %neg1, %otherconst + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT %cond(s1) + ; CHECK-NEXT: %select:_(s32) = G_OR [[SEXT]], %otherconst ; CHECK-NEXT: %or:_(s32) = G_OR %select, %variable ; CHECK-NEXT: S_ENDPGM 0, implicit %or(s32) %reg:_(s32) = COPY $vgpr0 @@ -749,8 +751,7 @@ body: | ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: %srem:_(s32) = G_SELECT %cond(s1), [[C]], %zero + ; CHECK-NEXT: %srem:_(s32) = G_ZEXT %cond(s1) ; CHECK-NEXT: S_ENDPGM 0, implicit %srem(s32) %reg:_(s32) = COPY 
$vgpr0 %zero:_(s32) = G_CONSTANT i32 0 @@ -802,8 +803,7 @@ body: | ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: %udiv:_(s32) = G_SELECT %cond(s1), [[C]], %zero + ; CHECK-NEXT: %udiv:_(s32) = G_ZEXT %cond(s1) ; CHECK-NEXT: S_ENDPGM 0, implicit %udiv(s32) %reg:_(s32) = COPY $vgpr0 %zero:_(s32) = G_CONSTANT i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll index ccf4e84fbbbd1..4ac1fad6deecd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll @@ -37,7 +37,8 @@ define amdgpu_ps void @divergent_i1_phi_uniform_branch(ptr addrspace(1) %out, i3 ; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 1, v2 ; GFX10-NEXT: global_store_dword v[3:4], v5, off ; GFX10-NEXT: .LBB0_3: ; %exit -; GFX10-NEXT: v_cndmask_b32_e64 v2, 2, 1, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, s0 +; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2 ; GFX10-NEXT: global_store_dword v[0:1], v2, off ; GFX10-NEXT: s_endpgm A: @@ -72,7 +73,8 @@ define amdgpu_ps void @divergent_i1_phi_uniform_branch_simple(ptr addrspace(1) % ; GFX10-NEXT: .LBB1_2: ; %B ; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 1, v2 ; GFX10-NEXT: .LBB1_3: ; %exit -; GFX10-NEXT: v_cndmask_b32_e64 v2, 2, 1, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, s0 +; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2 ; GFX10-NEXT: global_store_dword v[0:1], v2, off ; GFX10-NEXT: s_endpgm A: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll index afd271c995770..c1f3924e466d5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll @@ -14,7 +14,8 @@ define amdgpu_ps void @divergent_i1_phi_if_then(ptr addrspace(1) %out, i32 %tid, ; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 1, v2 ; GFX10-NEXT: ; %bb.2: ; %exit ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 2, 1, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, s0 +; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2 ; GFX10-NEXT: global_store_dword v[0:1], v2, off ; GFX10-NEXT: s_endpgm A: @@ -51,7 +52,8 @@ define amdgpu_ps void @divergent_i1_phi_if_else(ptr addrspace(1) %out, i32 %tid, ; GFX10-NEXT: v_cmp_le_u32_e64 s0, 1, v2 ; GFX10-NEXT: ; %bb.4: ; %exit ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 2, 1, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, s0 +; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2 ; GFX10-NEXT: global_store_dword v[0:1], v2, off ; GFX10-NEXT: s_endpgm entry: diff --git a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll index 794b10eea58b9..0cd409f726af2 100644 --- a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll @@ -1517,7 +1517,8 @@ define float @v_recip_sqrt_f32_ulp25(float %x) { ; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc +; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; 
CODEGEN-IEEE-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; CODEGEN-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; CODEGEN-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v0, v0 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc @@ -1558,7 +1559,8 @@ define float @v_recip_sqrt_f32_ulp25(float %x) { ; IR-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 ; IR-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc +; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; IR-IEEE-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; IR-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; IR-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v0, v0 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.ll index 97216b6c94693..b516660f3bdc6 100644 --- a/llvm/test/CodeGen/AMDGPU/fptrunc.ll +++ b/llvm/test/CodeGen/AMDGPU/fptrunc.ll @@ -230,15 +230,16 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0 ; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2 ; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; VI-SAFE-GISEL-NEXT: s_movk_i32 s5, 0x7e00 -; VI-SAFE-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; VI-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0 ; VI-SAFE-GISEL-NEXT: s_sub_i32 s7, 1, s4 ; VI-SAFE-GISEL-NEXT: s_lshl_b32 s6, s4, 12 ; VI-SAFE-GISEL-NEXT: s_max_i32 s7, s7, 0 ; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s2, s6 ; VI-SAFE-GISEL-NEXT: s_min_i32 s7, s7, 13 ; VI-SAFE-GISEL-NEXT: s_bitset1_b32 s2, 12 +; VI-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9 ; VI-SAFE-GISEL-NEXT: s_lshr_b32 s8, s2, s7 +; VI-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 ; VI-SAFE-GISEL-NEXT: s_lshl_b32 s7, s8, s7 ; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s7, s2 ; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0 @@ -358,20 +359,21 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 ; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0 ; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2 -; GFX10-SAFE-GISEL-NEXT: s_movk_i32 s5, 0x7e00 ; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0 ; GFX10-SAFE-GISEL-NEXT: s_sub_i32 s6, 1, s4 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s7, s2, 0x1000 +; GFX10-SAFE-GISEL-NEXT: s_or_b32 s8, s2, 0x1000 ; GFX10-SAFE-GISEL-NEXT: s_max_i32 s6, s6, 0 -; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s9, s4, 12 +; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s7, s4, 12 ; GFX10-SAFE-GISEL-NEXT: s_min_i32 s6, s6, 13 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s9 -; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s8, s7, s6 -; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s6, s8, s6 -; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s7 +; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s9, s8, s6 +; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s7 +; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s6, s9, s6 +; GFX10-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s8 ; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s8, s6 +; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s9, s6 ; GFX10-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1 ; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, s6, s2 ; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7 @@ -497,24 +499,24 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX11-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe ; 
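The fdiv/fsqrt scaling code selects between 0 and 32 (the ldexp exponent bias), and 32 is a power of two, so the select presumably hits the shift form of the constant fold: zero-extend the condition and shift it into place. A sketch, with hypothetical names:

    %ext:_(s32)  = G_ZEXT %cond(s1)    ; 1 or 0
    %five:_(s32) = G_CONSTANT i32 5
    %res:_(s32)  = G_SHL %ext, %five   ; 32 or 0

hence the v_cndmask_b32 0/1 plus v_lshlrev_b32 5 pairs that replace the single v_cndmask 0/32 throughout these checks.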
GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 ; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2 -; GFX11-SAFE-GISEL-NEXT: s_movk_i32 s5, 0x7e00 ; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0 ; GFX11-SAFE-GISEL-NEXT: s_sub_i32 s6, 1, s4 -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s7, s2, 0x1000 +; GFX11-SAFE-GISEL-NEXT: s_or_b32 s8, s2, 0x1000 ; GFX11-SAFE-GISEL-NEXT: s_max_i32 s6, s6, 0 -; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s9, s4, 12 +; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s7, s4, 12 ; GFX11-SAFE-GISEL-NEXT: s_min_i32 s6, s6, 13 -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s9 -; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s8, s7, s6 -; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s6, s8, s6 -; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s7 +; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s9, s8, s6 +; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s7 +; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s6, s9, s6 +; GFX11-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s8 ; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 ; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s8, s6 +; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s9, s6 ; GFX11-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1 ; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, s6, s2 ; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7 diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll index 046f262469695..31e481bf7aa4d 100644 --- a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll @@ -1850,7 +1850,8 @@ define float @v_sqrt_f32_ulp2(float %x) { ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc @@ -1886,7 +1887,8 @@ define float @v_sqrt_f32_ulp25(float %x) { ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc @@ -1922,7 +1924,8 @@ define float @v_sqrt_f32_ulp3(float %x) { ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc @@ -1957,7 +1960,8 @@ 
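In the fptrunc checks the selected constants are 0x7e00 (f16 quiet NaN) and 0x7c00 (f16 infinity), which differ in exactly one bit (0x200 = 1 << 9), so this is presumably the power-of-two-difference case of the fold: OR a shifted zero-extension onto the common base. Sketched below with hypothetical names:

    %ext:_(s32)  = G_ZEXT %cond(s1)
    %nine:_(s32) = G_CONSTANT i32 9
    %sh:_(s32)   = G_SHL %ext, %nine    ; 0x200 or 0
    %base:_(s32) = G_CONSTANT i32 0x7c00
    %res:_(s32)  = G_OR %sh, %base      ; 0x7e00 or 0x7c00

This trades the s_movk_i32 of 0x7e00 for an s_lshl/s_or pair, as the VI, GFX10, and GFX11 hunks show.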
define float @v_sqrt_f32_ulp2_fabs(float %x) { ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, s[4:5] +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; GISEL-IEEE-NEXT: v_ldexp_f32_e64 v0, |v0|, v1 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, s[4:5] @@ -2090,10 +2094,12 @@ define <2 x float> @v_sqrt_v2f32_ulp2(<2 x float> %x) { ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2 +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v3 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 32, s[4:5] +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 @@ -2232,10 +2238,12 @@ define <2 x float> @v_sqrt_v2f32_ulp2_fabs(<2 x float> %x) { ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 32, s[4:5] +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[6:7], |v1|, v2 +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[6:7] ; GISEL-IEEE-NEXT: v_ldexp_f32_e64 v0, |v0|, v3 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 32, s[6:7] +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_ldexp_f32_e64 v1, |v1|, v2 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 @@ -2328,7 +2336,8 @@ define float @v_sqrt_f32_ulp2_noncontractable_rcp(float %x) { ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc @@ -2425,7 +2434,8 @@ define float @v_sqrt_f32_ulp2_noncontractable_fdiv(float %x, float %y) { ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc @@ -2509,7 +2519,8 @@ define float @v_sqrt_f32_ulp2_contractable_fdiv(float %x, float %y) { ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 ; 
GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc @@ -2589,7 +2600,8 @@ define float @v_sqrt_f32_ulp2_contractable_fdiv_arcp(float %x, float %y) { ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc @@ -2658,10 +2670,12 @@ define <2 x float> @v_sqrt_v2f32_ulp2_noncontractable_rcp(<2 x float> %x) { ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2 +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v3 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 32, s[4:5] +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 @@ -2802,10 +2816,12 @@ define <2 x float> @v_sqrt_v2f32_ulp2_contractable_fdiv(<2 x float> %x, <2 x flo ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v5, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4 +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v5 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 32, s[4:5] +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v4, 5, v4 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 @@ -2929,10 +2945,12 @@ define <2 x float> @v_sqrt_v2f32_ulp2_contractable_fdiv_arcp(<2 x float> %x, <2 ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v5, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4 +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v5 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 32, s[4:5] +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v4, 5, v4 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 @@ -3029,7 +3047,8 @@ define float @v_sqrt_f32_known_never_posdenormal_ulp2(float nofpclass(psub) %x) ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc @@ -3064,7 +3083,8 @@ define float @v_sqrt_f32_nsz_known_never_posdenormal_ulp2(float nofpclass(psub) ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc @@ -3099,7 +3119,8 @@ define float @v_sqrt_f32_known_never_negdenormal(float nofpclass(nsub) %x) { ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc @@ -3698,7 +3719,8 @@ define float @v_sqrt_f32_known_never_zero_never_ninf_ulp2(float nofpclass(zero n ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc @@ -3733,7 +3755,8 @@ define float @v_sqrt_f32_known_never_ninf_ulp2(float nofpclass(ninf) %x) { ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc @@ -3768,7 +3791,8 @@ define float @v_sqrt_f32_nsz_known_never_ninf_ulp2(float nofpclass(ninf) %x) { ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc @@ -3911,7 +3935,8 @@ define float @v_elim_redun_check_ult_sqrt_ulp3(float %in) { ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v0, v1 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 ; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll index 196a3705ac818..1a3d00211ca9b 100644 --- a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll @@ -40,8 +40,8 @@ 
define double @v_sqrt_f64(double %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -100,8 +100,8 @@ define double @v_sqrt_f64_fneg(double %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -v[0:1], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -161,8 +161,8 @@ define double @v_sqrt_f64_fabs(double %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -222,8 +222,8 @@ define double @v_sqrt_f64_fneg_fabs(double %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -284,8 +284,8 @@ define double @v_sqrt_f64_ninf(double %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -344,8 +344,8 @@ define double @v_sqrt_f64_no_infs_attribute(double %x) "no-infs-fp-math"="true" ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -404,8 +404,8 @@ define double @v_sqrt_f64_nnan(double %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -464,8 +464,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64(double 
inreg %x) { ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 ; GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -533,8 +533,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) { ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 ; GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -602,8 +602,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn(double inreg %x) { ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 ; GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -671,8 +671,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) { ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 ; GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -740,8 +740,8 @@ define double @v_sqrt_f64_nsz(double %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -800,8 +800,8 @@ define double @v_sqrt_f64_nnan_ninf(double %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -860,8 +860,8 @@ define double @v_sqrt_f64_nnan_ninf_nsz(double %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -920,8 +920,8 @@ define double @v_sqrt_f64_afn(double %x) { ; 
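For the f64 square-root scaling the exponent bias is 256 (1 << 8), which lies outside the inline-constant range of AMDGPU VALU operands, so the old select had to materialize 0x100 into a VGPR first. Selecting between the inline constants 0 and 1 and then shifting trades that v_mov_b32 for a v_lshlrev_b32, which is why every hunk in this file drops the `v_mov_b32_e32 v4, 0x100`. The generic form, with hypothetical names:

    %ext:_(s32)   = G_ZEXT %cond(s1)
    %eight:_(s32) = G_CONSTANT i32 8
    %scale:_(s32) = G_SHL %ext, %eight  ; 256 or 0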
GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -980,8 +980,8 @@ define double @v_sqrt_f64_afn_nsz(double %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1062,10 +1062,11 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) { ; GISEL-NEXT: v_mov_b32_e32 v5, s5 ; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] ; GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5] -; GISEL-NEXT: v_mov_b32_e32 v6, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5] -; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6 +; GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6 ; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1] ; GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3] @@ -1139,8 +1140,8 @@ define double @v_sqrt_f64_afn_nnan(double %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1199,8 +1200,8 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1260,8 +1261,8 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1342,10 +1343,11 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) { ; GISEL-NEXT: v_mov_b32_e32 v5, s5 ; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] ; GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5] -; GISEL-NEXT: v_mov_b32_e32 v6, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc -; GISEL-NEXT: v_cndmask_b32_e64 
v4, 0, v6, s[4:5] -; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6 +; GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6 ; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1] ; GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3] @@ -1419,8 +1421,8 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1479,8 +1481,8 @@ define double @v_sqrt_f64__approx_func_fp_math(double %x) #2 { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1539,8 +1541,8 @@ define double @v_sqrt_f64__enough_unsafe_attrs(double %x) #3 { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1599,8 +1601,8 @@ define double @v_sqrt_f64__unsafe_attr(double %x) #4 { ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1681,10 +1683,11 @@ define <2 x double> @v_sqrt_v2f64(<2 x double> %x) { ; GISEL-NEXT: v_mov_b32_e32 v5, s5 ; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] ; GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5] -; GISEL-NEXT: v_mov_b32_e32 v6, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5] -; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6 +; GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6 ; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1] ; GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3] @@ -1796,16 +1799,18 @@ define <3 x double> @v_sqrt_v3f64(<3 x double> %x) { ; GISEL-NEXT: s_mov_b32 s4, 0 ; GISEL-NEXT: s_brev_b32 s5, 8 ; GISEL-NEXT: v_mov_b32_e32 v6, s4 -; GISEL-NEXT: v_mov_b32_e32 v7, s5 ; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] +; GISEL-NEXT: v_mov_b32_e32 v7, s5 ; GISEL-NEXT: 
v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7] ; GISEL-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[6:7] -; GISEL-NEXT: v_mov_b32_e32 v8, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc -; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, v8, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v8, s[6:7] -; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v9 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_lshlrev_b32_e32 v8, 8, v8 +; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[6:7] +; GISEL-NEXT: v_lshlrev_b32_e32 v8, 8, v8 +; GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6 +; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v8 ; GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6 ; GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[0:1] ; GISEL-NEXT: v_rsq_f64_e32 v[8:9], v[2:3] @@ -1824,8 +1829,8 @@ define <3 x double> @v_sqrt_v3f64(<3 x double> %x) { ; GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[20:21], v[8:9] ; GISEL-NEXT: v_fma_f64 v[14:15], v[14:15], v[20:21], v[14:15] ; GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[22:23], v[10:11] -; GISEL-NEXT: v_fma_f64 v[16:17], v[16:17], v[22:23], v[16:17] ; GISEL-NEXT: v_fma_f64 v[18:19], -v[6:7], v[6:7], v[0:1] +; GISEL-NEXT: v_fma_f64 v[16:17], v[16:17], v[22:23], v[16:17] ; GISEL-NEXT: v_fma_f64 v[20:21], -v[8:9], v[8:9], v[2:3] ; GISEL-NEXT: v_fma_f64 v[22:23], -v[10:11], v[10:11], v[4:5] ; GISEL-NEXT: v_fma_f64 v[6:7], v[18:19], v[12:13], v[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll index b3912aea55f79..fcc57b8bb7075 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll @@ -102,9 +102,9 @@ define amdgpu_cs void @vgpr_inverse_ballot(i64 %input, ptr addrspace(1) %out) { ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: v_readfirstlane_b32 s0, v0 ; GISEL-NEXT: v_readfirstlane_b32 s1, v1 -; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off +; GISEL-NEXT: v_mov_b32_e32 v5, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[0:1] +; GISEL-NEXT: global_store_b64 v[2:3], v[4:5], off ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GISEL-NEXT: s_endpgm @@ -164,8 +164,8 @@ define amdgpu_cs void @phi_uniform(i64 inreg %s0_1, i64 inreg %s2, ptr addrspace ; GISEL-NEXT: s_add_u32 s0, s0, 1 ; GISEL-NEXT: s_addc_u32 s1, s1, 0 ; GISEL-NEXT: .LBB5_2: ; %endif -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] ; GISEL-NEXT: v_mov_b32_e32 v3, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] ; GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll index 3dc565ceed0d0..3ad98719c689c 100644 --- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll @@ -62,12 +62,12 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) { ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc -; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 -; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; 
SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260 +; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] @@ -146,8 +146,8 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) { ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1] -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -242,12 +242,12 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) { ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |s[0:1]|, v[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc -; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], |s[0:1]|, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 -; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], |s[0:1]|, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260 +; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] @@ -326,8 +326,8 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) { ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |s[0:1]|, v[0:1] -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], |s[0:1]|, v0 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -423,12 +423,12 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) { ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc -; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 -; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260 +; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] @@ -507,8 +507,8 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) { ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1] -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], 
v[0:1] ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -603,12 +603,12 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) { ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -s[0:1], v[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc -; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], -s[0:1], v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 -; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], -s[0:1], v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260 +; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] @@ -687,8 +687,8 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) { ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -s[0:1], v[0:1] -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], -s[0:1], v0 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -784,12 +784,12 @@ define double @v_rsq_f64(double %x) { ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc -; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 -; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260 +; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] @@ -866,8 +866,8 @@ define double @v_rsq_f64(double %x) { ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -953,12 +953,12 @@ define double @v_rsq_f64_fabs(double %x) { ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3] -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc -; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 -; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260 +; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], 
v[0:1], v[2:3] @@ -1035,8 +1035,8 @@ define double @v_rsq_f64_fabs(double %x) { ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3] -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1123,12 +1123,12 @@ define double @v_rsq_f64_missing_contract0(double %x) { ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc -; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 -; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260 +; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] @@ -1205,8 +1205,8 @@ define double @v_rsq_f64_missing_contract0(double %x) { ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1292,12 +1292,12 @@ define double @v_rsq_f64_missing_contract1(double %x) { ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc -; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 -; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260 +; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] @@ -1374,8 +1374,8 @@ define double @v_rsq_f64_missing_contract1(double %x) { ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1461,12 +1461,12 @@ define double @v_neg_rsq_f64(double %x) { ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc 
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 -; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260 +; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] @@ -1543,8 +1543,8 @@ define double @v_neg_rsq_f64(double %x) { ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1664,26 +1664,27 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) { ; SI-GISEL-NEXT: s_mov_b32 s4, 0 ; SI-GISEL-NEXT: s_brev_b32 s5, 8 ; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc -; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4 -; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 ; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5 +; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1] ; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11] ; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80 +; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260 ; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5] -; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260 -; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5 ; SI-GISEL-NEXT: v_mov_b32_e32 v20, 0x3ff00000 +; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1] ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1] ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5] -; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6 ; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc ; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3] @@ -1815,10 +1816,11 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) { ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5 ; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] ; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5] -; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc -; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5] +; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6 +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4 ; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], 
v[0:1] ; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3] @@ -1965,26 +1967,27 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) { ; SI-GISEL-NEXT: s_mov_b32 s4, 0 ; SI-GISEL-NEXT: s_brev_b32 s5, 8 ; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc -; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4 -; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 ; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5 +; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1] ; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11] ; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80 +; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260 ; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5] -; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260 -; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5 ; SI-GISEL-NEXT: v_mov_b32_e32 v20, 0xbff00000 +; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1] ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1] ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5] -; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6 ; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc ; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3] @@ -2116,10 +2119,11 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) { ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5 ; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] ; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5] -; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc -; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5] +; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6 +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4 ; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1] ; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3] @@ -2234,17 +2238,17 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) { ; SI-GISEL-NEXT: s_mov_b32 s4, 0 ; SI-GISEL-NEXT: s_brev_b32 s5, 8 ; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc -; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4 -; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 ; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5 +; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1] ; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11] ; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80 +; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260 ; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5] -; 
-; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
 ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
@@ -2252,7 +2256,8 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
 ; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
 ; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -2356,10 +2361,11 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
 ; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
 ; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -2507,17 +2513,17 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT: s_mov_b32 s4, 0
 ; SI-GISEL-NEXT: s_brev_b32 s5, 8
 ; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
 ; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
 ; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80
+; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
 ; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
 ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
@@ -2525,7 +2531,8 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
 ; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
 ; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -2659,10 +2666,11 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
 ; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
 ; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -2775,12 +2783,12 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -2857,8 +2865,8 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -2946,12 +2954,12 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -3028,8 +3036,8 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3107,12 +3115,12 @@ define double @v_rsq_f64__afn_fdiv(double %x) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -3177,8 +3185,8 @@ define double @v_rsq_f64__afn_fdiv(double %x) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3252,12 +3260,12 @@ define double @v_rsq_f64__afn(double %x) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -3322,8 +3330,8 @@ define double @v_rsq_f64__afn(double %x) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3398,12 +3406,12 @@ define double @v_neg_rsq_f64__afn(double %x) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -3470,8 +3478,8 @@ define double @v_neg_rsq_f64__afn(double %x) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3546,12 +3554,12 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -3616,8 +3624,8 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3691,12 +3699,12 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -3761,8 +3769,8 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3836,12 +3844,12 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -3906,8 +3914,8 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3982,12 +3990,12 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4054,8 +4062,8 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -4138,12 +4146,12 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -4220,8 +4228,8 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -4325,13 +4333,15 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; SI-GISEL-NEXT: s_mov_b32 s4, 0
 ; SI-GISEL-NEXT: s_brev_b32 s5, 8
 ; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
 ; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
+; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0xffffff80
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc
 ; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
@@ -4339,30 +4349,29 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
 ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v12, s[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v8, 8, v8
 ; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v8
 ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
 ; SI-GISEL-NEXT: v_rsq_f64_e32 v[10:11], v[2:3]
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc
+; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13
 ; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[10:11], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[2:3], v[10:11]
-; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
 ; SI-GISEL-NEXT: v_mov_b32_e32 v13, 0x260
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
+; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
 ; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
 ; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
+; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v13
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[6:7], v[8:9]
 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
 ; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v13
 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[0:1]
@@ -4451,10 +4460,11 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
 ; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5]
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
 ; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -4550,12 +4560,12 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) #0 {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0
 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4622,8 +4632,8 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) #0 {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0
 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -4706,12 +4716,12 @@ define double @v_rsq_f64_unsafe(double %x) #0 {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4776,8 +4786,8 @@ define double @v_rsq_f64_unsafe(double %x) #0 {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -5112,12 +5122,12 @@ define double @v_div_contract_sqrt_f64(double %x, double %y) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
 ; SI-GISEL-NEXT: v_mov_b32_e32 v11, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], v[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
@@ -5193,8 +5203,8 @@ define double @v_div_contract_sqrt_f64(double %x, double %y) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
 ; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
 ; VI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
@@ -5279,12 +5289,12 @@ define double @v_div_arcp_sqrt_f64(double %x, double %y) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
 ; SI-GISEL-NEXT: v_mov_b32_e32 v11, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], v[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
@@ -5360,8 +5370,8 @@ define double @v_div_arcp_sqrt_f64(double %x, double %y) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
 ; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
 ; VI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
@@ -5446,12 +5456,12 @@ define double @v_div_contract_arcp_sqrt_f64(double %x, double %y) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
 ; SI-GISEL-NEXT: v_mov_b32_e32 v11, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], v[4:5]
 ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
@@ -5527,8 +5537,8 @@ define double @v_div_contract_arcp_sqrt_f64(double %x, double %y) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
 ; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
 ; VI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
@@ -5616,17 +5626,17 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-NEXT: s_mov_b32 s6, 0
 ; SI-GISEL-NEXT: s_mov_b32 s7, 0x40700000
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x40700000
 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x40700000
 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
 ; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -5702,10 +5712,10 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
 ; VI-GISEL-NEXT: s_mov_b32 s4, 0
 ; VI-GISEL-NEXT: s_mov_b32 s5, 0x40700000
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
 ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5