diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp index 9cc6c6a706c58..c500357c396c6 100644 --- a/llvm/lib/CodeGen/ExpandFp.cpp +++ b/llvm/lib/CodeGen/ExpandFp.cpp @@ -82,7 +82,7 @@ class FRemExpander { } static FRemExpander create(IRBuilder<> &B, Type *Ty) { - assert(canExpandType(Ty)); + assert(canExpandType(Ty) && "Expected supported floating point type"); // The type to use for the computation of the remainder. This may be // wider than the input/result type which affects the ... @@ -356,8 +356,9 @@ Value *FRemExpander::buildFRem(Value *X, Value *Y, static bool expandFRem(BinaryOperator &I, std::optional &SQ) { LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n'); - Type *ReturnTy = I.getType(); - assert(FRemExpander::canExpandType(ReturnTy->getScalarType())); + Type *Ty = I.getType(); + assert(FRemExpander::canExpandType(Ty) && + "Expected supported floating point type"); FastMathFlags FMF = I.getFastMathFlags(); // TODO Make use of those flags for optimization? @@ -368,32 +369,10 @@ static bool expandFRem(BinaryOperator &I, std::optional &SQ) { B.setFastMathFlags(FMF); B.SetCurrentDebugLocation(I.getDebugLoc()); - Type *ElemTy = ReturnTy->getScalarType(); - const FRemExpander Expander = FRemExpander::create(B, ElemTy); - - Value *Ret; - if (ReturnTy->isFloatingPointTy()) - Ret = FMF.approxFunc() - ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1)) - : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ); - else { - auto *VecTy = cast(ReturnTy); - - // This could use SplitBlockAndInsertForEachLane but the interface - // is a bit awkward for a constant number of elements and it will - // boil down to the same code. - // TODO Expand the FRem instruction only once and reuse the code. 
- Value *Nums = I.getOperand(0); - Value *Denums = I.getOperand(1); - Ret = PoisonValue::get(I.getType()); - for (int I = 0, E = VecTy->getNumElements(); I != E; ++I) { - Value *Num = B.CreateExtractElement(Nums, I); - Value *Denum = B.CreateExtractElement(Denums, I); - Value *Rem = FMF.approxFunc() ? Expander.buildApproxFRem(Num, Denum) - : Expander.buildFRem(Num, Denum, SQ); - Ret = B.CreateInsertElement(Ret, Rem, I); - } - } + const FRemExpander Expander = FRemExpander::create(B, Ty); + Value *Ret = FMF.approxFunc() + ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1)) + : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ); I.replaceAllUsesWith(Ret); Ret->takeName(&I); @@ -939,7 +918,8 @@ static void expandIToFP(Instruction *IToFP) { IToFP->eraseFromParent(); } -static void scalarize(Instruction *I, SmallVectorImpl &Replace) { +static void scalarize(Instruction *I, + SmallVectorImpl &Worklist) { VectorType *VTy = cast(I->getType()); IRBuilder<> Builder(I); @@ -948,12 +928,25 @@ static void scalarize(Instruction *I, SmallVectorImpl &Replace) { Value *Result = PoisonValue::get(VTy); for (unsigned Idx = 0; Idx < NumElements; ++Idx) { Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx); - Value *Cast = Builder.CreateCast(cast(I)->getOpcode(), Ext, - I->getType()->getScalarType()); - Result = Builder.CreateInsertElement(Result, Cast, Idx); - if (isa(Cast)) - Replace.push_back(cast(Cast)); + + Value *NewOp = nullptr; + if (auto *BinOp = dyn_cast(I)) + NewOp = Builder.CreateBinOp( + BinOp->getOpcode(), Ext, + Builder.CreateExtractElement(I->getOperand(1), Idx)); + else if (auto *CastI = dyn_cast(I)) + NewOp = Builder.CreateCast(CastI->getOpcode(), Ext, + I->getType()->getScalarType()); + else + llvm_unreachable("Unsupported instruction type"); + + Result = Builder.CreateInsertElement(Result, NewOp, Idx); + if (auto *ScalarizedI = dyn_cast(NewOp)) { + ScalarizedI->copyIRFlags(I, true); + Worklist.push_back(ScalarizedI); + } } + 
I->replaceAllUsesWith(Result); I->dropAllReferences(); I->eraseFromParent(); @@ -989,10 +982,17 @@ static bool targetSupportsFrem(const TargetLowering &TLI, Type *Ty) { return TLI.getLibcallName(fremToLibcall(Ty->getScalarType())); } +static void addToWorklist(Instruction &I, + SmallVector &Worklist) { + if (I.getOperand(0)->getType()->isVectorTy()) + scalarize(&I, Worklist); + else + Worklist.push_back(&I); +} + static bool runImpl(Function &F, const TargetLowering &TLI, AssumptionCache *AC) { - SmallVector Replace; - SmallVector ReplaceVector; + SmallVector Worklist; bool Modified = false; unsigned MaxLegalFpConvertBitWidth = @@ -1003,56 +1003,40 @@ static bool runImpl(Function &F, const TargetLowering &TLI, if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS) return false; - for (auto &I : instructions(F)) { - switch (I.getOpcode()) { - case Instruction::FRem: { - Type *Ty = I.getType(); - // TODO: This pass doesn't handle scalable vectors. - if (Ty->isScalableTy()) - continue; - - if (targetSupportsFrem(TLI, Ty) || - !FRemExpander::canExpandType(Ty->getScalarType())) - continue; - - Replace.push_back(&I); - Modified = true; + for (auto It = inst_begin(&F), End = inst_end(F); It != End;) { + Instruction &I = *It++; + Type *Ty = I.getType(); + // TODO: This pass doesn't handle scalable vectors. + if (Ty->isScalableTy()) + continue; + switch (I.getOpcode()) { + case Instruction::FRem: + if (!targetSupportsFrem(TLI, Ty) && + FRemExpander::canExpandType(Ty->getScalarType())) { + addToWorklist(I, Worklist); + Modified = true; + } break; - } case Instruction::FPToUI: case Instruction::FPToSI: { - // TODO: This pass doesn't handle scalable vectors. 
- if (I.getOperand(0)->getType()->isScalableTy()) - continue; - - auto *IntTy = cast(I.getType()->getScalarType()); + auto *IntTy = cast(Ty->getScalarType()); if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) continue; - if (I.getOperand(0)->getType()->isVectorTy()) - ReplaceVector.push_back(&I); - else - Replace.push_back(&I); + addToWorklist(I, Worklist); Modified = true; break; } case Instruction::UIToFP: case Instruction::SIToFP: { - // TODO: This pass doesn't handle scalable vectors. - if (I.getOperand(0)->getType()->isScalableTy()) - continue; - auto *IntTy = cast(I.getOperand(0)->getType()->getScalarType()); if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) continue; - if (I.getOperand(0)->getType()->isVectorTy()) - ReplaceVector.push_back(&I); - else - Replace.push_back(&I); + addToWorklist(I, Worklist); Modified = true; break; } default: @@ -1060,16 +1044,8 @@ static bool runImpl(Function &F, const TargetLowering &TLI, } } - while (!ReplaceVector.empty()) { - Instruction *I = ReplaceVector.pop_back_val(); - scalarize(I, Replace); - } - - if (Replace.empty()) - return false; - - while (!Replace.empty()) { - Instruction *I = Replace.pop_back_val(); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); if (I->getOpcode() == Instruction::FRem) { auto SQ = [&]() -> std::optional { if (AC) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll index 302b2395642d0..c87cfbdfe87b7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll @@ -1048,7 +1048,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cvt_f32_f16_e64 v1, |s1| ; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v2, v1 ; CI-NEXT: s_cbranch_vccz .LBB9_2 -; CI-NEXT: ; %bb.1: ; %frem.else +; CI-NEXT: ; %bb.1: ; %frem.else20 ; CI-NEXT: s_and_b32 s2, s0, 0x8000 ; CI-NEXT: v_cmp_eq_f32_e32 vcc, v2, v1 ; CI-NEXT: v_mov_b32_e32 v0, 
s2 @@ -1059,7 +1059,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_xor_b32 s2, s2, 1 ; CI-NEXT: s_cmp_lg_u32 s2, 0 ; CI-NEXT: s_cbranch_scc1 .LBB9_8 -; CI-NEXT: ; %bb.3: ; %frem.compute +; CI-NEXT: ; %bb.3: ; %frem.compute19 ; CI-NEXT: v_frexp_mant_f32_e32 v3, v1 ; CI-NEXT: v_frexp_exp_i32_f32_e32 v6, v1 ; CI-NEXT: v_ldexp_f32_e64 v1, v3, 1 @@ -1084,10 +1084,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ge_i32_e32 vcc, 11, v2 ; CI-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB9_6 -; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; CI-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; CI-NEXT: v_add_i32_e32 v2, vcc, 11, v5 ; CI-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 -; CI-NEXT: .LBB9_5: ; %frem.loop_body +; CI-NEXT: .LBB9_5: ; %frem.loop_body27 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v5, v4 ; CI-NEXT: v_mul_f32_e32 v4, v5, v3 @@ -1103,7 +1103,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB9_7 ; CI-NEXT: .LBB9_6: ; CI-NEXT: v_mov_b32_e32 v5, v4 -; CI-NEXT: .LBB9_7: ; %frem.loop_exit +; CI-NEXT: .LBB9_7: ; %frem.loop_exit28 ; CI-NEXT: v_add_i32_e32 v2, vcc, -10, v2 ; CI-NEXT: v_ldexp_f32_e32 v2, v5, v2 ; CI-NEXT: v_mul_f32_e32 v3, v2, v3 @@ -1126,7 +1126,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: ; implicit-def: $vgpr1 ; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v3, v2 ; CI-NEXT: s_cbranch_vccz .LBB9_10 -; CI-NEXT: ; %bb.9: ; %frem.else20 +; CI-NEXT: ; %bb.9: ; %frem.else ; CI-NEXT: s_and_b32 s4, s2, 0x8000 ; CI-NEXT: v_cmp_eq_f32_e32 vcc, v3, v2 ; CI-NEXT: v_mov_b32_e32 v1, s4 @@ -1137,7 +1137,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_xor_b32 s4, s4, 1 ; CI-NEXT: s_cmp_lg_u32 s4, 0 ; CI-NEXT: s_cbranch_scc1 .LBB9_16 -; CI-NEXT: ; %bb.11: ; 
%frem.compute19 +; CI-NEXT: ; %bb.11: ; %frem.compute ; CI-NEXT: v_frexp_mant_f32_e32 v4, v2 ; CI-NEXT: v_frexp_exp_i32_f32_e32 v7, v2 ; CI-NEXT: v_ldexp_f32_e64 v2, v4, 1 @@ -1162,10 +1162,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ge_i32_e32 vcc, 11, v3 ; CI-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB9_14 -; CI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; CI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; CI-NEXT: v_add_i32_e32 v3, vcc, 11, v6 ; CI-NEXT: v_sub_i32_e32 v3, vcc, v3, v7 -; CI-NEXT: .LBB9_13: ; %frem.loop_body27 +; CI-NEXT: .LBB9_13: ; %frem.loop_body ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v6, v5 ; CI-NEXT: v_mul_f32_e32 v5, v6, v4 @@ -1181,7 +1181,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB9_15 ; CI-NEXT: .LBB9_14: ; CI-NEXT: v_mov_b32_e32 v6, v5 -; CI-NEXT: .LBB9_15: ; %frem.loop_exit28 +; CI-NEXT: .LBB9_15: ; %frem.loop_exit ; CI-NEXT: v_add_i32_e32 v3, vcc, -10, v3 ; CI-NEXT: v_ldexp_f32_e32 v3, v6, v3 ; CI-NEXT: v_mul_f32_e32 v4, v3, v4 @@ -1239,7 +1239,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cvt_f32_f16_e64 v1, |s1| ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v2, v1 ; VI-NEXT: s_cbranch_vccz .LBB9_2 -; VI-NEXT: ; %bb.1: ; %frem.else +; VI-NEXT: ; %bb.1: ; %frem.else20 ; VI-NEXT: s_and_b32 s2, s0, 0x8000 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v2, v1 ; VI-NEXT: v_mov_b32_e32 v0, s2 @@ -1250,7 +1250,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_xor_b32 s2, s2, 1 ; VI-NEXT: s_cmp_lg_u32 s2, 0 ; VI-NEXT: s_cbranch_scc1 .LBB9_8 -; VI-NEXT: ; %bb.3: ; %frem.compute +; VI-NEXT: ; %bb.3: ; %frem.compute19 ; VI-NEXT: v_frexp_mant_f32_e32 v3, v1 ; VI-NEXT: v_frexp_exp_i32_f32_e32 v6, v1 ; VI-NEXT: v_ldexp_f32 v1, v3, 1 @@ -1275,10 +1275,10 @@ define amdgpu_kernel void 
@frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ge_i32_e32 vcc, 11, v2 ; VI-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB9_6 -; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; VI-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; VI-NEXT: v_add_u32_e32 v2, vcc, 11, v5 ; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v6 -; VI-NEXT: .LBB9_5: ; %frem.loop_body +; VI-NEXT: .LBB9_5: ; %frem.loop_body27 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v5, v4 ; VI-NEXT: v_mul_f32_e32 v4, v5, v3 @@ -1294,7 +1294,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB9_7 ; VI-NEXT: .LBB9_6: ; VI-NEXT: v_mov_b32_e32 v5, v4 -; VI-NEXT: .LBB9_7: ; %frem.loop_exit +; VI-NEXT: .LBB9_7: ; %frem.loop_exit28 ; VI-NEXT: v_add_u32_e32 v2, vcc, -10, v2 ; VI-NEXT: v_ldexp_f32 v2, v5, v2 ; VI-NEXT: v_mul_f32_e32 v3, v2, v3 @@ -1317,7 +1317,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: ; implicit-def: $vgpr1 ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v3, v2 ; VI-NEXT: s_cbranch_vccz .LBB9_10 -; VI-NEXT: ; %bb.9: ; %frem.else20 +; VI-NEXT: ; %bb.9: ; %frem.else ; VI-NEXT: s_and_b32 s3, s4, 0x8000 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v3, v2 ; VI-NEXT: v_mov_b32_e32 v1, s3 @@ -1328,7 +1328,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_xor_b32 s3, s3, 1 ; VI-NEXT: s_cmp_lg_u32 s3, 0 ; VI-NEXT: s_cbranch_scc1 .LBB9_16 -; VI-NEXT: ; %bb.11: ; %frem.compute19 +; VI-NEXT: ; %bb.11: ; %frem.compute ; VI-NEXT: v_frexp_mant_f32_e32 v4, v2 ; VI-NEXT: v_frexp_exp_i32_f32_e32 v7, v2 ; VI-NEXT: v_ldexp_f32 v2, v4, 1 @@ -1353,10 +1353,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ge_i32_e32 vcc, 11, v3 ; VI-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB9_14 -; VI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; VI-NEXT: ; 
%bb.12: ; %frem.loop_body.preheader ; VI-NEXT: v_add_u32_e32 v3, vcc, 11, v6 ; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v7 -; VI-NEXT: .LBB9_13: ; %frem.loop_body27 +; VI-NEXT: .LBB9_13: ; %frem.loop_body ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v6, v5 ; VI-NEXT: v_mul_f32_e32 v5, v6, v4 @@ -1372,7 +1372,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB9_15 ; VI-NEXT: .LBB9_14: ; VI-NEXT: v_mov_b32_e32 v6, v5 -; VI-NEXT: .LBB9_15: ; %frem.loop_exit28 +; VI-NEXT: .LBB9_15: ; %frem.loop_exit ; VI-NEXT: v_add_u32_e32 v3, vcc, -10, v3 ; VI-NEXT: v_ldexp_f32 v3, v6, v3 ; VI-NEXT: v_mul_f32_e32 v4, v3, v4 @@ -1427,7 +1427,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cvt_f32_f16_e64 v1, |s2| ; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v2, v1 ; CI-NEXT: s_cbranch_vccz .LBB10_2 -; CI-NEXT: ; %bb.1: ; %frem.else +; CI-NEXT: ; %bb.1: ; %frem.else86 ; CI-NEXT: s_and_b32 s0, s4, 0x8000 ; CI-NEXT: v_cmp_eq_f32_e32 vcc, v2, v1 ; CI-NEXT: v_mov_b32_e32 v0, s0 @@ -1438,7 +1438,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_xor_b32 s0, s0, 1 ; CI-NEXT: s_cmp_lg_u32 s0, 0 ; CI-NEXT: s_cbranch_scc1 .LBB10_8 -; CI-NEXT: ; %bb.3: ; %frem.compute +; CI-NEXT: ; %bb.3: ; %frem.compute85 ; CI-NEXT: v_frexp_mant_f32_e32 v3, v1 ; CI-NEXT: v_frexp_exp_i32_f32_e32 v6, v1 ; CI-NEXT: v_ldexp_f32_e64 v1, v3, 1 @@ -1463,10 +1463,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ge_i32_e32 vcc, 11, v2 ; CI-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB10_6 -; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; CI-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; CI-NEXT: v_add_i32_e32 v2, vcc, 11, v5 ; CI-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 -; CI-NEXT: .LBB10_5: ; %frem.loop_body +; CI-NEXT: .LBB10_5: ; %frem.loop_body93 ; CI-NEXT: ; =>This Inner 
Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v5, v4 ; CI-NEXT: v_mul_f32_e32 v4, v5, v3 @@ -1482,7 +1482,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB10_7 ; CI-NEXT: .LBB10_6: ; CI-NEXT: v_mov_b32_e32 v5, v4 -; CI-NEXT: .LBB10_7: ; %frem.loop_exit +; CI-NEXT: .LBB10_7: ; %frem.loop_exit94 ; CI-NEXT: v_add_i32_e32 v2, vcc, -10, v2 ; CI-NEXT: v_ldexp_f32_e32 v2, v5, v2 ; CI-NEXT: v_mul_f32_e32 v3, v2, v3 @@ -1505,7 +1505,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: ; implicit-def: $vgpr1 ; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v3, v2 ; CI-NEXT: s_cbranch_vccz .LBB10_10 -; CI-NEXT: ; %bb.9: ; %frem.else20 +; CI-NEXT: ; %bb.9: ; %frem.else53 ; CI-NEXT: s_and_b32 s1, s6, 0x8000 ; CI-NEXT: v_cmp_eq_f32_e32 vcc, v3, v2 ; CI-NEXT: v_mov_b32_e32 v1, s1 @@ -1516,7 +1516,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_xor_b32 s1, s1, 1 ; CI-NEXT: s_cmp_lg_u32 s1, 0 ; CI-NEXT: s_cbranch_scc1 .LBB10_16 -; CI-NEXT: ; %bb.11: ; %frem.compute19 +; CI-NEXT: ; %bb.11: ; %frem.compute52 ; CI-NEXT: v_frexp_mant_f32_e32 v4, v2 ; CI-NEXT: v_frexp_exp_i32_f32_e32 v7, v2 ; CI-NEXT: v_ldexp_f32_e64 v2, v4, 1 @@ -1541,10 +1541,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ge_i32_e32 vcc, 11, v3 ; CI-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB10_14 -; CI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; CI-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; CI-NEXT: v_add_i32_e32 v3, vcc, 11, v6 ; CI-NEXT: v_sub_i32_e32 v3, vcc, v3, v7 -; CI-NEXT: .LBB10_13: ; %frem.loop_body27 +; CI-NEXT: .LBB10_13: ; %frem.loop_body60 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v6, v5 ; CI-NEXT: v_mul_f32_e32 v5, v6, v4 @@ -1560,7 +1560,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch 
.LBB10_15 ; CI-NEXT: .LBB10_14: ; CI-NEXT: v_mov_b32_e32 v6, v5 -; CI-NEXT: .LBB10_15: ; %frem.loop_exit28 +; CI-NEXT: .LBB10_15: ; %frem.loop_exit61 ; CI-NEXT: v_add_i32_e32 v3, vcc, -10, v3 ; CI-NEXT: v_ldexp_f32_e32 v3, v6, v3 ; CI-NEXT: v_mul_f32_e32 v4, v3, v4 @@ -1581,7 +1581,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: ; implicit-def: $vgpr2 ; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v4, v3 ; CI-NEXT: s_cbranch_vccz .LBB10_18 -; CI-NEXT: ; %bb.17: ; %frem.else53 +; CI-NEXT: ; %bb.17: ; %frem.else20 ; CI-NEXT: s_and_b32 s1, s5, 0x8000 ; CI-NEXT: v_cmp_eq_f32_e32 vcc, v4, v3 ; CI-NEXT: v_mov_b32_e32 v2, s1 @@ -1592,7 +1592,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_xor_b32 s1, s1, 1 ; CI-NEXT: s_cmp_lg_u32 s1, 0 ; CI-NEXT: s_cbranch_scc1 .LBB10_24 -; CI-NEXT: ; %bb.19: ; %frem.compute52 +; CI-NEXT: ; %bb.19: ; %frem.compute19 ; CI-NEXT: v_frexp_mant_f32_e32 v5, v3 ; CI-NEXT: v_frexp_exp_i32_f32_e32 v8, v3 ; CI-NEXT: v_ldexp_f32_e64 v3, v5, 1 @@ -1617,10 +1617,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ge_i32_e32 vcc, 11, v4 ; CI-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB10_22 -; CI-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; CI-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; CI-NEXT: v_add_i32_e32 v4, vcc, 11, v7 ; CI-NEXT: v_sub_i32_e32 v4, vcc, v4, v8 -; CI-NEXT: .LBB10_21: ; %frem.loop_body60 +; CI-NEXT: .LBB10_21: ; %frem.loop_body27 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v7, v6 ; CI-NEXT: v_mul_f32_e32 v6, v7, v5 @@ -1636,7 +1636,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB10_23 ; CI-NEXT: .LBB10_22: ; CI-NEXT: v_mov_b32_e32 v7, v6 -; CI-NEXT: .LBB10_23: ; %frem.loop_exit61 +; CI-NEXT: .LBB10_23: ; %frem.loop_exit28 ; CI-NEXT: v_add_i32_e32 v4, vcc, -10, v4 ; CI-NEXT: 
v_ldexp_f32_e32 v4, v7, v4 ; CI-NEXT: v_mul_f32_e32 v5, v4, v5 @@ -1659,7 +1659,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: ; implicit-def: $vgpr3 ; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v5, v4 ; CI-NEXT: s_cbranch_vccz .LBB10_26 -; CI-NEXT: ; %bb.25: ; %frem.else86 +; CI-NEXT: ; %bb.25: ; %frem.else ; CI-NEXT: s_and_b32 s1, s7, 0x8000 ; CI-NEXT: v_cmp_eq_f32_e32 vcc, v5, v4 ; CI-NEXT: v_mov_b32_e32 v3, s1 @@ -1670,7 +1670,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_xor_b32 s1, s1, 1 ; CI-NEXT: s_cmp_lg_u32 s1, 0 ; CI-NEXT: s_cbranch_scc1 .LBB10_32 -; CI-NEXT: ; %bb.27: ; %frem.compute85 +; CI-NEXT: ; %bb.27: ; %frem.compute ; CI-NEXT: v_frexp_mant_f32_e32 v6, v4 ; CI-NEXT: v_frexp_exp_i32_f32_e32 v9, v4 ; CI-NEXT: v_ldexp_f32_e64 v4, v6, 1 @@ -1695,10 +1695,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ge_i32_e32 vcc, 11, v5 ; CI-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB10_30 -; CI-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; CI-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; CI-NEXT: v_add_i32_e32 v5, vcc, 11, v8 ; CI-NEXT: v_sub_i32_e32 v5, vcc, v5, v9 -; CI-NEXT: .LBB10_29: ; %frem.loop_body93 +; CI-NEXT: .LBB10_29: ; %frem.loop_body ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v8, v7 ; CI-NEXT: v_mul_f32_e32 v7, v8, v6 @@ -1714,7 +1714,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB10_31 ; CI-NEXT: .LBB10_30: ; CI-NEXT: v_mov_b32_e32 v8, v7 -; CI-NEXT: .LBB10_31: ; %frem.loop_exit94 +; CI-NEXT: .LBB10_31: ; %frem.loop_exit ; CI-NEXT: v_add_i32_e32 v5, vcc, -10, v5 ; CI-NEXT: v_ldexp_f32_e32 v5, v8, v5 ; CI-NEXT: v_mul_f32_e32 v6, v5, v6 @@ -1794,7 +1794,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cvt_f32_f16_e64 v1, |s6| ; VI-NEXT: 
v_cmp_ngt_f32_e32 vcc, v2, v1 ; VI-NEXT: s_cbranch_vccz .LBB10_2 -; VI-NEXT: ; %bb.1: ; %frem.else +; VI-NEXT: ; %bb.1: ; %frem.else86 ; VI-NEXT: s_and_b32 s0, s8, 0x8000 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v2, v1 ; VI-NEXT: v_mov_b32_e32 v0, s0 @@ -1805,7 +1805,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_xor_b32 s0, s0, 1 ; VI-NEXT: s_cmp_lg_u32 s0, 0 ; VI-NEXT: s_cbranch_scc1 .LBB10_8 -; VI-NEXT: ; %bb.3: ; %frem.compute +; VI-NEXT: ; %bb.3: ; %frem.compute85 ; VI-NEXT: v_frexp_mant_f32_e32 v3, v1 ; VI-NEXT: v_frexp_exp_i32_f32_e32 v6, v1 ; VI-NEXT: v_ldexp_f32 v1, v3, 1 @@ -1830,10 +1830,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ge_i32_e32 vcc, 11, v2 ; VI-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB10_6 -; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; VI-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; VI-NEXT: v_add_u32_e32 v2, vcc, 11, v5 ; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v6 -; VI-NEXT: .LBB10_5: ; %frem.loop_body +; VI-NEXT: .LBB10_5: ; %frem.loop_body93 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v5, v4 ; VI-NEXT: v_mul_f32_e32 v4, v5, v3 @@ -1849,7 +1849,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB10_7 ; VI-NEXT: .LBB10_6: ; VI-NEXT: v_mov_b32_e32 v5, v4 -; VI-NEXT: .LBB10_7: ; %frem.loop_exit +; VI-NEXT: .LBB10_7: ; %frem.loop_exit94 ; VI-NEXT: v_add_u32_e32 v2, vcc, -10, v2 ; VI-NEXT: v_ldexp_f32 v2, v5, v2 ; VI-NEXT: v_mul_f32_e32 v3, v2, v3 @@ -1872,7 +1872,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: ; implicit-def: $vgpr1 ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v3, v2 ; VI-NEXT: s_cbranch_vccz .LBB10_10 -; VI-NEXT: ; %bb.9: ; %frem.else20 +; VI-NEXT: ; %bb.9: ; %frem.else53 ; VI-NEXT: s_and_b32 s0, s4, 0x8000 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v3, v2 ; VI-NEXT: 
v_mov_b32_e32 v1, s0 @@ -1883,7 +1883,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_xor_b32 s0, s0, 1 ; VI-NEXT: s_cmp_lg_u32 s0, 0 ; VI-NEXT: s_cbranch_scc1 .LBB10_16 -; VI-NEXT: ; %bb.11: ; %frem.compute19 +; VI-NEXT: ; %bb.11: ; %frem.compute52 ; VI-NEXT: v_frexp_mant_f32_e32 v4, v2 ; VI-NEXT: v_frexp_exp_i32_f32_e32 v7, v2 ; VI-NEXT: v_ldexp_f32 v2, v4, 1 @@ -1908,10 +1908,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ge_i32_e32 vcc, 11, v3 ; VI-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB10_14 -; VI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; VI-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; VI-NEXT: v_add_u32_e32 v3, vcc, 11, v6 ; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v7 -; VI-NEXT: .LBB10_13: ; %frem.loop_body27 +; VI-NEXT: .LBB10_13: ; %frem.loop_body60 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v6, v5 ; VI-NEXT: v_mul_f32_e32 v5, v6, v4 @@ -1927,7 +1927,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB10_15 ; VI-NEXT: .LBB10_14: ; VI-NEXT: v_mov_b32_e32 v6, v5 -; VI-NEXT: .LBB10_15: ; %frem.loop_exit28 +; VI-NEXT: .LBB10_15: ; %frem.loop_exit61 ; VI-NEXT: v_add_u32_e32 v3, vcc, -10, v3 ; VI-NEXT: v_ldexp_f32 v3, v6, v3 ; VI-NEXT: v_mul_f32_e32 v4, v3, v4 @@ -1948,7 +1948,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: ; implicit-def: $vgpr2 ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v4, v3 ; VI-NEXT: s_cbranch_vccz .LBB10_18 -; VI-NEXT: ; %bb.17: ; %frem.else53 +; VI-NEXT: ; %bb.17: ; %frem.else20 ; VI-NEXT: s_and_b32 s0, s9, 0x8000 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v4, v3 ; VI-NEXT: v_mov_b32_e32 v2, s0 @@ -1959,7 +1959,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_xor_b32 s0, s0, 1 ; VI-NEXT: s_cmp_lg_u32 s0, 0 ; VI-NEXT: s_cbranch_scc1 
.LBB10_24 -; VI-NEXT: ; %bb.19: ; %frem.compute52 +; VI-NEXT: ; %bb.19: ; %frem.compute19 ; VI-NEXT: v_frexp_mant_f32_e32 v5, v3 ; VI-NEXT: v_frexp_exp_i32_f32_e32 v8, v3 ; VI-NEXT: v_ldexp_f32 v3, v5, 1 @@ -1984,10 +1984,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ge_i32_e32 vcc, 11, v4 ; VI-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB10_22 -; VI-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; VI-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; VI-NEXT: v_add_u32_e32 v4, vcc, 11, v7 ; VI-NEXT: v_sub_u32_e32 v4, vcc, v4, v8 -; VI-NEXT: .LBB10_21: ; %frem.loop_body60 +; VI-NEXT: .LBB10_21: ; %frem.loop_body27 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v7, v6 ; VI-NEXT: v_mul_f32_e32 v6, v7, v5 @@ -2003,7 +2003,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB10_23 ; VI-NEXT: .LBB10_22: ; VI-NEXT: v_mov_b32_e32 v7, v6 -; VI-NEXT: .LBB10_23: ; %frem.loop_exit61 +; VI-NEXT: .LBB10_23: ; %frem.loop_exit28 ; VI-NEXT: v_add_u32_e32 v4, vcc, -10, v4 ; VI-NEXT: v_ldexp_f32 v4, v7, v4 ; VI-NEXT: v_mul_f32_e32 v5, v4, v5 @@ -2026,7 +2026,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: ; implicit-def: $vgpr3 ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v5, v4 ; VI-NEXT: s_cbranch_vccz .LBB10_26 -; VI-NEXT: ; %bb.25: ; %frem.else86 +; VI-NEXT: ; %bb.25: ; %frem.else ; VI-NEXT: s_and_b32 s0, s12, 0x8000 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v5, v4 ; VI-NEXT: v_mov_b32_e32 v3, s0 @@ -2037,7 +2037,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_xor_b32 s0, s0, 1 ; VI-NEXT: s_cmp_lg_u32 s0, 0 ; VI-NEXT: s_cbranch_scc1 .LBB10_32 -; VI-NEXT: ; %bb.27: ; %frem.compute85 +; VI-NEXT: ; %bb.27: ; %frem.compute ; VI-NEXT: v_frexp_mant_f32_e32 v6, v4 ; VI-NEXT: v_frexp_exp_i32_f32_e32 v9, v4 ; VI-NEXT: v_ldexp_f32 v4, v6, 1 @@ -2062,10 
+2062,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ge_i32_e32 vcc, 11, v5 ; VI-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB10_30 -; VI-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; VI-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; VI-NEXT: v_add_u32_e32 v5, vcc, 11, v8 ; VI-NEXT: v_sub_u32_e32 v5, vcc, v5, v9 -; VI-NEXT: .LBB10_29: ; %frem.loop_body93 +; VI-NEXT: .LBB10_29: ; %frem.loop_body ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v8, v7 ; VI-NEXT: v_mul_f32_e32 v7, v8, v6 @@ -2081,7 +2081,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB10_31 ; VI-NEXT: .LBB10_30: ; VI-NEXT: v_mov_b32_e32 v8, v7 -; VI-NEXT: .LBB10_31: ; %frem.loop_exit94 +; VI-NEXT: .LBB10_31: ; %frem.loop_exit ; VI-NEXT: v_add_u32_e32 v5, vcc, -10, v5 ; VI-NEXT: v_ldexp_f32 v5, v8, v5 ; VI-NEXT: v_mul_f32_e32 v6, v5, v6 @@ -2147,7 +2147,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f32_e64 vcc, |s2|, |v0| ; CI-NEXT: ; implicit-def: $vgpr0 ; CI-NEXT: s_cbranch_vccz .LBB11_2 -; CI-NEXT: ; %bb.1: ; %frem.else +; CI-NEXT: ; %bb.1: ; %frem.else16 ; CI-NEXT: s_and_b32 s6, s2, 0x80000000 ; CI-NEXT: v_mov_b32_e32 v1, s4 ; CI-NEXT: v_mov_b32_e32 v0, s2 @@ -2159,7 +2159,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_xor_b32 s6, s6, 1 ; CI-NEXT: s_cmp_lg_u32 s6, 0 ; CI-NEXT: s_cbranch_scc1 .LBB11_8 -; CI-NEXT: ; %bb.3: ; %frem.compute +; CI-NEXT: ; %bb.3: ; %frem.compute15 ; CI-NEXT: v_frexp_mant_f32_e64 v1, |s4| ; CI-NEXT: v_ldexp_f32_e64 v1, v1, 1 ; CI-NEXT: v_div_scale_f32 v3, s[6:7], v1, v1, 1.0 @@ -2184,10 +2184,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ge_i32_e32 vcc, 12, v2 ; CI-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB11_6 
-; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; CI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; CI-NEXT: v_add_i32_e32 v2, vcc, 12, v5 ; CI-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 -; CI-NEXT: .LBB11_5: ; %frem.loop_body +; CI-NEXT: .LBB11_5: ; %frem.loop_body23 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v5, v4 ; CI-NEXT: v_mul_f32_e32 v4, v5, v3 @@ -2203,7 +2203,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB11_7 ; CI-NEXT: .LBB11_6: ; CI-NEXT: v_mov_b32_e32 v5, v4 -; CI-NEXT: .LBB11_7: ; %frem.loop_exit +; CI-NEXT: .LBB11_7: ; %frem.loop_exit24 ; CI-NEXT: v_add_i32_e32 v2, vcc, -11, v2 ; CI-NEXT: v_ldexp_f32_e32 v2, v5, v2 ; CI-NEXT: v_mul_f32_e32 v3, v2, v3 @@ -2222,7 +2222,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_mov_b32 s6, 1 ; CI-NEXT: ; implicit-def: $vgpr1 ; CI-NEXT: s_cbranch_vccz .LBB11_10 -; CI-NEXT: ; %bb.9: ; %frem.else16 +; CI-NEXT: ; %bb.9: ; %frem.else ; CI-NEXT: s_and_b32 s6, s3, 0x80000000 ; CI-NEXT: v_mov_b32_e32 v2, s5 ; CI-NEXT: v_mov_b32_e32 v1, s3 @@ -2234,7 +2234,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_xor_b32 s6, s6, 1 ; CI-NEXT: s_cmp_lg_u32 s6, 0 ; CI-NEXT: s_cbranch_scc1 .LBB11_16 -; CI-NEXT: ; %bb.11: ; %frem.compute15 +; CI-NEXT: ; %bb.11: ; %frem.compute ; CI-NEXT: v_frexp_mant_f32_e64 v2, |s5| ; CI-NEXT: v_ldexp_f32_e64 v2, v2, 1 ; CI-NEXT: v_div_scale_f32 v4, s[6:7], v2, v2, 1.0 @@ -2259,10 +2259,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ge_i32_e32 vcc, 12, v3 ; CI-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB11_14 -; CI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; CI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; CI-NEXT: v_add_i32_e32 v3, vcc, 12, v6 ; CI-NEXT: v_sub_i32_e32 v3, vcc, v3, v7 -; CI-NEXT: .LBB11_13: ; %frem.loop_body23 +; 
CI-NEXT: .LBB11_13: ; %frem.loop_body ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v6, v5 ; CI-NEXT: v_mul_f32_e32 v5, v6, v4 @@ -2278,7 +2278,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB11_15 ; CI-NEXT: .LBB11_14: ; CI-NEXT: v_mov_b32_e32 v6, v5 -; CI-NEXT: .LBB11_15: ; %frem.loop_exit24 +; CI-NEXT: .LBB11_15: ; %frem.loop_exit ; CI-NEXT: v_add_i32_e32 v3, vcc, -11, v3 ; CI-NEXT: v_ldexp_f32_e32 v3, v6, v3 ; CI-NEXT: v_mul_f32_e32 v4, v3, v4 @@ -2320,7 +2320,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f32_e64 vcc, |s2|, |v0| ; VI-NEXT: ; implicit-def: $vgpr0 ; VI-NEXT: s_cbranch_vccz .LBB11_2 -; VI-NEXT: ; %bb.1: ; %frem.else +; VI-NEXT: ; %bb.1: ; %frem.else16 ; VI-NEXT: s_and_b32 s6, s2, 0x80000000 ; VI-NEXT: v_mov_b32_e32 v1, s4 ; VI-NEXT: v_mov_b32_e32 v0, s2 @@ -2332,7 +2332,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_xor_b32 s6, s6, 1 ; VI-NEXT: s_cmp_lg_u32 s6, 0 ; VI-NEXT: s_cbranch_scc1 .LBB11_8 -; VI-NEXT: ; %bb.3: ; %frem.compute +; VI-NEXT: ; %bb.3: ; %frem.compute15 ; VI-NEXT: v_frexp_mant_f32_e64 v1, |s4| ; VI-NEXT: v_ldexp_f32 v1, v1, 1 ; VI-NEXT: v_div_scale_f32 v3, s[6:7], v1, v1, 1.0 @@ -2357,10 +2357,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ge_i32_e32 vcc, 12, v2 ; VI-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB11_6 -; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; VI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; VI-NEXT: v_add_u32_e32 v2, vcc, 12, v5 ; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v6 -; VI-NEXT: .LBB11_5: ; %frem.loop_body +; VI-NEXT: .LBB11_5: ; %frem.loop_body23 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v5, v4 ; VI-NEXT: v_mul_f32_e32 v4, v5, v3 @@ -2376,7 +2376,7 @@ define amdgpu_kernel void @frem_v2f32(ptr 
addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB11_7 ; VI-NEXT: .LBB11_6: ; VI-NEXT: v_mov_b32_e32 v5, v4 -; VI-NEXT: .LBB11_7: ; %frem.loop_exit +; VI-NEXT: .LBB11_7: ; %frem.loop_exit24 ; VI-NEXT: v_add_u32_e32 v2, vcc, -11, v2 ; VI-NEXT: v_ldexp_f32 v2, v5, v2 ; VI-NEXT: v_mul_f32_e32 v3, v2, v3 @@ -2395,7 +2395,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_mov_b32 s6, 1 ; VI-NEXT: ; implicit-def: $vgpr1 ; VI-NEXT: s_cbranch_vccz .LBB11_10 -; VI-NEXT: ; %bb.9: ; %frem.else16 +; VI-NEXT: ; %bb.9: ; %frem.else ; VI-NEXT: s_and_b32 s6, s3, 0x80000000 ; VI-NEXT: v_mov_b32_e32 v2, s5 ; VI-NEXT: v_mov_b32_e32 v1, s3 @@ -2407,7 +2407,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_xor_b32 s6, s6, 1 ; VI-NEXT: s_cmp_lg_u32 s6, 0 ; VI-NEXT: s_cbranch_scc1 .LBB11_16 -; VI-NEXT: ; %bb.11: ; %frem.compute15 +; VI-NEXT: ; %bb.11: ; %frem.compute ; VI-NEXT: v_frexp_mant_f32_e64 v2, |s5| ; VI-NEXT: v_ldexp_f32 v2, v2, 1 ; VI-NEXT: v_div_scale_f32 v4, s[6:7], v2, v2, 1.0 @@ -2432,10 +2432,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ge_i32_e32 vcc, 12, v3 ; VI-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB11_14 -; VI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; VI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; VI-NEXT: v_add_u32_e32 v3, vcc, 12, v6 ; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v7 -; VI-NEXT: .LBB11_13: ; %frem.loop_body23 +; VI-NEXT: .LBB11_13: ; %frem.loop_body ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v6, v5 ; VI-NEXT: v_mul_f32_e32 v5, v6, v4 @@ -2451,7 +2451,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB11_15 ; VI-NEXT: .LBB11_14: ; VI-NEXT: v_mov_b32_e32 v6, v5 -; VI-NEXT: .LBB11_15: ; %frem.loop_exit24 +; VI-NEXT: .LBB11_15: ; %frem.loop_exit ; VI-NEXT: v_add_u32_e32 v3, 
vcc, -11, v3 ; VI-NEXT: v_ldexp_f32 v3, v6, v3 ; VI-NEXT: v_mul_f32_e32 v4, v3, v4 @@ -2501,7 +2501,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f32_e64 vcc, |s4|, |v0| ; CI-NEXT: ; implicit-def: $vgpr0 ; CI-NEXT: s_cbranch_vccz .LBB12_2 -; CI-NEXT: ; %bb.1: ; %frem.else +; CI-NEXT: ; %bb.1: ; %frem.else78 ; CI-NEXT: s_and_b32 s2, s4, 0x80000000 ; CI-NEXT: v_mov_b32_e32 v1, s8 ; CI-NEXT: v_mov_b32_e32 v0, s4 @@ -2513,7 +2513,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_xor_b32 s2, s2, 1 ; CI-NEXT: s_cmp_lg_u32 s2, 0 ; CI-NEXT: s_cbranch_scc1 .LBB12_8 -; CI-NEXT: ; %bb.3: ; %frem.compute +; CI-NEXT: ; %bb.3: ; %frem.compute77 ; CI-NEXT: v_frexp_mant_f32_e64 v1, |s8| ; CI-NEXT: v_ldexp_f32_e64 v1, v1, 1 ; CI-NEXT: v_div_scale_f32 v3, s[2:3], v1, v1, 1.0 @@ -2538,10 +2538,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ge_i32_e32 vcc, 12, v2 ; CI-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB12_6 -; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; CI-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; CI-NEXT: v_add_i32_e32 v2, vcc, 12, v5 ; CI-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 -; CI-NEXT: .LBB12_5: ; %frem.loop_body +; CI-NEXT: .LBB12_5: ; %frem.loop_body85 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v5, v4 ; CI-NEXT: v_mul_f32_e32 v4, v5, v3 @@ -2557,7 +2557,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB12_7 ; CI-NEXT: .LBB12_6: ; CI-NEXT: v_mov_b32_e32 v5, v4 -; CI-NEXT: .LBB12_7: ; %frem.loop_exit +; CI-NEXT: .LBB12_7: ; %frem.loop_exit86 ; CI-NEXT: v_add_i32_e32 v2, vcc, -11, v2 ; CI-NEXT: v_ldexp_f32_e32 v2, v5, v2 ; CI-NEXT: v_mul_f32_e32 v3, v2, v3 @@ -2576,7 +2576,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_mov_b32 s2, 1 ; 
CI-NEXT: ; implicit-def: $vgpr1 ; CI-NEXT: s_cbranch_vccz .LBB12_10 -; CI-NEXT: ; %bb.9: ; %frem.else16 +; CI-NEXT: ; %bb.9: ; %frem.else47 ; CI-NEXT: s_and_b32 s2, s5, 0x80000000 ; CI-NEXT: v_mov_b32_e32 v2, s9 ; CI-NEXT: v_mov_b32_e32 v1, s5 @@ -2588,7 +2588,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_xor_b32 s2, s2, 1 ; CI-NEXT: s_cmp_lg_u32 s2, 0 ; CI-NEXT: s_cbranch_scc1 .LBB12_16 -; CI-NEXT: ; %bb.11: ; %frem.compute15 +; CI-NEXT: ; %bb.11: ; %frem.compute46 ; CI-NEXT: v_frexp_mant_f32_e64 v2, |s9| ; CI-NEXT: v_ldexp_f32_e64 v2, v2, 1 ; CI-NEXT: v_div_scale_f32 v4, s[2:3], v2, v2, 1.0 @@ -2613,10 +2613,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ge_i32_e32 vcc, 12, v3 ; CI-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB12_14 -; CI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; CI-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; CI-NEXT: v_add_i32_e32 v3, vcc, 12, v6 ; CI-NEXT: v_sub_i32_e32 v3, vcc, v3, v7 -; CI-NEXT: .LBB12_13: ; %frem.loop_body23 +; CI-NEXT: .LBB12_13: ; %frem.loop_body54 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v6, v5 ; CI-NEXT: v_mul_f32_e32 v5, v6, v4 @@ -2632,7 +2632,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB12_15 ; CI-NEXT: .LBB12_14: ; CI-NEXT: v_mov_b32_e32 v6, v5 -; CI-NEXT: .LBB12_15: ; %frem.loop_exit24 +; CI-NEXT: .LBB12_15: ; %frem.loop_exit55 ; CI-NEXT: v_add_i32_e32 v3, vcc, -11, v3 ; CI-NEXT: v_ldexp_f32_e32 v3, v6, v3 ; CI-NEXT: v_mul_f32_e32 v4, v3, v4 @@ -2651,7 +2651,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_mov_b32 s2, 1 ; CI-NEXT: ; implicit-def: $vgpr2 ; CI-NEXT: s_cbranch_vccz .LBB12_18 -; CI-NEXT: ; %bb.17: ; %frem.else47 +; CI-NEXT: ; %bb.17: ; %frem.else16 ; CI-NEXT: s_and_b32 s2, s6, 0x80000000 ; CI-NEXT: v_mov_b32_e32 v3, 
s10 ; CI-NEXT: v_mov_b32_e32 v2, s6 @@ -2663,7 +2663,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_xor_b32 s2, s2, 1 ; CI-NEXT: s_cmp_lg_u32 s2, 0 ; CI-NEXT: s_cbranch_scc1 .LBB12_24 -; CI-NEXT: ; %bb.19: ; %frem.compute46 +; CI-NEXT: ; %bb.19: ; %frem.compute15 ; CI-NEXT: v_frexp_mant_f32_e64 v3, |s10| ; CI-NEXT: v_ldexp_f32_e64 v3, v3, 1 ; CI-NEXT: v_div_scale_f32 v5, s[2:3], v3, v3, 1.0 @@ -2688,10 +2688,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ge_i32_e32 vcc, 12, v4 ; CI-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB12_22 -; CI-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; CI-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; CI-NEXT: v_add_i32_e32 v4, vcc, 12, v7 ; CI-NEXT: v_sub_i32_e32 v4, vcc, v4, v8 -; CI-NEXT: .LBB12_21: ; %frem.loop_body54 +; CI-NEXT: .LBB12_21: ; %frem.loop_body23 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v7, v6 ; CI-NEXT: v_mul_f32_e32 v6, v7, v5 @@ -2707,7 +2707,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB12_23 ; CI-NEXT: .LBB12_22: ; CI-NEXT: v_mov_b32_e32 v7, v6 -; CI-NEXT: .LBB12_23: ; %frem.loop_exit55 +; CI-NEXT: .LBB12_23: ; %frem.loop_exit24 ; CI-NEXT: v_add_i32_e32 v4, vcc, -11, v4 ; CI-NEXT: v_ldexp_f32_e32 v4, v7, v4 ; CI-NEXT: v_mul_f32_e32 v5, v4, v5 @@ -2726,7 +2726,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_mov_b32 s2, 1 ; CI-NEXT: ; implicit-def: $vgpr3 ; CI-NEXT: s_cbranch_vccz .LBB12_26 -; CI-NEXT: ; %bb.25: ; %frem.else78 +; CI-NEXT: ; %bb.25: ; %frem.else ; CI-NEXT: s_and_b32 s2, s7, 0x80000000 ; CI-NEXT: v_mov_b32_e32 v4, s11 ; CI-NEXT: v_mov_b32_e32 v3, s7 @@ -2738,7 +2738,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_xor_b32 s2, s2, 1 ; CI-NEXT: s_cmp_lg_u32 s2, 0 ; CI-NEXT: 
s_cbranch_scc1 .LBB12_32 -; CI-NEXT: ; %bb.27: ; %frem.compute77 +; CI-NEXT: ; %bb.27: ; %frem.compute ; CI-NEXT: v_frexp_mant_f32_e64 v4, |s11| ; CI-NEXT: v_ldexp_f32_e64 v4, v4, 1 ; CI-NEXT: v_div_scale_f32 v6, s[2:3], v4, v4, 1.0 @@ -2763,10 +2763,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ge_i32_e32 vcc, 12, v5 ; CI-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB12_30 -; CI-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; CI-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; CI-NEXT: v_add_i32_e32 v5, vcc, 12, v8 ; CI-NEXT: v_sub_i32_e32 v5, vcc, v5, v9 -; CI-NEXT: .LBB12_29: ; %frem.loop_body85 +; CI-NEXT: .LBB12_29: ; %frem.loop_body ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v8, v7 ; CI-NEXT: v_mul_f32_e32 v7, v8, v6 @@ -2782,7 +2782,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB12_31 ; CI-NEXT: .LBB12_30: ; CI-NEXT: v_mov_b32_e32 v8, v7 -; CI-NEXT: .LBB12_31: ; %frem.loop_exit86 +; CI-NEXT: .LBB12_31: ; %frem.loop_exit ; CI-NEXT: v_add_i32_e32 v5, vcc, -11, v5 ; CI-NEXT: v_ldexp_f32_e32 v5, v8, v5 ; CI-NEXT: v_mul_f32_e32 v6, v5, v6 @@ -2832,7 +2832,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f32_e64 vcc, |s4|, |v0| ; VI-NEXT: ; implicit-def: $vgpr0 ; VI-NEXT: s_cbranch_vccz .LBB12_2 -; VI-NEXT: ; %bb.1: ; %frem.else +; VI-NEXT: ; %bb.1: ; %frem.else78 ; VI-NEXT: s_and_b32 s2, s4, 0x80000000 ; VI-NEXT: v_mov_b32_e32 v1, s8 ; VI-NEXT: v_mov_b32_e32 v0, s4 @@ -2844,7 +2844,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_xor_b32 s2, s2, 1 ; VI-NEXT: s_cmp_lg_u32 s2, 0 ; VI-NEXT: s_cbranch_scc1 .LBB12_8 -; VI-NEXT: ; %bb.3: ; %frem.compute +; VI-NEXT: ; %bb.3: ; %frem.compute77 ; VI-NEXT: v_frexp_mant_f32_e64 v1, |s8| ; VI-NEXT: v_ldexp_f32 v1, v1, 1 ; VI-NEXT: v_div_scale_f32 v3, 
s[2:3], v1, v1, 1.0 @@ -2869,10 +2869,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ge_i32_e32 vcc, 12, v2 ; VI-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB12_6 -; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; VI-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; VI-NEXT: v_add_u32_e32 v2, vcc, 12, v5 ; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v6 -; VI-NEXT: .LBB12_5: ; %frem.loop_body +; VI-NEXT: .LBB12_5: ; %frem.loop_body85 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v5, v4 ; VI-NEXT: v_mul_f32_e32 v4, v5, v3 @@ -2888,7 +2888,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB12_7 ; VI-NEXT: .LBB12_6: ; VI-NEXT: v_mov_b32_e32 v5, v4 -; VI-NEXT: .LBB12_7: ; %frem.loop_exit +; VI-NEXT: .LBB12_7: ; %frem.loop_exit86 ; VI-NEXT: v_add_u32_e32 v2, vcc, -11, v2 ; VI-NEXT: v_ldexp_f32 v2, v5, v2 ; VI-NEXT: v_mul_f32_e32 v3, v2, v3 @@ -2907,7 +2907,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_mov_b32 s2, 1 ; VI-NEXT: ; implicit-def: $vgpr1 ; VI-NEXT: s_cbranch_vccz .LBB12_10 -; VI-NEXT: ; %bb.9: ; %frem.else16 +; VI-NEXT: ; %bb.9: ; %frem.else47 ; VI-NEXT: s_and_b32 s2, s5, 0x80000000 ; VI-NEXT: v_mov_b32_e32 v2, s9 ; VI-NEXT: v_mov_b32_e32 v1, s5 @@ -2919,7 +2919,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_xor_b32 s2, s2, 1 ; VI-NEXT: s_cmp_lg_u32 s2, 0 ; VI-NEXT: s_cbranch_scc1 .LBB12_16 -; VI-NEXT: ; %bb.11: ; %frem.compute15 +; VI-NEXT: ; %bb.11: ; %frem.compute46 ; VI-NEXT: v_frexp_mant_f32_e64 v2, |s9| ; VI-NEXT: v_ldexp_f32 v2, v2, 1 ; VI-NEXT: v_div_scale_f32 v4, s[2:3], v2, v2, 1.0 @@ -2944,10 +2944,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ge_i32_e32 vcc, 12, v3 ; VI-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; VI-NEXT: s_cbranch_vccnz 
.LBB12_14 -; VI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; VI-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; VI-NEXT: v_add_u32_e32 v3, vcc, 12, v6 ; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v7 -; VI-NEXT: .LBB12_13: ; %frem.loop_body23 +; VI-NEXT: .LBB12_13: ; %frem.loop_body54 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v6, v5 ; VI-NEXT: v_mul_f32_e32 v5, v6, v4 @@ -2963,7 +2963,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB12_15 ; VI-NEXT: .LBB12_14: ; VI-NEXT: v_mov_b32_e32 v6, v5 -; VI-NEXT: .LBB12_15: ; %frem.loop_exit24 +; VI-NEXT: .LBB12_15: ; %frem.loop_exit55 ; VI-NEXT: v_add_u32_e32 v3, vcc, -11, v3 ; VI-NEXT: v_ldexp_f32 v3, v6, v3 ; VI-NEXT: v_mul_f32_e32 v4, v3, v4 @@ -2982,7 +2982,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_mov_b32 s2, 1 ; VI-NEXT: ; implicit-def: $vgpr2 ; VI-NEXT: s_cbranch_vccz .LBB12_18 -; VI-NEXT: ; %bb.17: ; %frem.else47 +; VI-NEXT: ; %bb.17: ; %frem.else16 ; VI-NEXT: s_and_b32 s2, s6, 0x80000000 ; VI-NEXT: v_mov_b32_e32 v3, s10 ; VI-NEXT: v_mov_b32_e32 v2, s6 @@ -2994,7 +2994,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_xor_b32 s2, s2, 1 ; VI-NEXT: s_cmp_lg_u32 s2, 0 ; VI-NEXT: s_cbranch_scc1 .LBB12_24 -; VI-NEXT: ; %bb.19: ; %frem.compute46 +; VI-NEXT: ; %bb.19: ; %frem.compute15 ; VI-NEXT: v_frexp_mant_f32_e64 v3, |s10| ; VI-NEXT: v_ldexp_f32 v3, v3, 1 ; VI-NEXT: v_div_scale_f32 v5, s[2:3], v3, v3, 1.0 @@ -3019,10 +3019,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ge_i32_e32 vcc, 12, v4 ; VI-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB12_22 -; VI-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; VI-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; VI-NEXT: v_add_u32_e32 v4, vcc, 12, v7 ; VI-NEXT: v_sub_u32_e32 v4, vcc, v4, v8 -; VI-NEXT: 
.LBB12_21: ; %frem.loop_body54 +; VI-NEXT: .LBB12_21: ; %frem.loop_body23 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v7, v6 ; VI-NEXT: v_mul_f32_e32 v6, v7, v5 @@ -3038,7 +3038,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB12_23 ; VI-NEXT: .LBB12_22: ; VI-NEXT: v_mov_b32_e32 v7, v6 -; VI-NEXT: .LBB12_23: ; %frem.loop_exit55 +; VI-NEXT: .LBB12_23: ; %frem.loop_exit24 ; VI-NEXT: v_add_u32_e32 v4, vcc, -11, v4 ; VI-NEXT: v_ldexp_f32 v4, v7, v4 ; VI-NEXT: v_mul_f32_e32 v5, v4, v5 @@ -3057,7 +3057,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_mov_b32 s2, 1 ; VI-NEXT: ; implicit-def: $vgpr3 ; VI-NEXT: s_cbranch_vccz .LBB12_26 -; VI-NEXT: ; %bb.25: ; %frem.else78 +; VI-NEXT: ; %bb.25: ; %frem.else ; VI-NEXT: s_and_b32 s2, s7, 0x80000000 ; VI-NEXT: v_mov_b32_e32 v4, s11 ; VI-NEXT: v_mov_b32_e32 v3, s7 @@ -3069,7 +3069,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_xor_b32 s2, s2, 1 ; VI-NEXT: s_cmp_lg_u32 s2, 0 ; VI-NEXT: s_cbranch_scc1 .LBB12_32 -; VI-NEXT: ; %bb.27: ; %frem.compute77 +; VI-NEXT: ; %bb.27: ; %frem.compute ; VI-NEXT: v_frexp_mant_f32_e64 v4, |s11| ; VI-NEXT: v_ldexp_f32 v4, v4, 1 ; VI-NEXT: v_div_scale_f32 v6, s[2:3], v4, v4, 1.0 @@ -3094,10 +3094,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ge_i32_e32 vcc, 12, v5 ; VI-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB12_30 -; VI-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; VI-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; VI-NEXT: v_add_u32_e32 v5, vcc, 12, v8 ; VI-NEXT: v_sub_u32_e32 v5, vcc, v5, v9 -; VI-NEXT: .LBB12_29: ; %frem.loop_body85 +; VI-NEXT: .LBB12_29: ; %frem.loop_body ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v8, v7 ; VI-NEXT: v_mul_f32_e32 v7, v8, v6 @@ -3113,7 +3113,7 @@ define 
amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB12_31 ; VI-NEXT: .LBB12_30: ; VI-NEXT: v_mov_b32_e32 v8, v7 -; VI-NEXT: .LBB12_31: ; %frem.loop_exit86 +; VI-NEXT: .LBB12_31: ; %frem.loop_exit ; VI-NEXT: v_add_u32_e32 v5, vcc, -11, v5 ; VI-NEXT: v_ldexp_f32 v5, v8, v5 ; VI-NEXT: v_mul_f32_e32 v6, v5, v6 @@ -3172,7 +3172,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f64_e64 vcc, |s[4:5]|, |v[0:1]| ; CI-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CI-NEXT: s_cbranch_vccz .LBB13_2 -; CI-NEXT: ; %bb.1: ; %frem.else +; CI-NEXT: ; %bb.1: ; %frem.else16 ; CI-NEXT: v_mov_b32_e32 v0, s8 ; CI-NEXT: v_mov_b32_e32 v1, s9 ; CI-NEXT: v_cmp_eq_f64_e64 vcc, |s[4:5]|, |v[0:1]| @@ -3190,7 +3190,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_xor_b32 s2, s2, 1 ; CI-NEXT: s_cmp_lg_u32 s2, 0 ; CI-NEXT: s_cbranch_scc1 .LBB13_8 -; CI-NEXT: ; %bb.3: ; %frem.compute +; CI-NEXT: ; %bb.3: ; %frem.compute15 ; CI-NEXT: v_frexp_mant_f64_e64 v[0:1], |s[4:5]| ; CI-NEXT: v_frexp_exp_i32_f64_e64 v6, |s[4:5]| ; CI-NEXT: v_frexp_exp_i32_f64_e64 v7, |s[8:9]| @@ -3213,10 +3213,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ge_i32_e32 vcc, 26, v9 ; CI-NEXT: v_div_fixup_f64 v[2:3], v[2:3], v[0:1], 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB13_6 -; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; CI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; CI-NEXT: v_add_i32_e32 v6, vcc, 26, v6 ; CI-NEXT: v_sub_i32_e32 v9, vcc, v6, v7 -; CI-NEXT: .LBB13_5: ; %frem.loop_body +; CI-NEXT: .LBB13_5: ; %frem.loop_body23 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v7, v5 ; CI-NEXT: v_mov_b32_e32 v6, v4 @@ -3235,7 +3235,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: .LBB13_6: ; CI-NEXT: v_mov_b32_e32 v7, v5 ; CI-NEXT: v_mov_b32_e32 v6, v4 -; 
CI-NEXT: .LBB13_7: ; %frem.loop_exit +; CI-NEXT: .LBB13_7: ; %frem.loop_exit24 ; CI-NEXT: v_add_i32_e32 v4, vcc, 0xffffffe7, v9 ; CI-NEXT: v_ldexp_f64 v[4:5], v[6:7], v4 ; CI-NEXT: s_mov_b32 s2, 0 @@ -3259,7 +3259,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_mov_b32 s2, 1 ; CI-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CI-NEXT: s_cbranch_vccz .LBB13_10 -; CI-NEXT: ; %bb.9: ; %frem.else16 +; CI-NEXT: ; %bb.9: ; %frem.else ; CI-NEXT: v_mov_b32_e32 v2, s10 ; CI-NEXT: v_mov_b32_e32 v3, s11 ; CI-NEXT: v_cmp_eq_f64_e64 vcc, |s[6:7]|, |v[2:3]| @@ -3277,7 +3277,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_xor_b32 s2, s2, 1 ; CI-NEXT: s_cmp_lg_u32 s2, 0 ; CI-NEXT: s_cbranch_scc1 .LBB13_16 -; CI-NEXT: ; %bb.11: ; %frem.compute15 +; CI-NEXT: ; %bb.11: ; %frem.compute ; CI-NEXT: v_frexp_mant_f64_e64 v[2:3], |s[6:7]| ; CI-NEXT: v_frexp_exp_i32_f64_e64 v8, |s[6:7]| ; CI-NEXT: v_frexp_exp_i32_f64_e64 v9, |s[10:11]| @@ -3300,10 +3300,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ge_i32_e32 vcc, 26, v11 ; CI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB13_14 -; CI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; CI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; CI-NEXT: v_add_i32_e32 v8, vcc, 26, v8 ; CI-NEXT: v_sub_i32_e32 v11, vcc, v8, v9 -; CI-NEXT: .LBB13_13: ; %frem.loop_body23 +; CI-NEXT: .LBB13_13: ; %frem.loop_body ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v9, v7 ; CI-NEXT: v_mov_b32_e32 v8, v6 @@ -3322,7 +3322,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: .LBB13_14: ; CI-NEXT: v_mov_b32_e32 v9, v7 ; CI-NEXT: v_mov_b32_e32 v8, v6 -; CI-NEXT: .LBB13_15: ; %frem.loop_exit24 +; CI-NEXT: .LBB13_15: ; %frem.loop_exit ; CI-NEXT: v_add_i32_e32 v6, vcc, 0xffffffe7, v11 ; CI-NEXT: v_ldexp_f64 v[6:7], v[8:9], v6 
; CI-NEXT: s_mov_b32 s2, 0 @@ -3374,7 +3374,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f64_e64 vcc, |s[4:5]|, |v[0:1]| ; VI-NEXT: ; implicit-def: $vgpr0_vgpr1 ; VI-NEXT: s_cbranch_vccz .LBB13_2 -; VI-NEXT: ; %bb.1: ; %frem.else +; VI-NEXT: ; %bb.1: ; %frem.else16 ; VI-NEXT: v_mov_b32_e32 v0, s8 ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: v_cmp_eq_f64_e64 vcc, |s[4:5]|, |v[0:1]| @@ -3392,7 +3392,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_xor_b32 s2, s2, 1 ; VI-NEXT: s_cmp_lg_u32 s2, 0 ; VI-NEXT: s_cbranch_scc1 .LBB13_8 -; VI-NEXT: ; %bb.3: ; %frem.compute +; VI-NEXT: ; %bb.3: ; %frem.compute15 ; VI-NEXT: v_frexp_mant_f64_e64 v[0:1], |s[4:5]| ; VI-NEXT: v_frexp_exp_i32_f64_e64 v6, |s[4:5]| ; VI-NEXT: v_frexp_exp_i32_f64_e64 v7, |s[8:9]| @@ -3415,10 +3415,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ge_i32_e32 vcc, 26, v9 ; VI-NEXT: v_div_fixup_f64 v[2:3], v[2:3], v[0:1], 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB13_6 -; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; VI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; VI-NEXT: v_add_u32_e32 v6, vcc, 26, v6 ; VI-NEXT: v_sub_u32_e32 v9, vcc, v6, v7 -; VI-NEXT: .LBB13_5: ; %frem.loop_body +; VI-NEXT: .LBB13_5: ; %frem.loop_body23 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v7, v5 ; VI-NEXT: v_mov_b32_e32 v6, v4 @@ -3437,7 +3437,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: .LBB13_6: ; VI-NEXT: v_mov_b32_e32 v7, v5 ; VI-NEXT: v_mov_b32_e32 v6, v4 -; VI-NEXT: .LBB13_7: ; %frem.loop_exit +; VI-NEXT: .LBB13_7: ; %frem.loop_exit24 ; VI-NEXT: v_add_u32_e32 v4, vcc, 0xffffffe7, v9 ; VI-NEXT: v_ldexp_f64 v[4:5], v[6:7], v4 ; VI-NEXT: s_mov_b32 s2, 0 @@ -3461,7 +3461,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_mov_b32 s2, 1 ; 
VI-NEXT: ; implicit-def: $vgpr2_vgpr3 ; VI-NEXT: s_cbranch_vccz .LBB13_10 -; VI-NEXT: ; %bb.9: ; %frem.else16 +; VI-NEXT: ; %bb.9: ; %frem.else ; VI-NEXT: v_mov_b32_e32 v2, s10 ; VI-NEXT: v_mov_b32_e32 v3, s11 ; VI-NEXT: v_cmp_eq_f64_e64 vcc, |s[6:7]|, |v[2:3]| @@ -3479,7 +3479,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_xor_b32 s2, s2, 1 ; VI-NEXT: s_cmp_lg_u32 s2, 0 ; VI-NEXT: s_cbranch_scc1 .LBB13_16 -; VI-NEXT: ; %bb.11: ; %frem.compute15 +; VI-NEXT: ; %bb.11: ; %frem.compute ; VI-NEXT: v_frexp_mant_f64_e64 v[2:3], |s[6:7]| ; VI-NEXT: v_frexp_exp_i32_f64_e64 v8, |s[6:7]| ; VI-NEXT: v_frexp_exp_i32_f64_e64 v9, |s[10:11]| @@ -3502,10 +3502,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ge_i32_e32 vcc, 26, v11 ; VI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB13_14 -; VI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; VI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; VI-NEXT: v_add_u32_e32 v8, vcc, 26, v8 ; VI-NEXT: v_sub_u32_e32 v11, vcc, v8, v9 -; VI-NEXT: .LBB13_13: ; %frem.loop_body23 +; VI-NEXT: .LBB13_13: ; %frem.loop_body ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v9, v7 ; VI-NEXT: v_mov_b32_e32 v8, v6 @@ -3524,7 +3524,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: .LBB13_14: ; VI-NEXT: v_mov_b32_e32 v9, v7 ; VI-NEXT: v_mov_b32_e32 v8, v6 -; VI-NEXT: .LBB13_15: ; %frem.loop_exit24 +; VI-NEXT: .LBB13_15: ; %frem.loop_exit ; VI-NEXT: v_add_u32_e32 v6, vcc, 0xffffffe7, v11 ; VI-NEXT: v_ldexp_f64 v[6:7], v[8:9], v6 ; VI-NEXT: s_mov_b32 s2, 0 diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll index c4a38dcd7b5f3..02506087f6fce 100644 --- a/llvm/test/CodeGen/AMDGPU/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/frem.ll @@ -4870,7 +4870,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) 
%i ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: v_cvt_f16_f32_e32 v4, v2 ; SI-NEXT: s_cbranch_vccz .LBB9_2 -; SI-NEXT: ; %bb.1: ; %frem.else +; SI-NEXT: ; %bb.1: ; %frem.else20 ; SI-NEXT: v_bfi_b32 v7, s0, 0, v2 ; SI-NEXT: v_cvt_f32_f16_e32 v4, v4 ; SI-NEXT: v_cmp_eq_f32_e32 vcc, v5, v6 @@ -4881,7 +4881,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB9_2: ; SI-NEXT: ; implicit-def: $vgpr4 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB9_3: ; %frem.compute +; SI-NEXT: .LBB9_3: ; %frem.compute19 ; SI-NEXT: s_mov_b32 s3, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v5|, s3 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v4, v5 @@ -4917,10 +4917,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; SI-NEXT: s_cmp_lt_i32 s1, 12 ; SI-NEXT: s_cbranch_scc1 .LBB9_7 -; SI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; SI-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; SI-NEXT: s_sub_i32 s1, s2, s3 ; SI-NEXT: s_add_i32 s1, s1, 11 -; SI-NEXT: .LBB9_5: ; %frem.loop_body +; SI-NEXT: .LBB9_5: ; %frem.loop_body27 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v7, v5 ; SI-NEXT: v_mul_f32_e32 v5, v7, v6 @@ -4935,7 +4935,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB9_5 ; SI-NEXT: ; %bb.6: ; %Flow55 ; SI-NEXT: v_mov_b32_e32 v5, v7 -; SI-NEXT: .LBB9_7: ; %frem.loop_exit +; SI-NEXT: .LBB9_7: ; %frem.loop_exit28 ; SI-NEXT: s_add_i32 s1, s1, -10 ; SI-NEXT: v_ldexp_f32_e64 v5, v5, s1 ; SI-NEXT: v_mul_f32_e32 v6, v5, v6 @@ -4956,7 +4956,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cvt_f32_f16_e64 v7, |v7| ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, v6, v7 ; SI-NEXT: s_cbranch_vccz .LBB9_10 -; SI-NEXT: ; %bb.9: ; %frem.else20 +; SI-NEXT: ; %bb.9: ; %frem.else ; SI-NEXT: s_brev_b32 s0, -2 ; SI-NEXT: v_bfi_b32 v8, s0, 0, v0 ; SI-NEXT: 
v_cvt_f32_f16_e32 v5, v5 @@ -4968,7 +4968,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB9_10: ; SI-NEXT: ; implicit-def: $vgpr5 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB9_11: ; %frem.compute19 +; SI-NEXT: .LBB9_11: ; %frem.compute ; SI-NEXT: s_mov_b32 s3, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v6|, s3 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v5, v6 @@ -5004,10 +5004,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0 ; SI-NEXT: s_cmp_lt_i32 s1, 12 ; SI-NEXT: s_cbranch_scc1 .LBB9_15 -; SI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; SI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; SI-NEXT: s_sub_i32 s1, s2, s3 ; SI-NEXT: s_add_i32 s1, s1, 11 -; SI-NEXT: .LBB9_13: ; %frem.loop_body27 +; SI-NEXT: .LBB9_13: ; %frem.loop_body ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v8, v6 ; SI-NEXT: v_mul_f32_e32 v6, v8, v7 @@ -5022,7 +5022,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB9_13 ; SI-NEXT: ; %bb.14: ; %Flow ; SI-NEXT: v_mov_b32_e32 v6, v8 -; SI-NEXT: .LBB9_15: ; %frem.loop_exit28 +; SI-NEXT: .LBB9_15: ; %frem.loop_exit ; SI-NEXT: s_add_i32 s1, s1, -10 ; SI-NEXT: v_ldexp_f32_e64 v6, v6, s1 ; SI-NEXT: v_mul_f32_e32 v7, v6, v7 @@ -5096,7 +5096,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_and_b32_e32 v5, 0x7fffffff, v3 ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB9_2 -; CI-NEXT: ; %bb.1: ; %frem.else +; CI-NEXT: ; %bb.1: ; %frem.else20 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 ; CI-NEXT: v_bfi_b32 v7, s0, 0, v2 ; CI-NEXT: v_cmp_eq_f32_e32 vcc, v6, v5 @@ -5105,7 +5105,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB9_8 ; CI-NEXT: .LBB9_2: ; CI-NEXT: ; implicit-def: $vgpr4 -; CI-NEXT: .LBB9_3: ; %frem.compute +; 
CI-NEXT: .LBB9_3: ; %frem.compute19 ; CI-NEXT: v_frexp_exp_i32_f32_e32 v9, v6 ; CI-NEXT: v_frexp_mant_f32_e32 v4, v6 ; CI-NEXT: v_frexp_mant_f32_e32 v6, v5 @@ -5130,10 +5130,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v6 ; CI-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB9_7 -; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; CI-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; CI-NEXT: v_sub_i32_e32 v6, vcc, v9, v10 ; CI-NEXT: v_add_i32_e32 v6, vcc, 11, v6 -; CI-NEXT: .LBB9_5: ; %frem.loop_body +; CI-NEXT: .LBB9_5: ; %frem.loop_body27 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v9, v7 ; CI-NEXT: v_mul_f32_e32 v7, v9, v8 @@ -5148,7 +5148,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB9_5 ; CI-NEXT: ; %bb.6: ; %Flow55 ; CI-NEXT: v_mov_b32_e32 v7, v9 -; CI-NEXT: .LBB9_7: ; %frem.loop_exit +; CI-NEXT: .LBB9_7: ; %frem.loop_exit28 ; CI-NEXT: v_add_i32_e32 v6, vcc, -10, v6 ; CI-NEXT: v_ldexp_f32_e32 v6, v7, v6 ; CI-NEXT: v_mul_f32_e32 v7, v6, v8 @@ -5169,7 +5169,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cvt_f32_f16_e64 v6, |v6| ; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v7, v6 ; CI-NEXT: s_cbranch_vccz .LBB9_10 -; CI-NEXT: ; %bb.9: ; %frem.else20 +; CI-NEXT: ; %bb.9: ; %frem.else ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 ; CI-NEXT: s_brev_b32 s0, -2 ; CI-NEXT: v_bfi_b32 v8, s0, 0, v0 @@ -5179,7 +5179,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB9_16 ; CI-NEXT: .LBB9_10: ; CI-NEXT: ; implicit-def: $vgpr5 -; CI-NEXT: .LBB9_11: ; %frem.compute19 +; CI-NEXT: .LBB9_11: ; %frem.compute ; CI-NEXT: v_frexp_exp_i32_f32_e32 v10, v7 ; CI-NEXT: v_frexp_mant_f32_e32 v5, v7 ; CI-NEXT: v_frexp_mant_f32_e32 v7, v6 @@ -5204,10 +5204,10 @@ define amdgpu_kernel void @frem_v2f16(ptr 
addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v7 ; CI-NEXT: v_div_fixup_f32 v9, v9, v6, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB9_15 -; CI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; CI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; CI-NEXT: v_sub_i32_e32 v7, vcc, v10, v11 ; CI-NEXT: v_add_i32_e32 v7, vcc, 11, v7 -; CI-NEXT: .LBB9_13: ; %frem.loop_body27 +; CI-NEXT: .LBB9_13: ; %frem.loop_body ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v10, v8 ; CI-NEXT: v_mul_f32_e32 v8, v10, v9 @@ -5222,7 +5222,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB9_13 ; CI-NEXT: ; %bb.14: ; %Flow ; CI-NEXT: v_mov_b32_e32 v8, v10 -; CI-NEXT: .LBB9_15: ; %frem.loop_exit28 +; CI-NEXT: .LBB9_15: ; %frem.loop_exit ; CI-NEXT: v_add_i32_e32 v7, vcc, -10, v7 ; CI-NEXT: v_ldexp_f32_e32 v7, v8, v7 ; CI-NEXT: v_mul_f32_e32 v8, v7, v9 @@ -5287,7 +5287,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cvt_f32_f16_e64 v3, |v1| ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v4, v3 ; VI-NEXT: s_cbranch_vccz .LBB9_2 -; VI-NEXT: ; %bb.1: ; %frem.else +; VI-NEXT: ; %bb.1: ; %frem.else20 ; VI-NEXT: s_movk_i32 s2, 0x7fff ; VI-NEXT: v_bfi_b32 v2, s2, 0, v0 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v4, v3 @@ -5296,7 +5296,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB9_8 ; VI-NEXT: .LBB9_2: ; VI-NEXT: ; implicit-def: $vgpr2 -; VI-NEXT: .LBB9_3: ; %frem.compute +; VI-NEXT: .LBB9_3: ; %frem.compute19 ; VI-NEXT: v_frexp_exp_i32_f32_e32 v7, v4 ; VI-NEXT: v_frexp_mant_f32_e32 v2, v4 ; VI-NEXT: v_frexp_mant_f32_e32 v4, v3 @@ -5321,10 +5321,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v4 ; VI-NEXT: v_div_fixup_f32 v6, v6, v3, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB9_7 -; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; 
VI-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; VI-NEXT: v_sub_u32_e32 v4, vcc, v7, v8 ; VI-NEXT: v_add_u32_e32 v4, vcc, 11, v4 -; VI-NEXT: .LBB9_5: ; %frem.loop_body +; VI-NEXT: .LBB9_5: ; %frem.loop_body27 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v7, v5 ; VI-NEXT: v_mul_f32_e32 v5, v7, v6 @@ -5339,7 +5339,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB9_5 ; VI-NEXT: ; %bb.6: ; %Flow55 ; VI-NEXT: v_mov_b32_e32 v5, v7 -; VI-NEXT: .LBB9_7: ; %frem.loop_exit +; VI-NEXT: .LBB9_7: ; %frem.loop_exit28 ; VI-NEXT: v_add_u32_e32 v4, vcc, -10, v4 ; VI-NEXT: v_ldexp_f32 v4, v5, v4 ; VI-NEXT: v_mul_f32_e32 v5, v4, v6 @@ -5359,7 +5359,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cvt_f32_f16_e64 v6, |v4| ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v7, v6 ; VI-NEXT: s_cbranch_vccz .LBB9_10 -; VI-NEXT: ; %bb.9: ; %frem.else20 +; VI-NEXT: ; %bb.9: ; %frem.else ; VI-NEXT: s_movk_i32 s2, 0x7fff ; VI-NEXT: v_bfi_b32 v5, s2, 0, v3 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v7, v6 @@ -5368,7 +5368,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB9_16 ; VI-NEXT: .LBB9_10: ; VI-NEXT: ; implicit-def: $vgpr5 -; VI-NEXT: .LBB9_11: ; %frem.compute19 +; VI-NEXT: .LBB9_11: ; %frem.compute ; VI-NEXT: v_frexp_exp_i32_f32_e32 v10, v7 ; VI-NEXT: v_frexp_mant_f32_e32 v5, v7 ; VI-NEXT: v_frexp_mant_f32_e32 v7, v6 @@ -5393,10 +5393,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v7 ; VI-NEXT: v_div_fixup_f32 v9, v9, v6, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB9_15 -; VI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; VI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; VI-NEXT: v_sub_u32_e32 v7, vcc, v10, v11 ; VI-NEXT: v_add_u32_e32 v7, vcc, 11, v7 -; VI-NEXT: .LBB9_13: ; %frem.loop_body27 +; VI-NEXT: .LBB9_13: ; %frem.loop_body ; 
VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v10, v8 ; VI-NEXT: v_mul_f32_e32 v8, v10, v9 @@ -5411,7 +5411,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB9_13 ; VI-NEXT: ; %bb.14: ; %Flow ; VI-NEXT: v_mov_b32_e32 v8, v10 -; VI-NEXT: .LBB9_15: ; %frem.loop_exit28 +; VI-NEXT: .LBB9_15: ; %frem.loop_exit ; VI-NEXT: v_add_u32_e32 v7, vcc, -10, v7 ; VI-NEXT: v_ldexp_f32 v7, v8, v7 ; VI-NEXT: v_mul_f32_e32 v8, v7, v9 @@ -5455,7 +5455,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cvt_f32_f16_e64 v3, |v0| ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v4, v3 ; GFX9-NEXT: s_cbranch_vccz .LBB9_2 -; GFX9-NEXT: ; %bb.1: ; %frem.else +; GFX9-NEXT: ; %bb.1: ; %frem.else20 ; GFX9-NEXT: s_movk_i32 s2, 0x7fff ; GFX9-NEXT: v_bfi_b32 v2, s2, 0, v1 ; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v4, v3 @@ -5464,7 +5464,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB9_8 ; GFX9-NEXT: .LBB9_2: ; GFX9-NEXT: ; implicit-def: $vgpr2 -; GFX9-NEXT: .LBB9_3: ; %frem.compute +; GFX9-NEXT: .LBB9_3: ; %frem.compute19 ; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v7, v4 ; GFX9-NEXT: v_frexp_mant_f32_e32 v2, v4 ; GFX9-NEXT: v_frexp_mant_f32_e32 v4, v3 @@ -5489,10 +5489,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v4 ; GFX9-NEXT: v_div_fixup_f32 v6, v6, v3, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB9_7 -; GFX9-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX9-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; GFX9-NEXT: v_sub_u32_e32 v4, v7, v8 ; GFX9-NEXT: v_add_u32_e32 v4, 11, v4 -; GFX9-NEXT: .LBB9_5: ; %frem.loop_body +; GFX9-NEXT: .LBB9_5: ; %frem.loop_body27 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v7, v5 ; GFX9-NEXT: v_mul_f32_e32 v5, v7, v6 @@ -5507,7 +5507,7 @@ define amdgpu_kernel void @frem_v2f16(ptr 
addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB9_5 ; GFX9-NEXT: ; %bb.6: ; %Flow55 ; GFX9-NEXT: v_mov_b32_e32 v5, v7 -; GFX9-NEXT: .LBB9_7: ; %frem.loop_exit +; GFX9-NEXT: .LBB9_7: ; %frem.loop_exit28 ; GFX9-NEXT: v_add_u32_e32 v4, -10, v4 ; GFX9-NEXT: v_ldexp_f32 v4, v5, v4 ; GFX9-NEXT: v_mul_f32_e32 v5, v4, v6 @@ -5526,7 +5526,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cvt_f32_f16_sdwa v5, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v6, v5 ; GFX9-NEXT: s_cbranch_vccz .LBB9_10 -; GFX9-NEXT: ; %bb.9: ; %frem.else20 +; GFX9-NEXT: ; %bb.9: ; %frem.else ; GFX9-NEXT: s_movk_i32 s2, 0x7fff ; GFX9-NEXT: v_bfi_b32 v4, s2, 0, v3 ; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v6, v5 @@ -5535,7 +5535,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB9_16 ; GFX9-NEXT: .LBB9_10: ; GFX9-NEXT: ; implicit-def: $vgpr4 -; GFX9-NEXT: .LBB9_11: ; %frem.compute19 +; GFX9-NEXT: .LBB9_11: ; %frem.compute ; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v9, v6 ; GFX9-NEXT: v_frexp_mant_f32_e32 v4, v6 ; GFX9-NEXT: v_frexp_mant_f32_e32 v6, v5 @@ -5560,10 +5560,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v6 ; GFX9-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB9_15 -; GFX9-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX9-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX9-NEXT: v_sub_u32_e32 v6, v9, v10 ; GFX9-NEXT: v_add_u32_e32 v6, 11, v6 -; GFX9-NEXT: .LBB9_13: ; %frem.loop_body27 +; GFX9-NEXT: .LBB9_13: ; %frem.loop_body ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v9, v7 ; GFX9-NEXT: v_mul_f32_e32 v7, v9, v8 @@ -5578,7 +5578,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB9_13 ; GFX9-NEXT: ; %bb.14: ; %Flow ; 
GFX9-NEXT: v_mov_b32_e32 v7, v9 -; GFX9-NEXT: .LBB9_15: ; %frem.loop_exit28 +; GFX9-NEXT: .LBB9_15: ; %frem.loop_exit ; GFX9-NEXT: v_add_u32_e32 v6, -10, v6 ; GFX9-NEXT: v_ldexp_f32 v6, v7, v6 ; GFX9-NEXT: v_mul_f32_e32 v7, v6, v8 @@ -5624,7 +5624,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cvt_f32_f16_e64 v3, |v0| ; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v4, v3 ; GFX10-NEXT: s_cbranch_vccz .LBB9_2 -; GFX10-NEXT: ; %bb.1: ; %frem.else +; GFX10-NEXT: ; %bb.1: ; %frem.else20 ; GFX10-NEXT: v_bfi_b32 v2, 0x7fff, 0, v1 ; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v4, v3 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc_lo @@ -5632,7 +5632,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB9_8 ; GFX10-NEXT: .LBB9_2: ; GFX10-NEXT: ; implicit-def: $vgpr2 -; GFX10-NEXT: .LBB9_3: ; %frem.compute +; GFX10-NEXT: .LBB9_3: ; %frem.compute19 ; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v4 ; GFX10-NEXT: v_frexp_mant_f32_e32 v6, v3 ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v5, v4 @@ -5659,10 +5659,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v6 ; GFX10-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB9_7 -; GFX10-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX10-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 11 -; GFX10-NEXT: .LBB9_5: ; %frem.loop_body +; GFX10-NEXT: .LBB9_5: ; %frem.loop_body27 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v7, v4 ; GFX10-NEXT: s_add_i32 s2, s2, -11 @@ -5678,7 +5678,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.6: ; %Flow55 ; GFX10-NEXT: v_mov_b32_e32 v6, s2 ; GFX10-NEXT: v_mov_b32_e32 v4, v7 -; GFX10-NEXT: .LBB9_7: ; %frem.loop_exit +; GFX10-NEXT: .LBB9_7: ; %frem.loop_exit28 ; 
GFX10-NEXT: v_add_nc_u32_e32 v6, -10, v6 ; GFX10-NEXT: v_ldexp_f32 v4, v4, v6 ; GFX10-NEXT: v_mul_f32_e32 v5, v4, v5 @@ -5696,7 +5696,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cvt_f32_f16_e64 v6, |v3| ; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v4 ; GFX10-NEXT: s_cbranch_vccz .LBB9_10 -; GFX10-NEXT: ; %bb.9: ; %frem.else20 +; GFX10-NEXT: ; %bb.9: ; %frem.else ; GFX10-NEXT: v_bfi_b32 v5, 0x7fff, 0, v3 ; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v4 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v3, v5, vcc_lo @@ -5704,7 +5704,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB9_16 ; GFX10-NEXT: .LBB9_10: ; GFX10-NEXT: ; implicit-def: $vgpr5 -; GFX10-NEXT: .LBB9_11: ; %frem.compute19 +; GFX10-NEXT: .LBB9_11: ; %frem.compute ; GFX10-NEXT: v_frexp_mant_f32_e32 v5, v6 ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v7, v6 ; GFX10-NEXT: v_ldexp_f32 v6, v5, 11 @@ -5731,10 +5731,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v8 ; GFX10-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB9_15 -; GFX10-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX10-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 11 -; GFX10-NEXT: .LBB9_13: ; %frem.loop_body27 +; GFX10-NEXT: .LBB9_13: ; %frem.loop_body ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v9, v6 ; GFX10-NEXT: s_add_i32 s2, s2, -11 @@ -5750,7 +5750,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.14: ; %Flow ; GFX10-NEXT: v_mov_b32_e32 v8, s2 ; GFX10-NEXT: v_mov_b32_e32 v6, v9 -; GFX10-NEXT: .LBB9_15: ; %frem.loop_exit28 +; GFX10-NEXT: .LBB9_15: ; %frem.loop_exit ; GFX10-NEXT: v_add_nc_u32_e32 v8, -10, v8 ; GFX10-NEXT: v_ldexp_f32 v6, v6, v8 ; GFX10-NEXT: v_mul_f32_e32 v7, v6, v7 
@@ -5794,7 +5794,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v4, v3 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB9_2 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else +; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else20 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v4, v3 @@ -5805,7 +5805,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_branch .LBB9_8 ; GFX11-TRUE16-NEXT: .LBB9_2: ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 -; GFX11-TRUE16-NEXT: .LBB9_3: ; %frem.compute +; GFX11-TRUE16-NEXT: .LBB9_3: ; %frem.compute19 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, v4 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v6, v3 ; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v5, v4 @@ -5841,11 +5841,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB9_7 -; GFX11-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX11-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body +; GFX11-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body27 ; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v7, v4 @@ -5865,7 +5865,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: ; %bb.6: ; %Flow55 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, s2 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v4, v7 -; 
GFX11-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit +; GFX11-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit28 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, -10, v6 ; GFX11-TRUE16-NEXT: v_ldexp_f32 v4, v4, v6 @@ -5892,7 +5892,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v5 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB9_10 -; GFX11-TRUE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX11-TRUE16-NEXT: ; %bb.9: ; %frem.else ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v3.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v5 @@ -5903,7 +5903,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_branch .LBB9_16 ; GFX11-TRUE16-NEXT: .LBB9_10: ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 -; GFX11-TRUE16-NEXT: .LBB9_11: ; %frem.compute19 +; GFX11-TRUE16-NEXT: .LBB9_11: ; %frem.compute ; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v8, v6 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v6, v6 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -5939,11 +5939,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_div_fixup_f32 v8, v8, v6, 1.0 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB9_15 -; GFX11-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX11-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body27 +; GFX11-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body ; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-TRUE16-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v10, v7 @@ -5963,7 +5963,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: ; %bb.14: ; %Flow ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v9, s2 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v7, v10 -; GFX11-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit28 +; GFX11-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, -10, v9 ; GFX11-TRUE16-NEXT: v_ldexp_f32 v7, v7, v9 @@ -6014,7 +6014,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v4, v3 ; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB9_2 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %frem.else +; GFX11-FAKE16-NEXT: ; %bb.1: ; %frem.else20 ; GFX11-FAKE16-NEXT: v_bfi_b32 v2, 0x7fff, 0, v0 ; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v4, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -6023,7 +6023,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_branch .LBB9_8 ; GFX11-FAKE16-NEXT: .LBB9_2: ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr2 -; GFX11-FAKE16-NEXT: .LBB9_3: ; %frem.compute +; GFX11-FAKE16-NEXT: .LBB9_3: ; %frem.compute19 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, v4 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v6, v3 ; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v5, v4 @@ -6059,11 +6059,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB9_7 -; GFX11-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX11-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3 
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body +; GFX11-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body27 ; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v7, v4 @@ -6083,7 +6083,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: ; %bb.6: ; %Flow55 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v6, s2 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v4, v7 -; GFX11-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit +; GFX11-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit28 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, -10, v6 ; GFX11-FAKE16-NEXT: v_ldexp_f32 v4, v4, v6 @@ -6109,7 +6109,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v7, v5 ; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB9_10 -; GFX11-FAKE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX11-FAKE16-NEXT: ; %bb.9: ; %frem.else ; GFX11-FAKE16-NEXT: v_bfi_b32 v6, 0x7fff, 0, v3 ; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v7, v5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -6118,7 +6118,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_branch .LBB9_16 ; GFX11-FAKE16-NEXT: .LBB9_10: ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr6 -; GFX11-FAKE16-NEXT: .LBB9_11: ; %frem.compute19 +; GFX11-FAKE16-NEXT: .LBB9_11: ; %frem.compute ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v6, v7 ; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v8, v7 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4) @@ -6154,11 +6154,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, 
ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_div_fixup_f32 v8, v8, v6, 1.0 ; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB9_15 -; GFX11-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX11-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body27 +; GFX11-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body ; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v10, v7 @@ -6178,7 +6178,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: ; %bb.14: ; %Flow ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v9, s2 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v7, v10 -; GFX11-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit28 +; GFX11-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, -10, v9 ; GFX11-FAKE16-NEXT: v_ldexp_f32 v7, v7, v9 @@ -6232,7 +6232,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s6, s5 ; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB9_2 -; GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else +; GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else20 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v0.l, s4 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0 ; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s6, s5 @@ -6244,7 +6244,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_branch .LBB9_8 ; GFX1150-TRUE16-NEXT: .LBB9_2: ; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr0 -; GFX1150-TRUE16-NEXT: .LBB9_3: ; %frem.compute +; 
GFX1150-TRUE16-NEXT: .LBB9_3: ; %frem.compute19 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s5 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v0, s6 ; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s6 @@ -6279,11 +6279,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 ; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB9_7 -; GFX1150-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1150-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; GFX1150-TRUE16-NEXT: s_sub_i32 s5, s6, s5 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_add_i32 s5, s5, 11 -; GFX1150-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body +; GFX1150-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body27 ; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v5, v2 @@ -6305,7 +6305,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: ; %bb.6: ; %Flow55 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, s5 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v2, v5 -; GFX1150-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit +; GFX1150-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit28 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 ; GFX1150-TRUE16-NEXT: v_ldexp_f32 v2, v2, v4 @@ -6335,7 +6335,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s8, s7 ; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB9_10 -; GFX1150-TRUE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX1150-TRUE16-NEXT: ; %bb.9: ; %frem.else ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, s6 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 
v2.l, 0 ; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s8, s7 @@ -6347,7 +6347,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_branch .LBB9_16 ; GFX1150-TRUE16-NEXT: .LBB9_10: ; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr1 -; GFX1150-TRUE16-NEXT: .LBB9_11: ; %frem.compute19 +; GFX1150-TRUE16-NEXT: .LBB9_11: ; %frem.compute ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s7 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s8 ; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s8 @@ -6382,11 +6382,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5 ; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB9_15 -; GFX1150-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX1150-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX1150-TRUE16-NEXT: s_sub_i32 s7, s8, s7 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_add_i32 s7, s7, 11 -; GFX1150-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body27 +; GFX1150-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body ; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v6, v3 @@ -6408,7 +6408,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: ; %bb.14: ; %Flow ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v5, s7 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v3, v6 -; GFX1150-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit28 +; GFX1150-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v5, -10, v5 ; GFX1150-TRUE16-NEXT: v_ldexp_f32 v3, v3, v5 @@ -6471,7 +6471,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr 
addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s6, s5 ; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB9_2 -; GFX1150-FAKE16-NEXT: ; %bb.1: ; %frem.else +; GFX1150-FAKE16-NEXT: ; %bb.1: ; %frem.else20 ; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s6, s5 ; GFX1150-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, 0, s4 ; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -6481,7 +6481,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_branch .LBB9_8 ; GFX1150-FAKE16-NEXT: .LBB9_2: ; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr0 -; GFX1150-FAKE16-NEXT: .LBB9_3: ; %frem.compute +; GFX1150-FAKE16-NEXT: .LBB9_3: ; %frem.compute19 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s5 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v0, s6 ; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s6 @@ -6516,11 +6516,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 ; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB9_7 -; GFX1150-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1150-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; GFX1150-FAKE16-NEXT: s_sub_i32 s5, s6, s5 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: s_add_i32 s5, s5, 11 -; GFX1150-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body +; GFX1150-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body27 ; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v5, v2 @@ -6542,7 +6542,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: ; %bb.6: ; %Flow55 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v4, s5 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v2, v5 -; GFX1150-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit 
+; GFX1150-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit28 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 ; GFX1150-FAKE16-NEXT: v_ldexp_f32 v2, v2, v4 @@ -6571,7 +6571,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s8, s7 ; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB9_10 -; GFX1150-FAKE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX1150-FAKE16-NEXT: ; %bb.9: ; %frem.else ; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s8, s7 ; GFX1150-FAKE16-NEXT: v_bfi_b32 v1, 0x7fff, 0, s6 ; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -6581,7 +6581,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_branch .LBB9_16 ; GFX1150-FAKE16-NEXT: .LBB9_10: ; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr1 -; GFX1150-FAKE16-NEXT: .LBB9_11: ; %frem.compute19 +; GFX1150-FAKE16-NEXT: .LBB9_11: ; %frem.compute ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s7 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s8 ; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s8 @@ -6616,11 +6616,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5 ; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB9_15 -; GFX1150-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX1150-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX1150-FAKE16-NEXT: s_sub_i32 s7, s8, s7 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: s_add_i32 s7, s7, 11 -; GFX1150-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body27 +; GFX1150-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body ; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | 
instid1(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v6, v3 @@ -6642,7 +6642,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: ; %bb.14: ; %Flow ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v5, s7 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v3, v6 -; GFX1150-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit28 +; GFX1150-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v5, -10, v5 ; GFX1150-FAKE16-NEXT: v_ldexp_f32 v3, v3, v5 @@ -6702,7 +6702,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s6, s5 ; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB9_2 -; GFX1200-TRUE16-NEXT: ; %bb.1: ; %frem.else +; GFX1200-TRUE16-NEXT: ; %bb.1: ; %frem.else20 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v0.l, s4 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0 ; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s6, s5 @@ -6714,7 +6714,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_branch .LBB9_8 ; GFX1200-TRUE16-NEXT: .LBB9_2: ; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr0 -; GFX1200-TRUE16-NEXT: .LBB9_3: ; %frem.compute +; GFX1200-TRUE16-NEXT: .LBB9_3: ; %frem.compute19 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s5 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v0, s6 ; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s6 @@ -6749,11 +6749,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 ; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB9_7 -; GFX1200-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1200-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; GFX1200-TRUE16-NEXT: s_sub_co_i32 s5, s6, s5 ; 
GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe ; GFX1200-TRUE16-NEXT: s_add_co_i32 s5, s5, 11 -; GFX1200-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body +; GFX1200-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body27 ; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v5, v2 @@ -6777,7 +6777,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: ; %bb.6: ; %Flow55 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v4, s5 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v2, v5 -; GFX1200-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit +; GFX1200-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit28 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 ; GFX1200-TRUE16-NEXT: v_ldexp_f32 v2, v2, v4 @@ -6811,7 +6811,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) ; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s8, s7 ; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB9_10 -; GFX1200-TRUE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX1200-TRUE16-NEXT: ; %bb.9: ; %frem.else ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, s6 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0 ; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s8, s7 @@ -6823,7 +6823,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_branch .LBB9_16 ; GFX1200-TRUE16-NEXT: .LBB9_10: ; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr1 -; GFX1200-TRUE16-NEXT: .LBB9_11: ; %frem.compute19 +; GFX1200-TRUE16-NEXT: .LBB9_11: ; %frem.compute ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s7 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s8 ; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s8 @@ -6859,11 +6859,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5 ; 
GFX1200-TRUE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB9_15 -; GFX1200-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX1200-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX1200-TRUE16-NEXT: s_sub_co_i32 s7, s8, s7 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe ; GFX1200-TRUE16-NEXT: s_add_co_i32 s7, s7, 11 -; GFX1200-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body27 +; GFX1200-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body ; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v6, v3 @@ -6887,7 +6887,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: ; %bb.14: ; %Flow ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v5, s7 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v3, v6 -; GFX1200-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit28 +; GFX1200-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v5, -10, v5 ; GFX1200-TRUE16-NEXT: v_ldexp_f32 v3, v3, v5 @@ -6952,7 +6952,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s6, s5 ; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB9_2 -; GFX1200-FAKE16-NEXT: ; %bb.1: ; %frem.else +; GFX1200-FAKE16-NEXT: ; %bb.1: ; %frem.else20 ; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s6, s5 ; GFX1200-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, 0, s4 ; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -6962,7 +6962,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_branch .LBB9_8 ; GFX1200-FAKE16-NEXT: .LBB9_2: ; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr0 -; GFX1200-FAKE16-NEXT: .LBB9_3: ; %frem.compute +; GFX1200-FAKE16-NEXT: .LBB9_3: ; %frem.compute19 ; GFX1200-FAKE16-NEXT: 
v_frexp_mant_f32_e32 v1, s5 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v0, s6 ; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s6 @@ -6998,11 +6998,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 ; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB9_7 -; GFX1200-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1200-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; GFX1200-FAKE16-NEXT: s_sub_co_i32 s5, s6, s5 ; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe ; GFX1200-FAKE16-NEXT: s_add_co_i32 s5, s5, 11 -; GFX1200-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body +; GFX1200-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body27 ; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v5, v2 @@ -7026,7 +7026,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: ; %bb.6: ; %Flow55 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v4, s5 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v2, v5 -; GFX1200-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit +; GFX1200-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit28 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 ; GFX1200-FAKE16-NEXT: v_ldexp_f32 v2, v2, v4 @@ -7059,7 +7059,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) ; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s8, s7 ; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB9_10 -; GFX1200-FAKE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX1200-FAKE16-NEXT: ; %bb.9: ; %frem.else ; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s8, s7 ; GFX1200-FAKE16-NEXT: v_bfi_b32 v1, 0x7fff, 0, s6 ; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -7070,7 +7070,7 @@ define amdgpu_kernel void @frem_v2f16(ptr 
addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_branch .LBB9_16 ; GFX1200-FAKE16-NEXT: .LBB9_10: ; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr1 -; GFX1200-FAKE16-NEXT: .LBB9_11: ; %frem.compute19 +; GFX1200-FAKE16-NEXT: .LBB9_11: ; %frem.compute ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s7 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s8 ; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s8 @@ -7106,11 +7106,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5 ; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB9_15 -; GFX1200-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX1200-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX1200-FAKE16-NEXT: s_sub_co_i32 s7, s8, s7 ; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe ; GFX1200-FAKE16-NEXT: s_add_co_i32 s7, s7, 11 -; GFX1200-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body27 +; GFX1200-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body ; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v6, v3 @@ -7134,7 +7134,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: ; %bb.14: ; %Flow ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v5, s7 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v3, v6 -; GFX1200-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit28 +; GFX1200-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v5, -10, v5 ; GFX1200-FAKE16-NEXT: v_ldexp_f32 v3, v3, v5 @@ -7220,7 +7220,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: v_cvt_f16_f32_e32 v8, v6 ; SI-NEXT: s_cbranch_vccz .LBB10_2 -; SI-NEXT: ; %bb.1: ; %frem.else +; SI-NEXT: ; 
%bb.1: ; %frem.else86 ; SI-NEXT: v_bfi_b32 v11, s0, 0, v6 ; SI-NEXT: v_cvt_f32_f16_e32 v8, v8 ; SI-NEXT: v_cmp_eq_f32_e32 vcc, v9, v10 @@ -7231,7 +7231,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB10_2: ; SI-NEXT: ; implicit-def: $vgpr8 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB10_3: ; %frem.compute +; SI-NEXT: .LBB10_3: ; %frem.compute85 ; SI-NEXT: s_mov_b32 s3, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v9|, s3 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v8, v9 @@ -7267,10 +7267,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v10, v10, v8, 1.0 ; SI-NEXT: s_cmp_lt_i32 s1, 12 ; SI-NEXT: s_cbranch_scc1 .LBB10_7 -; SI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; SI-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; SI-NEXT: s_sub_i32 s1, s2, s3 ; SI-NEXT: s_add_i32 s1, s1, 11 -; SI-NEXT: .LBB10_5: ; %frem.loop_body +; SI-NEXT: .LBB10_5: ; %frem.loop_body93 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v11, v9 ; SI-NEXT: v_mul_f32_e32 v9, v11, v10 @@ -7285,7 +7285,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB10_5 ; SI-NEXT: ; %bb.6: ; %Flow133 ; SI-NEXT: v_mov_b32_e32 v9, v11 -; SI-NEXT: .LBB10_7: ; %frem.loop_exit +; SI-NEXT: .LBB10_7: ; %frem.loop_exit94 ; SI-NEXT: s_add_i32 s1, s1, -10 ; SI-NEXT: v_ldexp_f32_e64 v9, v9, s1 ; SI-NEXT: v_mul_f32_e32 v10, v9, v10 @@ -7306,7 +7306,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cvt_f32_f16_e64 v11, |v11| ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, v10, v11 ; SI-NEXT: s_cbranch_vccz .LBB10_10 -; SI-NEXT: ; %bb.9: ; %frem.else20 +; SI-NEXT: ; %bb.9: ; %frem.else53 ; SI-NEXT: s_brev_b32 s0, -2 ; SI-NEXT: v_bfi_b32 v12, s0, 0, v4 ; SI-NEXT: v_cvt_f32_f16_e32 v9, v9 @@ -7318,7 +7318,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; 
SI-NEXT: .LBB10_10: ; SI-NEXT: ; implicit-def: $vgpr9 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB10_11: ; %frem.compute19 +; SI-NEXT: .LBB10_11: ; %frem.compute52 ; SI-NEXT: s_mov_b32 s3, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v10|, s3 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v9, v10 @@ -7354,10 +7354,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0 ; SI-NEXT: s_cmp_lt_i32 s1, 12 ; SI-NEXT: s_cbranch_scc1 .LBB10_15 -; SI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; SI-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; SI-NEXT: s_sub_i32 s1, s2, s3 ; SI-NEXT: s_add_i32 s1, s1, 11 -; SI-NEXT: .LBB10_13: ; %frem.loop_body27 +; SI-NEXT: .LBB10_13: ; %frem.loop_body60 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v12, v10 ; SI-NEXT: v_mul_f32_e32 v10, v12, v11 @@ -7372,7 +7372,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB10_13 ; SI-NEXT: ; %bb.14: ; %Flow129 ; SI-NEXT: v_mov_b32_e32 v10, v12 -; SI-NEXT: .LBB10_15: ; %frem.loop_exit28 +; SI-NEXT: .LBB10_15: ; %frem.loop_exit61 ; SI-NEXT: s_add_i32 s1, s1, -10 ; SI-NEXT: v_ldexp_f32_e64 v10, v10, s1 ; SI-NEXT: v_mul_f32_e32 v11, v10, v11 @@ -7393,7 +7393,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cvt_f32_f16_e64 v12, |v12| ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, v11, v12 ; SI-NEXT: s_cbranch_vccz .LBB10_18 -; SI-NEXT: ; %bb.17: ; %frem.else53 +; SI-NEXT: ; %bb.17: ; %frem.else20 ; SI-NEXT: s_brev_b32 s0, -2 ; SI-NEXT: v_bfi_b32 v13, s0, 0, v2 ; SI-NEXT: v_cvt_f32_f16_e32 v10, v10 @@ -7405,7 +7405,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB10_18: ; SI-NEXT: ; implicit-def: $vgpr10 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB10_19: ; %frem.compute52 +; SI-NEXT: .LBB10_19: ; %frem.compute19 ; SI-NEXT: s_mov_b32 s3, 0x7f800000 ; SI-NEXT: 
v_cmp_lt_f32_e64 vcc, |v11|, s3 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v10, v11 @@ -7441,10 +7441,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v12, v12, v10, 1.0 ; SI-NEXT: s_cmp_lt_i32 s1, 12 ; SI-NEXT: s_cbranch_scc1 .LBB10_23 -; SI-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; SI-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; SI-NEXT: s_sub_i32 s1, s2, s3 ; SI-NEXT: s_add_i32 s1, s1, 11 -; SI-NEXT: .LBB10_21: ; %frem.loop_body60 +; SI-NEXT: .LBB10_21: ; %frem.loop_body27 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v13, v11 ; SI-NEXT: v_mul_f32_e32 v11, v13, v12 @@ -7459,7 +7459,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB10_21 ; SI-NEXT: ; %bb.22: ; %Flow125 ; SI-NEXT: v_mov_b32_e32 v11, v13 -; SI-NEXT: .LBB10_23: ; %frem.loop_exit61 +; SI-NEXT: .LBB10_23: ; %frem.loop_exit28 ; SI-NEXT: s_add_i32 s1, s1, -10 ; SI-NEXT: v_ldexp_f32_e64 v11, v11, s1 ; SI-NEXT: v_mul_f32_e32 v12, v11, v12 @@ -7480,7 +7480,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cvt_f32_f16_e64 v13, |v13| ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, v12, v13 ; SI-NEXT: s_cbranch_vccz .LBB10_26 -; SI-NEXT: ; %bb.25: ; %frem.else86 +; SI-NEXT: ; %bb.25: ; %frem.else ; SI-NEXT: s_brev_b32 s0, -2 ; SI-NEXT: v_bfi_b32 v14, s0, 0, v0 ; SI-NEXT: v_cvt_f32_f16_e32 v11, v11 @@ -7492,7 +7492,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB10_26: ; SI-NEXT: ; implicit-def: $vgpr11 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB10_27: ; %frem.compute85 +; SI-NEXT: .LBB10_27: ; %frem.compute ; SI-NEXT: s_mov_b32 s3, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v12|, s3 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v11, v12 @@ -7528,10 +7528,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v13, 
v13, v11, 1.0 ; SI-NEXT: s_cmp_lt_i32 s1, 12 ; SI-NEXT: s_cbranch_scc1 .LBB10_31 -; SI-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; SI-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; SI-NEXT: s_sub_i32 s1, s2, s3 ; SI-NEXT: s_add_i32 s1, s1, 11 -; SI-NEXT: .LBB10_29: ; %frem.loop_body93 +; SI-NEXT: .LBB10_29: ; %frem.loop_body ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v14, v12 ; SI-NEXT: v_mul_f32_e32 v12, v14, v13 @@ -7546,7 +7546,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB10_29 ; SI-NEXT: ; %bb.30: ; %Flow ; SI-NEXT: v_mov_b32_e32 v12, v14 -; SI-NEXT: .LBB10_31: ; %frem.loop_exit94 +; SI-NEXT: .LBB10_31: ; %frem.loop_exit ; SI-NEXT: s_add_i32 s1, s1, -10 ; SI-NEXT: v_ldexp_f32_e64 v12, v12, s1 ; SI-NEXT: v_mul_f32_e32 v13, v12, v13 @@ -7650,7 +7650,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_and_b32_e32 v9, 0x7fffffff, v7 ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB10_2 -; CI-NEXT: ; %bb.1: ; %frem.else +; CI-NEXT: ; %bb.1: ; %frem.else86 ; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 ; CI-NEXT: v_bfi_b32 v11, s0, 0, v6 ; CI-NEXT: v_cmp_eq_f32_e32 vcc, v10, v9 @@ -7659,7 +7659,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB10_8 ; CI-NEXT: .LBB10_2: ; CI-NEXT: ; implicit-def: $vgpr8 -; CI-NEXT: .LBB10_3: ; %frem.compute +; CI-NEXT: .LBB10_3: ; %frem.compute85 ; CI-NEXT: v_frexp_exp_i32_f32_e32 v13, v10 ; CI-NEXT: v_frexp_mant_f32_e32 v8, v10 ; CI-NEXT: v_frexp_mant_f32_e32 v10, v9 @@ -7684,10 +7684,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v10 ; CI-NEXT: v_div_fixup_f32 v12, v12, v9, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB10_7 -; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; CI-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; CI-NEXT: v_sub_i32_e32 v10, 
vcc, v13, v14 ; CI-NEXT: v_add_i32_e32 v10, vcc, 11, v10 -; CI-NEXT: .LBB10_5: ; %frem.loop_body +; CI-NEXT: .LBB10_5: ; %frem.loop_body93 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v13, v11 ; CI-NEXT: v_mul_f32_e32 v11, v13, v12 @@ -7702,7 +7702,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB10_5 ; CI-NEXT: ; %bb.6: ; %Flow133 ; CI-NEXT: v_mov_b32_e32 v11, v13 -; CI-NEXT: .LBB10_7: ; %frem.loop_exit +; CI-NEXT: .LBB10_7: ; %frem.loop_exit94 ; CI-NEXT: v_add_i32_e32 v10, vcc, -10, v10 ; CI-NEXT: v_ldexp_f32_e32 v10, v11, v10 ; CI-NEXT: v_mul_f32_e32 v11, v10, v12 @@ -7723,7 +7723,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cvt_f32_f16_e64 v10, |v10| ; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v11, v10 ; CI-NEXT: s_cbranch_vccz .LBB10_10 -; CI-NEXT: ; %bb.9: ; %frem.else20 +; CI-NEXT: ; %bb.9: ; %frem.else53 ; CI-NEXT: v_cvt_f32_f16_e32 v9, v9 ; CI-NEXT: s_brev_b32 s0, -2 ; CI-NEXT: v_bfi_b32 v12, s0, 0, v4 @@ -7733,7 +7733,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB10_16 ; CI-NEXT: .LBB10_10: ; CI-NEXT: ; implicit-def: $vgpr9 -; CI-NEXT: .LBB10_11: ; %frem.compute19 +; CI-NEXT: .LBB10_11: ; %frem.compute52 ; CI-NEXT: v_frexp_exp_i32_f32_e32 v14, v11 ; CI-NEXT: v_frexp_mant_f32_e32 v9, v11 ; CI-NEXT: v_frexp_mant_f32_e32 v11, v10 @@ -7758,10 +7758,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v11 ; CI-NEXT: v_div_fixup_f32 v13, v13, v10, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB10_15 -; CI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; CI-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; CI-NEXT: v_sub_i32_e32 v11, vcc, v14, v15 ; CI-NEXT: v_add_i32_e32 v11, vcc, 11, v11 -; CI-NEXT: .LBB10_13: ; %frem.loop_body27 +; CI-NEXT: .LBB10_13: ; %frem.loop_body60 ; CI-NEXT: ; =>This Inner Loop 
Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v14, v12 ; CI-NEXT: v_mul_f32_e32 v12, v14, v13 @@ -7776,7 +7776,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB10_13 ; CI-NEXT: ; %bb.14: ; %Flow129 ; CI-NEXT: v_mov_b32_e32 v12, v14 -; CI-NEXT: .LBB10_15: ; %frem.loop_exit28 +; CI-NEXT: .LBB10_15: ; %frem.loop_exit61 ; CI-NEXT: v_add_i32_e32 v11, vcc, -10, v11 ; CI-NEXT: v_ldexp_f32_e32 v11, v12, v11 ; CI-NEXT: v_mul_f32_e32 v12, v11, v13 @@ -7797,7 +7797,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cvt_f32_f16_e64 v11, |v11| ; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v12, v11 ; CI-NEXT: s_cbranch_vccz .LBB10_18 -; CI-NEXT: ; %bb.17: ; %frem.else53 +; CI-NEXT: ; %bb.17: ; %frem.else20 ; CI-NEXT: v_cvt_f32_f16_e32 v10, v10 ; CI-NEXT: s_brev_b32 s0, -2 ; CI-NEXT: v_bfi_b32 v13, s0, 0, v2 @@ -7807,7 +7807,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB10_24 ; CI-NEXT: .LBB10_18: ; CI-NEXT: ; implicit-def: $vgpr10 -; CI-NEXT: .LBB10_19: ; %frem.compute52 +; CI-NEXT: .LBB10_19: ; %frem.compute19 ; CI-NEXT: v_frexp_exp_i32_f32_e32 v15, v12 ; CI-NEXT: v_frexp_mant_f32_e32 v10, v12 ; CI-NEXT: v_frexp_mant_f32_e32 v12, v11 @@ -7832,10 +7832,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v12 ; CI-NEXT: v_div_fixup_f32 v14, v14, v11, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB10_23 -; CI-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; CI-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; CI-NEXT: v_sub_i32_e32 v12, vcc, v15, v16 ; CI-NEXT: v_add_i32_e32 v12, vcc, 11, v12 -; CI-NEXT: .LBB10_21: ; %frem.loop_body60 +; CI-NEXT: .LBB10_21: ; %frem.loop_body27 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v15, v13 ; CI-NEXT: v_mul_f32_e32 v13, v15, v14 @@ -7850,7 +7850,7 @@ define amdgpu_kernel void @frem_v4f16(ptr 
addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB10_21 ; CI-NEXT: ; %bb.22: ; %Flow125 ; CI-NEXT: v_mov_b32_e32 v13, v15 -; CI-NEXT: .LBB10_23: ; %frem.loop_exit61 +; CI-NEXT: .LBB10_23: ; %frem.loop_exit28 ; CI-NEXT: v_add_i32_e32 v12, vcc, -10, v12 ; CI-NEXT: v_ldexp_f32_e32 v12, v13, v12 ; CI-NEXT: v_mul_f32_e32 v13, v12, v14 @@ -7871,7 +7871,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cvt_f32_f16_e64 v12, |v12| ; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v13, v12 ; CI-NEXT: s_cbranch_vccz .LBB10_26 -; CI-NEXT: ; %bb.25: ; %frem.else86 +; CI-NEXT: ; %bb.25: ; %frem.else ; CI-NEXT: v_cvt_f32_f16_e32 v11, v11 ; CI-NEXT: s_brev_b32 s0, -2 ; CI-NEXT: v_bfi_b32 v14, s0, 0, v0 @@ -7881,7 +7881,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB10_32 ; CI-NEXT: .LBB10_26: ; CI-NEXT: ; implicit-def: $vgpr11 -; CI-NEXT: .LBB10_27: ; %frem.compute85 +; CI-NEXT: .LBB10_27: ; %frem.compute ; CI-NEXT: v_frexp_exp_i32_f32_e32 v16, v13 ; CI-NEXT: v_frexp_mant_f32_e32 v11, v13 ; CI-NEXT: v_frexp_mant_f32_e32 v13, v12 @@ -7906,10 +7906,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v13 ; CI-NEXT: v_div_fixup_f32 v15, v15, v12, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB10_31 -; CI-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; CI-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; CI-NEXT: v_sub_i32_e32 v13, vcc, v16, v17 ; CI-NEXT: v_add_i32_e32 v13, vcc, 11, v13 -; CI-NEXT: .LBB10_29: ; %frem.loop_body93 +; CI-NEXT: .LBB10_29: ; %frem.loop_body ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v16, v14 ; CI-NEXT: v_mul_f32_e32 v14, v16, v15 @@ -7924,7 +7924,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB10_29 ; CI-NEXT: ; %bb.30: ; %Flow ; CI-NEXT: v_mov_b32_e32 v14, v16 -; CI-NEXT: .LBB10_31: ; 
%frem.loop_exit94 +; CI-NEXT: .LBB10_31: ; %frem.loop_exit ; CI-NEXT: v_add_i32_e32 v13, vcc, -10, v13 ; CI-NEXT: v_ldexp_f32_e32 v13, v14, v13 ; CI-NEXT: v_mul_f32_e32 v14, v13, v15 @@ -8013,7 +8013,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cvt_f32_f16_e64 v5, |v2| ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v6, v5 ; VI-NEXT: s_cbranch_vccz .LBB10_2 -; VI-NEXT: ; %bb.1: ; %frem.else +; VI-NEXT: ; %bb.1: ; %frem.else86 ; VI-NEXT: s_movk_i32 s2, 0x7fff ; VI-NEXT: v_bfi_b32 v4, s2, 0, v0 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v6, v5 @@ -8022,7 +8022,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB10_8 ; VI-NEXT: .LBB10_2: ; VI-NEXT: ; implicit-def: $vgpr4 -; VI-NEXT: .LBB10_3: ; %frem.compute +; VI-NEXT: .LBB10_3: ; %frem.compute85 ; VI-NEXT: v_frexp_exp_i32_f32_e32 v9, v6 ; VI-NEXT: v_frexp_mant_f32_e32 v4, v6 ; VI-NEXT: v_frexp_mant_f32_e32 v6, v5 @@ -8047,10 +8047,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v6 ; VI-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB10_7 -; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; VI-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; VI-NEXT: v_sub_u32_e32 v6, vcc, v9, v10 ; VI-NEXT: v_add_u32_e32 v6, vcc, 11, v6 -; VI-NEXT: .LBB10_5: ; %frem.loop_body +; VI-NEXT: .LBB10_5: ; %frem.loop_body93 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v9, v7 ; VI-NEXT: v_mul_f32_e32 v7, v9, v8 @@ -8065,7 +8065,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB10_5 ; VI-NEXT: ; %bb.6: ; %Flow133 ; VI-NEXT: v_mov_b32_e32 v7, v9 -; VI-NEXT: .LBB10_7: ; %frem.loop_exit +; VI-NEXT: .LBB10_7: ; %frem.loop_exit94 ; VI-NEXT: v_add_u32_e32 v6, vcc, -10, v6 ; VI-NEXT: v_ldexp_f32 v6, v7, v6 ; VI-NEXT: v_mul_f32_e32 v7, v6, v8 @@ -8085,7 +8085,7 @@ define 
amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cvt_f32_f16_e64 v8, |v6| ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v9, v8 ; VI-NEXT: s_cbranch_vccz .LBB10_10 -; VI-NEXT: ; %bb.9: ; %frem.else20 +; VI-NEXT: ; %bb.9: ; %frem.else53 ; VI-NEXT: s_movk_i32 s2, 0x7fff ; VI-NEXT: v_bfi_b32 v7, s2, 0, v5 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v9, v8 @@ -8094,7 +8094,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB10_16 ; VI-NEXT: .LBB10_10: ; VI-NEXT: ; implicit-def: $vgpr7 -; VI-NEXT: .LBB10_11: ; %frem.compute19 +; VI-NEXT: .LBB10_11: ; %frem.compute52 ; VI-NEXT: v_frexp_exp_i32_f32_e32 v12, v9 ; VI-NEXT: v_frexp_mant_f32_e32 v7, v9 ; VI-NEXT: v_frexp_mant_f32_e32 v9, v8 @@ -8119,10 +8119,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v9 ; VI-NEXT: v_div_fixup_f32 v11, v11, v8, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB10_15 -; VI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; VI-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; VI-NEXT: v_sub_u32_e32 v9, vcc, v12, v13 ; VI-NEXT: v_add_u32_e32 v9, vcc, 11, v9 -; VI-NEXT: .LBB10_13: ; %frem.loop_body27 +; VI-NEXT: .LBB10_13: ; %frem.loop_body60 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v12, v10 ; VI-NEXT: v_mul_f32_e32 v10, v12, v11 @@ -8137,7 +8137,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB10_13 ; VI-NEXT: ; %bb.14: ; %Flow129 ; VI-NEXT: v_mov_b32_e32 v10, v12 -; VI-NEXT: .LBB10_15: ; %frem.loop_exit28 +; VI-NEXT: .LBB10_15: ; %frem.loop_exit61 ; VI-NEXT: v_add_u32_e32 v9, vcc, -10, v9 ; VI-NEXT: v_ldexp_f32 v9, v10, v9 ; VI-NEXT: v_mul_f32_e32 v10, v9, v11 @@ -8155,7 +8155,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cvt_f32_f16_e64 v9, |v3| ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v10, v9 ; VI-NEXT: s_cbranch_vccz 
.LBB10_18 -; VI-NEXT: ; %bb.17: ; %frem.else53 +; VI-NEXT: ; %bb.17: ; %frem.else20 ; VI-NEXT: s_movk_i32 s2, 0x7fff ; VI-NEXT: v_bfi_b32 v8, s2, 0, v1 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v10, v9 @@ -8164,7 +8164,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB10_24 ; VI-NEXT: .LBB10_18: ; VI-NEXT: ; implicit-def: $vgpr8 -; VI-NEXT: .LBB10_19: ; %frem.compute52 +; VI-NEXT: .LBB10_19: ; %frem.compute19 ; VI-NEXT: v_frexp_exp_i32_f32_e32 v13, v10 ; VI-NEXT: v_frexp_mant_f32_e32 v8, v10 ; VI-NEXT: v_frexp_mant_f32_e32 v10, v9 @@ -8189,10 +8189,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v10 ; VI-NEXT: v_div_fixup_f32 v12, v12, v9, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB10_23 -; VI-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; VI-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; VI-NEXT: v_sub_u32_e32 v10, vcc, v13, v14 ; VI-NEXT: v_add_u32_e32 v10, vcc, 11, v10 -; VI-NEXT: .LBB10_21: ; %frem.loop_body60 +; VI-NEXT: .LBB10_21: ; %frem.loop_body27 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v13, v11 ; VI-NEXT: v_mul_f32_e32 v11, v13, v12 @@ -8207,7 +8207,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB10_21 ; VI-NEXT: ; %bb.22: ; %Flow125 ; VI-NEXT: v_mov_b32_e32 v11, v13 -; VI-NEXT: .LBB10_23: ; %frem.loop_exit61 +; VI-NEXT: .LBB10_23: ; %frem.loop_exit28 ; VI-NEXT: v_add_u32_e32 v10, vcc, -10, v10 ; VI-NEXT: v_ldexp_f32 v10, v11, v10 ; VI-NEXT: v_mul_f32_e32 v11, v10, v12 @@ -8227,7 +8227,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cvt_f32_f16_e64 v12, |v10| ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v13, v12 ; VI-NEXT: s_cbranch_vccz .LBB10_26 -; VI-NEXT: ; %bb.25: ; %frem.else86 +; VI-NEXT: ; %bb.25: ; %frem.else ; VI-NEXT: s_movk_i32 s2, 0x7fff ; VI-NEXT: v_bfi_b32 v11, s2, 0, v9 ; VI-NEXT: 
v_cmp_eq_f32_e32 vcc, v13, v12 @@ -8236,7 +8236,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB10_32 ; VI-NEXT: .LBB10_26: ; VI-NEXT: ; implicit-def: $vgpr11 -; VI-NEXT: .LBB10_27: ; %frem.compute85 +; VI-NEXT: .LBB10_27: ; %frem.compute ; VI-NEXT: v_frexp_exp_i32_f32_e32 v16, v13 ; VI-NEXT: v_frexp_mant_f32_e32 v11, v13 ; VI-NEXT: v_frexp_mant_f32_e32 v13, v12 @@ -8261,10 +8261,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v13 ; VI-NEXT: v_div_fixup_f32 v15, v15, v12, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB10_31 -; VI-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; VI-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; VI-NEXT: v_sub_u32_e32 v13, vcc, v16, v17 ; VI-NEXT: v_add_u32_e32 v13, vcc, 11, v13 -; VI-NEXT: .LBB10_29: ; %frem.loop_body93 +; VI-NEXT: .LBB10_29: ; %frem.loop_body ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v16, v14 ; VI-NEXT: v_mul_f32_e32 v14, v16, v15 @@ -8279,7 +8279,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB10_29 ; VI-NEXT: ; %bb.30: ; %Flow ; VI-NEXT: v_mov_b32_e32 v14, v16 -; VI-NEXT: .LBB10_31: ; %frem.loop_exit94 +; VI-NEXT: .LBB10_31: ; %frem.loop_exit ; VI-NEXT: v_add_u32_e32 v13, vcc, -10, v13 ; VI-NEXT: v_ldexp_f32 v13, v14, v13 ; VI-NEXT: v_mul_f32_e32 v14, v13, v15 @@ -8332,7 +8332,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cvt_f32_f16_e64 v5, |v0| ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v6, v5 ; GFX9-NEXT: s_cbranch_vccz .LBB10_2 -; GFX9-NEXT: ; %bb.1: ; %frem.else +; GFX9-NEXT: ; %bb.1: ; %frem.else86 ; GFX9-NEXT: s_movk_i32 s2, 0x7fff ; GFX9-NEXT: v_bfi_b32 v4, s2, 0, v2 ; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v6, v5 @@ -8341,7 +8341,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch 
.LBB10_8 ; GFX9-NEXT: .LBB10_2: ; GFX9-NEXT: ; implicit-def: $vgpr4 -; GFX9-NEXT: .LBB10_3: ; %frem.compute +; GFX9-NEXT: .LBB10_3: ; %frem.compute85 ; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v9, v6 ; GFX9-NEXT: v_frexp_mant_f32_e32 v4, v6 ; GFX9-NEXT: v_frexp_mant_f32_e32 v6, v5 @@ -8366,10 +8366,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v6 ; GFX9-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB10_7 -; GFX9-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX9-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; GFX9-NEXT: v_sub_u32_e32 v6, v9, v10 ; GFX9-NEXT: v_add_u32_e32 v6, 11, v6 -; GFX9-NEXT: .LBB10_5: ; %frem.loop_body +; GFX9-NEXT: .LBB10_5: ; %frem.loop_body93 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v9, v7 ; GFX9-NEXT: v_mul_f32_e32 v7, v9, v8 @@ -8384,7 +8384,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB10_5 ; GFX9-NEXT: ; %bb.6: ; %Flow133 ; GFX9-NEXT: v_mov_b32_e32 v7, v9 -; GFX9-NEXT: .LBB10_7: ; %frem.loop_exit +; GFX9-NEXT: .LBB10_7: ; %frem.loop_exit94 ; GFX9-NEXT: v_add_u32_e32 v6, -10, v6 ; GFX9-NEXT: v_ldexp_f32 v6, v7, v6 ; GFX9-NEXT: v_mul_f32_e32 v7, v6, v8 @@ -8403,7 +8403,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cvt_f32_f16_sdwa v7, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v8, v7 ; GFX9-NEXT: s_cbranch_vccz .LBB10_10 -; GFX9-NEXT: ; %bb.9: ; %frem.else20 +; GFX9-NEXT: ; %bb.9: ; %frem.else53 ; GFX9-NEXT: s_movk_i32 s2, 0x7fff ; GFX9-NEXT: v_bfi_b32 v6, s2, 0, v5 ; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v8, v7 @@ -8412,7 +8412,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB10_16 ; GFX9-NEXT: .LBB10_10: ; GFX9-NEXT: ; implicit-def: $vgpr6 -; GFX9-NEXT: .LBB10_11: ; 
%frem.compute19 +; GFX9-NEXT: .LBB10_11: ; %frem.compute52 ; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v11, v8 ; GFX9-NEXT: v_frexp_mant_f32_e32 v6, v8 ; GFX9-NEXT: v_frexp_mant_f32_e32 v8, v7 @@ -8437,10 +8437,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v8 ; GFX9-NEXT: v_div_fixup_f32 v10, v10, v7, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB10_15 -; GFX9-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX9-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; GFX9-NEXT: v_sub_u32_e32 v8, v11, v12 ; GFX9-NEXT: v_add_u32_e32 v8, 11, v8 -; GFX9-NEXT: .LBB10_13: ; %frem.loop_body27 +; GFX9-NEXT: .LBB10_13: ; %frem.loop_body60 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v11, v9 ; GFX9-NEXT: v_mul_f32_e32 v9, v11, v10 @@ -8455,7 +8455,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB10_13 ; GFX9-NEXT: ; %bb.14: ; %Flow129 ; GFX9-NEXT: v_mov_b32_e32 v9, v11 -; GFX9-NEXT: .LBB10_15: ; %frem.loop_exit28 +; GFX9-NEXT: .LBB10_15: ; %frem.loop_exit61 ; GFX9-NEXT: v_add_u32_e32 v8, -10, v8 ; GFX9-NEXT: v_ldexp_f32 v8, v9, v8 ; GFX9-NEXT: v_mul_f32_e32 v9, v8, v10 @@ -8473,7 +8473,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cvt_f32_f16_e64 v8, |v1| ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v9, v8 ; GFX9-NEXT: s_cbranch_vccz .LBB10_18 -; GFX9-NEXT: ; %bb.17: ; %frem.else53 +; GFX9-NEXT: ; %bb.17: ; %frem.else20 ; GFX9-NEXT: s_movk_i32 s2, 0x7fff ; GFX9-NEXT: v_bfi_b32 v7, s2, 0, v3 ; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v9, v8 @@ -8482,7 +8482,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB10_24 ; GFX9-NEXT: .LBB10_18: ; GFX9-NEXT: ; implicit-def: $vgpr7 -; GFX9-NEXT: .LBB10_19: ; %frem.compute52 +; GFX9-NEXT: .LBB10_19: ; %frem.compute19 ; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v12, v9 ; GFX9-NEXT: 
v_frexp_mant_f32_e32 v7, v9 ; GFX9-NEXT: v_frexp_mant_f32_e32 v9, v8 @@ -8507,10 +8507,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v9 ; GFX9-NEXT: v_div_fixup_f32 v11, v11, v8, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB10_23 -; GFX9-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; GFX9-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; GFX9-NEXT: v_sub_u32_e32 v9, v12, v13 ; GFX9-NEXT: v_add_u32_e32 v9, 11, v9 -; GFX9-NEXT: .LBB10_21: ; %frem.loop_body60 +; GFX9-NEXT: .LBB10_21: ; %frem.loop_body27 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v12, v10 ; GFX9-NEXT: v_mul_f32_e32 v10, v12, v11 @@ -8525,7 +8525,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB10_21 ; GFX9-NEXT: ; %bb.22: ; %Flow125 ; GFX9-NEXT: v_mov_b32_e32 v10, v12 -; GFX9-NEXT: .LBB10_23: ; %frem.loop_exit61 +; GFX9-NEXT: .LBB10_23: ; %frem.loop_exit28 ; GFX9-NEXT: v_add_u32_e32 v9, -10, v9 ; GFX9-NEXT: v_ldexp_f32 v9, v10, v9 ; GFX9-NEXT: v_mul_f32_e32 v10, v9, v11 @@ -8544,7 +8544,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cvt_f32_f16_sdwa v10, |v1| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v11, v10 ; GFX9-NEXT: s_cbranch_vccz .LBB10_26 -; GFX9-NEXT: ; %bb.25: ; %frem.else86 +; GFX9-NEXT: ; %bb.25: ; %frem.else ; GFX9-NEXT: s_movk_i32 s2, 0x7fff ; GFX9-NEXT: v_bfi_b32 v9, s2, 0, v8 ; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v11, v10 @@ -8553,7 +8553,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB10_32 ; GFX9-NEXT: .LBB10_26: ; GFX9-NEXT: ; implicit-def: $vgpr9 -; GFX9-NEXT: .LBB10_27: ; %frem.compute85 +; GFX9-NEXT: .LBB10_27: ; %frem.compute ; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v14, v11 ; GFX9-NEXT: v_frexp_mant_f32_e32 v9, v11 ; GFX9-NEXT: v_frexp_mant_f32_e32 v11, 
v10 @@ -8578,10 +8578,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v11 ; GFX9-NEXT: v_div_fixup_f32 v13, v13, v10, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB10_31 -; GFX9-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; GFX9-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX9-NEXT: v_sub_u32_e32 v11, v14, v15 ; GFX9-NEXT: v_add_u32_e32 v11, 11, v11 -; GFX9-NEXT: .LBB10_29: ; %frem.loop_body93 +; GFX9-NEXT: .LBB10_29: ; %frem.loop_body ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v14, v12 ; GFX9-NEXT: v_mul_f32_e32 v12, v14, v13 @@ -8596,7 +8596,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB10_29 ; GFX9-NEXT: ; %bb.30: ; %Flow ; GFX9-NEXT: v_mov_b32_e32 v12, v14 -; GFX9-NEXT: .LBB10_31: ; %frem.loop_exit94 +; GFX9-NEXT: .LBB10_31: ; %frem.loop_exit ; GFX9-NEXT: v_add_u32_e32 v11, -10, v11 ; GFX9-NEXT: v_ldexp_f32 v11, v12, v11 ; GFX9-NEXT: v_mul_f32_e32 v12, v11, v13 @@ -8652,7 +8652,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cvt_f32_f16_e64 v5, |v0| ; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v5 ; GFX10-NEXT: s_cbranch_vccz .LBB10_2 -; GFX10-NEXT: ; %bb.1: ; %frem.else +; GFX10-NEXT: ; %bb.1: ; %frem.else86 ; GFX10-NEXT: v_bfi_b32 v4, 0x7fff, 0, v2 ; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v5 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v4, vcc_lo @@ -8660,7 +8660,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB10_8 ; GFX10-NEXT: .LBB10_2: ; GFX10-NEXT: ; implicit-def: $vgpr4 -; GFX10-NEXT: .LBB10_3: ; %frem.compute +; GFX10-NEXT: .LBB10_3: ; %frem.compute85 ; GFX10-NEXT: v_frexp_mant_f32_e32 v4, v6 ; GFX10-NEXT: v_frexp_mant_f32_e32 v8, v5 ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v7, v6 @@ -8687,10 +8687,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr 
addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v8 ; GFX10-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB10_7 -; GFX10-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX10-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 11 -; GFX10-NEXT: .LBB10_5: ; %frem.loop_body +; GFX10-NEXT: .LBB10_5: ; %frem.loop_body93 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v9, v6 ; GFX10-NEXT: s_add_i32 s2, s2, -11 @@ -8706,7 +8706,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.6: ; %Flow133 ; GFX10-NEXT: v_mov_b32_e32 v8, s2 ; GFX10-NEXT: v_mov_b32_e32 v6, v9 -; GFX10-NEXT: .LBB10_7: ; %frem.loop_exit +; GFX10-NEXT: .LBB10_7: ; %frem.loop_exit94 ; GFX10-NEXT: v_add_nc_u32_e32 v8, -10, v8 ; GFX10-NEXT: v_ldexp_f32 v6, v6, v8 ; GFX10-NEXT: v_mul_f32_e32 v7, v6, v7 @@ -8724,7 +8724,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cvt_f32_f16_e64 v8, |v5| ; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v8, v7 ; GFX10-NEXT: s_cbranch_vccz .LBB10_10 -; GFX10-NEXT: ; %bb.9: ; %frem.else20 +; GFX10-NEXT: ; %bb.9: ; %frem.else53 ; GFX10-NEXT: v_bfi_b32 v6, 0x7fff, 0, v5 ; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v8, v7 ; GFX10-NEXT: v_cndmask_b32_e32 v6, v5, v6, vcc_lo @@ -8732,7 +8732,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB10_16 ; GFX10-NEXT: .LBB10_10: ; GFX10-NEXT: ; implicit-def: $vgpr6 -; GFX10-NEXT: .LBB10_11: ; %frem.compute19 +; GFX10-NEXT: .LBB10_11: ; %frem.compute52 ; GFX10-NEXT: v_frexp_mant_f32_e32 v6, v8 ; GFX10-NEXT: v_frexp_mant_f32_e32 v10, v7 ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v9, v8 @@ -8759,10 +8759,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v10 ; GFX10-NEXT: v_div_fixup_f32 v9, v9, 
v7, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB10_15 -; GFX10-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX10-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 11 -; GFX10-NEXT: .LBB10_13: ; %frem.loop_body27 +; GFX10-NEXT: .LBB10_13: ; %frem.loop_body60 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v11, v8 ; GFX10-NEXT: s_add_i32 s2, s2, -11 @@ -8778,7 +8778,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.14: ; %Flow129 ; GFX10-NEXT: v_mov_b32_e32 v10, s2 ; GFX10-NEXT: v_mov_b32_e32 v8, v11 -; GFX10-NEXT: .LBB10_15: ; %frem.loop_exit28 +; GFX10-NEXT: .LBB10_15: ; %frem.loop_exit61 ; GFX10-NEXT: v_add_nc_u32_e32 v10, -10, v10 ; GFX10-NEXT: v_ldexp_f32 v8, v8, v10 ; GFX10-NEXT: v_mul_f32_e32 v9, v8, v9 @@ -8795,7 +8795,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cvt_f32_f16_e64 v8, |v1| ; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v9, v8 ; GFX10-NEXT: s_cbranch_vccz .LBB10_18 -; GFX10-NEXT: ; %bb.17: ; %frem.else53 +; GFX10-NEXT: ; %bb.17: ; %frem.else20 ; GFX10-NEXT: v_bfi_b32 v7, 0x7fff, 0, v3 ; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v9, v8 ; GFX10-NEXT: v_cndmask_b32_e32 v7, v3, v7, vcc_lo @@ -8803,7 +8803,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB10_24 ; GFX10-NEXT: .LBB10_18: ; GFX10-NEXT: ; implicit-def: $vgpr7 -; GFX10-NEXT: .LBB10_19: ; %frem.compute52 +; GFX10-NEXT: .LBB10_19: ; %frem.compute19 ; GFX10-NEXT: v_frexp_mant_f32_e32 v7, v9 ; GFX10-NEXT: v_frexp_mant_f32_e32 v11, v8 ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v10, v9 @@ -8830,10 +8830,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v11 ; GFX10-NEXT: v_div_fixup_f32 v10, v10, v8, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB10_23 -; GFX10-NEXT: ; %bb.20: ; 
%frem.loop_body60.preheader +; GFX10-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 11 -; GFX10-NEXT: .LBB10_21: ; %frem.loop_body60 +; GFX10-NEXT: .LBB10_21: ; %frem.loop_body27 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v12, v9 ; GFX10-NEXT: s_add_i32 s2, s2, -11 @@ -8849,7 +8849,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.22: ; %Flow125 ; GFX10-NEXT: v_mov_b32_e32 v11, s2 ; GFX10-NEXT: v_mov_b32_e32 v9, v12 -; GFX10-NEXT: .LBB10_23: ; %frem.loop_exit61 +; GFX10-NEXT: .LBB10_23: ; %frem.loop_exit28 ; GFX10-NEXT: v_add_nc_u32_e32 v11, -10, v11 ; GFX10-NEXT: v_ldexp_f32 v9, v9, v11 ; GFX10-NEXT: v_mul_f32_e32 v10, v9, v10 @@ -8867,7 +8867,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cvt_f32_f16_e64 v11, |v8| ; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v11, v10 ; GFX10-NEXT: s_cbranch_vccz .LBB10_26 -; GFX10-NEXT: ; %bb.25: ; %frem.else86 +; GFX10-NEXT: ; %bb.25: ; %frem.else ; GFX10-NEXT: v_bfi_b32 v9, 0x7fff, 0, v8 ; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v11, v10 ; GFX10-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc_lo @@ -8875,7 +8875,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB10_32 ; GFX10-NEXT: .LBB10_26: ; GFX10-NEXT: ; implicit-def: $vgpr9 -; GFX10-NEXT: .LBB10_27: ; %frem.compute85 +; GFX10-NEXT: .LBB10_27: ; %frem.compute ; GFX10-NEXT: v_frexp_mant_f32_e32 v9, v11 ; GFX10-NEXT: v_frexp_mant_f32_e32 v13, v10 ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v12, v11 @@ -8902,10 +8902,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v13 ; GFX10-NEXT: v_div_fixup_f32 v12, v12, v10, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB10_31 -; GFX10-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; GFX10-NEXT: ; %bb.28: ; 
%frem.loop_body.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 11 -; GFX10-NEXT: .LBB10_29: ; %frem.loop_body93 +; GFX10-NEXT: .LBB10_29: ; %frem.loop_body ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v14, v11 ; GFX10-NEXT: s_add_i32 s2, s2, -11 @@ -8921,7 +8921,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.30: ; %Flow ; GFX10-NEXT: v_mov_b32_e32 v13, s2 ; GFX10-NEXT: v_mov_b32_e32 v11, v14 -; GFX10-NEXT: .LBB10_31: ; %frem.loop_exit94 +; GFX10-NEXT: .LBB10_31: ; %frem.loop_exit ; GFX10-NEXT: v_add_nc_u32_e32 v13, -10, v13 ; GFX10-NEXT: v_ldexp_f32 v11, v11, v13 ; GFX10-NEXT: v_mul_f32_e32 v12, v11, v12 @@ -8975,7 +8975,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v5 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_2 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else +; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else86 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v5 @@ -8986,7 +8986,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_branch .LBB10_8 ; GFX11-TRUE16-NEXT: .LBB10_2: ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 -; GFX11-TRUE16-NEXT: .LBB10_3: ; %frem.compute +; GFX11-TRUE16-NEXT: .LBB10_3: ; %frem.compute85 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v4, v6 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v8, v5 ; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v7, v6 @@ -9022,11 +9022,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB10_7 -; GFX11-TRUE16-NEXT: ; %bb.4: ; 
%frem.loop_body.preheader +; GFX11-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body +; GFX11-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body93 ; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v9, v6 @@ -9046,7 +9046,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: ; %bb.6: ; %Flow133 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s2 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v9 -; GFX11-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit +; GFX11-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit94 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, -10, v8 ; GFX11-TRUE16-NEXT: v_ldexp_f32 v6, v6, v8 @@ -9073,7 +9073,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v9, v8 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_10 -; GFX11-TRUE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX11-TRUE16-NEXT: ; %bb.9: ; %frem.else53 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v9, v8 @@ -9084,7 +9084,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_branch .LBB10_16 ; GFX11-TRUE16-NEXT: .LBB10_10: ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 -; GFX11-TRUE16-NEXT: .LBB10_11: ; %frem.compute19 +; GFX11-TRUE16-NEXT: .LBB10_11: ; %frem.compute52 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v7, v9 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v11, v8 ; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v10, 
v9 @@ -9120,11 +9120,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_div_fixup_f32 v10, v10, v8, 1.0 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB10_15 -; GFX11-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX11-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body27 +; GFX11-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body60 ; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v12, v9 @@ -9144,7 +9144,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: ; %bb.14: ; %Flow129 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v11, s2 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v9, v12 -; GFX11-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit28 +; GFX11-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit61 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, -10, v11 ; GFX11-TRUE16-NEXT: v_ldexp_f32 v9, v9, v11 @@ -9168,7 +9168,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v10, v9 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_18 -; GFX11-TRUE16-NEXT: ; %bb.17: ; %frem.else53 +; GFX11-TRUE16-NEXT: ; %bb.17: ; %frem.else20 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v1.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v10, v9 @@ -9179,7 +9179,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_branch .LBB10_24 ; 
GFX11-TRUE16-NEXT: .LBB10_18: ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 -; GFX11-TRUE16-NEXT: .LBB10_19: ; %frem.compute52 +; GFX11-TRUE16-NEXT: .LBB10_19: ; %frem.compute19 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v8, v10 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v12, v9 ; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v11, v10 @@ -9215,11 +9215,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB10_23 -; GFX11-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; GFX11-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body60 +; GFX11-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body27 ; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v10 @@ -9239,7 +9239,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: ; %bb.22: ; %Flow125 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v12, s2 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v10, v13 -; GFX11-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit61 +; GFX11-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit28 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, -10, v12 ; GFX11-TRUE16-NEXT: v_ldexp_f32 v10, v10, v12 @@ -9266,7 +9266,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v13, v12 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_26 -; GFX11-TRUE16-NEXT: ; %bb.25: ; %frem.else86 +; 
GFX11-TRUE16-NEXT: ; %bb.25: ; %frem.else ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v9.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v13, v12 @@ -9277,7 +9277,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_branch .LBB10_32 ; GFX11-TRUE16-NEXT: .LBB10_26: ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 -; GFX11-TRUE16-NEXT: .LBB10_27: ; %frem.compute85 +; GFX11-TRUE16-NEXT: .LBB10_27: ; %frem.compute ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v11, v13 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v15, v12 ; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v14, v13 @@ -9313,11 +9313,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_div_fixup_f32 v14, v14, v12, 1.0 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB10_31 -; GFX11-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; GFX11-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body93 +; GFX11-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body ; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v16, v13 @@ -9337,7 +9337,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: ; %bb.30: ; %Flow ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v15, s2 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v16 -; GFX11-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit94 +; GFX11-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, -10, v15 ; GFX11-TRUE16-NEXT: v_ldexp_f32 
v13, v13, v15 @@ -9400,7 +9400,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v5 ; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB10_2 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %frem.else +; GFX11-FAKE16-NEXT: ; %bb.1: ; %frem.else86 ; GFX11-FAKE16-NEXT: v_bfi_b32 v4, 0x7fff, 0, v0 ; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -9409,7 +9409,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_branch .LBB10_8 ; GFX11-FAKE16-NEXT: .LBB10_2: ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr4 -; GFX11-FAKE16-NEXT: .LBB10_3: ; %frem.compute +; GFX11-FAKE16-NEXT: .LBB10_3: ; %frem.compute85 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v4, v6 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v8, v5 ; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v7, v6 @@ -9445,11 +9445,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0 ; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB10_7 -; GFX11-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX11-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body +; GFX11-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body93 ; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v9, v6 @@ -9469,7 +9469,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: ; %bb.6: ; %Flow133 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v8, s2 ; GFX11-FAKE16-NEXT: 
v_mov_b32_e32 v6, v9 -; GFX11-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit +; GFX11-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit94 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, -10, v8 ; GFX11-FAKE16-NEXT: v_ldexp_f32 v6, v6, v8 @@ -9495,7 +9495,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v9, v8 ; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB10_10 -; GFX11-FAKE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX11-FAKE16-NEXT: ; %bb.9: ; %frem.else53 ; GFX11-FAKE16-NEXT: v_bfi_b32 v7, 0x7fff, 0, v5 ; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v9, v8 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -9504,7 +9504,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_branch .LBB10_16 ; GFX11-FAKE16-NEXT: .LBB10_10: ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr7 -; GFX11-FAKE16-NEXT: .LBB10_11: ; %frem.compute19 +; GFX11-FAKE16-NEXT: .LBB10_11: ; %frem.compute52 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v7, v9 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v11, v8 ; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v10, v9 @@ -9540,11 +9540,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_div_fixup_f32 v10, v10, v8, 1.0 ; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB10_15 -; GFX11-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX11-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body27 +; GFX11-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body60 ; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-FAKE16-NEXT: 
s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v12, v9 @@ -9564,7 +9564,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: ; %bb.14: ; %Flow129 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v11, s2 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v9, v12 -; GFX11-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit28 +; GFX11-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit61 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, -10, v11 ; GFX11-FAKE16-NEXT: v_ldexp_f32 v9, v9, v11 @@ -9587,7 +9587,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v10, v9 ; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB10_18 -; GFX11-FAKE16-NEXT: ; %bb.17: ; %frem.else53 +; GFX11-FAKE16-NEXT: ; %bb.17: ; %frem.else20 ; GFX11-FAKE16-NEXT: v_bfi_b32 v8, 0x7fff, 0, v1 ; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v10, v9 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -9596,7 +9596,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_branch .LBB10_24 ; GFX11-FAKE16-NEXT: .LBB10_18: ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr8 -; GFX11-FAKE16-NEXT: .LBB10_19: ; %frem.compute52 +; GFX11-FAKE16-NEXT: .LBB10_19: ; %frem.compute19 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v8, v10 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v12, v9 ; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v11, v10 @@ -9632,11 +9632,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0 ; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB10_23 -; GFX11-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; GFX11-FAKE16-NEXT: ; %bb.20: ; 
%frem.loop_body27.preheader ; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body60 +; GFX11-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body27 ; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v13, v10 @@ -9656,7 +9656,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: ; %bb.22: ; %Flow125 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v12, s2 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v10, v13 -; GFX11-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit61 +; GFX11-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit28 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, -10, v12 ; GFX11-FAKE16-NEXT: v_ldexp_f32 v10, v10, v12 @@ -9682,7 +9682,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v13, v12 ; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB10_26 -; GFX11-FAKE16-NEXT: ; %bb.25: ; %frem.else86 +; GFX11-FAKE16-NEXT: ; %bb.25: ; %frem.else ; GFX11-FAKE16-NEXT: v_bfi_b32 v11, 0x7fff, 0, v9 ; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v13, v12 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -9691,7 +9691,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_branch .LBB10_32 ; GFX11-FAKE16-NEXT: .LBB10_26: ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr11 -; GFX11-FAKE16-NEXT: .LBB10_27: ; %frem.compute85 +; GFX11-FAKE16-NEXT: .LBB10_27: ; %frem.compute ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v11, v13 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v15, v12 ; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v14, v13 @@ -9727,11 
+9727,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_div_fixup_f32 v14, v14, v12, 1.0 ; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB10_31 -; GFX11-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; GFX11-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body93 +; GFX11-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body ; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v16, v13 @@ -9751,7 +9751,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: ; %bb.30: ; %Flow ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v15, s2 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v13, v16 -; GFX11-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit94 +; GFX11-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, -10, v15 ; GFX11-FAKE16-NEXT: v_ldexp_f32 v13, v13, v15 @@ -9816,7 +9816,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s8, s6 ; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB10_2 -; GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else +; GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else86 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v0.l, s5 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0 ; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s8, s6 @@ -9828,7 +9828,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_branch .LBB10_8 ; GFX1150-TRUE16-NEXT: .LBB10_2: ; 
GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr0 -; GFX1150-TRUE16-NEXT: .LBB10_3: ; %frem.compute +; GFX1150-TRUE16-NEXT: .LBB10_3: ; %frem.compute85 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s6 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v0, s8 ; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s8 @@ -9863,11 +9863,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 ; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB10_7 -; GFX1150-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1150-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; GFX1150-TRUE16-NEXT: s_sub_i32 s6, s8, s6 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_add_i32 s6, s6, 11 -; GFX1150-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body +; GFX1150-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body93 ; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v5, v2 @@ -9889,7 +9889,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: ; %bb.6: ; %Flow133 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, s6 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v2, v5 -; GFX1150-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit +; GFX1150-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit94 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 ; GFX1150-TRUE16-NEXT: v_ldexp_f32 v2, v2, v4 @@ -9919,7 +9919,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s10, s9 ; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB10_10 -; GFX1150-TRUE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX1150-TRUE16-NEXT: ; 
%bb.9: ; %frem.else53 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, s8 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0 ; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s10, s9 @@ -9931,7 +9931,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_branch .LBB10_16 ; GFX1150-TRUE16-NEXT: .LBB10_10: ; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr1 -; GFX1150-TRUE16-NEXT: .LBB10_11: ; %frem.compute19 +; GFX1150-TRUE16-NEXT: .LBB10_11: ; %frem.compute52 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s9 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s10 ; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s10 @@ -9966,11 +9966,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5 ; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB10_15 -; GFX1150-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX1150-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; GFX1150-TRUE16-NEXT: s_sub_i32 s9, s10, s9 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_add_i32 s9, s9, 11 -; GFX1150-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body27 +; GFX1150-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body60 ; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v6, v3 @@ -9992,7 +9992,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: ; %bb.14: ; %Flow129 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v5, s9 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v3, v6 -; GFX1150-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit28 +; GFX1150-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit61 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v5, -10, v5 ; 
GFX1150-TRUE16-NEXT: v_ldexp_f32 v3, v3, v5 @@ -10020,7 +10020,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s10, s9 ; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB10_18 -; GFX1150-TRUE16-NEXT: ; %bb.17: ; %frem.else53 +; GFX1150-TRUE16-NEXT: ; %bb.17: ; %frem.else20 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, s7 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0 ; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s10, s9 @@ -10032,7 +10032,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_branch .LBB10_24 ; GFX1150-TRUE16-NEXT: .LBB10_18: ; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr2 -; GFX1150-TRUE16-NEXT: .LBB10_19: ; %frem.compute52 +; GFX1150-TRUE16-NEXT: .LBB10_19: ; %frem.compute19 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v3, s9 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s10 ; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v5, s10 @@ -10067,11 +10067,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v6 ; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB10_23 -; GFX1150-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; GFX1150-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; GFX1150-TRUE16-NEXT: s_sub_i32 s9, s10, s9 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_add_i32 s9, s9, 11 -; GFX1150-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body60 +; GFX1150-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body27 ; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v7, v4 @@ -10093,7 +10093,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; 
GFX1150-TRUE16-NEXT: ; %bb.22: ; %Flow125 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v6, s9 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, v7 -; GFX1150-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit61 +; GFX1150-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit28 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v6, -10, v6 ; GFX1150-TRUE16-NEXT: v_ldexp_f32 v4, v4, v6 @@ -10123,7 +10123,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s12, s11 ; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB10_26 -; GFX1150-TRUE16-NEXT: ; %bb.25: ; %frem.else86 +; GFX1150-TRUE16-NEXT: ; %bb.25: ; %frem.else ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, s10 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0 ; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s12, s11 @@ -10135,7 +10135,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_branch .LBB10_32 ; GFX1150-TRUE16-NEXT: .LBB10_26: ; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr3 -; GFX1150-TRUE16-NEXT: .LBB10_27: ; %frem.compute85 +; GFX1150-TRUE16-NEXT: .LBB10_27: ; %frem.compute ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v4, s11 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v3, s12 ; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v6, s12 @@ -10170,11 +10170,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v7 ; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB10_31 -; GFX1150-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; GFX1150-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX1150-TRUE16-NEXT: s_sub_i32 s11, s12, s11 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_add_i32 s11, s11, 11 -; GFX1150-TRUE16-NEXT: .LBB10_29: ; 
%frem.loop_body93 +; GFX1150-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body ; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v8, v5 @@ -10196,7 +10196,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: ; %bb.30: ; %Flow ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v7, s11 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v5, v8 -; GFX1150-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit94 +; GFX1150-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v7, -10, v7 ; GFX1150-TRUE16-NEXT: v_ldexp_f32 v5, v5, v7 @@ -10277,7 +10277,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s8, s6 ; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB10_2 -; GFX1150-FAKE16-NEXT: ; %bb.1: ; %frem.else +; GFX1150-FAKE16-NEXT: ; %bb.1: ; %frem.else86 ; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s8, s6 ; GFX1150-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, 0, s5 ; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -10287,7 +10287,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_branch .LBB10_8 ; GFX1150-FAKE16-NEXT: .LBB10_2: ; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr0 -; GFX1150-FAKE16-NEXT: .LBB10_3: ; %frem.compute +; GFX1150-FAKE16-NEXT: .LBB10_3: ; %frem.compute85 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s6 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v0, s8 ; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s8 @@ -10322,11 +10322,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 ; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 
; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB10_7 -; GFX1150-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1150-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; GFX1150-FAKE16-NEXT: s_sub_i32 s6, s8, s6 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: s_add_i32 s6, s6, 11 -; GFX1150-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body +; GFX1150-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body93 ; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v5, v2 @@ -10348,7 +10348,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: ; %bb.6: ; %Flow133 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v4, s6 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v2, v5 -; GFX1150-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit +; GFX1150-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit94 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 ; GFX1150-FAKE16-NEXT: v_ldexp_f32 v2, v2, v4 @@ -10377,7 +10377,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s10, s9 ; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB10_10 -; GFX1150-FAKE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX1150-FAKE16-NEXT: ; %bb.9: ; %frem.else53 ; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s10, s9 ; GFX1150-FAKE16-NEXT: v_bfi_b32 v1, 0x7fff, 0, s8 ; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -10387,7 +10387,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_branch .LBB10_16 ; GFX1150-FAKE16-NEXT: .LBB10_10: ; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr1 -; GFX1150-FAKE16-NEXT: .LBB10_11: ; %frem.compute19 +; GFX1150-FAKE16-NEXT: .LBB10_11: ; %frem.compute52 ; 
GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s9 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s10 ; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s10 @@ -10422,11 +10422,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5 ; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB10_15 -; GFX1150-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX1150-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; GFX1150-FAKE16-NEXT: s_sub_i32 s9, s10, s9 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: s_add_i32 s9, s9, 11 -; GFX1150-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body27 +; GFX1150-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body60 ; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v6, v3 @@ -10448,7 +10448,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: ; %bb.14: ; %Flow129 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v5, s9 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v3, v6 -; GFX1150-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit28 +; GFX1150-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit61 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v5, -10, v5 ; GFX1150-FAKE16-NEXT: v_ldexp_f32 v3, v3, v5 @@ -10475,7 +10475,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s10, s9 ; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB10_18 -; GFX1150-FAKE16-NEXT: ; %bb.17: ; %frem.else53 +; GFX1150-FAKE16-NEXT: ; %bb.17: ; %frem.else20 ; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s10, s9 ; GFX1150-FAKE16-NEXT: v_bfi_b32 v2, 0x7fff, 0, s7 ; 
GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -10485,7 +10485,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_branch .LBB10_24 ; GFX1150-FAKE16-NEXT: .LBB10_18: ; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr2 -; GFX1150-FAKE16-NEXT: .LBB10_19: ; %frem.compute52 +; GFX1150-FAKE16-NEXT: .LBB10_19: ; %frem.compute19 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v3, s9 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s10 ; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v5, s10 @@ -10520,11 +10520,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v6 ; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB10_23 -; GFX1150-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; GFX1150-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; GFX1150-FAKE16-NEXT: s_sub_i32 s9, s10, s9 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: s_add_i32 s9, s9, 11 -; GFX1150-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body60 +; GFX1150-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body27 ; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v7, v4 @@ -10546,7 +10546,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: ; %bb.22: ; %Flow125 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v6, s9 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v4, v7 -; GFX1150-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit61 +; GFX1150-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit28 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v6, -10, v6 ; GFX1150-FAKE16-NEXT: v_ldexp_f32 v4, v4, v6 @@ -10575,7 +10575,7 @@ define amdgpu_kernel void @frem_v4f16(ptr 
addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s12, s11 ; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB10_26 -; GFX1150-FAKE16-NEXT: ; %bb.25: ; %frem.else86 +; GFX1150-FAKE16-NEXT: ; %bb.25: ; %frem.else ; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s12, s11 ; GFX1150-FAKE16-NEXT: v_bfi_b32 v3, 0x7fff, 0, s10 ; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -10585,7 +10585,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_branch .LBB10_32 ; GFX1150-FAKE16-NEXT: .LBB10_26: ; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr3 -; GFX1150-FAKE16-NEXT: .LBB10_27: ; %frem.compute85 +; GFX1150-FAKE16-NEXT: .LBB10_27: ; %frem.compute ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v4, s11 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v3, s12 ; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v6, s12 @@ -10620,11 +10620,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v7 ; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB10_31 -; GFX1150-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; GFX1150-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX1150-FAKE16-NEXT: s_sub_i32 s11, s12, s11 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: s_add_i32 s11, s11, 11 -; GFX1150-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body93 +; GFX1150-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body ; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v8, v5 @@ -10646,7 +10646,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: ; %bb.30: ; %Flow ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v7, s11 ; GFX1150-FAKE16-NEXT: 
v_mov_b32_e32 v5, v8 -; GFX1150-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit94 +; GFX1150-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v7, -10, v7 ; GFX1150-FAKE16-NEXT: v_ldexp_f32 v5, v5, v7 @@ -10724,7 +10724,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s8, s6 ; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB10_2 -; GFX1200-TRUE16-NEXT: ; %bb.1: ; %frem.else +; GFX1200-TRUE16-NEXT: ; %bb.1: ; %frem.else86 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v0.l, s5 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0 ; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s8, s6 @@ -10736,7 +10736,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_branch .LBB10_8 ; GFX1200-TRUE16-NEXT: .LBB10_2: ; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr0 -; GFX1200-TRUE16-NEXT: .LBB10_3: ; %frem.compute +; GFX1200-TRUE16-NEXT: .LBB10_3: ; %frem.compute85 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s6 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v0, s8 ; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s8 @@ -10771,11 +10771,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 ; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB10_7 -; GFX1200-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1200-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; GFX1200-TRUE16-NEXT: s_sub_co_i32 s6, s8, s6 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe ; GFX1200-TRUE16-NEXT: s_add_co_i32 s6, s6, 11 -; GFX1200-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body +; GFX1200-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body93 ; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-TRUE16-NEXT: 
s_delay_alu instid0(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v5, v2 @@ -10799,7 +10799,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: ; %bb.6: ; %Flow133 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v4, s6 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v2, v5 -; GFX1200-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit +; GFX1200-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit94 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 ; GFX1200-TRUE16-NEXT: v_ldexp_f32 v2, v2, v4 @@ -10833,7 +10833,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) ; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s10, s9 ; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB10_10 -; GFX1200-TRUE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX1200-TRUE16-NEXT: ; %bb.9: ; %frem.else53 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, s8 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0 ; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s10, s9 @@ -10845,7 +10845,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_branch .LBB10_16 ; GFX1200-TRUE16-NEXT: .LBB10_10: ; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr1 -; GFX1200-TRUE16-NEXT: .LBB10_11: ; %frem.compute19 +; GFX1200-TRUE16-NEXT: .LBB10_11: ; %frem.compute52 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s9 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s10 ; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s10 @@ -10881,11 +10881,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5 ; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB10_15 -; GFX1200-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX1200-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; 
GFX1200-TRUE16-NEXT: s_sub_co_i32 s9, s10, s9 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe ; GFX1200-TRUE16-NEXT: s_add_co_i32 s9, s9, 11 -; GFX1200-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body27 +; GFX1200-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body60 ; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v6, v3 @@ -10909,7 +10909,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: ; %bb.14: ; %Flow129 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v5, s9 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v3, v6 -; GFX1200-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit28 +; GFX1200-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit61 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v5, -10, v5 ; GFX1200-TRUE16-NEXT: v_ldexp_f32 v3, v3, v5 @@ -10940,7 +10940,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) ; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s10, s9 ; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB10_18 -; GFX1200-TRUE16-NEXT: ; %bb.17: ; %frem.else53 +; GFX1200-TRUE16-NEXT: ; %bb.17: ; %frem.else20 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, s7 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0 ; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s10, s9 @@ -10953,7 +10953,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_branch .LBB10_24 ; GFX1200-TRUE16-NEXT: .LBB10_18: ; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr2 -; GFX1200-TRUE16-NEXT: .LBB10_19: ; %frem.compute52 +; GFX1200-TRUE16-NEXT: .LBB10_19: ; %frem.compute19 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v3, s9 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s10 ; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v5, s10 @@ -10989,11 +10989,11 @@ define amdgpu_kernel void @frem_v4f16(ptr 
addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v6 ; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB10_23 -; GFX1200-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; GFX1200-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; GFX1200-TRUE16-NEXT: s_sub_co_i32 s9, s10, s9 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe ; GFX1200-TRUE16-NEXT: s_add_co_i32 s9, s9, 11 -; GFX1200-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body60 +; GFX1200-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body27 ; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v7, v4 @@ -11017,7 +11017,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: ; %bb.22: ; %Flow125 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v6, s9 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v4, v7 -; GFX1200-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit61 +; GFX1200-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit28 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v6, -10, v6 ; GFX1200-TRUE16-NEXT: v_ldexp_f32 v4, v4, v6 @@ -11051,7 +11051,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) ; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s12, s11 ; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB10_26 -; GFX1200-TRUE16-NEXT: ; %bb.25: ; %frem.else86 +; GFX1200-TRUE16-NEXT: ; %bb.25: ; %frem.else ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v3.l, s10 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0 ; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s12, s11 @@ -11063,7 +11063,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_branch .LBB10_32 ; GFX1200-TRUE16-NEXT: .LBB10_26: ; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr3 -; 
GFX1200-TRUE16-NEXT: .LBB10_27: ; %frem.compute85 +; GFX1200-TRUE16-NEXT: .LBB10_27: ; %frem.compute ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v4, s11 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v3, s12 ; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v6, s12 @@ -11099,11 +11099,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v7 ; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB10_31 -; GFX1200-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; GFX1200-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX1200-TRUE16-NEXT: s_sub_co_i32 s11, s12, s11 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe ; GFX1200-TRUE16-NEXT: s_add_co_i32 s11, s11, 11 -; GFX1200-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body93 +; GFX1200-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body ; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v8, v5 @@ -11127,7 +11127,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: ; %bb.30: ; %Flow ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v7, s11 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v5, v8 -; GFX1200-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit94 +; GFX1200-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v7, -10, v7 ; GFX1200-TRUE16-NEXT: v_ldexp_f32 v5, v5, v7 @@ -11215,7 +11215,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s8, s6 ; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB10_2 -; GFX1200-FAKE16-NEXT: ; %bb.1: ; %frem.else +; GFX1200-FAKE16-NEXT: ; %bb.1: ; %frem.else86 ; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s8, s6 ; 
GFX1200-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, 0, s5 ; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -11225,7 +11225,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_branch .LBB10_8 ; GFX1200-FAKE16-NEXT: .LBB10_2: ; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr0 -; GFX1200-FAKE16-NEXT: .LBB10_3: ; %frem.compute +; GFX1200-FAKE16-NEXT: .LBB10_3: ; %frem.compute85 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s6 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v0, s8 ; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s8 @@ -11261,11 +11261,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 ; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB10_7 -; GFX1200-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1200-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; GFX1200-FAKE16-NEXT: s_sub_co_i32 s6, s8, s6 ; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe ; GFX1200-FAKE16-NEXT: s_add_co_i32 s6, s6, 11 -; GFX1200-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body +; GFX1200-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body93 ; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v5, v2 @@ -11289,7 +11289,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: ; %bb.6: ; %Flow133 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v4, s6 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v2, v5 -; GFX1200-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit +; GFX1200-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit94 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 ; GFX1200-FAKE16-NEXT: v_ldexp_f32 v2, v2, v4 @@ -11322,7 +11322,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr 
addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) ; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s10, s9 ; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB10_10 -; GFX1200-FAKE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX1200-FAKE16-NEXT: ; %bb.9: ; %frem.else53 ; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s10, s9 ; GFX1200-FAKE16-NEXT: v_bfi_b32 v1, 0x7fff, 0, s8 ; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -11333,7 +11333,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_branch .LBB10_16 ; GFX1200-FAKE16-NEXT: .LBB10_10: ; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr1 -; GFX1200-FAKE16-NEXT: .LBB10_11: ; %frem.compute19 +; GFX1200-FAKE16-NEXT: .LBB10_11: ; %frem.compute52 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s9 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s10 ; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s10 @@ -11369,11 +11369,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5 ; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB10_15 -; GFX1200-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX1200-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; GFX1200-FAKE16-NEXT: s_sub_co_i32 s9, s10, s9 ; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe ; GFX1200-FAKE16-NEXT: s_add_co_i32 s9, s9, 11 -; GFX1200-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body27 +; GFX1200-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body60 ; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v6, v3 @@ -11397,7 +11397,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: ; %bb.14: ; %Flow129 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v5, s9 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v3, v6 -; GFX1200-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit28 +; 
GFX1200-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit61 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v5, -10, v5 ; GFX1200-FAKE16-NEXT: v_ldexp_f32 v3, v3, v5 @@ -11427,7 +11427,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) ; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s10, s9 ; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB10_18 -; GFX1200-FAKE16-NEXT: ; %bb.17: ; %frem.else53 +; GFX1200-FAKE16-NEXT: ; %bb.17: ; %frem.else20 ; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s10, s9 ; GFX1200-FAKE16-NEXT: v_bfi_b32 v2, 0x7fff, 0, s7 ; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -11438,7 +11438,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_branch .LBB10_24 ; GFX1200-FAKE16-NEXT: .LBB10_18: ; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr2 -; GFX1200-FAKE16-NEXT: .LBB10_19: ; %frem.compute52 +; GFX1200-FAKE16-NEXT: .LBB10_19: ; %frem.compute19 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v3, s9 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s10 ; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v5, s10 @@ -11474,11 +11474,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v6 ; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB10_23 -; GFX1200-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; GFX1200-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; GFX1200-FAKE16-NEXT: s_sub_co_i32 s9, s10, s9 ; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe ; GFX1200-FAKE16-NEXT: s_add_co_i32 s9, s9, 11 -; GFX1200-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body60 +; GFX1200-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body27 ; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; 
GFX1200-FAKE16-NEXT: v_mov_b32_e32 v7, v4 @@ -11502,7 +11502,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: ; %bb.22: ; %Flow125 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v6, s9 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v4, v7 -; GFX1200-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit61 +; GFX1200-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit28 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v6, -10, v6 ; GFX1200-FAKE16-NEXT: v_ldexp_f32 v4, v4, v6 @@ -11535,7 +11535,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) ; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s12, s11 ; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB10_26 -; GFX1200-FAKE16-NEXT: ; %bb.25: ; %frem.else86 +; GFX1200-FAKE16-NEXT: ; %bb.25: ; %frem.else ; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s12, s11 ; GFX1200-FAKE16-NEXT: v_bfi_b32 v3, 0x7fff, 0, s10 ; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -11546,7 +11546,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_branch .LBB10_32 ; GFX1200-FAKE16-NEXT: .LBB10_26: ; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr3 -; GFX1200-FAKE16-NEXT: .LBB10_27: ; %frem.compute85 +; GFX1200-FAKE16-NEXT: .LBB10_27: ; %frem.compute ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v4, s11 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v3, s12 ; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v6, s12 @@ -11582,11 +11582,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v7 ; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB10_31 -; GFX1200-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; GFX1200-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX1200-FAKE16-NEXT: s_sub_co_i32 
s11, s12, s11 ; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe ; GFX1200-FAKE16-NEXT: s_add_co_i32 s11, s11, 11 -; GFX1200-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body93 +; GFX1200-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body ; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v8, v5 @@ -11610,7 +11610,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: ; %bb.30: ; %Flow ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v7, s11 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v5, v8 -; GFX1200-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit94 +; GFX1200-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v7, -10, v7 ; GFX1200-FAKE16-NEXT: v_ldexp_f32 v5, v5, v7 @@ -11698,7 +11698,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v2| ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: s_cbranch_vccz .LBB11_2 -; SI-NEXT: ; %bb.1: ; %frem.else +; SI-NEXT: ; %bb.1: ; %frem.else16 ; SI-NEXT: s_brev_b32 s2, -2 ; SI-NEXT: v_bfi_b32 v4, s2, 0, v0 ; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v2| @@ -11709,7 +11709,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB11_2: ; SI-NEXT: ; implicit-def: $vgpr4 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB11_3: ; %frem.compute +; SI-NEXT: .LBB11_3: ; %frem.compute15 ; SI-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v0|, s6 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v4, v0 @@ -11745,10 +11745,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; SI-NEXT: s_cmp_lt_i32 s3, 13 ; SI-NEXT: s_cbranch_scc1 .LBB11_7 -; SI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; SI-NEXT: ; %bb.4: ; 
%frem.loop_body23.preheader ; SI-NEXT: s_sub_i32 s3, s4, s5 ; SI-NEXT: s_add_i32 s3, s3, 12 -; SI-NEXT: .LBB11_5: ; %frem.loop_body +; SI-NEXT: .LBB11_5: ; %frem.loop_body23 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v7, v5 ; SI-NEXT: v_mul_f32_e32 v5, v7, v6 @@ -11763,7 +11763,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB11_5 ; SI-NEXT: ; %bb.6: ; %Flow51 ; SI-NEXT: v_mov_b32_e32 v5, v7 -; SI-NEXT: .LBB11_7: ; %frem.loop_exit +; SI-NEXT: .LBB11_7: ; %frem.loop_exit24 ; SI-NEXT: s_add_i32 s3, s3, -11 ; SI-NEXT: v_ldexp_f32_e64 v5, v5, s3 ; SI-NEXT: v_mul_f32_e32 v6, v5, v6 @@ -11779,7 +11779,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v3| ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: s_cbranch_vccz .LBB11_10 -; SI-NEXT: ; %bb.9: ; %frem.else16 +; SI-NEXT: ; %bb.9: ; %frem.else ; SI-NEXT: s_brev_b32 s2, -2 ; SI-NEXT: v_bfi_b32 v5, s2, 0, v1 ; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v3| @@ -11790,7 +11790,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB11_10: ; SI-NEXT: ; implicit-def: $vgpr5 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB11_11: ; %frem.compute15 +; SI-NEXT: .LBB11_11: ; %frem.compute ; SI-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, s6 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v5, v1 @@ -11826,10 +11826,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0 ; SI-NEXT: s_cmp_lt_i32 s3, 13 ; SI-NEXT: s_cbranch_scc1 .LBB11_15 -; SI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; SI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; SI-NEXT: s_sub_i32 s3, s4, s5 ; SI-NEXT: s_add_i32 s3, s3, 12 -; SI-NEXT: .LBB11_13: ; %frem.loop_body23 +; SI-NEXT: .LBB11_13: ; %frem.loop_body ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; 
SI-NEXT: v_mov_b32_e32 v8, v6 ; SI-NEXT: v_mul_f32_e32 v6, v8, v7 @@ -11844,7 +11844,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB11_13 ; SI-NEXT: ; %bb.14: ; %Flow ; SI-NEXT: v_mov_b32_e32 v6, v8 -; SI-NEXT: .LBB11_15: ; %frem.loop_exit24 +; SI-NEXT: .LBB11_15: ; %frem.loop_exit ; SI-NEXT: s_add_i32 s3, s3, -11 ; SI-NEXT: v_ldexp_f32_e64 v6, v6, s3 ; SI-NEXT: v_mul_f32_e32 v7, v6, v7 @@ -11889,7 +11889,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v2| ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB11_2 -; CI-NEXT: ; %bb.1: ; %frem.else +; CI-NEXT: ; %bb.1: ; %frem.else16 ; CI-NEXT: s_brev_b32 s2, -2 ; CI-NEXT: v_bfi_b32 v4, s2, 0, v0 ; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v2| @@ -11898,7 +11898,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB11_8 ; CI-NEXT: .LBB11_2: ; CI-NEXT: ; implicit-def: $vgpr4 -; CI-NEXT: .LBB11_3: ; %frem.compute +; CI-NEXT: .LBB11_3: ; %frem.compute15 ; CI-NEXT: v_frexp_mant_f32_e64 v5, |v2| ; CI-NEXT: v_ldexp_f32_e64 v5, v5, 1 ; CI-NEXT: v_div_scale_f32 v11, s[2:3], v5, v5, 1.0 @@ -11923,10 +11923,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v6 ; CI-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB11_7 -; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; CI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; CI-NEXT: v_sub_i32_e32 v6, vcc, v9, v10 ; CI-NEXT: v_add_i32_e32 v6, vcc, 12, v6 -; CI-NEXT: .LBB11_5: ; %frem.loop_body +; CI-NEXT: .LBB11_5: ; %frem.loop_body23 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v9, v7 ; CI-NEXT: v_mul_f32_e32 v7, v9, v8 @@ -11941,7 +11941,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz 
.LBB11_5 ; CI-NEXT: ; %bb.6: ; %Flow51 ; CI-NEXT: v_mov_b32_e32 v7, v9 -; CI-NEXT: .LBB11_7: ; %frem.loop_exit +; CI-NEXT: .LBB11_7: ; %frem.loop_exit24 ; CI-NEXT: v_add_i32_e32 v6, vcc, -11, v6 ; CI-NEXT: v_ldexp_f32_e32 v6, v7, v6 ; CI-NEXT: v_mul_f32_e32 v7, v6, v8 @@ -11957,7 +11957,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v3| ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB11_10 -; CI-NEXT: ; %bb.9: ; %frem.else16 +; CI-NEXT: ; %bb.9: ; %frem.else ; CI-NEXT: s_brev_b32 s2, -2 ; CI-NEXT: v_bfi_b32 v5, s2, 0, v1 ; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v3| @@ -11966,7 +11966,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB11_16 ; CI-NEXT: .LBB11_10: ; CI-NEXT: ; implicit-def: $vgpr5 -; CI-NEXT: .LBB11_11: ; %frem.compute15 +; CI-NEXT: .LBB11_11: ; %frem.compute ; CI-NEXT: v_frexp_mant_f32_e64 v6, |v3| ; CI-NEXT: v_ldexp_f32_e64 v6, v6, 1 ; CI-NEXT: v_div_scale_f32 v12, s[2:3], v6, v6, 1.0 @@ -11991,10 +11991,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v7 ; CI-NEXT: v_div_fixup_f32 v9, v9, v6, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB11_15 -; CI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; CI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; CI-NEXT: v_sub_i32_e32 v7, vcc, v10, v11 ; CI-NEXT: v_add_i32_e32 v7, vcc, 12, v7 -; CI-NEXT: .LBB11_13: ; %frem.loop_body23 +; CI-NEXT: .LBB11_13: ; %frem.loop_body ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v10, v8 ; CI-NEXT: v_mul_f32_e32 v8, v10, v9 @@ -12009,7 +12009,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB11_13 ; CI-NEXT: ; %bb.14: ; %Flow ; CI-NEXT: v_mov_b32_e32 v8, v10 -; CI-NEXT: .LBB11_15: ; %frem.loop_exit24 +; CI-NEXT: .LBB11_15: ; %frem.loop_exit ; CI-NEXT: 
v_add_i32_e32 v7, vcc, -11, v7 ; CI-NEXT: v_ldexp_f32_e32 v7, v8, v7 ; CI-NEXT: v_mul_f32_e32 v8, v7, v9 @@ -12054,7 +12054,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v2| ; VI-NEXT: s_and_b64 vcc, exec, s[2:3] ; VI-NEXT: s_cbranch_vccz .LBB11_2 -; VI-NEXT: ; %bb.1: ; %frem.else +; VI-NEXT: ; %bb.1: ; %frem.else16 ; VI-NEXT: s_brev_b32 s2, -2 ; VI-NEXT: v_bfi_b32 v4, s2, 0, v0 ; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v2| @@ -12063,7 +12063,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB11_8 ; VI-NEXT: .LBB11_2: ; VI-NEXT: ; implicit-def: $vgpr4 -; VI-NEXT: .LBB11_3: ; %frem.compute +; VI-NEXT: .LBB11_3: ; %frem.compute15 ; VI-NEXT: v_frexp_mant_f32_e64 v5, |v2| ; VI-NEXT: v_ldexp_f32 v5, v5, 1 ; VI-NEXT: v_div_scale_f32 v11, s[2:3], v5, v5, 1.0 @@ -12088,10 +12088,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v6 ; VI-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB11_7 -; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; VI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; VI-NEXT: v_sub_u32_e32 v6, vcc, v9, v10 ; VI-NEXT: v_add_u32_e32 v6, vcc, 12, v6 -; VI-NEXT: .LBB11_5: ; %frem.loop_body +; VI-NEXT: .LBB11_5: ; %frem.loop_body23 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v9, v7 ; VI-NEXT: v_mul_f32_e32 v7, v9, v8 @@ -12106,7 +12106,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB11_5 ; VI-NEXT: ; %bb.6: ; %Flow51 ; VI-NEXT: v_mov_b32_e32 v7, v9 -; VI-NEXT: .LBB11_7: ; %frem.loop_exit +; VI-NEXT: .LBB11_7: ; %frem.loop_exit24 ; VI-NEXT: v_add_u32_e32 v6, vcc, -11, v6 ; VI-NEXT: v_ldexp_f32 v6, v7, v6 ; VI-NEXT: v_mul_f32_e32 v7, v6, v8 @@ -12122,7 +12122,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr 
addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v3| ; VI-NEXT: s_and_b64 vcc, exec, s[2:3] ; VI-NEXT: s_cbranch_vccz .LBB11_10 -; VI-NEXT: ; %bb.9: ; %frem.else16 +; VI-NEXT: ; %bb.9: ; %frem.else ; VI-NEXT: s_brev_b32 s2, -2 ; VI-NEXT: v_bfi_b32 v5, s2, 0, v1 ; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v3| @@ -12131,7 +12131,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB11_16 ; VI-NEXT: .LBB11_10: ; VI-NEXT: ; implicit-def: $vgpr5 -; VI-NEXT: .LBB11_11: ; %frem.compute15 +; VI-NEXT: .LBB11_11: ; %frem.compute ; VI-NEXT: v_frexp_mant_f32_e64 v6, |v3| ; VI-NEXT: v_ldexp_f32 v6, v6, 1 ; VI-NEXT: v_div_scale_f32 v12, s[2:3], v6, v6, 1.0 @@ -12156,10 +12156,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v7 ; VI-NEXT: v_div_fixup_f32 v9, v9, v6, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB11_15 -; VI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; VI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; VI-NEXT: v_sub_u32_e32 v7, vcc, v10, v11 ; VI-NEXT: v_add_u32_e32 v7, vcc, 12, v7 -; VI-NEXT: .LBB11_13: ; %frem.loop_body23 +; VI-NEXT: .LBB11_13: ; %frem.loop_body ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v10, v8 ; VI-NEXT: v_mul_f32_e32 v8, v10, v9 @@ -12174,7 +12174,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB11_13 ; VI-NEXT: ; %bb.14: ; %Flow ; VI-NEXT: v_mov_b32_e32 v8, v10 -; VI-NEXT: .LBB11_15: ; %frem.loop_exit24 +; VI-NEXT: .LBB11_15: ; %frem.loop_exit ; VI-NEXT: v_add_u32_e32 v7, vcc, -11, v7 ; VI-NEXT: v_ldexp_f32 v7, v8, v7 ; VI-NEXT: v_mul_f32_e32 v8, v7, v9 @@ -12214,7 +12214,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v2| ; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB11_2 -; GFX9-NEXT: ; %bb.1: ; %frem.else +; 
GFX9-NEXT: ; %bb.1: ; %frem.else16 ; GFX9-NEXT: s_brev_b32 s2, -2 ; GFX9-NEXT: v_bfi_b32 v4, s2, 0, v0 ; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v2| @@ -12223,7 +12223,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB11_8 ; GFX9-NEXT: .LBB11_2: ; GFX9-NEXT: ; implicit-def: $vgpr4 -; GFX9-NEXT: .LBB11_3: ; %frem.compute +; GFX9-NEXT: .LBB11_3: ; %frem.compute15 ; GFX9-NEXT: v_frexp_mant_f32_e64 v5, |v2| ; GFX9-NEXT: v_ldexp_f32 v5, v5, 1 ; GFX9-NEXT: v_div_scale_f32 v11, s[2:3], v5, v5, 1.0 @@ -12248,10 +12248,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v6 ; GFX9-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB11_7 -; GFX9-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX9-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX9-NEXT: v_sub_u32_e32 v6, v9, v10 ; GFX9-NEXT: v_add_u32_e32 v6, 12, v6 -; GFX9-NEXT: .LBB11_5: ; %frem.loop_body +; GFX9-NEXT: .LBB11_5: ; %frem.loop_body23 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v9, v7 ; GFX9-NEXT: v_mul_f32_e32 v7, v9, v8 @@ -12266,7 +12266,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB11_5 ; GFX9-NEXT: ; %bb.6: ; %Flow51 ; GFX9-NEXT: v_mov_b32_e32 v7, v9 -; GFX9-NEXT: .LBB11_7: ; %frem.loop_exit +; GFX9-NEXT: .LBB11_7: ; %frem.loop_exit24 ; GFX9-NEXT: v_add_u32_e32 v6, -11, v6 ; GFX9-NEXT: v_ldexp_f32 v6, v7, v6 ; GFX9-NEXT: v_mul_f32_e32 v7, v6, v8 @@ -12282,7 +12282,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v3| ; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB11_10 -; GFX9-NEXT: ; %bb.9: ; %frem.else16 +; GFX9-NEXT: ; %bb.9: ; %frem.else ; GFX9-NEXT: s_brev_b32 s2, -2 ; GFX9-NEXT: v_bfi_b32 v5, s2, 0, v1 ; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, 
|v1|, |v3| @@ -12291,7 +12291,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB11_16 ; GFX9-NEXT: .LBB11_10: ; GFX9-NEXT: ; implicit-def: $vgpr5 -; GFX9-NEXT: .LBB11_11: ; %frem.compute15 +; GFX9-NEXT: .LBB11_11: ; %frem.compute ; GFX9-NEXT: v_frexp_mant_f32_e64 v6, |v3| ; GFX9-NEXT: v_ldexp_f32 v6, v6, 1 ; GFX9-NEXT: v_div_scale_f32 v12, s[2:3], v6, v6, 1.0 @@ -12316,10 +12316,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v7 ; GFX9-NEXT: v_div_fixup_f32 v9, v9, v6, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB11_15 -; GFX9-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX9-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX9-NEXT: v_sub_u32_e32 v7, v10, v11 ; GFX9-NEXT: v_add_u32_e32 v7, 12, v7 -; GFX9-NEXT: .LBB11_13: ; %frem.loop_body23 +; GFX9-NEXT: .LBB11_13: ; %frem.loop_body ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v10, v8 ; GFX9-NEXT: v_mul_f32_e32 v8, v10, v9 @@ -12334,7 +12334,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB11_13 ; GFX9-NEXT: ; %bb.14: ; %Flow ; GFX9-NEXT: v_mov_b32_e32 v8, v10 -; GFX9-NEXT: .LBB11_15: ; %frem.loop_exit24 +; GFX9-NEXT: .LBB11_15: ; %frem.loop_exit ; GFX9-NEXT: v_add_u32_e32 v7, -11, v7 ; GFX9-NEXT: v_ldexp_f32 v7, v8, v7 ; GFX9-NEXT: v_mul_f32_e32 v8, v7, v9 @@ -12375,7 +12375,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v0|, |v2| ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_vccz .LBB11_2 -; GFX10-NEXT: ; %bb.1: ; %frem.else +; GFX10-NEXT: ; %bb.1: ; %frem.else16 ; GFX10-NEXT: v_bfi_b32 v4, 0x7fffffff, 0, v0 ; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v0|, |v2| ; GFX10-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc_lo @@ -12383,7 +12383,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) 
%out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB11_8 ; GFX10-NEXT: .LBB11_2: ; GFX10-NEXT: ; implicit-def: $vgpr4 -; GFX10-NEXT: .LBB11_3: ; %frem.compute +; GFX10-NEXT: .LBB11_3: ; %frem.compute15 ; GFX10-NEXT: v_frexp_mant_f32_e64 v5, |v2| ; GFX10-NEXT: v_frexp_mant_f32_e64 v4, |v0| ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v7, v0 @@ -12410,10 +12410,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v8 ; GFX10-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB11_7 -; GFX10-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX10-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 12 -; GFX10-NEXT: .LBB11_5: ; %frem.loop_body +; GFX10-NEXT: .LBB11_5: ; %frem.loop_body23 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v9, v6 ; GFX10-NEXT: s_add_i32 s2, s2, -12 @@ -12429,7 +12429,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.6: ; %Flow51 ; GFX10-NEXT: v_mov_b32_e32 v8, s2 ; GFX10-NEXT: v_mov_b32_e32 v6, v9 -; GFX10-NEXT: .LBB11_7: ; %frem.loop_exit +; GFX10-NEXT: .LBB11_7: ; %frem.loop_exit24 ; GFX10-NEXT: v_add_nc_u32_e32 v8, -11, v8 ; GFX10-NEXT: v_ldexp_f32 v6, v6, v8 ; GFX10-NEXT: v_mul_f32_e32 v7, v6, v7 @@ -12444,7 +12444,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v1|, |v3| ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_vccz .LBB11_10 -; GFX10-NEXT: ; %bb.9: ; %frem.else16 +; GFX10-NEXT: ; %bb.9: ; %frem.else ; GFX10-NEXT: v_bfi_b32 v5, 0x7fffffff, 0, v1 ; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v1|, |v3| ; GFX10-NEXT: v_cndmask_b32_e32 v5, v1, v5, vcc_lo @@ -12452,7 +12452,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB11_16 ; GFX10-NEXT: .LBB11_10: ; 
GFX10-NEXT: ; implicit-def: $vgpr5 -; GFX10-NEXT: .LBB11_11: ; %frem.compute15 +; GFX10-NEXT: .LBB11_11: ; %frem.compute ; GFX10-NEXT: v_frexp_mant_f32_e64 v6, |v3| ; GFX10-NEXT: v_frexp_mant_f32_e64 v5, |v1| ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v8, v1 @@ -12479,10 +12479,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v9 ; GFX10-NEXT: v_div_fixup_f32 v8, v8, v6, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB11_15 -; GFX10-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX10-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 12 -; GFX10-NEXT: .LBB11_13: ; %frem.loop_body23 +; GFX10-NEXT: .LBB11_13: ; %frem.loop_body ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v10, v7 ; GFX10-NEXT: s_add_i32 s2, s2, -12 @@ -12498,7 +12498,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.14: ; %Flow ; GFX10-NEXT: v_mov_b32_e32 v9, s2 ; GFX10-NEXT: v_mov_b32_e32 v7, v10 -; GFX10-NEXT: .LBB11_15: ; %frem.loop_exit24 +; GFX10-NEXT: .LBB11_15: ; %frem.loop_exit ; GFX10-NEXT: v_add_nc_u32_e32 v9, -11, v9 ; GFX10-NEXT: v_ldexp_f32 v7, v7, v9 ; GFX10-NEXT: v_mul_f32_e32 v8, v7, v8 @@ -12536,7 +12536,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v0|, |v2| ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_vccz .LBB11_2 -; GFX11-NEXT: ; %bb.1: ; %frem.else +; GFX11-NEXT: ; %bb.1: ; %frem.else16 ; GFX11-NEXT: v_bfi_b32 v4, 0x7fffffff, 0, v0 ; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v0|, |v2| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -12545,7 +12545,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_branch .LBB11_8 ; GFX11-NEXT: .LBB11_2: ; GFX11-NEXT: ; implicit-def: $vgpr4 -; GFX11-NEXT: .LBB11_3: ; %frem.compute +; 
GFX11-NEXT: .LBB11_3: ; %frem.compute15 ; GFX11-NEXT: v_frexp_mant_f32_e64 v5, |v2| ; GFX11-NEXT: v_frexp_mant_f32_e64 v4, |v0| ; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v7, v0 @@ -12581,11 +12581,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0 ; GFX11-NEXT: s_cbranch_vccnz .LBB11_7 -; GFX11-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX11-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s2, 12 -; GFX11-NEXT: .LBB11_5: ; %frem.loop_body +; GFX11-NEXT: .LBB11_5: ; %frem.loop_body23 ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v9, v6 @@ -12605,7 +12605,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: ; %bb.6: ; %Flow51 ; GFX11-NEXT: v_mov_b32_e32 v8, s2 ; GFX11-NEXT: v_mov_b32_e32 v6, v9 -; GFX11-NEXT: .LBB11_7: ; %frem.loop_exit +; GFX11-NEXT: .LBB11_7: ; %frem.loop_exit24 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_nc_u32_e32 v8, -11, v8 ; GFX11-NEXT: v_ldexp_f32 v6, v6, v8 @@ -12625,7 +12625,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v1|, |v3| ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_vccz .LBB11_10 -; GFX11-NEXT: ; %bb.9: ; %frem.else16 +; GFX11-NEXT: ; %bb.9: ; %frem.else ; GFX11-NEXT: v_bfi_b32 v5, 0x7fffffff, 0, v1 ; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v1|, |v3| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -12634,7 +12634,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_branch .LBB11_16 ; GFX11-NEXT: .LBB11_10: ; GFX11-NEXT: ; 
implicit-def: $vgpr5 -; GFX11-NEXT: .LBB11_11: ; %frem.compute15 +; GFX11-NEXT: .LBB11_11: ; %frem.compute ; GFX11-NEXT: v_frexp_mant_f32_e64 v6, |v3| ; GFX11-NEXT: v_frexp_mant_f32_e64 v5, |v1| ; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v8, v1 @@ -12670,11 +12670,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_div_fixup_f32 v8, v8, v6, 1.0 ; GFX11-NEXT: s_cbranch_vccnz .LBB11_15 -; GFX11-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX11-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s2, 12 -; GFX11-NEXT: .LBB11_13: ; %frem.loop_body23 +; GFX11-NEXT: .LBB11_13: ; %frem.loop_body ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v10, v7 @@ -12694,7 +12694,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: ; %bb.14: ; %Flow ; GFX11-NEXT: v_mov_b32_e32 v9, s2 ; GFX11-NEXT: v_mov_b32_e32 v7, v10 -; GFX11-NEXT: .LBB11_15: ; %frem.loop_exit24 +; GFX11-NEXT: .LBB11_15: ; %frem.loop_exit ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_nc_u32_e32 v9, -11, v9 ; GFX11-NEXT: v_ldexp_f32 v7, v7, v9 @@ -12742,7 +12742,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_cmp_ngt_f32 s3, s8 ; GFX1150-NEXT: s_cbranch_scc0 .LBB11_2 -; GFX1150-NEXT: ; %bb.1: ; %frem.else +; GFX1150-NEXT: ; %bb.1: ; %frem.else16 ; GFX1150-NEXT: s_cmp_eq_f32 s3, s8 ; GFX1150-NEXT: v_bfi_b32 v0, 0x7fffffff, 0, s6 ; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -12752,7 +12752,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: 
s_branch .LBB11_8 ; GFX1150-NEXT: .LBB11_2: ; GFX1150-NEXT: ; implicit-def: $vgpr0 -; GFX1150-NEXT: .LBB11_3: ; %frem.compute +; GFX1150-NEXT: .LBB11_3: ; %frem.compute15 ; GFX1150-NEXT: v_frexp_mant_f32_e64 v1, |s4| ; GFX1150-NEXT: v_frexp_mant_f32_e64 v0, |s6| ; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v3, s6 @@ -12787,11 +12787,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v4 ; GFX1150-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1150-NEXT: s_cbranch_vccnz .LBB11_7 -; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX1150-NEXT: s_sub_i32 s7, s7, s8 ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_add_i32 s7, s7, 12 -; GFX1150-NEXT: .LBB11_5: ; %frem.loop_body +; GFX1150-NEXT: .LBB11_5: ; %frem.loop_body23 ; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-NEXT: v_mov_b32_e32 v5, v2 @@ -12813,7 +12813,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: ; %bb.6: ; %Flow51 ; GFX1150-NEXT: v_mov_b32_e32 v4, s7 ; GFX1150-NEXT: v_mov_b32_e32 v2, v5 -; GFX1150-NEXT: .LBB11_7: ; %frem.loop_exit +; GFX1150-NEXT: .LBB11_7: ; %frem.loop_exit24 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-NEXT: v_add_nc_u32_e32 v4, -11, v4 ; GFX1150-NEXT: v_ldexp_f32 v2, v2, v4 @@ -12836,7 +12836,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_cmp_ngt_f32 s6, s8 ; GFX1150-NEXT: s_cbranch_scc0 .LBB11_10 -; GFX1150-NEXT: ; %bb.9: ; %frem.else16 +; GFX1150-NEXT: ; %bb.9: ; %frem.else ; GFX1150-NEXT: s_cmp_eq_f32 s6, s8 ; GFX1150-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, s5 ; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -12846,7 +12846,7 
@@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_branch .LBB11_16 ; GFX1150-NEXT: .LBB11_10: ; GFX1150-NEXT: ; implicit-def: $vgpr1 -; GFX1150-NEXT: .LBB11_11: ; %frem.compute15 +; GFX1150-NEXT: .LBB11_11: ; %frem.compute ; GFX1150-NEXT: v_frexp_mant_f32_e64 v2, |s2| ; GFX1150-NEXT: v_frexp_mant_f32_e64 v1, |s5| ; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v4, s5 @@ -12881,11 +12881,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v5 ; GFX1150-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1150-NEXT: s_cbranch_vccnz .LBB11_15 -; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX1150-NEXT: s_sub_i32 s7, s7, s8 ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_add_i32 s7, s7, 12 -; GFX1150-NEXT: .LBB11_13: ; %frem.loop_body23 +; GFX1150-NEXT: .LBB11_13: ; %frem.loop_body ; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-NEXT: v_mov_b32_e32 v6, v3 @@ -12907,7 +12907,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: ; %bb.14: ; %Flow ; GFX1150-NEXT: v_mov_b32_e32 v5, s7 ; GFX1150-NEXT: v_mov_b32_e32 v3, v6 -; GFX1150-NEXT: .LBB11_15: ; %frem.loop_exit24 +; GFX1150-NEXT: .LBB11_15: ; %frem.loop_exit ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-NEXT: v_add_nc_u32_e32 v5, -11, v5 ; GFX1150-NEXT: v_ldexp_f32 v3, v3, v5 @@ -12962,7 +12962,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1200-NEXT: s_cmp_ngt_f32 s3, s8 ; GFX1200-NEXT: s_cbranch_scc0 .LBB11_2 -; GFX1200-NEXT: ; %bb.1: ; %frem.else +; GFX1200-NEXT: ; %bb.1: ; %frem.else16 ; GFX1200-NEXT: s_cmp_eq_f32 s3, s8 ; 
GFX1200-NEXT: v_bfi_b32 v0, 0x7fffffff, 0, s6 ; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -12972,7 +12972,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_branch .LBB11_8 ; GFX1200-NEXT: .LBB11_2: ; GFX1200-NEXT: ; implicit-def: $vgpr0 -; GFX1200-NEXT: .LBB11_3: ; %frem.compute +; GFX1200-NEXT: .LBB11_3: ; %frem.compute15 ; GFX1200-NEXT: v_frexp_mant_f32_e64 v1, |s4| ; GFX1200-NEXT: v_frexp_mant_f32_e64 v0, |s6| ; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v3, s6 @@ -13008,11 +13008,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v4 ; GFX1200-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1200-NEXT: s_cbranch_vccnz .LBB11_7 -; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX1200-NEXT: s_sub_co_i32 s7, s7, s8 ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_add_co_i32 s7, s7, 12 -; GFX1200-NEXT: .LBB11_5: ; %frem.loop_body +; GFX1200-NEXT: .LBB11_5: ; %frem.loop_body23 ; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-NEXT: v_mov_b32_e32 v5, v2 @@ -13036,7 +13036,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: ; %bb.6: ; %Flow51 ; GFX1200-NEXT: v_mov_b32_e32 v4, s7 ; GFX1200-NEXT: v_mov_b32_e32 v2, v5 -; GFX1200-NEXT: .LBB11_7: ; %frem.loop_exit +; GFX1200-NEXT: .LBB11_7: ; %frem.loop_exit24 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-NEXT: v_add_nc_u32_e32 v4, -11, v4 ; GFX1200-NEXT: v_ldexp_f32 v2, v2, v4 @@ -13060,7 +13060,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_cmp_ngt_f32 s6, s8 ; GFX1200-NEXT: s_cbranch_scc0 .LBB11_10 -; GFX1200-NEXT: ; %bb.9: ; %frem.else16 +; GFX1200-NEXT: ; %bb.9: ; %frem.else ; 
GFX1200-NEXT: s_cmp_eq_f32 s6, s8 ; GFX1200-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, s5 ; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -13071,7 +13071,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_branch .LBB11_16 ; GFX1200-NEXT: .LBB11_10: ; GFX1200-NEXT: ; implicit-def: $vgpr1 -; GFX1200-NEXT: .LBB11_11: ; %frem.compute15 +; GFX1200-NEXT: .LBB11_11: ; %frem.compute ; GFX1200-NEXT: v_frexp_mant_f32_e64 v2, |s2| ; GFX1200-NEXT: v_frexp_mant_f32_e64 v1, |s5| ; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v4, s5 @@ -13107,11 +13107,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v5 ; GFX1200-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1200-NEXT: s_cbranch_vccnz .LBB11_15 -; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX1200-NEXT: s_sub_co_i32 s7, s7, s8 ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_add_co_i32 s7, s7, 12 -; GFX1200-NEXT: .LBB11_13: ; %frem.loop_body23 +; GFX1200-NEXT: .LBB11_13: ; %frem.loop_body ; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-NEXT: v_mov_b32_e32 v6, v3 @@ -13135,7 +13135,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: ; %bb.14: ; %Flow ; GFX1200-NEXT: v_mov_b32_e32 v5, s7 ; GFX1200-NEXT: v_mov_b32_e32 v3, v6 -; GFX1200-NEXT: .LBB11_15: ; %frem.loop_exit24 +; GFX1200-NEXT: .LBB11_15: ; %frem.loop_exit ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-NEXT: v_add_nc_u32_e32 v5, -11, v5 ; GFX1200-NEXT: v_ldexp_f32 v3, v3, v5 @@ -13199,7 +13199,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v4| ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: s_cbranch_vccz .LBB12_2 -; SI-NEXT: ; %bb.1: ; 
%frem.else +; SI-NEXT: ; %bb.1: ; %frem.else78 ; SI-NEXT: s_brev_b32 s2, -2 ; SI-NEXT: v_bfi_b32 v8, s2, 0, v0 ; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v4| @@ -13210,7 +13210,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB12_2: ; SI-NEXT: ; implicit-def: $vgpr8 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB12_3: ; %frem.compute +; SI-NEXT: .LBB12_3: ; %frem.compute77 ; SI-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v0|, s6 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v8, v0 @@ -13246,10 +13246,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v10, v10, v8, 1.0 ; SI-NEXT: s_cmp_lt_i32 s3, 13 ; SI-NEXT: s_cbranch_scc1 .LBB12_7 -; SI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; SI-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; SI-NEXT: s_sub_i32 s3, s4, s5 ; SI-NEXT: s_add_i32 s3, s3, 12 -; SI-NEXT: .LBB12_5: ; %frem.loop_body +; SI-NEXT: .LBB12_5: ; %frem.loop_body85 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v11, v9 ; SI-NEXT: v_mul_f32_e32 v9, v11, v10 @@ -13264,7 +13264,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB12_5 ; SI-NEXT: ; %bb.6: ; %Flow125 ; SI-NEXT: v_mov_b32_e32 v9, v11 -; SI-NEXT: .LBB12_7: ; %frem.loop_exit +; SI-NEXT: .LBB12_7: ; %frem.loop_exit86 ; SI-NEXT: s_add_i32 s3, s3, -11 ; SI-NEXT: v_ldexp_f32_e64 v9, v9, s3 ; SI-NEXT: v_mul_f32_e32 v10, v9, v10 @@ -13280,7 +13280,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v5| ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: s_cbranch_vccz .LBB12_10 -; SI-NEXT: ; %bb.9: ; %frem.else16 +; SI-NEXT: ; %bb.9: ; %frem.else47 ; SI-NEXT: s_brev_b32 s2, -2 ; SI-NEXT: v_bfi_b32 v9, s2, 0, v1 ; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v5| @@ -13291,7 +13291,7 @@ define amdgpu_kernel void @frem_v4f32(ptr 
addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB12_10: ; SI-NEXT: ; implicit-def: $vgpr9 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB12_11: ; %frem.compute15 +; SI-NEXT: .LBB12_11: ; %frem.compute46 ; SI-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, s6 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v9, v1 @@ -13327,10 +13327,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0 ; SI-NEXT: s_cmp_lt_i32 s3, 13 ; SI-NEXT: s_cbranch_scc1 .LBB12_15 -; SI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; SI-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; SI-NEXT: s_sub_i32 s3, s4, s5 ; SI-NEXT: s_add_i32 s3, s3, 12 -; SI-NEXT: .LBB12_13: ; %frem.loop_body23 +; SI-NEXT: .LBB12_13: ; %frem.loop_body54 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v12, v10 ; SI-NEXT: v_mul_f32_e32 v10, v12, v11 @@ -13345,7 +13345,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB12_13 ; SI-NEXT: ; %bb.14: ; %Flow121 ; SI-NEXT: v_mov_b32_e32 v10, v12 -; SI-NEXT: .LBB12_15: ; %frem.loop_exit24 +; SI-NEXT: .LBB12_15: ; %frem.loop_exit55 ; SI-NEXT: s_add_i32 s3, s3, -11 ; SI-NEXT: v_ldexp_f32_e64 v10, v10, s3 ; SI-NEXT: v_mul_f32_e32 v11, v10, v11 @@ -13361,7 +13361,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v2|, |v6| ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: s_cbranch_vccz .LBB12_18 -; SI-NEXT: ; %bb.17: ; %frem.else47 +; SI-NEXT: ; %bb.17: ; %frem.else16 ; SI-NEXT: s_brev_b32 s2, -2 ; SI-NEXT: v_bfi_b32 v10, s2, 0, v2 ; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v2|, |v6| @@ -13372,7 +13372,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB12_18: ; SI-NEXT: ; implicit-def: $vgpr10 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB12_19: ; %frem.compute46 +; SI-NEXT: .LBB12_19: ; 
%frem.compute15 ; SI-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v2|, s6 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v10, v2 @@ -13408,10 +13408,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v12, v12, v10, 1.0 ; SI-NEXT: s_cmp_lt_i32 s3, 13 ; SI-NEXT: s_cbranch_scc1 .LBB12_23 -; SI-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; SI-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; SI-NEXT: s_sub_i32 s3, s4, s5 ; SI-NEXT: s_add_i32 s3, s3, 12 -; SI-NEXT: .LBB12_21: ; %frem.loop_body54 +; SI-NEXT: .LBB12_21: ; %frem.loop_body23 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v13, v11 ; SI-NEXT: v_mul_f32_e32 v11, v13, v12 @@ -13426,7 +13426,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB12_21 ; SI-NEXT: ; %bb.22: ; %Flow117 ; SI-NEXT: v_mov_b32_e32 v11, v13 -; SI-NEXT: .LBB12_23: ; %frem.loop_exit55 +; SI-NEXT: .LBB12_23: ; %frem.loop_exit24 ; SI-NEXT: s_add_i32 s3, s3, -11 ; SI-NEXT: v_ldexp_f32_e64 v11, v11, s3 ; SI-NEXT: v_mul_f32_e32 v12, v11, v12 @@ -13442,7 +13442,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v3|, |v7| ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: s_cbranch_vccz .LBB12_26 -; SI-NEXT: ; %bb.25: ; %frem.else78 +; SI-NEXT: ; %bb.25: ; %frem.else ; SI-NEXT: s_brev_b32 s2, -2 ; SI-NEXT: v_bfi_b32 v11, s2, 0, v3 ; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v3|, |v7| @@ -13453,7 +13453,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB12_26: ; SI-NEXT: ; implicit-def: $vgpr11 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB12_27: ; %frem.compute77 +; SI-NEXT: .LBB12_27: ; %frem.compute ; SI-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s6 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v11, v3 @@ -13489,10 +13489,10 @@ define amdgpu_kernel void 
@frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v13, v13, v11, 1.0 ; SI-NEXT: s_cmp_lt_i32 s3, 13 ; SI-NEXT: s_cbranch_scc1 .LBB12_31 -; SI-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; SI-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; SI-NEXT: s_sub_i32 s3, s4, s5 ; SI-NEXT: s_add_i32 s3, s3, 12 -; SI-NEXT: .LBB12_29: ; %frem.loop_body85 +; SI-NEXT: .LBB12_29: ; %frem.loop_body ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v14, v12 ; SI-NEXT: v_mul_f32_e32 v12, v14, v13 @@ -13507,7 +13507,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB12_29 ; SI-NEXT: ; %bb.30: ; %Flow ; SI-NEXT: v_mov_b32_e32 v12, v14 -; SI-NEXT: .LBB12_31: ; %frem.loop_exit86 +; SI-NEXT: .LBB12_31: ; %frem.loop_exit ; SI-NEXT: s_add_i32 s3, s3, -11 ; SI-NEXT: v_ldexp_f32_e64 v12, v12, s3 ; SI-NEXT: v_mul_f32_e32 v13, v12, v13 @@ -13560,7 +13560,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v4| ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB12_2 -; CI-NEXT: ; %bb.1: ; %frem.else +; CI-NEXT: ; %bb.1: ; %frem.else78 ; CI-NEXT: s_brev_b32 s2, -2 ; CI-NEXT: v_bfi_b32 v8, s2, 0, v0 ; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v4| @@ -13569,7 +13569,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB12_8 ; CI-NEXT: .LBB12_2: ; CI-NEXT: ; implicit-def: $vgpr8 -; CI-NEXT: .LBB12_3: ; %frem.compute +; CI-NEXT: .LBB12_3: ; %frem.compute77 ; CI-NEXT: v_frexp_mant_f32_e64 v9, |v4| ; CI-NEXT: v_ldexp_f32_e64 v9, v9, 1 ; CI-NEXT: v_div_scale_f32 v15, s[2:3], v9, v9, 1.0 @@ -13594,10 +13594,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v10 ; CI-NEXT: v_div_fixup_f32 v12, v12, v9, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB12_7 -; CI-NEXT: ; %bb.4: ; 
%frem.loop_body.preheader +; CI-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; CI-NEXT: v_sub_i32_e32 v10, vcc, v13, v14 ; CI-NEXT: v_add_i32_e32 v10, vcc, 12, v10 -; CI-NEXT: .LBB12_5: ; %frem.loop_body +; CI-NEXT: .LBB12_5: ; %frem.loop_body85 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v13, v11 ; CI-NEXT: v_mul_f32_e32 v11, v13, v12 @@ -13612,7 +13612,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB12_5 ; CI-NEXT: ; %bb.6: ; %Flow125 ; CI-NEXT: v_mov_b32_e32 v11, v13 -; CI-NEXT: .LBB12_7: ; %frem.loop_exit +; CI-NEXT: .LBB12_7: ; %frem.loop_exit86 ; CI-NEXT: v_add_i32_e32 v10, vcc, -11, v10 ; CI-NEXT: v_ldexp_f32_e32 v10, v11, v10 ; CI-NEXT: v_mul_f32_e32 v11, v10, v12 @@ -13628,7 +13628,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v5| ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB12_10 -; CI-NEXT: ; %bb.9: ; %frem.else16 +; CI-NEXT: ; %bb.9: ; %frem.else47 ; CI-NEXT: s_brev_b32 s2, -2 ; CI-NEXT: v_bfi_b32 v9, s2, 0, v1 ; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v5| @@ -13637,7 +13637,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB12_16 ; CI-NEXT: .LBB12_10: ; CI-NEXT: ; implicit-def: $vgpr9 -; CI-NEXT: .LBB12_11: ; %frem.compute15 +; CI-NEXT: .LBB12_11: ; %frem.compute46 ; CI-NEXT: v_frexp_mant_f32_e64 v10, |v5| ; CI-NEXT: v_ldexp_f32_e64 v10, v10, 1 ; CI-NEXT: v_div_scale_f32 v16, s[2:3], v10, v10, 1.0 @@ -13662,10 +13662,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v11 ; CI-NEXT: v_div_fixup_f32 v13, v13, v10, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB12_15 -; CI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; CI-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; CI-NEXT: v_sub_i32_e32 v11, vcc, v14, v15 ; CI-NEXT: v_add_i32_e32 
v11, vcc, 12, v11 -; CI-NEXT: .LBB12_13: ; %frem.loop_body23 +; CI-NEXT: .LBB12_13: ; %frem.loop_body54 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v14, v12 ; CI-NEXT: v_mul_f32_e32 v12, v14, v13 @@ -13680,7 +13680,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB12_13 ; CI-NEXT: ; %bb.14: ; %Flow121 ; CI-NEXT: v_mov_b32_e32 v12, v14 -; CI-NEXT: .LBB12_15: ; %frem.loop_exit24 +; CI-NEXT: .LBB12_15: ; %frem.loop_exit55 ; CI-NEXT: v_add_i32_e32 v11, vcc, -11, v11 ; CI-NEXT: v_ldexp_f32_e32 v11, v12, v11 ; CI-NEXT: v_mul_f32_e32 v12, v11, v13 @@ -13696,7 +13696,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v2|, |v6| ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB12_18 -; CI-NEXT: ; %bb.17: ; %frem.else47 +; CI-NEXT: ; %bb.17: ; %frem.else16 ; CI-NEXT: s_brev_b32 s2, -2 ; CI-NEXT: v_bfi_b32 v10, s2, 0, v2 ; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v2|, |v6| @@ -13705,7 +13705,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB12_24 ; CI-NEXT: .LBB12_18: ; CI-NEXT: ; implicit-def: $vgpr10 -; CI-NEXT: .LBB12_19: ; %frem.compute46 +; CI-NEXT: .LBB12_19: ; %frem.compute15 ; CI-NEXT: v_frexp_mant_f32_e64 v11, |v6| ; CI-NEXT: v_ldexp_f32_e64 v11, v11, 1 ; CI-NEXT: v_div_scale_f32 v17, s[2:3], v11, v11, 1.0 @@ -13730,10 +13730,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v12 ; CI-NEXT: v_div_fixup_f32 v14, v14, v11, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB12_23 -; CI-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; CI-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; CI-NEXT: v_sub_i32_e32 v12, vcc, v15, v16 ; CI-NEXT: v_add_i32_e32 v12, vcc, 12, v12 -; CI-NEXT: .LBB12_21: ; %frem.loop_body54 +; CI-NEXT: .LBB12_21: ; %frem.loop_body23 ; CI-NEXT: ; =>This Inner 
Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v15, v13 ; CI-NEXT: v_mul_f32_e32 v13, v15, v14 @@ -13748,7 +13748,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB12_21 ; CI-NEXT: ; %bb.22: ; %Flow117 ; CI-NEXT: v_mov_b32_e32 v13, v15 -; CI-NEXT: .LBB12_23: ; %frem.loop_exit55 +; CI-NEXT: .LBB12_23: ; %frem.loop_exit24 ; CI-NEXT: v_add_i32_e32 v12, vcc, -11, v12 ; CI-NEXT: v_ldexp_f32_e32 v12, v13, v12 ; CI-NEXT: v_mul_f32_e32 v13, v12, v14 @@ -13764,7 +13764,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v3|, |v7| ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB12_26 -; CI-NEXT: ; %bb.25: ; %frem.else78 +; CI-NEXT: ; %bb.25: ; %frem.else ; CI-NEXT: s_brev_b32 s2, -2 ; CI-NEXT: v_bfi_b32 v11, s2, 0, v3 ; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v3|, |v7| @@ -13773,7 +13773,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB12_32 ; CI-NEXT: .LBB12_26: ; CI-NEXT: ; implicit-def: $vgpr11 -; CI-NEXT: .LBB12_27: ; %frem.compute77 +; CI-NEXT: .LBB12_27: ; %frem.compute ; CI-NEXT: v_frexp_mant_f32_e64 v12, |v7| ; CI-NEXT: v_ldexp_f32_e64 v12, v12, 1 ; CI-NEXT: v_div_scale_f32 v18, s[2:3], v12, v12, 1.0 @@ -13798,10 +13798,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v13 ; CI-NEXT: v_div_fixup_f32 v15, v15, v12, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB12_31 -; CI-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; CI-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; CI-NEXT: v_sub_i32_e32 v13, vcc, v16, v17 ; CI-NEXT: v_add_i32_e32 v13, vcc, 12, v13 -; CI-NEXT: .LBB12_29: ; %frem.loop_body85 +; CI-NEXT: .LBB12_29: ; %frem.loop_body ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v16, v14 ; CI-NEXT: v_mul_f32_e32 v14, v16, v15 @@ -13816,7 +13816,7 @@ define amdgpu_kernel 
void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB12_29 ; CI-NEXT: ; %bb.30: ; %Flow ; CI-NEXT: v_mov_b32_e32 v14, v16 -; CI-NEXT: .LBB12_31: ; %frem.loop_exit86 +; CI-NEXT: .LBB12_31: ; %frem.loop_exit ; CI-NEXT: v_add_i32_e32 v13, vcc, -11, v13 ; CI-NEXT: v_ldexp_f32_e32 v13, v14, v13 ; CI-NEXT: v_mul_f32_e32 v14, v13, v15 @@ -13869,7 +13869,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v4| ; VI-NEXT: s_and_b64 vcc, exec, s[2:3] ; VI-NEXT: s_cbranch_vccz .LBB12_2 -; VI-NEXT: ; %bb.1: ; %frem.else +; VI-NEXT: ; %bb.1: ; %frem.else78 ; VI-NEXT: s_brev_b32 s2, -2 ; VI-NEXT: v_bfi_b32 v8, s2, 0, v0 ; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v4| @@ -13878,7 +13878,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB12_8 ; VI-NEXT: .LBB12_2: ; VI-NEXT: ; implicit-def: $vgpr8 -; VI-NEXT: .LBB12_3: ; %frem.compute +; VI-NEXT: .LBB12_3: ; %frem.compute77 ; VI-NEXT: v_frexp_mant_f32_e64 v9, |v4| ; VI-NEXT: v_ldexp_f32 v9, v9, 1 ; VI-NEXT: v_div_scale_f32 v15, s[2:3], v9, v9, 1.0 @@ -13903,10 +13903,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v10 ; VI-NEXT: v_div_fixup_f32 v12, v12, v9, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB12_7 -; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; VI-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; VI-NEXT: v_sub_u32_e32 v10, vcc, v13, v14 ; VI-NEXT: v_add_u32_e32 v10, vcc, 12, v10 -; VI-NEXT: .LBB12_5: ; %frem.loop_body +; VI-NEXT: .LBB12_5: ; %frem.loop_body85 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v13, v11 ; VI-NEXT: v_mul_f32_e32 v11, v13, v12 @@ -13921,7 +13921,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB12_5 ; VI-NEXT: ; %bb.6: ; %Flow125 ; VI-NEXT: v_mov_b32_e32 v11, v13 -; 
VI-NEXT: .LBB12_7: ; %frem.loop_exit +; VI-NEXT: .LBB12_7: ; %frem.loop_exit86 ; VI-NEXT: v_add_u32_e32 v10, vcc, -11, v10 ; VI-NEXT: v_ldexp_f32 v10, v11, v10 ; VI-NEXT: v_mul_f32_e32 v11, v10, v12 @@ -13937,7 +13937,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v5| ; VI-NEXT: s_and_b64 vcc, exec, s[2:3] ; VI-NEXT: s_cbranch_vccz .LBB12_10 -; VI-NEXT: ; %bb.9: ; %frem.else16 +; VI-NEXT: ; %bb.9: ; %frem.else47 ; VI-NEXT: s_brev_b32 s2, -2 ; VI-NEXT: v_bfi_b32 v9, s2, 0, v1 ; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v5| @@ -13946,7 +13946,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB12_16 ; VI-NEXT: .LBB12_10: ; VI-NEXT: ; implicit-def: $vgpr9 -; VI-NEXT: .LBB12_11: ; %frem.compute15 +; VI-NEXT: .LBB12_11: ; %frem.compute46 ; VI-NEXT: v_frexp_mant_f32_e64 v10, |v5| ; VI-NEXT: v_ldexp_f32 v10, v10, 1 ; VI-NEXT: v_div_scale_f32 v16, s[2:3], v10, v10, 1.0 @@ -13971,10 +13971,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v11 ; VI-NEXT: v_div_fixup_f32 v13, v13, v10, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB12_15 -; VI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; VI-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; VI-NEXT: v_sub_u32_e32 v11, vcc, v14, v15 ; VI-NEXT: v_add_u32_e32 v11, vcc, 12, v11 -; VI-NEXT: .LBB12_13: ; %frem.loop_body23 +; VI-NEXT: .LBB12_13: ; %frem.loop_body54 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v14, v12 ; VI-NEXT: v_mul_f32_e32 v12, v14, v13 @@ -13989,7 +13989,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB12_13 ; VI-NEXT: ; %bb.14: ; %Flow121 ; VI-NEXT: v_mov_b32_e32 v12, v14 -; VI-NEXT: .LBB12_15: ; %frem.loop_exit24 +; VI-NEXT: .LBB12_15: ; %frem.loop_exit55 ; VI-NEXT: v_add_u32_e32 v11, vcc, -11, v11 ; VI-NEXT: v_ldexp_f32 
v11, v12, v11 ; VI-NEXT: v_mul_f32_e32 v12, v11, v13 @@ -14005,7 +14005,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v2|, |v6| ; VI-NEXT: s_and_b64 vcc, exec, s[2:3] ; VI-NEXT: s_cbranch_vccz .LBB12_18 -; VI-NEXT: ; %bb.17: ; %frem.else47 +; VI-NEXT: ; %bb.17: ; %frem.else16 ; VI-NEXT: s_brev_b32 s2, -2 ; VI-NEXT: v_bfi_b32 v10, s2, 0, v2 ; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v2|, |v6| @@ -14014,7 +14014,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB12_24 ; VI-NEXT: .LBB12_18: ; VI-NEXT: ; implicit-def: $vgpr10 -; VI-NEXT: .LBB12_19: ; %frem.compute46 +; VI-NEXT: .LBB12_19: ; %frem.compute15 ; VI-NEXT: v_frexp_mant_f32_e64 v11, |v6| ; VI-NEXT: v_ldexp_f32 v11, v11, 1 ; VI-NEXT: v_div_scale_f32 v17, s[2:3], v11, v11, 1.0 @@ -14039,10 +14039,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v12 ; VI-NEXT: v_div_fixup_f32 v14, v14, v11, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB12_23 -; VI-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; VI-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; VI-NEXT: v_sub_u32_e32 v12, vcc, v15, v16 ; VI-NEXT: v_add_u32_e32 v12, vcc, 12, v12 -; VI-NEXT: .LBB12_21: ; %frem.loop_body54 +; VI-NEXT: .LBB12_21: ; %frem.loop_body23 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v15, v13 ; VI-NEXT: v_mul_f32_e32 v13, v15, v14 @@ -14057,7 +14057,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB12_21 ; VI-NEXT: ; %bb.22: ; %Flow117 ; VI-NEXT: v_mov_b32_e32 v13, v15 -; VI-NEXT: .LBB12_23: ; %frem.loop_exit55 +; VI-NEXT: .LBB12_23: ; %frem.loop_exit24 ; VI-NEXT: v_add_u32_e32 v12, vcc, -11, v12 ; VI-NEXT: v_ldexp_f32 v12, v13, v12 ; VI-NEXT: v_mul_f32_e32 v13, v12, v14 @@ -14073,7 +14073,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr 
addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v3|, |v7| ; VI-NEXT: s_and_b64 vcc, exec, s[2:3] ; VI-NEXT: s_cbranch_vccz .LBB12_26 -; VI-NEXT: ; %bb.25: ; %frem.else78 +; VI-NEXT: ; %bb.25: ; %frem.else ; VI-NEXT: s_brev_b32 s2, -2 ; VI-NEXT: v_bfi_b32 v11, s2, 0, v3 ; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v3|, |v7| @@ -14082,7 +14082,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB12_32 ; VI-NEXT: .LBB12_26: ; VI-NEXT: ; implicit-def: $vgpr11 -; VI-NEXT: .LBB12_27: ; %frem.compute77 +; VI-NEXT: .LBB12_27: ; %frem.compute ; VI-NEXT: v_frexp_mant_f32_e64 v12, |v7| ; VI-NEXT: v_ldexp_f32 v12, v12, 1 ; VI-NEXT: v_div_scale_f32 v18, s[2:3], v12, v12, 1.0 @@ -14107,10 +14107,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v13 ; VI-NEXT: v_div_fixup_f32 v15, v15, v12, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB12_31 -; VI-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; VI-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; VI-NEXT: v_sub_u32_e32 v13, vcc, v16, v17 ; VI-NEXT: v_add_u32_e32 v13, vcc, 12, v13 -; VI-NEXT: .LBB12_29: ; %frem.loop_body85 +; VI-NEXT: .LBB12_29: ; %frem.loop_body ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v16, v14 ; VI-NEXT: v_mul_f32_e32 v14, v16, v15 @@ -14125,7 +14125,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB12_29 ; VI-NEXT: ; %bb.30: ; %Flow ; VI-NEXT: v_mov_b32_e32 v14, v16 -; VI-NEXT: .LBB12_31: ; %frem.loop_exit86 +; VI-NEXT: .LBB12_31: ; %frem.loop_exit ; VI-NEXT: v_add_u32_e32 v13, vcc, -11, v13 ; VI-NEXT: v_ldexp_f32 v13, v14, v13 ; VI-NEXT: v_mul_f32_e32 v14, v13, v15 @@ -14173,7 +14173,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v4| ; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB12_2 -; 
GFX9-NEXT: ; %bb.1: ; %frem.else +; GFX9-NEXT: ; %bb.1: ; %frem.else78 ; GFX9-NEXT: s_brev_b32 s2, -2 ; GFX9-NEXT: v_bfi_b32 v8, s2, 0, v0 ; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v4| @@ -14182,7 +14182,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB12_8 ; GFX9-NEXT: .LBB12_2: ; GFX9-NEXT: ; implicit-def: $vgpr8 -; GFX9-NEXT: .LBB12_3: ; %frem.compute +; GFX9-NEXT: .LBB12_3: ; %frem.compute77 ; GFX9-NEXT: v_frexp_mant_f32_e64 v9, |v4| ; GFX9-NEXT: v_ldexp_f32 v9, v9, 1 ; GFX9-NEXT: v_div_scale_f32 v15, s[2:3], v9, v9, 1.0 @@ -14207,10 +14207,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v10 ; GFX9-NEXT: v_div_fixup_f32 v12, v12, v9, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB12_7 -; GFX9-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX9-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; GFX9-NEXT: v_sub_u32_e32 v10, v13, v14 ; GFX9-NEXT: v_add_u32_e32 v10, 12, v10 -; GFX9-NEXT: .LBB12_5: ; %frem.loop_body +; GFX9-NEXT: .LBB12_5: ; %frem.loop_body85 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v13, v11 ; GFX9-NEXT: v_mul_f32_e32 v11, v13, v12 @@ -14225,7 +14225,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB12_5 ; GFX9-NEXT: ; %bb.6: ; %Flow125 ; GFX9-NEXT: v_mov_b32_e32 v11, v13 -; GFX9-NEXT: .LBB12_7: ; %frem.loop_exit +; GFX9-NEXT: .LBB12_7: ; %frem.loop_exit86 ; GFX9-NEXT: v_add_u32_e32 v10, -11, v10 ; GFX9-NEXT: v_ldexp_f32 v10, v11, v10 ; GFX9-NEXT: v_mul_f32_e32 v11, v10, v12 @@ -14241,7 +14241,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v5| ; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB12_10 -; GFX9-NEXT: ; %bb.9: ; %frem.else16 +; GFX9-NEXT: ; %bb.9: ; %frem.else47 ; GFX9-NEXT: s_brev_b32 s2, -2 ; GFX9-NEXT: 
v_bfi_b32 v9, s2, 0, v1 ; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v5| @@ -14250,7 +14250,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB12_16 ; GFX9-NEXT: .LBB12_10: ; GFX9-NEXT: ; implicit-def: $vgpr9 -; GFX9-NEXT: .LBB12_11: ; %frem.compute15 +; GFX9-NEXT: .LBB12_11: ; %frem.compute46 ; GFX9-NEXT: v_frexp_mant_f32_e64 v10, |v5| ; GFX9-NEXT: v_ldexp_f32 v10, v10, 1 ; GFX9-NEXT: v_div_scale_f32 v16, s[2:3], v10, v10, 1.0 @@ -14275,10 +14275,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v11 ; GFX9-NEXT: v_div_fixup_f32 v13, v13, v10, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB12_15 -; GFX9-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX9-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; GFX9-NEXT: v_sub_u32_e32 v11, v14, v15 ; GFX9-NEXT: v_add_u32_e32 v11, 12, v11 -; GFX9-NEXT: .LBB12_13: ; %frem.loop_body23 +; GFX9-NEXT: .LBB12_13: ; %frem.loop_body54 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v14, v12 ; GFX9-NEXT: v_mul_f32_e32 v12, v14, v13 @@ -14293,7 +14293,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB12_13 ; GFX9-NEXT: ; %bb.14: ; %Flow121 ; GFX9-NEXT: v_mov_b32_e32 v12, v14 -; GFX9-NEXT: .LBB12_15: ; %frem.loop_exit24 +; GFX9-NEXT: .LBB12_15: ; %frem.loop_exit55 ; GFX9-NEXT: v_add_u32_e32 v11, -11, v11 ; GFX9-NEXT: v_ldexp_f32 v11, v12, v11 ; GFX9-NEXT: v_mul_f32_e32 v12, v11, v13 @@ -14309,7 +14309,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v2|, |v6| ; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB12_18 -; GFX9-NEXT: ; %bb.17: ; %frem.else47 +; GFX9-NEXT: ; %bb.17: ; %frem.else16 ; GFX9-NEXT: s_brev_b32 s2, -2 ; GFX9-NEXT: v_bfi_b32 v10, s2, 0, v2 ; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v2|, |v6| @@ -14318,7 
+14318,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB12_24 ; GFX9-NEXT: .LBB12_18: ; GFX9-NEXT: ; implicit-def: $vgpr10 -; GFX9-NEXT: .LBB12_19: ; %frem.compute46 +; GFX9-NEXT: .LBB12_19: ; %frem.compute15 ; GFX9-NEXT: v_frexp_mant_f32_e64 v11, |v6| ; GFX9-NEXT: v_ldexp_f32 v11, v11, 1 ; GFX9-NEXT: v_div_scale_f32 v17, s[2:3], v11, v11, 1.0 @@ -14343,10 +14343,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v12 ; GFX9-NEXT: v_div_fixup_f32 v14, v14, v11, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB12_23 -; GFX9-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; GFX9-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; GFX9-NEXT: v_sub_u32_e32 v12, v15, v16 ; GFX9-NEXT: v_add_u32_e32 v12, 12, v12 -; GFX9-NEXT: .LBB12_21: ; %frem.loop_body54 +; GFX9-NEXT: .LBB12_21: ; %frem.loop_body23 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v15, v13 ; GFX9-NEXT: v_mul_f32_e32 v13, v15, v14 @@ -14361,7 +14361,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB12_21 ; GFX9-NEXT: ; %bb.22: ; %Flow117 ; GFX9-NEXT: v_mov_b32_e32 v13, v15 -; GFX9-NEXT: .LBB12_23: ; %frem.loop_exit55 +; GFX9-NEXT: .LBB12_23: ; %frem.loop_exit24 ; GFX9-NEXT: v_add_u32_e32 v12, -11, v12 ; GFX9-NEXT: v_ldexp_f32 v12, v13, v12 ; GFX9-NEXT: v_mul_f32_e32 v13, v12, v14 @@ -14377,7 +14377,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v3|, |v7| ; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB12_26 -; GFX9-NEXT: ; %bb.25: ; %frem.else78 +; GFX9-NEXT: ; %bb.25: ; %frem.else ; GFX9-NEXT: s_brev_b32 s2, -2 ; GFX9-NEXT: v_bfi_b32 v11, s2, 0, v3 ; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v3|, |v7| @@ -14386,7 +14386,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr 
addrspace(1) %i ; GFX9-NEXT: s_branch .LBB12_32 ; GFX9-NEXT: .LBB12_26: ; GFX9-NEXT: ; implicit-def: $vgpr11 -; GFX9-NEXT: .LBB12_27: ; %frem.compute77 +; GFX9-NEXT: .LBB12_27: ; %frem.compute ; GFX9-NEXT: v_frexp_mant_f32_e64 v12, |v7| ; GFX9-NEXT: v_ldexp_f32 v12, v12, 1 ; GFX9-NEXT: v_div_scale_f32 v18, s[2:3], v12, v12, 1.0 @@ -14411,10 +14411,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v13 ; GFX9-NEXT: v_div_fixup_f32 v15, v15, v12, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB12_31 -; GFX9-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; GFX9-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX9-NEXT: v_sub_u32_e32 v13, v16, v17 ; GFX9-NEXT: v_add_u32_e32 v13, 12, v13 -; GFX9-NEXT: .LBB12_29: ; %frem.loop_body85 +; GFX9-NEXT: .LBB12_29: ; %frem.loop_body ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v16, v14 ; GFX9-NEXT: v_mul_f32_e32 v14, v16, v15 @@ -14429,7 +14429,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB12_29 ; GFX9-NEXT: ; %bb.30: ; %Flow ; GFX9-NEXT: v_mov_b32_e32 v14, v16 -; GFX9-NEXT: .LBB12_31: ; %frem.loop_exit86 +; GFX9-NEXT: .LBB12_31: ; %frem.loop_exit ; GFX9-NEXT: v_add_u32_e32 v13, -11, v13 ; GFX9-NEXT: v_ldexp_f32 v13, v14, v13 ; GFX9-NEXT: v_mul_f32_e32 v14, v13, v15 @@ -14478,7 +14478,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v0|, |v4| ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_vccz .LBB12_2 -; GFX10-NEXT: ; %bb.1: ; %frem.else +; GFX10-NEXT: ; %bb.1: ; %frem.else78 ; GFX10-NEXT: v_bfi_b32 v8, 0x7fffffff, 0, v0 ; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v0|, |v4| ; GFX10-NEXT: v_cndmask_b32_e32 v8, v0, v8, vcc_lo @@ -14486,7 +14486,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB12_8 ; GFX10-NEXT: 
.LBB12_2: ; GFX10-NEXT: ; implicit-def: $vgpr8 -; GFX10-NEXT: .LBB12_3: ; %frem.compute +; GFX10-NEXT: .LBB12_3: ; %frem.compute77 ; GFX10-NEXT: v_frexp_mant_f32_e64 v9, |v4| ; GFX10-NEXT: v_frexp_mant_f32_e64 v8, |v0| ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v11, v0 @@ -14513,10 +14513,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v12 ; GFX10-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB12_7 -; GFX10-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX10-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 12 -; GFX10-NEXT: .LBB12_5: ; %frem.loop_body +; GFX10-NEXT: .LBB12_5: ; %frem.loop_body85 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v13, v10 ; GFX10-NEXT: s_add_i32 s2, s2, -12 @@ -14532,7 +14532,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.6: ; %Flow125 ; GFX10-NEXT: v_mov_b32_e32 v12, s2 ; GFX10-NEXT: v_mov_b32_e32 v10, v13 -; GFX10-NEXT: .LBB12_7: ; %frem.loop_exit +; GFX10-NEXT: .LBB12_7: ; %frem.loop_exit86 ; GFX10-NEXT: v_add_nc_u32_e32 v12, -11, v12 ; GFX10-NEXT: v_ldexp_f32 v10, v10, v12 ; GFX10-NEXT: v_mul_f32_e32 v11, v10, v11 @@ -14547,7 +14547,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v1|, |v5| ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_vccz .LBB12_10 -; GFX10-NEXT: ; %bb.9: ; %frem.else16 +; GFX10-NEXT: ; %bb.9: ; %frem.else47 ; GFX10-NEXT: v_bfi_b32 v9, 0x7fffffff, 0, v1 ; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v1|, |v5| ; GFX10-NEXT: v_cndmask_b32_e32 v9, v1, v9, vcc_lo @@ -14555,7 +14555,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB12_16 ; GFX10-NEXT: .LBB12_10: ; GFX10-NEXT: ; implicit-def: $vgpr9 -; GFX10-NEXT: 
.LBB12_11: ; %frem.compute15 +; GFX10-NEXT: .LBB12_11: ; %frem.compute46 ; GFX10-NEXT: v_frexp_mant_f32_e64 v10, |v5| ; GFX10-NEXT: v_frexp_mant_f32_e64 v9, |v1| ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v12, v1 @@ -14582,10 +14582,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v13 ; GFX10-NEXT: v_div_fixup_f32 v12, v12, v10, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB12_15 -; GFX10-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX10-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 12 -; GFX10-NEXT: .LBB12_13: ; %frem.loop_body23 +; GFX10-NEXT: .LBB12_13: ; %frem.loop_body54 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v14, v11 ; GFX10-NEXT: s_add_i32 s2, s2, -12 @@ -14601,7 +14601,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.14: ; %Flow121 ; GFX10-NEXT: v_mov_b32_e32 v13, s2 ; GFX10-NEXT: v_mov_b32_e32 v11, v14 -; GFX10-NEXT: .LBB12_15: ; %frem.loop_exit24 +; GFX10-NEXT: .LBB12_15: ; %frem.loop_exit55 ; GFX10-NEXT: v_add_nc_u32_e32 v13, -11, v13 ; GFX10-NEXT: v_ldexp_f32 v11, v11, v13 ; GFX10-NEXT: v_mul_f32_e32 v12, v11, v12 @@ -14616,7 +14616,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v2|, |v6| ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_vccz .LBB12_18 -; GFX10-NEXT: ; %bb.17: ; %frem.else47 +; GFX10-NEXT: ; %bb.17: ; %frem.else16 ; GFX10-NEXT: v_bfi_b32 v10, 0x7fffffff, 0, v2 ; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v2|, |v6| ; GFX10-NEXT: v_cndmask_b32_e32 v10, v2, v10, vcc_lo @@ -14624,7 +14624,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB12_24 ; GFX10-NEXT: .LBB12_18: ; GFX10-NEXT: ; implicit-def: $vgpr10 -; GFX10-NEXT: .LBB12_19: ; %frem.compute46 +; 
GFX10-NEXT: .LBB12_19: ; %frem.compute15 ; GFX10-NEXT: v_frexp_mant_f32_e64 v11, |v6| ; GFX10-NEXT: v_frexp_mant_f32_e64 v10, |v2| ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v13, v2 @@ -14651,10 +14651,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v14 ; GFX10-NEXT: v_div_fixup_f32 v13, v13, v11, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB12_23 -; GFX10-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; GFX10-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 12 -; GFX10-NEXT: .LBB12_21: ; %frem.loop_body54 +; GFX10-NEXT: .LBB12_21: ; %frem.loop_body23 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v15, v12 ; GFX10-NEXT: s_add_i32 s2, s2, -12 @@ -14670,7 +14670,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.22: ; %Flow117 ; GFX10-NEXT: v_mov_b32_e32 v14, s2 ; GFX10-NEXT: v_mov_b32_e32 v12, v15 -; GFX10-NEXT: .LBB12_23: ; %frem.loop_exit55 +; GFX10-NEXT: .LBB12_23: ; %frem.loop_exit24 ; GFX10-NEXT: v_add_nc_u32_e32 v14, -11, v14 ; GFX10-NEXT: v_ldexp_f32 v12, v12, v14 ; GFX10-NEXT: v_mul_f32_e32 v13, v12, v13 @@ -14685,7 +14685,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v3|, |v7| ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_vccz .LBB12_26 -; GFX10-NEXT: ; %bb.25: ; %frem.else78 +; GFX10-NEXT: ; %bb.25: ; %frem.else ; GFX10-NEXT: v_bfi_b32 v11, 0x7fffffff, 0, v3 ; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v3|, |v7| ; GFX10-NEXT: v_cndmask_b32_e32 v11, v3, v11, vcc_lo @@ -14693,7 +14693,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB12_32 ; GFX10-NEXT: .LBB12_26: ; GFX10-NEXT: ; implicit-def: $vgpr11 -; GFX10-NEXT: .LBB12_27: ; %frem.compute77 +; GFX10-NEXT: .LBB12_27: ; %frem.compute ; 
GFX10-NEXT: v_frexp_mant_f32_e64 v12, |v7| ; GFX10-NEXT: v_frexp_mant_f32_e64 v11, |v3| ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v14, v3 @@ -14720,10 +14720,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v15 ; GFX10-NEXT: v_div_fixup_f32 v14, v14, v12, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB12_31 -; GFX10-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; GFX10-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 12 -; GFX10-NEXT: .LBB12_29: ; %frem.loop_body85 +; GFX10-NEXT: .LBB12_29: ; %frem.loop_body ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v16, v13 ; GFX10-NEXT: s_add_i32 s2, s2, -12 @@ -14739,7 +14739,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.30: ; %Flow ; GFX10-NEXT: v_mov_b32_e32 v15, s2 ; GFX10-NEXT: v_mov_b32_e32 v13, v16 -; GFX10-NEXT: .LBB12_31: ; %frem.loop_exit86 +; GFX10-NEXT: .LBB12_31: ; %frem.loop_exit ; GFX10-NEXT: v_add_nc_u32_e32 v15, -11, v15 ; GFX10-NEXT: v_ldexp_f32 v13, v13, v15 ; GFX10-NEXT: v_mul_f32_e32 v14, v13, v14 @@ -14785,7 +14785,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v0|, |v4| ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_vccz .LBB12_2 -; GFX11-NEXT: ; %bb.1: ; %frem.else +; GFX11-NEXT: ; %bb.1: ; %frem.else78 ; GFX11-NEXT: v_bfi_b32 v8, 0x7fffffff, 0, v0 ; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v0|, |v4| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -14794,7 +14794,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_branch .LBB12_8 ; GFX11-NEXT: .LBB12_2: ; GFX11-NEXT: ; implicit-def: $vgpr8 -; GFX11-NEXT: .LBB12_3: ; %frem.compute +; GFX11-NEXT: .LBB12_3: ; %frem.compute77 ; GFX11-NEXT: v_frexp_mant_f32_e64 v9, |v4| ; GFX11-NEXT: 
v_frexp_mant_f32_e64 v8, |v0| ; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v11, v0 @@ -14830,11 +14830,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0 ; GFX11-NEXT: s_cbranch_vccnz .LBB12_7 -; GFX11-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX11-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s2, 12 -; GFX11-NEXT: .LBB12_5: ; %frem.loop_body +; GFX11-NEXT: .LBB12_5: ; %frem.loop_body85 ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v13, v10 @@ -14854,7 +14854,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: ; %bb.6: ; %Flow125 ; GFX11-NEXT: v_mov_b32_e32 v12, s2 ; GFX11-NEXT: v_mov_b32_e32 v10, v13 -; GFX11-NEXT: .LBB12_7: ; %frem.loop_exit +; GFX11-NEXT: .LBB12_7: ; %frem.loop_exit86 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_nc_u32_e32 v12, -11, v12 ; GFX11-NEXT: v_ldexp_f32 v10, v10, v12 @@ -14874,7 +14874,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v1|, |v5| ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_vccz .LBB12_10 -; GFX11-NEXT: ; %bb.9: ; %frem.else16 +; GFX11-NEXT: ; %bb.9: ; %frem.else47 ; GFX11-NEXT: v_bfi_b32 v9, 0x7fffffff, 0, v1 ; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v1|, |v5| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -14883,7 +14883,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_branch .LBB12_16 ; GFX11-NEXT: .LBB12_10: ; GFX11-NEXT: ; implicit-def: $vgpr9 -; GFX11-NEXT: .LBB12_11: ; %frem.compute15 +; GFX11-NEXT: .LBB12_11: 
; %frem.compute46 ; GFX11-NEXT: v_frexp_mant_f32_e64 v10, |v5| ; GFX11-NEXT: v_frexp_mant_f32_e64 v9, |v1| ; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v12, v1 @@ -14919,11 +14919,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_div_fixup_f32 v12, v12, v10, 1.0 ; GFX11-NEXT: s_cbranch_vccnz .LBB12_15 -; GFX11-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX11-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s2, 12 -; GFX11-NEXT: .LBB12_13: ; %frem.loop_body23 +; GFX11-NEXT: .LBB12_13: ; %frem.loop_body54 ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v14, v11 @@ -14943,7 +14943,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: ; %bb.14: ; %Flow121 ; GFX11-NEXT: v_mov_b32_e32 v13, s2 ; GFX11-NEXT: v_mov_b32_e32 v11, v14 -; GFX11-NEXT: .LBB12_15: ; %frem.loop_exit24 +; GFX11-NEXT: .LBB12_15: ; %frem.loop_exit55 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_nc_u32_e32 v13, -11, v13 ; GFX11-NEXT: v_ldexp_f32 v11, v11, v13 @@ -14963,7 +14963,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v2|, |v6| ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_vccz .LBB12_18 -; GFX11-NEXT: ; %bb.17: ; %frem.else47 +; GFX11-NEXT: ; %bb.17: ; %frem.else16 ; GFX11-NEXT: v_bfi_b32 v10, 0x7fffffff, 0, v2 ; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v2|, |v6| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -14972,7 +14972,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_branch .LBB12_24 ; GFX11-NEXT: .LBB12_18: ; 
GFX11-NEXT: ; implicit-def: $vgpr10 -; GFX11-NEXT: .LBB12_19: ; %frem.compute46 +; GFX11-NEXT: .LBB12_19: ; %frem.compute15 ; GFX11-NEXT: v_frexp_mant_f32_e64 v11, |v6| ; GFX11-NEXT: v_frexp_mant_f32_e64 v10, |v2| ; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v13, v2 @@ -15008,11 +15008,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_div_fixup_f32 v13, v13, v11, 1.0 ; GFX11-NEXT: s_cbranch_vccnz .LBB12_23 -; GFX11-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; GFX11-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s2, 12 -; GFX11-NEXT: .LBB12_21: ; %frem.loop_body54 +; GFX11-NEXT: .LBB12_21: ; %frem.loop_body23 ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v15, v12 @@ -15032,7 +15032,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: ; %bb.22: ; %Flow117 ; GFX11-NEXT: v_mov_b32_e32 v14, s2 ; GFX11-NEXT: v_mov_b32_e32 v12, v15 -; GFX11-NEXT: .LBB12_23: ; %frem.loop_exit55 +; GFX11-NEXT: .LBB12_23: ; %frem.loop_exit24 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_nc_u32_e32 v14, -11, v14 ; GFX11-NEXT: v_ldexp_f32 v12, v12, v14 @@ -15052,7 +15052,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v3|, |v7| ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_vccz .LBB12_26 -; GFX11-NEXT: ; %bb.25: ; %frem.else78 +; GFX11-NEXT: ; %bb.25: ; %frem.else ; GFX11-NEXT: v_bfi_b32 v11, 0x7fffffff, 0, v3 ; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v3|, |v7| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -15061,7 +15061,7 @@ define amdgpu_kernel void @frem_v4f32(ptr 
addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_branch .LBB12_32 ; GFX11-NEXT: .LBB12_26: ; GFX11-NEXT: ; implicit-def: $vgpr11 -; GFX11-NEXT: .LBB12_27: ; %frem.compute77 +; GFX11-NEXT: .LBB12_27: ; %frem.compute ; GFX11-NEXT: v_frexp_mant_f32_e64 v12, |v7| ; GFX11-NEXT: v_frexp_mant_f32_e64 v11, |v3| ; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v14, v3 @@ -15097,11 +15097,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_div_fixup_f32 v14, v14, v12, 1.0 ; GFX11-NEXT: s_cbranch_vccnz .LBB12_31 -; GFX11-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; GFX11-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s2, 12 -; GFX11-NEXT: .LBB12_29: ; %frem.loop_body85 +; GFX11-NEXT: .LBB12_29: ; %frem.loop_body ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v16, v13 @@ -15121,7 +15121,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: ; %bb.30: ; %Flow ; GFX11-NEXT: v_mov_b32_e32 v15, s2 ; GFX11-NEXT: v_mov_b32_e32 v13, v16 -; GFX11-NEXT: .LBB12_31: ; %frem.loop_exit86 +; GFX11-NEXT: .LBB12_31: ; %frem.loop_exit ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_nc_u32_e32 v15, -11, v15 ; GFX11-NEXT: v_ldexp_f32 v13, v13, v15 @@ -15182,7 +15182,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_cmp_ngt_f32 s5, s12 ; GFX1150-NEXT: s_cbranch_scc0 .LBB12_2 -; GFX1150-NEXT: ; %bb.1: ; %frem.else +; GFX1150-NEXT: ; %bb.1: ; %frem.else78 ; GFX1150-NEXT: s_cmp_eq_f32 s5, s12 ; GFX1150-NEXT: v_bfi_b32 v0, 0x7fffffff, 0, s8 ; GFX1150-NEXT: s_cselect_b32 vcc_lo, 
-1, 0 @@ -15192,7 +15192,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_branch .LBB12_8 ; GFX1150-NEXT: .LBB12_2: ; GFX1150-NEXT: ; implicit-def: $vgpr0 -; GFX1150-NEXT: .LBB12_3: ; %frem.compute +; GFX1150-NEXT: .LBB12_3: ; %frem.compute77 ; GFX1150-NEXT: v_frexp_mant_f32_e64 v1, |s6| ; GFX1150-NEXT: v_frexp_mant_f32_e64 v0, |s8| ; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v3, s8 @@ -15227,11 +15227,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v4 ; GFX1150-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1150-NEXT: s_cbranch_vccnz .LBB12_7 -; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; GFX1150-NEXT: s_sub_i32 s11, s11, s12 ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_add_i32 s11, s11, 12 -; GFX1150-NEXT: .LBB12_5: ; %frem.loop_body +; GFX1150-NEXT: .LBB12_5: ; %frem.loop_body85 ; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-NEXT: v_mov_b32_e32 v5, v2 @@ -15253,7 +15253,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: ; %bb.6: ; %Flow125 ; GFX1150-NEXT: v_mov_b32_e32 v4, s11 ; GFX1150-NEXT: v_mov_b32_e32 v2, v5 -; GFX1150-NEXT: .LBB12_7: ; %frem.loop_exit +; GFX1150-NEXT: .LBB12_7: ; %frem.loop_exit86 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-NEXT: v_add_nc_u32_e32 v4, -11, v4 ; GFX1150-NEXT: v_ldexp_f32 v2, v2, v4 @@ -15276,7 +15276,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_cmp_ngt_f32 s8, s12 ; GFX1150-NEXT: s_cbranch_scc0 .LBB12_10 -; GFX1150-NEXT: ; %bb.9: ; %frem.else16 +; GFX1150-NEXT: ; %bb.9: ; %frem.else47 ; 
GFX1150-NEXT: s_cmp_eq_f32 s8, s12 ; GFX1150-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, s10 ; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -15286,7 +15286,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_branch .LBB12_16 ; GFX1150-NEXT: .LBB12_10: ; GFX1150-NEXT: ; implicit-def: $vgpr1 -; GFX1150-NEXT: .LBB12_11: ; %frem.compute15 +; GFX1150-NEXT: .LBB12_11: ; %frem.compute46 ; GFX1150-NEXT: v_frexp_mant_f32_e64 v2, |s4| ; GFX1150-NEXT: v_frexp_mant_f32_e64 v1, |s10| ; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v4, s10 @@ -15321,11 +15321,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v5 ; GFX1150-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1150-NEXT: s_cbranch_vccnz .LBB12_15 -; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; GFX1150-NEXT: s_sub_i32 s11, s11, s12 ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_add_i32 s11, s11, 12 -; GFX1150-NEXT: .LBB12_13: ; %frem.loop_body23 +; GFX1150-NEXT: .LBB12_13: ; %frem.loop_body54 ; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-NEXT: v_mov_b32_e32 v6, v3 @@ -15347,7 +15347,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: ; %bb.14: ; %Flow121 ; GFX1150-NEXT: v_mov_b32_e32 v5, s11 ; GFX1150-NEXT: v_mov_b32_e32 v3, v6 -; GFX1150-NEXT: .LBB12_15: ; %frem.loop_exit24 +; GFX1150-NEXT: .LBB12_15: ; %frem.loop_exit55 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-NEXT: v_add_nc_u32_e32 v5, -11, v5 ; GFX1150-NEXT: v_ldexp_f32 v3, v3, v5 @@ -15370,7 +15370,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_cmp_ngt_f32 
s10, s12 ; GFX1150-NEXT: s_cbranch_scc0 .LBB12_18 -; GFX1150-NEXT: ; %bb.17: ; %frem.else47 +; GFX1150-NEXT: ; %bb.17: ; %frem.else16 ; GFX1150-NEXT: s_cmp_eq_f32 s10, s12 ; GFX1150-NEXT: v_bfi_b32 v2, 0x7fffffff, 0, s9 ; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -15380,7 +15380,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_branch .LBB12_24 ; GFX1150-NEXT: .LBB12_18: ; GFX1150-NEXT: ; implicit-def: $vgpr2 -; GFX1150-NEXT: .LBB12_19: ; %frem.compute46 +; GFX1150-NEXT: .LBB12_19: ; %frem.compute15 ; GFX1150-NEXT: v_frexp_mant_f32_e64 v3, |s3| ; GFX1150-NEXT: v_frexp_mant_f32_e64 v2, |s9| ; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v5, s9 @@ -15415,11 +15415,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v6 ; GFX1150-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX1150-NEXT: s_cbranch_vccnz .LBB12_23 -; GFX1150-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; GFX1150-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; GFX1150-NEXT: s_sub_i32 s11, s11, s12 ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_add_i32 s11, s11, 12 -; GFX1150-NEXT: .LBB12_21: ; %frem.loop_body54 +; GFX1150-NEXT: .LBB12_21: ; %frem.loop_body23 ; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-NEXT: v_mov_b32_e32 v7, v4 @@ -15441,7 +15441,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: ; %bb.22: ; %Flow117 ; GFX1150-NEXT: v_mov_b32_e32 v6, s11 ; GFX1150-NEXT: v_mov_b32_e32 v4, v7 -; GFX1150-NEXT: .LBB12_23: ; %frem.loop_exit55 +; GFX1150-NEXT: .LBB12_23: ; %frem.loop_exit24 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-NEXT: v_add_nc_u32_e32 v6, -11, v6 ; GFX1150-NEXT: v_ldexp_f32 v4, v4, v6 @@ -15464,7 +15464,7 @@ define amdgpu_kernel void 
@frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_cmp_ngt_f32 s9, s12 ; GFX1150-NEXT: s_cbranch_scc0 .LBB12_26 -; GFX1150-NEXT: ; %bb.25: ; %frem.else78 +; GFX1150-NEXT: ; %bb.25: ; %frem.else ; GFX1150-NEXT: s_cmp_eq_f32 s9, s12 ; GFX1150-NEXT: v_bfi_b32 v3, 0x7fffffff, 0, s7 ; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -15474,7 +15474,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_branch .LBB12_32 ; GFX1150-NEXT: .LBB12_26: ; GFX1150-NEXT: ; implicit-def: $vgpr3 -; GFX1150-NEXT: .LBB12_27: ; %frem.compute77 +; GFX1150-NEXT: .LBB12_27: ; %frem.compute ; GFX1150-NEXT: v_frexp_mant_f32_e64 v4, |s2| ; GFX1150-NEXT: v_frexp_mant_f32_e64 v3, |s7| ; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v6, s7 @@ -15509,11 +15509,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v7 ; GFX1150-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; GFX1150-NEXT: s_cbranch_vccnz .LBB12_31 -; GFX1150-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; GFX1150-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX1150-NEXT: s_sub_i32 s11, s11, s12 ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_add_i32 s11, s11, 12 -; GFX1150-NEXT: .LBB12_29: ; %frem.loop_body85 +; GFX1150-NEXT: .LBB12_29: ; %frem.loop_body ; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-NEXT: v_mov_b32_e32 v8, v5 @@ -15535,7 +15535,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: ; %bb.30: ; %Flow ; GFX1150-NEXT: v_mov_b32_e32 v7, s11 ; GFX1150-NEXT: v_mov_b32_e32 v5, v8 -; GFX1150-NEXT: .LBB12_31: ; %frem.loop_exit86 +; GFX1150-NEXT: .LBB12_31: ; %frem.loop_exit ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; 
GFX1150-NEXT: v_add_nc_u32_e32 v7, -11, v7 ; GFX1150-NEXT: v_ldexp_f32 v5, v5, v7 @@ -15609,7 +15609,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1200-NEXT: s_cmp_ngt_f32 s5, s12 ; GFX1200-NEXT: s_cbranch_scc0 .LBB12_2 -; GFX1200-NEXT: ; %bb.1: ; %frem.else +; GFX1200-NEXT: ; %bb.1: ; %frem.else78 ; GFX1200-NEXT: s_cmp_eq_f32 s5, s12 ; GFX1200-NEXT: v_bfi_b32 v0, 0x7fffffff, 0, s8 ; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -15619,7 +15619,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_branch .LBB12_8 ; GFX1200-NEXT: .LBB12_2: ; GFX1200-NEXT: ; implicit-def: $vgpr0 -; GFX1200-NEXT: .LBB12_3: ; %frem.compute +; GFX1200-NEXT: .LBB12_3: ; %frem.compute77 ; GFX1200-NEXT: v_frexp_mant_f32_e64 v1, |s6| ; GFX1200-NEXT: v_frexp_mant_f32_e64 v0, |s8| ; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v3, s8 @@ -15655,11 +15655,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v4 ; GFX1200-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1200-NEXT: s_cbranch_vccnz .LBB12_7 -; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; GFX1200-NEXT: s_sub_co_i32 s11, s11, s12 ; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1200-NEXT: s_add_co_i32 s11, s11, 12 -; GFX1200-NEXT: .LBB12_5: ; %frem.loop_body +; GFX1200-NEXT: .LBB12_5: ; %frem.loop_body85 ; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1200-NEXT: v_mov_b32_e32 v5, v2 @@ -15682,7 +15682,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: ; %bb.6: ; %Flow125 ; GFX1200-NEXT: v_mov_b32_e32 v4, s11 ; GFX1200-NEXT: v_mov_b32_e32 v2, v5 -; GFX1200-NEXT: .LBB12_7: ; %frem.loop_exit +; GFX1200-NEXT: 
.LBB12_7: ; %frem.loop_exit86 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-NEXT: v_add_nc_u32_e32 v4, -11, v4 ; GFX1200-NEXT: v_ldexp_f32 v2, v2, v4 @@ -15706,7 +15706,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_cmp_ngt_f32 s8, s12 ; GFX1200-NEXT: s_cbranch_scc0 .LBB12_10 -; GFX1200-NEXT: ; %bb.9: ; %frem.else16 +; GFX1200-NEXT: ; %bb.9: ; %frem.else47 ; GFX1200-NEXT: s_cmp_eq_f32 s8, s12 ; GFX1200-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, s10 ; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -15717,7 +15717,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_branch .LBB12_16 ; GFX1200-NEXT: .LBB12_10: ; GFX1200-NEXT: ; implicit-def: $vgpr1 -; GFX1200-NEXT: .LBB12_11: ; %frem.compute15 +; GFX1200-NEXT: .LBB12_11: ; %frem.compute46 ; GFX1200-NEXT: v_frexp_mant_f32_e64 v2, |s4| ; GFX1200-NEXT: v_frexp_mant_f32_e64 v1, |s10| ; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v4, s10 @@ -15753,11 +15753,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v5 ; GFX1200-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1200-NEXT: s_cbranch_vccnz .LBB12_15 -; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; GFX1200-NEXT: s_sub_co_i32 s11, s11, s12 ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_add_co_i32 s11, s11, 12 -; GFX1200-NEXT: .LBB12_13: ; %frem.loop_body23 +; GFX1200-NEXT: .LBB12_13: ; %frem.loop_body54 ; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-NEXT: v_mov_b32_e32 v6, v3 @@ -15781,7 +15781,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: ; %bb.14: ; %Flow121 ; GFX1200-NEXT: v_mov_b32_e32 v5, s11 ; GFX1200-NEXT: v_mov_b32_e32 v3, v6 
-; GFX1200-NEXT: .LBB12_15: ; %frem.loop_exit24 +; GFX1200-NEXT: .LBB12_15: ; %frem.loop_exit55 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-NEXT: v_add_nc_u32_e32 v5, -11, v5 ; GFX1200-NEXT: v_ldexp_f32 v3, v3, v5 @@ -15805,7 +15805,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_cmp_ngt_f32 s10, s12 ; GFX1200-NEXT: s_cbranch_scc0 .LBB12_18 -; GFX1200-NEXT: ; %bb.17: ; %frem.else47 +; GFX1200-NEXT: ; %bb.17: ; %frem.else16 ; GFX1200-NEXT: s_cmp_eq_f32 s10, s12 ; GFX1200-NEXT: v_bfi_b32 v2, 0x7fffffff, 0, s9 ; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -15816,7 +15816,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_branch .LBB12_24 ; GFX1200-NEXT: .LBB12_18: ; GFX1200-NEXT: ; implicit-def: $vgpr2 -; GFX1200-NEXT: .LBB12_19: ; %frem.compute46 +; GFX1200-NEXT: .LBB12_19: ; %frem.compute15 ; GFX1200-NEXT: v_frexp_mant_f32_e64 v3, |s3| ; GFX1200-NEXT: v_frexp_mant_f32_e64 v2, |s9| ; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v5, s9 @@ -15852,11 +15852,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v6 ; GFX1200-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX1200-NEXT: s_cbranch_vccnz .LBB12_23 -; GFX1200-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; GFX1200-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; GFX1200-NEXT: s_sub_co_i32 s11, s11, s12 ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_add_co_i32 s11, s11, 12 -; GFX1200-NEXT: .LBB12_21: ; %frem.loop_body54 +; GFX1200-NEXT: .LBB12_21: ; %frem.loop_body23 ; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-NEXT: v_mov_b32_e32 v7, v4 @@ -15880,7 +15880,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: ; %bb.22: ; %Flow117 ; 
GFX1200-NEXT: v_mov_b32_e32 v6, s11 ; GFX1200-NEXT: v_mov_b32_e32 v4, v7 -; GFX1200-NEXT: .LBB12_23: ; %frem.loop_exit55 +; GFX1200-NEXT: .LBB12_23: ; %frem.loop_exit24 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-NEXT: v_add_nc_u32_e32 v6, -11, v6 ; GFX1200-NEXT: v_ldexp_f32 v4, v4, v6 @@ -15904,7 +15904,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_cmp_ngt_f32 s9, s12 ; GFX1200-NEXT: s_cbranch_scc0 .LBB12_26 -; GFX1200-NEXT: ; %bb.25: ; %frem.else78 +; GFX1200-NEXT: ; %bb.25: ; %frem.else ; GFX1200-NEXT: s_cmp_eq_f32 s9, s12 ; GFX1200-NEXT: v_bfi_b32 v3, 0x7fffffff, 0, s7 ; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -15915,7 +15915,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_branch .LBB12_32 ; GFX1200-NEXT: .LBB12_26: ; GFX1200-NEXT: ; implicit-def: $vgpr3 -; GFX1200-NEXT: .LBB12_27: ; %frem.compute77 +; GFX1200-NEXT: .LBB12_27: ; %frem.compute ; GFX1200-NEXT: v_frexp_mant_f32_e64 v4, |s2| ; GFX1200-NEXT: v_frexp_mant_f32_e64 v3, |s7| ; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v6, s7 @@ -15951,11 +15951,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v7 ; GFX1200-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; GFX1200-NEXT: s_cbranch_vccnz .LBB12_31 -; GFX1200-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; GFX1200-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX1200-NEXT: s_sub_co_i32 s11, s11, s12 ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_add_co_i32 s11, s11, 12 -; GFX1200-NEXT: .LBB12_29: ; %frem.loop_body85 +; GFX1200-NEXT: .LBB12_29: ; %frem.loop_body ; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-NEXT: v_mov_b32_e32 v8, v5 @@ -15979,7 +15979,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr 
addrspace(1) %i ; GFX1200-NEXT: ; %bb.30: ; %Flow ; GFX1200-NEXT: v_mov_b32_e32 v7, s11 ; GFX1200-NEXT: v_mov_b32_e32 v5, v8 -; GFX1200-NEXT: .LBB12_31: ; %frem.loop_exit86 +; GFX1200-NEXT: .LBB12_31: ; %frem.loop_exit ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-NEXT: v_add_nc_u32_e32 v7, -11, v7 ; GFX1200-NEXT: v_ldexp_f32 v5, v5, v7 @@ -16060,7 +16060,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cmp_ngt_f64_e64 s[0:1], |v[0:1]|, |v[4:5]| ; SI-NEXT: s_and_b64 vcc, exec, s[0:1] ; SI-NEXT: s_cbranch_vccz .LBB13_2 -; SI-NEXT: ; %bb.1: ; %frem.else +; SI-NEXT: ; %bb.1: ; %frem.else16 ; SI-NEXT: v_and_b32_e32 v8, 0x80000000, v1 ; SI-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, |v[4:5]| ; SI-NEXT: v_cndmask_b32_e32 v9, v1, v8, vcc @@ -16071,7 +16071,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB13_2: ; SI-NEXT: ; implicit-def: $vgpr8_vgpr9 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB13_3: ; %frem.compute +; SI-NEXT: .LBB13_3: ; %frem.compute15 ; SI-NEXT: s_brev_b32 s5, -2 ; SI-NEXT: v_and_b32_e32 v10, 0x7fffffff, v1 ; SI-NEXT: s_mov_b32 s0, 0 @@ -16117,13 +16117,13 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0 ; SI-NEXT: s_cmp_lt_i32 s6, 27 ; SI-NEXT: s_cbranch_scc1 .LBB13_7 -; SI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; SI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; SI-NEXT: s_sub_i32 s0, s3, s7 ; SI-NEXT: s_add_i32 s6, s0, 26 ; SI-NEXT: s_mov_b32 s3, 0x432fffff ; SI-NEXT: v_mov_b32_e32 v18, 0x43300000 ; SI-NEXT: v_mov_b32_e32 v14, 0 -; SI-NEXT: .LBB13_5: ; %frem.loop_body +; SI-NEXT: .LBB13_5: ; %frem.loop_body23 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v17, v11 ; SI-NEXT: v_mov_b32_e32 v16, v10 @@ -16146,7 +16146,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr 
addrspace(1) %i ; SI-NEXT: ; %bb.6: ; %Flow51 ; SI-NEXT: v_mov_b32_e32 v10, v16 ; SI-NEXT: v_mov_b32_e32 v11, v17 -; SI-NEXT: .LBB13_7: ; %frem.loop_exit +; SI-NEXT: .LBB13_7: ; %frem.loop_exit24 ; SI-NEXT: s_sub_i32 s0, s6, 25 ; SI-NEXT: v_ldexp_f64 v[10:11], v[10:11], s0 ; SI-NEXT: v_mul_f64 v[12:13], v[10:11], v[12:13] @@ -16172,7 +16172,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cmp_ngt_f64_e64 s[0:1], |v[2:3]|, |v[6:7]| ; SI-NEXT: s_and_b64 vcc, exec, s[0:1] ; SI-NEXT: s_cbranch_vccz .LBB13_10 -; SI-NEXT: ; %bb.9: ; %frem.else16 +; SI-NEXT: ; %bb.9: ; %frem.else ; SI-NEXT: v_and_b32_e32 v10, 0x80000000, v3 ; SI-NEXT: v_cmp_eq_f64_e64 vcc, |v[2:3]|, |v[6:7]| ; SI-NEXT: v_cndmask_b32_e32 v11, v3, v10, vcc @@ -16183,7 +16183,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB13_10: ; SI-NEXT: ; implicit-def: $vgpr10_vgpr11 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB13_11: ; %frem.compute15 +; SI-NEXT: .LBB13_11: ; %frem.compute ; SI-NEXT: s_brev_b32 s5, -2 ; SI-NEXT: v_and_b32_e32 v12, 0x7fffffff, v3 ; SI-NEXT: s_mov_b32 s0, 0 @@ -16229,13 +16229,13 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0 ; SI-NEXT: s_cmp_lt_i32 s6, 27 ; SI-NEXT: s_cbranch_scc1 .LBB13_15 -; SI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; SI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; SI-NEXT: s_sub_i32 s0, s3, s7 ; SI-NEXT: s_add_i32 s6, s0, 26 ; SI-NEXT: s_mov_b32 s3, 0x432fffff ; SI-NEXT: v_mov_b32_e32 v20, 0x43300000 ; SI-NEXT: v_mov_b32_e32 v16, 0 -; SI-NEXT: .LBB13_13: ; %frem.loop_body23 +; SI-NEXT: .LBB13_13: ; %frem.loop_body ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v19, v13 ; SI-NEXT: v_mov_b32_e32 v18, v12 @@ -16258,7 +16258,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: ; %bb.14: ; %Flow ; 
SI-NEXT: v_mov_b32_e32 v12, v18 ; SI-NEXT: v_mov_b32_e32 v13, v19 -; SI-NEXT: .LBB13_15: ; %frem.loop_exit24 +; SI-NEXT: .LBB13_15: ; %frem.loop_exit ; SI-NEXT: s_sub_i32 s0, s6, 25 ; SI-NEXT: v_ldexp_f64 v[12:13], v[12:13], s0 ; SI-NEXT: v_mul_f64 v[14:15], v[12:13], v[14:15] @@ -16316,7 +16316,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[0:1]|, |v[4:5]| ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB13_2 -; CI-NEXT: ; %bb.1: ; %frem.else +; CI-NEXT: ; %bb.1: ; %frem.else16 ; CI-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, |v[4:5]| ; CI-NEXT: v_and_b32_e32 v8, 0x80000000, v1 ; CI-NEXT: v_cndmask_b32_e32 v9, v1, v8, vcc @@ -16325,7 +16325,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB13_8 ; CI-NEXT: .LBB13_2: ; CI-NEXT: ; implicit-def: $vgpr8_vgpr9 -; CI-NEXT: .LBB13_3: ; %frem.compute +; CI-NEXT: .LBB13_3: ; %frem.compute15 ; CI-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]| ; CI-NEXT: v_frexp_exp_i32_f64_e32 v15, v[4:5] ; CI-NEXT: v_frexp_exp_i32_f64_e32 v14, v[0:1] @@ -16349,10 +16349,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 27, v17 ; CI-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB13_7 -; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; CI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; CI-NEXT: v_sub_i32_e32 v14, vcc, v14, v15 ; CI-NEXT: v_add_i32_e32 v17, vcc, 26, v14 -; CI-NEXT: .LBB13_5: ; %frem.loop_body +; CI-NEXT: .LBB13_5: ; %frem.loop_body23 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v15, v11 ; CI-NEXT: v_mov_b32_e32 v14, v10 @@ -16370,7 +16370,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: ; %bb.6: ; %Flow51 ; CI-NEXT: v_mov_b32_e32 v10, v14 ; CI-NEXT: v_mov_b32_e32 v11, v15 -; CI-NEXT: .LBB13_7: ; 
%frem.loop_exit +; CI-NEXT: .LBB13_7: ; %frem.loop_exit24 ; CI-NEXT: v_subrev_i32_e32 v14, vcc, 25, v17 ; CI-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14 ; CI-NEXT: s_brev_b32 s2, -2 @@ -16387,7 +16387,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[2:3]|, |v[6:7]| ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB13_10 -; CI-NEXT: ; %bb.9: ; %frem.else16 +; CI-NEXT: ; %bb.9: ; %frem.else ; CI-NEXT: v_cmp_eq_f64_e64 vcc, |v[2:3]|, |v[6:7]| ; CI-NEXT: v_and_b32_e32 v10, 0x80000000, v3 ; CI-NEXT: v_cndmask_b32_e32 v11, v3, v10, vcc @@ -16396,7 +16396,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB13_16 ; CI-NEXT: .LBB13_10: ; CI-NEXT: ; implicit-def: $vgpr10_vgpr11 -; CI-NEXT: .LBB13_11: ; %frem.compute15 +; CI-NEXT: .LBB13_11: ; %frem.compute ; CI-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]| ; CI-NEXT: v_frexp_exp_i32_f64_e32 v17, v[6:7] ; CI-NEXT: v_frexp_exp_i32_f64_e32 v16, v[2:3] @@ -16420,10 +16420,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 27, v19 ; CI-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB13_15 -; CI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; CI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; CI-NEXT: v_sub_i32_e32 v16, vcc, v16, v17 ; CI-NEXT: v_add_i32_e32 v19, vcc, 26, v16 -; CI-NEXT: .LBB13_13: ; %frem.loop_body23 +; CI-NEXT: .LBB13_13: ; %frem.loop_body ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v17, v13 ; CI-NEXT: v_mov_b32_e32 v16, v12 @@ -16441,7 +16441,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: ; %bb.14: ; %Flow ; CI-NEXT: v_mov_b32_e32 v12, v16 ; CI-NEXT: v_mov_b32_e32 v13, v17 -; CI-NEXT: .LBB13_15: ; %frem.loop_exit24 +; CI-NEXT: .LBB13_15: ; %frem.loop_exit ; CI-NEXT: 
v_subrev_i32_e32 v16, vcc, 25, v19 ; CI-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16 ; CI-NEXT: s_brev_b32 s2, -2 @@ -16490,7 +16490,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[0:1]|, |v[4:5]| ; VI-NEXT: s_and_b64 vcc, exec, s[2:3] ; VI-NEXT: s_cbranch_vccz .LBB13_2 -; VI-NEXT: ; %bb.1: ; %frem.else +; VI-NEXT: ; %bb.1: ; %frem.else16 ; VI-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, |v[4:5]| ; VI-NEXT: v_and_b32_e32 v8, 0x80000000, v1 ; VI-NEXT: v_cndmask_b32_e32 v9, v1, v8, vcc @@ -16499,7 +16499,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB13_8 ; VI-NEXT: .LBB13_2: ; VI-NEXT: ; implicit-def: $vgpr8_vgpr9 -; VI-NEXT: .LBB13_3: ; %frem.compute +; VI-NEXT: .LBB13_3: ; %frem.compute15 ; VI-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]| ; VI-NEXT: v_frexp_exp_i32_f64_e32 v15, v[4:5] ; VI-NEXT: v_frexp_exp_i32_f64_e32 v14, v[0:1] @@ -16523,10 +16523,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 27, v17 ; VI-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB13_7 -; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; VI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; VI-NEXT: v_sub_u32_e32 v14, vcc, v14, v15 ; VI-NEXT: v_add_u32_e32 v17, vcc, 26, v14 -; VI-NEXT: .LBB13_5: ; %frem.loop_body +; VI-NEXT: .LBB13_5: ; %frem.loop_body23 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v15, v11 ; VI-NEXT: v_mov_b32_e32 v14, v10 @@ -16544,7 +16544,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: ; %bb.6: ; %Flow51 ; VI-NEXT: v_mov_b32_e32 v10, v14 ; VI-NEXT: v_mov_b32_e32 v11, v15 -; VI-NEXT: .LBB13_7: ; %frem.loop_exit +; VI-NEXT: .LBB13_7: ; %frem.loop_exit24 ; VI-NEXT: v_subrev_u32_e32 v14, vcc, 25, v17 ; VI-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14 ; VI-NEXT: s_brev_b32 
s2, -2 @@ -16561,7 +16561,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[2:3]|, |v[6:7]| ; VI-NEXT: s_and_b64 vcc, exec, s[2:3] ; VI-NEXT: s_cbranch_vccz .LBB13_10 -; VI-NEXT: ; %bb.9: ; %frem.else16 +; VI-NEXT: ; %bb.9: ; %frem.else ; VI-NEXT: v_cmp_eq_f64_e64 vcc, |v[2:3]|, |v[6:7]| ; VI-NEXT: v_and_b32_e32 v10, 0x80000000, v3 ; VI-NEXT: v_cndmask_b32_e32 v11, v3, v10, vcc @@ -16570,7 +16570,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB13_16 ; VI-NEXT: .LBB13_10: ; VI-NEXT: ; implicit-def: $vgpr10_vgpr11 -; VI-NEXT: .LBB13_11: ; %frem.compute15 +; VI-NEXT: .LBB13_11: ; %frem.compute ; VI-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]| ; VI-NEXT: v_frexp_exp_i32_f64_e32 v17, v[6:7] ; VI-NEXT: v_frexp_exp_i32_f64_e32 v16, v[2:3] @@ -16594,10 +16594,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 27, v19 ; VI-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB13_15 -; VI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; VI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; VI-NEXT: v_sub_u32_e32 v16, vcc, v16, v17 ; VI-NEXT: v_add_u32_e32 v19, vcc, 26, v16 -; VI-NEXT: .LBB13_13: ; %frem.loop_body23 +; VI-NEXT: .LBB13_13: ; %frem.loop_body ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v17, v13 ; VI-NEXT: v_mov_b32_e32 v16, v12 @@ -16615,7 +16615,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: ; %bb.14: ; %Flow ; VI-NEXT: v_mov_b32_e32 v12, v16 ; VI-NEXT: v_mov_b32_e32 v13, v17 -; VI-NEXT: .LBB13_15: ; %frem.loop_exit24 +; VI-NEXT: .LBB13_15: ; %frem.loop_exit ; VI-NEXT: v_subrev_u32_e32 v16, vcc, 25, v19 ; VI-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16 ; VI-NEXT: s_brev_b32 s2, -2 @@ -16659,7 +16659,7 @@ define amdgpu_kernel void @frem_v2f64(ptr 
addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[0:1]|, |v[4:5]| ; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB13_2 -; GFX9-NEXT: ; %bb.1: ; %frem.else +; GFX9-NEXT: ; %bb.1: ; %frem.else16 ; GFX9-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, |v[4:5]| ; GFX9-NEXT: v_and_b32_e32 v8, 0x80000000, v1 ; GFX9-NEXT: v_cndmask_b32_e32 v9, v1, v8, vcc @@ -16668,7 +16668,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB13_8 ; GFX9-NEXT: .LBB13_2: ; GFX9-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX9-NEXT: .LBB13_3: ; %frem.compute +; GFX9-NEXT: .LBB13_3: ; %frem.compute15 ; GFX9-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]| ; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v15, v[4:5] ; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v14, v[0:1] @@ -16692,10 +16692,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 27, v17 ; GFX9-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB13_7 -; GFX9-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX9-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX9-NEXT: v_sub_u32_e32 v14, v14, v15 ; GFX9-NEXT: v_add_u32_e32 v17, 26, v14 -; GFX9-NEXT: .LBB13_5: ; %frem.loop_body +; GFX9-NEXT: .LBB13_5: ; %frem.loop_body23 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v15, v11 ; GFX9-NEXT: v_mov_b32_e32 v14, v10 @@ -16713,7 +16713,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: ; %bb.6: ; %Flow51 ; GFX9-NEXT: v_mov_b32_e32 v10, v14 ; GFX9-NEXT: v_mov_b32_e32 v11, v15 -; GFX9-NEXT: .LBB13_7: ; %frem.loop_exit +; GFX9-NEXT: .LBB13_7: ; %frem.loop_exit24 ; GFX9-NEXT: v_subrev_u32_e32 v14, 25, v17 ; GFX9-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14 ; GFX9-NEXT: s_brev_b32 s2, -2 @@ -16730,7 +16730,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; 
GFX9-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[2:3]|, |v[6:7]| ; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB13_10 -; GFX9-NEXT: ; %bb.9: ; %frem.else16 +; GFX9-NEXT: ; %bb.9: ; %frem.else ; GFX9-NEXT: v_cmp_eq_f64_e64 vcc, |v[2:3]|, |v[6:7]| ; GFX9-NEXT: v_and_b32_e32 v10, 0x80000000, v3 ; GFX9-NEXT: v_cndmask_b32_e32 v11, v3, v10, vcc @@ -16739,7 +16739,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB13_16 ; GFX9-NEXT: .LBB13_10: ; GFX9-NEXT: ; implicit-def: $vgpr10_vgpr11 -; GFX9-NEXT: .LBB13_11: ; %frem.compute15 +; GFX9-NEXT: .LBB13_11: ; %frem.compute ; GFX9-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]| ; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v17, v[6:7] ; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v16, v[2:3] @@ -16763,10 +16763,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 27, v19 ; GFX9-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB13_15 -; GFX9-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX9-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX9-NEXT: v_sub_u32_e32 v16, v16, v17 ; GFX9-NEXT: v_add_u32_e32 v19, 26, v16 -; GFX9-NEXT: .LBB13_13: ; %frem.loop_body23 +; GFX9-NEXT: .LBB13_13: ; %frem.loop_body ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v17, v13 ; GFX9-NEXT: v_mov_b32_e32 v16, v12 @@ -16784,7 +16784,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: ; %bb.14: ; %Flow ; GFX9-NEXT: v_mov_b32_e32 v12, v16 ; GFX9-NEXT: v_mov_b32_e32 v13, v17 -; GFX9-NEXT: .LBB13_15: ; %frem.loop_exit24 +; GFX9-NEXT: .LBB13_15: ; %frem.loop_exit ; GFX9-NEXT: v_subrev_u32_e32 v16, 25, v19 ; GFX9-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16 ; GFX9-NEXT: s_brev_b32 s2, -2 @@ -16829,7 +16829,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: 
v_cmp_ngt_f64_e64 s2, |v[0:1]|, |v[4:5]| ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_vccz .LBB13_2 -; GFX10-NEXT: ; %bb.1: ; %frem.else +; GFX10-NEXT: ; %bb.1: ; %frem.else16 ; GFX10-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[0:1]|, |v[4:5]| ; GFX10-NEXT: v_and_b32_e32 v8, 0x80000000, v1 ; GFX10-NEXT: v_cndmask_b32_e32 v9, v1, v8, vcc_lo @@ -16838,7 +16838,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB13_8 ; GFX10-NEXT: .LBB13_2: ; GFX10-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX10-NEXT: .LBB13_3: ; %frem.compute +; GFX10-NEXT: .LBB13_3: ; %frem.compute15 ; GFX10-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]| ; GFX10-NEXT: v_frexp_exp_i32_f64_e32 v13, v[4:5] ; GFX10-NEXT: v_frexp_exp_i32_f64_e32 v12, v[0:1] @@ -16863,10 +16863,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v17 ; GFX10-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB13_7 -; GFX10-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX10-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 26 -; GFX10-NEXT: .LBB13_5: ; %frem.loop_body +; GFX10-NEXT: .LBB13_5: ; %frem.loop_body23 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v15, v11 ; GFX10-NEXT: v_mov_b32_e32 v14, v10 @@ -16885,7 +16885,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_mov_b32_e32 v10, v14 ; GFX10-NEXT: v_mov_b32_e32 v17, s2 ; GFX10-NEXT: v_mov_b32_e32 v11, v15 -; GFX10-NEXT: .LBB13_7: ; %frem.loop_exit +; GFX10-NEXT: .LBB13_7: ; %frem.loop_exit24 ; GFX10-NEXT: v_subrev_nc_u32_e32 v14, 25, v17 ; GFX10-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14 ; GFX10-NEXT: v_mul_f64 v[12:13], v[10:11], v[12:13] @@ -16901,7 +16901,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; 
GFX10-NEXT: v_cmp_ngt_f64_e64 s2, |v[2:3]|, |v[6:7]| ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_vccz .LBB13_10 -; GFX10-NEXT: ; %bb.9: ; %frem.else16 +; GFX10-NEXT: ; %bb.9: ; %frem.else ; GFX10-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[2:3]|, |v[6:7]| ; GFX10-NEXT: v_and_b32_e32 v10, 0x80000000, v3 ; GFX10-NEXT: v_cndmask_b32_e32 v11, v3, v10, vcc_lo @@ -16910,7 +16910,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB13_16 ; GFX10-NEXT: .LBB13_10: ; GFX10-NEXT: ; implicit-def: $vgpr10_vgpr11 -; GFX10-NEXT: .LBB13_11: ; %frem.compute15 +; GFX10-NEXT: .LBB13_11: ; %frem.compute ; GFX10-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]| ; GFX10-NEXT: v_frexp_exp_i32_f64_e32 v15, v[6:7] ; GFX10-NEXT: v_frexp_exp_i32_f64_e32 v14, v[2:3] @@ -16935,10 +16935,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v19 ; GFX10-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB13_15 -; GFX10-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX10-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 26 -; GFX10-NEXT: .LBB13_13: ; %frem.loop_body23 +; GFX10-NEXT: .LBB13_13: ; %frem.loop_body ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v17, v13 ; GFX10-NEXT: v_mov_b32_e32 v16, v12 @@ -16957,7 +16957,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_mov_b32_e32 v12, v16 ; GFX10-NEXT: v_mov_b32_e32 v19, s2 ; GFX10-NEXT: v_mov_b32_e32 v13, v17 -; GFX10-NEXT: .LBB13_15: ; %frem.loop_exit24 +; GFX10-NEXT: .LBB13_15: ; %frem.loop_exit ; GFX10-NEXT: v_subrev_nc_u32_e32 v16, 25, v19 ; GFX10-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16 ; GFX10-NEXT: v_mul_f64 v[14:15], v[12:13], v[14:15] @@ -16998,7 +16998,7 @@ define amdgpu_kernel void @frem_v2f64(ptr 
addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: v_cmp_ngt_f64_e64 s2, |v[0:1]|, |v[4:5]| ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_vccz .LBB13_2 -; GFX11-NEXT: ; %bb.1: ; %frem.else +; GFX11-NEXT: ; %bb.1: ; %frem.else16 ; GFX11-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[0:1]|, |v[4:5]| ; GFX11-NEXT: v_and_b32_e32 v8, 0x80000000, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) @@ -17008,7 +17008,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_branch .LBB13_8 ; GFX11-NEXT: .LBB13_2: ; GFX11-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX11-NEXT: .LBB13_3: ; %frem.compute +; GFX11-NEXT: .LBB13_3: ; %frem.compute15 ; GFX11-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]| ; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v13, v[4:5] ; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v12, v[0:1] @@ -17041,12 +17041,12 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0 ; GFX11-NEXT: s_cbranch_vccnz .LBB13_7 -; GFX11-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX11-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s2, 26 ; GFX11-NEXT: .p2align 6 -; GFX11-NEXT: .LBB13_5: ; %frem.loop_body +; GFX11-NEXT: .LBB13_5: ; %frem.loop_body23 ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_dual_mov_b32 v15, v11 :: v_dual_mov_b32 v14, v10 @@ -17066,7 +17066,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: ; %bb.6: ; %Flow51 ; GFX11-NEXT: v_dual_mov_b32 v17, s2 :: v_dual_mov_b32 v10, v14 ; GFX11-NEXT: v_mov_b32_e32 v11, v15 -; GFX11-NEXT: .LBB13_7: ; %frem.loop_exit +; GFX11-NEXT: .LBB13_7: ; 
%frem.loop_exit24 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_subrev_nc_u32_e32 v14, 25, v17 ; GFX11-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14 @@ -17086,7 +17086,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: v_cmp_ngt_f64_e64 s2, |v[2:3]|, |v[6:7]| ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_vccz .LBB13_10 -; GFX11-NEXT: ; %bb.9: ; %frem.else16 +; GFX11-NEXT: ; %bb.9: ; %frem.else ; GFX11-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[2:3]|, |v[6:7]| ; GFX11-NEXT: v_and_b32_e32 v10, 0x80000000, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) @@ -17096,7 +17096,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_branch .LBB13_16 ; GFX11-NEXT: .LBB13_10: ; GFX11-NEXT: ; implicit-def: $vgpr10_vgpr11 -; GFX11-NEXT: .LBB13_11: ; %frem.compute15 +; GFX11-NEXT: .LBB13_11: ; %frem.compute ; GFX11-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]| ; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v15, v[6:7] ; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v14, v[2:3] @@ -17129,12 +17129,12 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0 ; GFX11-NEXT: s_cbranch_vccnz .LBB13_15 -; GFX11-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX11-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s2, 26 ; GFX11-NEXT: .p2align 6 -; GFX11-NEXT: .LBB13_13: ; %frem.loop_body23 +; GFX11-NEXT: .LBB13_13: ; %frem.loop_body ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_dual_mov_b32 v17, v13 :: v_dual_mov_b32 v16, v12 @@ -17154,7 +17154,7 @@ define 
amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: ; %bb.14: ; %Flow ; GFX11-NEXT: v_dual_mov_b32 v19, s2 :: v_dual_mov_b32 v12, v16 ; GFX11-NEXT: v_mov_b32_e32 v13, v17 -; GFX11-NEXT: .LBB13_15: ; %frem.loop_exit24 +; GFX11-NEXT: .LBB13_15: ; %frem.loop_exit ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_subrev_nc_u32_e32 v16, 25, v19 ; GFX11-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16 @@ -17199,7 +17199,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_ngt_f64_e64 s2, |v[0:1]|, |v[4:5]| ; GFX1150-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX1150-NEXT: s_cbranch_vccz .LBB13_2 -; GFX1150-NEXT: ; %bb.1: ; %frem.else +; GFX1150-NEXT: ; %bb.1: ; %frem.else16 ; GFX1150-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[0:1]|, |v[4:5]| ; GFX1150-NEXT: v_and_b32_e32 v8, 0x80000000, v1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) @@ -17209,7 +17209,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_branch .LBB13_8 ; GFX1150-NEXT: .LBB13_2: ; GFX1150-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX1150-NEXT: .LBB13_3: ; %frem.compute +; GFX1150-NEXT: .LBB13_3: ; %frem.compute15 ; GFX1150-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]| ; GFX1150-NEXT: v_frexp_exp_i32_f64_e32 v13, v[4:5] ; GFX1150-NEXT: v_frexp_exp_i32_f64_e32 v12, v[0:1] @@ -17241,12 +17241,12 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v17 ; GFX1150-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0 ; GFX1150-NEXT: s_cbranch_vccnz .LBB13_7 -; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX1150-NEXT: s_sub_i32 s2, s2, s3 ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_add_i32 s2, s2, 26 ; GFX1150-NEXT: .p2align 6 -; GFX1150-NEXT: 
.LBB13_5: ; %frem.loop_body +; GFX1150-NEXT: .LBB13_5: ; %frem.loop_body23 ; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-NEXT: v_dual_mov_b32 v15, v11 :: v_dual_mov_b32 v14, v10 @@ -17266,7 +17266,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: ; %bb.6: ; %Flow51 ; GFX1150-NEXT: v_dual_mov_b32 v17, s2 :: v_dual_mov_b32 v10, v14 ; GFX1150-NEXT: v_mov_b32_e32 v11, v15 -; GFX1150-NEXT: .LBB13_7: ; %frem.loop_exit +; GFX1150-NEXT: .LBB13_7: ; %frem.loop_exit24 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-NEXT: v_subrev_nc_u32_e32 v14, 25, v17 ; GFX1150-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14 @@ -17286,7 +17286,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_ngt_f64_e64 s2, |v[2:3]|, |v[6:7]| ; GFX1150-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX1150-NEXT: s_cbranch_vccz .LBB13_10 -; GFX1150-NEXT: ; %bb.9: ; %frem.else16 +; GFX1150-NEXT: ; %bb.9: ; %frem.else ; GFX1150-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[2:3]|, |v[6:7]| ; GFX1150-NEXT: v_and_b32_e32 v10, 0x80000000, v3 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) @@ -17296,7 +17296,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_branch .LBB13_16 ; GFX1150-NEXT: .LBB13_10: ; GFX1150-NEXT: ; implicit-def: $vgpr10_vgpr11 -; GFX1150-NEXT: .LBB13_11: ; %frem.compute15 +; GFX1150-NEXT: .LBB13_11: ; %frem.compute ; GFX1150-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]| ; GFX1150-NEXT: v_frexp_exp_i32_f64_e32 v15, v[6:7] ; GFX1150-NEXT: v_frexp_exp_i32_f64_e32 v14, v[2:3] @@ -17328,12 +17328,12 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v19 ; GFX1150-NEXT: v_div_fixup_f64 v[14:15], 
v[14:15], v[10:11], 1.0 ; GFX1150-NEXT: s_cbranch_vccnz .LBB13_15 -; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX1150-NEXT: s_sub_i32 s2, s2, s3 ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_add_i32 s2, s2, 26 ; GFX1150-NEXT: .p2align 6 -; GFX1150-NEXT: .LBB13_13: ; %frem.loop_body23 +; GFX1150-NEXT: .LBB13_13: ; %frem.loop_body ; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-NEXT: v_dual_mov_b32 v17, v13 :: v_dual_mov_b32 v16, v12 @@ -17353,7 +17353,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: ; %bb.14: ; %Flow ; GFX1150-NEXT: v_dual_mov_b32 v19, s2 :: v_dual_mov_b32 v12, v16 ; GFX1150-NEXT: v_mov_b32_e32 v13, v17 -; GFX1150-NEXT: .LBB13_15: ; %frem.loop_exit24 +; GFX1150-NEXT: .LBB13_15: ; %frem.loop_exit ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-NEXT: v_subrev_nc_u32_e32 v16, 25, v19 ; GFX1150-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16 @@ -17398,7 +17398,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_ngt_f64_e64 s2, |v[0:1]|, |v[4:5]| ; GFX1200-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX1200-NEXT: s_cbranch_vccz .LBB13_2 -; GFX1200-NEXT: ; %bb.1: ; %frem.else +; GFX1200-NEXT: ; %bb.1: ; %frem.else16 ; GFX1200-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[0:1]|, |v[4:5]| ; GFX1200-NEXT: v_and_b32_e32 v8, 0x80000000, v1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) @@ -17408,7 +17408,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_branch .LBB13_8 ; GFX1200-NEXT: .LBB13_2: ; GFX1200-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX1200-NEXT: .LBB13_3: ; %frem.compute +; GFX1200-NEXT: .LBB13_3: ; %frem.compute15 ; GFX1200-NEXT: 
v_frexp_mant_f64_e64 v[8:9], |v[0:1]| ; GFX1200-NEXT: v_frexp_exp_i32_f64_e32 v13, v[4:5] ; GFX1200-NEXT: v_frexp_exp_i32_f64_e32 v12, v[0:1] @@ -17441,11 +17441,11 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v17 ; GFX1200-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0 ; GFX1200-NEXT: s_cbranch_vccnz .LBB13_7 -; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX1200-NEXT: s_sub_co_i32 s2, s2, s3 ; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1200-NEXT: s_add_co_i32 s2, s2, 26 -; GFX1200-NEXT: .LBB13_5: ; %frem.loop_body +; GFX1200-NEXT: .LBB13_5: ; %frem.loop_body23 ; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1200-NEXT: v_dual_mov_b32 v15, v11 :: v_dual_mov_b32 v14, v10 @@ -17466,7 +17466,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: ; %bb.6: ; %Flow51 ; GFX1200-NEXT: v_dual_mov_b32 v17, s2 :: v_dual_mov_b32 v10, v14 ; GFX1200-NEXT: v_mov_b32_e32 v11, v15 -; GFX1200-NEXT: .LBB13_7: ; %frem.loop_exit +; GFX1200-NEXT: .LBB13_7: ; %frem.loop_exit24 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-NEXT: v_subrev_nc_u32_e32 v14, 25, v17 ; GFX1200-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14 @@ -17488,7 +17488,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_cbranch_vccz .LBB13_10 -; GFX1200-NEXT: ; %bb.9: ; %frem.else16 +; GFX1200-NEXT: ; %bb.9: ; %frem.else ; GFX1200-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[2:3]|, |v[6:7]| ; GFX1200-NEXT: v_and_b32_e32 v10, 0x80000000, v3 ; GFX1200-NEXT: s_wait_alu 0xfffd @@ -17499,7 +17499,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) 
%out, ptr addrspace(1) %i ; GFX1200-NEXT: s_branch .LBB13_16 ; GFX1200-NEXT: .LBB13_10: ; GFX1200-NEXT: ; implicit-def: $vgpr10_vgpr11 -; GFX1200-NEXT: .LBB13_11: ; %frem.compute15 +; GFX1200-NEXT: .LBB13_11: ; %frem.compute ; GFX1200-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]| ; GFX1200-NEXT: v_frexp_exp_i32_f64_e32 v15, v[6:7] ; GFX1200-NEXT: v_frexp_exp_i32_f64_e32 v14, v[2:3] @@ -17532,11 +17532,11 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v19 ; GFX1200-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0 ; GFX1200-NEXT: s_cbranch_vccnz .LBB13_15 -; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX1200-NEXT: s_sub_co_i32 s2, s2, s3 ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_add_co_i32 s2, s2, 26 -; GFX1200-NEXT: .LBB13_13: ; %frem.loop_body23 +; GFX1200-NEXT: .LBB13_13: ; %frem.loop_body ; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_2) ; GFX1200-NEXT: v_dual_mov_b32 v17, v13 :: v_dual_mov_b32 v16, v12 @@ -17559,7 +17559,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: ; %bb.14: ; %Flow ; GFX1200-NEXT: v_dual_mov_b32 v19, s2 :: v_dual_mov_b32 v12, v16 ; GFX1200-NEXT: v_mov_b32_e32 v13, v17 -; GFX1200-NEXT: .LBB13_15: ; %frem.loop_exit24 +; GFX1200-NEXT: .LBB13_15: ; %frem.loop_exit ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-NEXT: v_subrev_nc_u32_e32 v16, 25, v19 ; GFX1200-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16 diff --git a/llvm/test/Transforms/ExpandFp/AMDGPU/frem.ll b/llvm/test/Transforms/ExpandFp/AMDGPU/frem.ll index d25d0f15cf838..4c0f9db147c96 100644 --- a/llvm/test/Transforms/ExpandFp/AMDGPU/frem.ll +++ b/llvm/test/Transforms/ExpandFp/AMDGPU/frem.ll @@ -380,9 +380,9 @@ define amdgpu_kernel void 
@frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX1:%.*]] = fpext half [[AX]] to float ; CHECK-NEXT: [[AY2:%.*]] = fpext half [[AY]] to float ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt float [[AX1]], [[AY2]] -; CHECK-NEXT: br i1 [[TMP3]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[FREM_COMPUTE19:.*]], label %[[FREM_ELSE20:.*]] ; CHECK: [[BB4:.*]]: -; CHECK-NEXT: [[RET:%.*]] = phi half [ [[TMP38:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP29:%.*]], %[[FREM_ELSE]] ] +; CHECK-NEXT: [[RET:%.*]] = phi half [ [[TMP58:%.*]], %[[FREM_LOOP_EXIT28:.*]] ], [ [[TMP57:%.*]], %[[FREM_ELSE20]] ] ; CHECK-NEXT: [[TMP5:%.*]] = fcmp ueq half [[TMP2]], 0xH0000 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], half 0xH7E00, half [[RET]] ; CHECK-NEXT: [[TMP7:%.*]] = call half @llvm.fabs.f16(half [[TMP1]]) @@ -396,9 +396,9 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX16:%.*]] = fpext half [[AX14]] to float ; CHECK-NEXT: [[AY17:%.*]] = fpext half [[AY15]] to float ; CHECK-NEXT: [[TMP13:%.*]] = fcmp ogt float [[AX16]], [[AY17]] -; CHECK-NEXT: br i1 [[TMP13]], label %[[FREM_COMPUTE19:.*]], label %[[FREM_ELSE20:.*]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]] ; CHECK: [[BB14:.*]]: -; CHECK-NEXT: [[RET18:%.*]] = phi half [ [[TMP57:%.*]], %[[FREM_LOOP_EXIT28:.*]] ], [ [[TMP48:%.*]], %[[FREM_ELSE20]] ] +; CHECK-NEXT: [[RET18:%.*]] = phi half [ [[TMP46:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP38:%.*]], %[[FREM_ELSE]] ] ; CHECK-NEXT: [[TMP15:%.*]] = fcmp ueq half [[TMP12]], 0xH0000 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], half 0xH7E00, half [[RET18]] ; CHECK-NEXT: [[TMP17:%.*]] = call half @llvm.fabs.f16(half [[TMP11]]) @@ -408,12 +408,12 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: store <2 x half> [[R2]], ptr addrspace(1) [[OUT]], align 8 ; CHECK-NEXT: ret void ; CHECK: 
[[FREM_COMPUTE]]: -; CHECK-NEXT: [[TMP20:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX1]]) +; CHECK-NEXT: [[TMP20:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX16]]) ; CHECK-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP20]], 0 ; CHECK-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP20]], 1 ; CHECK-NEXT: [[EX:%.*]] = sub i32 [[TMP22]], 1 ; CHECK-NEXT: [[AX3:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP21]], i32 11) -; CHECK-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY2]]) +; CHECK-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY17]]) ; CHECK-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 ; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 ; CHECK-NEXT: [[EY:%.*]] = sub i32 [[TMP25]], 1 @@ -423,10 +423,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[NB]], 11 ; CHECK-NEXT: br i1 [[TMP26]], label %[[FREM_LOOP_BODY:.*]], label %[[FREM_LOOP_EXIT]] ; CHECK: [[FREM_ELSE]]: -; CHECK-NEXT: [[TMP27:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP1]]) -; CHECK-NEXT: [[TMP28:%.*]] = fcmp oeq float [[AX1]], [[AY2]] -; CHECK-NEXT: [[TMP29]] = select i1 [[TMP28]], half [[TMP27]], half [[TMP1]] -; CHECK-NEXT: br label %[[BB4]] +; CHECK-NEXT: [[TMP28:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP11]]) +; CHECK-NEXT: [[TMP29:%.*]] = fcmp oeq float [[AX16]], [[AY17]] +; CHECK-NEXT: [[TMP38]] = select i1 [[TMP29]], half [[TMP28]], half [[TMP11]] +; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[FREM_LOOP_BODY]]: ; CHECK-NEXT: [[NB_IV:%.*]] = phi i32 [ [[NB]], %[[FREM_COMPUTE]] ], [ [[NB_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] ; CHECK-NEXT: [[AX_LOOP_PHI:%.*]] = phi float [ [[AX3]], %[[FREM_COMPUTE]] ], [ [[AX_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] @@ -456,15 +456,15 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; 
CHECK-NEXT: [[AX12:%.*]] = select i1 [[CLT10]], float [[AXP11]], float [[AX9]] ; CHECK-NEXT: [[AX13:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX12]], i32 [[EY]]) ; CHECK-NEXT: [[TMP37:%.*]] = fptrunc float [[AX13]] to half -; CHECK-NEXT: [[TMP38]] = call half @llvm.copysign.f16(half [[TMP37]], half [[TMP1]]) -; CHECK-NEXT: br label %[[BB4]] +; CHECK-NEXT: [[TMP46]] = call half @llvm.copysign.f16(half [[TMP37]], half [[TMP11]]) +; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[FREM_COMPUTE19]]: -; CHECK-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX16]]) +; CHECK-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX1]]) ; CHECK-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0 ; CHECK-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP39]], 1 ; CHECK-NEXT: [[EX21:%.*]] = sub i32 [[TMP41]], 1 ; CHECK-NEXT: [[AX22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP40]], i32 11) -; CHECK-NEXT: [[TMP42:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY17]]) +; CHECK-NEXT: [[TMP42:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY2]]) ; CHECK-NEXT: [[TMP43:%.*]] = extractvalue { float, i32 } [[TMP42]], 0 ; CHECK-NEXT: [[TMP44:%.*]] = extractvalue { float, i32 } [[TMP42]], 1 ; CHECK-NEXT: [[EY23:%.*]] = sub i32 [[TMP44]], 1 @@ -474,10 +474,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[TMP45:%.*]] = icmp sgt i32 [[NB25]], 11 ; CHECK-NEXT: br i1 [[TMP45]], label %[[FREM_LOOP_BODY27:.*]], label %[[FREM_LOOP_EXIT28]] ; CHECK: [[FREM_ELSE20]]: -; CHECK-NEXT: [[TMP46:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP11]]) -; CHECK-NEXT: [[TMP47:%.*]] = fcmp oeq float [[AX16]], [[AY17]] -; CHECK-NEXT: [[TMP48]] = select i1 [[TMP47]], half [[TMP46]], half [[TMP11]] -; CHECK-NEXT: br label %[[BB14]] +; CHECK-NEXT: [[TMP47:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP1]]) +; CHECK-NEXT: [[TMP48:%.*]] = fcmp oeq float 
[[AX1]], [[AY2]] +; CHECK-NEXT: [[TMP57]] = select i1 [[TMP48]], half [[TMP47]], half [[TMP1]] +; CHECK-NEXT: br label %[[BB4]] ; CHECK: [[FREM_LOOP_BODY27]]: ; CHECK-NEXT: [[NB_IV29:%.*]] = phi i32 [ [[NB25]], %[[FREM_COMPUTE19]] ], [ [[NB_UPDATE37:%.*]], %[[FREM_LOOP_BODY27]] ] ; CHECK-NEXT: [[AX_LOOP_PHI30:%.*]] = phi float [ [[AX22]], %[[FREM_COMPUTE19]] ], [ [[AX_UPDATE36:%.*]], %[[FREM_LOOP_BODY27]] ] @@ -507,8 +507,8 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX45:%.*]] = select i1 [[CLT43]], float [[AXP44]], float [[AX42]] ; CHECK-NEXT: [[AX46:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX45]], i32 [[EY23]]) ; CHECK-NEXT: [[TMP56:%.*]] = fptrunc float [[AX46]] to half -; CHECK-NEXT: [[TMP57]] = call half @llvm.copysign.f16(half [[TMP56]], half [[TMP11]]) -; CHECK-NEXT: br label %[[BB14]] +; CHECK-NEXT: [[TMP58]] = call half @llvm.copysign.f16(half [[TMP56]], half [[TMP1]]) +; CHECK-NEXT: br label %[[BB4]] ; ptr addrspace(1) %in2) { %gep2 = getelementptr <2 x half>, ptr addrspace(1) %in2, i32 4 @@ -532,9 +532,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX1:%.*]] = fpext half [[AX]] to float ; CHECK-NEXT: [[AY2:%.*]] = fpext half [[AY]] to float ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt float [[AX1]], [[AY2]] -; CHECK-NEXT: br i1 [[TMP3]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[FREM_COMPUTE85:.*]], label %[[FREM_ELSE86:.*]] ; CHECK: [[BB4:.*]]: -; CHECK-NEXT: [[RET:%.*]] = phi half [ [[TMP58:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP49:%.*]], %[[FREM_ELSE]] ] +; CHECK-NEXT: [[RET:%.*]] = phi half [ [[TMP116:%.*]], %[[FREM_LOOP_EXIT94:.*]] ], [ [[TMP115:%.*]], %[[FREM_ELSE86]] ] ; CHECK-NEXT: [[TMP5:%.*]] = fcmp ueq half [[TMP2]], 0xH0000 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], half 0xH7E00, half [[RET]] ; CHECK-NEXT: [[TMP7:%.*]] = call half @llvm.fabs.f16(half [[TMP1]]) @@ -548,9 +548,9 
@@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX16:%.*]] = fpext half [[AX14]] to float ; CHECK-NEXT: [[AY17:%.*]] = fpext half [[AY15]] to float ; CHECK-NEXT: [[TMP13:%.*]] = fcmp ogt float [[AX16]], [[AY17]] -; CHECK-NEXT: br i1 [[TMP13]], label %[[FREM_COMPUTE19:.*]], label %[[FREM_ELSE20:.*]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[FREM_COMPUTE52:.*]], label %[[FREM_ELSE53:.*]] ; CHECK: [[BB14:.*]]: -; CHECK-NEXT: [[RET18:%.*]] = phi half [ [[TMP77:%.*]], %[[FREM_LOOP_EXIT28:.*]] ], [ [[TMP68:%.*]], %[[FREM_ELSE20]] ] +; CHECK-NEXT: [[RET18:%.*]] = phi half [ [[TMP104:%.*]], %[[FREM_LOOP_EXIT61:.*]] ], [ [[TMP96:%.*]], %[[FREM_ELSE53]] ] ; CHECK-NEXT: [[TMP15:%.*]] = fcmp ueq half [[TMP12]], 0xH0000 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], half 0xH7E00, half [[RET18]] ; CHECK-NEXT: [[TMP17:%.*]] = call half @llvm.fabs.f16(half [[TMP11]]) @@ -564,9 +564,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX49:%.*]] = fpext half [[AX47]] to float ; CHECK-NEXT: [[AY50:%.*]] = fpext half [[AY48]] to float ; CHECK-NEXT: [[TMP23:%.*]] = fcmp ogt float [[AX49]], [[AY50]] -; CHECK-NEXT: br i1 [[TMP23]], label %[[FREM_COMPUTE52:.*]], label %[[FREM_ELSE53:.*]] +; CHECK-NEXT: br i1 [[TMP23]], label %[[FREM_COMPUTE19:.*]], label %[[FREM_ELSE20:.*]] ; CHECK: [[BB24:.*]]: -; CHECK-NEXT: [[RET51:%.*]] = phi half [ [[TMP96:%.*]], %[[FREM_LOOP_EXIT61:.*]] ], [ [[TMP87:%.*]], %[[FREM_ELSE53]] ] +; CHECK-NEXT: [[RET51:%.*]] = phi half [ [[TMP85:%.*]], %[[FREM_LOOP_EXIT28:.*]] ], [ [[TMP77:%.*]], %[[FREM_ELSE20]] ] ; CHECK-NEXT: [[TMP25:%.*]] = fcmp ueq half [[TMP22]], 0xH0000 ; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], half 0xH7E00, half [[RET51]] ; CHECK-NEXT: [[TMP27:%.*]] = call half @llvm.fabs.f16(half [[TMP21]]) @@ -580,9 +580,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX82:%.*]] = fpext half 
[[AX80]] to float ; CHECK-NEXT: [[AY83:%.*]] = fpext half [[AY81]] to float ; CHECK-NEXT: [[TMP33:%.*]] = fcmp ogt float [[AX82]], [[AY83]] -; CHECK-NEXT: br i1 [[TMP33]], label %[[FREM_COMPUTE85:.*]], label %[[FREM_ELSE86:.*]] +; CHECK-NEXT: br i1 [[TMP33]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]] ; CHECK: [[BB34:.*]]: -; CHECK-NEXT: [[RET84:%.*]] = phi half [ [[TMP115:%.*]], %[[FREM_LOOP_EXIT94:.*]] ], [ [[TMP106:%.*]], %[[FREM_ELSE86]] ] +; CHECK-NEXT: [[RET84:%.*]] = phi half [ [[TMP66:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP58:%.*]], %[[FREM_ELSE]] ] ; CHECK-NEXT: [[TMP35:%.*]] = fcmp ueq half [[TMP32]], 0xH0000 ; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], half 0xH7E00, half [[RET84]] ; CHECK-NEXT: [[TMP37:%.*]] = call half @llvm.fabs.f16(half [[TMP31]]) @@ -592,12 +592,12 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: store <4 x half> [[R2]], ptr addrspace(1) [[OUT]], align 16 ; CHECK-NEXT: ret void ; CHECK: [[FREM_COMPUTE]]: -; CHECK-NEXT: [[TMP40:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX1]]) +; CHECK-NEXT: [[TMP40:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX82]]) ; CHECK-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP40]], 0 ; CHECK-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP40]], 1 ; CHECK-NEXT: [[EX:%.*]] = sub i32 [[TMP42]], 1 ; CHECK-NEXT: [[AX3:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP41]], i32 11) -; CHECK-NEXT: [[TMP43:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY2]]) +; CHECK-NEXT: [[TMP43:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY83]]) ; CHECK-NEXT: [[TMP44:%.*]] = extractvalue { float, i32 } [[TMP43]], 0 ; CHECK-NEXT: [[TMP45:%.*]] = extractvalue { float, i32 } [[TMP43]], 1 ; CHECK-NEXT: [[EY:%.*]] = sub i32 [[TMP45]], 1 @@ -607,10 +607,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[TMP46:%.*]] = icmp sgt i32 [[NB]], 11 ; CHECK-NEXT: br 
i1 [[TMP46]], label %[[FREM_LOOP_BODY:.*]], label %[[FREM_LOOP_EXIT]] ; CHECK: [[FREM_ELSE]]: -; CHECK-NEXT: [[TMP47:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP1]]) -; CHECK-NEXT: [[TMP48:%.*]] = fcmp oeq float [[AX1]], [[AY2]] -; CHECK-NEXT: [[TMP49]] = select i1 [[TMP48]], half [[TMP47]], half [[TMP1]] -; CHECK-NEXT: br label %[[BB4]] +; CHECK-NEXT: [[TMP48:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP31]]) +; CHECK-NEXT: [[TMP49:%.*]] = fcmp oeq float [[AX82]], [[AY83]] +; CHECK-NEXT: [[TMP58]] = select i1 [[TMP49]], half [[TMP48]], half [[TMP31]] +; CHECK-NEXT: br label %[[BB34]] ; CHECK: [[FREM_LOOP_BODY]]: ; CHECK-NEXT: [[NB_IV:%.*]] = phi i32 [ [[NB]], %[[FREM_COMPUTE]] ], [ [[NB_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] ; CHECK-NEXT: [[AX_LOOP_PHI:%.*]] = phi float [ [[AX3]], %[[FREM_COMPUTE]] ], [ [[AX_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] @@ -640,15 +640,15 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX12:%.*]] = select i1 [[CLT10]], float [[AXP11]], float [[AX9]] ; CHECK-NEXT: [[AX13:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX12]], i32 [[EY]]) ; CHECK-NEXT: [[TMP57:%.*]] = fptrunc float [[AX13]] to half -; CHECK-NEXT: [[TMP58]] = call half @llvm.copysign.f16(half [[TMP57]], half [[TMP1]]) -; CHECK-NEXT: br label %[[BB4]] +; CHECK-NEXT: [[TMP66]] = call half @llvm.copysign.f16(half [[TMP57]], half [[TMP31]]) +; CHECK-NEXT: br label %[[BB34]] ; CHECK: [[FREM_COMPUTE19]]: -; CHECK-NEXT: [[TMP59:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX16]]) +; CHECK-NEXT: [[TMP59:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX49]]) ; CHECK-NEXT: [[TMP60:%.*]] = extractvalue { float, i32 } [[TMP59]], 0 ; CHECK-NEXT: [[TMP61:%.*]] = extractvalue { float, i32 } [[TMP59]], 1 ; CHECK-NEXT: [[EX21:%.*]] = sub i32 [[TMP61]], 1 ; CHECK-NEXT: [[AX22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP60]], i32 11) -; CHECK-NEXT: [[TMP62:%.*]] = call { float, i32 } 
@llvm.frexp.f32.i32(float [[AY17]]) +; CHECK-NEXT: [[TMP62:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY50]]) ; CHECK-NEXT: [[TMP63:%.*]] = extractvalue { float, i32 } [[TMP62]], 0 ; CHECK-NEXT: [[TMP64:%.*]] = extractvalue { float, i32 } [[TMP62]], 1 ; CHECK-NEXT: [[EY23:%.*]] = sub i32 [[TMP64]], 1 @@ -658,10 +658,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[TMP65:%.*]] = icmp sgt i32 [[NB25]], 11 ; CHECK-NEXT: br i1 [[TMP65]], label %[[FREM_LOOP_BODY27:.*]], label %[[FREM_LOOP_EXIT28]] ; CHECK: [[FREM_ELSE20]]: -; CHECK-NEXT: [[TMP66:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP11]]) -; CHECK-NEXT: [[TMP67:%.*]] = fcmp oeq float [[AX16]], [[AY17]] -; CHECK-NEXT: [[TMP68]] = select i1 [[TMP67]], half [[TMP66]], half [[TMP11]] -; CHECK-NEXT: br label %[[BB14]] +; CHECK-NEXT: [[TMP67:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP21]]) +; CHECK-NEXT: [[TMP68:%.*]] = fcmp oeq float [[AX49]], [[AY50]] +; CHECK-NEXT: [[TMP77]] = select i1 [[TMP68]], half [[TMP67]], half [[TMP21]] +; CHECK-NEXT: br label %[[BB24]] ; CHECK: [[FREM_LOOP_BODY27]]: ; CHECK-NEXT: [[NB_IV29:%.*]] = phi i32 [ [[NB25]], %[[FREM_COMPUTE19]] ], [ [[NB_UPDATE37:%.*]], %[[FREM_LOOP_BODY27]] ] ; CHECK-NEXT: [[AX_LOOP_PHI30:%.*]] = phi float [ [[AX22]], %[[FREM_COMPUTE19]] ], [ [[AX_UPDATE36:%.*]], %[[FREM_LOOP_BODY27]] ] @@ -691,15 +691,15 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX45:%.*]] = select i1 [[CLT43]], float [[AXP44]], float [[AX42]] ; CHECK-NEXT: [[AX46:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX45]], i32 [[EY23]]) ; CHECK-NEXT: [[TMP76:%.*]] = fptrunc float [[AX46]] to half -; CHECK-NEXT: [[TMP77]] = call half @llvm.copysign.f16(half [[TMP76]], half [[TMP11]]) -; CHECK-NEXT: br label %[[BB14]] +; CHECK-NEXT: [[TMP85]] = call half @llvm.copysign.f16(half [[TMP76]], half [[TMP21]]) +; CHECK-NEXT: br label %[[BB24]] ; CHECK: 
[[FREM_COMPUTE52]]: -; CHECK-NEXT: [[TMP78:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX49]]) +; CHECK-NEXT: [[TMP78:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX16]]) ; CHECK-NEXT: [[TMP79:%.*]] = extractvalue { float, i32 } [[TMP78]], 0 ; CHECK-NEXT: [[TMP80:%.*]] = extractvalue { float, i32 } [[TMP78]], 1 ; CHECK-NEXT: [[EX54:%.*]] = sub i32 [[TMP80]], 1 ; CHECK-NEXT: [[AX55:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP79]], i32 11) -; CHECK-NEXT: [[TMP81:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY50]]) +; CHECK-NEXT: [[TMP81:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY17]]) ; CHECK-NEXT: [[TMP82:%.*]] = extractvalue { float, i32 } [[TMP81]], 0 ; CHECK-NEXT: [[TMP83:%.*]] = extractvalue { float, i32 } [[TMP81]], 1 ; CHECK-NEXT: [[EY56:%.*]] = sub i32 [[TMP83]], 1 @@ -709,10 +709,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[TMP84:%.*]] = icmp sgt i32 [[NB58]], 11 ; CHECK-NEXT: br i1 [[TMP84]], label %[[FREM_LOOP_BODY60:.*]], label %[[FREM_LOOP_EXIT61]] ; CHECK: [[FREM_ELSE53]]: -; CHECK-NEXT: [[TMP85:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP21]]) -; CHECK-NEXT: [[TMP86:%.*]] = fcmp oeq float [[AX49]], [[AY50]] -; CHECK-NEXT: [[TMP87]] = select i1 [[TMP86]], half [[TMP85]], half [[TMP21]] -; CHECK-NEXT: br label %[[BB24]] +; CHECK-NEXT: [[TMP86:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP11]]) +; CHECK-NEXT: [[TMP87:%.*]] = fcmp oeq float [[AX16]], [[AY17]] +; CHECK-NEXT: [[TMP96]] = select i1 [[TMP87]], half [[TMP86]], half [[TMP11]] +; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[FREM_LOOP_BODY60]]: ; CHECK-NEXT: [[NB_IV62:%.*]] = phi i32 [ [[NB58]], %[[FREM_COMPUTE52]] ], [ [[NB_UPDATE70:%.*]], %[[FREM_LOOP_BODY60]] ] ; CHECK-NEXT: [[AX_LOOP_PHI63:%.*]] = phi float [ [[AX55]], %[[FREM_COMPUTE52]] ], [ [[AX_UPDATE69:%.*]], %[[FREM_LOOP_BODY60]] ] @@ -742,15 +742,15 @@ define amdgpu_kernel void @frem_v4f16(ptr 
addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX78:%.*]] = select i1 [[CLT76]], float [[AXP77]], float [[AX75]] ; CHECK-NEXT: [[AX79:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX78]], i32 [[EY56]]) ; CHECK-NEXT: [[TMP95:%.*]] = fptrunc float [[AX79]] to half -; CHECK-NEXT: [[TMP96]] = call half @llvm.copysign.f16(half [[TMP95]], half [[TMP21]]) -; CHECK-NEXT: br label %[[BB24]] +; CHECK-NEXT: [[TMP104]] = call half @llvm.copysign.f16(half [[TMP95]], half [[TMP11]]) +; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[FREM_COMPUTE85]]: -; CHECK-NEXT: [[TMP97:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX82]]) +; CHECK-NEXT: [[TMP97:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX1]]) ; CHECK-NEXT: [[TMP98:%.*]] = extractvalue { float, i32 } [[TMP97]], 0 ; CHECK-NEXT: [[TMP99:%.*]] = extractvalue { float, i32 } [[TMP97]], 1 ; CHECK-NEXT: [[EX87:%.*]] = sub i32 [[TMP99]], 1 ; CHECK-NEXT: [[AX88:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP98]], i32 11) -; CHECK-NEXT: [[TMP100:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY83]]) +; CHECK-NEXT: [[TMP100:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY2]]) ; CHECK-NEXT: [[TMP101:%.*]] = extractvalue { float, i32 } [[TMP100]], 0 ; CHECK-NEXT: [[TMP102:%.*]] = extractvalue { float, i32 } [[TMP100]], 1 ; CHECK-NEXT: [[EY89:%.*]] = sub i32 [[TMP102]], 1 @@ -760,10 +760,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[TMP103:%.*]] = icmp sgt i32 [[NB91]], 11 ; CHECK-NEXT: br i1 [[TMP103]], label %[[FREM_LOOP_BODY93:.*]], label %[[FREM_LOOP_EXIT94]] ; CHECK: [[FREM_ELSE86]]: -; CHECK-NEXT: [[TMP104:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP31]]) -; CHECK-NEXT: [[TMP105:%.*]] = fcmp oeq float [[AX82]], [[AY83]] -; CHECK-NEXT: [[TMP106]] = select i1 [[TMP105]], half [[TMP104]], half [[TMP31]] -; CHECK-NEXT: br label %[[BB34]] +; CHECK-NEXT: [[TMP105:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half 
[[TMP1]]) +; CHECK-NEXT: [[TMP106:%.*]] = fcmp oeq float [[AX1]], [[AY2]] +; CHECK-NEXT: [[TMP115]] = select i1 [[TMP106]], half [[TMP105]], half [[TMP1]] +; CHECK-NEXT: br label %[[BB4]] ; CHECK: [[FREM_LOOP_BODY93]]: ; CHECK-NEXT: [[NB_IV95:%.*]] = phi i32 [ [[NB91]], %[[FREM_COMPUTE85]] ], [ [[NB_UPDATE103:%.*]], %[[FREM_LOOP_BODY93]] ] ; CHECK-NEXT: [[AX_LOOP_PHI96:%.*]] = phi float [ [[AX88]], %[[FREM_COMPUTE85]] ], [ [[AX_UPDATE102:%.*]], %[[FREM_LOOP_BODY93]] ] @@ -793,8 +793,8 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX111:%.*]] = select i1 [[CLT109]], float [[AXP110]], float [[AX108]] ; CHECK-NEXT: [[AX112:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX111]], i32 [[EY89]]) ; CHECK-NEXT: [[TMP114:%.*]] = fptrunc float [[AX112]] to half -; CHECK-NEXT: [[TMP115]] = call half @llvm.copysign.f16(half [[TMP114]], half [[TMP31]]) -; CHECK-NEXT: br label %[[BB34]] +; CHECK-NEXT: [[TMP116]] = call half @llvm.copysign.f16(half [[TMP114]], half [[TMP1]]) +; CHECK-NEXT: br label %[[BB4]] ; ptr addrspace(1) %in2) { %gep2 = getelementptr <4 x half>, ptr addrspace(1) %in2, i32 4 @@ -816,9 +816,9 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX:%.*]] = call float @llvm.fabs.f32(float [[TMP1]]) ; CHECK-NEXT: [[AY:%.*]] = call float @llvm.fabs.f32(float [[TMP2]]) ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt float [[AX]], [[AY]] -; CHECK-NEXT: br i1 [[TMP3]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[FREM_COMPUTE15:.*]], label %[[FREM_ELSE16:.*]] ; CHECK: [[BB4:.*]]: -; CHECK-NEXT: [[RET:%.*]] = phi float [ [[TMP37:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP29:%.*]], %[[FREM_ELSE]] ] +; CHECK-NEXT: [[RET:%.*]] = phi float [ [[TMP56:%.*]], %[[FREM_LOOP_EXIT24:.*]] ], [ [[TMP55:%.*]], %[[FREM_ELSE16]] ] ; CHECK-NEXT: [[TMP5:%.*]] = fcmp ueq float [[TMP2]], 0.000000e+00 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float 
0x7FF8000000000000, float [[RET]] ; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.fabs.f32(float [[TMP1]]) @@ -830,9 +830,9 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX12:%.*]] = call float @llvm.fabs.f32(float [[TMP11]]) ; CHECK-NEXT: [[AY13:%.*]] = call float @llvm.fabs.f32(float [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = fcmp ogt float [[AX12]], [[AY13]] -; CHECK-NEXT: br i1 [[TMP13]], label %[[FREM_COMPUTE15:.*]], label %[[FREM_ELSE16:.*]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]] ; CHECK: [[BB14:.*]]: -; CHECK-NEXT: [[RET14:%.*]] = phi float [ [[TMP55:%.*]], %[[FREM_LOOP_EXIT24:.*]] ], [ [[TMP47:%.*]], %[[FREM_ELSE16]] ] +; CHECK-NEXT: [[RET14:%.*]] = phi float [ [[TMP45:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP37:%.*]], %[[FREM_ELSE]] ] ; CHECK-NEXT: [[TMP15:%.*]] = fcmp ueq float [[TMP12]], 0.000000e+00 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float 0x7FF8000000000000, float [[RET14]] ; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.fabs.f32(float [[TMP11]]) @@ -842,12 +842,12 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: store <2 x float> [[R2]], ptr addrspace(1) [[OUT]], align 8 ; CHECK-NEXT: ret void ; CHECK: [[FREM_COMPUTE]]: -; CHECK-NEXT: [[TMP20:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX]]) +; CHECK-NEXT: [[TMP20:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX12]]) ; CHECK-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP20]], 0 ; CHECK-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP20]], 1 ; CHECK-NEXT: [[EX:%.*]] = sub i32 [[TMP22]], 1 ; CHECK-NEXT: [[AX1:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP21]], i32 12) -; CHECK-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY]]) +; CHECK-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY13]]) ; CHECK-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 
; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 ; CHECK-NEXT: [[EY:%.*]] = sub i32 [[TMP25]], 1 @@ -857,10 +857,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[NB]], 12 ; CHECK-NEXT: br i1 [[TMP26]], label %[[FREM_LOOP_BODY:.*]], label %[[FREM_LOOP_EXIT]] ; CHECK: [[FREM_ELSE]]: -; CHECK-NEXT: [[TMP27:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP1]]) -; CHECK-NEXT: [[TMP28:%.*]] = fcmp oeq float [[AX]], [[AY]] -; CHECK-NEXT: [[TMP29]] = select i1 [[TMP28]], float [[TMP27]], float [[TMP1]] -; CHECK-NEXT: br label %[[BB4]] +; CHECK-NEXT: [[TMP28:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP11]]) +; CHECK-NEXT: [[TMP29:%.*]] = fcmp oeq float [[AX12]], [[AY13]] +; CHECK-NEXT: [[TMP37]] = select i1 [[TMP29]], float [[TMP28]], float [[TMP11]] +; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[FREM_LOOP_BODY]]: ; CHECK-NEXT: [[NB_IV:%.*]] = phi i32 [ [[NB]], %[[FREM_COMPUTE]] ], [ [[NB_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] ; CHECK-NEXT: [[AX_LOOP_PHI:%.*]] = phi float [ [[AX1]], %[[FREM_COMPUTE]] ], [ [[AX_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] @@ -889,15 +889,15 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AXP9:%.*]] = fadd float [[AX7]], [[AY2]] ; CHECK-NEXT: [[AX10:%.*]] = select i1 [[CLT8]], float [[AXP9]], float [[AX7]] ; CHECK-NEXT: [[AX11:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX10]], i32 [[EY]]) -; CHECK-NEXT: [[TMP37]] = call float @llvm.copysign.f32(float [[AX11]], float [[TMP1]]) -; CHECK-NEXT: br label %[[BB4]] +; CHECK-NEXT: [[TMP45]] = call float @llvm.copysign.f32(float [[AX11]], float [[TMP11]]) +; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[FREM_COMPUTE15]]: -; CHECK-NEXT: [[TMP38:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX12]]) +; CHECK-NEXT: [[TMP38:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX]]) ; CHECK-NEXT: [[TMP39:%.*]] 
= extractvalue { float, i32 } [[TMP38]], 0 ; CHECK-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP38]], 1 ; CHECK-NEXT: [[EX17:%.*]] = sub i32 [[TMP40]], 1 ; CHECK-NEXT: [[AX18:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP39]], i32 12) -; CHECK-NEXT: [[TMP41:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY13]]) +; CHECK-NEXT: [[TMP41:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY]]) ; CHECK-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP41]], 0 ; CHECK-NEXT: [[TMP43:%.*]] = extractvalue { float, i32 } [[TMP41]], 1 ; CHECK-NEXT: [[EY19:%.*]] = sub i32 [[TMP43]], 1 @@ -907,10 +907,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[NB21]], 12 ; CHECK-NEXT: br i1 [[TMP44]], label %[[FREM_LOOP_BODY23:.*]], label %[[FREM_LOOP_EXIT24]] ; CHECK: [[FREM_ELSE16]]: -; CHECK-NEXT: [[TMP45:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP11]]) -; CHECK-NEXT: [[TMP46:%.*]] = fcmp oeq float [[AX12]], [[AY13]] -; CHECK-NEXT: [[TMP47]] = select i1 [[TMP46]], float [[TMP45]], float [[TMP11]] -; CHECK-NEXT: br label %[[BB14]] +; CHECK-NEXT: [[TMP46:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP1]]) +; CHECK-NEXT: [[TMP47:%.*]] = fcmp oeq float [[AX]], [[AY]] +; CHECK-NEXT: [[TMP55]] = select i1 [[TMP47]], float [[TMP46]], float [[TMP1]] +; CHECK-NEXT: br label %[[BB4]] ; CHECK: [[FREM_LOOP_BODY23]]: ; CHECK-NEXT: [[NB_IV25:%.*]] = phi i32 [ [[NB21]], %[[FREM_COMPUTE15]] ], [ [[NB_UPDATE33:%.*]], %[[FREM_LOOP_BODY23]] ] ; CHECK-NEXT: [[AX_LOOP_PHI26:%.*]] = phi float [ [[AX18]], %[[FREM_COMPUTE15]] ], [ [[AX_UPDATE32:%.*]], %[[FREM_LOOP_BODY23]] ] @@ -939,8 +939,8 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AXP40:%.*]] = fadd float [[AX38]], [[AY20]] ; CHECK-NEXT: [[AX41:%.*]] = select i1 [[CLT39]], float [[AXP40]], float [[AX38]] ; CHECK-NEXT: [[AX42:%.*]] = call 
float @llvm.ldexp.f32.i32(float [[AX41]], i32 [[EY19]]) -; CHECK-NEXT: [[TMP55]] = call float @llvm.copysign.f32(float [[AX42]], float [[TMP11]]) -; CHECK-NEXT: br label %[[BB14]] +; CHECK-NEXT: [[TMP56]] = call float @llvm.copysign.f32(float [[AX42]], float [[TMP1]]) +; CHECK-NEXT: br label %[[BB4]] ; ptr addrspace(1) %in2) { %gep2 = getelementptr <2 x float>, ptr addrspace(1) %in2, i32 4 @@ -962,9 +962,9 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX:%.*]] = call float @llvm.fabs.f32(float [[TMP1]]) ; CHECK-NEXT: [[AY:%.*]] = call float @llvm.fabs.f32(float [[TMP2]]) ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt float [[AX]], [[AY]] -; CHECK-NEXT: br i1 [[TMP3]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[FREM_COMPUTE77:.*]], label %[[FREM_ELSE78:.*]] ; CHECK: [[BB4:.*]]: -; CHECK-NEXT: [[RET:%.*]] = phi float [ [[TMP57:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP49:%.*]], %[[FREM_ELSE]] ] +; CHECK-NEXT: [[RET:%.*]] = phi float [ [[TMP112:%.*]], %[[FREM_LOOP_EXIT86:.*]] ], [ [[TMP111:%.*]], %[[FREM_ELSE78]] ] ; CHECK-NEXT: [[TMP5:%.*]] = fcmp ueq float [[TMP2]], 0.000000e+00 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float 0x7FF8000000000000, float [[RET]] ; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.fabs.f32(float [[TMP1]]) @@ -976,9 +976,9 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX12:%.*]] = call float @llvm.fabs.f32(float [[TMP11]]) ; CHECK-NEXT: [[AY13:%.*]] = call float @llvm.fabs.f32(float [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = fcmp ogt float [[AX12]], [[AY13]] -; CHECK-NEXT: br i1 [[TMP13]], label %[[FREM_COMPUTE15:.*]], label %[[FREM_ELSE16:.*]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[FREM_COMPUTE46:.*]], label %[[FREM_ELSE47:.*]] ; CHECK: [[BB14:.*]]: -; CHECK-NEXT: [[RET14:%.*]] = phi float [ [[TMP75:%.*]], %[[FREM_LOOP_EXIT24:.*]] ], [ [[TMP67:%.*]], %[[FREM_ELSE16]] ] +; CHECK-NEXT: 
[[RET14:%.*]] = phi float [ [[TMP101:%.*]], %[[FREM_LOOP_EXIT55:.*]] ], [ [[TMP93:%.*]], %[[FREM_ELSE47]] ] ; CHECK-NEXT: [[TMP15:%.*]] = fcmp ueq float [[TMP12]], 0.000000e+00 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float 0x7FF8000000000000, float [[RET14]] ; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.fabs.f32(float [[TMP11]]) @@ -990,9 +990,9 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX43:%.*]] = call float @llvm.fabs.f32(float [[TMP21]]) ; CHECK-NEXT: [[AY44:%.*]] = call float @llvm.fabs.f32(float [[TMP22]]) ; CHECK-NEXT: [[TMP23:%.*]] = fcmp ogt float [[AX43]], [[AY44]] -; CHECK-NEXT: br i1 [[TMP23]], label %[[FREM_COMPUTE46:.*]], label %[[FREM_ELSE47:.*]] +; CHECK-NEXT: br i1 [[TMP23]], label %[[FREM_COMPUTE15:.*]], label %[[FREM_ELSE16:.*]] ; CHECK: [[BB24:.*]]: -; CHECK-NEXT: [[RET45:%.*]] = phi float [ [[TMP93:%.*]], %[[FREM_LOOP_EXIT55:.*]] ], [ [[TMP85:%.*]], %[[FREM_ELSE47]] ] +; CHECK-NEXT: [[RET45:%.*]] = phi float [ [[TMP83:%.*]], %[[FREM_LOOP_EXIT24:.*]] ], [ [[TMP75:%.*]], %[[FREM_ELSE16]] ] ; CHECK-NEXT: [[TMP25:%.*]] = fcmp ueq float [[TMP22]], 0.000000e+00 ; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float 0x7FF8000000000000, float [[RET45]] ; CHECK-NEXT: [[TMP27:%.*]] = call float @llvm.fabs.f32(float [[TMP21]]) @@ -1004,9 +1004,9 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX74:%.*]] = call float @llvm.fabs.f32(float [[TMP31]]) ; CHECK-NEXT: [[AY75:%.*]] = call float @llvm.fabs.f32(float [[TMP32]]) ; CHECK-NEXT: [[TMP33:%.*]] = fcmp ogt float [[AX74]], [[AY75]] -; CHECK-NEXT: br i1 [[TMP33]], label %[[FREM_COMPUTE77:.*]], label %[[FREM_ELSE78:.*]] +; CHECK-NEXT: br i1 [[TMP33]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]] ; CHECK: [[BB34:.*]]: -; CHECK-NEXT: [[RET76:%.*]] = phi float [ [[TMP111:%.*]], %[[FREM_LOOP_EXIT86:.*]] ], [ [[TMP103:%.*]], %[[FREM_ELSE78]] ] +; CHECK-NEXT: [[RET76:%.*]] = phi float 
[ [[TMP65:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP57:%.*]], %[[FREM_ELSE]] ] ; CHECK-NEXT: [[TMP35:%.*]] = fcmp ueq float [[TMP32]], 0.000000e+00 ; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float 0x7FF8000000000000, float [[RET76]] ; CHECK-NEXT: [[TMP37:%.*]] = call float @llvm.fabs.f32(float [[TMP31]]) @@ -1016,12 +1016,12 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: store <4 x float> [[R2]], ptr addrspace(1) [[OUT]], align 16 ; CHECK-NEXT: ret void ; CHECK: [[FREM_COMPUTE]]: -; CHECK-NEXT: [[TMP40:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX]]) +; CHECK-NEXT: [[TMP40:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX74]]) ; CHECK-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP40]], 0 ; CHECK-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP40]], 1 ; CHECK-NEXT: [[EX:%.*]] = sub i32 [[TMP42]], 1 ; CHECK-NEXT: [[AX1:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP41]], i32 12) -; CHECK-NEXT: [[TMP43:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY]]) +; CHECK-NEXT: [[TMP43:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY75]]) ; CHECK-NEXT: [[TMP44:%.*]] = extractvalue { float, i32 } [[TMP43]], 0 ; CHECK-NEXT: [[TMP45:%.*]] = extractvalue { float, i32 } [[TMP43]], 1 ; CHECK-NEXT: [[EY:%.*]] = sub i32 [[TMP45]], 1 @@ -1031,10 +1031,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[TMP46:%.*]] = icmp sgt i32 [[NB]], 12 ; CHECK-NEXT: br i1 [[TMP46]], label %[[FREM_LOOP_BODY:.*]], label %[[FREM_LOOP_EXIT]] ; CHECK: [[FREM_ELSE]]: -; CHECK-NEXT: [[TMP47:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP1]]) -; CHECK-NEXT: [[TMP48:%.*]] = fcmp oeq float [[AX]], [[AY]] -; CHECK-NEXT: [[TMP49]] = select i1 [[TMP48]], float [[TMP47]], float [[TMP1]] -; CHECK-NEXT: br label %[[BB4]] +; CHECK-NEXT: [[TMP48:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP31]]) +; 
CHECK-NEXT: [[TMP49:%.*]] = fcmp oeq float [[AX74]], [[AY75]] +; CHECK-NEXT: [[TMP57]] = select i1 [[TMP49]], float [[TMP48]], float [[TMP31]] +; CHECK-NEXT: br label %[[BB34]] ; CHECK: [[FREM_LOOP_BODY]]: ; CHECK-NEXT: [[NB_IV:%.*]] = phi i32 [ [[NB]], %[[FREM_COMPUTE]] ], [ [[NB_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] ; CHECK-NEXT: [[AX_LOOP_PHI:%.*]] = phi float [ [[AX1]], %[[FREM_COMPUTE]] ], [ [[AX_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] @@ -1063,15 +1063,15 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AXP9:%.*]] = fadd float [[AX7]], [[AY2]] ; CHECK-NEXT: [[AX10:%.*]] = select i1 [[CLT8]], float [[AXP9]], float [[AX7]] ; CHECK-NEXT: [[AX11:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX10]], i32 [[EY]]) -; CHECK-NEXT: [[TMP57]] = call float @llvm.copysign.f32(float [[AX11]], float [[TMP1]]) -; CHECK-NEXT: br label %[[BB4]] +; CHECK-NEXT: [[TMP65]] = call float @llvm.copysign.f32(float [[AX11]], float [[TMP31]]) +; CHECK-NEXT: br label %[[BB34]] ; CHECK: [[FREM_COMPUTE15]]: -; CHECK-NEXT: [[TMP58:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX12]]) +; CHECK-NEXT: [[TMP58:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX43]]) ; CHECK-NEXT: [[TMP59:%.*]] = extractvalue { float, i32 } [[TMP58]], 0 ; CHECK-NEXT: [[TMP60:%.*]] = extractvalue { float, i32 } [[TMP58]], 1 ; CHECK-NEXT: [[EX17:%.*]] = sub i32 [[TMP60]], 1 ; CHECK-NEXT: [[AX18:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP59]], i32 12) -; CHECK-NEXT: [[TMP61:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY13]]) +; CHECK-NEXT: [[TMP61:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY44]]) ; CHECK-NEXT: [[TMP62:%.*]] = extractvalue { float, i32 } [[TMP61]], 0 ; CHECK-NEXT: [[TMP63:%.*]] = extractvalue { float, i32 } [[TMP61]], 1 ; CHECK-NEXT: [[EY19:%.*]] = sub i32 [[TMP63]], 1 @@ -1081,10 +1081,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[TMP64:%.*]] = icmp 
sgt i32 [[NB21]], 12 ; CHECK-NEXT: br i1 [[TMP64]], label %[[FREM_LOOP_BODY23:.*]], label %[[FREM_LOOP_EXIT24]] ; CHECK: [[FREM_ELSE16]]: -; CHECK-NEXT: [[TMP65:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP11]]) -; CHECK-NEXT: [[TMP66:%.*]] = fcmp oeq float [[AX12]], [[AY13]] -; CHECK-NEXT: [[TMP67]] = select i1 [[TMP66]], float [[TMP65]], float [[TMP11]] -; CHECK-NEXT: br label %[[BB14]] +; CHECK-NEXT: [[TMP66:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP21]]) +; CHECK-NEXT: [[TMP67:%.*]] = fcmp oeq float [[AX43]], [[AY44]] +; CHECK-NEXT: [[TMP75]] = select i1 [[TMP67]], float [[TMP66]], float [[TMP21]] +; CHECK-NEXT: br label %[[BB24]] ; CHECK: [[FREM_LOOP_BODY23]]: ; CHECK-NEXT: [[NB_IV25:%.*]] = phi i32 [ [[NB21]], %[[FREM_COMPUTE15]] ], [ [[NB_UPDATE33:%.*]], %[[FREM_LOOP_BODY23]] ] ; CHECK-NEXT: [[AX_LOOP_PHI26:%.*]] = phi float [ [[AX18]], %[[FREM_COMPUTE15]] ], [ [[AX_UPDATE32:%.*]], %[[FREM_LOOP_BODY23]] ] @@ -1113,15 +1113,15 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AXP40:%.*]] = fadd float [[AX38]], [[AY20]] ; CHECK-NEXT: [[AX41:%.*]] = select i1 [[CLT39]], float [[AXP40]], float [[AX38]] ; CHECK-NEXT: [[AX42:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX41]], i32 [[EY19]]) -; CHECK-NEXT: [[TMP75]] = call float @llvm.copysign.f32(float [[AX42]], float [[TMP11]]) -; CHECK-NEXT: br label %[[BB14]] +; CHECK-NEXT: [[TMP83]] = call float @llvm.copysign.f32(float [[AX42]], float [[TMP21]]) +; CHECK-NEXT: br label %[[BB24]] ; CHECK: [[FREM_COMPUTE46]]: -; CHECK-NEXT: [[TMP76:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX43]]) +; CHECK-NEXT: [[TMP76:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX12]]) ; CHECK-NEXT: [[TMP77:%.*]] = extractvalue { float, i32 } [[TMP76]], 0 ; CHECK-NEXT: [[TMP78:%.*]] = extractvalue { float, i32 } [[TMP76]], 1 ; CHECK-NEXT: [[EX48:%.*]] = sub i32 [[TMP78]], 1 ; CHECK-NEXT: [[AX49:%.*]] = call float 
@llvm.ldexp.f32.i32(float [[TMP77]], i32 12) -; CHECK-NEXT: [[TMP79:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY44]]) +; CHECK-NEXT: [[TMP79:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY13]]) ; CHECK-NEXT: [[TMP80:%.*]] = extractvalue { float, i32 } [[TMP79]], 0 ; CHECK-NEXT: [[TMP81:%.*]] = extractvalue { float, i32 } [[TMP79]], 1 ; CHECK-NEXT: [[EY50:%.*]] = sub i32 [[TMP81]], 1 @@ -1131,10 +1131,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[TMP82:%.*]] = icmp sgt i32 [[NB52]], 12 ; CHECK-NEXT: br i1 [[TMP82]], label %[[FREM_LOOP_BODY54:.*]], label %[[FREM_LOOP_EXIT55]] ; CHECK: [[FREM_ELSE47]]: -; CHECK-NEXT: [[TMP83:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP21]]) -; CHECK-NEXT: [[TMP84:%.*]] = fcmp oeq float [[AX43]], [[AY44]] -; CHECK-NEXT: [[TMP85]] = select i1 [[TMP84]], float [[TMP83]], float [[TMP21]] -; CHECK-NEXT: br label %[[BB24]] +; CHECK-NEXT: [[TMP84:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP11]]) +; CHECK-NEXT: [[TMP85:%.*]] = fcmp oeq float [[AX12]], [[AY13]] +; CHECK-NEXT: [[TMP93]] = select i1 [[TMP85]], float [[TMP84]], float [[TMP11]] +; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[FREM_LOOP_BODY54]]: ; CHECK-NEXT: [[NB_IV56:%.*]] = phi i32 [ [[NB52]], %[[FREM_COMPUTE46]] ], [ [[NB_UPDATE64:%.*]], %[[FREM_LOOP_BODY54]] ] ; CHECK-NEXT: [[AX_LOOP_PHI57:%.*]] = phi float [ [[AX49]], %[[FREM_COMPUTE46]] ], [ [[AX_UPDATE63:%.*]], %[[FREM_LOOP_BODY54]] ] @@ -1163,15 +1163,15 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AXP71:%.*]] = fadd float [[AX69]], [[AY51]] ; CHECK-NEXT: [[AX72:%.*]] = select i1 [[CLT70]], float [[AXP71]], float [[AX69]] ; CHECK-NEXT: [[AX73:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX72]], i32 [[EY50]]) -; CHECK-NEXT: [[TMP93]] = call float @llvm.copysign.f32(float [[AX73]], float [[TMP21]]) -; CHECK-NEXT: br label %[[BB24]] +; 
CHECK-NEXT: [[TMP101]] = call float @llvm.copysign.f32(float [[AX73]], float [[TMP11]]) +; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[FREM_COMPUTE77]]: -; CHECK-NEXT: [[TMP94:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX74]]) +; CHECK-NEXT: [[TMP94:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX]]) ; CHECK-NEXT: [[TMP95:%.*]] = extractvalue { float, i32 } [[TMP94]], 0 ; CHECK-NEXT: [[TMP96:%.*]] = extractvalue { float, i32 } [[TMP94]], 1 ; CHECK-NEXT: [[EX79:%.*]] = sub i32 [[TMP96]], 1 ; CHECK-NEXT: [[AX80:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP95]], i32 12) -; CHECK-NEXT: [[TMP97:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY75]]) +; CHECK-NEXT: [[TMP97:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY]]) ; CHECK-NEXT: [[TMP98:%.*]] = extractvalue { float, i32 } [[TMP97]], 0 ; CHECK-NEXT: [[TMP99:%.*]] = extractvalue { float, i32 } [[TMP97]], 1 ; CHECK-NEXT: [[EY81:%.*]] = sub i32 [[TMP99]], 1 @@ -1181,10 +1181,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[TMP100:%.*]] = icmp sgt i32 [[NB83]], 12 ; CHECK-NEXT: br i1 [[TMP100]], label %[[FREM_LOOP_BODY85:.*]], label %[[FREM_LOOP_EXIT86]] ; CHECK: [[FREM_ELSE78]]: -; CHECK-NEXT: [[TMP101:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP31]]) -; CHECK-NEXT: [[TMP102:%.*]] = fcmp oeq float [[AX74]], [[AY75]] -; CHECK-NEXT: [[TMP103]] = select i1 [[TMP102]], float [[TMP101]], float [[TMP31]] -; CHECK-NEXT: br label %[[BB34]] +; CHECK-NEXT: [[TMP102:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP1]]) +; CHECK-NEXT: [[TMP103:%.*]] = fcmp oeq float [[AX]], [[AY]] +; CHECK-NEXT: [[TMP111]] = select i1 [[TMP103]], float [[TMP102]], float [[TMP1]] +; CHECK-NEXT: br label %[[BB4]] ; CHECK: [[FREM_LOOP_BODY85]]: ; CHECK-NEXT: [[NB_IV87:%.*]] = phi i32 [ [[NB83]], %[[FREM_COMPUTE77]] ], [ [[NB_UPDATE95:%.*]], %[[FREM_LOOP_BODY85]] ] ; CHECK-NEXT: [[AX_LOOP_PHI88:%.*]] = phi 
float [ [[AX80]], %[[FREM_COMPUTE77]] ], [ [[AX_UPDATE94:%.*]], %[[FREM_LOOP_BODY85]] ] @@ -1213,8 +1213,8 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AXP102:%.*]] = fadd float [[AX100]], [[AY82]] ; CHECK-NEXT: [[AX103:%.*]] = select i1 [[CLT101]], float [[AXP102]], float [[AX100]] ; CHECK-NEXT: [[AX104:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX103]], i32 [[EY81]]) -; CHECK-NEXT: [[TMP111]] = call float @llvm.copysign.f32(float [[AX104]], float [[TMP31]]) -; CHECK-NEXT: br label %[[BB34]] +; CHECK-NEXT: [[TMP112]] = call float @llvm.copysign.f32(float [[AX104]], float [[TMP1]]) +; CHECK-NEXT: br label %[[BB4]] ; ptr addrspace(1) %in2) { %gep2 = getelementptr <4 x float>, ptr addrspace(1) %in2, i32 4 @@ -1236,9 +1236,9 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX:%.*]] = call double @llvm.fabs.f64(double [[TMP1]]) ; CHECK-NEXT: [[AY:%.*]] = call double @llvm.fabs.f64(double [[TMP2]]) ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt double [[AX]], [[AY]] -; CHECK-NEXT: br i1 [[TMP3]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[FREM_COMPUTE15:.*]], label %[[FREM_ELSE16:.*]] ; CHECK: [[BB4:.*]]: -; CHECK-NEXT: [[RET:%.*]] = phi double [ [[TMP37:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP29:%.*]], %[[FREM_ELSE]] ] +; CHECK-NEXT: [[RET:%.*]] = phi double [ [[TMP56:%.*]], %[[FREM_LOOP_EXIT24:.*]] ], [ [[TMP55:%.*]], %[[FREM_ELSE16]] ] ; CHECK-NEXT: [[TMP5:%.*]] = fcmp ueq double [[TMP2]], 0.000000e+00 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], double 0x7FF8000000000000, double [[RET]] ; CHECK-NEXT: [[TMP7:%.*]] = call double @llvm.fabs.f64(double [[TMP1]]) @@ -1250,9 +1250,9 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AX12:%.*]] = call double @llvm.fabs.f64(double [[TMP11]]) ; CHECK-NEXT: [[AY13:%.*]] = call double @llvm.fabs.f64(double [[TMP12]]) ; 
CHECK-NEXT: [[TMP13:%.*]] = fcmp ogt double [[AX12]], [[AY13]] -; CHECK-NEXT: br i1 [[TMP13]], label %[[FREM_COMPUTE15:.*]], label %[[FREM_ELSE16:.*]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]] ; CHECK: [[BB14:.*]]: -; CHECK-NEXT: [[RET14:%.*]] = phi double [ [[TMP55:%.*]], %[[FREM_LOOP_EXIT24:.*]] ], [ [[TMP47:%.*]], %[[FREM_ELSE16]] ] +; CHECK-NEXT: [[RET14:%.*]] = phi double [ [[TMP45:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP37:%.*]], %[[FREM_ELSE]] ] ; CHECK-NEXT: [[TMP15:%.*]] = fcmp ueq double [[TMP12]], 0.000000e+00 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], double 0x7FF8000000000000, double [[RET14]] ; CHECK-NEXT: [[TMP17:%.*]] = call double @llvm.fabs.f64(double [[TMP11]]) @@ -1262,12 +1262,12 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: store <2 x double> [[R2]], ptr addrspace(1) [[OUT]], align 16 ; CHECK-NEXT: ret void ; CHECK: [[FREM_COMPUTE]]: -; CHECK-NEXT: [[TMP20:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[AX]]) +; CHECK-NEXT: [[TMP20:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[AX12]]) ; CHECK-NEXT: [[TMP21:%.*]] = extractvalue { double, i32 } [[TMP20]], 0 ; CHECK-NEXT: [[TMP22:%.*]] = extractvalue { double, i32 } [[TMP20]], 1 ; CHECK-NEXT: [[EX:%.*]] = sub i32 [[TMP22]], 1 ; CHECK-NEXT: [[AX1:%.*]] = call double @llvm.ldexp.f64.i32(double [[TMP21]], i32 26) -; CHECK-NEXT: [[TMP23:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[AY]]) +; CHECK-NEXT: [[TMP23:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[AY13]]) ; CHECK-NEXT: [[TMP24:%.*]] = extractvalue { double, i32 } [[TMP23]], 0 ; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { double, i32 } [[TMP23]], 1 ; CHECK-NEXT: [[EY:%.*]] = sub i32 [[TMP25]], 1 @@ -1277,10 +1277,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[NB]], 26 ; CHECK-NEXT: br i1 [[TMP26]], label 
%[[FREM_LOOP_BODY:.*]], label %[[FREM_LOOP_EXIT]] ; CHECK: [[FREM_ELSE]]: -; CHECK-NEXT: [[TMP27:%.*]] = call double @llvm.copysign.f64(double 0.000000e+00, double [[TMP1]]) -; CHECK-NEXT: [[TMP28:%.*]] = fcmp oeq double [[AX]], [[AY]] -; CHECK-NEXT: [[TMP29]] = select i1 [[TMP28]], double [[TMP27]], double [[TMP1]] -; CHECK-NEXT: br label %[[BB4]] +; CHECK-NEXT: [[TMP28:%.*]] = call double @llvm.copysign.f64(double 0.000000e+00, double [[TMP11]]) +; CHECK-NEXT: [[TMP29:%.*]] = fcmp oeq double [[AX12]], [[AY13]] +; CHECK-NEXT: [[TMP37]] = select i1 [[TMP29]], double [[TMP28]], double [[TMP11]] +; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[FREM_LOOP_BODY]]: ; CHECK-NEXT: [[NB_IV:%.*]] = phi i32 [ [[NB]], %[[FREM_COMPUTE]] ], [ [[NB_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] ; CHECK-NEXT: [[AX_LOOP_PHI:%.*]] = phi double [ [[AX1]], %[[FREM_COMPUTE]] ], [ [[AX_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] @@ -1309,15 +1309,15 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AXP9:%.*]] = fadd double [[AX7]], [[AY2]] ; CHECK-NEXT: [[AX10:%.*]] = select i1 [[CLT8]], double [[AXP9]], double [[AX7]] ; CHECK-NEXT: [[AX11:%.*]] = call double @llvm.ldexp.f64.i32(double [[AX10]], i32 [[EY]]) -; CHECK-NEXT: [[TMP37]] = call double @llvm.copysign.f64(double [[AX11]], double [[TMP1]]) -; CHECK-NEXT: br label %[[BB4]] +; CHECK-NEXT: [[TMP45]] = call double @llvm.copysign.f64(double [[AX11]], double [[TMP11]]) +; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[FREM_COMPUTE15]]: -; CHECK-NEXT: [[TMP38:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[AX12]]) +; CHECK-NEXT: [[TMP38:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[AX]]) ; CHECK-NEXT: [[TMP39:%.*]] = extractvalue { double, i32 } [[TMP38]], 0 ; CHECK-NEXT: [[TMP40:%.*]] = extractvalue { double, i32 } [[TMP38]], 1 ; CHECK-NEXT: [[EX17:%.*]] = sub i32 [[TMP40]], 1 ; CHECK-NEXT: [[AX18:%.*]] = call double @llvm.ldexp.f64.i32(double [[TMP39]], i32 26) -; CHECK-NEXT: [[TMP41:%.*]] 
= call { double, i32 } @llvm.frexp.f64.i32(double [[AY13]]) +; CHECK-NEXT: [[TMP41:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[AY]]) ; CHECK-NEXT: [[TMP42:%.*]] = extractvalue { double, i32 } [[TMP41]], 0 ; CHECK-NEXT: [[TMP43:%.*]] = extractvalue { double, i32 } [[TMP41]], 1 ; CHECK-NEXT: [[EY19:%.*]] = sub i32 [[TMP43]], 1 @@ -1327,10 +1327,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[NB21]], 26 ; CHECK-NEXT: br i1 [[TMP44]], label %[[FREM_LOOP_BODY23:.*]], label %[[FREM_LOOP_EXIT24]] ; CHECK: [[FREM_ELSE16]]: -; CHECK-NEXT: [[TMP45:%.*]] = call double @llvm.copysign.f64(double 0.000000e+00, double [[TMP11]]) -; CHECK-NEXT: [[TMP46:%.*]] = fcmp oeq double [[AX12]], [[AY13]] -; CHECK-NEXT: [[TMP47]] = select i1 [[TMP46]], double [[TMP45]], double [[TMP11]] -; CHECK-NEXT: br label %[[BB14]] +; CHECK-NEXT: [[TMP46:%.*]] = call double @llvm.copysign.f64(double 0.000000e+00, double [[TMP1]]) +; CHECK-NEXT: [[TMP47:%.*]] = fcmp oeq double [[AX]], [[AY]] +; CHECK-NEXT: [[TMP55]] = select i1 [[TMP47]], double [[TMP46]], double [[TMP1]] +; CHECK-NEXT: br label %[[BB4]] ; CHECK: [[FREM_LOOP_BODY23]]: ; CHECK-NEXT: [[NB_IV25:%.*]] = phi i32 [ [[NB21]], %[[FREM_COMPUTE15]] ], [ [[NB_UPDATE33:%.*]], %[[FREM_LOOP_BODY23]] ] ; CHECK-NEXT: [[AX_LOOP_PHI26:%.*]] = phi double [ [[AX18]], %[[FREM_COMPUTE15]] ], [ [[AX_UPDATE32:%.*]], %[[FREM_LOOP_BODY23]] ] @@ -1359,8 +1359,8 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CHECK-NEXT: [[AXP40:%.*]] = fadd double [[AX38]], [[AY20]] ; CHECK-NEXT: [[AX41:%.*]] = select i1 [[CLT39]], double [[AXP40]], double [[AX38]] ; CHECK-NEXT: [[AX42:%.*]] = call double @llvm.ldexp.f64.i32(double [[AX41]], i32 [[EY19]]) -; CHECK-NEXT: [[TMP55]] = call double @llvm.copysign.f64(double [[AX42]], double [[TMP11]]) -; CHECK-NEXT: br label %[[BB14]] +; CHECK-NEXT: [[TMP56]] = call double @llvm.copysign.f64(double 
[[AX42]], double [[TMP1]]) +; CHECK-NEXT: br label %[[BB4]] ; ptr addrspace(1) %in2) { %gep2 = getelementptr <2 x double>, ptr addrspace(1) %in2, i32 4