diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 70297e471a7a0a..590a961e02d048 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1095,17 +1095,24 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( // Update (liveout) uses of bonus instructions, // now that the bonus instruction has been cloned into predecessor. - SSAUpdater SSAUpdate; - SSAUpdate.Initialize(BonusInst.getType(), - (NewBonusInst->getName() + ".merge").str()); - SSAUpdate.AddAvailableValue(BB, &BonusInst); - SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst); + // Note that we expect to be in a block-closed SSA form for this to work! for (Use &U : make_early_inc_range(BonusInst.uses())) { auto *UI = cast(U.getUser()); - if (UI->getParent() != PredBlock) - SSAUpdate.RewriteUseAfterInsertions(U); - else // Use is in the same block as, and comes before, NewBonusInst. - SSAUpdate.RewriteUse(U); + auto *PN = dyn_cast(UI); + if (!PN) { + assert(UI->getParent() == BB && BonusInst.comesBefore(UI) && + "If the user is not a PHI node, then it should be in the same " + "block as, and come after, the original bonus instruction."); + continue; // Keep using the original bonus instruction. + } + // Is this the block-closed SSA form PHI node? + if (PN->getIncomingBlock(U) == BB) + continue; // Great, keep using the original bonus instruction. + // The only other alternative is an "use" when coming from + // the predecessor block - here we should refer to the cloned bonus instr. + assert(PN->getIncomingBlock(U) == PredBlock && + "Not in block-closed SSA form?"); + U.set(NewBonusInst); } } } @@ -3239,6 +3246,17 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, // Early exits once we reach the limit. if (NumBonusInsts > BonusInstThreshold) return false; + + auto IsBCSSAUse = [BB, &I](Use &U) { + auto *UI = cast(U.getUser()); + if (auto *PN = dyn_cast(UI)) + return PN->getIncomingBlock(U) == BB; + return UI->getParent() == BB && I.comesBefore(UI); + }; + + // Does this instruction require rewriting of uses? + if (!all_of(I.uses(), IsBCSSAUse)) + return false; } // Ok, we have the budget. Perform the transformation. diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll index 4af4ec0b885cdb..b62b5c723114c9 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -1054,7 +1054,7 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca ; CHECK-NEXT: cmp r3, #8 ; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: blo.w .LBB16_12 -; CHECK-NEXT: @ %bb.1: @ %entry +; CHECK-NEXT: @ %bb.1: @ %if.then ; CHECK-NEXT: lsrs.w r12, r3, #2 ; CHECK-NEXT: beq.w .LBB16_12 ; CHECK-NEXT: @ %bb.2: @ %while.body.lr.ph diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll index 58177a877338cb..6fa8637bd6d670 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1048,7 +1048,7 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc ; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: cmp r3, #8 ; CHECK-NEXT: blo.w .LBB16_12 -; CHECK-NEXT: @ %bb.1: @ %entry +; CHECK-NEXT: @ %bb.1: @ %if.then ; CHECK-NEXT: lsrs.w r12, r3, #2 ; CHECK-NEXT: beq.w .LBB16_12 ; CHECK-NEXT: @ %bb.2: @ %while.body.lr.ph diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll index e9674d2ae66c1e..7594c18e51f328 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -207,14 +207,14 @@ define i8* @test(i8* nocapture readonly %input_row, i8* nocapture readonly %inpu ; CHECK-NEXT: cmp r3, #4 ; CHECK-NEXT: strd r0, r1, [sp, #12] @ 8-byte Folded Spill ; CHECK-NEXT: bne .LBB2_8 -; CHECK-NEXT: @ %bb.1: @ %entry +; CHECK-NEXT: @ %bb.1: @ %for.cond.preheader ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: beq .LBB2_8 ; CHECK-NEXT: @ %bb.2: @ %for.body.lr.ph ; CHECK-NEXT: ldr r3, [sp, #64] ; CHECK-NEXT: mov.w r9, #0 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldr r4, [sp, #56] +; CHECK-NEXT: ldr.w r11, [sp, #56] ; CHECK-NEXT: add.w r0, r1, r3, lsl #1 ; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: adds r0, r1, r3 @@ -223,57 +223,57 @@ define i8* @test(i8* nocapture readonly %input_row, i8* nocapture readonly %inpu ; CHECK-NEXT: add r0, r1 ; CHECK-NEXT: str r0, [sp] @ 4-byte Spill ; CHECK-NEXT: adds r0, r3, #7 -; CHECK-NEXT: lsr.w r11, r0, #3 +; CHECK-NEXT: lsrs r0, r0, #3 ; CHECK-NEXT: b .LBB2_5 ; CHECK-NEXT: .LBB2_3: @ in Loop: Header=BB2_5 Depth=1 -; CHECK-NEXT: mov r12, r10 -; CHECK-NEXT: mov r8, r10 -; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: mov r10, r12 +; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: .LBB2_4: @ %for.cond.cleanup23 ; CHECK-NEXT: @ in Loop: Header=BB2_5 Depth=1 -; CHECK-NEXT: ldr r1, [sp, #72] -; CHECK-NEXT: add.w r0, r8, r12 -; CHECK-NEXT: add r0, r6 -; CHECK-NEXT: add r0, r10 -; CHECK-NEXT: strb.w r0, [r1, r9] +; CHECK-NEXT: ldr r3, [sp, #72] +; CHECK-NEXT: add.w r1, r8, r10 +; CHECK-NEXT: add r1, r6 +; CHECK-NEXT: add r1, r12 +; CHECK-NEXT: strb.w r1, [r3, r9] ; CHECK-NEXT: add.w r9, r9, #1 ; CHECK-NEXT: cmp r9, r2 ; CHECK-NEXT: beq .LBB2_8 ; CHECK-NEXT: .LBB2_5: @ %for.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB2_7 Depth 2 -; CHECK-NEXT: ldr r0, [sp, #68] -; CHECK-NEXT: ldr.w r10, [r0, r9, lsl #2] -; CHECK-NEXT: subs.w r0, r11, r11 +; CHECK-NEXT: ldr r1, [sp, #68] +; CHECK-NEXT: ldr.w r12, [r1, r9, lsl #2] +; CHECK-NEXT: subs r1, r0, r0 ; CHECK-NEXT: ble .LBB2_3 ; CHECK-NEXT: @ %bb.6: @ %for.body24.preheader ; CHECK-NEXT: @ in Loop: Header=BB2_5 Depth=1 -; CHECK-NEXT: ldr r3, [sp, #64] -; CHECK-NEXT: mov r6, r10 -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: dls lr, r0 -; CHECK-NEXT: ldrd r5, r0, [sp] @ 8-byte Folded Reload -; CHECK-NEXT: mov r8, r10 -; CHECK-NEXT: mla r7, r9, r3, r1 +; CHECK-NEXT: ldr r7, [sp, #64] +; CHECK-NEXT: mov r6, r12 +; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: dls lr, r1 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r12, r10 +; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: mla r7, r9, r7, r3 +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldrd r4, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: .LBB2_7: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB2_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vldrb.s16 q0, [r5], #8 -; CHECK-NEXT: vadd.i16 q1, q0, r4 +; CHECK-NEXT: vldrb.s16 q0, [r4], #8 +; CHECK-NEXT: vadd.i16 q1, q0, r11 ; CHECK-NEXT: vldrb.s16 q0, [r7], #8 -; CHECK-NEXT: vmlava.s16 r10, q0, q1 -; CHECK-NEXT: vldrb.s16 q1, [r3], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vmlava.s16 r12, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r5], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r11 ; CHECK-NEXT: vmlava.s16 r6, q0, q1 -; CHECK-NEXT: vldrb.s16 q1, [r0], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vldrb.s16 q1, [r3], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r11 ; CHECK-NEXT: vmlava.s16 r8, q0, q1 ; CHECK-NEXT: vldrb.s16 q1, [r1], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r4 -; CHECK-NEXT: vmlava.s16 r12, q0, q1 +; CHECK-NEXT: vadd.i16 q1, q1, r11 +; CHECK-NEXT: vmlava.s16 r10, q0, q1 ; CHECK-NEXT: le lr, .LBB2_7 ; CHECK-NEXT: b .LBB2_4 ; CHECK-NEXT: .LBB2_8: @ %if.end @@ -390,14 +390,14 @@ define i8* @test_optsize(i8* nocapture readonly %input_row, i8* nocapture readon ; CHECK-NEXT: cmp r3, #4 ; CHECK-NEXT: strd r0, r1, [sp, #12] @ 8-byte Folded Spill ; CHECK-NEXT: bne .LBB3_8 -; CHECK-NEXT: @ %bb.1: @ %entry +; CHECK-NEXT: @ %bb.1: @ %for.cond.preheader ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: beq .LBB3_8 ; CHECK-NEXT: @ %bb.2: @ %for.body.lr.ph ; CHECK-NEXT: ldr r3, [sp, #64] ; CHECK-NEXT: mov.w r9, #0 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldr r4, [sp, #56] +; CHECK-NEXT: ldr.w r11, [sp, #56] ; CHECK-NEXT: add.w r0, r1, r3, lsl #1 ; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: adds r0, r1, r3 @@ -406,55 +406,55 @@ define i8* @test_optsize(i8* nocapture readonly %input_row, i8* nocapture readon ; CHECK-NEXT: add r0, r1 ; CHECK-NEXT: str r0, [sp] @ 4-byte Spill ; CHECK-NEXT: adds r0, r3, #7 -; CHECK-NEXT: lsr.w r11, r0, #3 +; CHECK-NEXT: lsrs r0, r0, #3 ; CHECK-NEXT: .LBB3_3: @ %for.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB3_5 Depth 2 -; CHECK-NEXT: ldr r0, [sp, #68] -; CHECK-NEXT: ldr.w r10, [r0, r9, lsl #2] -; CHECK-NEXT: subs.w r0, r11, r11 +; CHECK-NEXT: ldr r1, [sp, #68] +; CHECK-NEXT: ldr.w r12, [r1, r9, lsl #2] +; CHECK-NEXT: subs r1, r0, r0 ; CHECK-NEXT: ble .LBB3_6 ; CHECK-NEXT: @ %bb.4: @ %for.body24.preheader ; CHECK-NEXT: @ in Loop: Header=BB3_3 Depth=1 -; CHECK-NEXT: ldr r3, [sp, #64] -; CHECK-NEXT: mov r6, r10 -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: dls lr, r0 -; CHECK-NEXT: ldrd r5, r0, [sp] @ 8-byte Folded Reload -; CHECK-NEXT: mov r8, r10 -; CHECK-NEXT: mla r7, r9, r3, r1 +; CHECK-NEXT: ldr r7, [sp, #64] +; CHECK-NEXT: mov r6, r12 +; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: dls lr, r1 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r12, r10 +; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: mla r7, r9, r7, r3 +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldrd r4, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: .LBB3_5: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB3_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vldrb.s16 q0, [r5], #8 -; CHECK-NEXT: vadd.i16 q1, q0, r4 +; CHECK-NEXT: vldrb.s16 q0, [r4], #8 +; CHECK-NEXT: vadd.i16 q1, q0, r11 ; CHECK-NEXT: vldrb.s16 q0, [r7], #8 -; CHECK-NEXT: vmlava.s16 r10, q0, q1 -; CHECK-NEXT: vldrb.s16 q1, [r3], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vmlava.s16 r12, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r5], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r11 ; CHECK-NEXT: vmlava.s16 r6, q0, q1 -; CHECK-NEXT: vldrb.s16 q1, [r0], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vldrb.s16 q1, [r3], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r11 ; CHECK-NEXT: vmlava.s16 r8, q0, q1 ; CHECK-NEXT: vldrb.s16 q1, [r1], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r4 -; CHECK-NEXT: vmlava.s16 r12, q0, q1 +; CHECK-NEXT: vadd.i16 q1, q1, r11 +; CHECK-NEXT: vmlava.s16 r10, q0, q1 ; CHECK-NEXT: le lr, .LBB3_5 ; CHECK-NEXT: b .LBB3_7 ; CHECK-NEXT: .LBB3_6: @ in Loop: Header=BB3_3 Depth=1 -; CHECK-NEXT: mov r12, r10 -; CHECK-NEXT: mov r8, r10 -; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: mov r10, r12 +; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: .LBB3_7: @ %for.cond.cleanup23 ; CHECK-NEXT: @ in Loop: Header=BB3_3 Depth=1 -; CHECK-NEXT: ldr r1, [sp, #72] -; CHECK-NEXT: add.w r0, r8, r12 -; CHECK-NEXT: add r0, r6 -; CHECK-NEXT: add r0, r10 -; CHECK-NEXT: strb.w r0, [r1, r9] +; CHECK-NEXT: ldr r3, [sp, #72] +; CHECK-NEXT: add.w r1, r8, r10 +; CHECK-NEXT: add r1, r6 +; CHECK-NEXT: add r1, r12 +; CHECK-NEXT: strb.w r1, [r3, r9] ; CHECK-NEXT: add.w r9, r9, #1 ; CHECK-NEXT: cmp r9, r2 ; CHECK-NEXT: bne .LBB3_3 @@ -683,13 +683,13 @@ define i8* @signext(i8* %input_row, i8* %input_col, i16 zeroext %output_ch, i16 ; CHECK-NEXT: cmp r3, #4 ; CHECK-NEXT: stm.w r12, {r0, r1, r2} @ 12-byte Folded Spill ; CHECK-NEXT: bne .LBB5_8 -; CHECK-NEXT: @ %bb.1: @ %entry +; CHECK-NEXT: @ %bb.1: @ %for.cond.preheader ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: beq .LBB5_8 ; CHECK-NEXT: @ %bb.2: @ %for.body.lr.ph ; CHECK-NEXT: ldr r2, [sp, #92] -; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: mov.w r11, #0 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: ldr r4, [sp, #76] ; CHECK-NEXT: add.w r0, r1, r2, lsl #1 @@ -703,20 +703,20 @@ define i8* @signext(i8* %input_row, i8* %input_col, i16 zeroext %output_ch, i16 ; CHECK-NEXT: lsrs r1, r0, #3 ; CHECK-NEXT: b .LBB5_5 ; CHECK-NEXT: .LBB5_3: @ in Loop: Header=BB5_5 Depth=1 -; CHECK-NEXT: mov r8, r10 -; CHECK-NEXT: mov r12, r10 -; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: mov r10, r12 +; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: .LBB5_4: @ %for.cond.cleanup23 ; CHECK-NEXT: @ in Loop: Header=BB5_5 Depth=1 -; CHECK-NEXT: add.w r0, r12, r8 +; CHECK-NEXT: add.w r0, r8, r10 ; CHECK-NEXT: ldr r1, [sp, #100] ; CHECK-NEXT: add r0, r6 -; CHECK-NEXT: add r0, r10 -; CHECK-NEXT: strb.w r0, [r1, r9] -; CHECK-NEXT: add.w r9, r9, #1 +; CHECK-NEXT: add r0, r12 +; CHECK-NEXT: strb.w r0, [r1, r11] +; CHECK-NEXT: add.w r11, r11, #1 ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: cmp r9, r0 +; CHECK-NEXT: cmp r11, r0 ; CHECK-NEXT: beq .LBB5_8 ; CHECK-NEXT: .LBB5_5: @ %for.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 @@ -724,36 +724,35 @@ define i8* @signext(i8* %input_row, i8* %input_col, i16 zeroext %output_ch, i16 ; CHECK-NEXT: ldr r0, [sp, #96] ; CHECK-NEXT: cmp r1, r1 ; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: ldr.w r10, [r0, r9, lsl #2] +; CHECK-NEXT: ldr.w r12, [r0, r11, lsl #2] ; CHECK-NEXT: bge .LBB5_3 ; CHECK-NEXT: @ %bb.6: @ %for.body24.preheader ; CHECK-NEXT: @ in Loop: Header=BB5_5 Depth=1 ; CHECK-NEXT: ldr.w lr, [sp, #92] ; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r12, r10 -; CHECK-NEXT: mla r3, r9, lr, r0 -; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: ldrd r7, r0, [sp] @ 8-byte Folded Reload -; CHECK-NEXT: mov r8, r10 +; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: mla r3, r11, lr, r0 +; CHECK-NEXT: mov r10, r12 +; CHECK-NEXT: ldm.w sp, {r0, r5, r7} @ 12-byte Folded Reload ; CHECK-NEXT: dlstp.16 lr, lr ; CHECK-NEXT: .LBB5_7: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB5_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vldrb.s16 q0, [r7], #8 +; CHECK-NEXT: vldrb.s16 q0, [r0], #8 ; CHECK-NEXT: vadd.i16 q1, q0, r4 ; CHECK-NEXT: vldrb.s16 q0, [r3], #8 -; CHECK-NEXT: vmlava.s16 r10, q0, q1 -; CHECK-NEXT: vldrb.s16 q1, [r5], #8 +; CHECK-NEXT: vmlava.s16 r12, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r7], #8 ; CHECK-NEXT: vadd.i16 q1, q1, r4 ; CHECK-NEXT: vmlava.s16 r6, q0, q1 -; CHECK-NEXT: vldrb.s16 q1, [r0], #8 +; CHECK-NEXT: vldrb.s16 q1, [r5], #8 ; CHECK-NEXT: vadd.i16 q1, q1, r4 -; CHECK-NEXT: vmlava.s16 r12, q0, q1 +; CHECK-NEXT: vmlava.s16 r8, q0, q1 ; CHECK-NEXT: vldrb.s16 q1, [r1], #8 ; CHECK-NEXT: vadd.i16 q1, q1, r4 -; CHECK-NEXT: vmlava.s16 r8, q0, q1 +; CHECK-NEXT: vmlava.s16 r10, q0, q1 ; CHECK-NEXT: letp lr, .LBB5_7 ; CHECK-NEXT: b .LBB5_4 ; CHECK-NEXT: .LBB5_8: @ %if.end @@ -873,13 +872,13 @@ define i8* @signext_optsize(i8* %input_row, i8* %input_col, i16 zeroext %output_ ; CHECK-NEXT: cmp r3, #4 ; CHECK-NEXT: stm.w r12, {r0, r1, r2} @ 12-byte Folded Spill ; CHECK-NEXT: bne .LBB6_8 -; CHECK-NEXT: @ %bb.1: @ %entry +; CHECK-NEXT: @ %bb.1: @ %for.cond.preheader ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: beq .LBB6_8 ; CHECK-NEXT: @ %bb.2: @ %for.body.lr.ph ; CHECK-NEXT: ldr r2, [sp, #92] -; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: mov.w r11, #0 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: ldr r4, [sp, #76] ; CHECK-NEXT: add.w r0, r1, r2, lsl #1 @@ -897,53 +896,52 @@ define i8* @signext_optsize(i8* %input_row, i8* %input_col, i16 zeroext %output_ ; CHECK-NEXT: ldr r0, [sp, #96] ; CHECK-NEXT: cmp r1, r1 ; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: ldr.w r10, [r0, r9, lsl #2] +; CHECK-NEXT: ldr.w r12, [r0, r11, lsl #2] ; CHECK-NEXT: bge .LBB6_6 ; CHECK-NEXT: @ %bb.4: @ %for.body24.preheader ; CHECK-NEXT: @ in Loop: Header=BB6_3 Depth=1 ; CHECK-NEXT: ldr.w lr, [sp, #92] ; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r12, r10 -; CHECK-NEXT: mla r3, r9, lr, r0 -; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: ldrd r7, r0, [sp] @ 8-byte Folded Reload -; CHECK-NEXT: mov r8, r10 +; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: mla r3, r11, lr, r0 +; CHECK-NEXT: mov r10, r12 +; CHECK-NEXT: ldm.w sp, {r0, r5, r7} @ 12-byte Folded Reload ; CHECK-NEXT: dlstp.16 lr, lr ; CHECK-NEXT: .LBB6_5: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB6_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vldrb.s16 q0, [r7], #8 +; CHECK-NEXT: vldrb.s16 q0, [r0], #8 ; CHECK-NEXT: vadd.i16 q1, q0, r4 ; CHECK-NEXT: vldrb.s16 q0, [r3], #8 -; CHECK-NEXT: vmlava.s16 r10, q0, q1 -; CHECK-NEXT: vldrb.s16 q1, [r5], #8 +; CHECK-NEXT: vmlava.s16 r12, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r7], #8 ; CHECK-NEXT: vadd.i16 q1, q1, r4 ; CHECK-NEXT: vmlava.s16 r6, q0, q1 -; CHECK-NEXT: vldrb.s16 q1, [r0], #8 +; CHECK-NEXT: vldrb.s16 q1, [r5], #8 ; CHECK-NEXT: vadd.i16 q1, q1, r4 -; CHECK-NEXT: vmlava.s16 r12, q0, q1 +; CHECK-NEXT: vmlava.s16 r8, q0, q1 ; CHECK-NEXT: vldrb.s16 q1, [r1], #8 ; CHECK-NEXT: vadd.i16 q1, q1, r4 -; CHECK-NEXT: vmlava.s16 r8, q0, q1 +; CHECK-NEXT: vmlava.s16 r10, q0, q1 ; CHECK-NEXT: letp lr, .LBB6_5 ; CHECK-NEXT: b .LBB6_7 ; CHECK-NEXT: .LBB6_6: @ in Loop: Header=BB6_3 Depth=1 -; CHECK-NEXT: mov r8, r10 -; CHECK-NEXT: mov r12, r10 -; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: mov r10, r12 +; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: .LBB6_7: @ %for.cond.cleanup23 ; CHECK-NEXT: @ in Loop: Header=BB6_3 Depth=1 -; CHECK-NEXT: add.w r0, r12, r8 +; CHECK-NEXT: add.w r0, r8, r10 ; CHECK-NEXT: ldr r1, [sp, #100] ; CHECK-NEXT: add r0, r6 -; CHECK-NEXT: add r0, r10 -; CHECK-NEXT: strb.w r0, [r1, r9] -; CHECK-NEXT: add.w r9, r9, #1 +; CHECK-NEXT: add r0, r12 +; CHECK-NEXT: strb.w r0, [r1, r11] +; CHECK-NEXT: add.w r11, r11, #1 ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: cmp r9, r0 +; CHECK-NEXT: cmp r11, r0 ; CHECK-NEXT: bne .LBB6_3 ; CHECK-NEXT: .LBB6_8: @ %if.end ; CHECK-NEXT: ldr r0, [sp, #100] diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-inner.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-inner.ll index bcfecbf228dc8e..fa39b77aae36a0 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-loop-inner.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-inner.ll @@ -7,13 +7,16 @@ define void @basic(i32 %K, i32 %N) { ; CHECK-NEXT: br label [[OUTER:%.*]] ; CHECK: outer: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[OUTER_BACKEDGE:%.*]] ] -; CHECK-NEXT: [[CMP_INNER_PEEL8:%.*]] = icmp sgt i32 [[K:%.*]], 3 +; CHECK-NEXT: [[CMP_INNER_PEEL:%.*]] = icmp sgt i32 [[K:%.*]], 1 +; CHECK-NEXT: br i1 [[CMP_INNER_PEEL]], label [[INNER_PEEL2:%.*]], label [[OUTER_BACKEDGE]] +; CHECK: inner.peel2: +; CHECK-NEXT: [[CMP_INNER_PEEL8:%.*]] = icmp sgt i32 [[K]], 3 ; CHECK-NEXT: br i1 [[CMP_INNER_PEEL8]], label [[INNER:%.*]], label [[OUTER_BACKEDGE]] ; CHECK: inner: -; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[J_INC:%.*]], [[INNER]] ], [ 3, [[OUTER]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[J_INC:%.*]], [[INNER]] ], [ 3, [[INNER_PEEL2]] ] ; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1 ; CHECK-NEXT: [[CMP_INNER:%.*]] = icmp slt i32 [[J_INC]], [[K]] -; CHECK-NEXT: br i1 [[CMP_INNER]], label [[INNER]], label [[OUTER_BACKEDGE]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP_INNER]], label [[INNER]], label [[OUTER_BACKEDGE]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: outer.backedge: ; CHECK-NEXT: [[I_INC]] = add i32 [[I]], 1 ; CHECK-NEXT: [[CMP_OUTER:%.*]] = icmp slt i32 [[I_INC]], [[N:%.*]] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll index 6130a762b33e19..46cf58e4864260 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll @@ -90,24 +90,33 @@ return: define float @test_merge_anyof_v4sf(<4 x float> %t) { ; CHECK-LABEL: @test_merge_anyof_v4sf( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[T:%.*]], i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[T]], i32 2 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[T]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[T]], i32 0 -; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T]] -; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0 -; CHECK-NEXT: [[CMP19:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[TMP6]], i1 true, i1 [[CMP19]] -; CHECK-NEXT: [[CMP24:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP24]] -; CHECK-NEXT: [[CMP29:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP29]] -; CHECK-NEXT: [[CMP34:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP34]] -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]] -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]] +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[T:%.*]], i32 0 +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[VECEXT]], 0.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]] +; CHECK: lor.lhs.false: +; CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <4 x float> [[T]], i32 1 +; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt float [[VECEXT2]], 0.000000e+00 +; CHECK-NEXT: br i1 [[CMP4]], label [[RETURN]], label [[LOR_LHS_FALSE6:%.*]] +; CHECK: lor.lhs.false6: +; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x float> [[T]], i32 2 +; CHECK-NEXT: [[CMP9:%.*]] = fcmp olt float [[VECEXT7]], 0.000000e+00 +; CHECK-NEXT: br i1 [[CMP9]], label [[RETURN]], label [[LOR_LHS_FALSE11:%.*]] +; CHECK: lor.lhs.false11: +; CHECK-NEXT: [[VECEXT12:%.*]] = extractelement <4 x float> [[T]], i32 3 +; CHECK-NEXT: [[CMP14:%.*]] = fcmp olt float [[VECEXT12]], 0.000000e+00 +; CHECK-NEXT: [[CMP19:%.*]] = fcmp ogt float [[VECEXT]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP14]], i1 true, i1 [[CMP19]] +; CHECK-NEXT: [[CMP24:%.*]] = fcmp ogt float [[VECEXT2]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP24]] +; CHECK-NEXT: [[CMP29:%.*]] = fcmp ogt float [[VECEXT7]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP29]] +; CHECK-NEXT: [[CMP34:%.*]] = fcmp ogt float [[VECEXT12]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[OR_COND2]], i1 true, i1 [[CMP34]] +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[VECEXT]], [[VECEXT2]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND3]], float 0.000000e+00, float [[ADD]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi float [ 0.000000e+00, [[LOR_LHS_FALSE6]] ], [ 0.000000e+00, [[LOR_LHS_FALSE]] ], [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[LOR_LHS_FALSE11]] ] ; CHECK-NEXT: ret float [[RETVAL_0]] ; entry: @@ -261,24 +270,33 @@ return: define float @test_separate_anyof_v4sf(<4 x float> %t) { ; CHECK-LABEL: @test_separate_anyof_v4sf( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[T:%.*]], i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[T]], i32 2 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[T]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[T]], i32 0 -; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T]] -; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0 -; CHECK-NEXT: [[CMP18:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[TMP6]], i1 true, i1 [[CMP18]] -; CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP23]] -; CHECK-NEXT: [[CMP28:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP28]] -; CHECK-NEXT: [[CMP33:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP33]] -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]] -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]] +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[T:%.*]], i32 0 +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[VECEXT]], 0.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]] +; CHECK: lor.lhs.false: +; CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <4 x float> [[T]], i32 1 +; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt float [[VECEXT2]], 0.000000e+00 +; CHECK-NEXT: br i1 [[CMP4]], label [[RETURN]], label [[LOR_LHS_FALSE6:%.*]] +; CHECK: lor.lhs.false6: +; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x float> [[T]], i32 2 +; CHECK-NEXT: [[CMP9:%.*]] = fcmp olt float [[VECEXT7]], 0.000000e+00 +; CHECK-NEXT: br i1 [[CMP9]], label [[RETURN]], label [[LOR_LHS_FALSE11:%.*]] +; CHECK: lor.lhs.false11: +; CHECK-NEXT: [[VECEXT12:%.*]] = extractelement <4 x float> [[T]], i32 3 +; CHECK-NEXT: [[CMP14:%.*]] = fcmp olt float [[VECEXT12]], 0.000000e+00 +; CHECK-NEXT: [[CMP18:%.*]] = fcmp ogt float [[VECEXT]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP14]], i1 true, i1 [[CMP18]] +; CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt float [[VECEXT2]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP23]] +; CHECK-NEXT: [[CMP28:%.*]] = fcmp ogt float [[VECEXT7]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP28]] +; CHECK-NEXT: [[CMP33:%.*]] = fcmp ogt float [[VECEXT12]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[OR_COND2]], i1 true, i1 [[CMP33]] +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[VECEXT]], [[VECEXT2]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND3]], float 0.000000e+00, float [[ADD]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi float [ 0.000000e+00, [[LOR_LHS_FALSE6]] ], [ 0.000000e+00, [[LOR_LHS_FALSE]] ], [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[LOR_LHS_FALSE11]] ] ; CHECK-NEXT: ret float [[RETVAL_0]] ; entry: diff --git a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll index 2ff04182607716..0b388e78661168 100644 --- a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll +++ b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll @@ -324,10 +324,11 @@ define void @one_pred_with_extra_op_liveout(i8 %v0, i8 %v1) { ; CHECK-LABEL: @one_pred_with_extra_op_liveout( ; CHECK-NEXT: pred: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 +; CHECK-NEXT: br i1 [[C0]], label [[DISPATCH:%.*]], label [[FINAL_RIGHT:%.*]] +; CHECK: dispatch: ; CHECK-NEXT: [[V1_ADJ:%.*]] = add i8 [[V0]], [[V1:%.*]] ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1_ADJ]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] +; CHECK-NEXT: br i1 [[C1]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: final_left: @@ -357,10 +358,11 @@ define void @one_pred_with_extra_op_liveout_multiuse(i8 %v0, i8 %v1) { ; CHECK-LABEL: @one_pred_with_extra_op_liveout_multiuse( ; CHECK-NEXT: pred: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 +; CHECK-NEXT: br i1 [[C0]], label [[DISPATCH:%.*]], label [[FINAL_RIGHT:%.*]] +; CHECK: dispatch: ; CHECK-NEXT: [[V1_ADJ:%.*]] = add i8 [[V0]], [[V1:%.*]] ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1_ADJ]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] +; CHECK-NEXT: br i1 [[C1]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: final_left: @@ -396,10 +398,11 @@ define void @one_pred_with_extra_op_liveout_distant_phi(i8 %v0, i8 %v1) { ; CHECK-NEXT: br i1 [[C0]], label [[PRED:%.*]], label [[LEFT_END:%.*]] ; CHECK: pred: ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1:%.*]], 0 +; CHECK-NEXT: br i1 [[C1]], label [[DISPATCH:%.*]], label [[FINAL_RIGHT:%.*]] +; CHECK: dispatch: ; CHECK-NEXT: [[V2_ADJ:%.*]] = add i8 [[V0]], [[V1]] ; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[V2_ADJ]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C1]], i1 [[C2]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] +; CHECK-NEXT: br i1 [[C2]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT]] ; CHECK: final_left: ; CHECK-NEXT: call void @sideeffect0() ; CHECK-NEXT: call void @use8(i8 [[V2_ADJ]]) @@ -554,10 +557,11 @@ define void @one_pred_with_extra_op_eexternally_used_only(i8 %v0, i8 %v1) { ; CHECK-LABEL: @one_pred_with_extra_op_eexternally_used_only( ; CHECK-NEXT: pred: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 +; CHECK-NEXT: br i1 [[C0]], label [[DISPATCH:%.*]], label [[FINAL_RIGHT:%.*]] +; CHECK: dispatch: ; CHECK-NEXT: [[V1_ADJ:%.*]] = add i8 [[V0]], [[V1:%.*]] ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] +; CHECK-NEXT: br i1 [[C1]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: final_left: @@ -587,10 +591,11 @@ define void @one_pred_with_extra_op_externally_used_only_multiuse(i8 %v0, i8 %v1 ; CHECK-LABEL: @one_pred_with_extra_op_externally_used_only_multiuse( ; CHECK-NEXT: pred: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 +; CHECK-NEXT: br i1 [[C0]], label [[DISPATCH:%.*]], label [[FINAL_RIGHT:%.*]] +; CHECK: dispatch: ; CHECK-NEXT: [[V1_ADJ:%.*]] = add i8 [[V0]], [[V1:%.*]] ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] +; CHECK-NEXT: br i1 [[C1]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: final_left: @@ -735,10 +740,11 @@ define void @one_pred_with_extra_op_externally_used_only_after_cond_distant_phi( ; CHECK-NEXT: br i1 [[C0]], label [[PRED:%.*]], label [[LEFT_END:%.*]] ; CHECK: pred: ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1:%.*]], 0 +; CHECK-NEXT: br i1 [[C1]], label [[DISPATCH:%.*]], label [[FINAL_RIGHT:%.*]] +; CHECK: dispatch: ; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[V3:%.*]], 0 ; CHECK-NEXT: [[V2_ADJ:%.*]] = add i8 [[V4:%.*]], [[V5:%.*]] -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C1]], i1 [[C3]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] +; CHECK-NEXT: br i1 [[C3]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT]] ; CHECK: final_left: ; CHECK-NEXT: call void @sideeffect0() ; CHECK-NEXT: call void @use8(i8 [[V2_ADJ]]) @@ -834,22 +840,17 @@ define void @pr48450() { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[COUNTDOWN:%.*]] = phi i8 [ 8, [[ENTRY:%.*]] ], [ [[DEC_MERGE:%.*]], [[FOR_BODYTHREAD_PRE_SPLIT:%.*]] ] +; CHECK-NEXT: [[COUNTDOWN:%.*]] = phi i8 [ 8, [[ENTRY:%.*]] ], [ [[DEC:%.*]], [[FOR_BODYTHREAD_PRE_SPLIT:%.*]] ] ; CHECK-NEXT: [[C:%.*]] = call i1 @gen1() ; CHECK-NEXT: br i1 [[C]], label [[FOR_INC:%.*]], label [[IF_THEN:%.*]] ; CHECK: for.inc: -; CHECK-NEXT: [[DEC_OLD:%.*]] = add i8 [[COUNTDOWN]], -1 -; CHECK-NEXT: [[CMP_NOT_OLD:%.*]] = icmp eq i8 [[COUNTDOWN]], 0 -; CHECK-NEXT: br i1 [[CMP_NOT_OLD]], label [[IF_END_LOOPEXIT:%.*]], label [[FOR_BODYTHREAD_PRE_SPLIT]] +; CHECK-NEXT: [[DEC]] = add i8 [[COUNTDOWN]], -1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 [[COUNTDOWN]], 0 +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[IF_END_LOOPEXIT:%.*]], label [[FOR_BODYTHREAD_PRE_SPLIT]] ; CHECK: if.then: ; CHECK-NEXT: [[C2:%.*]] = call i1 @gen1() -; CHECK-NEXT: [[C2_NOT:%.*]] = xor i1 [[C2]], true -; CHECK-NEXT: [[DEC:%.*]] = add i8 [[COUNTDOWN]], -1 -; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 [[COUNTDOWN]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C2_NOT]], i1 true, i1 [[CMP_NOT]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_END_LOOPEXIT]], label [[FOR_BODYTHREAD_PRE_SPLIT]] +; CHECK-NEXT: br i1 [[C2]], label [[FOR_INC]], label [[IF_END_LOOPEXIT]] ; CHECK: for.bodythread-pre-split: -; CHECK-NEXT: [[DEC_MERGE]] = phi i8 [ [[DEC]], [[IF_THEN]] ], [ [[DEC_OLD]], [[FOR_INC]] ] ; CHECK-NEXT: call void @sideeffect0() ; CHECK-NEXT: br label [[FOR_BODY]] ; CHECK: if.end.loopexit: @@ -885,23 +886,18 @@ define void @pr48450_2(i1 %enable_loopback) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[COUNTDOWN:%.*]] = phi i8 [ 8, [[ENTRY:%.*]] ], [ [[DEC_MERGE:%.*]], [[FOR_BODYTHREAD_PRE_SPLIT:%.*]] ] +; CHECK-NEXT: [[COUNTDOWN:%.*]] = phi i8 [ 8, [[ENTRY:%.*]] ], [ [[DEC:%.*]], [[FOR_BODYTHREAD_PRE_SPLIT:%.*]] ] ; CHECK-NEXT: [[C:%.*]] = call i1 @gen1() ; CHECK-NEXT: br i1 [[C]], label [[FOR_INC:%.*]], label [[IF_THEN:%.*]] ; CHECK: for.inc: -; CHECK-NEXT: [[DEC_OLD:%.*]] = add i8 [[COUNTDOWN]], -1 -; CHECK-NEXT: [[CMP_NOT_OLD:%.*]] = icmp eq i8 [[COUNTDOWN]], 0 -; CHECK-NEXT: br i1 [[CMP_NOT_OLD]], label [[IF_END_LOOPEXIT:%.*]], label [[FOR_BODYTHREAD_PRE_SPLIT]] +; CHECK-NEXT: [[DEC]] = add i8 [[COUNTDOWN]], -1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 [[COUNTDOWN]], 0 +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[IF_END_LOOPEXIT:%.*]], label [[FOR_BODYTHREAD_PRE_SPLIT]] ; CHECK: if.then: ; CHECK-NEXT: [[C2:%.*]] = call i1 @gen1() -; CHECK-NEXT: [[C2_NOT:%.*]] = xor i1 [[C2]], true -; CHECK-NEXT: [[DEC:%.*]] = add i8 [[COUNTDOWN]], -1 -; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 [[COUNTDOWN]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C2_NOT]], i1 true, i1 [[CMP_NOT]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_END_LOOPEXIT]], label [[FOR_BODYTHREAD_PRE_SPLIT]] +; CHECK-NEXT: br i1 [[C2]], label [[FOR_INC]], label [[IF_END_LOOPEXIT]] ; CHECK: for.bodythread-pre-split: -; CHECK-NEXT: [[DEC_MERGE]] = phi i8 [ [[DEC_OLD]], [[FOR_INC]] ], [ [[DEC_MERGE]], [[FOR_BODYTHREAD_PRE_SPLIT_LOOPBACK:%.*]] ], [ [[DEC]], [[IF_THEN]] ] -; CHECK-NEXT: [[SHOULD_LOOPBACK:%.*]] = phi i1 [ true, [[FOR_INC]] ], [ false, [[FOR_BODYTHREAD_PRE_SPLIT_LOOPBACK]] ], [ true, [[IF_THEN]] ] +; CHECK-NEXT: [[SHOULD_LOOPBACK:%.*]] = phi i1 [ true, [[FOR_INC]] ], [ false, [[FOR_BODYTHREAD_PRE_SPLIT_LOOPBACK:%.*]] ] ; CHECK-NEXT: [[DO_LOOPBACK:%.*]] = and i1 [[SHOULD_LOOPBACK]], [[ENABLE_LOOPBACK:%.*]] ; CHECK-NEXT: call void @sideeffect0() ; CHECK-NEXT: br i1 [[DO_LOOPBACK]], label [[FOR_BODYTHREAD_PRE_SPLIT_LOOPBACK]], label [[FOR_BODY]] @@ -1001,16 +997,13 @@ cleanup: define void @pr49510() { ; CHECK-LABEL: @pr49510( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTOLD:%.*]] = load i16, i16* @global_pr49510, align 1 -; CHECK-NEXT: [[TOBOOL_OLD:%.*]] = icmp ne i16 [[DOTOLD]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_OLD]], label [[LAND_RHS:%.*]], label [[FOR_END:%.*]] -; CHECK: land.rhs: -; CHECK-NEXT: [[DOTMERGE:%.*]] = phi i16 [ [[TMP0:%.*]], [[LAND_RHS]] ], [ [[DOTOLD]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[DOTMERGE]], 0 -; CHECK-NEXT: [[TMP0]] = load i16, i16* @global_pr49510, align 1 +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* @global_pr49510, align 1 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i16 [[TMP0]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[TOBOOL]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[LAND_RHS]], label [[FOR_END]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[TMP0]], 0 +; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[TOBOOL]], [[CMP]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_COND]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -1039,19 +1032,17 @@ for.end: define i32 @pr51125() { ; CHECK-LABEL: @pr51125( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LD_OLD:%.*]] = load i32, i32* @global_pr51125, align 4 -; CHECK-NEXT: [[ISZERO_OLD:%.*]] = icmp eq i32 [[LD_OLD]], 0 -; CHECK-NEXT: br i1 [[ISZERO_OLD]], label [[EXIT:%.*]], label [[L2:%.*]] +; CHECK-NEXT: br label [[L:%.*]] +; CHECK: L: +; CHECK-NEXT: [[LD:%.*]] = load i32, i32* @global_pr51125, align 4 +; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[LD]], 0 +; CHECK-NEXT: br i1 [[ISZERO]], label [[EXIT:%.*]], label [[L2:%.*]] ; CHECK: L2: -; CHECK-NEXT: [[LD_MERGE:%.*]] = phi i32 [ [[LD:%.*]], [[L2]] ], [ [[LD_OLD]], [[ENTRY:%.*]] ] ; CHECK-NEXT: store i32 -1, i32* @global_pr51125, align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LD_MERGE]], -1 -; CHECK-NEXT: [[LD]] = load i32, i32* @global_pr51125, align 4 -; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[LD]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 true, i1 [[ISZERO]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT]], label [[L2]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD]], -1 +; CHECK-NEXT: br i1 [[CMP]], label [[L]], label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[LD]], [[L2]] ], [ [[LD_OLD]], [[ENTRY]] ] +; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[LD]], [[L2]] ], [ [[LD]], [[L]] ] ; CHECK-NEXT: ret i32 [[R]] ; entry: