# Summary (free text ahead of the first "diff --git" header):
#  * ARMInstrThumb2.td: adds the SDNPSideEffect property to the
#    ARMISD::WLSSETUP SelectionDAG node (arm_wlssetup) and wraps the
#    t2WhileLoopSetup pseudo instruction in "let hasSideEffects = 1 in".
#  * Adds the regression test LowOverheadLoops/pr168209.ll, whose stated
#    purpose is to check that t2WhileLoopSetup is not CSEd; its expected
#    output contains one "wls lr, r0" per loop.
# NOTE(review): the side-effect flags are presumably what keeps the two loop
# setups from being merged -- confirm against the passes involved.
# NOTE(review): line breaks and leading indentation were restored from a
# whitespace-collapsed copy; verify context-line indentation against the
# target tree before applying.
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 317959c0342f7..66a2297bde0be 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -5584,7 +5584,8 @@ class t2LOL
 
 // Setup for the iteration count of a WLS. See t2WhileLoopSetup.
 def arm_wlssetup : SDNode<"ARMISD::WLSSETUP",
-                          SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<1, 0>]>>;
+                          SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<1, 0>]>,
+                          [SDNPSideEffect]>;
 
 // Low-overhead loops, While Loop Start branch. See t2WhileLoopStart
 def arm_wls : SDNode<"ARMISD::WLS",
@@ -5668,6 +5669,7 @@ def t2DoLoopStartTP :
 // t2WhileLoopSetup to setup LR and t2WhileLoopStart to perform the branch. Not
 // valid after reg alloc, as it should be lowered during MVETPAndVPTOptimisations
 // into a t2WhileLoopStartLR (or expanded).
+let hasSideEffects = 1 in
 def t2WhileLoopSetup :
   t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br,
                [(set i32:$lr, (arm_wlssetup i32:$tc))]>;
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/pr168209.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/pr168209.ll
new file mode 100644
index 0000000000000..a6dded12c064b
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/pr168209.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=thumbv8.1m.main %s -o - | FileCheck %s
+
+; Checks that t2WhileLoopSetup is not CSEd.
+
+define i32 @test(i16 %arg) {
+; CHECK-LABEL: test:
+; CHECK: @ %bb.0: @ %bb
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: uxth r0, r0
+; CHECK-NEXT: wls lr, r0, .LBB0_4
+; CHECK-NEXT: .LBB0_1: @ %bb3
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: le lr, .LBB0_1
+; CHECK-NEXT: @ %bb.2: @ %bb2
+; CHECK-NEXT: wls lr, r0, .LBB0_4
+; CHECK-NEXT: .LBB0_3: @ %bb7
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: le lr, .LBB0_3
+; CHECK-NEXT: .LBB0_4: @ %.critedge
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: pop {r7, pc}
+bb:
+  %i = zext i16 %arg to i32 ; shared bound: compared against in both %bb3 and %bb7
+  %i1 = icmp eq i16 %arg, 0 ; zero-trip test, branched on here and again in %bb2
+  br i1 %i1, label %.critedge, label %bb3
+
+bb2: ; preds = %bb3
+  br i1 %i1, label %.critedge, label %bb7 ; same guard as the entry branch, now ahead of the second loop
+
+bb3: ; preds = %bb3, %bb
+  %i4 = phi i32 [ %i5, %bb3 ], [ 0, %bb ]
+  %i5 = add i32 %i4, 1
+  %i6 = icmp eq i32 %i5, %i ; first loop leaves once the counter reaches %i
+  br i1 %i6, label %bb2, label %bb3
+
+bb7: ; preds = %bb7, %bb2
+  %i8 = phi i32 [ %i9, %bb7 ], [ 0, %bb2 ]
+  %i9 = add i32 %i8, 1
+  %i10 = icmp eq i32 %i9, %i ; second loop counts to the same bound %i
+  br i1 %i10, label %.critedge, label %bb7
+
+.critedge: ; preds = %bb7, %bb2, %bb
+  ret i32 0
+}