diff --git a/llvm/test/CodeGen/AArch64/pr166870.ll b/llvm/test/CodeGen/AArch64/pr166870.ll index dc23f51987635..d6f99c67a01ff 100644 --- a/llvm/test/CodeGen/AArch64/pr166870.ll +++ b/llvm/test/CodeGen/AArch64/pr166870.ll @@ -1,68 +1,99 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc -O3 < %s -mtriple=aarch64 | FileCheck %s -; The seemingly redundant mov where src_reg == dst_reg shouldn't be removed, -; because it has the effect of zeroing the upper bits in x8. +; The seemingly redundant wreg mov where src_reg == dst_reg shouldn't be +; removed, because it has the effect of zeroing the upper bits in the matching +; xreg. -define i32 @ham(i32 %arg, i1 %arg1, i1 %arg2, ptr %arg3) nounwind { -; CHECK-LABEL: ham: +define i32 @widget(i32 %arg, i32 %arg1, i1 %arg2, ptr %arg3, i1 %arg4) #0 nounwind { +; CHECK-LABEL: widget: ; CHECK: // %bb.0: // %bb -; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: tbnz w1, #0, .LBB0_3 -; CHECK-NEXT: // %bb.1: // %bb4 -; CHECK-NEXT: tbnz w2, #0, .LBB0_3 -; CHECK-NEXT: // %bb.2: // %bb5 -; CHECK-NEXT: mov x19, x3 -; CHECK-NEXT: mov w21, w1 -; CHECK-NEXT: mov w20, w0 -; CHECK-NEXT: bl zot -; CHECK-NEXT: tbz w21, #0, .LBB0_4 -; CHECK-NEXT: .LBB0_3: // %bb6 -; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: tbz w2, #0, .LBB0_2 +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %bb5 +; CHECK-NEXT: tbz w4, #0, .LBB0_4 +; CHECK-NEXT: // %bb.3: +; CHECK-NEXT: mov w0, #0 // =0x0 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: mov w8, w20 -; CHECK-NEXT: mov w20, wzr +; CHECK-NEXT: .LBB0_4: // %bb6 +; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x3 +; CHECK-NEXT: mov x20, x0 +; CHECK-NEXT: mov x21, x1 +; CHECK-NEXT: bl baz +; CHECK-NEXT: mov w0, #0 // =0x0 +; CHECK-NEXT: cbnz wzr, .LBB0_11 +; CHECK-NEXT: // %bb.5: // %bb6 +; CHECK-NEXT: mov w10, #1 // =0x1 +; CHECK-NEXT: cbnz w10, .LBB0_11 +; CHECK-NEXT: // %bb.6: // %bb7 +; CHECK-NEXT: cbnz w10, .LBB0_10 +; CHECK-NEXT: // %bb.7: // %bb8 +; CHECK-NEXT: mov x8, x21 +; CHECK-NEXT: mov x9, x20 +; CHECK-NEXT: mov w20, #0 // =0x0 +; CHECK-NEXT: mov w9, w9 +; CHECK-NEXT: mov x21, x9 ; CHECK-NEXT: mov w8, w8 -; CHECK-NEXT: mov w21, w8 -; CHECK-NEXT: .LBB0_5: // %bb7 +; CHECK-NEXT: mov x22, x8 +; CHECK-NEXT: .LBB0_8: // %bb10 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: strb w20, [x19] -; CHECK-NEXT: cbnz x21, .LBB0_5 -; CHECK-NEXT: // %bb.6: // %bb8 -; CHECK-NEXT: // in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: bl quux -; CHECK-NEXT: b .LBB0_5 +; CHECK-NEXT: cbnz x21, .LBB0_8 +; CHECK-NEXT: // %bb.9: // %bb12 +; CHECK-NEXT: // in Loop: Header=BB0_8 Depth=1 +; CHECK-NEXT: bl snork +; CHECK-NEXT: cbnz x22, .LBB0_8 +; CHECK-NEXT: .LBB0_10: +; CHECK-NEXT: mov w0, #0 // =0x0 +; CHECK-NEXT: .LBB0_11: +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret bb: - br i1 %arg1, label %bb6, label %bb4 - -bb4: - %load = load ptr, ptr null, align 8 - br i1 %arg2, label %bb6, label %bb5 + br i1 %arg2, label %bb14, label %bb5 bb5: - %call = call i32 @zot() #0 - %zext = zext i32 %arg to i64 - br i1 %arg1, label %bb6, label %bb7 + %load = load ptr, ptr null, align 8 + br i1 %arg4, label %bb14, label %bb6 bb6: - ret i32 0 + %call = call i32 @baz() #1 + %or = or i1 false, true + br i1 %or, label %bb14, label %bb7 bb7: - store i8 0, ptr %arg3, align 1 - %icmp = icmp eq i64 %zext, 0 - br i1 %icmp, label %bb8, label %bb7 + %icmp = icmp eq i32 0, 0 + %zext = zext i32 %arg to i64 + br i1 %icmp, label %bb14, label %bb8 bb8: - call void @quux() - br label %bb7 + %zext9 = zext i32 %arg1 to i64 + br label %bb10 + +bb10: + store i8 0, ptr %arg3, align 1 + %icmp11 = icmp eq i64 %zext, 0 + br i1 %icmp11, label %bb12, label %bb10 + +bb12: + call void @snork() + %icmp13 = icmp eq i64 0, %zext9 + br i1 %icmp13, label %bb14, label %bb10 + +bb14: + %phi = phi i32 [ 0, %bb6 ], [ 0, %bb7 ], [ 0, %bb12 ], [ 1, %bb ], [ 0, %bb5 ] + ret i32 %phi } -declare i32 @zot() +declare i32 @baz() -declare void @quux() +declare void @snork() -attributes #0 = { returns_twice } +attributes #0 = { "target-cpu"="apple-m1" } +attributes #1 = { returns_twice }