diff --git a/llvm/test/CodeGen/AArch64/neg-imm.ll b/llvm/test/CodeGen/AArch64/neg-imm.ll index 7bf08b0f36188..70948fdb8c699 100644 --- a/llvm/test/CodeGen/AArch64/neg-imm.ll +++ b/llvm/test/CodeGen/AArch64/neg-imm.ll @@ -1,14 +1,41 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -disable-block-placement -o - %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s ; LSR used to pick a sub-optimal solution due to the target responding ; conservatively to isLegalAddImmediate for negative values. declare void @foo(i32) define void @test(i32 %px) { -; CHECK_LABEL: test: -; CHECK_LABEL: %entry -; CHECK: subs [[REG0:w[0-9]+]], -; CHECK: csel {{w[0-9]+}}, wzr, [[REG0]] +; CHECK-LABEL: test: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: subs w8, w0, #1 +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: csel w20, wzr, w8, lt +; CHECK-NEXT: b .LBB0_2 +; CHECK-NEXT: .LBB0_1: // %for.inc +; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: cmp w20, w19 +; CHECK-NEXT: add w20, w20, #1 +; CHECK-NEXT: b.gt .LBB0_4 +; CHECK-NEXT: .LBB0_2: // %for.body +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cmp w19, w20 +; CHECK-NEXT: b.eq .LBB0_1 +; CHECK-NEXT: // %bb.3: // %if.then3 +; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: bl foo +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .LBB0_4: // %for.cond.cleanup +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret entry: %sub = add nsw i32 %px, -1 %cmp = icmp slt i32 %px, 1 @@ -16,10 +43,6 @@ entry: br label %for.body for.body: -; CHECK_LABEL: %for.body -; CHECK: cmp -; CHECK-NEXT: b.eq -; CHECK-LABEL: %if.then3 %x.015 = phi i32 [ %inc, %for.inc ], [ %.sub, %entry ] %cmp2 = icmp eq i32 %x.015, %px br i1 %cmp2, label %for.inc, label %if.then3 @@ -29,11 +52,6 @@ if.then3: br label %for.inc for.inc: -; CHECK_LABEL: %for.inc -; CHECK: cmp -; CHECK-NEXT: add -; CHECK-NEXT: b.le -; CHECK_LABEL: %for.cond.cleanup %inc = add nsw i32 %x.015, 1 %cmp1 = icmp sgt i32 %x.015, %px br i1 %cmp1, label %for.cond.cleanup.loopexit, label %for.body diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll index 7882f5189413b..0c8323475b516 100644 --- a/llvm/test/CodeGen/AArch64/neon-mov.ll +++ b/llvm/test/CodeGen/AArch64/neon-mov.ll @@ -1,249 +1,330 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i8> @movi8b() { ; CHECK-LABEL: movi8b: -; CHECK: movi {{v[0-9]+}}.8b, #{{0x8|8}} +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.8b, #8 +; CHECK-NEXT: ret ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > } define <16 x i8> @movi16b() { ; CHECK-LABEL: movi16b: -; CHECK: movi {{v[0-9]+}}.16b, #{{0x8|8}} +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.16b, #8 +; CHECK-NEXT: ret ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > } define <2 x i32> @movi2s_lsl0() { ; CHECK-LABEL: movi2s_lsl0: -; CHECK: movi {{d[0-9]+}}, #0x0000ff000000ff +; CHECK: // %bb.0: +; CHECK-NEXT: movi d0, #0x0000ff000000ff +; CHECK-NEXT: ret ret <2 x i32> < i32 255, i32 255 > } define <2 x i32> @movi2s_lsl8() { ; CHECK-LABEL: movi2s_lsl8: -; CHECK: movi {{d[0-9]+}}, #0x00ff000000ff00 +; CHECK: // %bb.0: +; CHECK-NEXT: movi d0, #0x00ff000000ff00 +; CHECK-NEXT: ret ret <2 x i32> < i32 65280, i32 65280 > } define <2 x i32> @movi2s_lsl16() { ; CHECK-LABEL: movi2s_lsl16: -; CHECK: movi {{d[0-9]+}}, #0xff000000ff0000 +; CHECK: // %bb.0: +; CHECK-NEXT: movi d0, #0xff000000ff0000 +; CHECK-NEXT: ret ret <2 x i32> < i32 16711680, i32 16711680 > } define <2 x i32> @movi2s_lsl24() { ; CHECK-LABEL: movi2s_lsl24: -; CHECK: movi {{d[0-9]+}}, #0xff000000ff000000 +; CHECK: // %bb.0: +; CHECK-NEXT: movi d0, #0xff000000ff000000 +; CHECK-NEXT: ret ret <2 x i32> < i32 4278190080, i32 4278190080 > } define <4 x i32> @movi4s_lsl0() { ; CHECK-LABEL: movi4s_lsl0: -; CHECK: movi {{v[0-9]+}}.2d, #0x0000ff000000ff +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0x0000ff000000ff +; CHECK-NEXT: ret ret <4 x i32> < i32 255, i32 255, i32 255, i32 255 > } define <4 x i32> @movi4s_lsl8() { ; CHECK-LABEL: movi4s_lsl8: -; CHECK: movi {{v[0-9]+}}.2d, #0x00ff000000ff00 +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0x00ff000000ff00 +; CHECK-NEXT: ret ret <4 x i32> < i32 65280, i32 65280, i32 65280, i32 65280 > } define <4 x i32> @movi4s_lsl16() { ; CHECK-LABEL: movi4s_lsl16: -; CHECK: movi {{v[0-9]+}}.2d, #0xff000000ff0000 +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0xff000000ff0000 +; CHECK-NEXT: ret ret <4 x i32> < i32 16711680, i32 16711680, i32 16711680, i32 16711680 > } define <4 x i32> @movi4s_lsl24() { ; CHECK-LABEL: movi4s_lsl24: -; CHECK: movi {{v[0-9]+}}.2d, #0xff000000ff000000 +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0xff000000ff000000 +; CHECK-NEXT: ret ret <4 x i32> < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080 > } define <4 x i16> @movi4h_lsl0() { ; CHECK-LABEL: movi4h_lsl0: -; CHECK: movi {{d[0-9]+}}, #0xff00ff00ff00ff +; CHECK: // %bb.0: +; CHECK-NEXT: movi d0, #0xff00ff00ff00ff +; CHECK-NEXT: ret ret <4 x i16> < i16 255, i16 255, i16 255, i16 255 > } define <4 x i16> @movi4h_lsl8() { ; CHECK-LABEL: movi4h_lsl8: -; CHECK: movi d0, #0xff00ff00ff00ff00 +; CHECK: // %bb.0: +; CHECK-NEXT: movi d0, #0xff00ff00ff00ff00 +; CHECK-NEXT: ret ret <4 x i16> < i16 65280, i16 65280, i16 65280, i16 65280 > } define <8 x i16> @movi8h_lsl0() { ; CHECK-LABEL: movi8h_lsl0: -; CHECK: movi v0.2d, #0xff00ff00ff00ff +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0xff00ff00ff00ff +; CHECK-NEXT: ret ret <8 x i16> < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 > } define <8 x i16> @movi8h_lsl8() { ; CHECK-LABEL: movi8h_lsl8: -; CHECK: movi v0.2d, #0xff00ff00ff00ff00 +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0xff00ff00ff00ff00 +; CHECK-NEXT: ret ret <8 x i16> < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 > } define <2 x i32> @mvni2s_lsl0() { ; CHECK-LABEL: mvni2s_lsl0: -; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}} +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v0.2s, #16 +; CHECK-NEXT: ret ret <2 x i32> < i32 4294967279, i32 4294967279 > } define <2 x i32> @mvni2s_lsl8() { ; CHECK-LABEL: mvni2s_lsl8: -; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #8 +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v0.2s, #16, lsl #8 +; CHECK-NEXT: ret ret <2 x i32> < i32 4294963199, i32 4294963199 > } define <2 x i32> @mvni2s_lsl16() { ; CHECK-LABEL: mvni2s_lsl16: -; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #16 +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v0.2s, #16, lsl #16 +; CHECK-NEXT: ret ret <2 x i32> < i32 4293918719, i32 4293918719 > } define <2 x i32> @mvni2s_lsl24() { ; CHECK-LABEL: mvni2s_lsl24: -; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #24 +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v0.2s, #16, lsl #24 +; CHECK-NEXT: ret ret <2 x i32> < i32 4026531839, i32 4026531839 > } define <4 x i32> @mvni4s_lsl0() { ; CHECK-LABEL: mvni4s_lsl0: -; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}} +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v0.4s, #16 +; CHECK-NEXT: ret ret <4 x i32> < i32 4294967279, i32 4294967279, i32 4294967279, i32 4294967279 > } define <4 x i32> @mvni4s_lsl8() { ; CHECK-LABEL: mvni4s_lsl8: -; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #8 +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v0.4s, #16, lsl #8 +; CHECK-NEXT: ret ret <4 x i32> < i32 4294963199, i32 4294963199, i32 4294963199, i32 4294963199 > } define <4 x i32> @mvni4s_lsl16() { ; CHECK-LABEL: mvni4s_lsl16: -; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #16 +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v0.4s, #16, lsl #16 +; CHECK-NEXT: ret ret <4 x i32> < i32 4293918719, i32 4293918719, i32 4293918719, i32 4293918719 > } define <4 x i32> @mvni4s_lsl24() { ; CHECK-LABEL: mvni4s_lsl24: -; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #24 +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v0.4s, #16, lsl #24 +; CHECK-NEXT: ret ret <4 x i32> < i32 4026531839, i32 4026531839, i32 4026531839, i32 4026531839 > } define <4 x i16> @mvni4h_lsl0() { ; CHECK-LABEL: mvni4h_lsl0: -; CHECK: mvni {{v[0-9]+}}.4h, #{{0x10|16}} +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v0.4h, #16 +; CHECK-NEXT: ret ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 > } define <4 x i16> @mvni4h_lsl8() { ; CHECK-LABEL: mvni4h_lsl8: -; CHECK: mvni {{v[0-9]+}}.4h, #{{0x10|16}}, lsl #8 +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v0.4h, #16, lsl #8 +; CHECK-NEXT: ret ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 > } define <8 x i16> @mvni8h_lsl0() { ; CHECK-LABEL: mvni8h_lsl0: -; CHECK: mvni {{v[0-9]+}}.8h, #{{0x10|16}} +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v0.8h, #16 +; CHECK-NEXT: ret ret <8 x i16> < i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519 > } define <8 x i16> @mvni8h_lsl8() { ; CHECK-LABEL: mvni8h_lsl8: -; CHECK: mvni {{v[0-9]+}}.8h, #{{0x10|16}}, lsl #8 +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v0.8h, #16, lsl #8 +; CHECK-NEXT: ret ret <8 x i16> < i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439 > } define <2 x i32> @movi2s_msl8(<2 x i32> %a) { ; CHECK-LABEL: movi2s_msl8: -; CHECK: movi {{d[0-9]+}}, #0x00ffff0000ffff +; CHECK: // %bb.0: +; CHECK-NEXT: movi d0, #0x00ffff0000ffff +; CHECK-NEXT: ret ret <2 x i32> < i32 65535, i32 65535 > } define <2 x i32> @movi2s_msl16() { ; CHECK-LABEL: movi2s_msl16: -; CHECK: movi d0, #0xffffff00ffffff +; CHECK: // %bb.0: +; CHECK-NEXT: movi d0, #0xffffff00ffffff +; CHECK-NEXT: ret ret <2 x i32> < i32 16777215, i32 16777215 > } define <4 x i32> @movi4s_msl8() { ; CHECK-LABEL: movi4s_msl8: -; CHECK: movi v0.2d, #0x00ffff0000ffff +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff +; CHECK-NEXT: ret ret <4 x i32> < i32 65535, i32 65535, i32 65535, i32 65535 > } define <4 x i32> @movi4s_msl16() { ; CHECK-LABEL: movi4s_msl16: -; CHECK: movi v0.2d, #0xffffff00ffffff +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0xffffff00ffffff +; CHECK-NEXT: ret ret <4 x i32> < i32 16777215, i32 16777215, i32 16777215, i32 16777215 > } define <2 x i32> @mvni2s_msl8() { ; CHECK-LABEL: mvni2s_msl8: -; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}}, msl #8 +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v0.2s, #16, msl #8 +; CHECK-NEXT: ret ret <2 x i32> < i32 18446744073709547264, i32 18446744073709547264> } define <2 x i32> @mvni2s_msl16() { ; CHECK-LABEL: mvni2s_msl16: -; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}}, msl #16 +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v0.2s, #16, msl #16 +; CHECK-NEXT: ret ret <2 x i32> < i32 18446744073708437504, i32 18446744073708437504> } define <4 x i32> @mvni4s_msl8() { ; CHECK-LABEL: mvni4s_msl8: -; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}}, msl #8 +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v0.4s, #16, msl #8 +; CHECK-NEXT: ret ret <4 x i32> < i32 18446744073709547264, i32 18446744073709547264, i32 18446744073709547264, i32 18446744073709547264> } define <4 x i32> @mvni4s_msl16() { ; CHECK-LABEL: mvni4s_msl16: -; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}}, msl #16 +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v0.4s, #16, msl #16 +; CHECK-NEXT: ret ret <4 x i32> < i32 18446744073708437504, i32 18446744073708437504, i32 18446744073708437504, i32 18446744073708437504> } define <2 x i64> @movi2d() { ; CHECK-LABEL: movi2d: -; CHECK: movi {{v[0-9]+}}.2d, #0xff0000ff0000ffff +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0xff0000ff0000ffff +; CHECK-NEXT: ret ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 > } define <1 x i64> @movid() { ; CHECK-LABEL: movid: -; CHECK: movi {{d[0-9]+}}, #0xff0000ff0000ffff +; CHECK: // %bb.0: +; CHECK-NEXT: movi d0, #0xff0000ff0000ffff +; CHECK-NEXT: ret ret <1 x i64> < i64 18374687574888349695 > } define <2 x float> @fmov2s() { ; CHECK-LABEL: fmov2s: -; CHECK: fmov {{v[0-9]+}}.2s, #{{-12.00000000|-1.200000e\+01}} +; CHECK: // %bb.0: +; CHECK-NEXT: fmov v0.2s, #-12.00000000 +; CHECK-NEXT: ret ret <2 x float> < float -1.2e1, float -1.2e1> } define <4 x float> @fmov4s() { ; CHECK-LABEL: fmov4s: -; CHECK: fmov {{v[0-9]+}}.4s, #{{-12.00000000|-1.200000e\+01}} +; CHECK: // %bb.0: +; CHECK-NEXT: fmov v0.4s, #-12.00000000 +; CHECK-NEXT: ret ret <4 x float> < float -1.2e1, float -1.2e1, float -1.2e1, float -1.2e1> } define <2 x double> @fmov2d() { ; CHECK-LABEL: fmov2d: -; CHECK: fmov {{v[0-9]+}}.2d, #{{-12.00000000|-1.200000e\+01}} +; CHECK: // %bb.0: +; CHECK-NEXT: fmov v0.2d, #-12.00000000 +; CHECK-NEXT: ret ret <2 x double> < double -1.2e1, double -1.2e1> } define <2 x i32> @movi1d_1() { ; CHECK-LABEL: movi1d_1: -; CHECK: movi d0, #0x{{0*}}ffffffff0000 +; CHECK: // %bb.0: +; CHECK-NEXT: movi d0, #0x00ffffffff0000 +; CHECK-NEXT: ret ret <2 x i32> < i32 -65536, i32 65535> } @@ -251,9 +332,11 @@ define <2 x i32> @movi1d_1() { declare <2 x i32> @test_movi1d(<2 x i32>, <2 x i32>) define <2 x i32> @movi1d() { ; CHECK-LABEL: movi1d: -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] -; CHECK-NEXT: movi d1, #0x{{0*}}ffffffff0000 +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI40_0 +; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI40_0] +; CHECK-NEXT: movi d1, #0x00ffffffff0000 +; CHECK-NEXT: b test_movi1d %1 = tail call <2 x i32> @test_movi1d(<2 x i32> , <2 x i32> ) ret <2 x i32> %1 } diff --git a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll index 241879ad5d5d4..c4fddba58be63 100644 --- a/llvm/test/CodeGen/AArch64/nontemporal.ll +++ b/llvm/test/CodeGen/AArch64/nontemporal.ll @@ -1,117 +1,137 @@ -; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple aarch64-apple-darwin | FileCheck %s define void @test_stnp_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 { ; CHECK-LABEL: test_stnp_v4i64: -; CHECK-NEXT: stnp q0, q1, [x0] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: stnp q0, q1, [x0] +; CHECK-NEXT: ret store <4 x i64> %v, <4 x i64>* %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 { ; CHECK-LABEL: test_stnp_v4i32: -; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d0, d[[HI]], [x0] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: mov d1, v0[1] +; CHECK-NEXT: stnp d0, d1, [x0] +; CHECK-NEXT: ret store <4 x i32> %v, <4 x i32>* %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 { ; CHECK-LABEL: test_stnp_v8i16: -; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d0, d[[HI]], [x0] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: mov d1, v0[1] +; CHECK-NEXT: stnp d0, d1, [x0] +; CHECK-NEXT: ret store <8 x i16> %v, <8 x i16>* %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 { ; CHECK-LABEL: test_stnp_v16i8: -; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d0, d[[HI]], [x0] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: mov d1, v0[1] +; CHECK-NEXT: stnp d0, d1, [x0] +; CHECK-NEXT: ret store <16 x i8> %v, <16 x i8>* %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v2i32(<2 x i32>* %p, <2 x i32> %v) #0 { ; CHECK-LABEL: test_stnp_v2i32: -; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp s0, s[[HI]], [x0] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov s1, v0[1] +; CHECK-NEXT: stnp s0, s1, [x0] +; CHECK-NEXT: ret store <2 x i32> %v, <2 x i32>* %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v4i16(<4 x i16>* %p, <4 x i16> %v) #0 { ; CHECK-LABEL: test_stnp_v4i16: -; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp s0, s[[HI]], [x0] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov s1, v0[1] +; CHECK-NEXT: stnp s0, s1, [x0] +; CHECK-NEXT: ret store <4 x i16> %v, <4 x i16>* %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v8i8(<8 x i8>* %p, <8 x i8> %v) #0 { ; CHECK-LABEL: test_stnp_v8i8: -; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp s0, s[[HI]], [x0] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov s1, v0[1] +; CHECK-NEXT: stnp s0, s1, [x0] +; CHECK-NEXT: ret store <8 x i8> %v, <8 x i8>* %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v2f64(<2 x double>* %p, <2 x double> %v) #0 { ; CHECK-LABEL: test_stnp_v2f64: -; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d0, d[[HI]], [x0] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: mov d1, v0[1] +; CHECK-NEXT: stnp d0, d1, [x0] +; CHECK-NEXT: ret store <2 x double> %v, <2 x double>* %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v4f32(<4 x float>* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32: -; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d0, d[[HI]], [x0] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: mov d1, v0[1] +; CHECK-NEXT: stnp d0, d1, [x0] +; CHECK-NEXT: ret store <4 x float> %v, <4 x float>* %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v2f32(<2 x float>* %p, <2 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v2f32: -; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp s0, s[[HI]], [x0] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov s1, v0[1] +; CHECK-NEXT: stnp s0, s1, [x0] +; CHECK-NEXT: ret store <2 x float> %v, <2 x float>* %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v1f64(<1 x double>* %p, <1 x double> %v) #0 { ; CHECK-LABEL: test_stnp_v1f64: -; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp s0, s[[HI]], [x0] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov s1, v0[1] +; CHECK-NEXT: stnp s0, s1, [x0] +; CHECK-NEXT: ret store <1 x double> %v, <1 x double>* %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v1i64(<1 x i64>* %p, <1 x i64> %v) #0 { ; CHECK-LABEL: test_stnp_v1i64: -; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp s0, s[[HI]], [x0] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov s1, v0[1] +; CHECK-NEXT: stnp s0, s1, [x0] +; CHECK-NEXT: ret store <1 x i64> %v, <1 x i64>* %p, align 1, !nontemporal !0 ret void } define void @test_stnp_i64(i64* %p, i64 %v) #0 { ; CHECK-LABEL: test_stnp_i64: -; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32 -; CHECK-NEXT: stnp w1, w[[HI]], [x0] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: lsr x8, x1, #32 +; CHECK-NEXT: stnp w1, w8, [x0] +; CHECK-NEXT: ret store i64 %v, i64* %p, align 1, !nontemporal !0 ret void } @@ -119,9 +139,10 @@ define void @test_stnp_i64(i64* %p, i64 %v) #0 { define void @test_stnp_v2f64_offset(<2 x double>* %p, <2 x double> %v) #0 { ; CHECK-LABEL: test_stnp_v2f64_offset: -; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d0, d[[HI]], [x0, #16] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: mov d1, v0[1] +; CHECK-NEXT: stnp d0, d1, [x0, #16] +; CHECK-NEXT: ret %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 1 store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0 ret void @@ -129,9 +150,10 @@ define void @test_stnp_v2f64_offset(<2 x double>* %p, <2 x double> %v) #0 { define void @test_stnp_v2f64_offset_neg(<2 x double>* %p, <2 x double> %v) #0 { ; CHECK-LABEL: test_stnp_v2f64_offset_neg: -; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d0, d[[HI]], [x0, #-16] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: mov d1, v0[1] +; CHECK-NEXT: stnp d0, d1, [x0, #-16] +; CHECK-NEXT: ret %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 -1 store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0 ret void @@ -139,9 +161,11 @@ define void @test_stnp_v2f64_offset_neg(<2 x double>* %p, <2 x double> %v) #0 { define void @test_stnp_v2f32_offset(<2 x float>* %p, <2 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v2f32_offset: -; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp s0, s[[HI]], [x0, #8] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov s1, v0[1] +; CHECK-NEXT: stnp s0, s1, [x0, #8] +; CHECK-NEXT: ret %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 1 store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0 ret void @@ -149,9 +173,11 @@ define void @test_stnp_v2f32_offset(<2 x float>* %p, <2 x float> %v) #0 { define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v2f32_offset_neg: -; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp s0, s[[HI]], [x0, #-8] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov s1, v0[1] +; CHECK-NEXT: stnp s0, s1, [x0, #-8] +; CHECK-NEXT: ret %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 -1 store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0 ret void @@ -159,9 +185,10 @@ define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 { define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 { ; CHECK-LABEL: test_stnp_i64_offset: -; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32 -; CHECK-NEXT: stnp w1, w[[HI]], [x0, #8] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: lsr x8, x1, #32 +; CHECK-NEXT: stnp w1, w8, [x0, #8] +; CHECK-NEXT: ret %tmp0 = getelementptr i64, i64* %p, i32 1 store i64 %v, i64* %tmp0, align 1, !nontemporal !0 ret void @@ -169,9 +196,10 @@ define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 { define void @test_stnp_i64_offset_neg(i64* %p, i64 %v) #0 { ; CHECK-LABEL: test_stnp_i64_offset_neg: -; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32 -; CHECK-NEXT: stnp w1, w[[HI]], [x0, #-8] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: lsr x8, x1, #32 +; CHECK-NEXT: stnp w1, w8, [x0, #-8] +; CHECK-NEXT: ret %tmp0 = getelementptr i64, i64* %p, i32 -1 store i64 %v, i64* %tmp0, align 1, !nontemporal !0 ret void @@ -179,10 +207,11 @@ define void @test_stnp_i64_offset_neg(i64* %p, i64 %v) #0 { define void @test_stnp_v4f32_invalid_offset_4(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_4: -; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #4 -; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: add x8, x0, #4 +; CHECK-NEXT: mov d1, v0[1] +; CHECK-NEXT: stnp d0, d1, [x8] +; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 4 %tmp1 = bitcast i8* %tmp0 to <4 x float>* store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 @@ -191,10 +220,11 @@ define void @test_stnp_v4f32_invalid_offset_4(i8* %p, <4 x float> %v) #0 { define void @test_stnp_v4f32_invalid_offset_neg_4(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_4: -; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #4 -; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: sub x8, x0, #4 +; CHECK-NEXT: mov d1, v0[1] +; CHECK-NEXT: stnp d0, d1, [x8] +; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 -4 %tmp1 = bitcast i8* %tmp0 to <4 x float>* store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 @@ -203,10 +233,11 @@ define void @test_stnp_v4f32_invalid_offset_neg_4(i8* %p, <4 x float> %v) #0 { define void @test_stnp_v4f32_invalid_offset_512(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_512: -; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #512 -; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: add x8, x0, #512 +; CHECK-NEXT: mov d1, v0[1] +; CHECK-NEXT: stnp d0, d1, [x8] +; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 512 %tmp1 = bitcast i8* %tmp0 to <4 x float>* store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 @@ -215,9 +246,10 @@ define void @test_stnp_v4f32_invalid_offset_512(i8* %p, <4 x float> %v) #0 { define void @test_stnp_v4f32_offset_504(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_offset_504: -; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d0, d[[HI]], [x0, #504] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: mov d1, v0[1] +; CHECK-NEXT: stnp d0, d1, [x0, #504] +; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 504 %tmp1 = bitcast i8* %tmp0 to <4 x float>* store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 @@ -226,10 +258,11 @@ define void @test_stnp_v4f32_offset_504(i8* %p, <4 x float> %v) #0 { define void @test_stnp_v4f32_invalid_offset_508(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_508: -; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #508 -; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: add x8, x0, #508 +; CHECK-NEXT: mov d1, v0[1] +; CHECK-NEXT: stnp d0, d1, [x8] +; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 508 %tmp1 = bitcast i8* %tmp0 to <4 x float>* store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 @@ -238,10 +271,11 @@ define void @test_stnp_v4f32_invalid_offset_508(i8* %p, <4 x float> %v) #0 { define void @test_stnp_v4f32_invalid_offset_neg_520(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_520: -; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #520 -; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: sub x8, x0, #520 +; CHECK-NEXT: mov d1, v0[1] +; CHECK-NEXT: stnp d0, d1, [x8] +; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 -520 %tmp1 = bitcast i8* %tmp0 to <4 x float>* store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 @@ -250,9 +284,10 @@ define void @test_stnp_v4f32_invalid_offset_neg_520(i8* %p, <4 x float> %v) #0 { define void @test_stnp_v4f32_offset_neg_512(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_offset_neg_512: -; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d0, d[[HI]], [x0, #-512] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: mov d1, v0[1] +; CHECK-NEXT: stnp d0, d1, [x0, #-512] +; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 -512 %tmp1 = bitcast i8* %tmp0 to <4 x float>* store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 @@ -262,10 +297,12 @@ define void @test_stnp_v4f32_offset_neg_512(i8* %p, <4 x float> %v) #0 { define void @test_stnp_v2f32_invalid_offset_256(i8* %p, <2 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v2f32_invalid_offset_256: -; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #256 -; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: add x8, x0, #256 +; CHECK-NEXT: mov s1, v0[1] +; CHECK-NEXT: stnp s0, s1, [x8] +; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 256 %tmp1 = bitcast i8* %tmp0 to <2 x float>* store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0 @@ -274,9 +311,11 @@ define void @test_stnp_v2f32_invalid_offset_256(i8* %p, <2 x float> %v) #0 { define void @test_stnp_v2f32_offset_252(i8* %p, <2 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v2f32_offset_252: -; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp s0, s[[HI]], [x0, #252] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov s1, v0[1] +; CHECK-NEXT: stnp s0, s1, [x0, #252] +; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 252 %tmp1 = bitcast i8* %tmp0 to <2 x float>* store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0 @@ -285,10 +324,12 @@ define void @test_stnp_v2f32_offset_252(i8* %p, <2 x float> %v) #0 { define void @test_stnp_v2f32_invalid_offset_neg_260(i8* %p, <2 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v2f32_invalid_offset_neg_260: -; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #260 -; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: sub x8, x0, #260 +; CHECK-NEXT: mov s1, v0[1] +; CHECK-NEXT: stnp s0, s1, [x8] +; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 -260 %tmp1 = bitcast i8* %tmp0 to <2 x float>* store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0 @@ -297,9 +338,11 @@ define void @test_stnp_v2f32_invalid_offset_neg_260(i8* %p, <2 x float> %v) #0 { define void @test_stnp_v2f32_offset_neg_256(i8* %p, <2 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v2f32_offset_neg_256: -; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp s0, s[[HI]], [x0, #-256] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov s1, v0[1] +; CHECK-NEXT: stnp s0, s1, [x0, #-256] +; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 -256 %tmp1 = bitcast i8* %tmp0 to <2 x float>* store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0 @@ -310,9 +353,16 @@ declare void @dummy(<4 x float>*) define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_offset_alloca: -; CHECK: stnp d0, d{{.*}}, [sp] -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: bl _dummy +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: mov d1, v0[1] +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: stnp d0, d1, [sp] +; CHECK-NEXT: bl _dummy +; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret %tmp0 = alloca <4 x float> store <4 x float> %v, <4 x float>* %tmp0, align 1, !nontemporal !0 call void @dummy(<4 x float>* %tmp0) @@ -321,9 +371,16 @@ define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 { define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2: -; CHECK: stnp d0, d{{.*}}, [sp, #16] -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: bl _dummy +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: mov d1, v0[1] +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: stnp d0, d1, [sp, #16] +; CHECK-NEXT: bl _dummy +; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret %tmp0 = alloca <4 x float>, i32 2 %tmp1 = getelementptr <4 x float>, <4 x float>* %tmp0, i32 1 store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 @@ -332,9 +389,9 @@ define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 { } define void @test_stnp_v32i8(<32 x i8> %v, <32 x i8>* %ptr) { -; CHECK-LABEL: _test_stnp_v32i8: -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: stnp q0, q1, [x0] +; CHECK-LABEL: test_stnp_v32i8: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stnp q0, q1, [x0] ; CHECK-NEXT: ret entry: @@ -343,10 +400,10 @@ entry: } define void @test_stnp_v32i16(<32 x i16> %v, <32 x i16>* %ptr) { -; CHECK-LABEL: _test_stnp_v32i16: -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: stnp q2, q3, [x0, #32] -; CHECK-NEXT: stnp q0, q1, [x0] +; CHECK-LABEL: test_stnp_v32i16: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stnp q2, q3, [x0, #32] +; CHECK-NEXT: stnp q0, q1, [x0] ; CHECK-NEXT: ret entry: @@ -355,10 +412,10 @@ entry: } define void @test_stnp_v32f16(<32 x half> %v, <32 x half>* %ptr) { -; CHECK-LABEL: _test_stnp_v32f16: -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: stnp q2, q3, [x0, #32] -; CHECK-NEXT: stnp q0, q1, [x0] +; CHECK-LABEL: test_stnp_v32f16: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stnp q2, q3, [x0, #32] +; CHECK-NEXT: stnp q0, q1, [x0] ; CHECK-NEXT: ret entry: @@ -367,10 +424,10 @@ entry: } define void @test_stnp_v16i32(<16 x i32> %v, <16 x i32>* %ptr) { -; CHECK-LABEL: _test_stnp_v16i32: -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: stnp q2, q3, [x0, #32] -; CHECK-NEXT: stnp q0, q1, [x0] +; CHECK-LABEL: test_stnp_v16i32: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stnp q2, q3, [x0, #32] +; CHECK-NEXT: stnp q0, q1, [x0] ; CHECK-NEXT: ret entry: @@ -379,10 +436,10 @@ entry: } define void @test_stnp_v16f32(<16 x float> %v, <16 x float>* %ptr) { -; CHECK-LABEL: _test_stnp_v16f32: -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: stnp q2, q3, [x0, #32] -; CHECK-NEXT: stnp q0, q1, [x0] +; CHECK-LABEL: test_stnp_v16f32: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stnp q2, q3, [x0, #32] +; CHECK-NEXT: stnp q0, q1, [x0] ; CHECK-NEXT: ret entry: @@ -391,53 +448,61 @@ entry: } define void @test_stnp_v17f32(<17 x float> %v, <17 x float>* %ptr) { -; CHECK-LABEL: _test_stnp_v17f32: -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: ldr s16, [sp, #16] -; CHECK-NEXT: mov.s v0[1], v1[0] -; CHECK-NEXT: mov.s v4[1], v5[0] -; CHECK-NEXT: ldr s1, [sp] -; CHECK-NEXT: add x8, sp, #20 -; CHECK-NEXT: ld1.s { v16 }[1], [x8] -; CHECK-NEXT: add x8, sp, #4 -; CHECK-NEXT: ld1.s { v1 }[1], [x8] -; CHECK-NEXT: add x8, sp, #24 -; CHECK-NEXT: ld1.s { v16 }[2], [x8] -; CHECK-NEXT: add x8, sp, #8 -; CHECK-NEXT: ld1.s { v1 }[2], [x8] -; CHECK-NEXT: add x8, sp, #28 -; CHECK-NEXT: ld1.s { v16 }[3], [x8] -; CHECK-NEXT: add x8, sp, #12 -; CHECK-NEXT: mov.s v0[2], v2[0] -; CHECK-NEXT: ldr s2, [sp, #32] -; CHECK-NEXT: mov.s v4[2], v6[0] -; CHECK-NEXT: mov.s v0[3], v3[0] -; CHECK-NEXT: mov.s v4[3], v7[0] -; CHECK-NEXT: mov d3, v4[1] -; CHECK-NEXT: mov d5, v0[1] -; CHECK-NEXT: ld1.s { v1 }[3], [x8] -; CHECK-NEXT: stnp d4, d3, [x0, #16] -; CHECK-NEXT: stnp d0, d5, [x0] -; CHECK-NEXT: mov d0, v16[1] -; CHECK-NEXT: mov d3, v1[1] -; CHECK-NEXT: stnp d16, d0, [x0, #48] -; CHECK-NEXT: stnp d1, d3, [x0, #32] -; CHECK-NEXT: str s2, [x0, #64] -; CHECK-NEXT: ret +; CHECK-LABEL: test_stnp_v17f32: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; kill: def $s1 killed $s1 def $q1 +; CHECK-NEXT: ; kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: ldr s16, [sp, #16] +; CHECK-NEXT: mov.s v0[1], v1[0] +; CHECK-NEXT: ldr s1, [sp] +; CHECK-NEXT: add x8, sp, #20 +; CHECK-NEXT: ld1.s { v16 }[1], [x8] +; CHECK-NEXT: add x8, sp, #4 +; CHECK-NEXT: ld1.s { v1 }[1], [x8] +; CHECK-NEXT: add x8, sp, #24 +; CHECK-NEXT: ld1.s { v16 }[2], [x8] +; CHECK-NEXT: add x8, sp, #8 +; CHECK-NEXT: ld1.s { v1 }[2], [x8] +; CHECK-NEXT: add x8, sp, #28 +; CHECK-NEXT: ; kill: def $s4 killed $s4 def $q4 +; CHECK-NEXT: ld1.s { v16 }[3], [x8] +; CHECK-NEXT: add x8, sp, #12 +; CHECK-NEXT: ; kill: def $s5 killed $s5 def $q5 +; CHECK-NEXT: ; kill: def $s2 killed $s2 def $q2 +; CHECK-NEXT: mov.s v4[1], v5[0] +; CHECK-NEXT: ld1.s { v1 }[3], [x8] +; CHECK-NEXT: ; kill: def $s6 killed $s6 def $q6 +; CHECK-NEXT: mov.s v0[2], v2[0] +; CHECK-NEXT: ldr s2, [sp, #32] +; CHECK-NEXT: mov.s v4[2], v6[0] +; CHECK-NEXT: ; kill: def $s7 killed $s7 def $q7 +; CHECK-NEXT: ; kill: def $s3 killed $s3 def $q3 +; CHECK-NEXT: mov.s v0[3], v3[0] +; CHECK-NEXT: mov.s v4[3], v7[0] +; CHECK-NEXT: mov d3, v4[1] +; CHECK-NEXT: mov d5, v0[1] +; CHECK-NEXT: stnp d4, d3, [x0, #16] +; CHECK-NEXT: stnp d0, d5, [x0] +; CHECK-NEXT: mov d0, v16[1] +; CHECK-NEXT: mov d3, v1[1] +; CHECK-NEXT: stnp d16, d0, [x0, #48] +; CHECK-NEXT: stnp d1, d3, [x0, #32] +; CHECK-NEXT: str s2, [x0, #64] +; CHECK-NEXT: ret entry: store <17 x float> %v, <17 x float>* %ptr, align 4, !nontemporal !0 ret void } define void @test_stnp_v16i32_invalid_offset(<16 x i32> %v, <16 x i32>* %ptr) { -; CHECK-LABEL: _test_stnp_v16i32_invalid_offset: -; CHECK-NEXT: .cfi_startproc +; CHECK-LABEL: test_stnp_v16i32_invalid_offset: +; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: mov w8, #32000 ; CHECK-NEXT: mov w9, #32032 ; CHECK-NEXT: add x8, x0, x8 ; CHECK-NEXT: add x9, x0, x9 -; CHECK-NEXT: stnp q2, q3, [x9] -; CHECK-NEXT: stnp q0, q1, [x8] +; CHECK-NEXT: stnp q2, q3, [x9] +; CHECK-NEXT: stnp q0, q1, [x8] ; CHECK-NEXT: ret entry: @@ -447,12 +512,12 @@ entry: } define void @test_stnp_v16f64(<16 x double> %v, <16 x double>* %ptr) { -; CHECK-LABEL: _test_stnp_v16f64: -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: stnp q6, q7, [x0, #96] -; CHECK-NEXT: stnp q4, q5, [x0, #64] -; CHECK-NEXT: stnp q2, q3, [x0, #32] -; CHECK-NEXT: stnp q0, q1, [x0] +; CHECK-LABEL: test_stnp_v16f64: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stnp q6, q7, [x0, #96] +; CHECK-NEXT: stnp q4, q5, [x0, #64] +; CHECK-NEXT: stnp q2, q3, [x0, #32] +; CHECK-NEXT: stnp q0, q1, [x0] ; CHECK-NEXT: ret entry: @@ -461,12 +526,12 @@ entry: } define void @test_stnp_v16i64(<16 x i64> %v, <16 x i64>* %ptr) { -; CHECK-LABEL: _test_stnp_v16i64: -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: stnp q6, q7, [x0, #96] -; CHECK-NEXT: stnp q4, q5, [x0, #64] -; CHECK-NEXT: stnp q2, q3, [x0, #32] -; CHECK-NEXT: stnp q0, q1, [x0] +; CHECK-LABEL: test_stnp_v16i64: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stnp q6, q7, [x0, #96] +; CHECK-NEXT: stnp q4, q5, [x0, #64] +; CHECK-NEXT: stnp q2, q3, [x0, #32] +; CHECK-NEXT: stnp q0, q1, [x0] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/select_fmf.ll b/llvm/test/CodeGen/AArch64/select_fmf.ll index f7568adbd0f03..917c6e7fe19e1 100644 --- a/llvm/test/CodeGen/AArch64/select_fmf.ll +++ b/llvm/test/CodeGen/AArch64/select_fmf.ll @@ -1,16 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm64-- | FileCheck %s ; This test provides fmf coverage for DAG combining of selects -; CHECK-LABEL: select_select_fold_select_and -; CHECK: fminnm s1, s1, s2 -; CHECK-NEXT: fmaxnm s2, s0, s3 -; CHECK-NEXT: fmov -; CHECK-NEXT: fccmp s1, s0, #4, lt -; CHECK-NEXT: fcsel s2, s2, s0, gt - ; select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y define float @select_select_fold_select_and(float %w, float %x, float %y, float %z) { +; CHECK-LABEL: select_select_fold_select_and: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s1, s2 +; CHECK-NEXT: fminnm s1, s1, s2 +; CHECK-NEXT: fmaxnm s2, s0, s3 +; CHECK-NEXT: fmov s4, #0.50000000 +; CHECK-NEXT: fccmp s1, s0, #4, lt +; CHECK-NEXT: fcsel s2, s2, s0, gt +; CHECK-NEXT: fadd s1, s0, s4 +; CHECK-NEXT: fadd s4, s1, s2 +; CHECK-NEXT: fcmp s4, s1 +; CHECK-NEXT: b.le .LBB0_2 +; CHECK-NEXT: // %bb.1: // %if.then.i157.i.i +; CHECK-NEXT: fmov s0, #1.00000000 +; CHECK-NEXT: fadd s0, s2, s0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %if.end.i159.i.i +; CHECK-NEXT: mov w8, #52429 +; CHECK-NEXT: movk w8, #48844, lsl #16 +; CHECK-NEXT: mov w9, #13107 +; CHECK-NEXT: movk w9, #48819, lsl #16 +; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fadd s2, s3, s2 +; CHECK-NEXT: fcmp s1, #0.0 +; CHECK-NEXT: fcsel s0, s0, s2, gt +; CHECK-NEXT: ret %tmp21 = fcmp fast olt float %x, %y %tmp22 = select fast i1 %tmp21, float %x, float %y %tmp24 = fcmp fast ogt float %tmp22, %w @@ -39,16 +61,36 @@ exit: ; preds = %if.end.i159.i.i, %if.then.i ret float %phi1 } -; CHECK-LABEL: select_select_fold_select_or -; CHECK: fcmp s1, s2 -; CHECK-NEXT: fminnm s1, s1, s2 -; CHECK-NEXT: fmaxnm s2, s0, s3 -; CHECK-NEXT: fmov -; CHECK-NEXT: fccmp s1, s0, #0, ge -; CHECK-NEXT: fcsel s2, s0, s2, gt - ; select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y define float @select_select_fold_select_or(float %w, float %x, float %y, float %z) { +; CHECK-LABEL: select_select_fold_select_or: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s1, s2 +; CHECK-NEXT: fminnm s1, s1, s2 +; CHECK-NEXT: fmaxnm s2, s0, s3 +; CHECK-NEXT: fmov s4, #0.50000000 +; CHECK-NEXT: fccmp s1, s0, #0, ge +; CHECK-NEXT: fcsel s2, s0, s2, gt +; CHECK-NEXT: fadd s1, s0, s4 +; CHECK-NEXT: fadd s4, s1, s2 +; CHECK-NEXT: fcmp s4, s1 +; CHECK-NEXT: b.le .LBB1_2 +; CHECK-NEXT: // %bb.1: // %if.then.i157.i.i +; CHECK-NEXT: fmov s0, #1.00000000 +; CHECK-NEXT: fadd s0, s2, s0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: // %if.end.i159.i.i +; CHECK-NEXT: mov w8, #52429 +; CHECK-NEXT: movk w8, #48844, lsl #16 +; CHECK-NEXT: mov w9, #13107 +; CHECK-NEXT: movk w9, #48819, lsl #16 +; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fadd s2, s3, s2 +; CHECK-NEXT: fcmp s1, #0.0 +; CHECK-NEXT: fcsel s0, s0, s2, gt +; CHECK-NEXT: ret %tmp21 = fcmp fast olt float %x, %y %tmp22 = select fast i1 %tmp21, float %x, float %y %tmp24 = fcmp fast ogt float %tmp22, %w diff --git a/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll b/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll index f2a2642f470f3..fc06a20c77325 100644 --- a/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll +++ b/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll @@ -1,29 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-apple-ios %s -o - | FileCheck %s define <16 x double> @test_sitofp_fixed(<16 x i32> %in) { ; CHECK-LABEL: test_sitofp_fixed: +; CHECK: ; %bb.0: +; CHECK-NEXT: sshll2.2d v4, v0, #0 +; CHECK-NEXT: sshll2.2d v5, v1, #0 +; CHECK-NEXT: sshll2.2d v6, v2, #0 +; CHECK-NEXT: sshll2.2d v7, v3, #0 +; CHECK-NEXT: sshll.2d v0, v0, #0 +; CHECK-NEXT: sshll.2d v16, v1, #0 +; CHECK-NEXT: sshll.2d v17, v2, #0 +; CHECK-NEXT: sshll.2d v18, v3, #0 +; CHECK-NEXT: scvtf.2d v1, v4, #6 +; CHECK-NEXT: scvtf.2d v3, v5, #6 +; CHECK-NEXT: scvtf.2d v5, v6, #6 +; CHECK-NEXT: scvtf.2d v7, v7, #6 +; CHECK-NEXT: scvtf.2d v0, v0, #6 +; CHECK-NEXT: scvtf.2d v2, v16, #6 +; CHECK-NEXT: scvtf.2d v4, v17, #6 +; CHECK-NEXT: scvtf.2d v6, v18, #6 +; CHECK-NEXT: ret - ; First, extend each i32 to i64 -; CHECK-DAG: sshll2.2d [[BLOCK0_HI:v[0-9]+]], v0, #0 -; CHECK-DAG: sshll2.2d [[BLOCK1_HI:v[0-9]+]], v1, #0 -; CHECK-DAG: sshll2.2d [[BLOCK2_HI:v[0-9]+]], v2, #0 -; CHECK-DAG: sshll2.2d [[BLOCK3_HI:v[0-9]+]], v3, #0 -; CHECK-DAG: sshll.2d [[BLOCK0_LO:v[0-9]+]], v0, #0 -; CHECK-DAG: sshll.2d [[BLOCK1_LO:v[0-9]+]], v1, #0 -; CHECK-DAG: sshll.2d [[BLOCK2_LO:v[0-9]+]], v2, #0 -; CHECK-DAG: sshll.2d [[BLOCK3_LO:v[0-9]+]], v3, #0 - - ; Next, convert each to double. -; CHECK-DAG: scvtf.2d v0, [[BLOCK0_LO]] -; CHECK-DAG: scvtf.2d v1, [[BLOCK0_HI]] -; CHECK-DAG: scvtf.2d v2, [[BLOCK1_LO]] -; CHECK-DAG: scvtf.2d v3, [[BLOCK1_HI]] -; CHECK-DAG: scvtf.2d v4, [[BLOCK2_LO]] -; CHECK-DAG: scvtf.2d v5, [[BLOCK2_HI]] -; CHECK-DAG: scvtf.2d v6, [[BLOCK3_LO]] -; CHECK-DAG: scvtf.2d v7, [[BLOCK3_HI]] - -; CHECK: ret %flt = sitofp <16 x i32> %in to <16 x double> %res = fdiv <16 x double> %flt, ret <16 x double> %res @@ -32,11 +30,12 @@ define <16 x double> @test_sitofp_fixed(<16 x i32> %in) { ; This one is small enough to satisfy isSimple, but still illegally large. define <4 x double> @test_sitofp_fixed_shortish(<4 x i64> %in) { ; CHECK-LABEL: test_sitofp_fixed_shortish: +; CHECK: ; %bb.0: +; CHECK-NEXT: scvtf.2d v0, v0, #6 +; CHECK-NEXT: scvtf.2d v1, v1, #6 +; CHECK-NEXT: ret -; CHECK-DAG: scvtf.2d v0, v0 -; CHECK-DAG: scvtf.2d v1, v1 -; CHECK: ret %flt = sitofp <4 x i64> %in to <4 x double> %res = fdiv <4 x double> %flt, ret <4 x double> %res diff --git a/llvm/test/CodeGen/AArch64/tst-br.ll b/llvm/test/CodeGen/AArch64/tst-br.ll index 5c9778c6ff83d..34979d99867c0 100644 --- a/llvm/test/CodeGen/AArch64/tst-br.ll +++ b/llvm/test/CodeGen/AArch64/tst-br.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s ; We've got the usual issues with LLVM reordering blocks here. The @@ -8,6 +9,30 @@ define i32 @test_tbz() { ; CHECK-LABEL: test_tbz: +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh0: +; CHECK-NEXT: adrp x8, _var32@PAGE +; CHECK-NEXT: Lloh1: +; CHECK-NEXT: ldr w8, [x8, _var32@PAGEOFF] +; CHECK-NEXT: tbz w8, #15, LBB0_5 +; CHECK-NEXT: ; %bb.1: ; %test1 +; CHECK-NEXT: tbz w8, #12, LBB0_5 +; CHECK-NEXT: ; %bb.2: ; %test2 +; CHECK-NEXT: Lloh2: +; CHECK-NEXT: adrp x8, _var64@PAGE +; CHECK-NEXT: Lloh3: +; CHECK-NEXT: ldr x8, [x8, _var64@PAGEOFF] +; CHECK-NEXT: tbz w8, #15, LBB0_5 +; CHECK-NEXT: ; %bb.3: ; %test3 +; CHECK-NEXT: tbz w8, #12, LBB0_5 +; CHECK-NEXT: ; %bb.4: ; %end2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB0_5: ; %end1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1 +; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3 %val = load i32, i32* @var32 %val64 = load i64, i64* @var64 @@ -15,34 +40,25 @@ define i32 @test_tbz() { %tbit0 = and i32 %val, 32768 %tst0 = icmp ne i32 %tbit0, 0 br i1 %tst0, label %test1, label %end1 -; CHECK: tbz {{w[0-9]+}}, #15, [[LBL_end1:.?LBB0_[0-9]+]] test1: %tbit1 = and i32 %val, 4096 %tst1 = icmp ne i32 %tbit1, 0 br i1 %tst1, label %test2, label %end1 -; CHECK: tbz {{w[0-9]+}}, #12, [[LBL_end1]] test2: %tbit2 = and i64 %val64, 32768 %tst2 = icmp ne i64 %tbit2, 0 br i1 %tst2, label %test3, label %end1 -; CHECK: tbz {{[wx][0-9]+}}, #15, [[LBL_end1]] test3: %tbit3 = and i64 %val64, 4096 %tst3 = icmp ne i64 %tbit3, 0 br i1 %tst3, label %end2, label %end1 -; CHECK: tbz {{[wx][0-9]+}}, #12, [[LBL_end1]] end2: -; CHECK: mov w0, #1 -; CHECK-NEXT: ret ret i32 1 end1: -; CHECK: [[LBL_end1]]: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret ret i32 0 }