diff --git a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll index 73708e3fd8c44..c3a03b2cb3542 100644 --- a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll +++ b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE +; RUN: llc -mtriple=aarch64_be %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE @x = common dso_local global i128 0 @y = common dso_local global i128 0 @@ -116,3 +117,53 @@ define void @test7() { store volatile i128 %tmp, ptr getelementptr (i8, ptr @y, i64 503) ret void } + +define i128 @load_nonvol(i32, i32, ptr %p) { +; CHECK-LABEL: load_nonvol: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldp x0, x1, [x2] +; CHECK-NEXT: ret +entry: + %l = load i128, ptr %p, align 16 + ret i128 %l +} + +define i128 @load_vol(i32, i32, ptr %p) { +; CHECK-LE-LABEL: load_vol: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldp x0, x1, [x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: load_vol: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldp x1, x0, [x2] +; CHECK-BE-NEXT: ret +entry: + %l = load volatile i128, ptr %p, align 16 + ret i128 %l +} + +define void @store_nonvol(i128 %a, ptr %p) { +; CHECK-LABEL: store_nonvol: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x0, x1, [x2] +; CHECK-NEXT: ret +entry: + store i128 %a, ptr %p, align 16 + ret void +} + +define void @loadstore_vol(i128 %a, ptr %p) { +; CHECK-LE-LABEL: loadstore_vol: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: stp x0, x1, [x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: loadstore_vol: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: stp x1, x0, [x2] +; CHECK-BE-NEXT: ret +entry: + store volatile i128 %a, ptr %p, align 16 + ret void +} diff --git a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll index 4fcb5c0342e52..fe19ca7e2cc43 100644 --- a/llvm/test/CodeGen/AArch64/nontemporal.ll +++ b/llvm/test/CodeGen/AArch64/nontemporal.ll @@ -1,287 +1,424 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple aarch64-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple aarch64 | FileCheck %s --check-prefixes=CHECK-LE +; RUN: llc < %s -mtriple aarch64_be | FileCheck %s --check-prefixes=CHECK-BE define void @test_stnp_v4i64(ptr %p, <4 x i64> %v) #0 { -; CHECK-LABEL: test_stnp_v4i64: -; CHECK: ; %bb.0: -; CHECK-NEXT: stnp q0, q1, [x0] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v4i64: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: stnp q0, q1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v4i64: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: stnp q0, q1, [x0] +; CHECK-BE-NEXT: ret store <4 x i64> %v, ptr %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v4i32(ptr %p, <4 x i32> %v) #0 { -; CHECK-LABEL: test_stnp_v4i32: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: stnp d0, d1, [x0] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v4i32: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: mov d1, v0.d[1] +; CHECK-LE-NEXT: stnp d0, d1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v4i32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str q0, [x0] +; CHECK-BE-NEXT: ret store <4 x i32> %v, ptr %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v8i16(ptr %p, <8 x i16> %v) #0 { -; CHECK-LABEL: test_stnp_v8i16: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: stnp d0, d1, [x0] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v8i16: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: mov d1, v0.d[1] +; CHECK-LE-NEXT: stnp d0, d1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v8i16: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str q0, [x0] +; CHECK-BE-NEXT: ret store <8 x i16> %v, ptr %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v16i8(ptr %p, <16 x i8> %v) #0 { -; CHECK-LABEL: test_stnp_v16i8: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: stnp d0, d1, [x0] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v16i8: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: mov d1, v0.d[1] +; CHECK-LE-NEXT: stnp d0, d1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v16i8: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str q0, [x0] +; CHECK-BE-NEXT: ret store <16 x i8> %v, ptr %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v2i32(ptr %p, <2 x i32> %v) #0 { -; CHECK-LABEL: test_stnp_v2i32: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0[1] -; CHECK-NEXT: stnp s0, s1, [x0] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v2i32: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-LE-NEXT: mov s1, v0.s[1] +; CHECK-LE-NEXT: stnp s0, s1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v2i32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str d0, [x0] +; CHECK-BE-NEXT: ret store <2 x i32> %v, ptr %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v4i16(ptr %p, <4 x i16> %v) #0 { -; CHECK-LABEL: test_stnp_v4i16: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0[1] -; CHECK-NEXT: stnp s0, s1, [x0] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v4i16: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-LE-NEXT: mov s1, v0.s[1] +; CHECK-LE-NEXT: stnp s0, s1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v4i16: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str d0, [x0] +; CHECK-BE-NEXT: ret store <4 x i16> %v, ptr %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v8i8(ptr %p, <8 x i8> %v) #0 { -; CHECK-LABEL: test_stnp_v8i8: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0[1] -; CHECK-NEXT: stnp s0, s1, [x0] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v8i8: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-LE-NEXT: mov s1, v0.s[1] +; CHECK-LE-NEXT: stnp s0, s1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v8i8: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str d0, [x0] +; CHECK-BE-NEXT: ret store <8 x i8> %v, ptr %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v2f64(ptr %p, <2 x double> %v) #0 { -; CHECK-LABEL: test_stnp_v2f64: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: stnp d0, d1, [x0] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v2f64: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: mov d1, v0.d[1] +; CHECK-LE-NEXT: stnp d0, d1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v2f64: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str q0, [x0] +; CHECK-BE-NEXT: ret store <2 x double> %v, ptr %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v4f32(ptr %p, <4 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v4f32: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: stnp d0, d1, [x0] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v4f32: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: mov d1, v0.d[1] +; CHECK-LE-NEXT: stnp d0, d1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v4f32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str q0, [x0] +; CHECK-BE-NEXT: ret store <4 x float> %v, ptr %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v2f32(ptr %p, <2 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v2f32: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0[1] -; CHECK-NEXT: stnp s0, s1, [x0] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v2f32: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-LE-NEXT: mov s1, v0.s[1] +; CHECK-LE-NEXT: stnp s0, s1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v2f32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str d0, [x0] +; CHECK-BE-NEXT: ret store <2 x float> %v, ptr %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v1f64(ptr %p, <1 x double> %v) #0 { -; CHECK-LABEL: test_stnp_v1f64: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0[1] -; CHECK-NEXT: stnp s0, s1, [x0] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v1f64: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-LE-NEXT: mov s1, v0.s[1] +; CHECK-LE-NEXT: stnp s0, s1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v1f64: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str d0, [x0] +; CHECK-BE-NEXT: ret store <1 x double> %v, ptr %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v1i64(ptr %p, <1 x i64> %v) #0 { -; CHECK-LABEL: test_stnp_v1i64: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0[1] -; CHECK-NEXT: stnp s0, s1, [x0] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v1i64: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-LE-NEXT: mov s1, v0.s[1] +; CHECK-LE-NEXT: stnp s0, s1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v1i64: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str d0, [x0] +; CHECK-BE-NEXT: ret store <1 x i64> %v, ptr %p, align 1, !nontemporal !0 ret void } define void @test_stnp_i64(ptr %p, i64 %v) #0 { -; CHECK-LABEL: test_stnp_i64: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsr x8, x1, #32 -; CHECK-NEXT: stnp w1, w8, [x0] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_i64: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: lsr x8, x1, #32 +; CHECK-LE-NEXT: stnp w1, w8, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_i64: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str x1, [x0] +; CHECK-BE-NEXT: ret store i64 %v, ptr %p, align 1, !nontemporal !0 ret void } define void @test_stnp_v2f64_offset(ptr %p, <2 x double> %v) #0 { -; CHECK-LABEL: test_stnp_v2f64_offset: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: stnp d0, d1, [x0, #16] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v2f64_offset: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: mov d1, v0.d[1] +; CHECK-LE-NEXT: stnp d0, d1, [x0, #16] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v2f64_offset: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str q0, [x0, #16] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr <2 x double>, ptr %p, i32 1 store <2 x double> %v, ptr %tmp0, align 1, !nontemporal !0 ret void } define void @test_stnp_v2f64_offset_neg(ptr %p, <2 x double> %v) #0 { -; CHECK-LABEL: test_stnp_v2f64_offset_neg: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: stnp d0, d1, [x0, #-16] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v2f64_offset_neg: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: mov d1, v0.d[1] +; CHECK-LE-NEXT: stnp d0, d1, [x0, #-16] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v2f64_offset_neg: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: stur q0, [x0, #-16] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr <2 x double>, ptr %p, i32 -1 store <2 x double> %v, ptr %tmp0, align 1, !nontemporal !0 ret void } define void @test_stnp_v2f32_offset(ptr %p, <2 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v2f32_offset: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0[1] -; CHECK-NEXT: stnp s0, s1, [x0, #8] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v2f32_offset: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-LE-NEXT: mov s1, v0.s[1] +; CHECK-LE-NEXT: stnp s0, s1, [x0, #8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v2f32_offset: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str d0, [x0, #8] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr <2 x float>, ptr %p, i32 1 store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0 ret void } define void @test_stnp_v2f32_offset_neg(ptr %p, <2 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v2f32_offset_neg: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0[1] -; CHECK-NEXT: stnp s0, s1, [x0, #-8] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v2f32_offset_neg: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-LE-NEXT: mov s1, v0.s[1] +; CHECK-LE-NEXT: stnp s0, s1, [x0, #-8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v2f32_offset_neg: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: stur d0, [x0, #-8] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr <2 x float>, ptr %p, i32 -1 store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0 ret void } define void @test_stnp_i64_offset(ptr %p, i64 %v) #0 { -; CHECK-LABEL: test_stnp_i64_offset: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsr x8, x1, #32 -; CHECK-NEXT: stnp w1, w8, [x0, #8] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_i64_offset: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: lsr x8, x1, #32 +; CHECK-LE-NEXT: stnp w1, w8, [x0, #8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_i64_offset: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str x1, [x0, #8] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr i64, ptr %p, i32 1 store i64 %v, ptr %tmp0, align 1, !nontemporal !0 ret void } define void @test_stnp_i64_offset_neg(ptr %p, i64 %v) #0 { -; CHECK-LABEL: test_stnp_i64_offset_neg: -; CHECK: ; %bb.0: -; CHECK-NEXT: lsr x8, x1, #32 -; CHECK-NEXT: stnp w1, w8, [x0, #-8] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_i64_offset_neg: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: lsr x8, x1, #32 +; CHECK-LE-NEXT: stnp w1, w8, [x0, #-8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_i64_offset_neg: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: stur x1, [x0, #-8] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr i64, ptr %p, i32 -1 store i64 %v, ptr %tmp0, align 1, !nontemporal !0 ret void } define void @test_stnp_v4f32_invalid_offset_4(ptr %p, <4 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v4f32_invalid_offset_4: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: add x8, x0, #4 -; CHECK-NEXT: stnp d0, d1, [x8] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_4: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: mov d1, v0.d[1] +; CHECK-LE-NEXT: add x8, x0, #4 +; CHECK-LE-NEXT: stnp d0, d1, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_4: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: stur q0, [x0, #4] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr i8, ptr %p, i32 4 store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0 ret void } define void @test_stnp_v4f32_invalid_offset_neg_4(ptr %p, <4 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_4: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: sub x8, x0, #4 -; CHECK-NEXT: stnp d0, d1, [x8] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_neg_4: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: mov d1, v0.d[1] +; CHECK-LE-NEXT: sub x8, x0, #4 +; CHECK-LE-NEXT: stnp d0, d1, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_neg_4: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: stur q0, [x0, #-4] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr i8, ptr %p, i32 -4 store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0 ret void } define void @test_stnp_v4f32_invalid_offset_512(ptr %p, <4 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v4f32_invalid_offset_512: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: add x8, x0, #512 -; CHECK-NEXT: stnp d0, d1, [x8] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_512: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: mov d1, v0.d[1] +; CHECK-LE-NEXT: add x8, x0, #512 +; CHECK-LE-NEXT: stnp d0, d1, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_512: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str q0, [x0, #512] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr i8, ptr %p, i32 512 store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0 ret void } define void @test_stnp_v4f32_offset_504(ptr %p, <4 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v4f32_offset_504: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: stnp d0, d1, [x0, #504] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v4f32_offset_504: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: mov d1, v0.d[1] +; CHECK-LE-NEXT: stnp d0, d1, [x0, #504] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v4f32_offset_504: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: add x8, x0, #504 +; CHECK-BE-NEXT: str q0, [x8] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr i8, ptr %p, i32 504 store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0 ret void } define void @test_stnp_v4f32_invalid_offset_508(ptr %p, <4 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v4f32_invalid_offset_508: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: add x8, x0, #508 -; CHECK-NEXT: stnp d0, d1, [x8] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_508: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: mov d1, v0.d[1] +; CHECK-LE-NEXT: add x8, x0, #508 +; CHECK-LE-NEXT: stnp d0, d1, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_508: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: add x8, x0, #508 +; CHECK-BE-NEXT: str q0, [x8] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr i8, ptr %p, i32 508 store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0 ret void } define void @test_stnp_v4f32_invalid_offset_neg_520(ptr %p, <4 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_520: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: sub x8, x0, #520 -; CHECK-NEXT: stnp d0, d1, [x8] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_neg_520: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: mov d1, v0.d[1] +; CHECK-LE-NEXT: sub x8, x0, #520 +; CHECK-LE-NEXT: stnp d0, d1, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_neg_520: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: sub x8, x0, #520 +; CHECK-BE-NEXT: str q0, [x8] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr i8, ptr %p, i32 -520 store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0 ret void } define void @test_stnp_v4f32_offset_neg_512(ptr %p, <4 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v4f32_offset_neg_512: -; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: stnp d0, d1, [x0, #-512] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v4f32_offset_neg_512: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: mov d1, v0.d[1] +; CHECK-LE-NEXT: stnp d0, d1, [x0, #-512] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v4f32_offset_neg_512: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: sub x8, x0, #512 +; CHECK-BE-NEXT: str q0, [x8] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr i8, ptr %p, i32 -512 store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0 ret void @@ -289,50 +426,71 @@ define void @test_stnp_v4f32_offset_neg_512(ptr %p, <4 x float> %v) #0 { define void @test_stnp_v2f32_invalid_offset_256(ptr %p, <2 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v2f32_invalid_offset_256: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0[1] -; CHECK-NEXT: add x8, x0, #256 -; CHECK-NEXT: stnp s0, s1, [x8] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v2f32_invalid_offset_256: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-LE-NEXT: mov s1, v0.s[1] +; CHECK-LE-NEXT: add x8, x0, #256 +; CHECK-LE-NEXT: stnp s0, s1, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v2f32_invalid_offset_256: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str d0, [x0, #256] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr i8, ptr %p, i32 256 store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0 ret void } define void @test_stnp_v2f32_offset_252(ptr %p, <2 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v2f32_offset_252: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0[1] -; CHECK-NEXT: stnp s0, s1, [x0, #252] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v2f32_offset_252: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-LE-NEXT: mov s1, v0.s[1] +; CHECK-LE-NEXT: stnp s0, s1, [x0, #252] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v2f32_offset_252: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: stur d0, [x0, #252] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr i8, ptr %p, i32 252 store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0 ret void } define void @test_stnp_v2f32_invalid_offset_neg_260(ptr %p, <2 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v2f32_invalid_offset_neg_260: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0[1] -; CHECK-NEXT: sub x8, x0, #260 -; CHECK-NEXT: stnp s0, s1, [x8] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v2f32_invalid_offset_neg_260: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-LE-NEXT: mov s1, v0.s[1] +; CHECK-LE-NEXT: sub x8, x0, #260 +; CHECK-LE-NEXT: stnp s0, s1, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v2f32_invalid_offset_neg_260: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: sub x8, x0, #260 +; CHECK-BE-NEXT: str d0, [x8] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr i8, ptr %p, i32 -260 store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0 ret void } define void @test_stnp_v2f32_offset_neg_256(ptr %p, <2 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v2f32_offset_neg_256: -; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0[1] -; CHECK-NEXT: stnp s0, s1, [x0, #-256] -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v2f32_offset_neg_256: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-LE-NEXT: mov s1, v0.s[1] +; CHECK-LE-NEXT: stnp s0, s1, [x0, #-256] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v2f32_offset_neg_256: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: stur d0, [x0, #-256] +; CHECK-BE-NEXT: ret %tmp0 = getelementptr i8, ptr %p, i32 -256 store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0 ret void @@ -341,17 +499,28 @@ define void @test_stnp_v2f32_offset_neg_256(ptr %p, <2 x float> %v) #0 { declare void @dummy(ptr) define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v4f32_offset_alloca: -; CHECK: ; %bb.0: -; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; CHECK-NEXT: stnp d0, d1, [sp] -; CHECK-NEXT: bl _dummy -; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #32 -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v4f32_offset_alloca: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: sub sp, sp, #32 +; CHECK-LE-NEXT: mov d1, v0.d[1] +; CHECK-LE-NEXT: mov x0, sp +; CHECK-LE-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-LE-NEXT: stnp d0, d1, [sp] +; CHECK-LE-NEXT: bl dummy +; CHECK-LE-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-LE-NEXT: add sp, sp, #32 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v4f32_offset_alloca: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: sub sp, sp, #32 +; CHECK-BE-NEXT: mov x0, sp +; CHECK-BE-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-BE-NEXT: str q0, [sp] +; CHECK-BE-NEXT: bl dummy +; CHECK-BE-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-BE-NEXT: add sp, sp, #32 +; CHECK-BE-NEXT: ret %tmp0 = alloca <4 x float> store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0 call void @dummy(ptr %tmp0) @@ -359,17 +528,28 @@ define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 { } define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 { -; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2: -; CHECK: ; %bb.0: -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill -; CHECK-NEXT: stnp d0, d1, [sp, #16] -; CHECK-NEXT: bl _dummy -; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_stnp_v4f32_offset_alloca_2: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: sub sp, sp, #48 +; CHECK-LE-NEXT: mov d1, v0.d[1] +; CHECK-LE-NEXT: mov x0, sp +; CHECK-LE-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-LE-NEXT: stnp d0, d1, [sp, #16] +; CHECK-LE-NEXT: bl dummy +; CHECK-LE-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-LE-NEXT: add sp, sp, #48 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v4f32_offset_alloca_2: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: sub sp, sp, #48 +; CHECK-BE-NEXT: mov x0, sp +; CHECK-BE-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-BE-NEXT: str q0, [sp, #16] +; CHECK-BE-NEXT: bl dummy +; CHECK-BE-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-BE-NEXT: add sp, sp, #48 +; CHECK-BE-NEXT: ret %tmp0 = alloca <4 x float>, i32 2 %tmp1 = getelementptr <4 x float>, ptr %tmp0, i32 1 store <4 x float> %v, ptr %tmp1, align 1, !nontemporal !0 @@ -378,122 +558,239 @@ define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 { } define void @test_stnp_v32i8(<32 x i8> %v, ptr %ptr) { -; CHECK-LABEL: test_stnp_v32i8: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: stnp q0, q1, [x0] -; CHECK-NEXT: ret - +; CHECK-LE-LABEL: test_stnp_v32i8: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: stnp q0, q1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v32i8: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: rev64 v1.16b, v1.16b +; CHECK-BE-NEXT: rev64 v0.16b, v0.16b +; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: stnp q0, q1, [x0] +; CHECK-BE-NEXT: ret entry: store <32 x i8> %v, ptr %ptr, align 4, !nontemporal !0 ret void } define void @test_stnp_v32i16(<32 x i16> %v, ptr %ptr) { -; CHECK-LABEL: test_stnp_v32i16: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: stnp q2, q3, [x0, #32] -; CHECK-NEXT: stnp q0, q1, [x0] -; CHECK-NEXT: ret - +; CHECK-LE-LABEL: test_stnp_v32i16: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: stnp q2, q3, [x0, #32] +; CHECK-LE-NEXT: stnp q0, q1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v32i16: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: rev64 v3.8h, v3.8h +; CHECK-BE-NEXT: rev64 v2.8h, v2.8h +; CHECK-BE-NEXT: rev64 v1.8h, v1.8h +; CHECK-BE-NEXT: rev64 v0.8h, v0.8h +; CHECK-BE-NEXT: ext v3.16b, v3.16b, v3.16b, #8 +; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8 +; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: stnp q2, q3, [x0, #32] +; CHECK-BE-NEXT: stnp q0, q1, [x0] +; CHECK-BE-NEXT: ret entry: store <32 x i16> %v, ptr %ptr, align 4, !nontemporal !0 ret void } define void @test_stnp_v32f16(<32 x half> %v, ptr %ptr) { -; CHECK-LABEL: test_stnp_v32f16: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: stnp q2, q3, [x0, #32] -; CHECK-NEXT: stnp q0, q1, [x0] -; CHECK-NEXT: ret - +; CHECK-LE-LABEL: test_stnp_v32f16: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: stnp q2, q3, [x0, #32] +; CHECK-LE-NEXT: stnp q0, q1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v32f16: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: rev64 v3.8h, v3.8h +; CHECK-BE-NEXT: rev64 v2.8h, v2.8h +; CHECK-BE-NEXT: rev64 v1.8h, v1.8h +; CHECK-BE-NEXT: rev64 v0.8h, v0.8h +; CHECK-BE-NEXT: ext v3.16b, v3.16b, v3.16b, #8 +; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8 +; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: stnp q2, q3, [x0, #32] +; CHECK-BE-NEXT: stnp q0, q1, [x0] +; CHECK-BE-NEXT: ret entry: store <32 x half> %v, ptr %ptr, align 4, !nontemporal !0 ret void } define void @test_stnp_v16i32(<16 x i32> %v, ptr %ptr) { -; CHECK-LABEL: test_stnp_v16i32: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: stnp q2, q3, [x0, #32] -; CHECK-NEXT: stnp q0, q1, [x0] -; CHECK-NEXT: ret - +; CHECK-LE-LABEL: test_stnp_v16i32: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: stnp q2, q3, [x0, #32] +; CHECK-LE-NEXT: stnp q0, q1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v16i32: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: rev64 v3.4s, v3.4s +; CHECK-BE-NEXT: rev64 v2.4s, v2.4s +; CHECK-BE-NEXT: rev64 v1.4s, v1.4s +; CHECK-BE-NEXT: rev64 v0.4s, v0.4s +; CHECK-BE-NEXT: ext v3.16b, v3.16b, v3.16b, #8 +; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8 +; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: stnp q2, q3, [x0, #32] +; CHECK-BE-NEXT: stnp q0, q1, [x0] +; CHECK-BE-NEXT: ret entry: store <16 x i32> %v, ptr %ptr, align 4, !nontemporal !0 ret void } define void @test_stnp_v16f32(<16 x float> %v, ptr %ptr) { -; CHECK-LABEL: test_stnp_v16f32: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: stnp q2, q3, [x0, #32] -; CHECK-NEXT: stnp q0, q1, [x0] -; CHECK-NEXT: ret - +; CHECK-LE-LABEL: test_stnp_v16f32: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: stnp q2, q3, [x0, #32] +; CHECK-LE-NEXT: stnp q0, q1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v16f32: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: rev64 v3.4s, v3.4s +; CHECK-BE-NEXT: rev64 v2.4s, v2.4s +; CHECK-BE-NEXT: rev64 v1.4s, v1.4s +; CHECK-BE-NEXT: rev64 v0.4s, v0.4s +; CHECK-BE-NEXT: ext v3.16b, v3.16b, v3.16b, #8 +; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8 +; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: stnp q2, q3, [x0, #32] +; CHECK-BE-NEXT: stnp q0, q1, [x0] +; CHECK-BE-NEXT: ret entry: store <16 x float> %v, ptr %ptr, align 4, !nontemporal !0 ret void } define void @test_stnp_v17f32(<17 x float> %v, ptr %ptr) { -; CHECK-LABEL: test_stnp_v17f32: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: ; kill: def $s4 killed $s4 def $q4 -; CHECK-NEXT: ; kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr s16, [sp, #16] -; CHECK-NEXT: ; kill: def $s5 killed $s5 def $q5 -; CHECK-NEXT: ; kill: def $s1 killed $s1 def $q1 -; CHECK-NEXT: add x8, sp, #20 -; CHECK-NEXT: ; kill: def $s6 killed $s6 def $q6 -; CHECK-NEXT: ; kill: def $s2 killed $s2 def $q2 -; CHECK-NEXT: ; kill: def $s7 killed $s7 def $q7 -; CHECK-NEXT: ; kill: def $s3 killed $s3 def $q3 -; CHECK-NEXT: mov.s v4[1], v5[0] -; CHECK-NEXT: mov.s v0[1], v1[0] -; CHECK-NEXT: ldr s5, [sp] -; CHECK-NEXT: ld1.s { v16 }[1], [x8] -; CHECK-NEXT: add x8, sp, #4 -; CHECK-NEXT: ld1.s { v5 }[1], [x8] -; CHECK-NEXT: add x8, sp, #24 -; CHECK-NEXT: mov.s v4[2], v6[0] -; CHECK-NEXT: ld1.s { v16 }[2], [x8] -; CHECK-NEXT: mov.s v0[2], v2[0] -; CHECK-NEXT: add x8, sp, #8 -; CHECK-NEXT: ld1.s { v5 }[2], [x8] -; CHECK-NEXT: add x8, sp, #28 -; CHECK-NEXT: ld1.s { v16 }[3], [x8] -; CHECK-NEXT: add x8, sp, #12 -; CHECK-NEXT: mov.s v4[3], v7[0] -; CHECK-NEXT: mov.s v0[3], v3[0] -; CHECK-NEXT: ld1.s { v5 }[3], [x8] -; CHECK-NEXT: mov d1, v16[1] -; CHECK-NEXT: mov d2, v5[1] -; CHECK-NEXT: mov d3, v4[1] -; CHECK-NEXT: mov d6, v0[1] -; CHECK-NEXT: stnp d16, d1, [x0, #48] -; CHECK-NEXT: ldr s1, [sp, #32] -; CHECK-NEXT: stnp d5, d2, [x0, #32] -; CHECK-NEXT: stnp d4, d3, [x0, #16] -; CHECK-NEXT: stnp d0, d6, [x0] -; CHECK-NEXT: str s1, [x0, #64] -; CHECK-NEXT: ret - +; CHECK-LE-LABEL: test_stnp_v17f32: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: // kill: def $s4 killed $s4 def $q4 +; CHECK-LE-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-LE-NEXT: ldr s16, [sp, #32] +; CHECK-LE-NEXT: // kill: def $s5 killed $s5 def $q5 +; CHECK-LE-NEXT: // kill: def $s1 killed $s1 def $q1 +; CHECK-LE-NEXT: add x8, sp, #40 +; CHECK-LE-NEXT: // kill: def $s6 killed $s6 def $q6 +; CHECK-LE-NEXT: // kill: def $s2 killed $s2 def $q2 +; CHECK-LE-NEXT: // kill: def $s7 killed $s7 def $q7 +; CHECK-LE-NEXT: // kill: def $s3 killed $s3 def $q3 +; CHECK-LE-NEXT: mov v4.s[1], v5.s[0] +; CHECK-LE-NEXT: mov v0.s[1], v1.s[0] +; CHECK-LE-NEXT: ldr s5, [sp] +; CHECK-LE-NEXT: ld1 { v16.s }[1], [x8] +; CHECK-LE-NEXT: add x8, sp, #8 +; CHECK-LE-NEXT: ld1 { v5.s }[1], [x8] +; CHECK-LE-NEXT: add x8, sp, #48 +; CHECK-LE-NEXT: mov v4.s[2], v6.s[0] +; CHECK-LE-NEXT: ld1 { v16.s }[2], [x8] +; CHECK-LE-NEXT: mov v0.s[2], v2.s[0] +; CHECK-LE-NEXT: add x8, sp, #16 +; CHECK-LE-NEXT: ld1 { v5.s }[2], [x8] +; CHECK-LE-NEXT: add x8, sp, #56 +; CHECK-LE-NEXT: ld1 { v16.s }[3], [x8] +; CHECK-LE-NEXT: add x8, sp, #24 +; CHECK-LE-NEXT: mov v4.s[3], v7.s[0] +; CHECK-LE-NEXT: mov v0.s[3], v3.s[0] +; CHECK-LE-NEXT: ld1 { v5.s }[3], [x8] +; CHECK-LE-NEXT: mov d1, v16.d[1] +; CHECK-LE-NEXT: mov d2, v5.d[1] +; CHECK-LE-NEXT: mov d3, v4.d[1] +; CHECK-LE-NEXT: mov d6, v0.d[1] +; CHECK-LE-NEXT: stnp d16, d1, [x0, #48] +; CHECK-LE-NEXT: ldr s1, [sp, #64] +; CHECK-LE-NEXT: stnp d5, d2, [x0, #32] +; CHECK-LE-NEXT: stnp d4, d3, [x0, #16] +; CHECK-LE-NEXT: stnp d0, d6, [x0] +; CHECK-LE-NEXT: str s1, [x0, #64] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v17f32: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: // kill: def $s4 killed $s4 def $q4 +; CHECK-BE-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-BE-NEXT: ldr s16, [sp, #36] +; CHECK-BE-NEXT: // kill: def $s5 killed $s5 def $q5 +; CHECK-BE-NEXT: // kill: def $s1 killed $s1 def $q1 +; CHECK-BE-NEXT: ldr s17, [sp, #4] +; CHECK-BE-NEXT: add x8, sp, #44 +; CHECK-BE-NEXT: mov v4.s[1], v5.s[0] +; CHECK-BE-NEXT: mov v0.s[1], v1.s[0] +; CHECK-BE-NEXT: // kill: def $s6 killed $s6 def $q6 +; CHECK-BE-NEXT: // kill: def $s2 killed $s2 def $q2 +; CHECK-BE-NEXT: // kill: def $s7 killed $s7 def $q7 +; CHECK-BE-NEXT: // kill: def $s3 killed $s3 def $q3 +; CHECK-BE-NEXT: ldr s1, [sp, #68] +; CHECK-BE-NEXT: ld1 { v16.s }[1], [x8] +; CHECK-BE-NEXT: add x8, sp, #12 +; CHECK-BE-NEXT: ld1 { v17.s }[1], [x8] +; CHECK-BE-NEXT: add x8, sp, #52 +; CHECK-BE-NEXT: str s1, [x0, #64] +; CHECK-BE-NEXT: ld1 { v16.s }[2], [x8] +; CHECK-BE-NEXT: add x8, sp, #20 +; CHECK-BE-NEXT: mov v4.s[2], v6.s[0] +; CHECK-BE-NEXT: mov v0.s[2], v2.s[0] +; CHECK-BE-NEXT: ld1 { v17.s }[2], [x8] +; CHECK-BE-NEXT: add x8, sp, #60 +; CHECK-BE-NEXT: ld1 { v16.s }[3], [x8] +; CHECK-BE-NEXT: add x8, sp, #28 +; CHECK-BE-NEXT: ld1 { v17.s }[3], [x8] +; CHECK-BE-NEXT: mov v4.s[3], v7.s[0] +; CHECK-BE-NEXT: add x8, x0, #48 +; CHECK-BE-NEXT: mov v0.s[3], v3.s[0] +; CHECK-BE-NEXT: st1 { v16.4s }, [x8] +; CHECK-BE-NEXT: add x8, x0, #32 +; CHECK-BE-NEXT: st1 { v17.4s }, [x8] +; CHECK-BE-NEXT: add x8, x0, #16 +; CHECK-BE-NEXT: st1 { v4.4s }, [x8] +; CHECK-BE-NEXT: st1 { v0.4s }, [x0] +; CHECK-BE-NEXT: ret entry: store <17 x float> %v, ptr %ptr, align 4, !nontemporal !0 ret void } define void @test_stnp_v16i32_invalid_offset(<16 x i32> %v, ptr %ptr) { -; CHECK-LABEL: test_stnp_v16i32_invalid_offset: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov w8, #32032 ; =0x7d20 -; CHECK-NEXT: mov w9, #32000 ; =0x7d00 -; CHECK-NEXT: add x8, x0, x8 -; CHECK-NEXT: add x9, x0, x9 -; CHECK-NEXT: stnp q2, q3, [x8] -; CHECK-NEXT: stnp q0, q1, [x9] -; CHECK-NEXT: ret - +; CHECK-LE-LABEL: test_stnp_v16i32_invalid_offset: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov w8, #32032 // =0x7d20 +; CHECK-LE-NEXT: mov w9, #32000 // =0x7d00 +; CHECK-LE-NEXT: add x8, x0, x8 +; CHECK-LE-NEXT: add x9, x0, x9 +; CHECK-LE-NEXT: stnp q2, q3, [x8] +; CHECK-LE-NEXT: stnp q0, q1, [x9] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v16i32_invalid_offset: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: rev64 v3.4s, v3.4s +; CHECK-BE-NEXT: rev64 v2.4s, v2.4s +; CHECK-BE-NEXT: mov w8, #32032 // =0x7d20 +; CHECK-BE-NEXT: rev64 v1.4s, v1.4s +; CHECK-BE-NEXT: rev64 v0.4s, v0.4s +; CHECK-BE-NEXT: mov w9, #32000 // =0x7d00 +; CHECK-BE-NEXT: add x8, x0, x8 +; CHECK-BE-NEXT: add x9, x0, x9 +; CHECK-BE-NEXT: ext v3.16b, v3.16b, v3.16b, #8 +; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8 +; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: stnp q2, q3, [x8] +; CHECK-BE-NEXT: stnp q0, q1, [x9] +; CHECK-BE-NEXT: ret entry: %gep = getelementptr <16 x i32>, ptr %ptr, i32 500 store <16 x i32> %v, ptr %gep, align 4, !nontemporal !0 @@ -501,28 +798,58 @@ entry: } define void @test_stnp_v16f64(<16 x double> %v, ptr %ptr) { -; CHECK-LABEL: test_stnp_v16f64: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: stnp q6, q7, [x0, #96] -; CHECK-NEXT: stnp q4, q5, [x0, #64] -; CHECK-NEXT: stnp q2, q3, [x0, #32] -; CHECK-NEXT: stnp q0, q1, [x0] -; CHECK-NEXT: ret - +; CHECK-LE-LABEL: test_stnp_v16f64: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: stnp q6, q7, [x0, #96] +; CHECK-LE-NEXT: stnp q4, q5, [x0, #64] +; CHECK-LE-NEXT: stnp q2, q3, [x0, #32] +; CHECK-LE-NEXT: stnp q0, q1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v16f64: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ext v7.16b, v7.16b, v7.16b, #8 +; CHECK-BE-NEXT: ext v6.16b, v6.16b, v6.16b, #8 +; CHECK-BE-NEXT: ext v5.16b, v5.16b, v5.16b, #8 +; CHECK-BE-NEXT: ext v4.16b, v4.16b, v4.16b, #8 +; CHECK-BE-NEXT: ext v3.16b, v3.16b, v3.16b, #8 +; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8 +; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: stnp q6, q7, [x0, #96] +; CHECK-BE-NEXT: stnp q4, q5, [x0, #64] +; CHECK-BE-NEXT: stnp q2, q3, [x0, #32] +; CHECK-BE-NEXT: stnp q0, q1, [x0] +; CHECK-BE-NEXT: ret entry: store <16 x double> %v, ptr %ptr, align 4, !nontemporal !0 ret void } define void @test_stnp_v16i64(<16 x i64> %v, ptr %ptr) { -; CHECK-LABEL: test_stnp_v16i64: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: stnp q6, q7, [x0, #96] -; CHECK-NEXT: stnp q4, q5, [x0, #64] -; CHECK-NEXT: stnp q2, q3, [x0, #32] -; CHECK-NEXT: stnp q0, q1, [x0] -; CHECK-NEXT: ret - +; CHECK-LE-LABEL: test_stnp_v16i64: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: stnp q6, q7, [x0, #96] +; CHECK-LE-NEXT: stnp q4, q5, [x0, #64] +; CHECK-LE-NEXT: stnp q2, q3, [x0, #32] +; CHECK-LE-NEXT: stnp q0, q1, [x0] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_stnp_v16i64: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ext v7.16b, v7.16b, v7.16b, #8 +; CHECK-BE-NEXT: ext v6.16b, v6.16b, v6.16b, #8 +; CHECK-BE-NEXT: ext v5.16b, v5.16b, v5.16b, #8 +; CHECK-BE-NEXT: ext v4.16b, v4.16b, v4.16b, #8 +; CHECK-BE-NEXT: ext v3.16b, v3.16b, v3.16b, #8 +; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8 +; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: stnp q6, q7, [x0, #96] +; CHECK-BE-NEXT: stnp q4, q5, [x0, #64] +; CHECK-BE-NEXT: stnp q2, q3, [x0, #32] +; CHECK-BE-NEXT: stnp q0, q1, [x0] +; CHECK-BE-NEXT: ret entry: store <16 x i64> %v, ptr %ptr, align 4, !nontemporal !0 ret void