diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td index 066724bea92c9..f1baaf82195f9 100644 --- a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -915,6 +915,10 @@ def FeatureUseWzrToVecMove : SubtargetFeature<"use-wzr-to-vec-move", "UseWzrToVecMove", "true", "Move from WZR to insert 0 into vector registers">; +def FeatureDisableUnpredicatedLdStLower : SubtargetFeature< + "disable-unpredicated-ld-st-lower", "DisableUnpredicatedLdStLower", + "true", "Disable lowering unpredicated loads/stores as LDR/STR">; + //===----------------------------------------------------------------------===// // Architectures. // diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index da93a2b13fc11..5490ee7201f3b 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -443,6 +443,8 @@ def AllowMisalignedMemAccesses def UseWzrToVecMove : Predicate<"Subtarget->useWzrToVecMove()">; +def AllowUnpredicatedLdStLower + : Predicate<"!Subtarget->disableUnpredicatedLdStLower()">; //===----------------------------------------------------------------------===// // AArch64-specific DAG Nodes. diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index 120415f91c9ae..72882ac078c55 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -306,7 +306,8 @@ def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX", FeatureAggressiveFMA, FeatureArithmeticBccFusion, FeatureStorePairSuppress, - FeaturePredictableSelectIsExpensive]>; + FeaturePredictableSelectIsExpensive, + FeatureDisableUnpredicatedLdStLower]>; def TuneMONAKA : SubtargetFeature<"fujitsu-monaka", "ARMProcFamily", "MONAKA", "Fujitsu FUJITSU-MONAKA processors", [ diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index e99b3f8ff07e0..4d549c6c55d17 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -3164,7 +3164,7 @@ let Predicates = [HasSVE_or_SME] in { } // Allow using LDR/STR to avoid the predicate dependence. - let Predicates = [HasSVE_or_SME, IsLE, AllowMisalignedMemAccesses] in + let Predicates = [HasSVE_or_SME, IsLE, AllowMisalignedMemAccesses, AllowUnpredicatedLdStLower] in foreach Ty = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in { let AddedComplexity = 2 in { def : Pat<(Ty (load (am_sve_indexed_s9 GPR64sp:$base, simm9:$offset))), diff --git a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-imm.ll index 523fdea6b2231..3e2aed22764c5 100644 --- a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-imm.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+disable-unpredicated-ld-st-lower < %s | FileCheck --check-prefixes=COMMON-NO-UPLS-LOWER,NO-UPLS-LOWER %s +; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=a64fx < %s | FileCheck --check-prefixes=COMMON-NO-UPLS-LOWER,A64FX %s ; LD1B @@ -8,6 +10,12 @@ define @ld1b_lower_bound(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr z0, [x0, #-8, mul vl] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: ld1b_lower_bound: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.b +; COMMON-NO-UPLS-LOWER-NEXT: ld1b { z0.b }, p0/z, [x0, #-8, mul vl] +; COMMON-NO-UPLS-LOWER-NEXT: ret %base = getelementptr , ptr %a, i64 -8 %load = load , ptr %base ret %load @@ -18,6 +26,12 @@ define @ld1b_inbound(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr z0, [x0, #2, mul vl] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: ld1b_inbound: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.b +; COMMON-NO-UPLS-LOWER-NEXT: ld1b { z0.b }, p0/z, [x0, #2, mul vl] +; COMMON-NO-UPLS-LOWER-NEXT: ret %base = getelementptr , ptr %a, i64 2 %load = load , ptr %base ret %load @@ -28,6 +42,12 @@ define @ld1b_upper_bound(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr z0, [x0, #7, mul vl] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: ld1b_upper_bound: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.b +; COMMON-NO-UPLS-LOWER-NEXT: ld1b { z0.b }, p0/z, [x0, #7, mul vl] +; COMMON-NO-UPLS-LOWER-NEXT: ret %base = getelementptr , ptr %a, i64 7 %load = load , ptr %base ret %load @@ -38,6 +58,13 @@ define @ld1b_out_of_upper_bound(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr z0, [x0, #8, mul vl] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: ld1b_out_of_upper_bound: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.b +; COMMON-NO-UPLS-LOWER-NEXT: rdvl x8, #8 +; COMMON-NO-UPLS-LOWER-NEXT: ld1b { z0.b }, p0/z, [x0, x8] +; COMMON-NO-UPLS-LOWER-NEXT: ret %base = getelementptr , ptr %a, i64 8 %load = load , ptr %base ret %load @@ -48,6 +75,13 @@ define @ld1b_out_of_lower_bound(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr z0, [x0, #-9, mul vl] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: ld1b_out_of_lower_bound: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.b +; COMMON-NO-UPLS-LOWER-NEXT: rdvl x8, #-9 +; COMMON-NO-UPLS-LOWER-NEXT: ld1b { z0.b }, p0/z, [x0, x8] +; COMMON-NO-UPLS-LOWER-NEXT: ret %base = getelementptr , ptr %a, i64 -9 %load = load , ptr %base ret %load @@ -60,6 +94,12 @@ define @ld1h_inbound(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr z0, [x0, #-2, mul vl] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: ld1h_inbound: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.h +; COMMON-NO-UPLS-LOWER-NEXT: ld1h { z0.h }, p0/z, [x0, #-2, mul vl] +; COMMON-NO-UPLS-LOWER-NEXT: ret %base = getelementptr , ptr %a, i64 -2 %load = load , ptr %base ret %load @@ -72,6 +112,12 @@ define @ld1s_inbound(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr z0, [x0, #4, mul vl] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: ld1s_inbound: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.s +; COMMON-NO-UPLS-LOWER-NEXT: ld1w { z0.s }, p0/z, [x0, #4, mul vl] +; COMMON-NO-UPLS-LOWER-NEXT: ret %base = getelementptr , ptr %a, i64 4 %load = load , ptr %base ret %load @@ -84,6 +130,12 @@ define @ld1d_inbound(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr z0, [x0, #6, mul vl] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: ld1d_inbound: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.d +; COMMON-NO-UPLS-LOWER-NEXT: ld1d { z0.d }, p0/z, [x0, #6, mul vl] +; COMMON-NO-UPLS-LOWER-NEXT: ret %base = getelementptr , ptr %a, i64 6 %load = load , ptr %base ret %load @@ -97,6 +149,22 @@ define void @load_nxv6f16(ptr %a) { ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #2, mul vl] ; CHECK-NEXT: ld1h { z0.s }, p1/z, [x0] ; CHECK-NEXT: ret +; +; NO-UPLS-LOWER-LABEL: load_nxv6f16: +; NO-UPLS-LOWER: // %bb.0: +; NO-UPLS-LOWER-NEXT: ptrue p0.d +; NO-UPLS-LOWER-NEXT: ptrue p1.s +; NO-UPLS-LOWER-NEXT: ld1h { z0.d }, p0/z, [x0, #2, mul vl] +; NO-UPLS-LOWER-NEXT: ld1h { z0.s }, p1/z, [x0] +; NO-UPLS-LOWER-NEXT: ret +; +; A64FX-LABEL: load_nxv6f16: +; A64FX: // %bb.0: +; A64FX-NEXT: ptrue p0.d +; A64FX-NEXT: ld1h { z0.d }, p0/z, [x0, #2, mul vl] +; A64FX-NEXT: ptrue p0.s +; A64FX-NEXT: ld1h { z0.s }, p0/z, [x0] +; A64FX-NEXT: ret %val = load volatile , ptr %a ret void } @@ -108,6 +176,22 @@ define void @load_nxv6f32(ptr %a) { ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, #2, mul vl] ; CHECK-NEXT: ldr z0, [x0] ; CHECK-NEXT: ret +; +; NO-UPLS-LOWER-LABEL: load_nxv6f32: +; NO-UPLS-LOWER: // %bb.0: +; NO-UPLS-LOWER-NEXT: ptrue p0.d +; NO-UPLS-LOWER-NEXT: ptrue p1.s +; NO-UPLS-LOWER-NEXT: ld1w { z0.d }, p0/z, [x0, #2, mul vl] +; NO-UPLS-LOWER-NEXT: ld1w { z0.s }, p1/z, [x0] +; NO-UPLS-LOWER-NEXT: ret +; +; A64FX-LABEL: load_nxv6f32: +; A64FX: // %bb.0: +; A64FX-NEXT: ptrue p0.d +; A64FX-NEXT: ld1w { z0.d }, p0/z, [x0, #2, mul vl] +; A64FX-NEXT: ptrue p0.s +; A64FX-NEXT: ld1w { z0.s }, p0/z, [x0] +; A64FX-NEXT: ret %val = load volatile , ptr %a ret void } @@ -119,6 +203,22 @@ define void @load_nxv12f16(ptr %a) { ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, #2, mul vl] ; CHECK-NEXT: ldr z0, [x0] ; CHECK-NEXT: ret +; +; NO-UPLS-LOWER-LABEL: load_nxv12f16: +; NO-UPLS-LOWER: // %bb.0: +; NO-UPLS-LOWER-NEXT: ptrue p0.s +; NO-UPLS-LOWER-NEXT: ptrue p1.h +; NO-UPLS-LOWER-NEXT: ld1h { z0.s }, p0/z, [x0, #2, mul vl] +; NO-UPLS-LOWER-NEXT: ld1h { z0.h }, p1/z, [x0] +; NO-UPLS-LOWER-NEXT: ret +; +; A64FX-LABEL: load_nxv12f16: +; A64FX: // %bb.0: +; A64FX-NEXT: ptrue p0.s +; A64FX-NEXT: ld1h { z0.s }, p0/z, [x0, #2, mul vl] +; A64FX-NEXT: ptrue p0.h +; A64FX-NEXT: ld1h { z0.h }, p0/z, [x0] +; A64FX-NEXT: ret %val = load volatile , ptr %a ret void } diff --git a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll index 71b883f0ef7ec..35f39ce5069bd 100644 --- a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+disable-unpredicated-ld-st-lower < %s | FileCheck --check-prefixes=COMMON-NO-UPLS-LOWER,NO-UPLS-LOWER %s +; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=a64fx < %s | FileCheck --check-prefixes=COMMON-NO-UPLS-LOWER,A64FX %s ; ST1B @@ -8,6 +10,12 @@ define void @st1b_lower_bound( %data, ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: str z0, [x0, #-8, mul vl] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: st1b_lower_bound: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.b +; COMMON-NO-UPLS-LOWER-NEXT: st1b { z0.b }, p0, [x0, #-8, mul vl] +; COMMON-NO-UPLS-LOWER-NEXT: ret %base = getelementptr , ptr %a, i64 -8 store %data, ptr %base ret void @@ -18,6 +26,12 @@ define void @st1b_inbound( %data, ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: str z0, [x0, #1, mul vl] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: st1b_inbound: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.b +; COMMON-NO-UPLS-LOWER-NEXT: st1b { z0.b }, p0, [x0, #1, mul vl] +; COMMON-NO-UPLS-LOWER-NEXT: ret %base = getelementptr , ptr %a, i64 1 store %data, ptr %base ret void @@ -28,6 +42,12 @@ define void @st1b_upper_bound( %data, ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: str z0, [x0, #7, mul vl] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: st1b_upper_bound: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.b +; COMMON-NO-UPLS-LOWER-NEXT: st1b { z0.b }, p0, [x0, #7, mul vl] +; COMMON-NO-UPLS-LOWER-NEXT: ret %base = getelementptr , ptr %a, i64 7 store %data, ptr %base ret void @@ -38,6 +58,13 @@ define void @st1b_out_of_upper_bound( %data, ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: str z0, [x0, #8, mul vl] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: st1b_out_of_upper_bound: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.b +; COMMON-NO-UPLS-LOWER-NEXT: rdvl x8, #8 +; COMMON-NO-UPLS-LOWER-NEXT: st1b { z0.b }, p0, [x0, x8] +; COMMON-NO-UPLS-LOWER-NEXT: ret %base = getelementptr , ptr %a, i64 8 store %data, ptr %base ret void @@ -48,6 +75,13 @@ define void @st1b_out_of_lower_bound( %data, ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: str z0, [x0, #-9, mul vl] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: st1b_out_of_lower_bound: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.b +; COMMON-NO-UPLS-LOWER-NEXT: rdvl x8, #-9 +; COMMON-NO-UPLS-LOWER-NEXT: st1b { z0.b }, p0, [x0, x8] +; COMMON-NO-UPLS-LOWER-NEXT: ret %base = getelementptr , ptr %a, i64 -9 store %data, ptr %base ret void @@ -60,6 +94,12 @@ define void @st1h_inbound( %data, ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: str z0, [x0, #-6, mul vl] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: st1h_inbound: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.h +; COMMON-NO-UPLS-LOWER-NEXT: st1h { z0.h }, p0, [x0, #-6, mul vl] +; COMMON-NO-UPLS-LOWER-NEXT: ret %base = getelementptr , ptr %a, i64 -6 store %data, ptr %base ret void @@ -72,6 +112,12 @@ define void @st1w_inbound( %data, ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: str z0, [x0, #2, mul vl] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: st1w_inbound: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.s +; COMMON-NO-UPLS-LOWER-NEXT: st1w { z0.s }, p0, [x0, #2, mul vl] +; COMMON-NO-UPLS-LOWER-NEXT: ret %base = getelementptr , ptr %a, i64 2 store %data, ptr %base ret void @@ -84,6 +130,12 @@ define void @st1d_inbound( %data, ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: str z0, [x0, #5, mul vl] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: st1d_inbound: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.d +; COMMON-NO-UPLS-LOWER-NEXT: st1d { z0.d }, p0, [x0, #5, mul vl] +; COMMON-NO-UPLS-LOWER-NEXT: ret %base = getelementptr , ptr %a, i64 5 store %data, ptr %base ret void @@ -99,6 +151,13 @@ define void @store_nxv2f32(ptr %out) { ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: st1w { z0.d }, p0, [x0] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: store_nxv2f32: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: fmov z0.s, #1.00000000 +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.d +; COMMON-NO-UPLS-LOWER-NEXT: st1w { z0.d }, p0, [x0] +; COMMON-NO-UPLS-LOWER-NEXT: ret store splat(float 1.0), ptr %out ret void } @@ -110,6 +169,13 @@ define void @store_nxv4f16(ptr %out) { ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: st1h { z0.s }, p0, [x0] ; CHECK-NEXT: ret +; +; COMMON-NO-UPLS-LOWER-LABEL: store_nxv4f16: +; COMMON-NO-UPLS-LOWER: // %bb.0: +; COMMON-NO-UPLS-LOWER-NEXT: fmov z0.h, #1.00000000 +; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.s +; COMMON-NO-UPLS-LOWER-NEXT: st1h { z0.s }, p0, [x0] +; COMMON-NO-UPLS-LOWER-NEXT: ret store splat(half 1.0), ptr %out ret void } @@ -124,6 +190,24 @@ define void @store_nxv6f32(ptr %out) { ; CHECK-NEXT: st1w { z0.d }, p0, [x0, #2, mul vl] ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret +; +; NO-UPLS-LOWER-LABEL: store_nxv6f32: +; NO-UPLS-LOWER: // %bb.0: +; NO-UPLS-LOWER-NEXT: fmov z0.s, #1.00000000 +; NO-UPLS-LOWER-NEXT: ptrue p0.d +; NO-UPLS-LOWER-NEXT: ptrue p1.s +; NO-UPLS-LOWER-NEXT: st1w { z0.d }, p0, [x0, #2, mul vl] +; NO-UPLS-LOWER-NEXT: st1w { z0.s }, p1, [x0] +; NO-UPLS-LOWER-NEXT: ret +; +; A64FX-LABEL: store_nxv6f32: +; A64FX: // %bb.0: +; A64FX-NEXT: fmov z0.s, #1.00000000 +; A64FX-NEXT: ptrue p0.d +; A64FX-NEXT: st1w { z0.d }, p0, [x0, #2, mul vl] +; A64FX-NEXT: ptrue p0.s +; A64FX-NEXT: st1w { z0.s }, p0, [x0] +; A64FX-NEXT: ret store splat(float 1.0), ptr %out ret void } @@ -136,6 +220,24 @@ define void @store_nxv12f16(ptr %out) { ; CHECK-NEXT: st1h { z0.s }, p0, [x0, #2, mul vl] ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret +; +; NO-UPLS-LOWER-LABEL: store_nxv12f16: +; NO-UPLS-LOWER: // %bb.0: +; NO-UPLS-LOWER-NEXT: fmov z0.h, #1.00000000 +; NO-UPLS-LOWER-NEXT: ptrue p0.s +; NO-UPLS-LOWER-NEXT: ptrue p1.h +; NO-UPLS-LOWER-NEXT: st1h { z0.s }, p0, [x0, #2, mul vl] +; NO-UPLS-LOWER-NEXT: st1h { z0.h }, p1, [x0] +; NO-UPLS-LOWER-NEXT: ret +; +; A64FX-LABEL: store_nxv12f16: +; A64FX: // %bb.0: +; A64FX-NEXT: fmov z0.h, #1.00000000 +; A64FX-NEXT: ptrue p0.s +; A64FX-NEXT: st1h { z0.s }, p0, [x0, #2, mul vl] +; A64FX-NEXT: ptrue p0.h +; A64FX-NEXT: st1h { z0.h }, p0, [x0] +; A64FX-NEXT: ret store splat(half 1.0), ptr %out ret void }