diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll new file mode 100644 index 0000000000000..bfc25873aa064 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll @@ -0,0 +1,286 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -force-streaming-compatible-sve < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: adrp x9, .LCPI0_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI0_0] +; CHECK-NEXT: strh w8, [sp, #14] +; CHECK-NEXT: strh w8, [sp, #12] +; CHECK-NEXT: strh w8, [sp, #10] +; CHECK-NEXT: strh w8, [sp, #8] +; CHECK-NEXT: ldr d2, [sp, #8] +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %sel = select i1 %mask, <2 x half> %op1, <2 x half> %op2 + ret <2 x half> %sel +} + +define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: adrp x9, .LCPI1_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI1_0] +; CHECK-NEXT: strh w8, [sp, #14] +; CHECK-NEXT: strh w8, [sp, #12] +; CHECK-NEXT: strh w8, [sp, #10] +; CHECK-NEXT: strh w8, [sp, #8] +; CHECK-NEXT: ldr d2, [sp, #8] +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %sel = select i1 %mask, <4 x half> %op1, <4 x half> %op2 + ret <4 x half> %sel +} + +define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: adrp x9, .LCPI2_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI2_0] +; CHECK-NEXT: strh w8, [sp, #14] +; CHECK-NEXT: strh w8, [sp, #12] +; CHECK-NEXT: strh w8, [sp, #10] +; CHECK-NEXT: strh w8, [sp, #8] +; CHECK-NEXT: strh w8, [sp, #6] +; CHECK-NEXT: strh w8, [sp, #4] +; CHECK-NEXT: strh w8, [sp, #2] +; CHECK-NEXT: strh w8, [sp] +; CHECK-NEXT: ldr q2, [sp] +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %sel = select i1 %mask, <8 x half> %op1, <8 x half> %op2 + ret <8 x half> %sel +} + +define void @select_v16f16(ptr %a, ptr %b, i1 %mask) #0 { +; CHECK-LABEL: select_v16f16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w2, #0x1 +; CHECK-NEXT: adrp x9, .LCPI3_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x0, #16] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: ldr q3, [x1, #16] +; CHECK-NEXT: strh w8, [sp, #14] +; CHECK-NEXT: strh w8, [sp, #12] +; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI3_0] +; CHECK-NEXT: strh w8, [sp, #10] +; CHECK-NEXT: strh w8, [sp, #8] +; CHECK-NEXT: strh w8, [sp, #6] +; CHECK-NEXT: strh w8, [sp, #4] +; CHECK-NEXT: strh w8, [sp, #2] +; CHECK-NEXT: strh w8, [sp] +; CHECK-NEXT: ldr q4, [sp] +; CHECK-NEXT: eor z5.d, z4.d, z5.d +; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z4.d +; CHECK-NEXT: and z2.d, z2.d, z5.d +; CHECK-NEXT: and z3.d, z3.d, z5.d +; CHECK-NEXT: orr z0.d, z0.d, z2.d +; CHECK-NEXT: orr z1.d, z1.d, z3.d +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %op1 = load volatile <16 x half>, ptr %a + %op2 = load volatile <16 x half>, ptr %b + %sel = select i1 %mask, <16 x half> %op1, <16 x half> %op2 + store <16 x half> %sel, ptr %a + ret void +} + +define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: adrp x9, .LCPI4_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI4_0] +; CHECK-NEXT: stp w8, w8, [sp, #8] +; CHECK-NEXT: ldr d2, [sp, #8] +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %sel = select i1 %mask, <2 x float> %op1, <2 x float> %op2 + ret <2 x float> %sel +} + +define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: adrp x9, .LCPI5_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI5_0] +; CHECK-NEXT: stp w8, w8, [sp, #8] +; CHECK-NEXT: stp w8, w8, [sp] +; CHECK-NEXT: ldr q2, [sp] +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %sel = select i1 %mask, <4 x float> %op1, <4 x float> %op2 + ret <4 x float> %sel +} + +define void @select_v8f32(ptr %a, ptr %b, i1 %mask) #0 { +; CHECK-LABEL: select_v8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w2, #0x1 +; CHECK-NEXT: adrp x9, .LCPI6_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x0, #16] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: ldr q3, [x1, #16] +; CHECK-NEXT: stp w8, w8, [sp, #8] +; CHECK-NEXT: stp w8, w8, [sp] +; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI6_0] +; CHECK-NEXT: ldr q4, [sp] +; CHECK-NEXT: eor z5.d, z4.d, z5.d +; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z4.d +; CHECK-NEXT: and z2.d, z2.d, z5.d +; CHECK-NEXT: and z3.d, z3.d, z5.d +; CHECK-NEXT: orr z0.d, z0.d, z2.d +; CHECK-NEXT: orr z1.d, z1.d, z3.d +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %op1 = load volatile <8 x float>, ptr %a + %op2 = load volatile <8 x float>, ptr %b + %sel = select i1 %mask, <8 x float> %op1, <8 x float> %op2 + store <8 x float> %sel, ptr %a + ret void +} + +define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: csetm x8, ne +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: fmov d3, x9 +; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %sel = select i1 %mask, <1 x double> %op1, <1 x double> %op2 + ret <1 x double> %sel +} + +define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: adrp x9, .LCPI8_0 +; CHECK-NEXT: csetm x8, ne +; CHECK-NEXT: stp x8, x8, [sp, #-16]! +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ldr q2, [sp] +; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI8_0] +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %sel = select i1 %mask, <2 x double> %op1, <2 x double> %op2 + ret <2 x double> %sel +} + +define void @select_v4f64(ptr %a, ptr %b, i1 %mask) #0 { +; CHECK-LABEL: select_v4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: tst w2, #0x1 +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: csetm x8, ne +; CHECK-NEXT: ldr q1, [x0, #16] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: adrp x9, .LCPI9_0 +; CHECK-NEXT: ldr q3, [x1, #16] +; CHECK-NEXT: stp x8, x8, [sp, #-16]! +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI9_0] +; CHECK-NEXT: ldr q5, [sp] +; CHECK-NEXT: eor z4.d, z5.d, z4.d +; CHECK-NEXT: and z1.d, z1.d, z5.d +; CHECK-NEXT: and z0.d, z0.d, z5.d +; CHECK-NEXT: and z2.d, z2.d, z4.d +; CHECK-NEXT: and z3.d, z3.d, z4.d +; CHECK-NEXT: orr z0.d, z0.d, z2.d +; CHECK-NEXT: orr z1.d, z1.d, z3.d +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %op1 = load volatile <4 x double>, ptr %a + %op2 = load volatile <4 x double>, ptr %b + %sel = select i1 %mask, <4 x double> %op1, <4 x double> %op2 + store <4 x double> %sel, ptr %a + ret void +} + +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll new file mode 100644 index 0000000000000..c29f02a14fde1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll @@ -0,0 +1,257 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -force-streaming-compatible-sve < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask) #0 { +; CHECK-LABEL: select_v2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 +; CHECK-NEXT: mov z3.s, z2.s[1] +; CHECK-NEXT: adrp x8, .LCPI0_0 +; CHECK-NEXT: fmov w9, s2 +; CHECK-NEXT: fmov w10, s3 +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: adrp x8, .LCPI0_1 +; CHECK-NEXT: strh w9, [sp, #8] +; CHECK-NEXT: strh w10, [sp, #10] +; CHECK-NEXT: ldr d3, [sp, #8] +; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI0_1] +; CHECK-NEXT: lsl z3.h, p0/m, z3.h, z2.h +; CHECK-NEXT: asrr z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %sel = select <2 x i1> %mask, <2 x half> %op1, <2 x half> %op2 + ret <2 x half> %sel +} + +define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask) #0 { +; CHECK-LABEL: select_v4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: adrp x9, .LCPI1_1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI1_1] +; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: asr z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %sel = select <4 x i1> %mask, <4 x half> %op1, <4 x half> %op2 + ret <4 x half> %sel +} + +define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask) #0 { +; CHECK-LABEL: select_v8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI2_0 +; CHECK-NEXT: adrp x9, .LCPI2_1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 +; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: uunpklo z2.h, z2.b +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI2_0] +; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI2_1] +; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: asr z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %sel = select <8 x i1> %mask, <8 x half> %op1, <8 x half> %op2 + ret <8 x half> %sel +} + +define void @select_v16f16(ptr %a, ptr %b) #0 { +; CHECK-LABEL: select_v16f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q0, q1, [x1] +; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: fcmeq v5.8h, v3.8h, v0.8h +; CHECK-NEXT: fcmeq v4.8h, v2.8h, v1.8h +; CHECK-NEXT: ldr q6, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: and z3.d, z3.d, z5.d +; CHECK-NEXT: and z2.d, z2.d, z4.d +; CHECK-NEXT: eor z4.d, z4.d, z6.d +; CHECK-NEXT: eor z6.d, z5.d, z6.d +; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z6.d +; CHECK-NEXT: orr z1.d, z2.d, z1.d +; CHECK-NEXT: orr z0.d, z3.d, z0.d +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: ret + %op1 = load <16 x half>, ptr %a + %op2 = load <16 x half>, ptr %b + %mask = fcmp oeq <16 x half> %op1, %op2 + %sel = select <16 x i1> %mask, <16 x half> %op1, <16 x half> %op2 + store <16 x half> %sel, ptr %a + ret void +} + +define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %mask) #0 { +; CHECK-LABEL: select_v2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: adrp x9, .LCPI4_1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 +; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI4_1] +; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: asr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %sel = select <2 x i1> %mask, <2 x float> %op1, <2 x float> %op2 + ret <2 x float> %sel +} + +define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %mask) #0 { +; CHECK-LABEL: select_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI5_0 +; CHECK-NEXT: adrp x9, .LCPI5_1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: uunpklo z2.s, z2.h +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI5_0] +; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI5_1] +; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: asr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %sel = select <4 x i1> %mask, <4 x float> %op1, <4 x float> %op2 + ret <4 x float> %sel +} + +define void @select_v8f32(ptr %a, ptr %b) #0 { +; CHECK-LABEL: select_v8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q0, q1, [x1] +; CHECK-NEXT: adrp x8, .LCPI6_0 +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: fcmeq v5.4s, v3.4s, v0.4s +; CHECK-NEXT: fcmeq v4.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q6, [x8, :lo12:.LCPI6_0] +; CHECK-NEXT: and z3.d, z3.d, z5.d +; CHECK-NEXT: and z2.d, z2.d, z4.d +; CHECK-NEXT: eor z4.d, z4.d, z6.d +; CHECK-NEXT: eor z6.d, z5.d, z6.d +; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z6.d +; CHECK-NEXT: orr z1.d, z2.d, z1.d +; CHECK-NEXT: orr z0.d, z3.d, z0.d +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: ret + %op1 = load <8 x float>, ptr %a + %op2 = load <8 x float>, ptr %b + %mask = fcmp oeq <8 x float> %op1, %op2 + %sel = select <8 x i1> %mask, <8 x float> %op1, <8 x float> %op2 + store <8 x float> %sel, ptr %a + ret void +} + +define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1> %mask) #0 { +; CHECK-LABEL: select_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: csetm x8, ne +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: fmov d3, x9 +; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %sel = select <1 x i1> %mask, <1 x double> %op1, <1 x double> %op2 + ret <1 x double> %sel +} + +define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1> %mask) #0 { +; CHECK-LABEL: select_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI8_0 +; CHECK-NEXT: adrp x9, .LCPI8_1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: uunpklo z2.d, z2.s +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI8_0] +; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI8_1] +; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: asr z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %sel = select <2 x i1> %mask, <2 x double> %op1, <2 x double> %op2 + ret <2 x double> %sel +} + +define void @select_v4f64(ptr %a, ptr %b) #0 { +; CHECK-LABEL: select_v4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q0, q1, [x1] +; CHECK-NEXT: adrp x8, .LCPI9_0 +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: fcmeq v5.2d, v3.2d, v0.2d +; CHECK-NEXT: fcmeq v4.2d, v2.2d, v1.2d +; CHECK-NEXT: ldr q6, [x8, :lo12:.LCPI9_0] +; CHECK-NEXT: and z3.d, z3.d, z5.d +; CHECK-NEXT: and z2.d, z2.d, z4.d +; CHECK-NEXT: eor z4.d, z4.d, z6.d +; CHECK-NEXT: eor z6.d, z5.d, z6.d +; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z6.d +; CHECK-NEXT: orr z1.d, z2.d, z1.d +; CHECK-NEXT: orr z0.d, z3.d, z0.d +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: ret + %op1 = load <4 x double>, ptr %a + %op2 = load <4 x double>, ptr %b + %mask = fcmp oeq <4 x double> %op1, %op2 + %sel = select <4 x i1> %mask, <4 x double> %op1, <4 x double> %op2 + store <4 x double> %sel, ptr %a + ret void +} + +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll new file mode 100644 index 0000000000000..989fb79060cea --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll @@ -0,0 +1,427 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -force-streaming-compatible-sve < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v4i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: adrp x9, .LCPI0_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI0_0] +; CHECK-NEXT: strh w8, [sp, #14] +; CHECK-NEXT: strh w8, [sp, #12] +; CHECK-NEXT: strh w8, [sp, #10] +; CHECK-NEXT: strh w8, [sp, #8] +; CHECK-NEXT: ldr d2, [sp, #8] +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %sel = select i1 %mask, <4 x i8> %op1, <4 x i8> %op2 + ret <4 x i8> %sel +} + +define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: adrp x9, .LCPI1_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI1_0] +; CHECK-NEXT: strb w8, [sp, #15] +; CHECK-NEXT: strb w8, [sp, #14] +; CHECK-NEXT: strb w8, [sp, #13] +; CHECK-NEXT: strb w8, [sp, #12] +; CHECK-NEXT: strb w8, [sp, #11] +; CHECK-NEXT: strb w8, [sp, #10] +; CHECK-NEXT: strb w8, [sp, #9] +; CHECK-NEXT: strb w8, [sp, #8] +; CHECK-NEXT: ldr d2, [sp, #8] +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %sel = select i1 %mask, <8 x i8> %op1, <8 x i8> %op2 + ret <8 x i8> %sel +} + +define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: adrp x9, .LCPI2_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI2_0] +; CHECK-NEXT: strb w8, [sp, #15] +; CHECK-NEXT: strb w8, [sp, #14] +; CHECK-NEXT: strb w8, [sp, #13] +; CHECK-NEXT: strb w8, [sp, #12] +; CHECK-NEXT: strb w8, [sp, #11] +; CHECK-NEXT: strb w8, [sp, #10] +; CHECK-NEXT: strb w8, [sp, #9] +; CHECK-NEXT: strb w8, [sp, #8] +; CHECK-NEXT: strb w8, [sp, #7] +; CHECK-NEXT: strb w8, [sp, #6] +; CHECK-NEXT: strb w8, [sp, #5] +; CHECK-NEXT: strb w8, [sp, #4] +; CHECK-NEXT: strb w8, [sp, #3] +; CHECK-NEXT: strb w8, [sp, #2] +; CHECK-NEXT: strb w8, [sp, #1] +; CHECK-NEXT: strb w8, [sp] +; CHECK-NEXT: ldr q2, [sp] +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %sel = select i1 %mask, <16 x i8> %op1, <16 x i8> %op2 + ret <16 x i8> %sel +} + +define void @select_v32i8(ptr %a, ptr %b, i1 %mask) #0 { +; CHECK-LABEL: select_v32i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w2, #0x1 +; CHECK-NEXT: adrp x9, .LCPI3_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x0, #16] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: ldr q3, [x1, #16] +; CHECK-NEXT: strb w8, [sp, #15] +; CHECK-NEXT: strb w8, [sp, #14] +; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI3_0] +; CHECK-NEXT: strb w8, [sp, #13] +; CHECK-NEXT: strb w8, [sp, #12] +; CHECK-NEXT: strb w8, [sp, #11] +; CHECK-NEXT: strb w8, [sp, #10] +; CHECK-NEXT: strb w8, [sp, #9] +; CHECK-NEXT: strb w8, [sp, #8] +; CHECK-NEXT: strb w8, [sp, #7] +; CHECK-NEXT: strb w8, [sp, #6] +; CHECK-NEXT: strb w8, [sp, #5] +; CHECK-NEXT: strb w8, [sp, #4] +; CHECK-NEXT: strb w8, [sp, #3] +; CHECK-NEXT: strb w8, [sp, #2] +; CHECK-NEXT: strb w8, [sp, #1] +; CHECK-NEXT: strb w8, [sp] +; CHECK-NEXT: ldr q4, [sp] +; CHECK-NEXT: eor z5.d, z4.d, z5.d +; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z4.d +; CHECK-NEXT: and z2.d, z2.d, z5.d +; CHECK-NEXT: and z3.d, z3.d, z5.d +; CHECK-NEXT: orr z0.d, z0.d, z2.d +; CHECK-NEXT: orr z1.d, z1.d, z3.d +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %op1 = load volatile <32 x i8>, ptr %a + %op2 = load volatile <32 x i8>, ptr %b + %sel = select i1 %mask, <32 x i8> %op1, <32 x i8> %op2 + store <32 x i8> %sel, ptr %a + ret void +} + +define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v2i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: adrp x9, .LCPI4_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI4_0] +; CHECK-NEXT: stp w8, w8, [sp, #8] +; CHECK-NEXT: ldr d2, [sp, #8] +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %sel = select i1 %mask, <2 x i16> %op1, <2 x i16> %op2 + ret <2 x i16> %sel +} + +define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: adrp x9, .LCPI5_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI5_0] +; CHECK-NEXT: strh w8, [sp, #14] +; CHECK-NEXT: strh w8, [sp, #12] +; CHECK-NEXT: strh w8, [sp, #10] +; CHECK-NEXT: strh w8, [sp, #8] +; CHECK-NEXT: ldr d2, [sp, #8] +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %sel = select i1 %mask, <4 x i16> %op1, <4 x i16> %op2 + ret <4 x i16> %sel +} + +define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: adrp x9, .LCPI6_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI6_0] +; CHECK-NEXT: strh w8, [sp, #14] +; CHECK-NEXT: strh w8, [sp, #12] +; CHECK-NEXT: strh w8, [sp, #10] +; CHECK-NEXT: strh w8, [sp, #8] +; CHECK-NEXT: strh w8, [sp, #6] +; CHECK-NEXT: strh w8, [sp, #4] +; CHECK-NEXT: strh w8, [sp, #2] +; CHECK-NEXT: strh w8, [sp] +; CHECK-NEXT: ldr q2, [sp] +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %sel = select i1 %mask, <8 x i16> %op1, <8 x i16> %op2 + ret <8 x i16> %sel +} + +define void @select_v16i16(ptr %a, ptr %b, i1 %mask) #0 { +; CHECK-LABEL: select_v16i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w2, #0x1 +; CHECK-NEXT: adrp x9, .LCPI7_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x0, #16] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: ldr q3, [x1, #16] +; CHECK-NEXT: strh w8, [sp, #14] +; CHECK-NEXT: strh w8, [sp, #12] +; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI7_0] +; CHECK-NEXT: strh w8, [sp, #10] +; CHECK-NEXT: strh w8, [sp, #8] +; CHECK-NEXT: strh w8, [sp, #6] +; CHECK-NEXT: strh w8, [sp, #4] +; CHECK-NEXT: strh w8, [sp, #2] +; CHECK-NEXT: strh w8, [sp] +; CHECK-NEXT: ldr q4, [sp] +; CHECK-NEXT: eor z5.d, z4.d, z5.d +; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z4.d +; CHECK-NEXT: and z2.d, z2.d, z5.d +; CHECK-NEXT: and z3.d, z3.d, z5.d +; CHECK-NEXT: orr z0.d, z0.d, z2.d +; CHECK-NEXT: orr z1.d, z1.d, z3.d +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %op1 = load volatile <16 x i16>, ptr %a + %op2 = load volatile <16 x i16>, ptr %b + %sel = select i1 %mask, <16 x i16> %op1, <16 x i16> %op2 + store <16 x i16> %sel, ptr %a + ret void +} + +define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: adrp x9, .LCPI8_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI8_0] +; CHECK-NEXT: stp w8, w8, [sp, #8] +; CHECK-NEXT: ldr d2, [sp, #8] +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %sel = select i1 %mask, <2 x i32> %op1, <2 x i32> %op2 + ret <2 x i32> %sel +} + +define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: adrp x9, .LCPI9_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI9_0] +; CHECK-NEXT: stp w8, w8, [sp, #8] +; CHECK-NEXT: stp w8, w8, [sp] +; CHECK-NEXT: ldr q2, [sp] +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %sel = select i1 %mask, <4 x i32> %op1, <4 x i32> %op2 + ret <4 x i32> %sel +} + +define void @select_v8i32(ptr %a, ptr %b, i1 %mask) #0 { +; CHECK-LABEL: select_v8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: tst w2, #0x1 +; CHECK-NEXT: adrp x9, .LCPI10_0 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x0, #16] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: ldr q3, [x1, #16] +; CHECK-NEXT: stp w8, w8, [sp, #8] +; CHECK-NEXT: stp w8, w8, [sp] +; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI10_0] +; CHECK-NEXT: ldr q4, [sp] +; CHECK-NEXT: eor z5.d, z4.d, z5.d +; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z4.d +; CHECK-NEXT: and z2.d, z2.d, z5.d +; CHECK-NEXT: and z3.d, z3.d, z5.d +; CHECK-NEXT: orr z0.d, z0.d, z2.d +; CHECK-NEXT: orr z1.d, z1.d, z3.d +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %op1 = load volatile <8 x i32>, ptr %a + %op2 = load volatile <8 x i32>, ptr %b + %sel = select i1 %mask, <8 x i32> %op1, <8 x i32> %op2 + store <8 x i32> %sel, ptr %a + ret void +} + +define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: csetm x8, ne +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: fmov d3, x9 +; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %sel = select i1 %mask, <1 x i64> %op1, <1 x i64> %op2 + ret <1 x i64> %sel +} + +define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) #0 { +; CHECK-LABEL: select_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: adrp x9, .LCPI12_0 +; CHECK-NEXT: csetm x8, ne +; CHECK-NEXT: stp x8, x8, [sp, #-16]! +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ldr q2, [sp] +; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI12_0] +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %sel = select i1 %mask, <2 x i64> %op1, <2 x i64> %op2 + ret <2 x i64> %sel +} + +define void @select_v4i64(ptr %a, ptr %b, i1 %mask) #0 { +; CHECK-LABEL: select_v4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: tst w2, #0x1 +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: csetm x8, ne +; CHECK-NEXT: ldr q1, [x0, #16] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: adrp x9, .LCPI13_0 +; CHECK-NEXT: ldr q3, [x1, #16] +; CHECK-NEXT: stp x8, x8, [sp, #-16]! +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI13_0] +; CHECK-NEXT: ldr q5, [sp] +; CHECK-NEXT: eor z4.d, z5.d, z4.d +; CHECK-NEXT: and z1.d, z1.d, z5.d +; CHECK-NEXT: and z0.d, z0.d, z5.d +; CHECK-NEXT: and z2.d, z2.d, z4.d +; CHECK-NEXT: and z3.d, z3.d, z4.d +; CHECK-NEXT: orr z0.d, z0.d, z2.d +; CHECK-NEXT: orr z1.d, z1.d, z3.d +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %op1 = load volatile <4 x i64>, ptr %a + %op2 = load volatile <4 x i64>, ptr %b + %sel = select i1 %mask, <4 x i64> %op1, <4 x i64> %op2 + store <4 x i64> %sel, ptr %a + ret void +} + +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll new file mode 100644 index 0000000000000..33fa29f3b7f63 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll @@ -0,0 +1,344 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -force-streaming-compatible-sve < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, <4 x i1> %mask) #0 { +; CHECK-LABEL: select_v4i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI0_0 +; CHECK-NEXT: adrp x9, .LCPI0_1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI0_1] +; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: asr z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %sel = select <4 x i1> %mask, <4 x i8> %op1, <4 x i8> %op2 + ret <4 x i8> %sel +} + +define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #0 { +; CHECK-LABEL: select_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: adrp x9, .LCPI1_1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 +; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI1_1] +; CHECK-NEXT: lsl z2.b, p0/m, z2.b, z3.b +; CHECK-NEXT: asr z2.b, p0/m, z2.b, z3.b +; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %sel = select <8 x i1> %mask, <8 x i8> %op1, <8 x i8> %op2 + ret <8 x i8> %sel +} + +define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) #0 { +; CHECK-LABEL: select_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI2_0 +; CHECK-NEXT: adrp x9, .LCPI2_1 +; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI2_0] +; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI2_1] +; CHECK-NEXT: lsl z2.b, p0/m, z2.b, z3.b +; CHECK-NEXT: asr z2.b, p0/m, z2.b, z3.b +; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %sel = select <16 x i1> %mask, <16 x i8> %op1, <16 x i8> %op2 + ret <16 x i8> %sel +} + +define void @select_v32i8(ptr %a, ptr %b) #0 { +; CHECK-LABEL: select_v32i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q0, q1, [x1] +; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: cmeq v6.16b, v3.16b, v0.16b +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: and z3.d, z3.d, z6.d +; CHECK-NEXT: cmeq v5.16b, v2.16b, v1.16b +; CHECK-NEXT: and z2.d, z2.d, z5.d +; CHECK-NEXT: eor z5.d, z5.d, z4.d +; CHECK-NEXT: eor z4.d, z6.d, z4.d +; CHECK-NEXT: and z1.d, z1.d, z5.d +; CHECK-NEXT: and z0.d, z0.d, z4.d +; CHECK-NEXT: orr z1.d, z2.d, z1.d +; CHECK-NEXT: orr z0.d, z3.d, z0.d +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: ret + %op1 = load <32 x i8>, ptr %a + %op2 = load <32 x i8>, ptr %b + %mask = icmp eq <32 x i8> %op1, %op2 + %sel = select <32 x i1> %mask, <32 x i8> %op1, <32 x i8> %op2 + store <32 x i8> %sel, ptr %a + ret void +} + +define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, <2 x i1> %mask) #0 { +; CHECK-LABEL: select_v2i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: adrp x9, .LCPI4_1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 +; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI4_1] +; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: asr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %sel = select <2 x i1> %mask, <2 x i16> %op1, <2 x i16> %op2 + ret <2 x i16> %sel +} + +define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) #0 { +; CHECK-LABEL: select_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI5_0 +; CHECK-NEXT: adrp x9, .LCPI5_1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI5_0] +; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI5_1] +; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: asr z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %sel = select <4 x i1> %mask, <4 x i16> %op1, <4 x i16> %op2 + ret <4 x i16> %sel +} + +define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #0 { +; CHECK-LABEL: select_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI6_0 +; CHECK-NEXT: adrp x9, .LCPI6_1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 +; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: uunpklo z2.h, z2.b +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI6_0] +; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI6_1] +; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: asr z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %sel = select <8 x i1> %mask, <8 x i16> %op1, <8 x i16> %op2 + ret <8 x i16> %sel +} + +define void @select_v16i16(ptr %a, ptr %b) #0 { +; CHECK-LABEL: select_v16i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q0, q1, [x1] +; CHECK-NEXT: adrp x8, .LCPI7_0 +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: cmeq v6.8h, v3.8h, v0.8h +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI7_0] +; CHECK-NEXT: and z3.d, z3.d, z6.d +; CHECK-NEXT: cmeq v5.8h, v2.8h, v1.8h +; CHECK-NEXT: and z2.d, z2.d, z5.d +; CHECK-NEXT: eor z5.d, z5.d, z4.d +; CHECK-NEXT: eor z4.d, z6.d, z4.d +; CHECK-NEXT: and z1.d, z1.d, z5.d +; CHECK-NEXT: and z0.d, z0.d, z4.d +; CHECK-NEXT: orr z1.d, z2.d, z1.d +; CHECK-NEXT: orr z0.d, z3.d, z0.d +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: ret + %op1 = load <16 x i16>, ptr %a + %op2 = load <16 x i16>, ptr %b + %mask = icmp eq <16 x i16> %op1, %op2 + %sel = select <16 x i1> %mask, <16 x i16> %op1, <16 x i16> %op2 + store <16 x i16> %sel, ptr %a + ret void +} + +define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) #0 { +; CHECK-LABEL: select_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI8_0 +; CHECK-NEXT: adrp x9, .LCPI8_1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 +; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI8_0] +; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI8_1] +; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: asr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %sel = select <2 x i1> %mask, <2 x i32> %op1, <2 x i32> %op2 + ret <2 x i32> %sel +} + +define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) #0 { +; CHECK-LABEL: select_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI9_0 +; CHECK-NEXT: adrp x9, .LCPI9_1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: uunpklo z2.s, z2.h +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI9_0] +; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI9_1] +; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: asr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %sel = select <4 x i1> %mask, <4 x i32> %op1, <4 x i32> %op2 + ret <4 x i32> %sel +} + +define void @select_v8i32(ptr %a, ptr %b) #0 { +; CHECK-LABEL: select_v8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q0, q1, [x1] +; CHECK-NEXT: adrp x8, .LCPI10_0 +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: cmeq v6.4s, v3.4s, v0.4s +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI10_0] +; CHECK-NEXT: and z3.d, z3.d, z6.d +; CHECK-NEXT: cmeq v5.4s, v2.4s, v1.4s +; CHECK-NEXT: and z2.d, z2.d, z5.d +; CHECK-NEXT: eor z5.d, z5.d, z4.d +; CHECK-NEXT: eor z4.d, z6.d, z4.d +; CHECK-NEXT: and z1.d, z1.d, z5.d +; CHECK-NEXT: and z0.d, z0.d, z4.d +; CHECK-NEXT: orr z1.d, z2.d, z1.d +; CHECK-NEXT: orr z0.d, z3.d, z0.d +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: ret + %op1 = load <8 x i32>, ptr %a + %op2 = load <8 x i32>, ptr %b + %mask = icmp eq <8 x i32> %op1, %op2 + %sel = select <8 x i1> %mask, <8 x i32> %op1, <8 x i32> %op2 + store <8 x i32> %sel, ptr %a + ret void +} + +define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) #0 { +; CHECK-LABEL: select_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: csetm x8, ne +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: fmov d3, x9 +; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %sel = select <1 x i1> %mask, <1 x i64> %op1, <1 x i64> %op2 + ret <1 x i64> %sel +} + +define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) #0 { +; CHECK-LABEL: select_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI12_0 +; CHECK-NEXT: adrp x9, .LCPI12_1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: uunpklo z2.d, z2.s +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI12_0] +; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI12_1] +; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: asr z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %sel = select <2 x i1> %mask, <2 x i64> %op1, <2 x i64> %op2 + ret <2 x i64> %sel +} + +define void @select_v4i64(ptr %a, ptr %b) #0 { +; CHECK-LABEL: select_v4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q0, q1, [x1] +; CHECK-NEXT: adrp x8, .LCPI13_0 +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: cmeq v6.2d, v3.2d, v0.2d +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI13_0] +; CHECK-NEXT: and z3.d, z3.d, z6.d +; CHECK-NEXT: cmeq v5.2d, v2.2d, v1.2d +; CHECK-NEXT: and z2.d, z2.d, z5.d +; CHECK-NEXT: eor z5.d, z5.d, z4.d +; CHECK-NEXT: eor z4.d, z6.d, z4.d +; CHECK-NEXT: and z1.d, z1.d, z5.d +; CHECK-NEXT: and z0.d, z0.d, z4.d +; CHECK-NEXT: orr z1.d, z2.d, z1.d +; CHECK-NEXT: orr z0.d, z3.d, z0.d +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: ret + %op1 = load <4 x i64>, ptr %a + %op2 = load <4 x i64>, ptr %b + %mask = icmp eq <4 x i64> %op1, %op2 + %sel = select <4 x i1> %mask, <4 x i64> %op1, <4 x i64> %op2 + store <4 x i64> %sel, ptr %a + ret void +} + +attributes #0 = { "target-features"="+sve" uwtable }