From 83c4427a2fa4b19c99bb825a16bc4b8f6b238a53 Mon Sep 17 00:00:00 2001
From: Brandon Wu
Date: Wed, 12 Nov 2025 20:53:09 -0800
Subject: [PATCH] [RISCV][llvm] Handle INSERT_VECTOR_ELT, EXTRACT_VECTOR_ELT
 codegen for zvfbfa

With zvfbfa we can use vfmv.s.f and vfmv.f.s directly on bf16 vectors
(selected under the e16alt element type), so inserting or extracting a bf16
element no longer has to round-trip through a GPR via fmv.x.h/fmv.h.x. Mark
INSERT_VECTOR_ELT and EXTRACT_VECTOR_ELT as Custom for these types and keep
the integer fallback only for subtargets without zvfbfa.

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp  |  19 +-
 llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll | 263 +++++++
 llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll  | 700 +++++++++++++++----
 llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll     |  30 +-
 4 files changed, 852 insertions(+), 160 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d086a2a4a3057..6fa639e8d9874 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1264,11 +1264,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                          Custom);
       setOperationAction(ISD::SELECT_CC, VT, Expand);
       setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
-      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
-                          ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
-                          ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
-                          ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE,
-                          ISD::VECTOR_COMPRESS},
+      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
+                          ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+                          ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_DEINTERLEAVE,
+                          ISD::VECTOR_INTERLEAVE, ISD::VECTOR_REVERSE,
+                          ISD::VECTOR_SPLICE, ISD::VECTOR_COMPRESS},
                          VT, Custom);
       setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
       setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
@@ -1278,9 +1278,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
       MVT EltVT = VT.getVectorElementType();
       if (isTypeLegal(EltVT))
-        setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
-                            ISD::EXTRACT_VECTOR_ELT},
-                           VT, Custom);
+        setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT}, VT,
+                           Custom);
       else
         setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
                            EltVT, Custom);
@@ -10356,7 +10355,7 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
   }
 
   if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
-      ValVT == MVT::bf16) {
+      (ValVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
     // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
     MVT IntVT = VecVT.changeTypeToInteger();
     SDValue IntInsert = DAG.getNode(
@@ -10593,7 +10592,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
   }
 
   if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
-      EltVT == MVT::bf16) {
+      (EltVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
     // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
     MVT IntVT = VecVT.changeTypeToInteger();
     SDValue IntVec = DAG.getBitcast(IntVT, Vec);
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll
index 692a7ce0b20e8..903c0dcaba2d8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll
@@ -5,6 +5,8 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,NOZFMIN,ZVFHMIN
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZFMIN
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZFMIN
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfhmin,+zvfhmin,+experimental-zvfbfa -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfhmin,+zvfhmin,+experimental-zvfbfa -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVFBFA
 
 define bfloat @extractelt_nxv1bf16_0(<vscale x 1 x bfloat> %v) {
 ; NOZFMIN-LABEL: extractelt_nxv1bf16_0:
@@ -22,6 +24,12 @@ define bfloat @extractelt_nxv1bf16_0(<vscale x 1 x bfloat> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv1bf16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 1 x bfloat> %v, i32 0
   ret bfloat %r
 }
@@ -44,6 +52,13 @@ define bfloat @extractelt_nxv1bf16_imm(<vscale x 1 x bfloat> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv1bf16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 1 x bfloat> %v, i32 2
   ret bfloat %r
 }
@@ -66,6 +81,13 @@ define bfloat @extractelt_nxv1bf16_idx(<vscale x 1 x bfloat> %v, i32 zeroext %id
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv1bf16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 1 x bfloat> %v, i32 %idx
   ret bfloat %r
 }
@@ -86,6 +108,12 @@ define bfloat @extractelt_nxv2bf16_0(<vscale x 2 x bfloat> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv2bf16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 2 x bfloat> %v, i32 0
   ret bfloat %r
 }
@@ -108,6 +136,13 @@ define bfloat @extractelt_nxv2bf16_imm(<vscale x 2 x bfloat> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv2bf16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 2 x bfloat> %v, i32 2
   ret bfloat %r
 }
@@ -130,6 +165,13 @@ define bfloat @extractelt_nxv2bf16_idx(<vscale x 2 x bfloat> %v, i32 zeroext %id
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv2bf16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 2 x bfloat> %v, i32 %idx
   ret bfloat %r
 }
@@ -150,6 +192,12 @@ define bfloat @extractelt_nxv4bf16_0(<vscale x 4 x bfloat> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv4bf16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 4 x bfloat> %v, i32 0
   ret bfloat %r
 }
@@ -172,6 +220,13 @@ define bfloat @extractelt_nxv4bf16_imm(<vscale x 4 x bfloat> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv4bf16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 4 x bfloat> %v, i32 2
   ret bfloat %r
 }
@@ -194,6 +249,13 @@ define bfloat @extractelt_nxv4bf16_idx(<vscale x 4 x bfloat> %v, i32 zeroext %id
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv4bf16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 4 x bfloat> %v, i32 %idx
   ret bfloat %r
 }
@@ -214,6 +276,12 @@ define bfloat @extractelt_nxv8bf16_0(<vscale x 8 x bfloat> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv8bf16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 8 x bfloat> %v, i32 0
   ret bfloat %r
 }
@@ -236,6 +304,13 @@ define bfloat @extractelt_nxv8bf16_imm(<vscale x 8 x bfloat> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv8bf16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 8 x bfloat> %v, i32 2
   ret bfloat %r
 }
@@ -258,6 +333,14 @@ define bfloat @extractelt_nxv8bf16_idx(<vscale x 8 x bfloat> %v, i32 zeroext %id
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv8bf16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 8 x bfloat> %v, i32 %idx
   ret bfloat %r
 }
@@ -278,6 +361,12 @@ define bfloat @extractelt_nxv16bf16_0(<vscale x 16 x bfloat> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv16bf16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 16 x bfloat> %v, i32 0
   ret bfloat %r
 }
@@ -300,6 +389,13 @@ define bfloat @extractelt_nxv16bf16_imm(<vscale x 16 x bfloat> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv16bf16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 16 x bfloat> %v, i32 2
   ret bfloat %r
 }
@@ -322,6 +418,14 @@ define bfloat @extractelt_nxv16bf16_idx(<vscale x 16 x bfloat> %v, i32 zeroext %
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv16bf16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 16 x bfloat> %v, i32 %idx
   ret bfloat %r
 }
@@ -342,6 +446,12 @@ define bfloat @extractelt_nxv32bf16_0(<vscale x 32 x bfloat> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv32bf16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 32 x bfloat> %v, i32 0
   ret bfloat %r
 }
@@ -364,6 +474,13 @@ define bfloat @extractelt_nxv32bf16_imm(<vscale x 32 x bfloat> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv32bf16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 32 x bfloat> %v, i32 2
   ret bfloat %r
 }
@@ -386,6 +503,14 @@ define bfloat @extractelt_nxv32bf16_idx(<vscale x 32 x bfloat> %v, i32 zeroext %
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv32bf16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m8, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT:    vfmv.f.s fa0, v8
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 32 x bfloat> %v, i32 %idx
   ret bfloat %r
 }
@@ -412,6 +537,13 @@ define half @extractelt_nxv1f16_0(<vscale x 1 x half> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv1f16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 1 x half> %v, i32 0
   ret half %r
 }
@@ -441,6 +573,14 @@ define half @extractelt_nxv1f16_imm(<vscale x 1 x half> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv1f16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 1 x half> %v, i32 2
   ret half %r
 }
@@ -470,6 +610,14 @@ define half @extractelt_nxv1f16_idx(<vscale x 1 x half> %v, i32 zeroext %idx) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv1f16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 1 x half> %v, i32 %idx
   ret half %r
 }
@@ -496,6 +644,13 @@ define half @extractelt_nxv2f16_0(<vscale x 2 x half> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv2f16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 2 x half> %v, i32 0
   ret half %r
 }
@@ -525,6 +680,14 @@ define half @extractelt_nxv2f16_imm(<vscale x 2 x half> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv2f16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 2 x half> %v, i32 2
   ret half %r
 }
@@ -554,6 +717,14 @@ define half @extractelt_nxv2f16_idx(<vscale x 2 x half> %v, i32 zeroext %idx) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv2f16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 2 x half> %v, i32 %idx
   ret half %r
 }
@@ -580,6 +751,13 @@ define half @extractelt_nxv4f16_0(<vscale x 4 x half> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv4f16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 4 x half> %v, i32 0
   ret half %r
 }
@@ -609,6 +787,14 @@ define half @extractelt_nxv4f16_imm(<vscale x 4 x half> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv4f16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 4 x half> %v, i32 2
   ret half %r
 }
@@ -638,6 +824,14 @@ define half @extractelt_nxv4f16_idx(<vscale x 4 x half> %v, i32 zeroext %idx) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv4f16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 4 x half> %v, i32 %idx
   ret half %r
 }
@@ -664,6 +858,13 @@ define half @extractelt_nxv8f16_0(<vscale x 8 x half> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv8f16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 8 x half> %v, i32 0
   ret half %r
 }
@@ -693,6 +894,14 @@ define half @extractelt_nxv8f16_imm(<vscale x 8 x half> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv8f16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 8 x half> %v, i32 2
   ret half %r
 }
@@ -722,6 +931,14 @@ define half @extractelt_nxv8f16_idx(<vscale x 8 x half> %v, i32 zeroext %idx) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv8f16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 8 x half> %v, i32 %idx
   ret half %r
 }
@@ -748,6 +965,13 @@ define half @extractelt_nxv16f16_0(<vscale x 16 x half> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv16f16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 16 x half> %v, i32 0
   ret half %r
 }
@@ -777,6 +1001,14 @@ define half @extractelt_nxv16f16_imm(<vscale x 16 x half> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv16f16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 16 x half> %v, i32 2
   ret half %r
 }
@@ -806,6 +1038,14 @@ define half @extractelt_nxv16f16_idx(<vscale x 16 x half> %v, i32 zeroext %idx)
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv16f16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 16 x half> %v, i32 %idx
   ret half %r
 }
@@ -832,6 +1072,13 @@ define half @extractelt_nxv32f16_0(<vscale x 32 x half> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv32f16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 32 x half> %v, i32 0
   ret half %r
 }
@@ -861,6 +1108,14 @@ define half @extractelt_nxv32f16_imm(<vscale x 32 x half> %v) {
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv32f16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 32 x half> %v, i32 2
   ret half %r
 }
@@ -890,6 +1145,14 @@ define half @extractelt_nxv32f16_idx(<vscale x 32 x half> %v, i32 zeroext %idx)
 ; ZFMIN-NEXT:    vmv.x.s a0, v8
 ; ZFMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: extractelt_nxv32f16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 1, e16, m8, ta, ma
+; ZVFBFA-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFBFA-NEXT:    vmv.x.s a0, v8
+; ZVFBFA-NEXT:    fmv.h.x fa0, a0
+; ZVFBFA-NEXT:    ret
   %r = extractelement <vscale x 32 x half> %v, i32 %idx
   ret half %r
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
index 607e0085c3f46..7c6e0cea706d7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
@@ -7,225 +7,511 @@
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
 
 define <vscale x 1 x bfloat> @insertelt_nxv1bf16_0(<vscale x 1 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv1bf16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv1bf16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFH-NEXT:    vmv.s.x v8, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv1bf16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv1bf16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, tu, ma
+; ZVFBFA-NEXT:    vfmv.s.f v8, fa0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 0
   ret <vscale x 1 x bfloat> %r
 }
 
 define <vscale x 1 x bfloat> @insertelt_nxv1bf16_imm(<vscale x 1 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv1bf16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, tu, ma
-; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    vslideup.vi v8, v9, 3
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv1bf16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf4, tu, ma
+; ZVFH-NEXT:    vmv.s.x v9, a0
+; ZVFH-NEXT:    vslideup.vi v8, v9, 3
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv1bf16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf4, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a0
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 3
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv1bf16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf4, tu, ma
+; ZVFBFA-NEXT:    vfmv.s.f v9, fa0
+; ZVFBFA-NEXT:    vslideup.vi v8, v9, 3
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 3
   ret <vscale x 1 x bfloat> %r
 }
 
 define <vscale x 1 x bfloat> @insertelt_nxv1bf16_idx(<vscale x 1 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv1bf16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, a0, 1
-; CHECK-NEXT:    fmv.x.h a2, fa0
-; CHECK-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v9, a2
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, ma
-; CHECK-NEXT:    vslideup.vx v8, v9, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv1bf16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi a1, a0, 1
+; ZVFH-NEXT:    fmv.x.h a2, fa0
+; ZVFH-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vmv.s.x v9, a2
+; ZVFH-NEXT:    vsetvli zero, a1, e16, mf4, tu, ma
+; ZVFH-NEXT:    vslideup.vx v8, v9, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv1bf16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi a1, a0, 1
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa0
+; ZVFHMIN-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a2
+; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, mf4, tu, ma
+; ZVFHMIN-NEXT:    vslideup.vx v8, v9, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv1bf16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi a1, a0, 1
+; ZVFBFA-NEXT:    vsetvli a2, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfmv.s.f v9, fa0
+; ZVFBFA-NEXT:    vsetvli zero, a1, e16alt, mf4, tu, ma
+; ZVFBFA-NEXT:    vslideup.vx v8, v9, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 %idx
   ret <vscale x 1 x bfloat> %r
 }
 
 define <vscale x 2 x bfloat> @insertelt_nxv2bf16_0(<vscale x 2 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv2bf16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv2bf16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFH-NEXT:    vmv.s.x v8, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv2bf16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv2bf16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, tu, ma
+; ZVFBFA-NEXT:    vfmv.s.f v8, fa0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 0
   ret <vscale x 2 x bfloat> %r
 }
 
 define <vscale x 2 x bfloat> @insertelt_nxv2bf16_imm(<vscale x 2 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv2bf16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
-; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    vslideup.vi v8, v9, 3
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv2bf16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
+; ZVFH-NEXT:    vmv.s.x v9, a0
+; ZVFH-NEXT:    vslideup.vi v8, v9, 3
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv2bf16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a0
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 3
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv2bf16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, tu, ma
+; ZVFBFA-NEXT:    vfmv.s.f v9, fa0
+; ZVFBFA-NEXT:    vslideup.vi v8, v9, 3
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 3
   ret <vscale x 2 x bfloat> %r
 }
 
 define <vscale x 2 x bfloat> @insertelt_nxv2bf16_idx(<vscale x 2 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv2bf16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, a0, 1
-; CHECK-NEXT:    fmv.x.h a2, fa0
-; CHECK-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v9, a2
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vx v8, v9, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv2bf16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi a1, a0, 1
+; ZVFH-NEXT:    fmv.x.h a2, fa0
+; ZVFH-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vmv.s.x v9, a2
+; ZVFH-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
+; ZVFH-NEXT:    vslideup.vx v8, v9, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv2bf16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi a1, a0, 1
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa0
+; ZVFHMIN-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a2
+; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
+; ZVFHMIN-NEXT:    vslideup.vx v8, v9, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv2bf16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi a1, a0, 1
+; ZVFBFA-NEXT:    vsetvli a2, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfmv.s.f v9, fa0
+; ZVFBFA-NEXT:    vsetvli zero, a1, e16alt, mf2, tu, ma
+; ZVFBFA-NEXT:    vslideup.vx v8, v9, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 %idx
   ret <vscale x 2 x bfloat> %r
 }
 
 define <vscale x 4 x bfloat> @insertelt_nxv4bf16_0(<vscale x 4 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv4bf16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv4bf16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFH-NEXT:    vmv.s.x v8, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv4bf16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv4bf16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, tu, ma
+; ZVFBFA-NEXT:    vfmv.s.f v8, fa0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 0
   ret <vscale x 4 x bfloat> %r
 }
 
 define <vscale x 4 x bfloat> @insertelt_nxv4bf16_imm(<vscale x 4 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv4bf16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    vslideup.vi v8, v9, 3
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv4bf16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFH-NEXT:    vmv.s.x v9, a0
+; ZVFH-NEXT:    vslideup.vi v8, v9, 3
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv4bf16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a0
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 3
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv4bf16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, m1, tu, ma
+; ZVFBFA-NEXT:    vfmv.s.f v9, fa0
+; ZVFBFA-NEXT:    vslideup.vi v8, v9, 3
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 3
   ret <vscale x 4 x bfloat> %r
 }
 
 define <vscale x 4 x bfloat> @insertelt_nxv4bf16_idx(<vscale x 4 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv4bf16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, a0, 1
-; CHECK-NEXT:    fmv.x.h a2, fa0
-; CHECK-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v9, a2
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
-; CHECK-NEXT:    vslideup.vx v8, v9, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv4bf16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi a1, a0, 1
+; ZVFH-NEXT:    fmv.x.h a2, fa0
+; ZVFH-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vmv.s.x v9, a2
+; ZVFH-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
+; ZVFH-NEXT:    vslideup.vx v8, v9, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv4bf16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi a1, a0, 1
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa0
+; ZVFHMIN-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a2
+; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vslideup.vx v8, v9, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv4bf16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi a1, a0, 1
+; ZVFBFA-NEXT:    vsetvli a2, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfmv.s.f v9, fa0
+; ZVFBFA-NEXT:    vsetvli zero, a1, e16alt, m1, tu, ma
+; ZVFBFA-NEXT:    vslideup.vx v8, v9, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 %idx
   ret <vscale x 4 x bfloat> %r
 }
 
 define <vscale x 8 x bfloat> @insertelt_nxv8bf16_0(<vscale x 8 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv8bf16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv8bf16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFH-NEXT:    vmv.s.x v8, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv8bf16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv8bf16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, tu, ma
+; ZVFBFA-NEXT:    vfmv.s.f v8, fa0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 0
   ret <vscale x 8 x bfloat> %r
 }
 
 define <vscale x 8 x bfloat> @insertelt_nxv8bf16_imm(<vscale x 8 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv8bf16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v10, a0
-; CHECK-NEXT:    vslideup.vi v8, v10, 3
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv8bf16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFH-NEXT:    vmv.s.x v10, a0
+; ZVFH-NEXT:    vslideup.vi v8, v10, 3
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv8bf16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v10, a0
+; ZVFHMIN-NEXT:    vslideup.vi v8, v10, 3
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv8bf16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, m1, tu, ma
+; ZVFBFA-NEXT:    vfmv.s.f v10, fa0
+; ZVFBFA-NEXT:    vslideup.vi v8, v10, 3
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 3
   ret <vscale x 8 x bfloat> %r
 }
 
 define <vscale x 8 x bfloat> @insertelt_nxv8bf16_idx(<vscale x 8 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv8bf16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v10, a1
-; CHECK-NEXT:    addi a1, a0, 1
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, ma
-; CHECK-NEXT:    vslideup.vx v8, v10, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv8bf16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a1, fa0
+; ZVFH-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vmv.s.x v10, a1
+; ZVFH-NEXT:    addi a1, a0, 1
+; ZVFH-NEXT:    vsetvli zero, a1, e16, m2, tu, ma
+; ZVFH-NEXT:    vslideup.vx v8, v10, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv8bf16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v10, a1
+; ZVFHMIN-NEXT:    addi a1, a0, 1
+; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m2, tu, ma
+; ZVFHMIN-NEXT:    vslideup.vx v8, v10, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv8bf16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfmv.s.f v10, fa0
+; ZVFBFA-NEXT:    addi a1, a0, 1
+; ZVFBFA-NEXT:    vsetvli zero, a1, e16alt, m2, tu, ma
+; ZVFBFA-NEXT:    vslideup.vx v8, v10, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 %idx
   ret <vscale x 8 x bfloat> %r
 }
 
 define <vscale x 16 x bfloat> @insertelt_nxv16bf16_0(<vscale x 16 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv16bf16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv16bf16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFH-NEXT:    vmv.s.x v8, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv16bf16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv16bf16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, tu, ma
+; ZVFBFA-NEXT:    vfmv.s.f v8, fa0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 0
   ret <vscale x 16 x bfloat> %r
 }
 
 define <vscale x 16 x bfloat> @insertelt_nxv16bf16_imm(<vscale x 16 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv16bf16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v12, a0
-; CHECK-NEXT:    vslideup.vi v8, v12, 3
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv16bf16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFH-NEXT:    vmv.s.x v12, a0
+; ZVFH-NEXT:    vslideup.vi v8, v12, 3
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv16bf16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v12, a0
+; ZVFHMIN-NEXT:    vslideup.vi v8, v12, 3
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv16bf16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, m1, tu, ma
+; ZVFBFA-NEXT:    vfmv.s.f v12, fa0
+; ZVFBFA-NEXT:    vslideup.vi v8, v12, 3
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 3
   ret <vscale x 16 x bfloat> %r
 }
 
 define <vscale x 16 x bfloat> @insertelt_nxv16bf16_idx(<vscale x 16 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv16bf16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v12, a1
-; CHECK-NEXT:    addi a1, a0, 1
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, ma
-; CHECK-NEXT:    vslideup.vx v8, v12, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv16bf16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a1, fa0
+; ZVFH-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vmv.s.x v12, a1
+; ZVFH-NEXT:    addi a1, a0, 1
+; ZVFH-NEXT:    vsetvli zero, a1, e16, m4, tu, ma
+; ZVFH-NEXT:    vslideup.vx v8, v12, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv16bf16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v12, a1
+; ZVFHMIN-NEXT:    addi a1, a0, 1
+; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m4, tu, ma
+; ZVFHMIN-NEXT:    vslideup.vx v8, v12, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv16bf16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfmv.s.f v12, fa0
+; ZVFBFA-NEXT:    addi a1, a0, 1
+; ZVFBFA-NEXT:    vsetvli zero, a1, e16alt, m4, tu, ma
+; ZVFBFA-NEXT:    vslideup.vx v8, v12, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 %idx
   ret <vscale x 16 x bfloat> %r
 }
 
 define <vscale x 32 x bfloat> @insertelt_nxv32bf16_0(<vscale x 32 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv32bf16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv32bf16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFH-NEXT:    vmv.s.x v8, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv32bf16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv32bf16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, tu, ma
+; ZVFBFA-NEXT:    vfmv.s.f v8, fa0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 0
   ret <vscale x 32 x bfloat> %r
 }
 
 define <vscale x 32 x bfloat> @insertelt_nxv32bf16_imm(<vscale x 32 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv32bf16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v16, a0
-; CHECK-NEXT:    vslideup.vi v8, v16, 3
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv32bf16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFH-NEXT:    vmv.s.x v16, a0
+; ZVFH-NEXT:    vslideup.vi v8, v16, 3
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv32bf16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v16, a0
+; ZVFHMIN-NEXT:    vslideup.vi v8, v16, 3
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv32bf16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, m1, tu, ma
+; ZVFBFA-NEXT:    vfmv.s.f v16, fa0
+; ZVFBFA-NEXT:    vslideup.vi v8, v16, 3
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 3
   ret <vscale x 32 x bfloat> %r
 }
 
 define <vscale x 32 x bfloat> @insertelt_nxv32bf16_idx(<vscale x 32 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv32bf16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v16, a1
-; CHECK-NEXT:    addi a1, a0, 1
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
-; CHECK-NEXT:    vslideup.vx v8, v16, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: insertelt_nxv32bf16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a1, fa0
+; ZVFH-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vmv.s.x v16, a1
+; ZVFH-NEXT:    addi a1, a0, 1
+; ZVFH-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; ZVFH-NEXT:    vslideup.vx v8, v16, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv32bf16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; ZVFHMIN-NEXT:    addi a1, a0, 1
+; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; ZVFHMIN-NEXT:    vslideup.vx v8, v16, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv32bf16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfmv.s.f v16, fa0
+; ZVFBFA-NEXT:    addi a1, a0, 1
+; ZVFBFA-NEXT:    vsetvli zero, a1, e16alt, m8, tu, ma
+; ZVFBFA-NEXT:    vslideup.vx v8, v16, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 %idx
   ret <vscale x 32 x bfloat> %r
 }
@@ -243,6 +529,13 @@ define <vscale x 1 x half> @insertelt_nxv1f16_0(<vscale x 1 x half> %v, half %el
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
 ; ZVFHMIN-NEXT:    vmv.s.x v8, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv1f16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.h a0, fa0
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFBFA-NEXT:    vmv.s.x v8, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 1 x half> %v, half %elt, i32 0
   ret <vscale x 1 x half> %r
 }
@@ -262,6 +555,14 @@ define <vscale x 1 x half> @insertelt_nxv1f16_imm(<vscale x 1 x half> %v, half %
 ; ZVFHMIN-NEXT:    vmv.s.x v9, a0
 ; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 3
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv1f16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.h a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, mf4, tu, ma
+; ZVFBFA-NEXT:    vmv.s.x v9, a0
+; ZVFBFA-NEXT:    vslideup.vi v8, v9, 3
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 1 x half> %v, half %elt, i32 3
   ret <vscale x 1 x half> %r
 }
@@ -285,6 +586,16 @@ define <vscale x 1 x half> @insertelt_nxv1f16_idx(<vscale x 1 x half> %v, half %
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, mf4, tu, ma
 ; ZVFHMIN-NEXT:    vslideup.vx v8, v9, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv1f16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi a1, a0, 1
+; ZVFBFA-NEXT:    fmv.x.h a2, fa0
+; ZVFBFA-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vmv.s.x v9, a2
+; ZVFBFA-NEXT:    vsetvli zero, a1, e16, mf4, tu, ma
+; ZVFBFA-NEXT:    vslideup.vx v8, v9, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 1 x half> %v, half %elt, i32 %idx
   ret <vscale x 1 x half> %r
 }
@@ -302,6 +613,13 @@ define <vscale x 2 x half> @insertelt_nxv2f16_0(<vscale x 2 x half> %v, half %el
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
 ; ZVFHMIN-NEXT:    vmv.s.x v8, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv2f16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.h a0, fa0
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFBFA-NEXT:    vmv.s.x v8, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 2 x half> %v, half %elt, i32 0
   ret <vscale x 2 x half> %r
 }
@@ -321,6 +639,14 @@ define <vscale x 2 x half> @insertelt_nxv2f16_imm(<vscale x 2 x half> %v, half %
 ; ZVFHMIN-NEXT:    vmv.s.x v9, a0
 ; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 3
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv2f16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.h a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
+; ZVFBFA-NEXT:    vmv.s.x v9, a0
+; ZVFBFA-NEXT:    vslideup.vi v8, v9, 3
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 2 x half> %v, half %elt, i32 3
   ret <vscale x 2 x half> %r
 }
@@ -344,6 +670,16 @@ define <vscale x 2 x half> @insertelt_nxv2f16_idx(<vscale x 2 x half> %v, half %
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
 ; ZVFHMIN-NEXT:    vslideup.vx v8, v9, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv2f16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi a1, a0, 1
+; ZVFBFA-NEXT:    fmv.x.h a2, fa0
+; ZVFBFA-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vmv.s.x v9, a2
+; ZVFBFA-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
+; ZVFBFA-NEXT:    vslideup.vx v8, v9, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 2 x half> %v, half %elt, i32 %idx
   ret <vscale x 2 x half> %r
 }
@@ -361,6 +697,13 @@ define <vscale x 4 x half> @insertelt_nxv4f16_0(<vscale x 4 x half> %v, half %el
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
 ; ZVFHMIN-NEXT:    vmv.s.x v8, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv4f16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.h a0, fa0
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFBFA-NEXT:    vmv.s.x v8, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 4 x half> %v, half %elt, i32 0
   ret <vscale x 4 x half> %r
 }
@@ -380,6 +723,14 @@ define <vscale x 4 x half> @insertelt_nxv4f16_imm(<vscale x 4 x half> %v, half %
 ; ZVFHMIN-NEXT:    vmv.s.x v9, a0
 ; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 3
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv4f16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.h a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFBFA-NEXT:    vmv.s.x v9, a0
+; ZVFBFA-NEXT:    vslideup.vi v8, v9, 3
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 4 x half> %v, half %elt, i32 3
   ret <vscale x 4 x half> %r
 }
@@ -403,6 +754,16 @@ define <vscale x 4 x half> @insertelt_nxv4f16_idx(<vscale x 4 x half> %v, half %
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
 ; ZVFHMIN-NEXT:    vslideup.vx v8, v9, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv4f16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi a1, a0, 1
+; ZVFBFA-NEXT:    fmv.x.h a2, fa0
+; ZVFBFA-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vmv.s.x v9, a2
+; ZVFBFA-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
+; ZVFBFA-NEXT:    vslideup.vx v8, v9, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 4 x half> %v, half %elt, i32 %idx
   ret <vscale x 4 x half> %r
 }
@@ -420,6 +781,13 @@ define <vscale x 8 x half> @insertelt_nxv8f16_0(<vscale x 8 x half> %v, half %el
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
 ; ZVFHMIN-NEXT:    vmv.s.x v8, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv8f16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.h a0, fa0
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFBFA-NEXT:    vmv.s.x v8, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 8 x half> %v, half %elt, i32 0
   ret <vscale x 8 x half> %r
 }
@@ -439,6 +807,14 @@ define <vscale x 8 x half> @insertelt_nxv8f16_imm(<vscale x 8 x half> %v, half %
 ; ZVFHMIN-NEXT:    vmv.s.x v10, a0
 ; ZVFHMIN-NEXT:    vslideup.vi v8, v10, 3
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv8f16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.h a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFBFA-NEXT:    vmv.s.x v10, a0
+; ZVFBFA-NEXT:    vslideup.vi v8, v10, 3
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 8 x half> %v, half %elt, i32 3
   ret <vscale x 8 x half> %r
 }
@@ -462,6 +838,16 @@ define <vscale x 8 x half> @insertelt_nxv8f16_idx(<vscale x 8 x half> %v, half %
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m2, tu, ma
 ; ZVFHMIN-NEXT:    vslideup.vx v8, v10, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv8f16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.h a1, fa0
+; ZVFBFA-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vmv.s.x v10, a1
+; ZVFBFA-NEXT:    addi a1, a0, 1
+; ZVFBFA-NEXT:    vsetvli zero, a1, e16, m2, tu, ma
+; ZVFBFA-NEXT:    vslideup.vx v8, v10, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 8 x half> %v, half %elt, i32 %idx
   ret <vscale x 8 x half> %r
 }
@@ -479,6 +865,13 @@ define <vscale x 16 x half> @insertelt_nxv16f16_0(<vscale x 16 x half> %v, half
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
 ; ZVFHMIN-NEXT:    vmv.s.x v8, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv16f16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.h a0, fa0
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFBFA-NEXT:    vmv.s.x v8, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 16 x half> %v, half %elt, i32 0
   ret <vscale x 16 x half> %r
 }
@@ -498,6 +891,14 @@ define <vscale x 16 x half> @insertelt_nxv16f16_imm(<vscale x 16 x half> %v, hal
 ; ZVFHMIN-NEXT:    vmv.s.x v12, a0
 ; ZVFHMIN-NEXT:    vslideup.vi v8, v12, 3
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv16f16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.h a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFBFA-NEXT:    vmv.s.x v12, a0
+; ZVFBFA-NEXT:    vslideup.vi v8, v12, 3
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 16 x half> %v, half %elt, i32 3
   ret <vscale x 16 x half> %r
 }
@@ -521,6 +922,16 @@ define <vscale x 16 x half> @insertelt_nxv16f16_idx(<vscale x 16 x half> %v, hal
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m4, tu, ma
 ; ZVFHMIN-NEXT:    vslideup.vx v8, v12, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv16f16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.h a1, fa0
+; ZVFBFA-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vmv.s.x v12, a1
+; ZVFBFA-NEXT:    addi a1, a0, 1
+; ZVFBFA-NEXT:    vsetvli zero, a1, e16, m4, tu, ma
+; ZVFBFA-NEXT:    vslideup.vx v8, v12, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 16 x half> %v, half %elt, i32 %idx
   ret <vscale x 16 x half> %r
 }
@@ -538,6 +949,13 @@ define <vscale x 32 x half> @insertelt_nxv32f16_0(<vscale x 32 x half> %v, half
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
 ; ZVFHMIN-NEXT:    vmv.s.x v8, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv32f16_0:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.h a0, fa0
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFBFA-NEXT:    vmv.s.x v8, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 32 x half> %v, half %elt, i32 0
   ret <vscale x 32 x half> %r
 }
@@ -557,6 +975,14 @@ define <vscale x 32 x half> @insertelt_nxv32f16_imm(<vscale x 32 x half> %v, hal
 ; ZVFHMIN-NEXT:    vmv.s.x v16, a0
 ; ZVFHMIN-NEXT:    vslideup.vi v8, v16, 3
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv32f16_imm:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.h a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFBFA-NEXT:    vmv.s.x v16, a0
+; ZVFBFA-NEXT:    vslideup.vi v8, v16, 3
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 32 x half> %v, half %elt, i32 3
   ret <vscale x 32 x half> %r
 }
@@ -580,6 +1006,16 @@ define <vscale x 32 x half> @insertelt_nxv32f16_idx(<vscale x 32 x half> %v, hal
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
 ; ZVFHMIN-NEXT:    vslideup.vx v8, v16, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: insertelt_nxv32f16_idx:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.h a1, fa0
+; ZVFBFA-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vmv.s.x v16, a1
+; ZVFBFA-NEXT:    addi a1, a0, 1
+; ZVFBFA-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; ZVFBFA-NEXT:    vslideup.vx v8, v16, a0
+; ZVFBFA-NEXT:    ret
   %r = insertelement <vscale x 32 x half> %v, half %elt, i32 %idx
   ret <vscale x 32 x half> %r
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll
index fbc7311945c8b..7a63a4710c534 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll
@@ -7,9 +7,8 @@ declare bfloat @llvm.riscv.vfmv.f.s.nxv1bf16(<vscale x 1 x bfloat>)
 define bfloat @intrinsic_vfmv.f.s_s_nxv1bf16(<vscale x 1 x bfloat> %0) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv1bf16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    fmv.h.x fa0, a0
+; CHECK-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call bfloat @llvm.riscv.vfmv.f.s.nxv1bf16(<vscale x 1 x bfloat> %0)
@@ -21,9 +20,8 @@ declare bfloat @llvm.riscv.vfmv.f.s.nxv2bf16(<vscale x 2 x bfloat>)
 define bfloat @intrinsic_vfmv.f.s_s_nxv2bf16(<vscale x 2 x bfloat> %0) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv2bf16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    fmv.h.x fa0, a0
+; CHECK-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call bfloat @llvm.riscv.vfmv.f.s.nxv2bf16(<vscale x 2 x bfloat> %0)
@@ -35,9 +33,8 @@ declare bfloat @llvm.riscv.vfmv.f.s.nxv4bf16(<vscale x 4 x bfloat>)
 define bfloat @intrinsic_vfmv.f.s_s_nxv4bf16(<vscale x 4 x bfloat> %0) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv4bf16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    fmv.h.x fa0, a0
+; CHECK-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call bfloat @llvm.riscv.vfmv.f.s.nxv4bf16(<vscale x 4 x bfloat> %0)
@@ -49,9 +46,8 @@ declare bfloat @llvm.riscv.vfmv.f.s.nxv8bf16(<vscale x 8 x bfloat>)
 define bfloat @intrinsic_vfmv.f.s_s_nxv8bf16(<vscale x 8 x bfloat> %0) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv8bf16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    fmv.h.x fa0, a0
+; CHECK-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call bfloat @llvm.riscv.vfmv.f.s.nxv8bf16(<vscale x 8 x bfloat> %0)
@@ -63,9 +59,8 @@ declare bfloat @llvm.riscv.vfmv.f.s.nxv16bf16(<vscale x 16 x bfloat>)
 define bfloat @intrinsic_vfmv.f.s_s_nxv16bf16(<vscale x 16 x bfloat> %0) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv16bf16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    fmv.h.x fa0, a0
+; CHECK-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call bfloat @llvm.riscv.vfmv.f.s.nxv16bf16(<vscale x 16 x bfloat> %0)
@@ -77,9 +72,8 @@ declare bfloat @llvm.riscv.vfmv.f.s.nxv32bf16(<vscale x 32 x bfloat>)
 define bfloat @intrinsic_vfmv.f.s_s_nxv32bf16(<vscale x 32 x bfloat> %0) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv32bf16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    fmv.h.x fa0, a0
+; CHECK-NEXT:    vsetivli zero, 1, e16alt, m1, ta, ma
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call bfloat @llvm.riscv.vfmv.f.s.nxv32bf16(<vscale x 32 x bfloat> %0)