diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 033e632559729..3a205de4e368e 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -961,6 +961,10 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
                  LLVMVectorElementType<0>],
                 [IntrNoMem]>;
 
+  class AdvSIMD_SVE_DUP_Unpred_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty], [LLVMVectorElementType<0>],
+                [IntrNoMem]>;
+
   class AdvSIMD_SVE_DUPQ_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMMatchType<0>,
@@ -1287,6 +1291,8 @@ def int_aarch64_sve_prf
 //
 
 def int_aarch64_sve_dup : AdvSIMD_SVE_DUP_Intrinsic;
+def int_aarch64_sve_dup_x : AdvSIMD_SVE_DUP_Unpred_Intrinsic;
+
 def int_aarch64_sve_index : AdvSIMD_SVE_Index_Intrinsic;
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e2c56c39c06dc..23df49790b5e1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11296,6 +11296,9 @@ static SDValue performIntrinsicCombine(SDNode *N,
     return LowerSVEIntrinsicIndex(N, DAG);
   case Intrinsic::aarch64_sve_dup:
     return LowerSVEIntrinsicDUP(N, DAG);
+  case Intrinsic::aarch64_sve_dup_x:
+    return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1));
   case Intrinsic::aarch64_sve_ext:
     return LowerSVEIntrinsicEXT(N, DAG);
   case Intrinsic::aarch64_sve_sel:
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
new file mode 100644
index 0000000000000..8cc1ca86836b1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
@@ -0,0 +1,127 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
+
+;
+; Unpredicated dup instruction (which is an alias for mov):
+; * register + register,
+; * register + immediate
+;
+
+define <vscale x 16 x i8> @dup_i8(i8 %b) {
+; CHECK-LABEL: dup_i8:
+; CHECK: mov z0.b, w0
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 16 x i8> @dup_imm_i8() {
+; CHECK-LABEL: dup_imm_i8:
+; CHECK: mov z0.b, #16
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 16)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @dup_i16(i16 %b) {
+; CHECK-LABEL: dup_i16:
+; CHECK: mov z0.h, w0
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @dup_imm_i16(i16 %b) {
+; CHECK-LABEL: dup_imm_i16:
+; CHECK: mov z0.h, #16
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @dup_i32(i32 %b) {
+; CHECK-LABEL: dup_i32:
+; CHECK: mov z0.s, w0
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @dup_imm_i32(i32 %b) {
+; CHECK-LABEL: dup_imm_i32:
+; CHECK: mov z0.s, #16
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 16)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @dup_i64(i64 %b) {
+; CHECK-LABEL: dup_i64:
+; CHECK: mov z0.d, x0
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @dup_imm_i64(i64 %b) {
+; CHECK-LABEL: dup_imm_i64:
+; CHECK: mov z0.d, #16
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 16)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 8 x half> @dup_f16(half %b) {
+; CHECK-LABEL: dup_f16:
+; CHECK: mov z0.h, h0
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @dup_imm_f16(half %b) {
+; CHECK-LABEL: dup_imm_f16:
+; CHECK: mov z0.h, #16.00000000
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 16.)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @dup_f32(float %b) {
+; CHECK-LABEL: dup_f32:
+; CHECK: mov z0.s, s0
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @dup_imm_f32(float %b) {
+; CHECK-LABEL: dup_imm_f32:
+; CHECK: mov z0.s, #16.00000000
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 16.)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @dup_f64(double %b) {
+; CHECK-LABEL: dup_f64:
+; CHECK: mov z0.d, d0
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double %b)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @dup_imm_f64(double %b) {
+; CHECK-LABEL: dup_imm_f64:
+; CHECK: mov z0.d, #16.00000000
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 16.)
+  ret <vscale x 2 x double> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8( i8)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
+declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)
+declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
+declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)