Skip to content

Commit

Permalink
[AArch64][SVE] Convert svdup(vec, SV_VL1, elm) to insertelement(vec, …
Browse files Browse the repository at this point in the history
…elm, 0)

By converting the SVE intrinsic to a normal LLVM insertelement we give
the code generator a better chance to remove transitions between GPRs
and VPRs

Co-authored-by: Paul Walker <paul.walker@arm.com>

Depends on D101302

Differential Revision: https://reviews.llvm.org/D101167
  • Loading branch information
brads55 committed Apr 29, 2021
1 parent c8f20ed commit 89085bc
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 0 deletions.
26 changes: 26 additions & 0 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Expand Up @@ -366,6 +366,30 @@ static Optional<Instruction *> instCombineConvertFromSVBool(InstCombiner &IC,
return IC.replaceInstUsesWith(II, EarliestReplacement);
}

static Optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
IntrinsicInst &II) {
IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
if (!Pg)
return None;

if (Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
return None;

const auto PTruePattern =
cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
if (PTruePattern != AArch64SVEPredPattern::vl1)
return None;

// The intrinsic is inserting into lane zero so use an insert instead.
auto *IdxTy = Type::getInt64Ty(II.getContext());
auto *Insert = InsertElementInst::Create(
II.getArgOperand(0), II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
Insert->insertBefore(&II);
Insert->takeName(&II);

return IC.replaceInstUsesWith(II, Insert);
}

static Optional<Instruction *> instCombineSVELast(InstCombiner &IC,
IntrinsicInst &II) {
Value *Pg = II.getArgOperand(0);
Expand Down Expand Up @@ -455,6 +479,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
break;
case Intrinsic::aarch64_sve_convert_from_svbool:
return instCombineConvertFromSVBool(IC, II);
case Intrinsic::aarch64_sve_dup:
return instCombineSVEDup(IC, II);
case Intrinsic::aarch64_sve_lasta:
case Intrinsic::aarch64_sve_lastb:
return instCombineSVELast(IC, II);
Expand Down
52 changes: 52 additions & 0 deletions llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dup.ll
@@ -0,0 +1,52 @@
; RUN: opt -S -instcombine < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

define <vscale x 16 x i8> @dup_insertelement_0(<vscale x 16 x i8> %v, i8 %s) #0 {
; CHECK-LABEL: @dup_insertelement_0(
; CHECK: %insert = insertelement <vscale x 16 x i8> %v, i8 %s, i64 0
; CHECK-NEXT: ret <vscale x 16 x i8> %insert
%pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 1)
%insert = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> %v, <vscale x 16 x i1> %pg, i8 %s)
ret <vscale x 16 x i8> %insert
}

define <vscale x 16 x i8> @dup_insertelement_1(<vscale x 16 x i8> %v, i8 %s) #0 {
; CHECK-LABEL: @dup_insertelement_1(
; CHECK: %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 2)
; CHECK-NEXT: %insert = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> %v, <vscale x 16 x i1> %pg, i8 %s)
; CHECK-NEXT: ret <vscale x 16 x i8> %insert
%pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 2)
%insert = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> %v, <vscale x 16 x i1> %pg, i8 %s)
ret <vscale x 16 x i8> %insert
}

define <vscale x 16 x i8> @dup_insertelement_x(<vscale x 16 x i8> %v, i8 %s, <vscale x 16 x i1> %pg) #0 {
; CHECK-LABEL: @dup_insertelement_x(
; CHECK: %insert = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> %v, <vscale x 16 x i1> %pg, i8 %s)
; CHECK-NEXT: ret <vscale x 16 x i8> %insert
%insert = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> %v, <vscale x 16 x i1> %pg, i8 %s)
ret <vscale x 16 x i8> %insert
}

define <vscale x 8 x i16> @dup_insertelement_0_convert(<vscale x 8 x i16> %v, i16 %s) #0 {
; CHECK-LABEL: @dup_insertelement_0_convert(
; CHECK: %insert = insertelement <vscale x 8 x i16> %v, i16 %s, i64 0
; CHECK-NEXT: ret <vscale x 8 x i16> %insert
%pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 1)
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg)
%2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
%insert = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> %v, <vscale x 8 x i1> %2, i16 %s)
ret <vscale x 8 x i16> %insert
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)

declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)

attributes #0 = { "target-features"="+sve" }

0 comments on commit 89085bc

Please sign in to comment.