Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 7 additions & 27 deletions llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4346,34 +4346,14 @@ bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
->getAPIntValue()
.trunc(VT.getFixedSizeInBits())
.getSExtValue();
int32_t ImmVal, ShiftVal;
if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
ShiftVal))
return false;

switch (VT.SimpleTy) {
case MVT::i8:
// All immediates are supported.
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
return true;
case MVT::i16:
case MVT::i32:
case MVT::i64:
// Support 8bit signed immediates.
if (Val >= -128 && Val <= 127) {
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
return true;
}
// Support 16bit signed immediates that are a multiple of 256.
if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
return true;
}
break;
default:
break;
}

return false;
Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
return true;
}

bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
Expand Down
25 changes: 25 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15277,6 +15277,27 @@ static SDValue NormalizeBuildVector(SDValue Op,
return DAG.getBuildVector(VT, DL, Ops);
}

/// Try to build the constant described by \p DefBits as a splat of a single
/// 64-bit value that the SVE dup instruction can encode as an immediate.
///
/// \param Op      node whose value type decides whether the transform applies
///                and which type the result is cast back to.
/// \param DAG     selection DAG used to create the new nodes.
/// \param ST      subtarget; the transform is only attempted when it has SVE.
/// \param DefBits bit pattern to materialise (presumably the defined bits of
///                the 128-bit build vector -- confirm against the caller).
/// \returns an NVCAST of the splat to the type of \p Op, or an empty SDValue
///          when the subtarget, type or value do not qualify.
static SDValue trySVESplat64(SDValue Op, SelectionDAG &DAG,
                             const AArch64Subtarget *ST, APInt &DefBits) {
  if (!ST->hasSVE())
    return SDValue();

  // TODO: We should be able to support 64-bit destinations too
  EVT VT = Op.getValueType();
  if (!VT.is128BitVector())
    return SDValue();

  // Both 64-bit halves must match, otherwise this is not a 64-bit splat.
  if (DefBits.getHiBits(64) != DefBits.getLoBits(64))
    return SDValue();

  // Only proceed when the 64-bit lane value is encodable; the decoded
  // immediate/shift pair is not needed here, just the yes/no answer.
  APInt Lane = DefBits.trunc(64);
  int32_t EncodedImm, EncodedShift;
  const bool Encodable = AArch64_AM::isSVECpyDupImm(
      64, Lane.getSExtValue(), EncodedImm, EncodedShift);
  if (!Encodable)
    return SDValue();

  // Splat as a scalable nxv2i64, take the fixed v2i64 view of it, then cast
  // to the requested result type.
  SDLoc DL(Op);
  SDValue Scalar = DAG.getConstant(Lane, DL, MVT::i64);
  SDValue Splat = DAG.getSplatVector(MVT::nxv2i64, DL, Scalar);
  SDValue Fixed = convertFromScalableVector(DAG, MVT::v2i64, Splat);
  return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Fixed);
}

static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG,
const AArch64Subtarget *ST) {
EVT VT = Op.getValueType();
Expand Down Expand Up @@ -15316,6 +15337,10 @@ static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG,
if (SDValue R = TryMOVIWithBits(UndefBits))
return R;

// Try to materialise the constant using SVE when available.
if (SDValue R = trySVESplat64(Op, DAG, ST, DefBits))
return R;

// See if a fneg of the constant can be materialized with a MOVI, etc
auto TryWithFNeg = [&](APInt DefBits, MVT FVT) {
// FNegate each sub-element of the constant
Expand Down
30 changes: 30 additions & 0 deletions llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,36 @@ inline static bool isAnyMOVWMovAlias(uint64_t Value, int RegWidth) {
return isAnyMOVZMovAlias(Value, RegWidth);
}

/// Check whether \p Val can be encoded as an 8-bit immediate plus an optional
/// left shift by 8 for elements that are \p SizeInBits wide.
///
/// For 8-bit elements every value is accepted and only its low byte is kept.
/// For 16/32/64-bit elements the value must either lie in [-128, 127]
/// (shift 0) or be a multiple of 256 in [-32768, 32512] (shift 8). Any other
/// element size is rejected.
///
/// \param SizeInBits element width in bits.
/// \param Val        sign-extended value to encode.
/// \param Imm        on success, the 8-bit immediate field (0..255).
/// \param Shift      on success, the shift amount: 0 or 8.
/// \returns true if an encoding exists; on false \p Imm and \p Shift are left
///          untouched.
static inline bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm,
                                  int32_t &Shift) {
  const bool IsByteElt = SizeInBits == 8;
  const bool IsWideElt =
      SizeInBits == 16 || SizeInBits == 32 || SizeInBits == 64;
  if (!IsByteElt && !IsWideElt)
    return false;

  // Unshifted form: anything for byte elements, otherwise a signed 8-bit
  // value.
  if (IsByteElt || (Val >= -128 && Val <= 127)) {
    Shift = 0;
    Imm = Val & 0xFF;
    return true;
  }

  // Shifted form: a signed 16-bit value whose low byte is zero.
  if (Val < -32768 || Val > 32512 || Val % 256 != 0)
    return false;

  Shift = 8;
  Imm = (Val >> 8) & 0xFF;
  return true;
}

} // end namespace AArch64_AM

} // end namespace llvm
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,9 @@ define void @vector_loop_with_icmp(ptr nocapture noundef writeonly %dest) {
; CHECK-LABEL: vector_loop_with_icmp:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: index z0.d, #0, #1
; CHECK-NEXT: mov w8, #2 // =0x2
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: mov z1.d, #2 // =0x2
; CHECK-NEXT: add x8, x0, #4
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: mov w10, #1 // =0x1
; CHECK-NEXT: b .LBB5_2
; CHECK-NEXT: .LBB5_1: // %pred.store.continue6
Expand Down
238 changes: 238 additions & 0 deletions llvm/test/CodeGen/AArch64/movi64_sve.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=aarch64 -mattr=+neon < %s | FileCheck %s --check-prefixes=COMMON,NEON
; RUN: llc -mtriple=aarch64 -mattr=+neon,+sve < %s | FileCheck %s --check-prefixes=COMMON,SVE

; Splat of i64 1: with +sve a single SVE immediate move is expected instead of
; the NEON scalar mov + dup pair.
define <2 x i64> @movi_1_v2i64() {
; NEON-LABEL: movi_1_v2i64:
; NEON: // %bb.0:
; NEON-NEXT: mov w8, #1 // =0x1
; NEON-NEXT: dup v0.2d, x8
; NEON-NEXT: ret
;
; SVE-LABEL: movi_1_v2i64:
; SVE: // %bb.0:
; SVE-NEXT: mov z0.d, #1 // =0x1
; SVE-NEXT: ret
ret <2 x i64> splat (i64 1)
}

; Splat of i64 127, the largest value of the signed 8-bit range: still a single
; SVE immediate move with +sve.
define <2 x i64> @movi_127_v2i64() {
; NEON-LABEL: movi_127_v2i64:
; NEON: // %bb.0:
; NEON-NEXT: mov w8, #127 // =0x7f
; NEON-NEXT: dup v0.2d, x8
; NEON-NEXT: ret
;
; SVE-LABEL: movi_127_v2i64:
; SVE: // %bb.0:
; SVE-NEXT: mov z0.d, #127 // =0x7f
; SVE-NEXT: ret
ret <2 x i64> splat (i64 127)
}

; Splat of i64 -128, the smallest value of the signed 8-bit range: still a
; single SVE immediate move with +sve.
define <2 x i64> @movi_m128_v2i64() {
; NEON-LABEL: movi_m128_v2i64:
; NEON: // %bb.0:
; NEON-NEXT: mov x8, #-128 // =0xffffffffffffff80
; NEON-NEXT: dup v0.2d, x8
; NEON-NEXT: ret
;
; SVE-LABEL: movi_m128_v2i64:
; SVE: // %bb.0:
; SVE-NEXT: mov z0.d, #-128 // =0xffffffffffffff80
; SVE-NEXT: ret
ret <2 x i64> splat (i64 -128)
}

; Splat of i64 256: outside the signed 8-bit range but a multiple of 256, so
; with +sve it is still expected to be a single SVE immediate move.
define <2 x i64> @movi_256_v2i64() {
; NEON-LABEL: movi_256_v2i64:
; NEON: // %bb.0:
; NEON-NEXT: mov w8, #256 // =0x100
; NEON-NEXT: dup v0.2d, x8
; NEON-NEXT: ret
;
; SVE-LABEL: movi_256_v2i64:
; SVE: // %bb.0:
; SVE-NEXT: mov z0.d, #256 // =0x100
; SVE-NEXT: ret
ret <2 x i64> splat (i64 256)
}

; Splat of i64 32512 (0x7f00), the largest multiple of 256 expected to be
; materialised with a single SVE immediate move.
define <2 x i64> @movi_32512_v2i64() {
; NEON-LABEL: movi_32512_v2i64:
; NEON: // %bb.0:
; NEON-NEXT: mov w8, #32512 // =0x7f00
; NEON-NEXT: dup v0.2d, x8
; NEON-NEXT: ret
;
; SVE-LABEL: movi_32512_v2i64:
; SVE: // %bb.0:
; SVE-NEXT: mov z0.d, #32512 // =0x7f00
; SVE-NEXT: ret
ret <2 x i64> splat (i64 32512)
}

; Splat of i64 -32768, the smallest multiple of 256 expected to be materialised
; with a single SVE immediate move.
define <2 x i64> @movi_m32768_v2i64() {
; NEON-LABEL: movi_m32768_v2i64:
; NEON: // %bb.0:
; NEON-NEXT: mov x8, #-32768 // =0xffffffffffff8000
; NEON-NEXT: dup v0.2d, x8
; NEON-NEXT: ret
;
; SVE-LABEL: movi_m32768_v2i64:
; SVE: // %bb.0:
; SVE-NEXT: mov z0.d, #-32768 // =0xffffffffffff8000
; SVE-NEXT: ret
ret <2 x i64> splat (i64 -32768)
}

; Special cases where the destination vector does not have 64-bit elements

; <4 x i32> whose elements alternate 127, 0: with +sve it is expected to be
; emitted as a .d splat of 127, while NEON alone loads it from the constant
; pool.
define <4 x i32> @movi_v4i32_1() {
; NEON-LABEL: movi_v4i32_1:
; NEON: // %bb.0:
; NEON-NEXT: adrp x8, .LCPI6_0
; NEON-NEXT: ldr q0, [x8, :lo12:.LCPI6_0]
; NEON-NEXT: ret
;
; SVE-LABEL: movi_v4i32_1:
; SVE: // %bb.0:
; SVE-NEXT: mov z0.d, #127 // =0x7f
; SVE-NEXT: ret
ret <4 x i32> <i32 127, i32 0, i32 127, i32 0>
}

; <4 x i32> whose elements alternate 32512, 0: with +sve it is expected to be
; emitted as a .d splat of 32512 (0x7f00), while NEON alone loads it from the
; constant pool.
define <4 x i32> @movi_v4i32_2() {
; NEON-LABEL: movi_v4i32_2:
; NEON: // %bb.0:
; NEON-NEXT: adrp x8, .LCPI7_0
; NEON-NEXT: ldr q0, [x8, :lo12:.LCPI7_0]
; NEON-NEXT: ret
;
; SVE-LABEL: movi_v4i32_2:
; SVE: // %bb.0:
; SVE-NEXT: mov z0.d, #32512 // =0x7f00
; SVE-NEXT: ret
ret <4 x i32> <i32 32512, i32 0, i32 32512, i32 0>
}

; <8 x i16> with 127 in elements 0 and 4 and zero elsewhere: with +sve it is
; expected to be emitted as a .d splat of 127, while NEON alone loads it from
; the constant pool.
define <8 x i16> @movi_v8i16_1() {
; NEON-LABEL: movi_v8i16_1:
; NEON: // %bb.0:
; NEON-NEXT: adrp x8, .LCPI8_0
; NEON-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
; NEON-NEXT: ret
;
; SVE-LABEL: movi_v8i16_1:
; SVE: // %bb.0:
; SVE-NEXT: mov z0.d, #127 // =0x7f
; SVE-NEXT: ret
ret <8 x i16> <i16 127, i16 0, i16 0, i16 0, i16 127, i16 0, i16 0, i16 0>
}

; <8 x i16> with 32512 in elements 0 and 4 and zero elsewhere: with +sve it is
; expected to be emitted as a .d splat of 32512 (0x7f00), while NEON alone
; loads it from the constant pool.
define <8 x i16> @movi_v8i16_2() {
; NEON-LABEL: movi_v8i16_2:
; NEON: // %bb.0:
; NEON-NEXT: adrp x8, .LCPI9_0
; NEON-NEXT: ldr q0, [x8, :lo12:.LCPI9_0]
; NEON-NEXT: ret
;
; SVE-LABEL: movi_v8i16_2:
; SVE: // %bb.0:
; SVE-NEXT: mov z0.d, #32512 // =0x7f00
; SVE-NEXT: ret
ret <8 x i16> <i16 32512, i16 0, i16 0, i16 0, i16 32512, i16 0, i16 0, i16 0>
}

; <16 x i8> with 127 in bytes 0 and 8 and zero elsewhere: with +sve it is
; expected to be emitted as a .d splat of 127, while NEON alone loads it from
; the constant pool.
define <16 x i8> @movi_v16i8_1() {
; NEON-LABEL: movi_v16i8_1:
; NEON: // %bb.0:
; NEON-NEXT: adrp x8, .LCPI10_0
; NEON-NEXT: ldr q0, [x8, :lo12:.LCPI10_0]
; NEON-NEXT: ret
;
; SVE-LABEL: movi_v16i8_1:
; SVE: // %bb.0:
; SVE-NEXT: mov z0.d, #127 // =0x7f
; SVE-NEXT: ret
ret <16 x i8> <i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
}

; <16 x i8> with 127 in bytes 1 and 9 and zero elsewhere: with +sve it is
; expected to be emitted as a .d splat of 32512 (0x7f00), while NEON alone
; loads it from the constant pool.
define <16 x i8> @movi_v16i8_2() {
; NEON-LABEL: movi_v16i8_2:
; NEON: // %bb.0:
; NEON-NEXT: adrp x8, .LCPI11_0
; NEON-NEXT: ldr q0, [x8, :lo12:.LCPI11_0]
; NEON-NEXT: ret
;
; SVE-LABEL: movi_v16i8_2:
; SVE: // %bb.0:
; SVE-NEXT: mov z0.d, #32512 // =0x7f00
; SVE-NEXT: ret
ret <16 x i8> <i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
}

; Negative cases

; Splat of i64 128: one above the signed 8-bit range and not a multiple of 256,
; so both configurations are expected to keep the scalar mov + dup sequence.
define <2 x i64> @movi_128_v2i64() {
; COMMON-LABEL: movi_128_v2i64:
; COMMON: // %bb.0:
; COMMON-NEXT: mov w8, #128 // =0x80
; COMMON-NEXT: dup v0.2d, x8
; COMMON-NEXT: ret
ret <2 x i64> splat (i64 128)
}

; Splat of i64 -129: one below the signed 8-bit range and not a multiple of
; 256, so both configurations are expected to keep the scalar mov + dup
; sequence.
; NOTE(review): the function is named "m127" but the value under test is -129;
; consider renaming it to movi_m129_v2i64 and regenerating the checks.
define <2 x i64> @movi_m127_v2i64() {
; COMMON-LABEL: movi_m127_v2i64:
; COMMON: // %bb.0:
; COMMON-NEXT: mov x8, #-129 // =0xffffffffffffff7f
; COMMON-NEXT: dup v0.2d, x8
; COMMON-NEXT: ret
ret <2 x i64> splat (i64 -129)
}

; Splat of i64 32513 (0x7f01): one above the largest accepted multiple of 256,
; so both configurations are expected to keep the scalar mov + dup sequence.
define <2 x i64> @movi_32513_v2i64() {
; COMMON-LABEL: movi_32513_v2i64:
; COMMON: // %bb.0:
; COMMON-NEXT: mov w8, #32513 // =0x7f01
; COMMON-NEXT: dup v0.2d, x8
; COMMON-NEXT: ret
ret <2 x i64> splat (i64 32513)
}

; Splat of i64 -32769: one below the smallest accepted multiple of 256, so both
; configurations are expected to keep the scalar mov + dup sequence.
define <2 x i64> @movi_m32769_v2i64() {
; COMMON-LABEL: movi_m32769_v2i64:
; COMMON: // %bb.0:
; COMMON-NEXT: mov x8, #-32769 // =0xffffffffffff7fff
; COMMON-NEXT: dup v0.2d, x8
; COMMON-NEXT: ret
ret <2 x i64> splat (i64 -32769)
}

; Splat of i64 257 (0x101): within the 16-bit range but not a multiple of 256,
; so both configurations are expected to keep the scalar mov + dup sequence.
define <2 x i64> @movi_257_v2i64() {
; COMMON-LABEL: movi_257_v2i64:
; COMMON: // %bb.0:
; COMMON-NEXT: mov w8, #257 // =0x101
; COMMON-NEXT: dup v0.2d, x8
; COMMON-NEXT: ret
ret <2 x i64> splat (i64 257)
}

; <4 x i32> whose elements alternate -128, 0: both configurations are expected
; to load it from the constant pool. Presumably this is because the upper i32
; of each 64-bit chunk is zero, so the chunk is not the sign-extended value
; -128 -- TODO confirm.
define <4 x i32> @movi_v4i32_3() {
; COMMON-LABEL: movi_v4i32_3:
; COMMON: // %bb.0:
; COMMON-NEXT: adrp x8, .LCPI17_0
; COMMON-NEXT: ldr q0, [x8, :lo12:.LCPI17_0]
; COMMON-NEXT: ret
ret <4 x i32> <i32 -128, i32 0, i32 -128, i32 0>
}

; <16 x i8> with 127 in bytes 2 and 10 and zero elsewhere: both configurations
; are expected to load it from the constant pool. Presumably this is because
; the non-zero byte sits at byte 2 of each 64-bit chunk, which would need a
; shift other than 0 or 8 -- TODO confirm.
define <16 x i8> @movi_v16i8_3() {
; COMMON-LABEL: movi_v16i8_3:
; COMMON: // %bb.0:
; COMMON-NEXT: adrp x8, .LCPI18_0
; COMMON-NEXT: ldr q0, [x8, :lo12:.LCPI18_0]
; COMMON-NEXT: ret
ret <16 x i8> <i8 0, i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0>
}