Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 37 additions & 34 deletions llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4403,43 +4403,46 @@ bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {

bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
bool Invert) {
if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
uint64_t ImmVal = CNode->getZExtValue();
SDLoc DL(N);

if (Invert)
ImmVal = ~ImmVal;
uint64_t ImmVal;
if (auto CI = dyn_cast<ConstantSDNode>(N))
ImmVal = CI->getZExtValue();
else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
else
return false;

// Shift mask depending on type size.
switch (VT.SimpleTy) {
case MVT::i8:
ImmVal &= 0xFF;
ImmVal |= ImmVal << 8;
ImmVal |= ImmVal << 16;
ImmVal |= ImmVal << 32;
break;
case MVT::i16:
ImmVal &= 0xFFFF;
ImmVal |= ImmVal << 16;
ImmVal |= ImmVal << 32;
break;
case MVT::i32:
ImmVal &= 0xFFFFFFFF;
ImmVal |= ImmVal << 32;
break;
case MVT::i64:
break;
default:
llvm_unreachable("Unexpected type");
}
if (Invert)
ImmVal = ~ImmVal;

uint64_t encoding;
if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
return true;
}
// Shift mask depending on type size.
switch (VT.SimpleTy) {
case MVT::i8:
ImmVal &= 0xFF;
ImmVal |= ImmVal << 8;
ImmVal |= ImmVal << 16;
ImmVal |= ImmVal << 32;
break;
case MVT::i16:
ImmVal &= 0xFFFF;
ImmVal |= ImmVal << 16;
ImmVal |= ImmVal << 32;
break;
case MVT::i32:
ImmVal &= 0xFFFFFFFF;
ImmVal |= ImmVal << 32;
break;
case MVT::i64:
break;
default:
llvm_unreachable("Unexpected type");
}
return false;

uint64_t encoding;
if (!AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding))
return false;

Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
return true;
}

// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -989,7 +989,7 @@ let Predicates = [HasSVE_or_SME] in {
(DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>;

// Duplicate FP immediate into all vector elements
let AddedComplexity = 2 in {
let AddedComplexity = 3 in {
def : Pat<(nxv8f16 (splat_vector fpimm16:$imm8)),
(FDUP_ZI_H fpimm16:$imm8)>;
def : Pat<(nxv4f16 (splat_vector fpimm16:$imm8)),
Expand Down
25 changes: 25 additions & 0 deletions llvm/lib/Target/AArch64/SVEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,11 @@ def SVELogicalImm16Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i16>",
def SVELogicalImm32Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i32>", []>;
def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>", []>;

def SVELogicalFPImm16Pat : ComplexPattern<f16, 1, "SelectSVELogicalImm<MVT::i16>", []>;
def SVELogicalFPImm32Pat : ComplexPattern<f32, 1, "SelectSVELogicalImm<MVT::i32>", []>;
def SVELogicalFPImm64Pat : ComplexPattern<f64, 1, "SelectSVELogicalImm<MVT::i64>", []>;
def SVELogicalBFPImmPat : ComplexPattern<bf16, 1, "SelectSVELogicalImm<MVT::i16>", []>;

def SVELogicalImm8NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i8, true>", []>;
def SVELogicalImm16NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i16, true>", []>;
def SVELogicalImm32NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i32, true>", []>;
Expand Down Expand Up @@ -2160,6 +2165,26 @@ multiclass sve_int_dup_mask_imm<string asm> {
(!cast<Instruction>(NAME) i64:$imm)>;
def : Pat<(nxv2i64 (splat_vector (i64 (SVELogicalImm64Pat i64:$imm)))),
(!cast<Instruction>(NAME) i64:$imm)>;

def : Pat<(nxv8f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
(!cast<Instruction>(NAME) i64:$imm)>;
def : Pat<(nxv4f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
(!cast<Instruction>(NAME) i64:$imm)>;
def : Pat<(nxv2f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
(!cast<Instruction>(NAME) i64:$imm)>;
def : Pat<(nxv4f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))),
(!cast<Instruction>(NAME) i64:$imm)>;
def : Pat<(nxv2f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))),
(!cast<Instruction>(NAME) i64:$imm)>;
def : Pat<(nxv2f64 (splat_vector (f64 (SVELogicalFPImm64Pat i64:$imm)))),
(!cast<Instruction>(NAME) i64:$imm)>;

def : Pat<(nxv8bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
(!cast<Instruction>(NAME) i64:$imm)>;
def : Pat<(nxv4bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
(!cast<Instruction>(NAME) i64:$imm)>;
def : Pat<(nxv2bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
(!cast<Instruction>(NAME) i64:$imm)>;
}

//===----------------------------------------------------------------------===//
Expand Down
8 changes: 2 additions & 6 deletions llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,6 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
; SVE: // %bb.0:
; SVE-NEXT: uunpkhi z3.s, z2.h
; SVE-NEXT: uunpkhi z4.s, z1.h
; SVE-NEXT: mov w8, #32768 // =0x8000
; SVE-NEXT: uunpklo z2.s, z2.h
; SVE-NEXT: uunpklo z1.s, z1.h
; SVE-NEXT: ptrue p1.s
Expand All @@ -643,9 +642,8 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
; SVE-NEXT: fmul z3.s, z4.s, z3.s
; SVE-NEXT: fmul z1.s, z1.s, z2.s
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
; SVE-NEXT: fmov h3, w8
; SVE-NEXT: dupm z3.h, #0x8000
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
; SVE-NEXT: mov z3.h, h3
; SVE-NEXT: uzp1 z1.h, z1.h, z2.h
; SVE-NEXT: sel z1.h, p0, z1.h, z3.h
; SVE-NEXT: uunpkhi z3.s, z0.h
Expand All @@ -665,10 +663,8 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
;
; SVE-B16B16-LABEL: fsub_sel_fmul_negzero_nxv8bf16:
; SVE-B16B16: // %bb.0:
; SVE-B16B16-NEXT: mov w8, #32768 // =0x8000
; SVE-B16B16-NEXT: dupm z3.h, #0x8000
; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h
; SVE-B16B16-NEXT: fmov h3, w8
; SVE-B16B16-NEXT: mov z3.h, h3
; SVE-B16B16-NEXT: sel z1.h, p0, z1.h, z3.h
; SVE-B16B16-NEXT: bfsub z0.h, z0.h, z1.h
; SVE-B16B16-NEXT: ret
Expand Down
15 changes: 6 additions & 9 deletions llvm/test/CodeGen/AArch64/sve-fp-combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1134,10 +1134,9 @@ define <vscale x 2 x double> @fadd_sel_fmul_d_negzero(<vscale x 2 x double> %a,
define <vscale x 8 x half> @fsub_sel_fmul_h_negzero(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: fsub_sel_fmul_h_negzero:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #32768 // =0x8000
; CHECK-NEXT: dupm z3.h, #0x8000
; CHECK-NEXT: fmul z1.h, z1.h, z2.h
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h
; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h
; CHECK-NEXT: fsub z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%fmul = fmul <vscale x 8 x half> %b, %c
Expand All @@ -1150,10 +1149,9 @@ define <vscale x 8 x half> @fsub_sel_fmul_h_negzero(<vscale x 8 x half> %a, <vsc
define <vscale x 4 x float> @fsub_sel_fmul_s_negzero(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: fsub_sel_fmul_s_negzero:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000
; CHECK-NEXT: mov z3.s, #0x80000000
; CHECK-NEXT: fmul z1.s, z1.s, z2.s
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s
; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s
; CHECK-NEXT: fsub z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%fmul = fmul <vscale x 4 x float> %b, %c
Expand All @@ -1166,10 +1164,9 @@ define <vscale x 4 x float> @fsub_sel_fmul_s_negzero(<vscale x 4 x float> %a, <v
define <vscale x 2 x double> @fsub_sel_fmul_d_negzero(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: fsub_sel_fmul_d_negzero:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT: mov z3.d, #0x8000000000000000
; CHECK-NEXT: fmul z1.d, z1.d, z2.d
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d
; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d
; CHECK-NEXT: fsub z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%fmul = fmul <vscale x 2 x double> %b, %c
Expand Down
15 changes: 6 additions & 9 deletions llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,9 @@ define half @fadda_nxv6f16(<vscale x 6 x half> %v, half %s) {
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov w8, #32768 // =0x8000
; CHECK-NEXT: dupm z2.h, #0x8000
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: fmov s0, s1
; CHECK-NEXT: st1h { z2.d }, p0, [sp, #3, mul vl]
; CHECK-NEXT: ptrue p0.h
Expand All @@ -77,12 +76,11 @@ define half @fadda_nxv10f16(<vscale x 10 x half> %v, half %s) {
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2
; CHECK-NEXT: mov w8, #32768 // =0x8000
; CHECK-NEXT: str z1, [sp]
; CHECK-NEXT: addvl x8, sp, #1
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: fadda h2, p0, h2, z0.h
; CHECK-NEXT: mov z0.h, w8
; CHECK-NEXT: addvl x8, sp, #1
; CHECK-NEXT: dupm z0.h, #0x8000
; CHECK-NEXT: st1h { z0.d }, p1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z1, [sp]
; CHECK-NEXT: str z1, [sp, #1, mul vl]
Expand All @@ -105,11 +103,10 @@ define half @fadda_nxv12f16(<vscale x 12 x half> %v, half %s) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2
; CHECK-NEXT: mov w8, #32768 // =0x8000
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: fadda h2, p0, h2, z0.h
; CHECK-NEXT: uunpklo z0.s, z1.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT: dupm z0.h, #0x8000
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: fadda h2, p0, h2, z0.h
; CHECK-NEXT: fmov s0, s2
; CHECK-NEXT: ret
Expand Down
Loading
Loading