Skip to content

Commit

Permalink
[SelectionDAG][AArch64] Constant fold in SelectionDAG::getVScale if VScaleMin==VScaleMax.
Browse files Browse the repository at this point in the history

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D145113
  • Loading branch information
topperc committed Mar 2, 2023
1 parent d14e7ee commit 06c6b78
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 90 deletions.
8 changes: 2 additions & 6 deletions llvm/include/llvm/CodeGen/SelectionDAG.h
Expand Up @@ -1059,12 +1059,8 @@ class SelectionDAG {
}

/// Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm) {
assert(MulImm.getSignificantBits() <= VT.getSizeInBits() &&
"Immediate does not fit VT");
return getNode(ISD::VSCALE, DL, VT,
getConstant(MulImm.sextOrTrunc(VT.getSizeInBits()), DL, VT));
}
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
bool ConstantFold = true);

/// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getGLOBAL_OFFSET_TABLE(EVT VT) {
Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Expand Up @@ -1935,6 +1935,27 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
return SDValue(CondCodeNodes[Cond], 0);
}

/// Return a node representing 'MulImm * vscale'. When the enclosing
/// function's vscale_range attribute pins vscale to a single value (its
/// minimum equals its maximum) and \p ConstantFold is set, the expression
/// is folded to a plain constant instead of emitting an ISD::VSCALE node.
SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
                                bool ConstantFold) {
  assert(MulImm.getSignificantBits() <= VT.getSizeInBits() &&
         "Immediate does not fit VT");

  // Normalise the multiplier to exactly the bit width of VT.
  MulImm = MulImm.sextOrTrunc(VT.getSizeInBits());

  if (ConstantFold) {
    // vscale_range(min, max) with min == max makes vscale a compile-time
    // constant, so the whole scaling expression folds away.
    const Function &F = getMachineFunction().getFunction();
    auto RangeAttr = F.getFnAttribute(Attribute::VScaleRange);
    if (RangeAttr.isValid()) {
      unsigned MinVScale = RangeAttr.getVScaleRangeMin();
      std::optional<unsigned> MaxVScale = RangeAttr.getVScaleRangeMax();
      if (MaxVScale && *MaxVScale == MinVScale)
        return getConstant(MulImm * MinVScale, DL, VT);
    }
  }

  return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
}

SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) {
APInt One(ResVT.getScalarSizeInBits(), 1);
return getStepVector(DL, ResVT, One);
Expand Down
11 changes: 2 additions & 9 deletions llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
Expand Up @@ -209,18 +209,11 @@ define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_large_i32(<vsca
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cntd x8
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: subs x8, x8, #8
; CHECK-NEXT: ptrue p1.d, vl8
; CHECK-NEXT: csel x8, xzr, x8, lo
; CHECK-NEXT: mov w9, #8
; CHECK-NEXT: cmp x8, #8
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: ld1w { z0.d }, p1/z, [x1]
; CHECK-NEXT: st1d { z0.d }, p0, [x9, x8, lsl #3]
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
Expand Down
72 changes: 13 additions & 59 deletions llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
Expand Up @@ -84,16 +84,11 @@ define <4 x i32> @extract_v4i32_nxv2i32_idx4(<vscale x 2 x i32> %vec) nounwind #
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntd x8
; CHECK-NEXT: mov w9, #4
; CHECK-NEXT: subs x8, x8, #4
; CHECK-NEXT: mov x8, #4
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: csel x8, xzr, x8, lo
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: cmp x8, #4
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
Expand Down Expand Up @@ -154,16 +149,11 @@ define <8 x i16> @extract_v8i16_nxv4i16_idx8(<vscale x 4 x i16> %vec) nounwind #
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntw x8
; CHECK-NEXT: mov w9, #8
; CHECK-NEXT: subs x8, x8, #8
; CHECK-NEXT: mov x8, #8
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: csel x8, xzr, x8, lo
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: cmp x8, #8
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
Expand Down Expand Up @@ -192,16 +182,11 @@ define <8 x i16> @extract_v8i16_nxv2i16_idx8(<vscale x 2 x i16> %vec) nounwind #
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntd x8
; CHECK-NEXT: mov w9, #8
; CHECK-NEXT: subs x8, x8, #8
; CHECK-NEXT: mov x8, #8
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: csel x8, xzr, x8, lo
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: cmp x8, #8
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
Expand Down Expand Up @@ -262,16 +247,11 @@ define <16 x i8> @extract_v16i8_nxv8i8_idx16(<vscale x 8 x i8> %vec) nounwind #1
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cnth x8
; CHECK-NEXT: mov w9, #16
; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: mov x8, #16
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: csel x8, xzr, x8, lo
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: cmp x8, #16
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9, x8, lsl #1]
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
Expand Down Expand Up @@ -300,16 +280,11 @@ define <16 x i8> @extract_v16i8_nxv4i8_idx16(<vscale x 4 x i8> %vec) nounwind #1
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntw x8
; CHECK-NEXT: mov w9, #16
; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: mov x8, #16
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: csel x8, xzr, x8, lo
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: cmp x8, #16
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
Expand Down Expand Up @@ -340,16 +315,9 @@ define <16 x i8> @extract_v16i8_nxv2i8_idx16(<vscale x 2 x i8> %vec) nounwind #1
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntd x8
; CHECK-NEXT: mov w9, #16
; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: csel x8, xzr, x8, lo
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: cmp x8, #16
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
Expand Down Expand Up @@ -468,16 +436,9 @@ define <2 x i64> @extract_fixed_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntd x8
; CHECK-NEXT: mov w9, #2
; CHECK-NEXT: sub x8, x8, #2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmp x8, #2
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: ldr q0, [x9, x8]
; CHECK-NEXT: ldr q0, [sp, #16]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
Expand All @@ -490,16 +451,9 @@ define <4 x i64> @extract_fixed_v4i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntd x9
; CHECK-NEXT: mov w10, #4
; CHECK-NEXT: subs x9, x9, #4
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: csel x9, xzr, x9, lo
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: cmp x9, #4
; CHECK-NEXT: csel x9, x9, x10, lo
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x10, x9, lsl #3]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
Expand Down
18 changes: 2 additions & 16 deletions llvm/test/CodeGen/AArch64/sve-insert-vector.ll
Expand Up @@ -374,16 +374,9 @@ define <vscale x 2 x i64> @insert_fixed_v2i64_nxv2i64(<vscale x 2 x i64> %vec, <
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntd x8
; CHECK-NEXT: mov w9, #2
; CHECK-NEXT: sub x8, x8, #2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmp x8, #2
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: str q1, [sp, #16]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
Expand All @@ -397,17 +390,10 @@ define <vscale x 2 x i64> @insert_fixed_v4i64_nxv2i64(<vscale x 2 x i64> %vec, <
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntd x8
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: subs x8, x8, #4
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT: csel x8, xzr, x8, lo
; CHECK-NEXT: mov w9, #4
; CHECK-NEXT: cmp x8, #4
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1d { z1.d }, p0, [x9, x8, lsl #3]
; CHECK-NEXT: st1d { z1.d }, p0, [sp]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
Expand Down

0 comments on commit 06c6b78

Please sign in to comment.