74 changes: 41 additions & 33 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4710,7 +4710,10 @@ template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
// TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
}
}

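For context, a minimal standalone sketch (not part of the patch) of the scalar identity this hunk relies on: when a multiply by a power of two is known not to wrap in the unsigned sense, the equivalent left shift does not wrap either, so the nuw flag can be carried over to the new SHL node; nsw is only safe when the shift amount is not BitWidth - 1, which is what the TODO refers to. All names below are invented for illustration.

#include <cassert>
#include <cstdint>

// Illustrative model of the combine mul x, (1 << c) -> shl nuw x, c: if the
// multiply is known not to overflow unsigned, the rewritten shift computes
// the same value and can inherit the no-unsigned-wrap flag.
static uint32_t mulPow2NoWrap(uint32_t X, unsigned C) {
  assert(C < 32 && (C == 0 || (X >> (32 - C)) == 0) &&
         "multiply would wrap unsigned");
  uint32_t ViaMul = X * (uint32_t(1) << C); // original mul form
  uint32_t ViaShl = X << C;                 // combined shl form
  assert(ViaMul == ViaShl);
  return ViaShl;
}

int main() {
  assert(mulPow2NoWrap(7, 3) == 56);
  return 0;
}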
@@ -11089,38 +11092,43 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
}

// fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
// (and (srl x, (sub c2, c1), MASK)
if (N0.getOpcode() == ISD::SHL &&
(N0.getOperand(1) == N1 || N0->hasOneUse()) &&
TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
const APInt &LHSC = LHS->getAPIntValue();
const APInt &RHSC = RHS->getAPIntValue();
return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
LHSC.getZExtValue() <= RHSC.getZExtValue();
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true)) {
SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
SDValue Mask = DAG.getAllOnesConstant(DL, VT);
Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
}
if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true)) {
SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
SDValue Mask = DAG.getAllOnesConstant(DL, VT);
Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
if (N0.getOpcode() == ISD::SHL) {
// fold (srl (shl nuw x, c), c) -> x
if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
return N0.getOperand(0);

// fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
// (and (srl x, (sub c2, c1), MASK)
if ((N0.getOperand(1) == N1 || N0->hasOneUse()) &&
TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
const APInt &LHSC = LHS->getAPIntValue();
const APInt &RHSC = RHS->getAPIntValue();
return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
LHSC.getZExtValue() <= RHSC.getZExtValue();
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true)) {
SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
SDValue Mask = DAG.getAllOnesConstant(DL, VT);
Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
}
if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true)) {
SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
SDValue Mask = DAG.getAllOnesConstant(DL, VT);
Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
}
}
}

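A hedged scalar model (again not from the patch) of the new (srl (shl nuw x, c), c) -> x fold added above: if the inner left shift is known not to shift out any set bits, shifting back right by the same amount is the identity, so the masking form of the general shift-pair fold is unnecessary.

#include <cassert>
#include <cstdint>

// Models (srl (shl nuw x, c), c) -> x: with no unsigned wrap on the shl,
// no high bits are lost, so the round trip returns x unchanged.
static uint64_t shlThenSrl(uint64_t X, unsigned C) {
  assert(C < 64 && (C == 0 || (X >> (64 - C)) == 0) && "shl would wrap");
  return (X << C) >> C; // the combiner can now replace this with plain X
}

int main() {
  assert(shlThenSrl(0x1234, 32) == 0x1234); // the pattern seen in the RV64 tests
  return 0;
}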
109 changes: 44 additions & 65 deletions llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -5,16 +5,14 @@
define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor2_v2(ptr %ptr, i32 %evl) {
; RV32-LABEL: load_factor2_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: srli a1, a1, 1
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vlseg2e32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: load_factor2_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 33
; RV64-NEXT: srli a1, a1, 33
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vlseg2e32.v v8, (a0)
; RV64-NEXT: ret
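The test churn follows directly from the two folds. These tests scale the i32 %evl by the interleave factor with a non-wrapping multiply and then scale it back down for the segment instruction, so the shl/srl round trip now folds away: RV32 needs no shift at all, and RV64 is left with only the plain zero-extension to XLEN (slli/srli by 32 rather than 33). A rough C++ model of that arithmetic, with all names invented for illustration:

#include <cassert>
#include <cstdint>

// Models the factor-2 EVL computation: the wide EVL is evl * 2 (assumed not
// to wrap), and the per-segment VL is that value divided back by 2 and
// zero-extended to the 64-bit VL register.
static uint64_t segmentVL(uint32_t Evl) {
  assert(Evl <= UINT32_MAX / 2 && "EVL is assumed not to wrap");
  uint32_t WideEvl = Evl * 2u;        // mul nuw, combined to shl nuw
  uint32_t PerSegment = WideEvl >> 1; // srl of shl nuw -> folds to Evl
  return uint64_t(PerSegment);        // on RV64 only the zext (slli/srli 32) remains
}

int main() {
  assert(segmentVL(5) == 5);
  return 0;
}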
@@ -142,16 +140,14 @@ merge:
define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor4_v2(ptr %ptr, i32 %evl) {
; RV32-LABEL: load_factor4_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: srli a1, a1, 2
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vlseg4e32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: load_factor4_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 34
; RV64-NEXT: srli a1, a1, 34
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vlseg4e32.v v8, (a0)
; RV64-NEXT: ret
@@ -237,16 +233,14 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor8_v2(ptr %ptr, i32 %evl) {
; RV32-LABEL: load_factor8_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vlseg8e32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: load_factor8_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 35
; RV64-NEXT: srli a1, a1, 35
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vlseg8e32.v v8, (a0)
; RV64-NEXT: ret
@@ -276,16 +270,14 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
define void @store_factor2_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
; RV32-LABEL: store_factor2_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: srli a1, a1, 1
; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; RV32-NEXT: vsseg2e32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: store_factor2_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 33
; RV64-NEXT: srli a1, a1, 33
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; RV64-NEXT: vsseg2e32.v v8, (a0)
; RV64-NEXT: ret
@@ -384,8 +376,6 @@ define void @store_factor7_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <v
define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
; RV32-LABEL: store_factor8_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; RV32-NEXT: vmv1r.v v10, v8
; RV32-NEXT: vmv1r.v v11, v9
@@ -398,8 +388,8 @@ define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
;
; RV64-LABEL: store_factor8_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 35
; RV64-NEXT: srli a1, a1, 35
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; RV64-NEXT: vmv1r.v v10, v8
; RV64-NEXT: vmv1r.v v11, v9
@@ -418,16 +408,14 @@ define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
define {<vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor2_v2(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
; RV32-LABEL: masked_load_factor2_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: srli a1, a1, 1
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vlseg2e32.v v8, (a0), v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: masked_load_factor2_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 33
; RV64-NEXT: srli a1, a1, 33
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vlseg2e32.v v8, (a0), v0.t
; RV64-NEXT: ret
@@ -445,16 +433,14 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor2_v2(<vscale
define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor4_v2(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
; RV32-LABEL: masked_load_factor4_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: srli a1, a1, 2
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vlseg4e32.v v8, (a0), v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: masked_load_factor4_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 34
; RV64-NEXT: srli a1, a1, 34
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vlseg4e32.v v8, (a0), v0.t
; RV64-NEXT: ret
@@ -477,20 +463,17 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
define void @masked_store_factor2_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
; RV32-LABEL: masked_store_factor2_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT: vmv1r.v v9, v8
; RV32-NEXT: srli a1, a1, 1
; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; RV32-NEXT: vmv1r.v v9, v8
; RV32-NEXT: vsseg2e32.v v8, (a0), v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: masked_store_factor2_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 33
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT: vmv1r.v v9, v8
; RV64-NEXT: srli a1, a1, 33
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; RV64-NEXT: vsseg2e32.v v8, (a0), v0.t
; RV64-NEXT: ret
@@ -504,17 +487,15 @@ define void @masked_store_factor2_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32>
define void @masked_load_store_factor2_v2_shared_mask(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
; RV32-LABEL: masked_load_store_factor2_v2_shared_mask:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: srli a1, a1, 1
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vlseg2e32.v v8, (a0), v0.t
; RV32-NEXT: vsseg2e32.v v8, (a0), v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: masked_load_store_factor2_v2_shared_mask:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 33
; RV64-NEXT: srli a1, a1, 33
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vlseg2e32.v v8, (a0), v0.t
; RV64-NEXT: vsseg2e32.v v8, (a0), v0.t
@@ -537,37 +518,36 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
; RV32-NEXT: vmv1r.v v8, v0
; RV32-NEXT: slli a2, a1, 1
; RV32-NEXT: vmv.v.i v9, 0
; RV32-NEXT: li a1, -1
; RV32-NEXT: li a3, -1
; RV32-NEXT: vmerge.vim v10, v9, 1, v0
; RV32-NEXT: vwaddu.vv v11, v10, v10
; RV32-NEXT: vwmaccu.vx v11, a1, v10
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: vwmaccu.vx v11, a3, v10
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; RV32-NEXT: vmv.v.i v10, 0
; RV32-NEXT: srli a1, a1, 2
; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT: srli a3, a3, 2
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vmsne.vi v0, v11, 0
; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
; RV32-NEXT: vslidedown.vx v11, v11, a1
; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; RV32-NEXT: vslidedown.vx v11, v11, a3
; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; RV32-NEXT: vmerge.vim v10, v10, 1, v0
; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vmsne.vi v0, v11, 0
; RV32-NEXT: vmerge.vim v9, v9, 1, v0
; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; RV32-NEXT: vslideup.vx v10, v9, a1
; RV32-NEXT: vslideup.vx v10, v9, a3
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vle32.v v10, (a0), v0.t
; RV32-NEXT: li a1, 32
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; RV32-NEXT: vnsrl.wx v13, v10, a1
; RV32-NEXT: vmv.x.s a1, v10
; RV32-NEXT: vnsrl.wx v13, v10, a2
; RV32-NEXT: vnsrl.wi v12, v10, 0
; RV32-NEXT: srli a2, a2, 1
; RV32-NEXT: vmv.x.s a2, v10
; RV32-NEXT: vmv1r.v v0, v8
; RV32-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vsseg2e32.v v12, (a0), v0.t
; RV32-NEXT: mv a0, a1
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: masked_load_store_factor2_v2_shared_mask_extract:
@@ -590,20 +570,21 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
; RV64-NEXT: vmerge.vim v10, v10, 1, v0
; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT: vmsne.vi v0, v11, 0
; RV64-NEXT: slli a3, a1, 33
; RV64-NEXT: vmerge.vim v9, v9, 1, v0
; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
; RV64-NEXT: vslideup.vx v10, v9, a2
; RV64-NEXT: slli a2, a1, 33
; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: srli a1, a3, 32
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT: srli a2, a2, 32
; RV64-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; RV64-NEXT: vle32.v v10, (a0), v0.t
; RV64-NEXT: li a1, 32
; RV64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; RV64-NEXT: vnsrl.wx v13, v10, a1
; RV64-NEXT: li a2, 32
; RV64-NEXT: slli a3, a1, 32
; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT: vnsrl.wx v13, v10, a2
; RV64-NEXT: vmv.x.s a1, v10
; RV64-NEXT: vnsrl.wi v12, v10, 0
; RV64-NEXT: srli a3, a3, 33
; RV64-NEXT: srli a3, a3, 32
; RV64-NEXT: vmv1r.v v0, v8
; RV64-NEXT: vsetvli zero, a3, e32, m1, ta, ma
; RV64-NEXT: vsseg2e32.v v12, (a0), v0.t
@@ -624,8 +605,6 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
define void @masked_store_factor4_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
; RV32-LABEL: masked_store_factor4_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: srli a1, a1, 2
; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; RV32-NEXT: vmv1r.v v10, v8
; RV32-NEXT: vmv1r.v v11, v9
@@ -634,8 +613,8 @@ define void @masked_store_factor4_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32>
;
; RV64-LABEL: masked_store_factor4_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 34
; RV64-NEXT: srli a1, a1, 34
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; RV64-NEXT: vmv1r.v v10, v8
; RV64-NEXT: vmv1r.v v11, v9