Skip to content

Commit

Permalink
[RISCV] Keep same SEW/LMUL ratio if possible in forward transfer (#69788)
Browse files Browse the repository at this point in the history

For instructions like vmv.s.x and friends, where we don't care about LMUL
or the SEW/LMUL ratio, we can change the LMUL in the instruction's state so
that it has the same SEW/LMUL ratio as the previous state. This allows us
to avoid more VL toggles later down the line (i.e. it lets us emit
"vsetvli zero, zero", which requires that the SEW/LMUL ratio stay the
same).

This is an alternative approach to the idea in #69259; note that the two
don't catch exactly the same test cases.
  • Loading branch information
lukel97 committed Oct 27, 2023
1 parent b23426e commit c8e1fbc
Show file tree
Hide file tree
Showing 21 changed files with 305 additions and 294 deletions.
7 changes: 4 additions & 3 deletions llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,10 @@ getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, RISCVII::VLMUL LMUL,
llvm_unreachable("Opcode is not a vector unit stride load nor store");
}

uint8_t EMUL =
static_cast<uint8_t>(RISCVVType::getSameRatioLMUL(SEW, LMUL, EEW));
return std::make_pair(EEW, EMUL);
auto EMUL = RISCVVType::getSameRatioLMUL(SEW, LMUL, EEW);
if (!EMUL)
llvm_unreachable("Invalid SEW or LMUL for new ratio");
return std::make_pair(EEW, *EMUL);
}

unsigned RISCVInstrumentManager::getSchedClassID(
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,12 +206,14 @@ unsigned RISCVVType::getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
return (SEW * 8) / LMul;
}

RISCVII::VLMUL RISCVVType::getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL,
unsigned EEW) {
std::optional<RISCVII::VLMUL>
RISCVVType::getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL, unsigned EEW) {
unsigned Ratio = RISCVVType::getSEWLMULRatio(SEW, VLMUL);
unsigned EMULFixedPoint = (EEW * 8) / Ratio;
bool Fractional = EMULFixedPoint < 8;
unsigned EMUL = Fractional ? 8 / EMULFixedPoint : EMULFixedPoint / 8;
if (!isValidLMUL(EMUL, Fractional))
return std::nullopt;
return RISCVVType::encodeLMUL(EMUL, Fractional);
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -535,8 +535,8 @@ void printVType(unsigned VType, raw_ostream &OS);

unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul);

RISCVII::VLMUL getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL,
unsigned EEW);
std::optional<RISCVII::VLMUL>
getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL, unsigned EEW);
} // namespace RISCVVType

namespace RISCVRVC {
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,8 @@ class VSETVLIInfo {
MaskAgnostic = MA;
}

void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

unsigned encodeVTYPE() const {
assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
"Can't encode VTYPE for uninitialized or unknown");
Expand Down Expand Up @@ -1038,6 +1040,17 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
if (!RISCVII::hasVLOp(TSFlags))
return;

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
DemandedFields Demanded = getDemanded(MI, MRI, ST);
if (!Demanded.LMUL && !Demanded.SEWLMULRatio && Info.isValid() &&
PrevInfo.isValid() && !Info.isUnknown() && !PrevInfo.isUnknown()) {
if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
Info.setVLMul(*NewVLMul);
}

// For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
// VL > 0. We can discard the user requested AVL and just use the last
// one if we can prove it equally zero. This removes a vsetvli entirely
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/double_reduct.ll
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ define i16 @add_ext_i16(<16 x i8> %a, <16 x i8> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vwaddu.vv v10, v8, v9
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: vredsum.vs v8, v10, v8
; CHECK-NEXT: vmv.x.s a0, v8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) {
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vadd.vi v12, v11, -16
; CHECK-NEXT: li a0, -256
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t
; CHECK-NEXT: vmsne.vi v9, v9, 0
; CHECK-NEXT: vadd.vi v12, v11, 1
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
Original file line number Diff line number Diff line change
Expand Up @@ -550,9 +550,9 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a2, 6
; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma
; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma
; CHECK-NEXT: vmv.s.x v8, a2
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vle64.v v12, (a1)
; CHECK-NEXT: vadd.vv v8, v8, v12
; CHECK-NEXT: vse64.v v8, (a0)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -567,9 +567,9 @@ define void @buildvec_seq_v9i8(ptr %x) {
; CHECK-NEXT: vmv.v.i v8, 3
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: li a1, 146
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma
; CHECK-NEXT: vse8.v v8, (a0)
Expand Down
108 changes: 53 additions & 55 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1101,21 +1101,20 @@ define void @urem_v2i64(ptr %x, ptr %y) {
define void @mulhu_v16i8(ptr %x) {
; CHECK-LABEL: mulhu_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: lui a1, 3
; CHECK-NEXT: addi a1, a1, -2044
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: li a1, -128
; CHECK-NEXT: vmerge.vxm v10, v9, a1, v0
; CHECK-NEXT: lui a1, 1
; CHECK-NEXT: addi a2, a1, 32
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a2
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: lui a2, %hi(.LCPI65_0)
; CHECK-NEXT: addi a2, a2, %lo(.LCPI65_0)
; CHECK-NEXT: vle8.v v11, (a2)
Expand All @@ -1126,21 +1125,21 @@ define void @mulhu_v16i8(ptr %x) {
; CHECK-NEXT: vmulhu.vv v8, v8, v10
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: li a2, 513
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a2
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 4
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
; CHECK-NEXT: addi a1, a1, 78
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v9, v9, 3, v0
; CHECK-NEXT: lui a1, 8
; CHECK-NEXT: addi a1, a1, 304
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v9, v9, 2, v0
; CHECK-NEXT: vsrl.vv v8, v8, v9
; CHECK-NEXT: vse8.v v8, (a0)
Expand Down Expand Up @@ -1204,9 +1203,9 @@ define void @mulhu_v6i16(ptr %x) {
; CHECK-NEXT: vdivu.vv v9, v10, v9
; CHECK-NEXT: lui a1, 45217
; CHECK-NEXT: addi a1, a1, -1785
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vsext.vf2 v11, v10
; CHECK-NEXT: vdivu.vv v8, v8, v11
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
Expand Down Expand Up @@ -1309,10 +1308,10 @@ define void @mulhs_v16i8(ptr %x) {
; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: lui a1, 5
; CHECK-NEXT: addi a1, a1, -1452
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: li a1, 57
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vxm v9, v9, a1, v0
; CHECK-NEXT: vmulhu.vv v8, v8, v9
; CHECK-NEXT: vmv.v.i v9, 7
Expand Down Expand Up @@ -1367,9 +1366,9 @@ define void @mulhs_v6i16(ptr %x) {
; CHECK-NEXT: vdiv.vv v9, v9, v10
; CHECK-NEXT: lui a1, 1020016
; CHECK-NEXT: addi a1, a1, 2041
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vsext.vf2 v11, v10
; CHECK-NEXT: vdiv.vv v8, v8, v11
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
Expand Down Expand Up @@ -4872,45 +4871,45 @@ define void @mulhu_v32i8(ptr %x) {
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vmv.v.i v10, 0
; LMULMAX2-NEXT: lui a2, 163907
; LMULMAX2-NEXT: addi a2, a2, -2044
; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a2
; LMULMAX2-NEXT: li a2, -128
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vxm v12, v10, a2, v0
; LMULMAX2-NEXT: lui a2, 66049
; LMULMAX2-NEXT: addi a2, a2, 32
; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a2
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: lui a2, %hi(.LCPI181_0)
; LMULMAX2-NEXT: addi a2, a2, %lo(.LCPI181_0)
; LMULMAX2-NEXT: vle8.v v14, (a2)
; LMULMAX2-NEXT: lui a1, 163907
; LMULMAX2-NEXT: addi a1, a1, -2044
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: li a1, -128
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vxm v12, v10, a1, v0
; LMULMAX2-NEXT: lui a1, 66049
; LMULMAX2-NEXT: addi a1, a1, 32
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: lui a1, %hi(.LCPI181_0)
; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI181_0)
; LMULMAX2-NEXT: vle8.v v14, (a1)
; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0
; LMULMAX2-NEXT: vsrl.vv v10, v8, v10
; LMULMAX2-NEXT: vmulhu.vv v10, v10, v14
; LMULMAX2-NEXT: vsub.vv v8, v8, v10
; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12
; LMULMAX2-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-NEXT: vmv.v.i v10, 4
; LMULMAX2-NEXT: lui a2, 8208
; LMULMAX2-NEXT: addi a2, a2, 513
; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a2
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: lui a1, 8208
; LMULMAX2-NEXT: addi a1, a1, 513
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0
; LMULMAX2-NEXT: lui a2, 66785
; LMULMAX2-NEXT: addi a2, a2, 78
; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a2
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: lui a1, 66785
; LMULMAX2-NEXT: addi a1, a1, 78
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vim v10, v10, 3, v0
; LMULMAX2-NEXT: lui a2, 529160
; LMULMAX2-NEXT: addi a2, a2, 304
; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a2
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: lui a1, 529160
; LMULMAX2-NEXT: addi a1, a1, 304
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vim v10, v10, 2, v0
; LMULMAX2-NEXT: vsrl.vv v8, v8, v10
; LMULMAX2-NEXT: vse8.v v8, (a0)
Expand Down Expand Up @@ -5250,11 +5249,11 @@ define void @mulhs_v32i8(ptr %x) {
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vmv.v.i v10, 7
; LMULMAX2-NEXT: lui a2, 304453
; LMULMAX2-NEXT: addi a2, a2, -1452
; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a2
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: lui a1, 304453
; LMULMAX2-NEXT: addi a1, a1, -1452
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0
; LMULMAX2-NEXT: li a1, -123
; LMULMAX2-NEXT: vmv.v.x v12, a1
Expand All @@ -5267,15 +5266,14 @@ define void @mulhs_v32i8(ptr %x) {
;
; LMULMAX1-LABEL: mulhs_v32i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX1-NEXT: vle8.v v8, (a0)
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vle8.v v9, (a1)
; LMULMAX1-NEXT: lui a2, 5
; LMULMAX1-NEXT: addi a2, a2, -1452
; LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; LMULMAX1-NEXT: vmv.s.x v0, a2
; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; LMULMAX1-NEXT: vmv.v.i v10, -9
; LMULMAX1-NEXT: vmerge.vim v10, v10, 9, v0
; LMULMAX1-NEXT: vdivu.vv v9, v9, v10
Expand Down

0 comments on commit c8e1fbc

Please sign in to comment.