[RISCV] Use fractional LMULs for fixed length types smaller than riscv-v-vector-bits-min.

My thought process is that if v2i64 is an LMUL=1 type then v2i32
should be an LMUL=1/2 type. We limit the fractional LMUL so that
SEW=64 clips to LMUL=1, SEW=32 clips to LMUL=1/2, etc. This
ensures there's always a fractional LMUL available to truncate a type.
This does reduce the number of vsetvlis in some cases.
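As a concrete illustration, here is a minimal standalone sketch (not the LLVM code itself; it assumes RISCV::RVVBitsPerBlock is 64 and a riscv-v-vector-bits-min of 128) of how the divideCeil-based formula in this patch picks a container and its LMUL:

    #include <cstdio>

    static unsigned divideCeil(unsigned A, unsigned B) { return (A + B - 1) / B; }

    int main() {
      const unsigned RVVBitsPerBlock = 64; // bits of one LMUL=1 block (e.g. nxv1i64)
      const unsigned MinVLen = 128;        // assumed riscv-v-vector-bits-min

      struct { const char *Name; unsigned NumElts, SEW; } Tests[] = {
          {"v2i64", 2, 64}, {"v4i32", 4, 32}, {"v2i32", 2, 32}, {"v8i8", 8, 8}};

      for (auto &T : Tests) {
        // Container element count: the fixed element count divided by how many
        // RVVBitsPerBlock-sized blocks fit in MinVLen (rounded up).
        unsigned ContainerElts = divideCeil(T.NumElts, MinVLen / RVVBitsPerBlock);
        // LMUL is the container's share of one block: ContainerElts * SEW / 64.
        double LMul = double(ContainerElts * T.SEW) / RVVBitsPerBlock;
        printf("%s -> nxv%ui%u (LMUL=%g)\n", T.Name, ContainerElts, T.SEW, LMul);
      }
    }

With these assumptions it prints v2i64 -> nxv1i64 (LMUL=1), v4i32 -> nxv2i32 (LMUL=1), v2i32 -> nxv1i32 (LMUL=0.5), and v8i8 -> nxv4i8 (LMUL=0.5), matching the clipping described above.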

Some tests see an increase in vsetvlis because the best container type for a
mask type depends on the LMUL+SEW that the mask was produced from, but you
can't tell that from the type. I think this is something we need to solve
in the machine IR when optimizing vsetvlis.
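To illustrate the ambiguity (a sketch under an assumed VLEN of 128; the vtype pairs are standard RVV encodings, but the code is only an illustration, not part of this patch): every (SEW, LMUL) pair with the same SEW/LMUL ratio produces the same VLMAX, and therefore the same scalable mask type, so the mask type alone cannot recover the producer's vtype:

    #include <cstdio>

    int main() {
      const unsigned VLEN = 128; // assumed for the printed numbers
      // LMUL stored in eighths so fractional values stay exact.
      struct { unsigned SEW, LMulEighths; } VTypes[] = {
          {8, 4},   // e8,mf2
          {16, 8},  // e16,m1
          {32, 16}, // e32,m2
          {64, 32}, // e64,m4
      };
      for (auto &VT : VTypes) {
        unsigned VLMax = VLEN * VT.LMulEighths / 8 / VT.SEW;
        // A mask nxvKi1 holds K * (VLEN/64) elements, so K = VLMax / (VLEN/64).
        printf("e%u, LMUL=%u/8 -> VLMAX=%u -> mask type nxv%ui1\n",
               VT.SEW, VT.LMulEighths, VLMax, VLMax / (VLEN / 64));
      }
    }

All four rows print the same nxv4i1 mask type, which is exactly why the best container for a mask can't be chosen from the type in isolation.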

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D101215
topperc committed May 11, 2021
1 parent 5f78ba0 commit ce6e4f2
Showing 58 changed files with 1,840 additions and 1,824 deletions.
18 changes: 7 additions & 11 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1216,24 +1216,20 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
switch (EltVT.SimpleTy) {
default:
llvm_unreachable("unexpected element type for RVV container");
-case MVT::i1: {
-// Masks are calculated assuming 8-bit elements since that's when we need
-// the most elements.
-MinVLen /= 8;
-unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
-unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
-return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
-}
+case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f16:
case MVT::f32:
case MVT::f64: {
-unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
-unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
-return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
+// We prefer to use LMUL=1 for VLEN sized types. Use fractional LMULs for
+// narrower types, but we can't have a fractional LMUL with denominator
+// less than 64/SEW.
+unsigned NumElts =
+divideCeil(VT.getVectorNumElements(), MinVLen / RISCV::RVVBitsPerBlock);
+return MVT::getScalableVectorVT(EltVT, NumElts);
}
}
}
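As a quick check of the truncation property the new comment describes, this standalone sketch (assuming MinVLen=128 and RVVBitsPerBlock=64; not LLVM API) walks one container element count down the SEW ladder and prints the resulting LMUL:

    #include <cstdio>

    int main() {
      unsigned ContainerElts = 1; // e.g. the container for <2 x i64> at MinVLen=128
      for (unsigned SEW = 64; SEW >= 8; SEW /= 2) {
        // LMUL = ContainerElts * SEW / 64, kept in eighths so it stays exact.
        unsigned LMulEighths = ContainerElts * SEW * 8 / 64;
        printf("nxv%ui%u -> LMUL = %u/8\n", ContainerElts, SEW, LMulEighths);
      }
    }

Each halving of SEW lands on the next fractional LMUL (m1 -> mf2 -> mf4 -> mf8), so a truncate always has a legal container to target.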
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
@@ -17,7 +17,7 @@ define <32 x i1> @bitcast_v4i8_v32i1(<4 x i8> %a, <32 x i1> %b) {
define i8 @bitcast_v1i8_i8(<1 x i8> %a) {
; CHECK-LABEL: bitcast_v1i8_i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT: vsetvli zero, zero, e8,mf8,ta,mu
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%b = bitcast <1 x i8> %a to i8
@@ -27,7 +27,7 @@ define i8 @bitcast_v1i8_i8(<1 x i8> %a) {
define i16 @bitcast_v2i8_i16(<2 x i8> %a) {
; CHECK-LABEL: bitcast_v2i8_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%b = bitcast <2 x i8> %a to i16
@@ -37,7 +37,7 @@ define i16 @bitcast_v2i8_i16(<2 x i8> %a) {
define i16 @bitcast_v1i16_i16(<1 x i16> %a) {
; CHECK-LABEL: bitcast_v1i16_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%b = bitcast <1 x i16> %a to i16
@@ -47,7 +47,7 @@ define i16 @bitcast_v1i16_i16(<1 x i16> %a) {
define i32 @bitcast_v4i8_i32(<4 x i8> %a) {
; CHECK-LABEL: bitcast_v4i8_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%b = bitcast <4 x i8> %a to i32
@@ -57,7 +57,7 @@ define i32 @bitcast_v4i8_i32(<4 x i8> %a) {
define i32 @bitcast_v2i16_i32(<2 x i16> %a) {
; CHECK-LABEL: bitcast_v2i16_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%b = bitcast <2 x i16> %a to i32
@@ -67,7 +67,7 @@ define i32 @bitcast_v2i16_i32(<2 x i16> %a) {
define i32 @bitcast_v1i32_i32(<1 x i32> %a) {
; CHECK-LABEL: bitcast_v1i32_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%b = bitcast <1 x i32> %a to i32
@@ -153,7 +153,7 @@ define i64 @bitcast_v1i64_i64(<1 x i64> %a) {
define half @bitcast_v2i8_f16(<2 x i8> %a) {
; CHECK-LABEL: bitcast_v2i8_f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%b = bitcast <2 x i8> %a to half
@@ -163,7 +163,7 @@ define half @bitcast_v2i8_f16(<2 x i8> %a) {
define half @bitcast_v1i16_f16(<1 x i16> %a) {
; CHECK-LABEL: bitcast_v1i16_f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%b = bitcast <1 x i16> %a to half
@@ -173,7 +173,7 @@ define half @bitcast_v1i16_f16(<1 x i16> %a) {
define float @bitcast_v4i8_f32(<4 x i8> %a) {
; CHECK-LABEL: bitcast_v4i8_f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%b = bitcast <4 x i8> %a to float
@@ -183,7 +183,7 @@ define float @bitcast_v4i8_f32(<4 x i8> %a) {
define float @bitcast_v2i16_f32(<2 x i16> %a) {
; CHECK-LABEL: bitcast_v2i16_f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%b = bitcast <2 x i16> %a to float
@@ -193,7 +193,7 @@ define float @bitcast_v2i16_f32(<2 x i16> %a) {
define float @bitcast_v1i32_f32(<1 x i32> %a) {
; CHECK-LABEL: bitcast_v1i32_f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%b = bitcast <1 x i32> %a to float
@@ -279,7 +279,7 @@ define double @bitcast_v1i64_f64(<1 x i64> %a) {
define <1 x i16> @bitcast_i16_v1i16(i16 %a) {
; CHECK-LABEL: bitcast_i16_v1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vsetivli a1, 1, e16,mf4,ta,mu
; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: ret
%b = bitcast i16 %a to <1 x i16>
@@ -289,13 +289,13 @@ define <1 x i16> @bitcast_i16_v1i16(i16 %a) {
define <2 x i16> @bitcast_i32_v2i16(i32 %a) {
; RV32-LABEL: bitcast_i32_v2i16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli a1, 1, e32,m1,ta,mu
+; RV32-NEXT: vsetivli a1, 1, e32,mf2,ta,mu
; RV32-NEXT: vmv.s.x v8, a0
; RV32-NEXT: ret
;
; RV64-LABEL: bitcast_i32_v2i16:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli a1, 1, e32,m1,ta,mu
+; RV64-NEXT: vsetivli a1, 1, e32,mf2,ta,mu
; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: ret
%b = bitcast i32 %a to <2 x i16>
@@ -305,13 +305,13 @@ define <2 x i16> @bitcast_i32_v2i16(i32 %a) {
define <1 x i32> @bitcast_i32_v1i32(i32 %a) {
; RV32-LABEL: bitcast_i32_v1i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli a1, 1, e32,m1,ta,mu
+; RV32-NEXT: vsetivli a1, 1, e32,mf2,ta,mu
; RV32-NEXT: vmv.s.x v8, a0
; RV32-NEXT: ret
;
; RV64-LABEL: bitcast_i32_v1i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli a1, 1, e32,m1,ta,mu
+; RV64-NEXT: vsetivli a1, 1, e32,mf2,ta,mu
; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: ret
%b = bitcast i32 %a to <1 x i32>
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
@@ -381,7 +381,7 @@ define void @bitreverse_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: lui a4, 4080
; LMULMAX2-RV32-NEXT: vand.vx v27, v27, a4
; LMULMAX2-RV32-NEXT: addi a5, zero, 5
-; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
+; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e8,mf8,ta,mu
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a5
; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.v.i v28, 0
@@ -581,7 +581,7 @@ define void @bitreverse_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: lui a4, 4080
; LMULMAX1-RV32-NEXT: vand.vx v27, v27, a4
; LMULMAX1-RV32-NEXT: addi a5, zero, 5
-; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e8,mf8,ta,mu
; LMULMAX1-RV32-NEXT: vmv.s.x v0, a5
; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.i v28, 0
@@ -1245,7 +1245,7 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: lui a4, 4080
; LMULMAX2-RV32-NEXT: vand.vx v30, v30, a4
; LMULMAX2-RV32-NEXT: addi a5, zero, 85
-; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
+; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e8,mf8,ta,mu
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a5
; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0
@@ -1447,7 +1447,7 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: lui a6, 4080
; LMULMAX1-RV32-NEXT: vand.vx v28, v26, a6
; LMULMAX1-RV32-NEXT: addi a5, zero, 5
-; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e8,mf8,ta,mu
; LMULMAX1-RV32-NEXT: vmv.s.x v0, a5
; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.i v26, 0
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
@@ -7,7 +7,7 @@
define <4 x i8> @ret_v4i8(<4 x i8>* %p) {
; CHECK-LABEL: ret_v4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; CHECK-NEXT: vsetivli a1, 4, e8,mf4,ta,mu
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: ret
%v = load <4 x i8>, <4 x i8>* %p
@@ -107,7 +107,7 @@ define <16 x i64> @ret_v16i64(<16 x i64>* %p) {
define <8 x i1> @ret_mask_v8i1(<8 x i1>* %p) {
; CHECK-LABEL: ret_mask_v8i1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vsetivli a1, 8, e8,mf2,ta,mu
; CHECK-NEXT: vle1.v v0, (a0)
; CHECK-NEXT: ret
%v = load <8 x i1>, <8 x i1>* %p
@@ -490,7 +490,7 @@ define <128 x i32> @ret_split_v128i32(<128 x i32>* %x) {
define <4 x i8> @ret_v8i8_param_v4i8(<4 x i8> %v) {
; CHECK-LABEL: ret_v8i8_param_v4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vsetivli a0, 4, e8,mf4,ta,mu
; CHECK-NEXT: vadd.vi v8, v8, 2
; CHECK-NEXT: ret
%r = add <4 x i8> %v, <i8 2, i8 2, i8 2, i8 2>
@@ -500,7 +500,7 @@ define <4 x i8> @ret_v8i8_param_v4i8(<4 x i8> %v) {
define <4 x i8> @ret_v4i8_param_v4i8_v4i8(<4 x i8> %v, <4 x i8> %w) {
; CHECK-LABEL: ret_v4i8_param_v4i8_v4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vsetivli a0, 4, e8,mf4,ta,mu
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
%r = add <4 x i8> %v, %w
@@ -539,7 +539,7 @@ define <4 x i64> @ret_v4i64_param_v4i64_v4i64(<4 x i64> %v, <4 x i64> %w) {
define <8 x i1> @ret_v8i1_param_v8i1_v8i1(<8 x i1> %v, <8 x i1> %w) {
; CHECK-LABEL: ret_v8i1_param_v8i1_v8i1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
; CHECK-NEXT: vmxor.mm v0, v0, v8
; CHECK-NEXT: ret
%r = xor <8 x i1> %v, %w
@@ -1022,7 +1022,7 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
; LMULMAX8-NEXT: addi s0, sp, 256
; LMULMAX8-NEXT: .cfi_def_cfa s0, 0
; LMULMAX8-NEXT: andi sp, sp, -128
-; LMULMAX8-NEXT: vsetivli a2, 2, e32,m1,ta,mu
+; LMULMAX8-NEXT: vsetivli a2, 2, e32,mf2,ta,mu
; LMULMAX8-NEXT: vle32.v v8, (a0)
; LMULMAX8-NEXT: addi a0, zero, 32
; LMULMAX8-NEXT: vsetvli a0, a0, e32,m8,ta,mu
@@ -1051,7 +1051,7 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
; LMULMAX4-NEXT: addi s0, sp, 256
; LMULMAX4-NEXT: .cfi_def_cfa s0, 0
; LMULMAX4-NEXT: andi sp, sp, -128
-; LMULMAX4-NEXT: vsetivli a2, 2, e32,m1,ta,mu
+; LMULMAX4-NEXT: vsetivli a2, 2, e32,mf2,ta,mu
; LMULMAX4-NEXT: vle32.v v8, (a0)
; LMULMAX4-NEXT: vsetivli a0, 16, e32,m4,ta,mu
; LMULMAX4-NEXT: vle32.v v16, (a1)
@@ -1083,7 +1083,7 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
; LMULMAX2-NEXT: addi s0, sp, 128
; LMULMAX2-NEXT: .cfi_def_cfa s0, 0
; LMULMAX2-NEXT: andi sp, sp, -128
-; LMULMAX2-NEXT: vsetivli a2, 2, e32,m1,ta,mu
+; LMULMAX2-NEXT: vsetivli a2, 2, e32,mf2,ta,mu
; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: vsetivli a0, 8, e32,m2,ta,mu
; LMULMAX2-NEXT: vle32.v v14, (a1)
@@ -1122,7 +1122,7 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
; LMULMAX1-NEXT: addi s0, sp, 128
; LMULMAX1-NEXT: .cfi_def_cfa s0, 0
; LMULMAX1-NEXT: andi sp, sp, -128
-; LMULMAX1-NEXT: vsetivli a2, 2, e32,m1,ta,mu
+; LMULMAX1-NEXT: vsetivli a2, 2, e32,mf2,ta,mu
; LMULMAX1-NEXT: vle32.v v8, (a0)
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vle32.v v13, (a1)
