[RISCV] Disable subregister liveness by default
We previously enabled subregister liveness by default when compiling
with RVV. This has been shown to cause miscompilations where RVV
register operand constraints are not met. A test was added for this in
D129639, which explains the issue in more detail.

Until this issue is fixed, we should not enable subregister liveness
unless the user explicitly asks for it.

Reviewed By: craig.topper, rogfer01, kito-cheng

Differential Revision: https://reviews.llvm.org/D129646
frasercrmck committed Jul 14, 2022
1 parent 65ebcee commit d1a5669
Showing 39 changed files with 24,683 additions and 20,266 deletions.
8 changes: 3 additions & 5 deletions llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -202,11 +202,9 @@ bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
}

bool RISCVSubtarget::enableSubRegLiveness() const {
-  if (EnableSubRegLiveness.getNumOccurrences())
-    return EnableSubRegLiveness;
-  // Enable subregister liveness for RVV to better handle LMUL>1 and segment
-  // load/store.
-  return hasVInstructions();
+  // FIXME: Enable subregister liveness by default for RVV to better handle
+  // LMUL>1 and segment load/store.
+  return EnableSubRegLiveness;
}

void RISCVSubtarget::getPostRAMutations(
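For orientation, here is a minimal sketch of how the pieces fit together after this change. The cl::opt declaration is inferred from the variable name in the diff; the exact flag string, description, and modifiers are assumptions rather than a verbatim excerpt of RISCVSubtarget.cpp, and the usual llvm/Support/CommandLine.h include and llvm namespace are assumed as well.

// Assumed declaration (flag string and description are guesses): a hidden
// option, off by default, that is now the only thing enableSubRegLiveness()
// consults.
static cl::opt<bool>
    EnableSubRegLiveness("riscv-enable-subreg-liveness",
                         cl::desc("Enable subregister liveness tracking"),
                         cl::init(false), cl::Hidden);

// The predicate no longer falls back to hasVInstructions(); subregister
// liveness stays off unless the user opts in explicitly.
bool RISCVSubtarget::enableSubRegLiveness() const {
  // FIXME: Enable subregister liveness by default for RVV to better handle
  // LMUL>1 and segment load/store.
  return EnableSubRegLiveness;
}

If the flag string above matches the real option, opting back in would look like passing -riscv-enable-subreg-liveness to llc, or -mllvm -riscv-enable-subreg-liveness through clang; verify the name against the source before relying on it.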
18 changes: 14 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -5,6 +5,7 @@
define <vscale x 4 x i32> @extract_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec) {
; CHECK-LABEL: extract_nxv8i32_nxv4i32_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 killed $v8m4
; CHECK-NEXT: ret
%c = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, i64 0)
ret <vscale x 4 x i32> %c
@@ -22,6 +23,7 @@ define <vscale x 4 x i32> @extract_nxv8i32_nxv4i32_4(<vscale x 8 x i32> %vec) {
define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_0(<vscale x 8 x i32> %vec) {
; CHECK-LABEL: extract_nxv8i32_nxv2i32_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m4
; CHECK-NEXT: ret
%c = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 0)
ret <vscale x 2 x i32> %c
@@ -57,6 +59,7 @@ define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_6(<vscale x 8 x i32> %vec) {
define <vscale x 8 x i32> @extract_nxv16i32_nxv8i32_0(<vscale x 16 x i32> %vec) {
; CHECK-LABEL: extract_nxv16i32_nxv8i32_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v8m8
; CHECK-NEXT: ret
%c = call <vscale x 8 x i32> @llvm.vector.extract.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
ret <vscale x 8 x i32> %c
@@ -74,6 +77,7 @@ define <vscale x 8 x i32> @extract_nxv16i32_nxv8i32_8(<vscale x 16 x i32> %vec)
define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_0(<vscale x 16 x i32> %vec) {
; CHECK-LABEL: extract_nxv16i32_nxv4i32_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 killed $v8m8
; CHECK-NEXT: ret
%c = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
ret <vscale x 4 x i32> %c
@@ -109,6 +113,7 @@ define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_12(<vscale x 16 x i32> %vec)
define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_0(<vscale x 16 x i32> %vec) {
; CHECK-LABEL: extract_nxv16i32_nxv2i32_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m8
; CHECK-NEXT: ret
%c = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
ret <vscale x 2 x i32> %c
@@ -180,6 +185,7 @@ define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_14(<vscale x 16 x i32> %vec)
define <vscale x 1 x i32> @extract_nxv16i32_nxv1i32_0(<vscale x 16 x i32> %vec) {
; CHECK-LABEL: extract_nxv16i32_nxv1i32_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m8
; CHECK-NEXT: ret
%c = call <vscale x 1 x i32> @llvm.vector.extract.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
ret <vscale x 1 x i32> %c
@@ -241,6 +247,7 @@ define <vscale x 1 x i32> @extract_nxv2i32_nxv1i32_0(<vscale x 2 x i32> %vec) {
define <vscale x 2 x i8> @extract_nxv32i8_nxv2i8_0(<vscale x 32 x i8> %vec) {
; CHECK-LABEL: extract_nxv32i8_nxv2i8_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m4
; CHECK-NEXT: ret
%c = call <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 0)
ret <vscale x 2 x i8> %c
@@ -337,6 +344,7 @@ define <vscale x 1 x i8> @extract_nxv4i8_nxv1i8_3(<vscale x 4 x i8> %vec) {
define <vscale x 2 x half> @extract_nxv2f16_nxv16f16_0(<vscale x 16 x half> %vec) {
; CHECK-LABEL: extract_nxv2f16_nxv16f16_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m4
; CHECK-NEXT: ret
%c = call <vscale x 2 x half> @llvm.vector.extract.nxv2f16.nxv16f16(<vscale x 16 x half> %vec, i64 0)
ret <vscale x 2 x half> %c
@@ -459,6 +467,7 @@ define <vscale x 16 x i1> @extract_nxv16i1_nxv32i1_16(<vscale x 32 x i1> %x) {
define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_0(<vscale x 12 x half> %in) {
; CHECK-LABEL: extract_nxv6f16_nxv12f16_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 killed $v8m4
; CHECK-NEXT: ret
%res = call <vscale x 6 x half> @llvm.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 0)
ret <vscale x 6 x half> %res
@@ -470,13 +479,14 @@ define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_6(<vscale x 12 x half> %in)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
-; CHECK-NEXT: vslidedown.vx v11, v10, a0
-; CHECK-NEXT: vslidedown.vx v8, v9, a0
+; CHECK-NEXT: vslidedown.vx v14, v10, a0
+; CHECK-NEXT: vslidedown.vx v12, v9, a0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT: vslideup.vi v9, v11, 0
+; CHECK-NEXT: vslideup.vi v13, v14, 0
; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT: vslideup.vx v8, v10, a0
+; CHECK-NEXT: vslideup.vx v12, v10, a0
+; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: ret
%res = call <vscale x 6 x half> @llvm.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 6)
ret <vscale x 6 x half> %res
7 changes: 7 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vector-segN-load.ll
@@ -7,6 +7,7 @@ define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v7_v8
; CHECK-NEXT: ret
%1 = bitcast <16 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0i8.i64(i8* %1, i64 8)
@@ -20,6 +21,7 @@ define <8 x i8> @load_factor3(<24 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg3e8.v v6, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <24 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.v8i8.p0i8.i64(i8* %1, i64 8)
@@ -34,6 +36,7 @@ define <8 x i8> @load_factor4(<32 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg4e8.v v5, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v5_v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <32 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.v8i8.p0i8.i64(i8* %1, i64 8)
@@ -49,6 +52,7 @@ define <8 x i8> @load_factor5(<40 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg5e8.v v4, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v4_v5_v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <40 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.v8i8.p0i8.i64(i8* %1, i64 8)
@@ -65,6 +69,7 @@ define <8 x i8> @load_factor6(<48 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg6e8.v v3, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v3_v4_v5_v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <48 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.v8i8.p0i8.i64(i8* %1, i64 8)
@@ -82,6 +87,7 @@ define <8 x i8> @load_factor7(<56 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg7e8.v v2, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v2_v3_v4_v5_v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <56 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.v8i8.p0i8.i64(i8* %1, i64 8)
@@ -100,6 +106,7 @@ define <8 x i8> @load_factor8(<64 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg8e8.v v1, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v1_v2_v3_v4_v5_v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <64 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.v8i8.p0i8.i64(i8* %1, i64 8)
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll
@@ -267,7 +267,7 @@ define <128 x i32> @vtrunc_nxv128i32_nxv128i64(<128 x i64> %a, <128 x i1> %m, i3
; CHECK-NEXT: mv a4, a3
; CHECK-NEXT: .LBB16_2:
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vi v28, v2, 4
+; CHECK-NEXT: vslidedown.vi v3, v2, 4
; CHECK-NEXT: addi a6, a4, -32
; CHECK-NEXT: addi a3, a1, 640
; CHECK-NEXT: mv a5, a2
@@ -276,7 +276,7 @@ define <128 x i32> @vtrunc_nxv128i32_nxv128i64(<128 x i64> %a, <128 x i1> %m, i3
; CHECK-NEXT: mv a5, a6
; CHECK-NEXT: .LBB16_4:
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
-; CHECK-NEXT: vslidedown.vi v0, v28, 2
+; CHECK-NEXT: vslidedown.vi v0, v3, 2
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT: vle64.v v16, (a3)
; CHECK-NEXT: addi t0, a5, -16
@@ -301,7 +301,7 @@ define <128 x i32> @vtrunc_nxv128i32_nxv128i64(<128 x i64> %a, <128 x i1> %m, i3
; CHECK-NEXT: .LBB16_8:
; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, mu
; CHECK-NEXT: li a5, 64
-; CHECK-NEXT: vmv1r.v v0, v28
+; CHECK-NEXT: vmv1r.v v0, v3
; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t
; CHECK-NEXT: csrr a6, vlenb
; CHECK-NEXT: li t0, 48
@@ -314,7 +314,7 @@ define <128 x i32> @vtrunc_nxv128i32_nxv128i64(<128 x i64> %a, <128 x i1> %m, i3
; CHECK-NEXT: li a7, 64
; CHECK-NEXT: .LBB16_10:
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vi v28, v1, 4
+; CHECK-NEXT: vslidedown.vi v3, v1, 4
; CHECK-NEXT: addi t0, a7, -32
; CHECK-NEXT: addi a5, a1, 128
; CHECK-NEXT: mv a6, a2
@@ -323,7 +323,7 @@ define <128 x i32> @vtrunc_nxv128i32_nxv128i64(<128 x i64> %a, <128 x i1> %m, i3
; CHECK-NEXT: mv a6, t0
; CHECK-NEXT: .LBB16_12:
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
-; CHECK-NEXT: vslidedown.vi v0, v28, 2
+; CHECK-NEXT: vslidedown.vi v0, v3, 2
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT: vle64.v v16, (a5)
; CHECK-NEXT: addi a5, a6, -16
@@ -347,7 +347,7 @@ define <128 x i32> @vtrunc_nxv128i32_nxv128i64(<128 x i64> %a, <128 x i1> %m, i3
; CHECK-NEXT: .LBB16_16:
; CHECK-NEXT: addi t0, a1, 384
; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, mu
-; CHECK-NEXT: vmv1r.v v0, v28
+; CHECK-NEXT: vmv1r.v v0, v3
; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t
; CHECK-NEXT: csrr a6, vlenb
; CHECK-NEXT: li t1, 40
18 changes: 10 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -39,20 +39,22 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
; RV32-V128-LABEL: interleave_v2f64:
; RV32-V128: # %bb.0:
; RV32-V128-NEXT: vmv1r.v v12, v9
+; RV32-V128-NEXT: # kill: def $v8 killed $v8 def $v8m2
; RV32-V128-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; RV32-V128-NEXT: vid.v v9
-; RV32-V128-NEXT: vsrl.vi v9, v9, 1
+; RV32-V128-NEXT: vid.v v10
+; RV32-V128-NEXT: vsrl.vi v14, v10, 1
; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; RV32-V128-NEXT: li a0, 10
; RV32-V128-NEXT: vmv.s.x v0, a0
-; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v9
-; RV32-V128-NEXT: vrgatherei16.vv v10, v12, v9, v0.t
+; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v14
+; RV32-V128-NEXT: vrgatherei16.vv v10, v12, v14, v0.t
; RV32-V128-NEXT: vmv.v.v v8, v10
; RV32-V128-NEXT: ret
;
; RV64-V128-LABEL: interleave_v2f64:
; RV64-V128: # %bb.0:
; RV64-V128-NEXT: vmv1r.v v12, v9
+; RV64-V128-NEXT: # kill: def $v8 killed $v8 def $v8m2
; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64-V128-NEXT: vid.v v10
; RV64-V128-NEXT: vsrl.vi v14, v10, 1
@@ -267,9 +269,9 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; RV32-V128-NEXT: vle32.v v0, (a0)
; RV32-V128-NEXT: vmv8r.v v24, v8
-; RV32-V128-NEXT: vrgather.vv v8, v24, v0
; RV32-V128-NEXT: addi a0, sp, 16
-; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-V128-NEXT: vrgather.vv v8, v24, v0
; RV32-V128-NEXT: lui a0, %hi(.LCPI10_1)
; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_1)
; RV32-V128-NEXT: vle32.v v24, (a0)
@@ -317,9 +319,9 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; RV64-V128-NEXT: vle32.v v0, (a0)
; RV64-V128-NEXT: vmv8r.v v24, v8
-; RV64-V128-NEXT: vrgather.vv v8, v24, v0
; RV64-V128-NEXT: addi a0, sp, 16
-; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-V128-NEXT: vrgather.vv v8, v24, v0
; RV64-V128-NEXT: lui a0, %hi(.LCPI10_1)
; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_1)
; RV64-V128-NEXT: vle32.v v24, (a0)
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
@@ -92,16 +92,16 @@ define void @sext_v32i8_v32i32(<32 x i8>* %x, <32 x i32>* %z) {
; LMULMAX2-NEXT: vsetivli zero, 16, e8, m2, ta, mu
; LMULMAX2-NEXT: vslidedown.vi v10, v8, 16
; LMULMAX2-NEXT: vsetivli zero, 8, e8, m1, ta, mu
-; LMULMAX2-NEXT: vslidedown.vi v9, v10, 8
+; LMULMAX2-NEXT: vslidedown.vi v14, v10, 8
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX2-NEXT: vsext.vf4 v14, v9
-; LMULMAX2-NEXT: vsext.vf4 v16, v8
+; LMULMAX2-NEXT: vsext.vf4 v16, v14
+; LMULMAX2-NEXT: vsext.vf4 v14, v8
; LMULMAX2-NEXT: vsext.vf4 v8, v10
; LMULMAX2-NEXT: addi a0, a1, 64
; LMULMAX2-NEXT: vse32.v v8, (a0)
-; LMULMAX2-NEXT: vse32.v v16, (a1)
+; LMULMAX2-NEXT: vse32.v v14, (a1)
; LMULMAX2-NEXT: addi a0, a1, 96
-; LMULMAX2-NEXT: vse32.v v14, (a0)
+; LMULMAX2-NEXT: vse32.v v16, (a0)
; LMULMAX2-NEXT: addi a0, a1, 32
; LMULMAX2-NEXT: vse32.v v12, (a0)
; LMULMAX2-NEXT: ret
18 changes: 10 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -52,20 +52,22 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; RV32-V128-LABEL: interleave_v2i64:
; RV32-V128: # %bb.0:
; RV32-V128-NEXT: vmv1r.v v12, v9
+; RV32-V128-NEXT: # kill: def $v8 killed $v8 def $v8m2
; RV32-V128-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; RV32-V128-NEXT: vid.v v9
-; RV32-V128-NEXT: vsrl.vi v9, v9, 1
+; RV32-V128-NEXT: vid.v v10
+; RV32-V128-NEXT: vsrl.vi v14, v10, 1
; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; RV32-V128-NEXT: li a0, 10
; RV32-V128-NEXT: vmv.s.x v0, a0
-; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v9
-; RV32-V128-NEXT: vrgatherei16.vv v10, v12, v9, v0.t
+; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v14
+; RV32-V128-NEXT: vrgatherei16.vv v10, v12, v14, v0.t
; RV32-V128-NEXT: vmv.v.v v8, v10
; RV32-V128-NEXT: ret
;
; RV64-V128-LABEL: interleave_v2i64:
; RV64-V128: # %bb.0:
; RV64-V128-NEXT: vmv1r.v v12, v9
+; RV64-V128-NEXT: # kill: def $v8 killed $v8 def $v8m2
; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64-V128-NEXT: vid.v v10
; RV64-V128-NEXT: vsrl.vi v14, v10, 1
@@ -373,9 +375,9 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; RV32-V128-NEXT: vle32.v v0, (a0)
; RV32-V128-NEXT: vmv8r.v v24, v8
-; RV32-V128-NEXT: vrgather.vv v8, v24, v0
; RV32-V128-NEXT: addi a0, sp, 16
-; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-V128-NEXT: vrgather.vv v8, v24, v0
; RV32-V128-NEXT: lui a0, %hi(.LCPI15_1)
; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI15_1)
; RV32-V128-NEXT: vle32.v v24, (a0)
@@ -423,9 +425,9 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; RV64-V128-NEXT: vle32.v v0, (a0)
; RV64-V128-NEXT: vmv8r.v v24, v8
-; RV64-V128-NEXT: vrgather.vv v8, v24, v0
; RV64-V128-NEXT: addi a0, sp, 16
-; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-V128-NEXT: vrgather.vv v8, v24, v0
; RV64-V128-NEXT: lui a0, %hi(.LCPI15_1)
; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI15_1)
; RV64-V128-NEXT: vle32.v v24, (a0)
