[RISCV] Enable subregister liveness tracking for RVV.
RVV makes heavy use of subregisters due to LMUL>1 and segment
load/store tuples. Enabling subregister liveness tracking improves the quality
of the register allocation.
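Concretely, an LMUL=2 operand such as $v8m2 is a register group whose
subregisters are v8 and v9, and a two-field segment load result such as
$v7_v8 is a tuple built from v7 and v8; without subregister liveness the
allocator must treat the whole group or tuple as live whenever any part of
it is, which is roughly what the extra '# kill: ...' markers and copies
removed from the test checks below reflect.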

I've added a command-line option that can be used to turn it off if it causes
compile-time or functional issues. I used that option to keep the old behavior
for one interesting test case that exercises register allocation.
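
For reference, riscv-enable-subreg-liveness is a backend cl::opt, so it is
passed directly to llc or forwarded from clang with -mllvm. A minimal usage
sketch (the file names and the -march string are illustrative, not taken from
this patch):

  llc -mtriple=riscv64 -mattr=+v foo.ll -o foo.s
  llc -mtriple=riscv64 -mattr=+v -riscv-enable-subreg-liveness=false foo.ll -o foo.s
  clang --target=riscv64 -march=rv64gcv -mllvm -riscv-enable-subreg-liveness=false -c foo.c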

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D125108
topperc committed May 11, 2022
1 parent 5c7ec99 commit ed242b5
Showing 36 changed files with 20,277 additions and 24,608 deletions.
11 changes: 11 additions & 0 deletions llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -28,6 +28,9 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "RISCVGenSubtargetInfo.inc"

static cl::opt<bool> EnableSubRegLiveness("riscv-enable-subreg-liveness",
                                          cl::init(false), cl::Hidden);

static cl::opt<int> RVVVectorBitsMax(
    "riscv-v-vector-bits-max",
    cl::desc("Assume V extension vector registers are at most this big, "
@@ -196,3 +199,11 @@ unsigned RISCVSubtarget::getMaxLMULForFixedLengthVectors() const {
bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
  return hasVInstructions() && getMinRVVVectorSizeInBits() != 0;
}

bool RISCVSubtarget::enableSubRegLiveness() const {
  if (EnableSubRegLiveness.getNumOccurrences())
    return EnableSubRegLiveness;
  // Enable subregister liveness for RVV to better handle LMUL>1 and segment
  // load/store.
  return hasVInstructions();
}
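
The getNumOccurrences() check gives the flag three effective states: explicitly
=true, explicitly =false, or unset, in which case the target-derived default
(hasVInstructions()) applies. A minimal standalone sketch of the same cl::opt
pattern, using made-up names that are not part of this patch:

  #include "llvm/Support/CommandLine.h"
  using namespace llvm;

  // Hidden boolean flag; getNumOccurrences() is non-zero only when the user
  // passed -my-feature[=true|false] on the command line.
  static cl::opt<bool> MyFeature("my-feature", cl::init(false), cl::Hidden);

  // Explicit user choice wins; otherwise fall back to a target-derived default.
  static bool myFeatureEnabled(bool TargetDefault) {
    if (MyFeature.getNumOccurrences())
      return MyFeature;
    return TargetDefault;
  }
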
2 changes: 2 additions & 0 deletions llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -251,6 +251,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
  unsigned getMinRVVVectorSizeInBits() const;
  unsigned getMaxLMULForFixedLengthVectors() const;
  bool useRVVForFixedLengthVectors() const;

  bool enableSubRegLiveness() const override;
};
} // End llvm namespace
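
The declaration above overrides the generic TargetSubtargetInfo::enableSubRegLiveness()
virtual hook, which the machine register info consults when deciding whether to
track individual subregister lanes. In the base class the hook roughly reads
(paraphrased from the upstream header, not part of this diff):

  /// Enable tracking of subregister liveness in register allocator.
  virtual bool enableSubRegLiveness() const { return false; }

so a target has to opt in explicitly, as RISC-V now does for RVV.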

@@ -1,5 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+f,+m,+zfh,+experimental-zvfh < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+f,+m,+zfh,+experimental-zvfh \
; RUN: -riscv-enable-subreg-liveness=false < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+f,+m,+zfh,+experimental-zvfh < %s \
; RUN: | FileCheck %s --check-prefix=SUBREGLIVENESS

; This testcase failed to compile after
; c46aab01c002b7a04135b8b7f1f52d8c9ae23a58, which was reverted.
@@ -84,6 +87,66 @@ define void @last_chance_recoloring_failure() {
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
;
; SUBREGLIVENESS-LABEL: last_chance_recoloring_failure:
; SUBREGLIVENESS: # %bb.0: # %entry
; SUBREGLIVENESS-NEXT: addi sp, sp, -32
; SUBREGLIVENESS-NEXT: .cfi_def_cfa_offset 32
; SUBREGLIVENESS-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; SUBREGLIVENESS-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; SUBREGLIVENESS-NEXT: .cfi_offset ra, -8
; SUBREGLIVENESS-NEXT: .cfi_offset s0, -16
; SUBREGLIVENESS-NEXT: csrr a0, vlenb
; SUBREGLIVENESS-NEXT: slli a0, a0, 4
; SUBREGLIVENESS-NEXT: sub sp, sp, a0
; SUBREGLIVENESS-NEXT: li a0, 55
; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, ta, mu
; SUBREGLIVENESS-NEXT: vloxseg2ei32.v v8, (a0), v8
; SUBREGLIVENESS-NEXT: csrr a0, vlenb
; SUBREGLIVENESS-NEXT: slli a0, a0, 3
; SUBREGLIVENESS-NEXT: add a0, sp, a0
; SUBREGLIVENESS-NEXT: addi a0, a0, 16
; SUBREGLIVENESS-NEXT: csrr a1, vlenb
; SUBREGLIVENESS-NEXT: slli a1, a1, 2
; SUBREGLIVENESS-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
; SUBREGLIVENESS-NEXT: add a0, a0, a1
; SUBREGLIVENESS-NEXT: vs4r.v v12, (a0) # Unknown-size Folded Spill
; SUBREGLIVENESS-NEXT: vsetvli a0, zero, e8, m2, ta, mu
; SUBREGLIVENESS-NEXT: vmclr.m v0
; SUBREGLIVENESS-NEXT: li s0, 36
; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, tu, mu
; SUBREGLIVENESS-NEXT: vfwadd.vv v8, v8, v8, v0.t
; SUBREGLIVENESS-NEXT: addi a0, sp, 16
; SUBREGLIVENESS-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; SUBREGLIVENESS-NEXT: call func@plt
; SUBREGLIVENESS-NEXT: li a0, 32
; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, tu, mu
; SUBREGLIVENESS-NEXT: vrgather.vv v16, v8, v8, v0.t
; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, mu
; SUBREGLIVENESS-NEXT: csrr a1, vlenb
; SUBREGLIVENESS-NEXT: slli a1, a1, 3
; SUBREGLIVENESS-NEXT: add a1, sp, a1
; SUBREGLIVENESS-NEXT: addi a1, a1, 16
; SUBREGLIVENESS-NEXT: csrr a2, vlenb
; SUBREGLIVENESS-NEXT: slli a2, a2, 2
; SUBREGLIVENESS-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload
; SUBREGLIVENESS-NEXT: add a1, a1, a2
; SUBREGLIVENESS-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
; SUBREGLIVENESS-NEXT: addi a1, sp, 16
; SUBREGLIVENESS-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
; SUBREGLIVENESS-NEXT: vfwsub.wv v8, v24, v20
; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, tu, mu
; SUBREGLIVENESS-NEXT: vssubu.vv v16, v16, v8, v0.t
; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e32, m8, tu, mu
; SUBREGLIVENESS-NEXT: vfdiv.vv v8, v24, v8, v0.t
; SUBREGLIVENESS-NEXT: vse32.v v8, (a0)
; SUBREGLIVENESS-NEXT: csrr a0, vlenb
; SUBREGLIVENESS-NEXT: slli a0, a0, 4
; SUBREGLIVENESS-NEXT: add sp, sp, a0
; SUBREGLIVENESS-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; SUBREGLIVENESS-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; SUBREGLIVENESS-NEXT: addi sp, sp, 32
; SUBREGLIVENESS-NEXT: ret
entry:
%i = call { <vscale x 16 x half>, <vscale x 16 x half> } @llvm.riscv.vloxseg2.nxv16f16.nxv16i32.i64(half* nonnull poison, <vscale x 16 x i32> poison, i64 55)
%i1 = extractvalue { <vscale x 16 x half>, <vscale x 16 x half> } %i, 0
18 changes: 4 additions & 14 deletions llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -5,7 +5,6 @@
define <vscale x 4 x i32> @extract_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec) {
; CHECK-LABEL: extract_nxv8i32_nxv4i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 killed $v8m4
; CHECK-NEXT: ret
%c = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, i64 0)
ret <vscale x 4 x i32> %c
@@ -23,7 +22,6 @@ define <vscale x 4 x i32> @extract_nxv8i32_nxv4i32_4(<vscale x 8 x i32> %vec) {
define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_0(<vscale x 8 x i32> %vec) {
; CHECK-LABEL: extract_nxv8i32_nxv2i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m4
; CHECK-NEXT: ret
%c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 0)
ret <vscale x 2 x i32> %c
@@ -59,7 +57,6 @@ define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_6(<vscale x 8 x i32> %vec) {
define <vscale x 8 x i32> @extract_nxv16i32_nxv8i32_0(<vscale x 16 x i32> %vec) {
; CHECK-LABEL: extract_nxv16i32_nxv8i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v8m8
; CHECK-NEXT: ret
%c = call <vscale x 8 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
ret <vscale x 8 x i32> %c
@@ -77,7 +74,6 @@ define <vscale x 8 x i32> @extract_nxv16i32_nxv8i32_8(<vscale x 16 x i32> %vec)
define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_0(<vscale x 16 x i32> %vec) {
; CHECK-LABEL: extract_nxv16i32_nxv4i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 killed $v8m8
; CHECK-NEXT: ret
%c = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
ret <vscale x 4 x i32> %c
@@ -113,7 +109,6 @@ define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_12(<vscale x 16 x i32> %vec)
define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_0(<vscale x 16 x i32> %vec) {
; CHECK-LABEL: extract_nxv16i32_nxv2i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m8
; CHECK-NEXT: ret
%c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
ret <vscale x 2 x i32> %c
@@ -185,7 +180,6 @@ define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_14(<vscale x 16 x i32> %vec)
define <vscale x 1 x i32> @extract_nxv16i32_nxv1i32_0(<vscale x 16 x i32> %vec) {
; CHECK-LABEL: extract_nxv16i32_nxv1i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m8
; CHECK-NEXT: ret
%c = call <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
ret <vscale x 1 x i32> %c
@@ -247,7 +241,6 @@ define <vscale x 1 x i32> @extract_nxv2i32_nxv1i32_0(<vscale x 2 x i32> %vec) {
define <vscale x 2 x i8> @extract_nxv32i8_nxv2i8_0(<vscale x 32 x i8> %vec) {
; CHECK-LABEL: extract_nxv32i8_nxv2i8_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m4
; CHECK-NEXT: ret
%c = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 0)
ret <vscale x 2 x i8> %c
@@ -345,7 +338,6 @@ define <vscale x 1 x i8> @extract_nxv4i8_nxv1i8_3(<vscale x 4 x i8> %vec) {
define <vscale x 2 x half> @extract_nxv2f16_nxv16f16_0(<vscale x 16 x half> %vec) {
; CHECK-LABEL: extract_nxv2f16_nxv16f16_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m4
; CHECK-NEXT: ret
%c = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv16f16(<vscale x 16 x half> %vec, i64 0)
ret <vscale x 2 x half> %c
@@ -468,7 +460,6 @@ define <vscale x 16 x i1> @extract_nxv16i1_nxv32i1_16(<vscale x 32 x i1> %x) {
define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_0(<vscale x 12 x half> %in) {
; CHECK-LABEL: extract_nxv6f16_nxv12f16_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 killed $v8m4
; CHECK-NEXT: ret
%res = call <vscale x 6 x half> @llvm.experimental.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 0)
ret <vscale x 6 x half> %res
@@ -480,14 +471,13 @@ define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_6(<vscale x 12 x half> %in)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT: vslidedown.vx v14, v10, a0
; CHECK-NEXT: vslidedown.vx v12, v9, a0
; CHECK-NEXT: vslidedown.vx v11, v10, a0
; CHECK-NEXT: vslidedown.vx v8, v9, a0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vslideup.vi v13, v14, 0
; CHECK-NEXT: vslideup.vi v9, v11, 0
; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT: vslideup.vx v12, v10, a0
; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: vslideup.vx v8, v10, a0
; CHECK-NEXT: ret
%res = call <vscale x 6 x half> @llvm.experimental.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 6)
ret <vscale x 6 x half> %res
7 changes: 0 additions & 7 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vector-segN-load.ll
@@ -7,7 +7,6 @@ define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg2e8.v v7, (a0)
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v7_v8
; CHECK-NEXT: ret
%1 = bitcast <16 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0i8.i64(i8* %1, i64 8)
@@ -21,7 +20,6 @@ define <8 x i8> @load_factor3(<24 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg3e8.v v6, (a0)
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <24 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.v8i8.p0i8.i64(i8* %1, i64 8)
@@ -36,7 +34,6 @@ define <8 x i8> @load_factor4(<32 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg4e8.v v5, (a0)
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v5_v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <32 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.v8i8.p0i8.i64(i8* %1, i64 8)
@@ -52,7 +49,6 @@ define <8 x i8> @load_factor5(<40 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg5e8.v v4, (a0)
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v4_v5_v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <40 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.v8i8.p0i8.i64(i8* %1, i64 8)
@@ -69,7 +65,6 @@ define <8 x i8> @load_factor6(<48 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg6e8.v v3, (a0)
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v3_v4_v5_v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <48 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.v8i8.p0i8.i64(i8* %1, i64 8)
@@ -87,7 +82,6 @@ define <8 x i8> @load_factor7(<56 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg7e8.v v2, (a0)
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v2_v3_v4_v5_v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <56 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.v8i8.p0i8.i64(i8* %1, i64 8)
@@ -106,7 +100,6 @@ define <8 x i8> @load_factor8(<64 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg8e8.v v1, (a0)
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v1_v2_v3_v4_v5_v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <64 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.v8i8.p0i8.i64(i8* %1, i64 8)
18 changes: 8 additions & 10 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -39,24 +39,22 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
; RV32-V128-LABEL: interleave_v2f64:
; RV32-V128: # %bb.0:
; RV32-V128-NEXT: vmv1r.v v12, v9
; RV32-V128-NEXT: # kill: def $v8 killed $v8 def $v8m2
; RV32-V128-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV32-V128-NEXT: vid.v v10
; RV32-V128-NEXT: vsrl.vi v14, v10, 1
; RV32-V128-NEXT: vid.v v9
; RV32-V128-NEXT: vsrl.vi v9, v9, 1
; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v14
; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v9
; RV32-V128-NEXT: li a0, 10
; RV32-V128-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV32-V128-NEXT: vmv.s.x v0, a0
; RV32-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV32-V128-NEXT: vrgatherei16.vv v10, v12, v14, v0.t
; RV32-V128-NEXT: vrgatherei16.vv v10, v12, v9, v0.t
; RV32-V128-NEXT: vmv.v.v v8, v10
; RV32-V128-NEXT: ret
;
; RV64-V128-LABEL: interleave_v2f64:
; RV64-V128: # %bb.0:
; RV64-V128-NEXT: vmv1r.v v12, v9
; RV64-V128-NEXT: # kill: def $v8 killed $v8 def $v8m2
; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64-V128-NEXT: vid.v v10
; RV64-V128-NEXT: vsrl.vi v14, v10, 1
@@ -277,9 +275,9 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; RV32-V128-NEXT: vle32.v v0, (a0)
; RV32-V128-NEXT: vmv8r.v v24, v8
; RV32-V128-NEXT: addi a0, sp, 16
; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-V128-NEXT: vrgather.vv v8, v24, v0
; RV32-V128-NEXT: addi a0, sp, 16
; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV32-V128-NEXT: lui a0, %hi(.LCPI10_1)
; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_1)
; RV32-V128-NEXT: vle32.v v24, (a0)
@@ -327,9 +325,9 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; RV64-V128-NEXT: vle32.v v0, (a0)
; RV64-V128-NEXT: vmv8r.v v24, v8
; RV64-V128-NEXT: addi a0, sp, 16
; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV64-V128-NEXT: vrgather.vv v8, v24, v0
; RV64-V128-NEXT: addi a0, sp, 16
; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV64-V128-NEXT: lui a0, %hi(.LCPI10_1)
; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_1)
; RV64-V128-NEXT: vle32.v v24, (a0)
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
@@ -97,16 +97,16 @@ define void @sext_v32i8_v32i32(<32 x i8>* %x, <32 x i32>* %z) {
; LMULMAX2-NEXT: vsetivli zero, 16, e8, m2, ta, mu
; LMULMAX2-NEXT: vslidedown.vi v10, v8, 16
; LMULMAX2-NEXT: vsetivli zero, 8, e8, m1, ta, mu
; LMULMAX2-NEXT: vslidedown.vi v14, v10, 8
; LMULMAX2-NEXT: vslidedown.vi v9, v10, 8
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT: vsext.vf4 v16, v14
; LMULMAX2-NEXT: vsext.vf4 v14, v8
; LMULMAX2-NEXT: vsext.vf4 v14, v9
; LMULMAX2-NEXT: vsext.vf4 v16, v8
; LMULMAX2-NEXT: vsext.vf4 v8, v10
; LMULMAX2-NEXT: addi a0, a1, 64
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: vse32.v v14, (a1)
; LMULMAX2-NEXT: vse32.v v16, (a1)
; LMULMAX2-NEXT: addi a0, a1, 96
; LMULMAX2-NEXT: vse32.v v16, (a0)
; LMULMAX2-NEXT: vse32.v v14, (a0)
; LMULMAX2-NEXT: addi a0, a1, 32
; LMULMAX2-NEXT: vse32.v v12, (a0)
; LMULMAX2-NEXT: ret
18 changes: 8 additions & 10 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -52,24 +52,22 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; RV32-V128-LABEL: interleave_v2i64:
; RV32-V128: # %bb.0:
; RV32-V128-NEXT: vmv1r.v v12, v9
; RV32-V128-NEXT: # kill: def $v8 killed $v8 def $v8m2
; RV32-V128-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV32-V128-NEXT: vid.v v10
; RV32-V128-NEXT: vsrl.vi v14, v10, 1
; RV32-V128-NEXT: vid.v v9
; RV32-V128-NEXT: vsrl.vi v9, v9, 1
; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v14
; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v9
; RV32-V128-NEXT: li a0, 10
; RV32-V128-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV32-V128-NEXT: vmv.s.x v0, a0
; RV32-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV32-V128-NEXT: vrgatherei16.vv v10, v12, v14, v0.t
; RV32-V128-NEXT: vrgatherei16.vv v10, v12, v9, v0.t
; RV32-V128-NEXT: vmv.v.v v8, v10
; RV32-V128-NEXT: ret
;
; RV64-V128-LABEL: interleave_v2i64:
; RV64-V128: # %bb.0:
; RV64-V128-NEXT: vmv1r.v v12, v9
; RV64-V128-NEXT: # kill: def $v8 killed $v8 def $v8m2
; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64-V128-NEXT: vid.v v10
; RV64-V128-NEXT: vsrl.vi v14, v10, 1
@@ -383,9 +381,9 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; RV32-V128-NEXT: vle32.v v0, (a0)
; RV32-V128-NEXT: vmv8r.v v24, v8
; RV32-V128-NEXT: addi a0, sp, 16
; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-V128-NEXT: vrgather.vv v8, v24, v0
; RV32-V128-NEXT: addi a0, sp, 16
; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV32-V128-NEXT: lui a0, %hi(.LCPI15_1)
; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI15_1)
; RV32-V128-NEXT: vle32.v v24, (a0)
@@ -433,9 +431,9 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; RV64-V128-NEXT: vle32.v v0, (a0)
; RV64-V128-NEXT: vmv8r.v v24, v8
; RV64-V128-NEXT: addi a0, sp, 16
; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV64-V128-NEXT: vrgather.vv v8, v24, v0
; RV64-V128-NEXT: addi a0, sp, 16
; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV64-V128-NEXT: lui a0, %hi(.LCPI15_1)
; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI15_1)
; RV64-V128-NEXT: vle32.v v24, (a0)
