diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index bb50b5b7934cd..585eb9a19c0a8 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1473,3 +1473,14 @@ unsigned RISCVTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { // TODO: Figure out constant materialization cost modeling and remove. return SLPMaxVF; } + +bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2) { + // RISC-V specific: compare instruction count first, then the other costs. + return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, + C1.NumIVMuls, C1.NumBaseAdds, + C1.ScaleCost, C1.ImmCost, C1.SetupCost) < + std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, + C2.NumIVMuls, C2.NumBaseAdds, + C2.ScaleCost, C2.ImmCost, C2.SetupCost); +} diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 78e035bcd2c71..2bde679c184bc 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -336,6 +336,9 @@ class RISCVTTIImpl : public BasicTTIImplBase { } llvm_unreachable("unknown register class"); } + + bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2); }; } // end namespace llvm diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll index 6313845ba8890..8b22046cb6243 100644 --- a/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll +++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll @@ -53,24 +53,26 @@ define void @test(i32 signext %row, i32 signext %N.in) nounwind { ; RV64: # %bb.0: # %entry ; RV64-NEXT: blez a1, .LBB0_3 ; RV64-NEXT: # %bb.1: # %cond_true.preheader -; RV64-NEXT: li a2, 0 +; RV64-NEXT: negw a1, a1 ; RV64-NEXT: slli a0, a0, 6 -; 
RV64-NEXT: lui a3, %hi(A) -; RV64-NEXT: addi a3, a3, %lo(A) -; RV64-NEXT: add a0, a0, a3 -; RV64-NEXT: addi a3, a0, 4 +; RV64-NEXT: lui a2, %hi(A) +; RV64-NEXT: addi a2, a2, %lo(A) +; RV64-NEXT: add a0, a0, a2 +; RV64-NEXT: addi a2, a0, 4 +; RV64-NEXT: li a3, 2 ; RV64-NEXT: li a4, 4 ; RV64-NEXT: li a5, 5 +; RV64-NEXT: li a6, 2 ; RV64-NEXT: .LBB0_2: # %cond_true ; RV64-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64-NEXT: sw a4, 0(a3) -; RV64-NEXT: addiw a6, a2, 2 -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: add a6, a0, a6 -; RV64-NEXT: sw a5, 0(a6) -; RV64-NEXT: addiw a2, a2, 1 -; RV64-NEXT: addi a3, a3, 4 -; RV64-NEXT: bne a1, a2, .LBB0_2 +; RV64-NEXT: sw a4, 0(a2) +; RV64-NEXT: slli a7, a6, 2 +; RV64-NEXT: add a7, a0, a7 +; RV64-NEXT: sw a5, 0(a7) +; RV64-NEXT: addiw a6, a6, 1 +; RV64-NEXT: addw a7, a1, a6 +; RV64-NEXT: addi a2, a2, 4 +; RV64-NEXT: bne a7, a3, .LBB0_2 ; RV64-NEXT: .LBB0_3: # %return ; RV64-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll index d3f19c0c20cab..34002c4015b53 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll @@ -13,21 +13,20 @@ define void @gather(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { ; CHECK-LABEL: gather: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: li a4, 5 -; CHECK-NEXT: li a5, 1024 ; CHECK-NEXT: .LBB0_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a1), a4 -; CHECK-NEXT: add a6, a0, a2 -; CHECK-NEXT: vle8.v v9, (a6) +; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vse8.v v8, (a6) -; CHECK-NEXT: addi a2, a2, 32 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: addi a2, a2, -32 +; CHECK-NEXT: addi a0, 
a0, 32 ; CHECK-NEXT: addi a1, a1, 160 -; CHECK-NEXT: bne a2, a5, .LBB0_1 +; CHECK-NEXT: bnez a2, .LBB0_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -55,51 +54,49 @@ for.cond.cleanup: ; preds = %vector.body define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, <32 x i8> %maskedoff) { ; V-LABEL: gather_masked: ; V: # %bb.0: # %entry -; V-NEXT: li a2, 0 +; V-NEXT: li a2, 1024 ; V-NEXT: lui a3, 983765 ; V-NEXT: addiw a3, a3, 873 ; V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; V-NEXT: vmv.s.x v0, a3 ; V-NEXT: li a3, 32 ; V-NEXT: li a4, 5 -; V-NEXT: li a5, 1024 ; V-NEXT: .LBB1_1: # %vector.body ; V-NEXT: # =>This Inner Loop Header: Depth=1 ; V-NEXT: vsetvli zero, a3, e8, m1, ta, mu ; V-NEXT: vmv1r.v v9, v8 ; V-NEXT: vlse8.v v9, (a1), a4, v0.t -; V-NEXT: add a6, a0, a2 -; V-NEXT: vle8.v v10, (a6) +; V-NEXT: vle8.v v10, (a0) ; V-NEXT: vadd.vv v9, v10, v9 -; V-NEXT: vse8.v v9, (a6) -; V-NEXT: addi a2, a2, 32 +; V-NEXT: vse8.v v9, (a0) +; V-NEXT: addi a2, a2, -32 +; V-NEXT: addi a0, a0, 32 ; V-NEXT: addi a1, a1, 160 -; V-NEXT: bne a2, a5, .LBB1_1 +; V-NEXT: bnez a2, .LBB1_1 ; V-NEXT: # %bb.2: # %for.cond.cleanup ; V-NEXT: ret ; ; ZVE32F-LABEL: gather_masked: ; ZVE32F: # %bb.0: # %entry -; ZVE32F-NEXT: li a2, 0 +; ZVE32F-NEXT: li a2, 1024 ; ZVE32F-NEXT: lui a3, 983765 ; ZVE32F-NEXT: addiw a3, a3, 873 ; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; ZVE32F-NEXT: vmv.s.x v0, a3 ; ZVE32F-NEXT: li a3, 32 ; ZVE32F-NEXT: li a4, 5 -; ZVE32F-NEXT: li a5, 1024 ; ZVE32F-NEXT: .LBB1_1: # %vector.body ; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1 ; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, mu ; ZVE32F-NEXT: vmv1r.v v9, v8 ; ZVE32F-NEXT: vlse8.v v9, (a1), a4, v0.t -; ZVE32F-NEXT: add a6, a0, a2 -; ZVE32F-NEXT: vle8.v v10, (a6) +; ZVE32F-NEXT: vle8.v v10, (a0) ; ZVE32F-NEXT: vadd.vv v9, v10, v9 -; ZVE32F-NEXT: vse8.v v9, (a6) -; ZVE32F-NEXT: addi a2, a2, 32 +; ZVE32F-NEXT: vse8.v v9, (a0) +; ZVE32F-NEXT: addi a2, a2, -32 
+; ZVE32F-NEXT: addi a0, a0, 32 ; ZVE32F-NEXT: addi a1, a1, 160 -; ZVE32F-NEXT: bne a2, a5, .LBB1_1 +; ZVE32F-NEXT: bnez a2, .LBB1_1 ; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup ; ZVE32F-NEXT: ret entry: @@ -127,22 +124,21 @@ for.cond.cleanup: ; preds = %vector.body define void @gather_negative_stride(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { ; CHECK-LABEL: gather_negative_stride: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: addi a1, a1, 155 +; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: li a4, -5 -; CHECK-NEXT: li a5, 1024 ; CHECK-NEXT: .LBB2_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a1), a4 -; CHECK-NEXT: add a6, a0, a2 -; CHECK-NEXT: vle8.v v9, (a6) +; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vse8.v v8, (a6) -; CHECK-NEXT: addi a2, a2, 32 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: addi a2, a2, -32 +; CHECK-NEXT: addi a0, a0, 32 ; CHECK-NEXT: addi a1, a1, 160 -; CHECK-NEXT: bne a2, a5, .LBB2_1 +; CHECK-NEXT: bnez a2, .LBB2_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -170,20 +166,19 @@ for.cond.cleanup: ; preds = %vector.body define void @gather_zero_stride(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { ; CHECK-LABEL: gather_zero_stride: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: li a4, 1024 ; CHECK-NEXT: .LBB3_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lbu a5, 0(a1) -; CHECK-NEXT: add a6, a0, a2 +; CHECK-NEXT: lbu a4, 0(a1) ; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma -; CHECK-NEXT: vle8.v v8, (a6) -; CHECK-NEXT: vadd.vx v8, v8, a5 -; CHECK-NEXT: vse8.v v8, (a6) -; CHECK-NEXT: addi a2, a2, 32 +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vadd.vx v8, v8, a4 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: addi a2, 
a2, -32 +; CHECK-NEXT: addi a0, a0, 32 ; CHECK-NEXT: addi a1, a1, 160 -; CHECK-NEXT: bne a2, a4, .LBB3_1 +; CHECK-NEXT: bnez a2, .LBB3_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -211,59 +206,56 @@ for.cond.cleanup: ; preds = %vector.body define void @gather_zero_stride_unfold(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { ; V-LABEL: gather_zero_stride_unfold: ; V: # %bb.0: # %entry -; V-NEXT: li a2, 0 +; V-NEXT: li a2, 1024 ; V-NEXT: li a3, 32 -; V-NEXT: li a4, 1024 ; V-NEXT: .LBB4_1: # %vector.body ; V-NEXT: # =>This Inner Loop Header: Depth=1 ; V-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; V-NEXT: vlse8.v v8, (a1), zero -; V-NEXT: add a5, a0, a2 -; V-NEXT: vle8.v v9, (a5) +; V-NEXT: vle8.v v9, (a0) ; V-NEXT: vdivu.vv v8, v8, v9 -; V-NEXT: vse8.v v8, (a5) -; V-NEXT: addi a2, a2, 32 +; V-NEXT: vse8.v v8, (a0) +; V-NEXT: addi a2, a2, -32 +; V-NEXT: addi a0, a0, 32 ; V-NEXT: addi a1, a1, 160 -; V-NEXT: bne a2, a4, .LBB4_1 +; V-NEXT: bnez a2, .LBB4_1 ; V-NEXT: # %bb.2: # %for.cond.cleanup ; V-NEXT: ret ; ; ZVE32F-LABEL: gather_zero_stride_unfold: ; ZVE32F: # %bb.0: # %entry -; ZVE32F-NEXT: li a2, 0 +; ZVE32F-NEXT: li a2, 1024 ; ZVE32F-NEXT: li a3, 32 -; ZVE32F-NEXT: li a4, 1024 ; ZVE32F-NEXT: .LBB4_1: # %vector.body ; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1 ; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; ZVE32F-NEXT: vlse8.v v8, (a1), zero -; ZVE32F-NEXT: add a5, a0, a2 -; ZVE32F-NEXT: vle8.v v9, (a5) +; ZVE32F-NEXT: vle8.v v9, (a0) ; ZVE32F-NEXT: vdivu.vv v8, v8, v9 -; ZVE32F-NEXT: vse8.v v8, (a5) -; ZVE32F-NEXT: addi a2, a2, 32 +; ZVE32F-NEXT: vse8.v v8, (a0) +; ZVE32F-NEXT: addi a2, a2, -32 +; ZVE32F-NEXT: addi a0, a0, 32 ; ZVE32F-NEXT: addi a1, a1, 160 -; ZVE32F-NEXT: bne a2, a4, .LBB4_1 +; ZVE32F-NEXT: bnez a2, .LBB4_1 ; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup ; ZVE32F-NEXT: ret ; ; NOT-OPTIMIZED-LABEL: gather_zero_stride_unfold: ; NOT-OPTIMIZED: # %bb.0: # %entry -; NOT-OPTIMIZED-NEXT: li a2, 0 +; 
NOT-OPTIMIZED-NEXT: li a2, 1024 ; NOT-OPTIMIZED-NEXT: li a3, 32 -; NOT-OPTIMIZED-NEXT: li a4, 1024 ; NOT-OPTIMIZED-NEXT: .LBB4_1: # %vector.body ; NOT-OPTIMIZED-NEXT: # =>This Inner Loop Header: Depth=1 -; NOT-OPTIMIZED-NEXT: lbu a5, 0(a1) +; NOT-OPTIMIZED-NEXT: lbu a4, 0(a1) ; NOT-OPTIMIZED-NEXT: vsetvli zero, a3, e8, m1, ta, ma -; NOT-OPTIMIZED-NEXT: add a6, a0, a2 -; NOT-OPTIMIZED-NEXT: vle8.v v8, (a6) -; NOT-OPTIMIZED-NEXT: vmv.v.x v9, a5 +; NOT-OPTIMIZED-NEXT: vle8.v v8, (a0) +; NOT-OPTIMIZED-NEXT: vmv.v.x v9, a4 ; NOT-OPTIMIZED-NEXT: vdivu.vv v8, v9, v8 -; NOT-OPTIMIZED-NEXT: vse8.v v8, (a6) -; NOT-OPTIMIZED-NEXT: addi a2, a2, 32 +; NOT-OPTIMIZED-NEXT: vse8.v v8, (a0) +; NOT-OPTIMIZED-NEXT: addi a2, a2, -32 +; NOT-OPTIMIZED-NEXT: addi a0, a0, 32 ; NOT-OPTIMIZED-NEXT: addi a1, a1, 160 -; NOT-OPTIMIZED-NEXT: bne a2, a4, .LBB4_1 +; NOT-OPTIMIZED-NEXT: bnez a2, .LBB4_1 ; NOT-OPTIMIZED-NEXT: # %bb.2: # %for.cond.cleanup ; NOT-OPTIMIZED-NEXT: ret entry: @@ -295,21 +287,20 @@ for.cond.cleanup: ; preds = %vector.body define void @scatter(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { ; CHECK-LABEL: scatter: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: li a4, 5 -; CHECK-NEXT: li a5, 1024 ; CHECK-NEXT: .LBB5_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add a6, a1, a2 ; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma -; CHECK-NEXT: vle8.v v8, (a6) +; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vlse8.v v9, (a0), a4 ; CHECK-NEXT: vadd.vv v8, v9, v8 ; CHECK-NEXT: vsse8.v v8, (a0), a4 -; CHECK-NEXT: addi a2, a2, 32 +; CHECK-NEXT: addi a2, a2, -32 +; CHECK-NEXT: addi a1, a1, 32 ; CHECK-NEXT: addi a0, a0, 160 -; CHECK-NEXT: bne a2, a5, .LBB5_1 +; CHECK-NEXT: bnez a2, .LBB5_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -337,51 +328,49 @@ for.cond.cleanup: ; preds = %vector.body define void @scatter_masked(ptr noalias nocapture %A, 
ptr noalias nocapture readonly %B, <32 x i8> %maskedoff) { ; V-LABEL: scatter_masked: ; V: # %bb.0: # %entry -; V-NEXT: li a2, 0 +; V-NEXT: li a2, 1024 ; V-NEXT: li a3, 32 ; V-NEXT: lui a4, 983765 ; V-NEXT: addiw a4, a4, 873 ; V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; V-NEXT: vmv.s.x v0, a4 ; V-NEXT: li a4, 5 -; V-NEXT: li a5, 1024 ; V-NEXT: .LBB6_1: # %vector.body ; V-NEXT: # =>This Inner Loop Header: Depth=1 -; V-NEXT: add a6, a1, a2 ; V-NEXT: vsetvli zero, a3, e8, m1, ta, mu -; V-NEXT: vle8.v v9, (a6) +; V-NEXT: vle8.v v9, (a1) ; V-NEXT: vmv1r.v v10, v8 ; V-NEXT: vlse8.v v10, (a0), a4, v0.t ; V-NEXT: vadd.vv v9, v10, v9 ; V-NEXT: vsse8.v v9, (a0), a4, v0.t -; V-NEXT: addi a2, a2, 32 +; V-NEXT: addi a2, a2, -32 +; V-NEXT: addi a1, a1, 32 ; V-NEXT: addi a0, a0, 160 -; V-NEXT: bne a2, a5, .LBB6_1 +; V-NEXT: bnez a2, .LBB6_1 ; V-NEXT: # %bb.2: # %for.cond.cleanup ; V-NEXT: ret ; ; ZVE32F-LABEL: scatter_masked: ; ZVE32F: # %bb.0: # %entry -; ZVE32F-NEXT: li a2, 0 +; ZVE32F-NEXT: li a2, 1024 ; ZVE32F-NEXT: li a3, 32 ; ZVE32F-NEXT: lui a4, 983765 ; ZVE32F-NEXT: addiw a4, a4, 873 ; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; ZVE32F-NEXT: vmv.s.x v0, a4 ; ZVE32F-NEXT: li a4, 5 -; ZVE32F-NEXT: li a5, 1024 ; ZVE32F-NEXT: .LBB6_1: # %vector.body ; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1 -; ZVE32F-NEXT: add a6, a1, a2 ; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, mu -; ZVE32F-NEXT: vle8.v v9, (a6) +; ZVE32F-NEXT: vle8.v v9, (a1) ; ZVE32F-NEXT: vmv1r.v v10, v8 ; ZVE32F-NEXT: vlse8.v v10, (a0), a4, v0.t ; ZVE32F-NEXT: vadd.vv v9, v10, v9 ; ZVE32F-NEXT: vsse8.v v9, (a0), a4, v0.t -; ZVE32F-NEXT: addi a2, a2, 32 +; ZVE32F-NEXT: addi a2, a2, -32 +; ZVE32F-NEXT: addi a1, a1, 32 ; ZVE32F-NEXT: addi a0, a0, 160 -; ZVE32F-NEXT: bne a2, a5, .LBB6_1 +; ZVE32F-NEXT: bnez a2, .LBB6_1 ; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup ; ZVE32F-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll 
index bebebdf5414df..01be29994cbe6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -2075,48 +2075,48 @@ for.cond.cleanup: ; preds = %vector.body define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, float %x) { ; CHECK-LABEL: sink_splat_fma_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a3, a2, 2 -; CHECK-NEXT: li a4, 1024 -; CHECK-NEXT: bgeu a4, a3, .LBB34_2 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: srli a4, a3, 2 +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: bgeu a2, a4, .LBB34_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a4, 0 +; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB34_5 ; CHECK-NEXT: .LBB34_2: # %vector.ph -; CHECK-NEXT: li a6, 0 -; CHECK-NEXT: addiw a4, a3, -1 -; CHECK-NEXT: andi a5, a4, 1024 -; CHECK-NEXT: xori a4, a5, 1024 -; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma -; CHECK-NEXT: mv a7, a4 +; CHECK-NEXT: addiw a2, a4, -1 +; CHECK-NEXT: andi a5, a2, 1024 +; CHECK-NEXT: xori a2, a5, 1024 +; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a1 +; CHECK-NEXT: mv t0, a2 ; CHECK-NEXT: .LBB34_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add t0, a0, a6 -; CHECK-NEXT: vl1re32.v v8, (t0) -; CHECK-NEXT: add t1, a1, a6 -; CHECK-NEXT: vl1re32.v v9, (t1) +; CHECK-NEXT: vl1re32.v v8, (a6) +; CHECK-NEXT: vl1re32.v v9, (a7) ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 -; CHECK-NEXT: vs1r.v v9, (t0) -; CHECK-NEXT: sub a7, a7, a3 -; CHECK-NEXT: add a6, a6, a2 -; CHECK-NEXT: bnez a7, .LBB34_3 +; CHECK-NEXT: vs1r.v v9, (a6) +; CHECK-NEXT: sub t0, t0, a4 +; CHECK-NEXT: add a7, a7, a3 +; CHECK-NEXT: add a6, a6, a3 +; CHECK-NEXT: bnez t0, .LBB34_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a5, .LBB34_7 ; CHECK-NEXT: .LBB34_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a4, -1024 -; CHECK-NEXT: slli a4, a4, 2 -; CHECK-NEXT: add a1, 
a1, a4 -; CHECK-NEXT: add a0, a0, a4 +; CHECK-NEXT: addi a3, a2, -1024 +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB34_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: flw ft0, 0(a0) ; CHECK-NEXT: flw ft1, 0(a1) ; CHECK-NEXT: fmadd.s ft0, ft0, fa0, ft1 ; CHECK-NEXT: fsw ft0, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 +; CHECK-NEXT: addi a3, a3, 1 ; CHECK-NEXT: addi a1, a1, 4 ; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a2, .LBB34_6 +; CHECK-NEXT: bnez a3, .LBB34_6 ; CHECK-NEXT: .LBB34_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -2175,48 +2175,48 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, float %x) { ; CHECK-LABEL: sink_splat_fma_commute_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a3, a2, 2 -; CHECK-NEXT: li a4, 1024 -; CHECK-NEXT: bgeu a4, a3, .LBB35_2 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: srli a4, a3, 2 +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: bgeu a2, a4, .LBB35_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a4, 0 +; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB35_5 ; CHECK-NEXT: .LBB35_2: # %vector.ph -; CHECK-NEXT: li a6, 0 -; CHECK-NEXT: addiw a4, a3, -1 -; CHECK-NEXT: andi a5, a4, 1024 -; CHECK-NEXT: xori a4, a5, 1024 -; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma -; CHECK-NEXT: mv a7, a4 +; CHECK-NEXT: addiw a2, a4, -1 +; CHECK-NEXT: andi a5, a2, 1024 +; CHECK-NEXT: xori a2, a5, 1024 +; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a1 +; CHECK-NEXT: mv t0, a2 ; CHECK-NEXT: .LBB35_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add t0, a0, a6 -; CHECK-NEXT: vl1re32.v v8, (t0) -; CHECK-NEXT: add t1, a1, a6 -; CHECK-NEXT: vl1re32.v v9, (t1) +; CHECK-NEXT: vl1re32.v v8, (a6) +; CHECK-NEXT: vl1re32.v v9, (a7) ; CHECK-NEXT: vfmacc.vf v9, 
fa0, v8 -; CHECK-NEXT: vs1r.v v9, (t0) -; CHECK-NEXT: sub a7, a7, a3 -; CHECK-NEXT: add a6, a6, a2 -; CHECK-NEXT: bnez a7, .LBB35_3 +; CHECK-NEXT: vs1r.v v9, (a6) +; CHECK-NEXT: sub t0, t0, a4 +; CHECK-NEXT: add a7, a7, a3 +; CHECK-NEXT: add a6, a6, a3 +; CHECK-NEXT: bnez t0, .LBB35_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a5, .LBB35_7 ; CHECK-NEXT: .LBB35_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a4, -1024 -; CHECK-NEXT: slli a4, a4, 2 -; CHECK-NEXT: add a1, a1, a4 -; CHECK-NEXT: add a0, a0, a4 +; CHECK-NEXT: addi a3, a2, -1024 +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB35_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: flw ft0, 0(a0) ; CHECK-NEXT: flw ft1, 0(a1) ; CHECK-NEXT: fmadd.s ft0, fa0, ft0, ft1 ; CHECK-NEXT: fsw ft0, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 +; CHECK-NEXT: addi a3, a3, 1 ; CHECK-NEXT: addi a1, a1, 4 ; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a2, .LBB35_6 +; CHECK-NEXT: bnez a3, .LBB35_6 ; CHECK-NEXT: .LBB35_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index 0a2bf27096686..596dba6a0cf79 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -592,22 +592,20 @@ define void @vlmax(i64 %N, double* %c, double* %a, double* %b) { ; CHECK-NEXT: vsetvli a6, zero, e64, m1, ta, mu ; CHECK-NEXT: blez a0, .LBB11_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: li t1, 0 -; CHECK-NEXT: slli a7, a6, 3 +; CHECK-NEXT: li a5, 0 +; CHECK-NEXT: slli a4, a6, 3 ; CHECK-NEXT: .LBB11_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add t0, a2, a4 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (t0) -; CHECK-NEXT: add a5, a3, a4 -; CHECK-NEXT: vle64.v 
v9, (a5) +; CHECK-NEXT: vle64.v v8, (a2) +; CHECK-NEXT: vle64.v v9, (a3) ; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: add a5, a1, a4 -; CHECK-NEXT: vse64.v v8, (a5) -; CHECK-NEXT: add t1, t1, a6 -; CHECK-NEXT: add a4, a4, a7 -; CHECK-NEXT: blt t1, a0, .LBB11_2 +; CHECK-NEXT: vse64.v v8, (a1) +; CHECK-NEXT: add a5, a5, a6 +; CHECK-NEXT: add a1, a1, a4 +; CHECK-NEXT: add a3, a3, a4 +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: blt a5, a0, .LBB11_2 ; CHECK-NEXT: .LBB11_3: # %for.end ; CHECK-NEXT: ret entry: @@ -962,17 +960,16 @@ if.end: define void @pre_over_vle(ptr %A) { ; CHECK-LABEL: pre_over_vle: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: li a2, 800 +; CHECK-NEXT: li a1, 100 ; CHECK-NEXT: .LBB22_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add a3, a0, a1 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vle8.v v8, (a3) +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsext.vf4 v9, v8 -; CHECK-NEXT: addi a1, a1, 8 -; CHECK-NEXT: vse32.v v9, (a3) -; CHECK-NEXT: bne a1, a2, .LBB22_1 +; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: addi a0, a0, 8 +; CHECK-NEXT: bnez a1, .LBB22_1 ; CHECK-NEXT: # %bb.2: # %exit ; CHECK-NEXT: ret entry: diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll index 7a78b8ea018c3..d78f34cc264fb 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll @@ -41,11 +41,12 @@ define void @test2(ptr %a) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[LSR_IV]] -; CHECK-NEXT: store float 1.000000e+00, ptr [[UGLYGEP]], align 4 -; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 
[[LSR_IV]], 4 -; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 128000, [[LSR_IV_NEXT]] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP:%.*]], [[LOOP]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 32000, [[ENTRY]] ] +; CHECK-NEXT: store float 1.000000e+00, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1 +; CHECK-NEXT: [[UGLYGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 +; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 ; CHECK-NEXT: br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: call void @use(ptr [[A]]) @@ -111,14 +112,16 @@ define void @test4(ptr %a, ptr %b) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[LSR_IV]] -; CHECK-NEXT: [[T17:%.*]] = load float, ptr [[UGLYGEP1]], align 4 +; CHECK-NEXT: [[LSR_IV2:%.*]] = phi ptr [ [[UGLYGEP3:%.*]], [[LOOP]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP:%.*]], [[LOOP]] ], [ [[B:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 32000, [[ENTRY]] ] +; CHECK-NEXT: [[T17:%.*]] = load float, ptr [[LSR_IV2]], align 4 ; CHECK-NEXT: [[T18:%.*]] = fadd float [[T17]], 1.000000e+00 -; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[LSR_IV]] -; CHECK-NEXT: store float [[T18]], ptr [[UGLYGEP]], align 4 -; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4 -; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 128000, [[LSR_IV_NEXT]] +; CHECK-NEXT: store float [[T18]], ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1 +; CHECK-NEXT: [[UGLYGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 +; CHECK-NEXT: [[UGLYGEP3]] = getelementptr i8, ptr [[LSR_IV2]], i64 4 +; CHECK-NEXT: 
[[T21:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 ; CHECK-NEXT: br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: call void @use(ptr [[A]])