[RISCV] Optimize more redundant VSETVLIs
D99717 introduced some test cases which showed that feeding the output of
one vsetvli into another would not be picked up by the RISCVCleanupVSETVLI
pass. This patch teaches the optimization about such a pattern. The pattern
is quite common when using the RVV vsetvli intrinsic to pass the VL on to
other intrinsics.
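
As an illustration (not part of the patch), C-level RVV intrinsic code along
these lines produces the pattern; the function name is made up here, and the
intrinsic names follow the early RVV intrinsics proposal, so they may differ
between toolchain versions:

#include <riscv_vector.h>

// The VL computed by the first vsetvli is forwarded straight into the load,
// so codegen emits a second vsetvli whose AVL operand is the first one's
// output -- exactly the redundancy this patch teaches the cleanup to remove.
vint32m2_t load_with_forwarded_vl(const int32_t *ptr, size_t avl) {
  size_t vl = vsetvl_e32m2(avl);   // first vsetvli: computes VL from AVL
  return vle32_v_i32m2(ptr, vl);   // second vsetvli: AVL is the first's VL
}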

The second test case introduced by D99717 is left unoptimized by this
patch. It is a rarer case, and handling it will require rewiring any uses of
the redundant vset[i]vli's output to the previous one's output.
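
A rough sketch of the rewiring that case would need, assuming
MachineRegisterInfo::replaceRegWith is the mechanism used to forward the
uses (the helper below is hypothetical, not code from the patch):

// Hypothetical helper: before erasing a redundant vset[i]vli whose output
// still has uses, forward those uses to the previous instruction's output.
static void rewireAndErase(MachineInstr &MI, MachineInstr &PrevVSETVLI,
                           MachineRegisterInfo &MRI) {
  Register OldVL = MI.getOperand(0).getReg();
  Register PrevVL = PrevVSETVLI.getOperand(0).getReg();
  if (OldVL.isVirtual() && PrevVL.isVirtual()) {
    MRI.replaceRegWith(OldVL, PrevVL); // rewire all uses of the dead output
    MI.eraseFromParent();              // now safe to delete the vset[i]vli
  }
}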

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D99730
frasercrmck committed Apr 2, 2021
1 parent a4ac847 commit 3b48d84
Showing 4 changed files with 58 additions and 16 deletions.
9 changes: 8 additions & 1 deletion llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
@@ -75,11 +75,19 @@ static bool isRedundantVSETVLI(MachineInstr &MI, MachineInstr *PrevVSETVLI) {

assert(MI.getOpcode() == RISCV::PseudoVSETVLI);
Register AVLReg = MI.getOperand(1).getReg();
Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg();

// If this VSETVLI isn't changing VL, it is redundant.
if (AVLReg == RISCV::X0 && MI.getOperand(0).getReg() == RISCV::X0)
return true;

// If the previous VSET{I}VLI's output (which isn't X0) is fed into this
// VSETVLI, this one isn't changing VL so is redundant.
// Only perform this on virtual registers to avoid the complexity of having
// to work out if the physical register was clobbered somewhere in between.
if (AVLReg.isVirtual() && AVLReg == PrevOutVL)
return true;

// If the previous opcode isn't vsetvli we can't do any more comparison.
if (PrevVSETVLI->getOpcode() != RISCV::PseudoVSETVLI)
return false;
@@ -94,7 +102,6 @@ static bool isRedundantVSETVLI(MachineInstr &MI, MachineInstr *PrevVSETVLI) {
// This instruction is setting VL to VLMAX, this is redundant if the
// previous VSETVLI was also setting VL to VLMAX. But it is not redundant
// if they were setting it to any other value or leaving VL unchanged.
Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg();
return PrevOutVL != RISCV::X0;
}

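For context, here is a simplified sketch (an assumption about the
surrounding pass, not code from this commit) of the per-block loop that
drives the isRedundantVSETVLI predicate above:

// Walk each block, remembering the last VSET{I}VLI seen. A real pass must
// also invalidate PrevVSETVLI across instructions that can clobber VL or
// VTYPE, and only erase instructions whose output register is dead.
static bool cleanupBlock(MachineBasicBlock &MBB) {
  bool Changed = false;
  MachineInstr *PrevVSETVLI = nullptr;
  for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
    bool IsVSETVLI = MI.getOpcode() == RISCV::PseudoVSETVLI;
    if (!IsVSETVLI && MI.getOpcode() != RISCV::PseudoVSETIVLI)
      continue; // simplification: assume nothing else touches VL/VTYPE
    if (IsVSETVLI && PrevVSETVLI && isRedundantVSETVLI(MI, PrevVSETVLI)) {
      MI.eraseFromParent(); // drop the redundant vsetvli
      Changed = true;
      continue;             // PrevVSETVLI still reflects the VL/VTYPE state
    }
    PrevVSETVLI = &MI;
  }
  return Changed;
}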
53 changes: 46 additions & 7 deletions llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir
@@ -1,25 +1,28 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc %s -mtriple=riscv64 -run-pass=riscv-cleanup-vsetvli -o - | FileCheck %s

# Make sure we don't combine these two VSETVLIs in the cleanup pass. The first
# keeps the previous value of VL, the second time sets it to VLMAX. We can't
# remove the first since we can't tell if this is a change VL.

--- |
; ModuleID = '../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll'
source_filename = "../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"

define void @cleanup_vsetvli() #0 {
define void @cleanup_vsetvli0() #0 {
ret void
}

define void @cleanup_vsetvli1() #0 {
ret void
}

attributes #0 = { "target-features"="+experimental-v" }

...
---
name: cleanup_vsetvli
# Make sure we don't combine these two VSETVLIs in the cleanup pass. The first
# keeps the previous value of VL, the second sets it to VLMAX. We can't remove
# the first since we can't tell if this is a change of VL.
name: cleanup_vsetvli0
alignment: 4
tracksRegLiveness: true
registers:
@@ -29,7 +32,7 @@ frameInfo:
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
; CHECK-LABEL: name: cleanup_vsetvli
; CHECK-LABEL: name: cleanup_vsetvli0
; CHECK: dead $x0 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
; CHECK: dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
; CHECK: PseudoRET
@@ -38,3 +41,39 @@ body: |
PseudoRET
...
---
# 1. Ensure we can remove the second VSETVLI which takes its AVL from the first VSETVLI.
# 2. Ensure we can remove the fourth VSETVLI which takes its AVL from the VSETIVLI.
# 3. Make sure we don't combine the latter two VSETVLIs; the first outputs to a
# physical register which is clobbered by a later instruction.
name: cleanup_vsetvli1
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: gpr }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $x3
; CHECK-LABEL: name: cleanup_vsetvli1
; CHECK: liveins: $x3
; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVSETIVLI:%[0-9]+]]:gpr = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype
; CHECK: $x1 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
; CHECK: $x1 = COPY $x3
; CHECK: dead %4:gpr = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype
; CHECK: PseudoRET
%0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
dead %1:gpr = PseudoVSETVLI %0, 12, implicit-def $vl, implicit-def $vtype
%2:gpr = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype
dead %3:gpr = PseudoVSETVLI %2, 12, implicit-def $vl, implicit-def $vtype
$x1 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
$x1 = COPY $x3
dead %4:gpr = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype
PseudoRET
...
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
@@ -34,12 +34,10 @@ define void @test_vsetvlimax_e64m8() nounwind {
declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>*, i32)

; Check that we remove the redundant vsetvli when followed by another operation
; FIXME: We don't
define <vscale x 4 x i32> @redundant_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr) nounwind {
; CHECK-LABEL: redundant_vsetvli:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: ret
%vl = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1)
@@ -49,13 +47,13 @@ define <vscale x 4 x i32> @redundant_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr)

; Check that we remove the repeated/redundant vsetvli when followed by another
; operation
; FIXME: We don't
; FIXME: We don't catch the second vsetvli because it has a use of its output.
; We could replace it with the output of the first vsetvli.
define <vscale x 4 x i32> @repeated_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr) nounwind {
; CHECK-LABEL: repeated_vsetvli:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: ret
%vl0 = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1)
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
@@ -52,12 +52,10 @@ define void @test_vsetvlimax_e64m4() nounwind {
declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>*, i64)

; Check that we remove the redundant vsetvli when followed by another operation
; FIXME: We don't
define <vscale x 4 x i32> @redundant_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr) nounwind {
; CHECK-LABEL: redundant_vsetvli:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: ret
%vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 1)
@@ -67,13 +65,13 @@ define <vscale x 4 x i32> @redundant_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr)

; Check that we remove the repeated/redundant vsetvli when followed by another
; operation
; FIXME: We don't
; FIXME: We don't catch the second vsetvli because it has a use of its output.
; We could replace it with the output of the first vsetvli.
define <vscale x 4 x i32> @repeated_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr) nounwind {
; CHECK-LABEL: repeated_vsetvli:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: ret
%vl0 = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 1)
