[RISCV] Optimize more redundant VSETVLIs
D99717 introduced some test cases which showed that feeding the output of
one vsetvli into another would not be picked up by the RISCVCleanupVSETVLI
pass. This patch teaches the optimization about such a pattern. The pattern
is quite common when using the RVV vsetvli intrinsic to pass the VL on to
other intrinsics.
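
As an illustration (not part of the patch), C-level RVV intrinsic code along
these lines produces the pattern; the function name is made up here, and the
intrinsic names follow the early RVV intrinsics proposal, so they may differ
between toolchain versions:

#include <riscv_vector.h>

// The VL computed by the first vsetvli is forwarded straight into the load,
// so codegen emits a second vsetvli whose AVL operand is the first one's
// output -- exactly the redundancy this patch teaches the cleanup to remove.
vint32m2_t load_with_forwarded_vl(const int32_t *ptr, size_t avl) {
  size_t vl = vsetvl_e32m2(avl);   // first vsetvli: computes VL from AVL
  return vle32_v_i32m2(ptr, vl);   // second vsetvli: AVL is the first's VL
}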

The second test case introduced by D99717 is left unoptimized by this
patch. It is a rarer case, and handling it will require rewiring any uses of
the redundant vset[i]vli's output to the previous one's output.
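
A rough sketch of the rewiring that case would need, assuming
MachineRegisterInfo::replaceRegWith is the mechanism used to forward the
uses (the helper below is hypothetical, not code from the patch):

// Hypothetical helper: before erasing a redundant vset[i]vli whose output
// still has uses, forward those uses to the previous instruction's output.
static void rewireAndErase(MachineInstr &MI, MachineInstr &PrevVSETVLI,
                           MachineRegisterInfo &MRI) {
  Register OldVL = MI.getOperand(0).getReg();
  Register PrevVL = PrevVSETVLI.getOperand(0).getReg();
  if (OldVL.isVirtual() && PrevVL.isVirtual()) {
    MRI.replaceRegWith(OldVL, PrevVL); // rewire all uses of the dead output
    MI.eraseFromParent();              // now safe to delete the vset[i]vli
  }
}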

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D99730
frasercrmck committed Apr 2, 2021
1 parent a4ac847 commit 3b48d84
Showing 4 changed files with 58 additions and 16 deletions.
9 changes: 8 additions & 1 deletion llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
@@ -75,11 +75,19 @@ static bool isRedundantVSETVLI(MachineInstr &MI, MachineInstr *PrevVSETVLI) {

assert(MI.getOpcode() == RISCV::PseudoVSETVLI);
Register AVLReg = MI.getOperand(1).getReg();
Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg();

// If this VSETVLI isn't changing VL, it is redundant.
if (AVLReg == RISCV::X0 && MI.getOperand(0).getReg() == RISCV::X0)
return true;

// If the previous VSET{I}VLI's output (which isn't X0) is fed into this
// VSETVLI, this one isn't changing VL so is redundant.
// Only perform this on virtual registers to avoid the complexity of having
// to work out if the physical register was clobbered somewhere in between.
if (AVLReg.isVirtual() && AVLReg == PrevOutVL)
return true;

// If the previous opcode isn't vsetvli we can't do any more comparison.
if (PrevVSETVLI->getOpcode() != RISCV::PseudoVSETVLI)
return false;
@@ -94,7 +102,6 @@ static bool isRedundantVSETVLI(MachineInstr &MI, MachineInstr *PrevVSETVLI) {
// This instruction is setting VL to VLMAX, this is redundant if the
// previous VSETVLI was also setting VL to VLMAX. But it is not redundant
// if they were setting it to any other value or leaving VL unchanged.
Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg();
return PrevOutVL != RISCV::X0;
}

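For context, here is a simplified sketch (an assumption about the
surrounding pass, not code from this commit) of the per-block loop that
drives the isRedundantVSETVLI predicate above:

// Walk each block, remembering the last VSET{I}VLI seen. A real pass must
// also invalidate PrevVSETVLI across instructions that can clobber VL or
// VTYPE, and only erase instructions whose output register is dead.
static bool cleanupBlock(MachineBasicBlock &MBB) {
  bool Changed = false;
  MachineInstr *PrevVSETVLI = nullptr;
  for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
    bool IsVSETVLI = MI.getOpcode() == RISCV::PseudoVSETVLI;
    if (!IsVSETVLI && MI.getOpcode() != RISCV::PseudoVSETIVLI)
      continue; // simplification: assume nothing else touches VL/VTYPE
    if (IsVSETVLI && PrevVSETVLI && isRedundantVSETVLI(MI, PrevVSETVLI)) {
      MI.eraseFromParent(); // drop the redundant vsetvli
      Changed = true;
      continue;             // PrevVSETVLI still reflects the VL/VTYPE state
    }
    PrevVSETVLI = &MI;
  }
  return Changed;
}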
53 changes: 46 additions & 7 deletions llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir
@@ -1,25 +1,28 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc %s -mtriple=riscv64 -run-pass=riscv-cleanup-vsetvli -o - | FileCheck %s

# Make sure we don't combine these two VSETVLIs in the cleanup pass. The first
# keeps the previous value of VL, the second time sets it to VLMAX. We can't
# remove the first since we can't tell if this is a change VL.

--- |
; ModuleID = '../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll'
source_filename = "../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"

define void @cleanup_vsetvli() #0 {
define void @cleanup_vsetvli0() #0 {
ret void
}

define void @cleanup_vsetvli1() #0 {
ret void
}

attributes #0 = { "target-features"="+experimental-v" }

...
---
name: cleanup_vsetvli
# Make sure we don't combine these two VSETVLIs in the cleanup pass. The first
# keeps the previous value of VL, the second sets it to VLMAX. We can't remove
# the first since we can't tell if this is a change of VL.
name: cleanup_vsetvli0
alignment: 4
tracksRegLiveness: true
registers:
@@ -29,7 +32,7 @@ frameInfo:
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
; CHECK-LABEL: name: cleanup_vsetvli
; CHECK-LABEL: name: cleanup_vsetvli0
; CHECK: dead $x0 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
; CHECK: dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
; CHECK: PseudoRET
@@ -38,3 +41,39 @@ body: |
PseudoRET
...
---
# 1. Ensure we can remove the second VSETVLI which takes its AVL from the first VSETVLI.
# 2. Ensure we can remove the fourth VSETVLI which takes its AVL from the VSETIVLI.
# 3. Make sure we don't combine the latter two VSETVLIs; the first outputs to a
# physical register which is clobbered by a later instruction.
name: cleanup_vsetvli1
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: gpr }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $x3
; CHECK-LABEL: name: cleanup_vsetvli1
; CHECK: liveins: $x3
; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVSETIVLI:%[0-9]+]]:gpr = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype
; CHECK: $x1 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
; CHECK: $x1 = COPY $x3
; CHECK: dead %4:gpr = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype
; CHECK: PseudoRET
%0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
dead %1:gpr = PseudoVSETVLI %0, 12, implicit-def $vl, implicit-def $vtype
%2:gpr = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype
dead %3:gpr = PseudoVSETVLI %2, 12, implicit-def $vl, implicit-def $vtype
$x1 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
$x1 = COPY $x3
dead %4:gpr = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype
PseudoRET
...
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
@@ -34,12 +34,10 @@ define void @test_vsetvlimax_e64m8() nounwind {
declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>*, i32)

; Check that we remove the redundant vsetvli when followed by another operation
; FIXME: We don't
define <vscale x 4 x i32> @redundant_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr) nounwind {
; CHECK-LABEL: redundant_vsetvli:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: ret
%vl = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1)
@@ -49,13 +47,13 @@ define <vscale x 4 x i32> @redundant_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr)

; Check that we remove the repeated/redundant vsetvli when followed by another
; operation
; FIXME: We don't
; FIXME: We don't catch the second vsetvli because it has a use of its output.
; We could replace it with the output of the first vsetvli.
define <vscale x 4 x i32> @repeated_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr) nounwind {
; CHECK-LABEL: repeated_vsetvli:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: ret
%vl0 = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1)
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
@@ -52,12 +52,10 @@ define void @test_vsetvlimax_e64m4() nounwind {
declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>*, i64)

; Check that we remove the redundant vsetvli when followed by another operation
; FIXME: We don't
define <vscale x 4 x i32> @redundant_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr) nounwind {
; CHECK-LABEL: redundant_vsetvli:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: ret
%vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 1)
@@ -67,13 +65,13 @@ define <vscale x 4 x i32> @redundant_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr)

; Check that we remove the repeated/redundant vsetvli when followed by another
; operation
; FIXME: We don't
; FIXME: We don't catch the second vsetvli because it has a use of its output.
; We could replace it with the output of the first vsetvli.
define <vscale x 4 x i32> @repeated_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr) nounwind {
; CHECK-LABEL: repeated_vsetvli:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: ret
%vl0 = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 1)
