[RISCV] Look through copies when trying to find an implicit def in ad…

…dVSetVL. The InstrEmitter can sometimes insert a copy after an IMPLICIT_DEF before connecting it to the vector instruction. This occurs when constrainRegClass reduces to a class with less than 4 registers. I believe LMUL8 on masked instructions triggers this since the result can only use the v8, v16, or v24 register group as the mask is using v0. Reviewed By: frasercrmck Differential Revision: https://reviews.llvm.org/D98567
llvm · Mar 16, 2021 · 229eeb1 · 229eeb1
1 parent 61ca706
commit 229eeb1
Show file tree

Hide file tree

Showing 4 changed files with 93 additions and 9 deletions.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4845,6 +4845,19 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
   return TailMBB;
 }
 
+static MachineInstr *elideCopies(MachineInstr *MI,
+                                 const MachineRegisterInfo &MRI) {
+  while (true) {
+    if (!MI->isFullCopy())
+      return MI;
+    if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
+      return nullptr;
+    MI = MRI.getVRegDef(MI->getOperand(1).getReg());
+    if (!MI)
+      return nullptr;
+  }
+}
+
 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
                                     int VLIndex, unsigned SEWIndex,
                                     RISCVVLMUL VLMul, bool ForceTailAgnostic) {
@@ -4905,8 +4918,11 @@ static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
     // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
     const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
     MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
-    if (UseMI && UseMI->isImplicitDef())
-      TailAgnostic = true;
+    if (UseMI) {
+      UseMI = elideCopies(UseMI, MRI);
+      if (UseMI && UseMI->isImplicitDef())
+        TailAgnostic = true;
+    }
   }
 
   // For simplicity we reuse the vtype representation here.

diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll
@@ -126,7 +126,7 @@ declare <vscale x 8 x float> @llvm.masked.load.nxv8f32(<vscale x 8 x float>*, i3
 define <vscale x 8 x double> @masked_load_nxv8f64(<vscale x 8 x double>* %a, <vscale x 8 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; CHECK-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
 ; CHECK-NEXT:    vle64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %load = call <vscale x 8 x double> @llvm.masked.load.nxv8f64(<vscale x 8 x double>* %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x double> undef)
@@ -148,7 +148,7 @@ declare <vscale x 16 x half> @llvm.masked.load.nxv16f16(<vscale x 16 x half>*, i
 define <vscale x 16 x float> @masked_load_nxv16f32(<vscale x 16 x float>* %a, <vscale x 16 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32,m8,tu,mu
+; CHECK-NEXT:    vsetvli a1, zero, e32,m8,ta,mu
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %load = call <vscale x 16 x float> @llvm.masked.load.nxv16f32(<vscale x 16 x float>* %a, i32 4, <vscale x 16 x i1> %mask, <vscale x 16 x float> undef)
@@ -159,7 +159,7 @@ declare <vscale x 16 x float> @llvm.masked.load.nxv16f32(<vscale x 16 x float>*,
 define <vscale x 32 x half> @masked_load_nxv32f16(<vscale x 32 x half>* %a, <vscale x 32 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv32f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16,m8,tu,mu
+; CHECK-NEXT:    vsetvli a1, zero, e16,m8,ta,mu
 ; CHECK-NEXT:    vle16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %load = call <vscale x 32 x half> @llvm.masked.load.nxv32f16(<vscale x 32 x half>* %a, i32 2, <vscale x 32 x i1> %mask, <vscale x 32 x half> undef)

diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
@@ -170,7 +170,7 @@ declare <vscale x 8 x i32> @llvm.masked.load.nxv8i32(<vscale x 8 x i32>*, i32, <
 define <vscale x 8 x i64> @masked_load_nxv8i64(<vscale x 8 x i64>* %a, <vscale x 8 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv8i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; CHECK-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
 ; CHECK-NEXT:    vle64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %load = call <vscale x 8 x i64> @llvm.masked.load.nxv8i64(<vscale x 8 x i64>* %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x i64> undef)
@@ -203,7 +203,7 @@ declare <vscale x 16 x i16> @llvm.masked.load.nxv16i16(<vscale x 16 x i16>*, i32
 define <vscale x 16 x i32> @masked_load_nxv16i32(<vscale x 16 x i32>* %a, <vscale x 16 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv16i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32,m8,tu,mu
+; CHECK-NEXT:    vsetvli a1, zero, e32,m8,ta,mu
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %load = call <vscale x 16 x i32> @llvm.masked.load.nxv16i32(<vscale x 16 x i32>* %a, i32 4, <vscale x 16 x i1> %mask, <vscale x 16 x i32> undef)
@@ -225,7 +225,7 @@ declare <vscale x 32 x i8> @llvm.masked.load.nxv32i8(<vscale x 32 x i8>*, i32, <
 define <vscale x 32 x i16> @masked_load_nxv32i16(<vscale x 32 x i16>* %a, <vscale x 32 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv32i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16,m8,tu,mu
+; CHECK-NEXT:    vsetvli a1, zero, e16,m8,ta,mu
 ; CHECK-NEXT:    vle16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %load = call <vscale x 32 x i16> @llvm.masked.load.nxv32i16(<vscale x 32 x i16>* %a, i32 2, <vscale x 32 x i1> %mask, <vscale x 32 x i16> undef)
@@ -236,7 +236,7 @@ declare <vscale x 32 x i16> @llvm.masked.load.nxv32i16(<vscale x 32 x i16>*, i32
 define <vscale x 64 x i8> @masked_load_nxv64i8(<vscale x 64 x i8>* %a, <vscale x 64 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv64i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e8,m8,tu,mu
+; CHECK-NEXT:    vsetvli a1, zero, e8,m8,ta,mu
 ; CHECK-NEXT:    vle8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %load = call <vscale x 64 x i8> @llvm.masked.load.nxv64i8(<vscale x 64 x i8>* %a, i32 1, <vscale x 64 x i1> %mask, <vscale x 64 x i8> undef)

diff --git a/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir b/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir
@@ -0,0 +1,68 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -mtriple=riscv64 -mattr=experimental-v -riscv-v-vector-bits-min=128 -run-pass=finalize-isel -o - | FileCheck %s
+
+# This test makes sure we peak through the COPY instruction between the
+# IMPLICIT_DEF and PseudoVLE64_V_M8_MASK in order to select the tail agnostic
+# policy. The test is working if the second argument to PseudoVSETVLI has bit 6
+# set.
+
+--- |
+  ; ModuleID = 'test.ll'
+  source_filename = "test.ll"
+  target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+  target triple = "riscv64"
+
+  ; Function Attrs: nounwind
+  define <vscale x 8 x i64> @masked_load_nxv8i64(<vscale x 8 x i64>* %a, <vscale x 8 x i1> %mask) #0 {
+    %load = call <vscale x 8 x i64> @llvm.masked.load.nxv8i64.p0nxv8i64(<vscale x 8 x i64>* %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x i64> undef)
+    ret <vscale x 8 x i64> %load
+  }
+
+  ; Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
+  declare <vscale x 8 x i64> @llvm.masked.load.nxv8i64.p0nxv8i64(<vscale x 8 x i64>*, i32 immarg, <vscale x 8 x i1>, <vscale x 8 x i64>) #1
+
+  attributes #0 = { nounwind "target-features"="+experimental-v" }
+  attributes #1 = { argmemonly nofree nosync nounwind readonly willreturn "target-features"="+experimental-v" }
+
+...
+---
+name:            masked_load_nxv8i64
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: vr }
+  - { id: 2, class: vrm8nov0 }
+  - { id: 3, class: vrm8 }
+  - { id: 4, class: vrm8nov0 }
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+  - { reg: '$v0', virtual-reg: '%1' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: $x10, $v0
+
+    ; CHECK-LABEL: name: masked_load_nxv8i64
+    ; CHECK: liveins: $x10, $v0
+    ; CHECK: [[COPY:%[0-9]+]]:vr = COPY $v0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK: $v0 = COPY [[COPY]]
+    ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+    ; CHECK: [[COPY2:%[0-9]+]]:vrm8nov0 = COPY [[DEF]]
+    ; CHECK: dead %5:gpr = PseudoVSETVLI $x0, 91, implicit-def $vl, implicit-def $vtype
+    ; CHECK: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK [[COPY2]], [[COPY1]], $v0, $noreg, 64, implicit $vl, implicit $vtype :: (load 64 from %ir.a, align 8)
+    ; CHECK: $v8m8 = COPY [[PseudoVLE64_V_M8_MASK]]
+    ; CHECK: PseudoRET implicit $v8m8
+    %1:vr = COPY $v0
+    %0:gpr = COPY $x10
+    $v0 = COPY %1
+    %3:vrm8 = IMPLICIT_DEF
+    %4:vrm8nov0 = COPY %3
+    %2:vrm8nov0 = PseudoVLE64_V_M8_MASK %4, %0, $v0, $x0, 64, implicit $vl, implicit $vtype :: (load 64 from %ir.a, align 8)
+    $v8m8 = COPY %2
+    PseudoRET implicit $v8m8
+
+...