[RISCV] Add patterns for RV64I SLLW/SRLW/SRAW instructions

This restores support for selecting the SLLW/SRLW/SRAW instructions, which was
removed in rL348067 as the previous patterns made some unsafe assumptions.
Also see the related llvm-dev discussion
<http://lists.llvm.org/pipermail/llvm-dev/2018-December/128497.html>

Ultimately I didn't introduce a custom SelectionDAG node. Instead, I added two
DAG combines: one that inserts an AssertZext i5 on the shift amount of an i32
variable-length shift, and one that rewrites an ANY_EXTEND of an i32
variable-length shift to a SIGN_EXTEND. Together these increase the
opportunity to safely select SLLW/SRLW/SRAW.
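
To make this concrete, here is a minimal standalone sketch (mine, not part of
the commit) of why the combine is sound: an i32 shift is only defined for
amounts 0 through 31, so asserting that the amount zero-extends from i5 loses
nothing, and SLLW itself reads only the low 5 bits of rs2 and sign-extends its
32-bit result.

#include <cassert>
#include <cstdint>

// Simulated RV64 SLLW: shift the low 32 bits of rs1 by the low 5 bits of
// rs2, then sign-extend the 32-bit result to 64 bits.
int64_t sllw(int64_t rs1, int64_t rs2) {
  return static_cast<int32_t>(static_cast<uint32_t>(rs1) << (rs2 & 31));
}

int main() {
  uint32_t a = 0xFFFFFFFB; // an arbitrary i32 bit pattern (-5)
  for (int amt = 0; amt < 32; ++amt) {
    // For every defined i32 shift amount, SLLW yields the sign-extended
    // i32 result: exactly the shape the AssertZext i5 and SIGN_EXTEND
    // combines expose to instruction selection.
    int32_t expected = static_cast<int32_t>(a << amt);
    assert(static_cast<int32_t>(sllw(a, amt)) == expected);
  }
  return 0;
}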

There are obviously different ways of addressing this (a number of which were
discussed in the llvm-dev thread), so I'd welcome further feedback and
comments.

Note that there are now some cases in
test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll where sraw/srlw/sllw is
selected even though sra/srl/sll could be used without any extra instructions.
Since the two forms are semantically equivalent in those cases, there doesn't
seem to be a good reason to prefer one over the other, and since still
selecting sra/srl/sll there would require extra logic, I've left the selector
preferring the *w variants.
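
As a hedged sanity check of that equivalence (again mine, not from the
commit): when rs1 is already sign-extended from i32 and the shift amount is
known to be below 32, RV64 sra and sraw produce bit-identical results, so
neither selection is wrong.

#include <cassert>
#include <cstdint>

// Simulated RV64 SRA and SRAW (arithmetic right shifts).
int64_t sra64(int64_t rs1, int64_t rs2) { return rs1 >> (rs2 & 63); }
int64_t sraw(int64_t rs1, int64_t rs2) {
  // Shift the low 32 bits arithmetically; the result is sign-extended.
  return static_cast<int32_t>(rs1) >> (rs2 & 31);
}

int main() {
  // With rs1 sign-extended from i32 and the amount below 32, the two
  // instructions agree, so preferring the *w variant costs nothing.
  int64_t x = static_cast<int32_t>(0x80001234u);
  for (int64_t amt = 0; amt < 32; ++amt)
    assert(sra64(x, amt) == sraw(x, amt));
  return 0;
}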

Differential Revision: https://reviews.llvm.org/D56264

llvm-svn: 350992
asb committed Jan 12, 2019
1 parent a69d903 commit d05eae7
Showing 5 changed files with 184 additions and 185 deletions.
51 changes: 51 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -80,6 +80,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
 
+  if (Subtarget.is64Bit()) {
+    setTargetDAGCombine(ISD::SHL);
+    setTargetDAGCombine(ISD::SRL);
+    setTargetDAGCombine(ISD::SRA);
+    setTargetDAGCombine(ISD::ANY_EXTEND);
+  }
+
   if (!Subtarget.hasStdExtM()) {
     setOperationAction(ISD::MUL, XLenVT, Expand);
     setOperationAction(ISD::MULHS, XLenVT, Expand);
@@ -506,11 +513,55 @@ SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
 }
 
+// Return true if the given node is a shift with a non-constant shift amount.
+static bool isVariableShift(SDValue Val) {
+  switch (Val.getOpcode()) {
+  default:
+    return false;
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+    return Val.getOperand(1).getOpcode() != ISD::Constant;
+  }
+}
+
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+
   switch (N->getOpcode()) {
   default:
     break;
+  case ISD::SHL:
+  case ISD::SRL:
+  case ISD::SRA: {
+    assert(Subtarget.getXLen() == 64 && "Combine should be 64-bit only");
+    if (!DCI.isBeforeLegalize())
+      break;
+    SDValue RHS = N->getOperand(1);
+    if (N->getValueType(0) != MVT::i32 || RHS->getOpcode() == ISD::Constant ||
+        (RHS->getOpcode() == ISD::AssertZext &&
+         cast<VTSDNode>(RHS->getOperand(1))->getVT().getSizeInBits() <= 5))
+      break;
+    SDValue LHS = N->getOperand(0);
+    SDLoc DL(N);
+    SDValue NewRHS =
+        DAG.getNode(ISD::AssertZext, DL, RHS.getValueType(), RHS,
+                    DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 5)));
+    return DCI.CombineTo(
+        N, DAG.getNode(N->getOpcode(), DL, LHS.getValueType(), LHS, NewRHS));
+  }
+  case ISD::ANY_EXTEND: {
+    // If any-extending an i32 variable-length shift to i64, then instead
+    // sign-extend in order to increase the chance of being able to select the
+    // sllw/srlw/sraw instruction.
+    SDValue Src = N->getOperand(0);
+    if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32 ||
+        !isVariableShift(Src))
+      break;
+    SDLoc DL(N);
+    return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src));
+  }
   case RISCVISD::SplitF64: {
     // If the input to SplitF64 is just BuildPairF64 then the operation is
     // redundant. Instead, use BuildPairF64's operands directly.
47 changes: 46 additions & 1 deletion llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -654,6 +654,30 @@ class PatGprUimmLog2XLen<SDPatternOperator OpNode, RVInstIShift Inst>
 def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{
   return isOrEquivalentToAdd(N);
 }]>;
+def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
+  return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32;
+}]>;
+def sexti32 : PatFrags<(ops node:$src),
+                       [(sext_inreg node:$src, i32),
+                        (assertsexti32 node:$src)]>;
+def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
+  return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32;
+}]>;
+def assertzexti5 : PatFrag<(ops node:$src), (assertzext node:$src), [{
+  return cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits() <= 5;
+}]>;
+def zexti32 : PatFrags<(ops node:$src),
+                       [(and node:$src, 0xffffffff),
+                        (assertzexti32 node:$src)]>;
+// Defines a legal mask for (assertzexti5 (and src, mask)) to be combinable
+// with a shiftw operation. The mask mustn't modify the lower 5 bits or the
+// upper 32 bits.
+def shiftwamt_mask : ImmLeaf<XLenVT, [{
+  return countTrailingOnes<uint64_t>(Imm) >= 5 && isUInt<32>(Imm);
+}]>;
+def shiftwamt : PatFrags<(ops node:$src),
+                         [(assertzexti5 (and node:$src, shiftwamt_mask)),
+                          (assertzexti5 node:$src)]>;
 
 /// Immediates
 
@@ -911,7 +935,28 @@ def : Pat<(sext_inreg (shl GPR:$rs1, uimm5:$shamt), i32),
 def : Pat<(sra (sext_inreg GPR:$rs1, i32), uimm5:$shamt),
           (SRAIW GPR:$rs1, uimm5:$shamt)>;
 
-// TODO: patterns for SLLW/SRLW/SRAW.
+// For variable-length shifts, we rely on assertzexti5 being inserted during
+// lowering (see RISCVTargetLowering::PerformDAGCombine). This enables us to
+// guarantee that selecting a 32-bit variable shift is legal (as the variable
+// shift is known to be <= 32). We must also be careful not to create
+// semantically incorrect patterns. For instance, selecting SRLW for
+// (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)),
+// is not guaranteed to be safe, as we don't know whether the upper 32-bits of
+// the result are used or not (in the case where rs2=0, this is a
+// sign-extension operation).
+
+def : Pat<(sext_inreg (shl GPR:$rs1, (shiftwamt GPR:$rs2)), i32),
+          (SLLW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (shl GPR:$rs1, (shiftwamt GPR:$rs2))),
+          (SRLI (SLLI (SLLW GPR:$rs1, GPR:$rs2), 32), 32)>;
+
+def : Pat<(sext_inreg (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)), i32),
+          (SRLW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2))),
+          (SRLI (SLLI (SRLW GPR:$rs1, GPR:$rs2), 32), 32)>;
+
+def : Pat<(sra (sexti32 GPR:$rs1), (shiftwamt GPR:$rs2)),
+          (SRAW GPR:$rs1, GPR:$rs2)>;
 
 /// Loads
 
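
The safety caveat in the new RISCVInstrInfo.td comment above can be made
concrete. A minimal sketch (mine, not part of the commit): with rs2 = 0, SRLW
sign-extends the low 32 bits of its result, while the zexti32 form guarantees
zero upper bits, so selecting SRLW for
(srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)) alone would be wrong whenever
bit 31 of rs1 is set.

#include <cassert>
#include <cstdint>

// Simulated RV64 SRLW: logical right shift of the low 32 bits of rs1 by
// the low 5 bits of rs2, with the 32-bit result sign-extended to 64 bits.
uint64_t srlw(uint64_t rs1, uint64_t rs2) {
  uint32_t lo = static_cast<uint32_t>(rs1) >> (rs2 & 31);
  return static_cast<uint64_t>(static_cast<int64_t>(static_cast<int32_t>(lo)));
}

int main() {
  uint64_t rs1 = 0x80000000; // zexti32 form: upper 32 bits already zero
  uint64_t viaSrl = rs1 >> 0;      // (srl (zexti32 rs1), 0): 0x0000000080000000
  uint64_t viaSrlw = srlw(rs1, 0); // SRLW sign-extends:      0xFFFFFFFF80000000
  assert(viaSrl != viaSrlw);       // hence the patterns require an outer sext/zext
  return 0;
}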
13 changes: 3 additions & 10 deletions llvm/test/CodeGen/RISCV/alu32.ll
@@ -181,7 +181,7 @@ define i32 @sll(i32 %a, i32 %b) nounwind {
 ;
 ; RV64I-LABEL: sll:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: sll a0, a0, a1
+; RV64I-NEXT: sllw a0, a0, a1
 ; RV64I-NEXT: ret
   %1 = shl i32 %a, %b
   ret i32 %1
@@ -235,8 +235,6 @@ define i32 @xor(i32 %a, i32 %b) nounwind {
   ret i32 %1
 }
 
-; TODO: should select srlw for RV64.
-
 define i32 @srl(i32 %a, i32 %b) nounwind {
 ; RV32I-LABEL: srl:
 ; RV32I: # %bb.0:
@@ -245,16 +243,12 @@ define i32 @srl(i32 %a, i32 %b) nounwind {
 ;
 ; RV64I-LABEL: srl:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
-; RV64I-NEXT: srl a0, a0, a1
+; RV64I-NEXT: srlw a0, a0, a1
 ; RV64I-NEXT: ret
   %1 = lshr i32 %a, %b
   ret i32 %1
 }
 
-; TODO: should select sraw for RV64.
-
 define i32 @sra(i32 %a, i32 %b) nounwind {
 ; RV32I-LABEL: sra:
 ; RV32I: # %bb.0:
@@ -263,8 +257,7 @@ define i32 @sra(i32 %a, i32 %b) nounwind {
 ;
 ; RV64I-LABEL: sra:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: sra a0, a0, a1
+; RV64I-NEXT: sraw a0, a0, a1
 ; RV64I-NEXT: ret
   %1 = ashr i32 %a, %b
   ret i32 %1
17 changes: 3 additions & 14 deletions llvm/test/CodeGen/RISCV/alu64.ll
@@ -444,13 +444,10 @@ define signext i32 @subw(i32 signext %a, i32 signext %b) {
   ret i32 %1
 }
 
-; TODO: should select sllw for RV64.
-
 define signext i32 @sllw(i32 signext %a, i32 zeroext %b) {
 ; RV64I-LABEL: sllw:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: sll a0, a0, a1
-; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: sllw a0, a0, a1
 ; RV64I-NEXT: ret
 ;
 ; RV32I-LABEL: sllw:
@@ -461,15 +458,10 @@ define signext i32 @sllw(i32 signext %a, i32 zeroext %b) {
   ret i32 %1
 }
 
-; TODO: should select srlw for RV64.
-
 define signext i32 @srlw(i32 signext %a, i32 zeroext %b) {
 ; RV64I-LABEL: srlw:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
-; RV64I-NEXT: srl a0, a0, a1
-; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: srlw a0, a0, a1
 ; RV64I-NEXT: ret
 ;
 ; RV32I-LABEL: srlw:
@@ -480,13 +472,10 @@ define signext i32 @srlw(i32 signext %a, i32 zeroext %b) {
   ret i32 %1
 }
 
-; TODO: should select sraw for RV64.
-
 define signext i32 @sraw(i64 %a, i32 zeroext %b) {
 ; RV64I-LABEL: sraw:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: sra a0, a0, a1
+; RV64I-NEXT: sraw a0, a0, a1
 ; RV64I-NEXT: ret
 ;
 ; RV32I-LABEL: sraw:
