[MSP430] Add codegen support for MSP430X shift instructions

MSP430X devices can use the RxxX and RxxM shift instructions, which are more efficient than the default MSP430 shift instructions. MSP430 shifts can only shift by one bit position at a time, whilst RxxM and RxxX can shift by multiple bit positions.

RxxM shifts only support shift counts up to 4, but are cheaper than RxxX shifts, which can shift by up to 16 bit positions. Therefore, there are some situations where it is preferable to generate one or two RxxM shifts instead of an RxxX shift.

RRCM and RRCX are not currently implemented, as they are not required while MSP430TargetLowering only lowers shifts with 8-bit or 16-bit operands. They could be used in the future to improve the efficiency of shifts for 32-bit or 64-bit operands.

Differential Revision: https://reviews.llvm.org/D110725
jozefl · Oct 12, 2021 · 05053b8 · 05053b8
1 parent ea81f1c
commit 05053b8
Show file tree

Hide file tree

Showing 7 changed files with 1,226 additions and 104 deletions.
diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -360,7 +360,8 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op,
 // Define non profitable transforms into shifts
 bool MSP430TargetLowering::shouldAvoidTransformToShift(EVT VT,
                                                        unsigned Amount) const {
-  return !(Amount == 8 || Amount == 9 || Amount<=2);
+  return !(Subtarget->hasMSP430X() || Amount == 8 || Amount == 9 ||
+           Amount <= 2); // With 430X, no shift amount is unprofitable.
 }
 
 // Implemented to verify test case assertions in
@@ -953,59 +954,183 @@ SDValue MSP430TargetLowering::LowerCallResult(
   return Chain;
 }
 
-SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
-                                          SelectionDAG &DAG) const {
+/// Lower the shift-by-8 part of shift Op as a byte swap plus an i8 in-register
+/// extension. Op's shift count is not read; callers apply any remaining shift.
+static SDValue shiftBy8(SDValue Op, SelectionDAG &DAG) {
   unsigned Opc = Op.getOpcode();
-  SDNode* N = Op.getNode();
   EVT VT = Op.getValueType();
-  SDLoc dl(N);
+  SDLoc Dl(Op);
+  SDValue Victim = Op.getOperand(0);
 
-  // Expand non-constant shifts to loops:
-  if (!isa<ConstantSDNode>(N->getOperand(1)))
-    return Op;
-
-  uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+  switch (Opc) {
+  default:
+    llvm_unreachable("Unknown shift");
+  case ISD::SHL:
+    // foo << (8 + N) => swpb(zext(foo)) << N
+    Victim = DAG.getZeroExtendInReg(Victim, Dl, MVT::i8);
+    Victim = DAG.getNode(ISD::BSWAP, Dl, VT, Victim);
+    break;
+  case ISD::SRA:
+  case ISD::SRL:
+    // foo >> (8 + N) => ext(swpb(foo)) >> N; ext is sxt (SRA) or zext (SRL)
+    Victim = DAG.getNode(ISD::BSWAP, Dl, VT, Victim);
+    Victim = (Opc == ISD::SRA) ? DAG.getNode(ISD::SIGN_EXTEND_INREG, Dl, VT,
+                                             Victim, DAG.getValueType(MVT::i8))
+                               : DAG.getZeroExtendInReg(Victim, Dl, MVT::i8);
+    break;
+  }
+  return Victim;
+}
 
-  // Expand the stuff into sequence of shifts.
-  SDValue Victim = N->getOperand(0);
+/// Lower a constant shift of a byte or word-sized operand to a chain of 430
+/// RRC/RLA/RRA nodes, one node per bit, after a byte swap for counts >= 8.
+static SDValue lowerShiftToRxx(SDValue Op, SelectionDAG &DAG) {
+  unsigned Opc = Op.getOpcode();
+  EVT VT = Op.getValueType();
+  SDLoc Dl(Op);
+  SDValue Victim = Op.getOperand(0);
+  assert(isa<ConstantSDNode>(Op.getOperand(1)));
+  int ShiftAmount = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 
   if (ShiftAmount >= 8) {
-    assert(VT == MVT::i16 && "Can not shift i8 by 8 and more");
-    switch(Opc) {
-    default:
-      llvm_unreachable("Unknown shift");
-    case ISD::SHL:
-      // foo << (8 + N) => swpb(zext(foo)) << N
-      Victim = DAG.getZeroExtendInReg(Victim, dl, MVT::i8);
-      Victim = DAG.getNode(ISD::BSWAP, dl, VT, Victim);
-      break;
-    case ISD::SRA:
-    case ISD::SRL:
-      // foo >> (8 + N) => sxt(swpb(foo)) >> N
-      Victim = DAG.getNode(ISD::BSWAP, dl, VT, Victim);
-      Victim = (Opc == ISD::SRA)
-                   ? DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Victim,
-                                 DAG.getValueType(MVT::i8))
-                   : DAG.getZeroExtendInReg(Victim, dl, MVT::i8);
-      break;
-    }
+    Victim = shiftBy8(Op, DAG);
+    // For SRL, the top byte is now 0, so we can go straight to emitting RRA.
+    // CLRC, RRC is not required.
     ShiftAmount -= 8;
-  }
-
-  if (Opc == ISD::SRL && ShiftAmount) {
+  } else if (Opc == ISD::SRL && ShiftAmount) { // Zero count must emit nothing.
     // Emit a special goodness here:
     // srl A, 1 => clrc; rrc A
-    Victim = DAG.getNode(MSP430ISD::RRCL, dl, VT, Victim);
+    Victim = DAG.getNode(MSP430ISD::RRCL, Dl, VT, Victim);
     ShiftAmount -= 1;
   }
 
   while (ShiftAmount--)
     Victim = DAG.getNode((Opc == ISD::SHL ? MSP430ISD::RLA : MSP430ISD::RRA),
-                         dl, VT, Victim);
+                         Dl, VT, Victim);
+  return Victim;
+}
+
+/// Lower a constant shift of a byte or word-sized operand to one 430X extended
+/// RLAX/RRUX/RRAX node with an i8 constant count (shiftBy8 if the count is 8).
+static SDValue lowerShiftToRxxX(SDValue Op, SelectionDAG &DAG) {
+  EVT VT = Op.getValueType();
+  SDLoc Dl(Op);
+  SDValue Victim = Op.getOperand(0);
+  assert(isa<ConstantSDNode>(Op.getOperand(1)));
+  int ShiftAmount = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+
+  if (ShiftAmount == 8)
+    return shiftBy8(Op, DAG);
+
+  switch (Op.getOpcode()) {
+  case ISD::SHL:
+    return DAG.getNode(MSP430ISD::RLAX, Dl, VT, Victim,
+                       DAG.getConstant(ShiftAmount, Dl, MVT::i8));
+  case ISD::SRA:
+    return DAG.getNode(MSP430ISD::RRAX, Dl, VT, Victim,
+                       DAG.getConstant(ShiftAmount, Dl, MVT::i8));
+  case ISD::SRL:
+    return DAG.getNode(MSP430ISD::RRUX, Dl, VT, Victim,
+                       DAG.getConstant(ShiftAmount, Dl, MVT::i8));
+  default:
+    llvm_unreachable("unhandled shift instruction");
+  }
+}
+
+/// Lower constant shifts of word-sized operands to chained 430X RLAM, RRUM or
+/// RRAM nodes of at most 4 bits each, after shiftBy8 for counts above 8.
+static SDValue lowerShiftToRxxM(SDValue Op, SelectionDAG &DAG) {
+  assert(Op.getValueSizeInBits() == 16 &&
+         "can only lower shifts to RxxM for word-sized ops");
+  EVT VT = Op.getValueType();
+  SDLoc Dl(Op);
+  SDValue Victim = Op.getOperand(0);
+  assert(isa<ConstantSDNode>(Op.getOperand(1)));
+  int ShiftAmount = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+
+  if (ShiftAmount > 8) {
+    Victim = shiftBy8(Op, DAG);
+    ShiftAmount -= 8;
+  }
+
+  while (ShiftAmount > 0) {
+    int CurrentShiftAmount = ShiftAmount;
+    if (CurrentShiftAmount > 4)
+      CurrentShiftAmount = 4; // RxxM can shift by at most 4 per node.
 
+    switch (Op.getOpcode()) {
+    case ISD::SHL:
+      Victim = DAG.getNode(MSP430ISD::RLAM, Dl, VT, Victim,
+                           DAG.getConstant(CurrentShiftAmount, Dl, MVT::i8));
+      break;
+    case ISD::SRA:
+      Victim = DAG.getNode(MSP430ISD::RRAM, Dl, VT, Victim,
+                           DAG.getConstant(CurrentShiftAmount, Dl, MVT::i8));
+      break;
+    case ISD::SRL:
+      Victim = DAG.getNode(MSP430ISD::RRUM, Dl, VT, Victim,
+                           DAG.getConstant(CurrentShiftAmount, Dl, MVT::i8));
+      break;
+    default:
+      llvm_unreachable("unhandled shift instruction");
+      break;
+    }
+    ShiftAmount -= CurrentShiftAmount;
+  }
   return Victim;
 }
 
+SDValue MSP430TargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
+  // Custom lowering for shifts: pick Rxx, RxxM, RxxX or a byte swap. Shifts by
+  // a non-constant amount are returned unchanged, to be expanded to loops.
+  //
+  // TODO: For the MSP430X CPU, if the non-constant shift amount is in a
+  // register, we can use the RPT flag in the extension word to shift by the
+  // amount stored in the register, without using loops.
+  if (!isa<ConstantSDNode>(Op.getOperand(1)))
+    return Op;
+
+  // Let the middle-end handle shifts of 32-bit/64-bit operands.
+  TypeSize OpSize = Op.getValueSizeInBits();
+  if (OpSize > 16)
+    return Op;
+
+  if (!Subtarget->hasMSP430X())
+    return lowerShiftToRxx(Op, DAG); // Plain 430: single-bit shifts only.
+
+  int ShiftAmount = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+  if (ShiftAmount == 1) {
+    // SRL -> RRC requires a CLRC first, for a total of 2 words. We can do it in
+    // 1 word with RRUM.
+    //
+    // TODO: If op0 is not a register, it would be more efficient to use an Rxx
+    // shift.
+    if (OpSize == 16 && Op.getOpcode() == ISD::SRL)
+      return lowerShiftToRxxM(Op, DAG);
+    return lowerShiftToRxx(Op, DAG);
+  }
+  if (ShiftAmount == 8)
+    return shiftBy8(Op, DAG);
+  if (OpSize == 8) {
+    // RxxM shifts can't handle byte-sized operands.
+    return lowerShiftToRxxX(Op, DAG);
+  }
+
+  // Even though RxxM can only shift by <= 4, for shift counts < 8 it's cheaper
+  // to chain the RxxM shifts than to use the RxxX shifts with rpt. This saves
+  // one cycle and uses the same number of words.
+  //
+  // Only when not optimizing for size, use shiftBy8 followed by an RxxM chain
+  // to shift by > 8. Compared to using an RxxX insn, this saves 7 cycles but
+  // adds 2 words.
+  if (Op.getValueSizeInBits() == 16 &&
+      (ShiftAmount < 8 || !DAG.getMachineFunction().getFunction().hasOptSize()))
+    return lowerShiftToRxxM(Op, DAG);
+
+  assert(OpSize == 16 && ShiftAmount > 8);
+  return lowerShiftToRxxX(Op, DAG);
+}
+
 SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op,
                                                  SelectionDAG &DAG) const {
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
@@ -1366,20 +1491,46 @@ bool MSP430TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
 
 const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((MSP430ISD::NodeType)Opcode) {
-  case MSP430ISD::FIRST_NUMBER:       break;
-  case MSP430ISD::RET_FLAG:           return "MSP430ISD::RET_FLAG";
-  case MSP430ISD::RETI_FLAG:          return "MSP430ISD::RETI_FLAG";
-  case MSP430ISD::RRA:                return "MSP430ISD::RRA";
-  case MSP430ISD::RLA:                return "MSP430ISD::RLA";
-  case MSP430ISD::RRC:                return "MSP430ISD::RRC";
-  case MSP430ISD::RRCL:               return "MSP430ISD::RRCL";
-  case MSP430ISD::CALL:               return "MSP430ISD::CALL";
-  case MSP430ISD::Wrapper:            return "MSP430ISD::Wrapper";
-  case MSP430ISD::BR_CC:              return "MSP430ISD::BR_CC";
-  case MSP430ISD::CMP:                return "MSP430ISD::CMP";
-  case MSP430ISD::SETCC:              return "MSP430ISD::SETCC";
-  case MSP430ISD::SELECT_CC:          return "MSP430ISD::SELECT_CC";
-  case MSP430ISD::DADD:               return "MSP430ISD::DADD";
+  case MSP430ISD::FIRST_NUMBER:
+    break; // Enum base value: no name, so fall out to the nullptr return.
+  case MSP430ISD::RET_FLAG:
+    return "MSP430ISD::RET_FLAG";
+  case MSP430ISD::RETI_FLAG:
+    return "MSP430ISD::RETI_FLAG";
+  case MSP430ISD::RRA:
+    return "MSP430ISD::RRA";
+  case MSP430ISD::RLA:
+    return "MSP430ISD::RLA";
+  case MSP430ISD::RRC:
+    return "MSP430ISD::RRC";
+  case MSP430ISD::RRCL:
+    return "MSP430ISD::RRCL";
+  case MSP430ISD::CALL:
+    return "MSP430ISD::CALL";
+  case MSP430ISD::Wrapper:
+    return "MSP430ISD::Wrapper";
+  case MSP430ISD::BR_CC:
+    return "MSP430ISD::BR_CC";
+  case MSP430ISD::CMP:
+    return "MSP430ISD::CMP";
+  case MSP430ISD::SETCC:
+    return "MSP430ISD::SETCC";
+  case MSP430ISD::SELECT_CC:
+    return "MSP430ISD::SELECT_CC";
+  case MSP430ISD::DADD:
+    return "MSP430ISD::DADD";
+  case MSP430ISD::RRAM:
+    return "MSP430ISD::RRAM";
+  case MSP430ISD::RRUM:
+    return "MSP430ISD::RRUM";
+  case MSP430ISD::RLAM:
+    return "MSP430ISD::RLAM";
+  case MSP430ISD::RRAX:
+    return "MSP430ISD::RRAX";
+  case MSP430ISD::RLAX:
+    return "MSP430ISD::RLAX";
+  case MSP430ISD::RRUX:
+    return "MSP430ISD::RRUX";
   }
   return nullptr;
 }

diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/llvm/lib/Target/MSP430/MSP430ISelLowering.h
@@ -20,54 +20,63 @@
 
 namespace llvm {
   namespace MSP430ISD {
-    enum NodeType : unsigned {
-      FIRST_NUMBER = ISD::BUILTIN_OP_END,
+  enum NodeType : unsigned {
+    FIRST_NUMBER = ISD::BUILTIN_OP_END,
 
-      /// Return with a flag operand. Operand 0 is the chain operand.
-      RET_FLAG,
+    /// Return with a flag operand. Operand 0 is the chain operand.
+    RET_FLAG,
 
-      /// Same as RET_FLAG, but used for returning from ISRs.
-      RETI_FLAG,
+    /// Same as RET_FLAG, but used for returning from ISRs.
+    RETI_FLAG,
 
-      /// Y = R{R,L}A X, rotate right (left) arithmetically
-      RRA, RLA,
+    /// Y = R{R,L}A X, rotate right (left) arithmetically
+    RRA,
+    RLA,
 
-      /// Y = RRC X, rotate right via carry
-      RRC,
+    /// Y = RRC X, rotate right via carry
+    RRC,
 
-      /// Rotate right via carry, carry gets cleared beforehand by clrc
-      RRCL,
+    /// Rotate right via carry, carry gets cleared beforehand by clrc
+    RRCL,
 
-      /// CALL - These operations represent an abstract call
-      /// instruction, which includes a bunch of information.
-      CALL,
+    /// CALL - These operations represent an abstract call
+    /// instruction, which includes a bunch of information.
+    CALL,
 
-      /// Wrapper - A wrapper node for TargetConstantPool, TargetExternalSymbol,
-      /// and TargetGlobalAddress.
-      Wrapper,
+    /// Wrapper - A wrapper node for TargetConstantPool, TargetExternalSymbol,
+    /// and TargetGlobalAddress.
+    Wrapper,
 
-      /// CMP - Compare instruction.
-      CMP,
+    /// CMP - Compare instruction.
+    CMP,
 
-      /// SetCC - Operand 0 is condition code, and operand 1 is the flag
-      /// operand produced by a CMP instruction.
-      SETCC,
+    /// SetCC - Operand 0 is condition code, and operand 1 is the flag
+    /// operand produced by a CMP instruction.
+    SETCC,
 
-      /// MSP430 conditional branches. Operand 0 is the chain operand, operand 1
-      /// is the block to branch if condition is true, operand 2 is the
-      /// condition code, and operand 3 is the flag operand produced by a CMP
-      /// instruction.
-      BR_CC,
+    /// MSP430 conditional branches. Operand 0 is the chain operand, operand 1
+    /// is the block to branch if condition is true, operand 2 is the
+    /// condition code, and operand 3 is the flag operand produced by a CMP
+    /// instruction.
+    BR_CC,
 
-      /// SELECT_CC - Operand 0 and operand 1 are selection variable, operand 3
-      /// is condition code and operand 4 is flag operand.
-      SELECT_CC,
+    /// SELECT_CC - Operand 0 and operand 1 are selection variable, operand 3
+    /// is condition code and operand 4 is flag operand.
+    SELECT_CC,
 
-      /// DADD - Decimal addition with carry
-      /// TODO Nothing generates a node of this type yet.
-      DADD,
-    };
-  }
+    /// DADD - Decimal addition with carry
+    /// TODO Nothing generates a node of this type yet.
+    DADD,
+
+    /// 430X shifts: operand 0 is the value, operand 1 a constant shift count.
+    RRAM,
+    RRUM,
+    RLAM,
+    RRAX,
+    RLAX,
+    RRUX
+  };
+  } // namespace MSP430ISD
 
   class MSP430Subtarget;
   class MSP430TargetLowering : public TargetLowering {