Permalink
Browse files

AArch64: add initial NEON support

Patch by Ana Pazos.

- Completed implementation of instruction formats:
AdvSIMD three same
AdvSIMD modified immediate
AdvSIMD scalar pairwise

- Completed implementation of instruction classes
(some of the instructions in these classes
belong to yet unfinished instruction formats):
Vector Arithmetic
Vector Immediate
Vector Pairwise Arithmetic

- Initial implementation of instruction formats:
AdvSIMD scalar two-reg misc
AdvSIMD scalar three same

- Initial implementation of instruction class:
Scalar Arithmetic

- Initial clang changes to support arm v8 intrinsics.
Note: no clang changes for scalar intrinsics function name mangling yet.

- Comprehensive test cases for added instructions
To verify auto codegen, encoding, decoding, diagnosis, intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187567 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information...
1 parent 691aa09 commit 87773c318fcee853fb34a80a10c4347d523bdafb @TNorthover TNorthover committed Aug 1, 2013
Showing with 12,503 additions and 41 deletions.
  1. +1 −0 include/llvm/IR/Intrinsics.td
  2. +41 −0 include/llvm/IR/IntrinsicsAArch64.td
  3. +1 −1 lib/Target/AArch64/AArch64CallingConv.td
  4. +521 −1 lib/Target/AArch64/AArch64ISelLowering.cpp
  5. +30 −3 lib/Target/AArch64/AArch64ISelLowering.h
  6. +93 −0 lib/Target/AArch64/AArch64InstrFormats.td
  7. +40 −0 lib/Target/AArch64/AArch64InstrInfo.td
  8. +1,634 −0 lib/Target/AArch64/AArch64InstrNEON.td
  9. +5 −0 lib/Target/AArch64/AArch64MCInstLower.cpp
  10. +1 −1 lib/Target/AArch64/AArch64RegisterInfo.td
  11. +2 −4 lib/Target/AArch64/AArch64Subtarget.cpp
  12. +3 −0 lib/Target/AArch64/AArch64Subtarget.h
  13. +120 −20 lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
  14. +39 −1 lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
  15. +81 −0 lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
  16. +7 −2 lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
  17. +1 −1 lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
  18. +66 −0 lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
  19. +5 −1 lib/Target/AArch64/Utils/AArch64BaseInfo.h
  20. +21 −0 test/CodeGen/AArch64/complex-copy-noneon.ll
  21. +21 −1 test/CodeGen/AArch64/inline-asm-constraints.ll
  22. +226 −0 test/CodeGen/AArch64/neon-aba-abd.ll
  23. +92 −0 test/CodeGen/AArch64/neon-add-pairwise.ll
  24. +132 −0 test/CodeGen/AArch64/neon-add-sub.ll
  25. +574 −0 test/CodeGen/AArch64/neon-bitcast.ll
  26. +594 −0 test/CodeGen/AArch64/neon-bitwise-instructions.ll
  27. +1,982 −0 test/CodeGen/AArch64/neon-compare-instructions.ll
  28. +56 −0 test/CodeGen/AArch64/neon-facge-facgt.ll
  29. +112 −0 test/CodeGen/AArch64/neon-fma.ll
  30. +54 −0 test/CodeGen/AArch64/neon-frsqrt-frecp.ll
  31. +207 −0 test/CodeGen/AArch64/neon-halving-add-sub.ll
  32. +310 −0 test/CodeGen/AArch64/neon-max-min-pairwise.ll
  33. +310 −0 test/CodeGen/AArch64/neon-max-min.ll
  34. +88 −0 test/CodeGen/AArch64/neon-mla-mls.ll
  35. +205 −0 test/CodeGen/AArch64/neon-mov.ll
  36. +181 −0 test/CodeGen/AArch64/neon-mul-div.ll
  37. +105 −0 test/CodeGen/AArch64/neon-rounding-halving-add.ll
  38. +138 −0 test/CodeGen/AArch64/neon-rounding-shift.ll
  39. +274 −0 test/CodeGen/AArch64/neon-saturating-add-sub.ll
  40. +138 −0 test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
  41. +138 −0 test/CodeGen/AArch64/neon-saturating-shift.ll
  42. +140 −0 test/CodeGen/AArch64/neon-shift.ll
  43. +4 −4 test/MC/AArch64/basic-a64-diagnostics.s
  44. +1 −1 test/MC/AArch64/basic-a64-instructions.s
  45. +78 −0 test/MC/AArch64/neon-aba-abd.s
  46. +35 −0 test/MC/AArch64/neon-add-pairwise.s
  47. +82 −0 test/MC/AArch64/neon-add-sub-instructions.s
  48. +60 −0 test/MC/AArch64/neon-bitwise-instructions.s
  49. +405 −0 test/MC/AArch64/neon-compare-instructions.s
  50. +1,207 −0 test/MC/AArch64/neon-diagnostics.s
  51. +41 −0 test/MC/AArch64/neon-facge-facgt.s
  52. +27 −0 test/MC/AArch64/neon-frsqrt-frecp.s
  53. +74 −0 test/MC/AArch64/neon-halving-add-sub.s
  54. +110 −0 test/MC/AArch64/neon-max-min-pairwise.s
  55. +110 −0 test/MC/AArch64/neon-max-min.s
  56. +61 −0 test/MC/AArch64/neon-mla-mls-instructions.s
  57. +207 −0 test/MC/AArch64/neon-mov.s
  58. +86 −0 test/MC/AArch64/neon-mul-div-instructions.s
  59. +39 −0 test/MC/AArch64/neon-rounding-halving-add.s
  60. +57 −0 test/MC/AArch64/neon-rounding-shift.s
  61. +133 −0 test/MC/AArch64/neon-saturating-add-sub.s
  62. +70 −0 test/MC/AArch64/neon-saturating-rounding-shift.s
  63. +69 −0 test/MC/AArch64/neon-saturating-shift.s
  64. +57 −0 test/MC/AArch64/neon-shift.s
  65. +28 −0 test/MC/AArch64/noneon-diagnostics.s
  66. +673 −0 test/MC/Disassembler/AArch64/neon-instructions.txt
View
1 include/llvm/IR/Intrinsics.td
@@ -494,6 +494,7 @@ def int_convertuu : Intrinsic<[llvm_anyint_ty],
include "llvm/IR/IntrinsicsPowerPC.td"
include "llvm/IR/IntrinsicsX86.td"
include "llvm/IR/IntrinsicsARM.td"
+include "llvm/IR/IntrinsicsAArch64.td"
include "llvm/IR/IntrinsicsXCore.td"
include "llvm/IR/IntrinsicsHexagon.td"
include "llvm/IR/IntrinsicsNVVM.td"
View
41 include/llvm/IR/IntrinsicsAArch64.td
@@ -0,0 +1,41 @@
+//===- IntrinsicsAArch64.td - Defines AArch64 intrinsics -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the AArch64-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD (NEON)
+
+let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
+
+// Vector Absolute Compare (Floating Point)
+def int_aarch64_neon_vacgeq : Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+def int_aarch64_neon_vacgtq : Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+
+// Vector maxNum (Floating Point)
+def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;
+
+// Vector minNum (Floating Point)
+def int_aarch64_neon_vminnm : Neon_2Arg_Intrinsic;
+
+// Vector Pairwise maxNum (Floating Point)
+def int_aarch64_neon_vpmaxnm : Neon_2Arg_Intrinsic;
+
+// Vector Pairwise minNum (Floating Point)
+def int_aarch64_neon_vpminnm : Neon_2Arg_Intrinsic;
+
+// Vector Multiply Extended (Floating Point)
+def int_aarch64_neon_vmulx : Neon_2Arg_Intrinsic;
+}
View
2 lib/Target/AArch64/AArch64CallingConv.td
@@ -61,7 +61,7 @@ def CC_A64_APCS : CallingConv<[
// Vectors and Floating-point types.
CCIfType<[v2i8], CCBitConvertToType<f16>>,
CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>,
- CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64], CCBitConvertToType<f64>>,
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCBitConvertToType<f128>>,
View
522 lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -42,6 +42,8 @@ static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) {
+ const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+
// SIMD compares set the entire lane's bits to 1
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
@@ -53,6 +55,21 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
+ if (Subtarget->hasNEON()) {
+ // And the vectors
+ addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass);
+ addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass);
+ addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass);
+ addRegisterClass(MVT::v1i64, &AArch64::VPR64RegClass);
+ addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass);
+ addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass);
+ addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass);
+ addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass);
+ addRegisterClass(MVT::v2i64, &AArch64::VPR128RegClass);
+ addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass);
+ addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass);
+ }
+
computeRegisterProperties();
// We combine OR nodes for bitfield and NEON BSL operations.
@@ -251,6 +268,31 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setExceptionPointerRegister(AArch64::X0);
setExceptionSelectorRegister(AArch64::X1);
+
+ if (Subtarget->hasNEON()) {
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::v8i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v2f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
+ }
}
EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
@@ -777,7 +819,22 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
- default: return NULL;
+ case AArch64ISD::NEON_BSL:
+ return "AArch64ISD::NEON_BSL";
+ case AArch64ISD::NEON_MOVIMM:
+ return "AArch64ISD::NEON_MOVIMM";
+ case AArch64ISD::NEON_MVNIMM:
+ return "AArch64ISD::NEON_MVNIMM";
+ case AArch64ISD::NEON_FMOVIMM:
+ return "AArch64ISD::NEON_FMOVIMM";
+ case AArch64ISD::NEON_CMP:
+ return "AArch64ISD::NEON_CMP";
+ case AArch64ISD::NEON_CMPZ:
+ return "AArch64ISD::NEON_CMPZ";
+ case AArch64ISD::NEON_TST:
+ return "AArch64ISD::NEON_TST";
+ default:
+ return NULL;
}
}
@@ -2230,6 +2287,213 @@ AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(A64CC::NE, MVT::i32));
}
+static SDValue LowerVectorSETCC(SDValue Op, SelectionDAG &DAG) { // Lower a vector SETCC to NEON_TST / NEON_CMPZ / NEON_CMP nodes.
+ SDLoc DL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ EVT VT = Op.getValueType();
+ bool Invert = false; // When set, the final compare result is negated with getNOT.
+ SDValue Op0, Op1;
+ unsigned Opcode;
+
+ if (LHS.getValueType().isInteger()) {
+
+ // Attempt to use Vector Integer Compare Mask Test instruction.
+ // TST = icmp ne (and (op0, op1), zero); the zero may be on either side.
+ if (CC == ISD::SETNE) {
+ if (((LHS.getOpcode() == ISD::AND) &&
+ ISD::isBuildVectorAllZeros(RHS.getNode())) ||
+ ((RHS.getOpcode() == ISD::AND) &&
+ ISD::isBuildVectorAllZeros(LHS.getNode()))) {
+
+ SDValue AndOp = (LHS.getOpcode() == ISD::AND) ? LHS : RHS;
+ SDValue NewLHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(0));
+ SDValue NewRHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(1));
+ return DAG.getNode(AArch64ISD::NEON_TST, DL, VT, NewLHS, NewRHS);
+ }
+ }
+
+ // Attempt to use Vector Integer Compare Mask against Zero instr (Signed).
+ // Note: Compare against Zero does not support unsigned predicates.
+ if ((ISD::isBuildVectorAllZeros(RHS.getNode()) ||
+ ISD::isBuildVectorAllZeros(LHS.getNode())) &&
+ !isUnsignedIntSetCC(CC)) {
+
+ // If LHS is the zero value, swap operands and CondCode.
+ if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
+ CC = getSetCCSwappedOperands(CC);
+ Op0 = RHS;
+ } else
+ Op0 = LHS;
+
+ // Ensure valid CondCode for Compare Mask against Zero instruction:
+ // EQ, GE, GT, LE, LT. NE is emitted as an inverted EQ.
+ if (ISD::SETNE == CC) {
+ Invert = true;
+ CC = ISD::SETEQ;
+ }
+
+ // Using constant type to differentiate integer and FP compares with zero.
+ Op1 = DAG.getConstant(0, MVT::i32);
+ Opcode = AArch64ISD::NEON_CMPZ;
+
+ } else {
+ // Attempt to use Vector Integer Compare Mask instr (Signed/Unsigned).
+ // Ensure valid CondCode for Compare Mask instr: EQ, GE, GT, UGE, UGT.
+ bool Swap = false;
+ switch (CC) {
+ default:
+ llvm_unreachable("Illegal integer comparison.");
+ case ISD::SETEQ:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ break;
+ case ISD::SETNE:
+ Invert = true;
+ CC = ISD::SETEQ;
+ break;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ Swap = true;
+ CC = getSetCCSwappedOperands(CC);
+ }
+
+ if (Swap)
+ std::swap(LHS, RHS);
+
+ Opcode = AArch64ISD::NEON_CMP;
+ Op0 = LHS;
+ Op1 = RHS;
+ }
+
+ // Generate Compare Mask instr or Compare Mask against Zero instr.
+ SDValue NeonCmp =
+ DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
+
+ if (Invert)
+ NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
+
+ return NeonCmp;
+ }
+
+ // Now handle Floating Point cases.
+ // Attempt to use Vector Floating Point Compare Mask against Zero instruction.
+ if (ISD::isBuildVectorAllZeros(RHS.getNode()) ||
+ ISD::isBuildVectorAllZeros(LHS.getNode())) {
+
+ // If LHS is the zero value, swap operands and CondCode.
+ if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
+ CC = getSetCCSwappedOperands(CC);
+ Op0 = RHS;
+ } else
+ Op0 = LHS;
+
+ // Using constant type to differentiate integer and FP compares with zero.
+ Op1 = DAG.getConstantFP(0, MVT::f32);
+ Opcode = AArch64ISD::NEON_CMPZ;
+ } else {
+ // Attempt to use Vector Floating Point Compare Mask instruction.
+ Op0 = LHS;
+ Op1 = RHS;
+ Opcode = AArch64ISD::NEON_CMP;
+ }
+
+ SDValue NeonCmpAlt; // Optional second compare; OR'd into the result when set.
+ // Some register compares have to be implemented with swapped CC and operands,
+ // e.g.: OLT implemented as OGT with swapped operands.
+ bool SwapIfRegArgs = false;
+
+ // Ensure valid CondCode for FP Compare Mask against Zero instruction:
+ // EQ, GE, GT, LE, LT.
+ // And ensure valid CondCode for FP Compare Mask instruction: EQ, GE, GT.
+ switch (CC) {
+ default:
+ llvm_unreachable("Illegal FP comparison");
+ case ISD::SETUNE:
+ case ISD::SETNE:
+ Invert = true; // Fallthrough
+ case ISD::SETOEQ:
+ case ISD::SETEQ:
+ CC = ISD::SETEQ;
+ break;
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ CC = ISD::SETLT;
+ SwapIfRegArgs = true;
+ break;
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ CC = ISD::SETGT;
+ break;
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ CC = ISD::SETLE;
+ SwapIfRegArgs = true;
+ break;
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ CC = ISD::SETGE;
+ break;
+ case ISD::SETUGE: // Emitted as the inverse of LT.
+ Invert = true;
+ CC = ISD::SETLT;
+ SwapIfRegArgs = true;
+ break;
+ case ISD::SETULE: // Emitted as the inverse of GT.
+ Invert = true;
+ CC = ISD::SETGT;
+ break;
+ case ISD::SETUGT: // Emitted as the inverse of LE.
+ Invert = true;
+ CC = ISD::SETLE;
+ SwapIfRegArgs = true;
+ break;
+ case ISD::SETULT: // Emitted as the inverse of GE.
+ Invert = true;
+ CC = ISD::SETGE;
+ break;
+ case ISD::SETUEQ:
+ Invert = true; // Fallthrough
+ case ISD::SETONE:
+ // Expand this to (OGT | OLT); UEQ is the inverse of that.
+ NeonCmpAlt =
+ DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGT));
+ CC = ISD::SETLT;
+ SwapIfRegArgs = true;
+ break;
+ case ISD::SETUO:
+ Invert = true; // Fallthrough
+ case ISD::SETO:
+ // Expand this to (OGE | OLT); UO is the inverse of that.
+ NeonCmpAlt =
+ DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGE));
+ CC = ISD::SETLT;
+ SwapIfRegArgs = true;
+ break;
+ }
+
+ if (Opcode == AArch64ISD::NEON_CMP && SwapIfRegArgs) {
+ CC = getSetCCSwappedOperands(CC);
+ std::swap(Op0, Op1);
+ }
+
+ // Generate FP Compare Mask instr or FP Compare Mask against Zero instr
+ SDValue NeonCmp = DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
+
+ if (NeonCmpAlt.getNode())
+ NeonCmp = DAG.getNode(ISD::OR, DL, VT, NeonCmp, NeonCmpAlt);
+
+ if (Invert)
+ NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
+
+ return NeonCmp;
+}
+
// (SETCC lhs, rhs, condcode)
SDValue
AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
@@ -2239,6 +2503,9 @@ AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return LowerVectorSETCC(Op, DAG);
+
if (LHS.getValueType() == MVT::f128) {
// f128 comparisons will be lowered to libcalls giving a valid LHS and RHS
// for the rest of the function (some i32 or i64 values).
@@ -2395,11 +2662,155 @@ AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::BUILD_VECTOR:
+ return LowerBUILD_VECTOR(Op, DAG, getSubtarget());
}
return SDValue();
}
+/// Check if the specified splat value corresponds to a valid vector constant
+/// for a Neon instruction with a "modified immediate" operand (e.g., MOVI). If
+/// so, return the encoded 8-bit immediate and the OpCmode instruction fields
+/// values.
+///
+/// \param SplatBits    The splat element value; only the low SplatBitSize bits
+///                     are expected to be set.
+/// \param SplatUndef   Bits set here are treated as "don't care": the MSL and
+///                     byte-mask forms may assume them to be ones.
+/// \param SplatBitSize Splat element width in bits: 8, 16, 32 or 64.
+/// \param DAG          Currently unused.
+/// \param is128Bits    Selects the 128-bit rather than the 64-bit vector type.
+/// \param type         Neon_Mov_Imm or Neon_Mvn_Imm. The per-byte (8-bit) and
+///                     byte-mask (64-bit) forms are rejected unless this is
+///                     Neon_Mov_Imm.
+/// \param[out] VT      Vector type whose element width is SplatBitSize.
+/// \param[out] Imm     The 8-bit immediate field.
+/// \param[out] OpCmode Cmode in bits 3-0, with Op in bit 4.
+/// \returns true if the splat can be encoded. When false is returned the
+///          output parameters may already have been written and must be
+///          ignored by the caller.
+static bool isNeonModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
+                              unsigned SplatBitSize, SelectionDAG &DAG,
+                              bool is128Bits, NeonModImmType type, EVT &VT,
+                              unsigned &Imm, unsigned &OpCmode) {
+  switch (SplatBitSize) {
+  default:
+    llvm_unreachable("unexpected size for isNeonModifiedImm");
+  case 8: {
+    if (type != Neon_Mov_Imm)
+      return false;
+    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
+    // Neon movi per byte: Op=0, Cmode=1110.
+    OpCmode = 0xe;
+    Imm = SplatBits;
+    VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
+    break;
+  }
+  case 16: {
+    // Neon move inst per halfword
+    VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
+    if ((SplatBits & ~0xff) == 0) {
+      // Value = 0x00nn is 0x00nn LSL 0
+      // movi: Op=0, Cmode=1000; mvni: Op=1, Cmode=1000
+      // bic:  Op=1, Cmode=1001;  orr: Op=0, Cmode=1001
+      // Op=x, Cmode=100y
+      Imm = SplatBits;
+      OpCmode = 0x8;
+      break;
+    }
+    if ((SplatBits & ~0xff00) == 0) {
+      // Value = 0xnn00 is 0x00nn LSL 8
+      // movi: Op=0, Cmode=1010; mvni: Op=1, Cmode=1010
+      // bic:  Op=1, Cmode=1011;  orr: Op=0, Cmode=1011
+      // Op=x, Cmode=101x
+      Imm = SplatBits >> 8;
+      OpCmode = 0xa;
+      break;
+    }
+    // can't handle any other
+    return false;
+  }
+
+  case 32: {
+    // First the LSL variants (MSL is unusable by some interested instructions).
+
+    // Neon move instr per word, shift zeros
+    VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
+    if ((SplatBits & ~0xff) == 0) {
+      // Value = 0x000000nn is 0x000000nn LSL 0
+      // movi: Op=0, Cmode= 0000; mvni: Op=1, Cmode= 0000
+      // bic:  Op=1, Cmode= 0001; orr:  Op=0, Cmode= 0001
+      // Op=x, Cmode=000x
+      Imm = SplatBits;
+      OpCmode = 0;
+      break;
+    }
+    if ((SplatBits & ~0xff00) == 0) {
+      // Value = 0x0000nn00 is 0x000000nn LSL 8
+      // movi: Op=0, Cmode= 0010;  mvni: Op=1, Cmode= 0010
+      // bic:  Op=1, Cmode= 0011;  orr : Op=0, Cmode= 0011
+      // Op=x, Cmode=001x
+      Imm = SplatBits >> 8;
+      OpCmode = 0x2;
+      break;
+    }
+    if ((SplatBits & ~0xff0000) == 0) {
+      // Value = 0x00nn0000 is 0x000000nn LSL 16
+      // movi: Op=0, Cmode= 0100; mvni: Op=1, Cmode= 0100
+      // bic:  Op=1, Cmode= 0101; orr:  Op=0, Cmode= 0101
+      // Op=x, Cmode=010x
+      Imm = SplatBits >> 16;
+      OpCmode = 0x4;
+      break;
+    }
+    // The ULL suffix keeps the complemented mask 64 bits wide, so that bits
+    // 63-32 of SplatBits are checked here just as in the tests above.
+    if ((SplatBits & ~0xff000000ULL) == 0) {
+      // Value = 0xnn000000 is 0x000000nn LSL 24
+      // movi: Op=0, Cmode= 0110; mvni: Op=1, Cmode= 0110
+      // bic:  Op=1, Cmode= 0111; orr:  Op=0, Cmode= 0111
+      // Op=x, Cmode=011x
+      Imm = SplatBits >> 24;
+      OpCmode = 0x6;
+      break;
+    }
+
+    // Now the MSL immediates.
+
+    // Neon move instr per word, shift ones
+    if ((SplatBits & ~0xffff) == 0 &&
+        ((SplatBits | SplatUndef) & 0xff) == 0xff) {
+      // Value = 0x0000nnff is 0x000000nn MSL 8
+      // movi: Op=0, Cmode= 1100; mvni: Op=1, Cmode= 1100
+      // Op=x, Cmode=1100
+      Imm = SplatBits >> 8;
+      OpCmode = 0xc;
+      break;
+    }
+    if ((SplatBits & ~0xffffff) == 0 &&
+        ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
+      // Value = 0x00nnffff is 0x000000nn MSL 16
+      // movi: Op=0, Cmode= 1101; mvni: Op=1, Cmode= 1101
+      // Op=x, Cmode=1101
+      Imm = SplatBits >> 16;
+      OpCmode = 0xd;
+      break;
+    }
+    // can't handle any other
+    return false;
+  }
+
+  case 64: {
+    if (type != Neon_Mov_Imm)
+      return false;
+    // Neon move instr bytemask, where each byte is either 0x00 or 0xff.
+    // movi Op=1, Cmode=1110.
+    OpCmode = 0x1e;
+    uint64_t BitMask = 0xff;
+    unsigned ImmMask = 1;
+    Imm = 0;
+    // Bit i of Imm is set when byte i of the splat is all ones (undef bits
+    // count as ones); any byte that is neither all ones nor zero is rejected.
+    for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
+      if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
+        Imm |= ImmMask;
+      } else if ((SplatBits & BitMask) != 0) {
+        return false;
+      }
+      BitMask <<= 8;
+      ImmMask <<= 1;
+    }
+    VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
+    break;
+  }
+  }
+
+  return true;
+}
+
static SDValue PerformANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
@@ -2725,6 +3136,7 @@ static SDValue PerformORCombine(SDNode *N,
const AArch64Subtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
EVT VT = N->getValueType(0);
if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
@@ -2745,6 +3157,44 @@ static SDValue PerformORCombine(SDNode *N,
if (Res.getNode())
return Res;
+  // Everything below is the NEON-only bitwise-select combine.
+  if (!Subtarget->hasNEON())
+    return SDValue();
+
+  // Attempt to use vector immediate-form BSL
+  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
+
+  SDValue N0 = N->getOperand(0);
+  if (N0.getOpcode() != ISD::AND)
+    return SDValue();
+
+  SDValue N1 = N->getOperand(1);
+  if (N1.getOpcode() != ISD::AND)
+    return SDValue();
+
+  if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+    APInt SplatUndef;
+    unsigned SplatBitSize;
+    bool HasAnyUndefs;
+    // Only the second operand of each AND is inspected for the constant mask.
+    BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
+    APInt SplatBits0;
+    if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
+                                      HasAnyUndefs) &&
+        !HasAnyUndefs) {
+      BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
+      APInt SplatBits1;
+      // The two masks must be exact bitwise inverses of each other. Unrelated
+      // constants can be splats of different element widths, and comparing
+      // APInts of different widths asserts, so check the widths first.
+      if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
+                                        HasAnyUndefs) &&
+          !HasAnyUndefs &&
+          SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
+          SplatBits0 == ~SplatBits1) {
+        // Canonicalize the vector type to make instruction selection simpler.
+        EVT CanonicalVT = VT.is128BitVector() ? MVT::v16i8 : MVT::v8i8;
+        SDValue Result = DAG.getNode(AArch64ISD::NEON_BSL, DL, CanonicalVT,
+                                     N0->getOperand(1), N0->getOperand(0),
+                                     N1->getOperand(0));
+        return DAG.getNode(ISD::BITCAST, DL, VT, Result);
+      }
+    }
+  }
+
return SDValue();
}
@@ -2819,6 +3269,76 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}
+// Lower a constant-splat BUILD_VECTOR to a NEON modified-immediate move
+// (MOVI, then MVNI, then FMOV). For anything this cannot handle a null
+// SDValue is returned so that the default expansion code takes care of it.
+SDValue
+AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+                                         const AArch64Subtarget *ST) const {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+
+  APInt SplatBits, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+
+  // Only constant splats of at most 64 bits are candidates.
+  if (!BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs))
+    return SDValue();
+  if (SplatBitSize > 64)
+    return SDValue();
+
+  const bool Is128Bits = VT.is128BitVector();
+  EVT NeonMovVT;
+  unsigned Imm = 0;
+  unsigned OpCmode = 0;
+
+  // Note we favor lowering MOVI over MVNI.
+  // This has implications on the definition of patterns in TableGen to select
+  // BIC immediate instructions but not ORR immediate instructions.
+  // If this lowering order is changed, TableGen patterns for BIC immediate and
+  // ORR immediate instructions have to be updated.
+
+  // Step 1: vector immediate-form MOVI on the splat value itself.
+  if (isNeonModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
+                        SplatBitSize, DAG, Is128Bits, Neon_Mov_Imm, NeonMovVT,
+                        Imm, OpCmode)) {
+    SDValue MovImm = DAG.getTargetConstant(Imm, MVT::i32);
+    SDValue MovOpCmode = DAG.getConstant(OpCmode, MVT::i32);
+
+    if (MovImm.getNode() && MovOpCmode.getNode()) {
+      SDValue Mov = DAG.getNode(AArch64ISD::NEON_MOVIMM, DL, NeonMovVT, MovImm,
+                                MovOpCmode);
+      return DAG.getNode(ISD::BITCAST, DL, VT, Mov);
+    }
+  }
+
+  // Step 2: vector immediate-form MVNI on the bitwise-inverted splat value.
+  uint64_t InvertedBits = (~SplatBits).getZExtValue();
+  if (isNeonModifiedImm(InvertedBits, SplatUndef.getZExtValue(), SplatBitSize,
+                        DAG, Is128Bits, Neon_Mvn_Imm, NeonMovVT, Imm,
+                        OpCmode)) {
+    SDValue MvnImm = DAG.getTargetConstant(Imm, MVT::i32);
+    SDValue MvnOpCmode = DAG.getConstant(OpCmode, MVT::i32);
+
+    if (MvnImm.getNode() && MvnOpCmode.getNode()) {
+      SDValue Mvn = DAG.getNode(AArch64ISD::NEON_MVNIMM, DL, NeonMovVT, MvnImm,
+                                MvnOpCmode);
+      return DAG.getNode(ISD::BITCAST, DL, VT, Mvn);
+    }
+  }
+
+  // Step 3: vector immediate-form FMOV, only for FP vectors whose splat width
+  // equals the element width.
+  const bool IsSingleSplat =
+      (VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32;
+  const bool IsDoubleSplat = VT == MVT::v2f64 && SplatBitSize == 64;
+  if (!IsSingleSplat && !IsDoubleSplat)
+    return SDValue();
+
+  APFloat RealVal(
+      SplatBitSize == 32 ? APFloat::IEEEsingle : APFloat::IEEEdouble,
+      SplatBits);
+  uint32_t ImmVal;
+  if (!A64Imms::isFPImm(RealVal, ImmVal))
+    return SDValue();
+
+  SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
+  return DAG.getNode(AArch64ISD::NEON_FMOVIMM, DL, VT, Val);
+}
+
AArch64TargetLowering::ConstraintType
AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
if (Constraint.size() == 1) {
View
33 lib/Target/AArch64/AArch64ISelLowering.h
@@ -111,7 +111,28 @@ namespace AArch64ISD {
// created using the small memory model style: i.e. adrp/add or
// adrp/mem-op. This exists to prevent bare TargetAddresses which may never
// get selected.
- WrapperSmall
+ WrapperSmall,
+
+ // Vector bitwise select
+ NEON_BSL,
+
+ // Vector move immediate
+ NEON_MOVIMM,
+
+ // Vector Move Inverted Immediate
+ NEON_MVNIMM,
+
+ // Vector FP move immediate
+ NEON_FMOVIMM,
+
+ // Vector compare
+ NEON_CMP,
+
+ // Vector compare zero
+ NEON_CMPZ,
+
+ // Vector compare bitwise test
+ NEON_TST
};
}
@@ -148,9 +169,11 @@ class AArch64TargetLowering : public TargetLowering {
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
- void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
- SDLoc DL, SDValue &Chain) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const AArch64Subtarget *ST) const;
+ void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL,
+ SDValue &Chain) const;
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
@@ -253,6 +276,10 @@ class AArch64TargetLowering : public TargetLowering {
return &getTargetMachine().getSubtarget<AArch64Subtarget>();
}
};
+enum NeonModImmType {
+ Neon_Mov_Imm,
+ Neon_Mvn_Imm
+};
} // namespace llvm
#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
View
93 lib/Target/AArch64/AArch64InstrFormats.td
@@ -959,3 +959,96 @@ class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4,
let Inst{4-0} = op4;
}
+
+//===----------------------------------------------------------------------===//
+//
+// Neon Instruction Format Definitions.
+//
+
+let Predicates = [HasNEON] in {
+
+class NeonInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit> {
+}
+
+// Format AdvSIMD 3 vector registers with same vector type
+class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b1;
+ // Inherit Rm in 20-16
+ let Inst{15-11} = opcode;
+ let Inst{10} = 0b1;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD 1 vector register with modified immediate
+class NeonI_1VModImm<bit q, bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs,ins, asmstr, patterns, itin>
+{
+ bits<8> Imm;
+ bits<4> cmode;
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29} = op;
+ let Inst{28-19} = 0b0111100000;
+ let Inst{15-12} = cmode;
+ let Inst{11} = 0b0; // o2
+ let Inst{10} = 1;
+ // Inherit Rd in 4-0
+ let Inst{18-16} = Imm{7-5}; // imm a:b:c
+ let Inst{9-5} = Imm{4-0}; // imm d:e:f:g:h
+}
+
+// Format AdvSIMD 3 scalar registers with same type
+
+class NeonI_Scalar3Same<bit u, bits<2> size, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+ let Inst{31} = 0b0;
+ let Inst{30} = 0b1;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b1;
+ // Inherit Rm in 20-16
+ let Inst{15-11} = opcode;
+ let Inst{10} = 0b1;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+
+// Format AdvSIMD 2 vector registers miscellaneous
+class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+}
+
View
40 lib/Target/AArch64/AArch64InstrInfo.td
@@ -11,6 +11,17 @@
//
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// AArch64 Instruction Predicate Definitions.
+//
+def HasNEON : Predicate<"Subtarget->hasNEON()">,
+ AssemblerPredicate<"FeatureNEON", "neon">;
+def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
+ AssemblerPredicate<"FeatureCrypto","crypto">;
+
+// Use fused MAC if more precision in FP computation is allowed.
+def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
+ " FPOpFusion::Fast)">;
include "AArch64InstrFormats.td"
//===----------------------------------------------------------------------===//
@@ -2173,6 +2184,29 @@ def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>;
def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;
def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
+// Extra patterns for when we're allowed to optimise separate multiplication and
+// addition.
+//
+// Architectural semantics of the scalar fused multiply instructions:
+//   FMADD:  Rd =  Ra + Rn * Rm
+//   FMSUB:  Rd =  Ra - Rn * Rm
+//   FNMADD: Rd = -Ra - Rn * Rm
+//   FNMSUB: Rd = -Ra + Rn * Rm
+// so "Rn * Rm - Ra" must select FNMSUB and "-Ra - Rn * Rm" must select FNMADD.
+let Predicates = [UseFusedMAC] in {
+def : Pat<(fadd FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)),
+          (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
+def : Pat<(fsub FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)),
+          (FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
+def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra),
+          (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
+def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)),
+          (FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
+
+def : Pat<(fadd FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)),
+          (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
+def : Pat<(fsub FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)),
+          (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
+def : Pat<(fsub (fmul FPR64:$Rn, FPR64:$Rm), FPR64:$Ra),
+          (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
+def : Pat<(fsub (fneg FPR64:$Ra), (fmul FPR64:$Rn, FPR64:$Rm)),
+          (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
+}
+
+
//===----------------------------------------------------------------------===//
// Floating-point <-> fixed-point conversion instructions
//===----------------------------------------------------------------------===//
@@ -5123,3 +5157,9 @@ defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm),
defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),
(i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>;
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD (NEON) Support
+//
+
+include "AArch64InstrNEON.td"
View
1,634 lib/Target/AArch64/AArch64InstrNEON.td
@@ -0,0 +1,1634 @@
+//===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the AArch64 NEON instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// NEON-specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+// Bitwise-select node (AArch64ISD::NEON_BSL): one vector result and three
+// operands, each constrained to have the same type as the result.
+def Neon_bsl : SDNode<"AArch64ISD::NEON_BSL", SDTypeProfile<1, 3,
+ [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>>;
+
+// (outs Result), (ins Imm, OpCmode)
+// Shared type profile for the modified-immediate move nodes: the result is a
+// vector and the first operand (Imm) is an i32; the second operand's type is
+// left unconstrained by this profile.
+def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+
+// Move-immediate node, AArch64ISD::NEON_MOVIMM.
+def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;
+
+// Counterpart node AArch64ISD::NEON_MVNIMM, using the same type profile.
+def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;
+
+// (outs Result), (ins Imm)
+// Floating-point move-immediate node: vector result, single i32 operand.
+def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
+ [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
+
+// (outs Result), (ins LHS, RHS, CondCode)
+// Vector compare: the result is a vector and LHS/RHS must share one type. The
+// result type is not tied to the operand type, so FP compares can produce
+// integer mask vectors.
+def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
+ [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
+
+// (outs Result), (ins LHS, 0/0.0 constant, CondCode)
+// Vector compare against zero: result and LHS are both vectors; the constant
+// operand's type is not constrained here.
+def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
+ [SDTCisVec<0>, SDTCisVec<1>]>>;
+
+// (outs Result), (ins LHS, RHS)
+// Bitwise-test compare (used by CMTST): vector result, LHS/RHS of one type.
+def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
+ [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
+
+//===----------------------------------------------------------------------===//
+// Multiclasses
+//===----------------------------------------------------------------------===//
+
+// NeonI_3VSame_B_sizes: three-register-same instruction in byte arrangements
+// only. Emits:
+//   _8B  - VPR64 operands,  v8i8,  first NeonI_3VSame argument 0b0
+//   _16B - VPR128 operands, v16i8, first NeonI_3VSame argument 0b1
+// u, size and opcode are forwarded unchanged to NeonI_3VSame. A separate
+// pattern operator is taken per width (opnode8B / opnode16B) so callers can
+// supply width-specific fragments. Commutable sets isCommutable on both defs.
+multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
+ string asmop, SDPatternOperator opnode8B,
+ SDPatternOperator opnode16B,
+ bit Commutable = 0>
+{
+ let isCommutable = Commutable in {
+ def _8B : NeonI_3VSame<0b0, u, size, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
+ asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
+ [(set (v8i8 VPR64:$Rd),
+ (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
+ NoItinerary>;
+
+ def _16B : NeonI_3VSame<0b1, u, size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
+ [(set (v16i8 VPR128:$Rd),
+ (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
+ NoItinerary>;
+ }
+
+}
+
+// NeonI_3VSame_HS_sizes: three-register-same integer instruction in halfword
+// and word arrangements. The size field is fixed per def rather than passed in:
+//   _4H (VPR64,  v4i16, size 0b01)   _8H (VPR128, v8i16, size 0b01)
+//   _2S (VPR64,  v2i32, size 0b10)   _4S (VPR128, v4i32, size 0b10)
+// One pattern operator (opnode) is shared by every arrangement. Commutable
+// sets isCommutable on all four defs.
+multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
+ string asmop, SDPatternOperator opnode,
+ bit Commutable = 0>
+{
+ let isCommutable = Commutable in {
+ def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
+ asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
+ [(set (v4i16 VPR64:$Rd),
+ (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
+ NoItinerary>;
+
+ def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
+ [(set (v8i16 VPR128:$Rd),
+ (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
+ NoItinerary>;
+
+ def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
+ asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
+ [(set (v2i32 VPR64:$Rd),
+ (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
+ NoItinerary>;
+
+ def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
+ NoItinerary>;
+ }
+}
+// NeonI_3VSame_BHS_sizes: inherits the _4H/_8H/_2S/_4S defs from
+// NeonI_3VSame_HS_sizes and adds the byte arrangements _8B (VPR64, v8i8) and
+// _16B (VPR128, v16i8), both with size 0b00 and the same pattern operator.
+multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
+ string asmop, SDPatternOperator opnode,
+ bit Commutable = 0>
+ : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable>
+{
+ let isCommutable = Commutable in {
+ def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
+ asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
+ [(set (v8i8 VPR64:$Rd),
+ (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
+ NoItinerary>;
+
+ def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
+ [(set (v16i8 VPR128:$Rd),
+ (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
+ NoItinerary>;
+ }
+}
+
+// NeonI_3VSame_BHSD_sizes: inherits every def of NeonI_3VSame_BHS_sizes and
+// adds the doubleword arrangement _2D (VPR128, v2i64, size 0b11). No 64-bit
+// register form is defined for the doubleword element size.
+multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
+ string asmop, SDPatternOperator opnode,
+ bit Commutable = 0>
+ : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable>
+{
+ let isCommutable = Commutable in {
+ def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
+ [(set (v2i64 VPR128:$Rd),
+ (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
+ NoItinerary>;
+ }
+}
+
+// Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
+// but Result types can be integer or floating point types.
+//
+// Emits _2S (VPR64, v2f32 inputs), _4S (VPR128, v4f32 inputs) and _2D (VPR128,
+// v2f64 inputs). The two-bit size field is assembled as {size, 0} for the
+// single-precision forms and {size, 1} for the double-precision form, so the
+// caller's `size` bit supplies only the upper bit. Each arrangement takes its
+// own pattern operator (opnode2S/4S/2D) and its own result type
+// (ResTy2S/4S/2D); compares pass integer result types, arithmetic passes FP.
+multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
+ string asmop, SDPatternOperator opnode2S,
+ SDPatternOperator opnode4S,
+ SDPatternOperator opnode2D,
+ ValueType ResTy2S, ValueType ResTy4S,
+ ValueType ResTy2D, bit Commutable = 0>
+{
+ let isCommutable = Commutable in {
+ def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
+ asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
+ [(set (ResTy2S VPR64:$Rd),
+ (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
+ NoItinerary>;
+
+ def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
+ [(set (ResTy4S VPR128:$Rd),
+ (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
+ NoItinerary>;
+
+ def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
+ [(set (ResTy2D VPR128:$Rd),
+ (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
+ NoItinerary>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions
+//===----------------------------------------------------------------------===//
+
+// Vector Arithmetic Instructions
+
+// Vector Add (Integer and Floating-Point)
+// Integer form covers B/H/S/D arrangements; both forms are marked commutable
+// (trailing 1).
+
+defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
+defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
+ v2f32, v4f32, v2f64, 1>;
+
+// Vector Sub (Integer and Floating-Point)
+// Shares opcodes with add/fadd; selected by u = 1 (integer) or size bit = 1
+// (FP). Not commutable.
+
+defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
+defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
+ v2f32, v4f32, v2f64, 0>;
+
+// Vector Multiply (Integer and Floating-Point)
+// Integer multiply is instantiated from the BHS multiclass, so no _2D form is
+// defined.
+
+defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
+defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
+ v2f32, v4f32, v2f64, 1>;
+
+// Vector Multiply (Polynomial)
+// Byte arrangements only; both widths select from int_arm_neon_vmulp.
+
+defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
+ int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
+
+// Vector Multiply-accumulate and Multiply-subtract (Integer)
+
+// class NeonI_3VSame_Constraint_impl: a NeonI_3VSame instruction whose
+// destination is also an input (accumulating / read-modify-write forms).
+//   asmop / asmlane - mnemonic and the arrangement suffix (e.g. ".8b") that is
+//                     appended to every register in the assembly string
+//   VPRC / OpTy     - register class and value type used for all operands
+//   q, u, size, opcode - forwarded unchanged to NeonI_3VSame
+//   opnode          - three-operand pattern operator, called as
+//                     (opnode $src, $Rn, $Rm)
+// The extra input $src is tied to $Rd through the Constraints string and does
+// not appear in the assembly string.
+class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
+ RegisterClass VPRC, ValueType OpTy, bit q, bit u, bits<2> size, bits<5> opcode,
+ SDPatternOperator opnode>
+ : NeonI_3VSame<q, u, size, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
+ asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
+ [(set (OpTy VPRC:$Rd),
+ (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+// Integer multiply-accumulate fragment: acc + (lhs * rhs).
+def Neon_mla : PatFrag<(ops node:$acc, node:$lhs, node:$rhs),
+                       (add node:$acc, (mul node:$lhs, node:$rhs))>;
+
+// Integer multiply-subtract fragment: acc - (lhs * rhs).
+def Neon_mls : PatFrag<(ops node:$acc, node:$lhs, node:$rhs),
+                       (sub node:$acc, (mul node:$lhs, node:$rhs))>;
+
+
+// Integer multiply-accumulate, one def per arrangement. All share opcode
+// 0b10010 with u = 0; q selects the 64-bit (VPR64) or 128-bit (VPR128) form
+// and size selects the element width (00 = B, 01 = H, 10 = S).
+def MLAvvv_8B: NeonI_3VSame_Constraint_impl<"mla", ".8b", VPR64, v8i8,
+ 0b0, 0b0, 0b00, 0b10010, Neon_mla>;
+def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
+ 0b1, 0b0, 0b00, 0b10010, Neon_mla>;
+def MLAvvv_4H: NeonI_3VSame_Constraint_impl<"mla", ".4h", VPR64, v4i16,
+ 0b0, 0b0, 0b01, 0b10010, Neon_mla>;
+def MLAvvv_8H: NeonI_3VSame_Constraint_impl<"mla", ".8h", VPR128, v8i16,
+ 0b1, 0b0, 0b01, 0b10010, Neon_mla>;
+def MLAvvv_2S: NeonI_3VSame_Constraint_impl<"mla", ".2s", VPR64, v2i32,
+ 0b0, 0b0, 0b10, 0b10010, Neon_mla>;
+def MLAvvv_4S: NeonI_3VSame_Constraint_impl<"mla", ".4s", VPR128, v4i32,
+ 0b1, 0b0, 0b10, 0b10010, Neon_mla>;
+
+// Integer multiply-subtract: identical to the MLA defs above except u = 1 and
+// the Neon_mls pattern operator.
+def MLSvvv_8B: NeonI_3VSame_Constraint_impl<"mls", ".8b", VPR64, v8i8,
+ 0b0, 0b1, 0b00, 0b10010, Neon_mls>;
+def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
+ 0b1, 0b1, 0b00, 0b10010, Neon_mls>;
+def MLSvvv_4H: NeonI_3VSame_Constraint_impl<"mls", ".4h", VPR64, v4i16,
+ 0b0, 0b1, 0b01, 0b10010, Neon_mls>;
+def MLSvvv_8H: NeonI_3VSame_Constraint_impl<"mls", ".8h", VPR128, v8i16,
+ 0b1, 0b1, 0b01, 0b10010, Neon_mls>;
+def MLSvvv_2S: NeonI_3VSame_Constraint_impl<"mls", ".2s", VPR64, v2i32,
+ 0b0, 0b1, 0b10, 0b10010, Neon_mls>;
+def MLSvvv_4S: NeonI_3VSame_Constraint_impl<"mls", ".4s", VPR128, v4i32,
+ 0b1, 0b1, 0b10, 0b10010, Neon_mls>;
+
+// Vector Multiply-accumulate and Multiply-subtract (Floating Point)
+
+// Unfused FP multiply-accumulate fragment: acc + (lhs * rhs).
+def Neon_fmla : PatFrag<(ops node:$acc, node:$lhs, node:$rhs),
+                        (fadd node:$acc, (fmul node:$lhs, node:$rhs))>;
+
+// Unfused FP multiply-subtract fragment: acc - (lhs * rhs).
+def Neon_fmls : PatFrag<(ops node:$acc, node:$lhs, node:$rhs),
+                        (fsub node:$acc, (fmul node:$lhs, node:$rhs))>;
+
+// FP multiply-accumulate / multiply-subtract instructions. The built-in
+// patterns (Neon_fmla / Neon_fmls) match a separate fmul + fadd/fsub, so the
+// defs are guarded by UseFusedMAC in addition to HasNEON.
+// Encoding: opcode 0b11001 and u = 0 throughout; the upper size bit is 0 for
+// FMLA and 1 for FMLS, the lower size bit is 1 only for the .2d form.
+let Predicates = [HasNEON, UseFusedMAC] in {
+def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32,
+ 0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
+def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32,
+ 0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
+def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d", VPR128, v2f64,
+ 0b1, 0b0, 0b01, 0b11001, Neon_fmla>;
+
+def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s", VPR64, v2f32,
+ 0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
+def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s", VPR128, v4f32,
+ 0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
+def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d", VPR128, v2f64,
+ 0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
+}
+
+// We're also allowed to match the fma instruction regardless of compile
+// options.
+// Operand order differs between the two sides: the fma node is written as
+// (fma Rn, Rm, Ra), while the instruction takes the accumulator first because
+// its $src input is tied to $Rd.
+def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
+ (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
+def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
+ (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
+def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
+ (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
+
+// An fma whose first multiplicand is negated computes Ra - Rn * Rm, which is
+// what FMLS produces.
+def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
+ (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
+def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
+ (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
+def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
+ (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
+
+// Vector Divide (Floating-Point)
+// Defined for .2s/.4s/.2d with FP result types; not commutable.
+
+defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv,
+ v2f32, v4f32, v2f64, 0>;
+
+// Vector Bitwise Operations
+// All register-form bitwise instructions share opcode 0b00011 and are told
+// apart by u and the size field. They are defined for the byte arrangements
+// only; other element types are handled by separate patterns.
+
+// Vector Bitwise AND
+
+defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;
+
+// Vector Bitwise Exclusive OR
+
+defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;
+
+// Vector Bitwise OR
+
+defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
+
+// ORR disassembled as MOV if Vn==Vm
+
+// Vector Move - register
+// Alias for ORR if Vn=Vm and it is the preferred syntax
+// The alias passes $Rn for both source operands of ORR.
+def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
+ (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn)>;
+def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
+ (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn)>;
+
+// Leaf that matches a Neon_movi node representing an all-ones vector. The
+// predicate decodes the node's two constant operands (immediate and op/cmode)
+// with A64Imms::decodeNeonModImm and accepts only the case where the decoded
+// element is 8 bits wide with value 0xff.
+def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
+ ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1));
+ unsigned EltBits;
+ uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(),
+ OpCmodeConstVal->getZExtValue(), EltBits);
+ return (EltBits == 8 && EltVal == 0xff);
+}]>;
+
+
+// Bitwise NOT, expressed as XOR with an all-ones byte vector that has been
+// bitconverted to the type of the value being inverted.
+def Neon_not8B  : PatFrag<(ops node:$val),
+                          (xor node:$val, (bitconvert (v8i8 Neon_immAllOnes)))>;
+def Neon_not16B : PatFrag<(ops node:$val),
+                          (xor node:$val, (bitconvert (v16i8 Neon_immAllOnes)))>;
+
+// OR-NOT: lhs | ~rhs.
+def Neon_orn8B  : PatFrag<(ops node:$lhs, node:$rhs),
+                          (or node:$lhs, (Neon_not8B node:$rhs))>;
+
+def Neon_orn16B : PatFrag<(ops node:$lhs, node:$rhs),
+                          (or node:$lhs, (Neon_not16B node:$rhs))>;
+
+// Bit clear (AND-NOT): lhs & ~rhs.
+def Neon_bic8B  : PatFrag<(ops node:$lhs, node:$rhs),
+                          (and node:$lhs, (Neon_not8B node:$rhs))>;
+
+def Neon_bic16B : PatFrag<(ops node:$lhs, node:$rhs),
+                          (and node:$lhs, (Neon_not16B node:$rhs))>;
+
+
+// Vector Bitwise OR NOT - register
+// Selected from the Neon_orn fragments (Rn | ~Rm); not commutable because
+// only the second operand is inverted.
+
+defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
+ Neon_orn8B, Neon_orn16B, 0>;
+
+// Vector Bitwise Bit Clear (AND NOT) - register
+// Selected from the Neon_bic fragments (Rn & ~Rm); not commutable.
+
+defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
+ Neon_bic8B, Neon_bic16B, 0>;
+
+// Neon_bitwise2V_patterns: extra selection patterns that map a two-operand
+// bitwise operator on non-byte integer vector types onto the byte-arrangement
+// instructions. 64-bit types (v2i32, v4i16, v1i64) use INST8B with opnode8B;
+// 128-bit types (v4i32, v8i16, v2i64) use INST16B with opnode16B. The byte
+// types themselves are not listed here.
+multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
+ SDPatternOperator opnode16B,
+ Instruction INST8B,
+ Instruction INST16B> {
+ def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
+ (INST8B VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
+ (INST8B VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
+ (INST8B VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
+ (INST16B VPR128:$Rn, VPR128:$Rm)>;
+ def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
+ (INST16B VPR128:$Rn, VPR128:$Rm)>;
+ def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
+ (INST16B VPR128:$Rn, VPR128:$Rm)>;
+}
+
+// Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
+// Each line pairs a 64-bit and a 128-bit pattern operator with the matching
+// _8B / _16B instruction.
+defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
+defm : Neon_bitwise2V_patterns<or, or, ORRvvv_8B, ORRvvv_16B>;
+defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
+defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
+defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;
+
+// Vector Bitwise Select
+// The destination is also the first input ($src tied to $Rd by
+// NeonI_3VSame_Constraint_impl); selected from the Neon_bsl node.
+def BSLvvv_8B : NeonI_3VSame_Constraint_impl<"bsl", ".8b", VPR64, v8i8,
+ 0b0, 0b1, 0b01, 0b00011, Neon_bsl>;
+
+def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
+ 0b1, 0b1, 0b01, 0b00011, Neon_bsl>;
+
+// Neon_bitwise3V_patterns: extra selection patterns for a three-operand
+// bitwise instruction (instantiated for BSL). Three groups are emitted:
+//   1. opnode on the non-byte integer vector types;
+//   2. the open-coded select (Rn & Rd) | (Rm & ~Rd) for every integer vector
+//      type, where Rd is the mask and becomes the tied first operand;
+//   3. the int_arm_neon_vbsl intrinsic for integer and FP vector types.
+// 64-bit types select INST8B and 128-bit types select INST16B.
+multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
+ Instruction INST8B,
+ Instruction INST16B> {
+ // Disassociate type from instruction definition
+ def : Pat<(v2i32 (opnode VPR64:$src,VPR64:$Rn, VPR64:$Rm)),
+ (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
+ (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
+ (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
+ (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
+ def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
+ (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
+ def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
+ (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
+
+ // Match the open-coded bitwise select when the mask ($Rd) is a
+ // non-constant operand.
+ def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
+ (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
+ (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
+ (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
+ (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
+ (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
+ (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
+ (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
+ (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
+ (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
+ (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
+ def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
+ (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
+ (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
+ def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
+ (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
+ (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
+ def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
+ (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
+ (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
+
+ // Allow to match llvm.arm.* intrinsics.
+ def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
+ (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
+ (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
+ (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
+ (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
+ (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
+ (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
+ (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
+ (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
+ (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
+ (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
+ (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
+ (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
+ def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
+ (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
+ (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
+ def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
+ (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
+ (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
+ def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
+ (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
+ (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
+ def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
+ (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
+ (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
+ def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
+ (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
+ (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
+}
+
+// Additional patterns for bitwise instruction BSL
+// Instantiates every pattern group of Neon_bitwise3V_patterns with the
+// Neon_bsl node and the two BSL instruction widths.
+defm: Neon_bitwise3V_patterns<Neon_bsl, BSLvvv_8B, BSLvvv_16B>;
+
+// Placeholder pattern operator with the same shape as Neon_bsl whose predicate
+// unconditionally returns false, so a pattern built from it can never match.
+// It is used for instructions that need a three-operand pattern slot but must
+// not be chosen by instruction selection. `(void)N` only references N so the
+// predicate body does not leave it unused.
+def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
+ (Neon_bsl node:$src, node:$Rn, node:$Rm),
+ [{ (void)N; return false; }]>;
+
+// Vector Bitwise Insert if True
+// Defined with Neon_NoBSLop, whose predicate always fails, so these defs
+// provide assembly/encoding only and are never selected from the DAG.
+
+def BITvvv_8B : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64, v8i8,
+ 0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
+def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
+ 0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
+
+// Vector Bitwise Insert if False
+// Same treatment as BIT: encoding differs only in the size field (0b11).
+
+def BIFvvv_8B : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64, v8i8,
+ 0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
+def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
+ 0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
+
+// Vector Absolute Difference and Accumulate (Signed, Unsigned)
+
+// Unsigned absolute-difference-and-accumulate: acc + vabdu(lhs, rhs).
+def Neon_uaba : PatFrag<(ops node:$acc, node:$lhs, node:$rhs),
+                        (add node:$acc,
+                             (int_arm_neon_vabdu node:$lhs, node:$rhs))>;
+// Signed absolute-difference-and-accumulate: acc + vabds(lhs, rhs).
+def Neon_saba : PatFrag<(ops node:$acc, node:$lhs, node:$rhs),
+                        (add node:$acc,
+                             (int_arm_neon_vabds node:$lhs, node:$rhs))>;
+
+// Vector Absolute Difference and Accumulate (Unsigned)
+// Opcode 0b01111 with u = 1; q picks VPR64/VPR128 and size picks B/H/S.
+def UABAvvv_8B : NeonI_3VSame_Constraint_impl<"uaba", ".8b", VPR64, v8i8,
+ 0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
+def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
+ 0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
+def UABAvvv_4H : NeonI_3VSame_Constraint_impl<"uaba", ".4h", VPR64, v4i16,
+ 0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
+def UABAvvv_8H : NeonI_3VSame_Constraint_impl<"uaba", ".8h", VPR128, v8i16,
+ 0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
+def UABAvvv_2S : NeonI_3VSame_Constraint_impl<"uaba", ".2s", VPR64, v2i32,
+ 0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
+def UABAvvv_4S : NeonI_3VSame_Constraint_impl<"uaba", ".4s", VPR128, v4i32,
+ 0b1, 0b1, 0b10, 0b01111, Neon_uaba>;
+
+// Vector Absolute Difference and Accumulate (Signed)
+// Same opcode as the unsigned form with u = 0.
+def SABAvvv_8B : NeonI_3VSame_Constraint_impl<"saba", ".8b", VPR64, v8i8,
+ 0b0, 0b0, 0b00, 0b01111, Neon_saba>;
+def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
+ 0b1, 0b0, 0b00, 0b01111, Neon_saba>;
+def SABAvvv_4H : NeonI_3VSame_Constraint_impl<"saba", ".4h", VPR64, v4i16,
+ 0b0, 0b0, 0b01, 0b01111, Neon_saba>;
+def SABAvvv_8H : NeonI_3VSame_Constraint_impl<"saba", ".8h", VPR128, v8i16,
+ 0b1, 0b0, 0b01, 0b01111, Neon_saba>;
+def SABAvvv_2S : NeonI_3VSame_Constraint_impl<"saba", ".2s", VPR64, v2i32,
+ 0b0, 0b0, 0b10, 0b01111, Neon_saba>;
+def SABAvvv_4S : NeonI_3VSame_Constraint_impl<"saba", ".4s", VPR128, v4i32,
+ 0b1, 0b0, 0b10, 0b01111, Neon_saba>;
+
+
+// Vector Absolute Difference (Signed, Unsigned)
+// B/H/S arrangements only; selected from the ARM vabdu / vabds intrinsics and
+// declared non-commutable here.
+defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
+defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;
+
+// Vector Absolute Difference (Floating Point)
+// Uses the same int_arm_neon_vabds intrinsic as SABD, applied to FP operand
+// and result types.
+defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
+ int_arm_neon_vabds, int_arm_neon_vabds,
+ int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;
+
+// Vector Reciprocal Step (Floating Point)
+// Selected from int_arm_neon_vrecps for all three arrangements.
+defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
+ int_arm_neon_vrecps, int_arm_neon_vrecps,
+ int_arm_neon_vrecps,
+ v2f32, v4f32, v2f64, 0>;
+
+// Vector Reciprocal Square Root Step (Floating Point)
+// Selected from int_arm_neon_vrsqrts; differs from FRECPS only in the size
+// bit.
+defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
+ int_arm_neon_vrsqrts,
+ int_arm_neon_vrsqrts,
+ int_arm_neon_vrsqrts,
+ v2f32, v4f32, v2f64, 0>;
+
+// Vector Comparisons
+
+// Two-operand wrappers around Neon_cmp, one per condition code, so that the
+// register-register compare instructions can use an ordinary binary operator.
+// Equal.
+def Neon_cmeq  : PatFrag<(ops node:$a, node:$b),
+                         (Neon_cmp node:$a, node:$b, SETEQ)>;
+// Unsigned greater-than-or-equal ("higher or same").
+def Neon_cmphs : PatFrag<(ops node:$a, node:$b),
+                         (Neon_cmp node:$a, node:$b, SETUGE)>;
+// Signed greater-than-or-equal.
+def Neon_cmge  : PatFrag<(ops node:$a, node:$b),
+                         (Neon_cmp node:$a, node:$b, SETGE)>;
+// Unsigned greater-than ("higher").
+def Neon_cmhi  : PatFrag<(ops node:$a, node:$b),
+                         (Neon_cmp node:$a, node:$b, SETUGT)>;
+// Signed greater-than.
+def Neon_cmgt  : PatFrag<(ops node:$a, node:$b),
+                         (Neon_cmp node:$a, node:$b, SETGT)>;
+
+// NeonI_compare_aliases class: swaps register operands to implement
+// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
+//   asmop   - alias mnemonic
+//   asmlane - arrangement suffix appended to each register (e.g. ".8b")
+//   inst    - the real instruction the alias expands to
+//   VPRC    - register class of all three operands
+// The assembly string lists $Rd, $Rn, $Rm, while the result dag passes
+// ($Rd, $Rm, $Rn), which is where the swap happens. The trailing 0b0 is the
+// final NeonInstAlias argument.
+// NOTE(review): presumably that flag keeps the printer from emitting the
+// alias; confirm against the NeonInstAlias definition.
+class NeonI_compare_aliases<string asmop, string asmlane,
+ Instruction inst, RegisterClass VPRC>
+ : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
+ ", $Rm" # asmlane,
+ (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
+
+// Vector Comparisons (Integer)
+// All register-register integer compares cover the B/H/S/D arrangements.
+
+// Vector Compare Mask Equal (Integer)
+// NOTE(review): the multiclass is passed Commutable = 0 while the enclosing
+// `let` sets isCommutable = 1; presumably the outer `let` is what takes
+// effect. Confirm, or pass 1 directly as the other commutable defms do.
+let isCommutable =1 in {
+defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
+}
+
+// Vector Compare Mask Higher or Same (Unsigned Integer)
+defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;
+
+// Vector Compare Mask Greater Than or Equal (Integer)
+defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;
+
+// Vector Compare Mask Higher (Unsigned Integer)
+defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;
+
+// Vector Compare Mask Greater Than (Integer)
+defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;
+
+// Vector Compare Mask Bitwise Test (Integer)
+// Selected from the dedicated Neon_tst node rather than from Neon_cmp.
+defm CMTSTvvv: NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
+
+// The four groups below are assembler aliases only: each "less" comparison is
+// expressed as the corresponding "greater" instruction with $Rn and $Rm
+// exchanged (done inside NeonI_compare_aliases). One alias per arrangement.
+
+// Vector Compare Mask Less or Same (Unsigned Integer)
+// CMLS is alias for CMHS with operands reversed.
+def CMLSvvv_8B : NeonI_compare_aliases<"cmls", ".8b", CMHSvvv_8B, VPR64>;
+def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
+def CMLSvvv_4H : NeonI_compare_aliases<"cmls", ".4h", CMHSvvv_4H, VPR64>;
+def CMLSvvv_8H : NeonI_compare_aliases<"cmls", ".8h", CMHSvvv_8H, VPR128>;
+def CMLSvvv_2S : NeonI_compare_aliases<"cmls", ".2s", CMHSvvv_2S, VPR64>;
+def CMLSvvv_4S : NeonI_compare_aliases<"cmls", ".4s", CMHSvvv_4S, VPR128>;
+def CMLSvvv_2D : NeonI_compare_aliases<"cmls", ".2d", CMHSvvv_2D, VPR128>;
+
+// Vector Compare Mask Less Than or Equal (Integer)
+// CMLE is alias for CMGE with operands reversed.
+def CMLEvvv_8B : NeonI_compare_aliases<"cmle", ".8b", CMGEvvv_8B, VPR64>;
+def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
+def CMLEvvv_4H : NeonI_compare_aliases<"cmle", ".4h", CMGEvvv_4H, VPR64>;
+def CMLEvvv_8H : NeonI_compare_aliases<"cmle", ".8h", CMGEvvv_8H, VPR128>;
+def CMLEvvv_2S : NeonI_compare_aliases<"cmle", ".2s", CMGEvvv_2S, VPR64>;
+def CMLEvvv_4S : NeonI_compare_aliases<"cmle", ".4s", CMGEvvv_4S, VPR128>;
+def CMLEvvv_2D : NeonI_compare_aliases<"cmle", ".2d", CMGEvvv_2D, VPR128>;
+
+// Vector Compare Mask Lower (Unsigned Integer)
+// CMLO is alias for CMHI with operands reversed.
+def CMLOvvv_8B : NeonI_compare_aliases<"cmlo", ".8b", CMHIvvv_8B, VPR64>;
+def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
+def CMLOvvv_4H : NeonI_compare_aliases<"cmlo", ".4h", CMHIvvv_4H, VPR64>;
+def CMLOvvv_8H : NeonI_compare_aliases<"cmlo", ".8h", CMHIvvv_8H, VPR128>;
+def CMLOvvv_2S : NeonI_compare_aliases<"cmlo", ".2s", CMHIvvv_2S, VPR64>;
+def CMLOvvv_4S : NeonI_compare_aliases<"cmlo", ".4s", CMHIvvv_4S, VPR128>;
+def CMLOvvv_2D : NeonI_compare_aliases<"cmlo", ".2d", CMHIvvv_2D, VPR128>;
+
+// Vector Compare Mask Less Than (Integer)
+// CMLT is alias for CMGT with operands reversed.
+def CMLTvvv_8B : NeonI_compare_aliases<"cmlt", ".8b", CMGTvvv_8B, VPR64>;
+def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
+def CMLTvvv_4H : NeonI_compare_aliases<"cmlt", ".4h", CMGTvvv_4H, VPR64>;
+def CMLTvvv_8H : NeonI_compare_aliases<"cmlt", ".8h", CMGTvvv_8H, VPR128>;
+def CMLTvvv_2S : NeonI_compare_aliases<"cmlt", ".2s", CMGTvvv_2S, VPR64>;
+def CMLTvvv_4S : NeonI_compare_aliases<"cmlt", ".4s", CMGTvvv_4S, VPR128>;
+def CMLTvvv_2D : NeonI_compare_aliases<"cmlt", ".2d", CMGTvvv_2D, VPR128>;
+
+
+// Assembly operand class for the literal zero used by the compare-with-zero
+// instructions: validated with isUImm<0> and rendered with addImmOperands.
+def neon_uimm0_asmoperand : AsmOperandClass
+{
+ let Name = "UImm0";
+ let PredicateMethod = "isUImm<0>";
+ let RenderMethod = "addImmOperands";
+}
+
+// i32 immediate operand that only matches the value 0 during selection
+// (ImmLeaf predicate Imm == 0). Parsing goes through neon_uimm0_asmoperand and
+// printing through printNeonUImm0Operand.
+def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
+ let ParserMatchClass = neon_uimm0_asmoperand;
+ let PrintMethod = "printNeonUImm0Operand";
+
+}
+
+// NeonI_cmpz_sizes: integer vector compare against zero, built on the
+// two-register-misc format NeonI_2VMisc. One def per arrangement
+// (_8B/_16B/_4H/_8H/_2S/_4S/_2D); q selects VPR64/VPR128 and size the element
+// width. Each pattern is (Neon_cmpz $Rn, imm, CC), where CC is the condition
+// code supplied by the caller and the immediate operand is neon_uimm0, i.e.
+// restricted to 0.
+multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
+{
+ def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
+ asmop # "\t$Rd.8b, $Rn.8b, $Imm",
+ [(set (v8i8 VPR64:$Rd),
+ (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
+ NoItinerary>;
+
+ def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
+ asmop # "\t$Rd.16b, $Rn.16b, $Imm",
+ [(set (v16i8 VPR128:$Rd),
+ (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
+ NoItinerary>;
+
+ def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
+ asmop # "\t$Rd.4h, $Rn.4h, $Imm",
+ [(set (v4i16 VPR64:$Rd),
+ (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
+ NoItinerary>;
+
+ def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
+ asmop # "\t$Rd.8h, $Rn.8h, $Imm",
+ [(set (v8i16 VPR128:$Rd),
+ (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
+ NoItinerary>;
+
+ def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
+ asmop # "\t$Rd.2s, $Rn.2s, $Imm",
+ [(set (v2i32 VPR64:$Rd),
+ (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
+ NoItinerary>;
+
+ def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
+ asmop # "\t$Rd.4s, $Rn.4s, $Imm",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
+ NoItinerary>;
+
+ def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
+ asmop # "\t$Rd.2d, $Rn.2d, $Imm",
+ [(set (v2i64 VPR128:$Rd),
+ (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
+ NoItinerary>;
+}
+
+// Integer compare-with-zero instructions. Unlike the register-register forms,
+// the "less" comparisons (CMLE, CMLT) are real instructions here, not aliases.
+
+// Vector Compare Mask Equal to Zero (Integer)
+defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;
+
+// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
+defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;
+
+// Vector Compare Mask Greater Than Zero (Signed Integer)
+defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;
+
+// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
+defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;
+
+// Vector Compare Mask Less Than Zero (Signed Integer)
+defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
+
+// Vector Comparisons (Floating Point)
+// Operands are FP vectors; results are the same-width integer mask vectors
+// (v2i32, v4i32, v2i64), passed as the ResTy arguments.
+
+// Vector Compare Mask Equal (Floating Point)
+// NOTE(review): Commutable is passed as 0 while the enclosing `let` sets
+// isCommutable = 1; presumably the outer `let` is what takes effect. Confirm.
+let isCommutable =1 in {
+defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
+ Neon_cmeq, Neon_cmeq,
+ v2i32, v4i32, v2i64, 0>;
+}
+
+// Vector Compare Mask Greater Than Or Equal (Floating Point)
+defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
+ Neon_cmge, Neon_cmge,
+ v2i32, v4i32, v2i64, 0>;
+
+// Vector Compare Mask Greater Than (Floating Point)
+defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
+ Neon_cmgt, Neon_cmgt,
+ v2i32, v4i32, v2i64, 0>;
+
+// Assembler aliases only: the operand swap is performed by
+// NeonI_compare_aliases, so no separate instruction is defined.
+
+// Vector Compare Mask Less Than Or Equal (Floating Point)
+// FCMLE is alias for FCMGE with operands reversed.
+def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>;
+def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>;
+def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>;
+
+// Vector Compare Mask Less Than (Floating Point)
+// FCMLT is alias for FCMGT with operands reversed.
+def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>;
+def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>;
+def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>;
+
+
+// NeonI_fpcmpz_sizes: floating-point vector compare against zero on the
+// NeonI_2VMisc format. Emits _2S, _4S and _2D; the size field is {size, 0}
+// for single precision and {size, 1} for double precision. Inputs are FP
+// vectors, results are integer mask vectors of the same width. The zero
+// operand uses fpz32 and an f32 fpimm in the pattern for every arrangement,
+// including _2D.
+multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
+ string asmop, CondCode CC>
+{
+ def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm),
+ asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
+ [(set (v2i32 VPR64:$Rd),
+ (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpimm:$FPImm), CC)))],
+ NoItinerary>;
+
+ def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
+ asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
+ NoItinerary>;
+
+ def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
+ asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
+ [(set (v2i64 VPR128:$Rd),
+ (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
+ NoItinerary>;
+}
+
+// Floating-point compare-with-zero instructions; all pass size bit 1 and are
+// distinguished by u and opcode.
+
+// Vector Compare Mask Equal to Zero (Floating Point)
+defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;
+
+// Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
+defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;
+
+// Vector Compare Mask Greater Than Zero (Floating Point)
+defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;
+
+// Vector Compare Mask Less Than or Equal To Zero (Floating Point)
+defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;
+
+// Vector Compare Mask Less Than Zero (Floating Point)
+defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
+
+// Vector Absolute Comparisons (Floating Point)
+// Each comparison is given three intrinsics (presumably one per .2s/.4s/.2d
+// variant) and integer result types v2i32/v4i32/v2i64. The trailing 0 is
+// presumably the commutable flag - TODO confirm against NeonI_3VSame_SD_sizes.
+
+// Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
+defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
+ int_arm_neon_vacged, int_arm_neon_vacgeq,
+ int_aarch64_neon_vacgeq,
+ v2i32, v4i32, v2i64, 0>;
+
+// Vector Absolute Compare Mask Greater Than (Floating Point)
+defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
+ int_arm_neon_vacgtd, int_arm_neon_vacgtq,
+ int_aarch64_neon_vacgtq,
+ v2i32, v4i32, v2i64, 0>;
+
+// Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
+// FACLE is an alias for FACGE with the source operands reversed.
+def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>;
+def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>;
+def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>;
+
+// Vector Absolute Compare Mask Less Than (Floating Point)
+// FACLT is an alias for FACGT with the source operands reversed.
+def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>;
+def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>;
+def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>;
+
+// Vector halving add (Integer Signed, Unsigned)
+// The final template argument is 1 for the add forms and 0 for the sub
+// forms below (presumably the commutable flag - TODO confirm).
+defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
+ int_arm_neon_vhadds, 1>;
+defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
+ int_arm_neon_vhaddu, 1>;
+
+// Vector halving sub (Integer Signed, Unsigned)
+defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
+ int_arm_neon_vhsubs, 0>;
+defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
+ int_arm_neon_vhsubu, 0>;
+
+// Vector rounding halving add (Integer Signed, Unsigned)
+defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
+ int_arm_neon_vrhadds, 1>;
+defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
+ int_arm_neon_vrhaddu, 1>;
+
+// Vector Saturating add (Integer Signed, Unsigned)
+// Saturating addition is commutative, so the commutable flag (last
+// template argument) is set.
+defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
+ int_arm_neon_vqadds, 1>;
+defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
+ int_arm_neon_vqaddu, 1>;
+
+// Vector Saturating sub (Integer Signed, Unsigned)
+// Subtraction is NOT commutative: the flag must be 0 so that no pass is
+// allowed to swap $Rn and $Rm (this matches the scalar SQSUB/UQSUB and the
+// halving-sub definitions).
+defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
+ int_arm_neon_vqsubs, 0>;
+defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
+ int_arm_neon_vqsubu, 0>;
+
+// Register-controlled vector shifts. In every instruction below $Rn holds
+// the values being shifted and $Rm holds the per-lane shift amounts, so the
+// two sources are not interchangeable: the commutable flag (last template
+// argument) must be 0, as it is for the scalar SQSHL/SQRSHL definitions.
+
+// Vector Shift Left (Signed and Unsigned Integer)
+defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
+ int_arm_neon_vshifts, 0>;
+defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
+ int_arm_neon_vshiftu, 0>;
+
+// Vector Saturating Shift Left (Signed and Unsigned Integer)
+defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
+ int_arm_neon_vqshifts, 0>;
+defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
+ int_arm_neon_vqshiftu, 0>;
+
+// Vector Rounding Shift Left (Signed and Unsigned Integer)
+defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
+ int_arm_neon_vrshifts, 0>;
+defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
+ int_arm_neon_vrshiftu, 0>;
+
+// Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
+defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
+ int_arm_neon_vqrshifts, 0>;
+defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
+ int_arm_neon_vqrshiftu, 0>;
+
+// Element-wise maximum / minimum. The same overloaded intrinsic is passed
+// for every arrangement of the floating-point forms.
+
+// Vector Maximum (Signed and Unsigned Integer)
+defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
+defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
+
+// Vector Minimum (Signed and Unsigned Integer)
+defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
+defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
+
+// Vector Maximum (Floating Point)
+defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
+ int_arm_neon_vmaxs, int_arm_neon_vmaxs,
+ int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;
+
+// Vector Minimum (Floating Point)
+defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
+ int_arm_neon_vmins, int_arm_neon_vmins,
+ int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;
+
+// Vector maxNum (Floating Point) - prefer a number over a quiet NaN
+defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
+ int_aarch64_neon_vmaxnm,
+ int_aarch64_neon_vmaxnm,
+ int_aarch64_neon_vmaxnm,
+ v2f32, v4f32, v2f64, 1>;
+
+// Vector minNum (Floating Point) - prefer a number over a quiet NaN
+defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
+ int_aarch64_neon_vminnm,
+ int_aarch64_neon_vminnm,
+ int_aarch64_neon_vminnm,
+ v2f32, v4f32, v2f64, 1>;
+
+// Pairwise operations. A pairwise instruction reduces adjacent element pairs
+// of the concatenation of its two sources: the low half of the result comes
+// from $Rn and the high half from $Rm. Swapping the sources therefore swaps
+// the halves of the result, so none of these instructions is commutable and
+// the commutable flag (last template argument) must be 0.
+
+// Vector Maximum Pairwise (Signed and Unsigned Integer)
+defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 0>;
+defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 0>;
+
+// Vector Minimum Pairwise (Signed and Unsigned Integer)
+defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 0>;
+defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 0>;
+
+// Vector Maximum Pairwise (Floating Point)
+defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
+ int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
+ int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 0>;
+
+// Vector Minimum Pairwise (Floating Point)
+defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
+ int_arm_neon_vpmins, int_arm_neon_vpmins,
+ int_arm_neon_vpmins, v2f32, v4f32, v2f64, 0>;
+
+// Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
+defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
+ int_aarch64_neon_vpmaxnm,
+ int_aarch64_neon_vpmaxnm,
+ int_aarch64_neon_vpmaxnm,
+ v2f32, v4f32, v2f64, 0>;
+
+// Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
+defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
+ int_aarch64_neon_vpminnm,
+ int_aarch64_neon_vpminnm,
+ int_aarch64_neon_vpminnm,
+ v2f32, v4f32, v2f64, 0>;
+
+// Vector Addition Pairwise (Integer)
+defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 0>;
+
+// Vector Addition Pairwise (Floating Point)
+defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
+ int_arm_neon_vpadd,
+ int_arm_neon_vpadd,
+ int_arm_neon_vpadd,
+ v2f32, v4f32, v2f64, 0>;
+
+// Vector Saturating Doubling Multiply High
+// Only halfword and word element sizes are instantiated (HS multiclass).
+defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
+ int_arm_neon_vqdmulh, 1>;
+
+// Vector Saturating Rounding Doubling Multiply High
+defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
+ int_arm_neon_vqrdmulh, 1>;
+
+// Vector Multiply Extended (Floating Point)
+defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
+ int_aarch64_neon_vmulx,
+ int_aarch64_neon_vmulx,
+ int_aarch64_neon_vmulx,
+ v2f32, v4f32, v2f64, 1>;
+
+// Vector Immediate Instructions
+
+// Declares <NAME>_asmoperand, the assembly-parser operand class for one kind
+// of modified-immediate shift. PREFIX is spliced into the operand class name
+// and into the names of the parser's render and predicate methods.
+multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
+{
+  def _asmoperand : AsmOperandClass
+  {
+    let Name = !strconcat("NeonMovImmShift", PREFIX);
+    let RenderMethod = !strconcat("addNeonMovImmShift",
+                                  !strconcat(PREFIX, "Operands"));
+    let PredicateMethod = !strconcat("isNeonMovImmShift", PREFIX);
+  }
+}
+
+// Definition of vector immediate shift operands
+
+// The selectable use-cases extract the shift operation
+// information from the OpCmode fields encoded in the immediate.
+// The transform returns only the decoded shift amount as an i32 target
+// constant; ShiftOnesIn is decoded but not used here.
+def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
+ uint64_t OpCmode = N->getZExtValue();
+ unsigned ShiftImm;
+ unsigned ShiftOnesIn;
+ unsigned HasShift =
+ A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
+ // No shift encoded in OpCmode: hand back a null SDValue.
+ if (!HasShift) return SDValue();
+ return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
+}]>;
+
+// Vector immediate shift operands which accept the LSL and MSL
+// shift operators, with a shift value of 0, 8, 16 or 24 (LSL),
+// 0 or 8 (LSLH), or 8 or 16 (MSL).
+// Each defm below creates the corresponding <name>_asmoperand record.
+defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
+defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
+// LSLH restricts the shift amount to 0 or 8 out of 0, 8, 16, 24
+defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
+
+// Declares <NAME>_operand, an i32 immediate operand for a modified-immediate
+// shift.
+//   PREFIX - shift kind; becomes the A64SE:: enumerator used as the first
+//            template argument of the print and decode methods
+//   HALF   - suffix appended to PREFIX when looking up the asm operand class
+//            ("" or "H")
+//   ISHALF - "true"/"false" text used as the second template argument of the
+//            print and decode methods
+//   pred   - C++ predicate deciding whether an immediate is selectable
+multiclass neon_mov_imm_shift_operands<string PREFIX,
+ string HALF, string ISHALF, code pred>
+{
+ def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
+ {
+ let PrintMethod =
+ "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
+ let DecoderMethod =
+ "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
+ // Resolves to the record created by neon_mov_imm_shift_asmoperands.
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
+ }
+}
+
+// LSL operand: selectable when the OpCmode encodes a shift that shifts in
+// zeros.
+defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{
+ unsigned ShiftImm;
+ unsigned ShiftOnesIn;
+ unsigned HasShift =
+ A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
+ return (HasShift && !ShiftOnesIn);
+}]>;
+
+// MSL operand: selectable when the OpCmode encodes a shift that shifts in
+// ones.
+defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{
+ unsigned ShiftImm;
+ unsigned ShiftOnesIn;
+ unsigned HasShift =
+ A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
+ return (HasShift && ShiftOnesIn);
+}]>;
+
+// LSLH operand: same selection predicate as LSL; it differs only in the
+// asm operand class (LSLH) and in passing "true" to the print/decode methods.
+defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
+ unsigned ShiftImm;
+ unsigned ShiftOnesIn;
+ unsigned HasShift =
+ A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
+ return (HasShift && !ShiftOnesIn);
+}]>;
+
+// Assembly-parser class for an unsigned 8-bit immediate.
+def neon_uimm8_asmoperand : AsmOperandClass
+{
+ let Name = "UImm8";
+ let PredicateMethod = "isUImm<8>";
+ let RenderMethod = "addImmOperands";
+}
+
+// 8-bit immediate operand. The ImmLeaf predicate accepts every value; range
+// checking is left to the asm parser's isUImm<8> predicate.
+def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
+ let ParserMatchClass = neon_uimm8_asmoperand;
+ let PrintMethod = "printNeonUImm8Operand";
+}
+
+// Assembly-parser class for the 64-bit byte-mask immediate.
+def neon_uimm64_mask_asmoperand : AsmOperandClass
+{
+ let Name = "NeonUImm64Mask";
+ let PredicateMethod = "isNeonUImm64Mask";
+ let RenderMethod = "addNeonUImm64MaskOperands";
+}
+
+// MCOperand for a 64-bit byte mask in which every byte is either 0x00 or
+// 0xff; it is encoded as an unsigned 8-bit value.
+def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
+ let ParserMatchClass = neon_uimm64_mask_asmoperand;
+ let PrintMethod = "printNeonUImm64MaskOperand";
+}
+
+// Modified-immediate instructions whose 8-bit immediate is shifted left with
+// zeros shifted in. Expands to per-word (_2S, _4S) and per-halfword
+// (_4H, _8H) variants.
+//   asmop  - mnemonic
+//   op     - "op" encoding bit forwarded to NeonI_1VModImm
+//   opnode - DAG node that materializes the immediate vector
+multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
+                                   SDPatternOperator opnode>
+{
+  // Per-word forms: the 2-bit shift selector fills cmode<2:1>.
+  def _2S : NeonI_1VModImm<0b0, op,
+              (outs VPR64:$Rd),
+              (ins neon_uimm8:$Imm, neon_mov_imm_LSL_operand:$Simm),
+              asmop # " $Rd.2s, $Imm$Simm",
+              [(set (v2i32 VPR64:$Rd),
+                    (v2i32 (opnode (timm:$Imm),
+                                   (neon_mov_imm_LSL_operand:$Simm))))],
+              NoItinerary> {
+    bits<2> Simm;
+    let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
+  }
+
+  def _4S : NeonI_1VModImm<0b1, op,
+              (outs VPR128:$Rd),
+              (ins neon_uimm8:$Imm, neon_mov_imm_LSL_operand:$Simm),
+              asmop # " $Rd.4s, $Imm$Simm",
+              [(set (v4i32 VPR128:$Rd),
+                    (v4i32 (opnode (timm:$Imm),
+                                   (neon_mov_imm_LSL_operand:$Simm))))],
+              NoItinerary> {
+    bits<2> Simm;
+    let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
+  }
+
+  // Per-halfword forms: a single shift bit fills cmode<1>.
+  def _4H : NeonI_1VModImm<0b0, op,
+              (outs VPR64:$Rd),
+              (ins neon_uimm8:$Imm, neon_mov_imm_LSLH_operand:$Simm),
+              asmop # " $Rd.4h, $Imm$Simm",
+              [(set (v4i16 VPR64:$Rd),
+                    (v4i16 (opnode (timm:$Imm),
+                                   (neon_mov_imm_LSLH_operand:$Simm))))],
+              NoItinerary> {
+    bit Simm;
+    let cmode = {0b1, 0b0, Simm, 0b0};
+  }
+
+  def _8H : NeonI_1VModImm<0b1, op,
+              (outs VPR128:$Rd),
+              (ins neon_uimm8:$Imm, neon_mov_imm_LSLH_operand:$Simm),
+              asmop # " $Rd.8h, $Imm$Simm",
+              [(set (v8i16 VPR128:$Rd),
+                    (v8i16 (opnode (timm:$Imm),
+                                   (neon_mov_imm_LSLH_operand:$Simm))))],
+              NoItinerary> {
+    bit Simm;
+    let cmode = {0b1, 0b0, Simm, 0b0};
+  }
+}
+
+// Modified-immediate instructions that also read their destination register
+// (tied through "$src = $Rd") and combine it with the shifted immediate.
+//   asmop      - mnemonic
+//   op         - "op" encoding bit forwarded to NeonI_1VModImm
+//   opnode     - bitwise node combining $src with the immediate vector
+//   neonopnode - node that materializes the immediate vector
+// The halfword variants use neon_mov_imm_LSLH_operand in both the operand
+// list and the selection pattern, so the two always agree.
+multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
+ SDPatternOperator opnode,
+ SDPatternOperator neonopnode>
+{
+ let Constraints = "$src = $Rd" in {
+ // shift zeros, per word
+ def _2S : NeonI_1VModImm<0b0, op,
+ (outs VPR64:$Rd),
+ (ins VPR64:$src, neon_uimm8:$Imm,
+ neon_mov_imm_LSL_operand:$Simm),
+ !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
+ [(set (v2i32 VPR64:$Rd),
+ (v2i32 (opnode (v2i32 VPR64:$src),
+ (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
+ neon_mov_imm_LSL_operand:$Simm)))))))],
+ NoItinerary> {
+ bits<2> Simm;
+ let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
+ }
+
+ def _4S : NeonI_1VModImm<0b1, op,
+ (outs VPR128:$Rd),
+ (ins VPR128:$src, neon_uimm8:$Imm,
+ neon_mov_imm_LSL_operand:$Simm),
+ !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (opnode (v4i32 VPR128:$src),
+ (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
+ neon_mov_imm_LSL_operand:$Simm)))))))],
+ NoItinerary> {
+ bits<2> Simm;
+ let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
+ }
+
+ // shift zeros, per halfword
+ def _4H : NeonI_1VModImm<0b0, op,
+ (outs VPR64:$Rd),
+ (ins VPR64:$src, neon_uimm8:$Imm,
+ neon_mov_imm_LSLH_operand:$Simm),
+ !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
+ [(set (v4i16 VPR64:$Rd),
+ (v4i16 (opnode (v4i16 VPR64:$src),
+ (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
+ neon_mov_imm_LSLH_operand:$Simm)))))))],
+ NoItinerary> {
+ bit Simm;
+ let cmode = {0b1, 0b0, Simm, 0b1};
+ }
+
+ def _8H : NeonI_1VModImm<0b1, op,
+ (outs VPR128:$Rd),
+ (ins VPR128:$src, neon_uimm8:$Imm,
+ neon_mov_imm_LSLH_operand:$Simm),
+ !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
+ [(set (v8i16 VPR128:$Rd),
+ (v8i16 (opnode (v8i16 VPR128:$src),
+ (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
+ neon_mov_imm_LSLH_operand:$Simm)))))))],
+ NoItinerary> {
+ bit Simm;
+ let cmode = {0b1, 0b0, Simm, 0b1};
+ }
+ }
+}
+
+// Modified-immediate instructions whose 8-bit immediate is shifted left with
+// ones shifted in (MSL). Only per-word variants (_2S, _4S) exist.
+//   asmop  - mnemonic
+//   op     - "op" encoding bit forwarded to NeonI_1VModImm
+//   opnode - DAG node that materializes the immediate vector
+multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
+                                   SDPatternOperator opnode>
+{
+  // The single shift bit is the least significant bit of cmode.
+  def _2S : NeonI_1VModImm<0b0, op,
+              (outs VPR64:$Rd),
+              (ins neon_uimm8:$Imm, neon_mov_imm_MSL_operand:$Simm),
+              asmop # " $Rd.2s, $Imm$Simm",
+              [(set (v2i32 VPR64:$Rd),
+                    (v2i32 (opnode (timm:$Imm),
+                                   (neon_mov_imm_MSL_operand:$Simm))))],
+              NoItinerary> {
+    bit Simm;
+    let cmode = {0b1, 0b1, 0b0, Simm};
+  }
+
+  def _4S : NeonI_1VModImm<0b1, op,
+              (outs VPR128:$Rd),
+              (ins neon_uimm8:$Imm, neon_mov_imm_MSL_operand:$Simm),
+              asmop # " $Rd.4s, $Imm$Simm",
+              [(set (v4i32 VPR128:$Rd),
+                    (v4i32 (opnode (timm:$Imm),
+                                   (neon_mov_imm_MSL_operand:$Simm))))],
+              NoItinerary> {
+    bit Simm;
+    let cmode = {0b1, 0b1, 0b0, Simm};
+  }
+}
+
+// Vector Move Immediate Shifted
+let isReMaterializable = 1 in {
+defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
+}
+
+// Vector Move Inverted Immediate Shifted
+let isReMaterializable = 1 in {
+defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
+}
+
+// Vector Bitwise Bit Clear (AND NOT) - immediate
+// Selected for (and $src, (Neon_mvni imm)).
+// NOTE(review): BIC and ORR read $src (tied to $Rd), yet are flagged
+// isReMaterializable - confirm this is intended.
+let isReMaterializable = 1 in {
+defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
+ and, Neon_mvni>;
+}
+
+// Vector Bitwise OR - immediate
+// Selected for (or $src, (Neon_movi imm)).
+
+let isReMaterializable = 1 in {
+defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
+ or, Neon_movi>;
+}
+
+// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
+// LowerBUILD_VECTOR favors lowering MOVI over MVNI.
+// BIC immediate instruction selection requires additional patterns to
+// transform Neon_movi operands into BIC immediate operands.
+
+// Decodes the shift from the OpCmode immediate and returns the opposite
+// halfword shift selector as an i32 target constant.
+def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
+ uint64_t OpCmode = N->getZExtValue();
+ unsigned ShiftImm;
+ unsigned ShiftOnesIn;
+ (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
+ // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1
+ // Transform encoded shift amount 0 to 1 and 1 to 0.
+ return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
+}]>;
+
+// Immediate leaf that accepts an OpCmode encoding a shift with zeros shifted
+// in, and rewrites it with neon_mov_imm_LSLH_transform_XFORM.
+def neon_mov_imm_LSLH_transform_operand
+ : ImmLeaf<i32, [{
+ unsigned ShiftImm;
+ unsigned ShiftOnesIn;
+ unsigned HasShift =
+ A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
+ return (HasShift && !ShiftOnesIn); }],
+ neon_mov_imm_LSLH_transform_XFORM>;
+
+// An AND with a per-halfword 0x00ff / 0xff00 mask clears the other byte of
+// every halfword, which is a BIC of 0xff at the opposite byte position:
+//   A & 0x00ff == A & ~(0xff << 8)  ->  BIC A, #0xff, LSL #8
+//   A & 0xff00 == A & ~(0xff << 0)  ->  BIC A, #0xff
+// The BIC immediate must therefore be 255; an immediate of 0 would clear no
+// bits at all. The transform operand flips the shift selector.
+
+// Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0xff, LSL 8)
+// Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0xff)
+def : Pat<(v4i16 (and VPR64:$src,
+ (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
+ (BICvi_lsl_4H VPR64:$src, 255,
+ neon_mov_imm_LSLH_transform_operand:$Simm)>;
+
+// Transform (and A, (8h Neon_movi 0xff)) -> BIC 8h (A, 0xff, LSL 8)
+// Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0xff)
+def : Pat<(v8i16 (and VPR128:$src,
+ (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
+ (BICvi_lsl_8H VPR128:$src, 255,
+ neon_mov_imm_LSLH_transform_operand:$Simm)>;
+
+
+// Extra selection patterns for the halfword bitwise-immediate instructions
+// when the operation is performed in another vector type and the halfword
+// immediate vector reaches it through a bitconvert.
+//   opnode     - bitwise node combining $src with the immediate vector
+//   neonopnode - node that materializes the immediate vector
+//   INST4H     - instruction used for the 64-bit types (v8i8, v1i64)
+//   INST8H     - instruction used for the 128-bit types (v16i8, v4i32, v2i64)
+multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
+ SDPatternOperator neonopnode,
+ Instruction INST4H,
+ Instruction INST8H> {
+ def : Pat<(v8i8 (opnode VPR64:$src,
+ (bitconvert(v4i16 (neonopnode timm:$Imm,
+ neon_mov_imm_LSLH_operand:$Simm))))),
+ (INST4H VPR64:$src, neon_uimm8:$Imm,
+ neon_mov_imm_LSLH_operand:$Simm)>;
+ def : Pat<(v1i64 (opnode VPR64:$src,
+ (bitconvert(v4i16 (neonopnode timm:$Imm,
+ neon_mov_imm_LSLH_operand:$Simm))))),
+ (INST4H VPR64:$src, neon_uimm8:$Imm,
+ neon_mov_imm_LSLH_operand:$Simm)>;
+
+ def : Pat<(v16i8 (opnode VPR128:$src,
+ (bitconvert(v8i16 (neonopnode timm:$Imm,
+ neon_mov_imm_LSLH_operand:$Simm))))),
+ (INST8H VPR128:$src, neon_uimm8:$Imm,
+ neon_mov_imm_LSLH_operand:$Simm)>;
+ def : Pat<(v4i32 (opnode VPR128:$src,
+ (bitconvert(v8i16 (neonopnode timm:$Imm,
+ neon_mov_imm_LSLH_operand:$Simm))))),
+ (INST8H VPR128:$src, neon_uimm8:$Imm,
+ neon_mov_imm_LSLH_operand:$Simm)>;
+ def : Pat<(v2i64 (opnode VPR128:$src,
+ (bitconvert(v8i16 (neonopnode timm:$Imm,
+ neon_mov_imm_LSLH_operand:$Simm))))),
+ (INST8H VPR128:$src, neon_uimm8:$Imm,
+ neon_mov_imm_LSLH_operand:$Simm)>;
+}
+
+// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
+// BIC computes $src AND NOT(imm), i.e. (and $src, (Neon_mvni imm)); the
+// combining node must be `and`, exactly as in the BICvi_lsl definition.
+defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;
+
+// Additional patterns for Vector Bitwise OR - immediate
+defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
+
+
+// Vector Move Immediate Masked
+// (MSL forms: ones are shifted in; per-word variants only.)
+let isReMaterializable = 1 in {
+defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
+}
+
+// Vector Move Inverted Immediate Masked
+let isReMaterializable = 1 in {
+defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
+}
+
+// Assembly alias that lets the shift be omitted: the instruction is built
+// with a shift operand of 0. The final 0b0 argument to NeonInstAlias is
+// presumably the "emit" flag, i.e. the alias is parse-only - TODO confirm.
+class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
+ Instruction inst, RegisterClass VPRC>
+ : NeonInstAlias<!strconcat(asmop, " $Rd," # asmlane # ", $Imm"),
+ (inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
+
+// Aliases for Vector Move Immediate Shifted
+def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
+def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
+
+// Aliases for Vector Move Inverted Immediate Shifted
+def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
+def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
+
+// Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
+def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
+def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
+
+// Aliases for Vector Bitwise OR - immediate
+def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
+def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
+
+// Vector Move Immediate - per byte
+// No shift operand exists for the byte forms; the pattern accepts any i32
+// immediate as the second Neon_movi operand and cmode is fixed at 0b1110.
+let isReMaterializable = 1 in {
+def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
+ (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
+ "movi\t$Rd.8b, $Imm",
+ [(set (v8i8 VPR64:$Rd),
+ (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
+ NoItinerary> {
+ let cmode = 0b1110;
+}
+
+def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
+ (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
+ "movi\t$Rd.16b, $Imm",
+ [(set (v16i8 VPR128:$Rd),
+ (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
+ NoItinerary> {
+ let cmode = 0b1110;
+}
+}
+
+// Vector Move Immediate - bytemask, per double word
+// Same cmode as the byte forms, but with the op bit set and a 64-bit
+// byte-mask immediate operand.
+let isReMaterializable = 1 in {
+def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
+ (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
+ "movi\t $Rd.2d, $Imm",
+ [(set (v2i64 VPR128:$Rd),
+ (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
+ NoItinerary> {
+ let cmode = 0b1110;
+}
+}
+
+// Vector Move Immediate - bytemask, one doubleword
+// Writes a scalar FPR64; the pattern bitconverts the v1i64 immediate vector
+// to f64.
+
+let isReMaterializable = 1 in {
+def MOVIdi : NeonI_1VModImm<0b0, 0b1,
+ (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
+ "movi\t $Rd, $Imm",
+ [(set (f64 FPR64:$Rd),
+ (f64 (bitconvert
+ (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))))],
+ NoItinerary> {
+ let cmode = 0b1110;
+}
+}
+
+// Vector Floating Point Move Immediate
+
+// Common shape of the vector FMOV-immediate instructions (cmode = 0b1111).
+//   asmlane   - arrangement suffix printed after $Rd
+//   VPRC      - destination register class
+//   OpTy      - result vector type
+//   immOpType - floating-point immediate operand
+//   q, op     - encoding bits forwarded to NeonI_1VModImm
+class NeonI_FMOV_impl<string asmlane, RegisterClass VPRC, ValueType OpTy,
+ Operand immOpType, bit q, bit op>
+ : NeonI_1VModImm<q, op,
+ (outs VPRC:$Rd), (ins immOpType:$Imm),
+ "fmov\t$Rd" # asmlane # ", $Imm",
+ [(set (OpTy VPRC:$Rd),
+ (OpTy (Neon_fmovi (timm:$Imm))))],
+ NoItinerary> {
+ let cmode = 0b1111;
+ }
+
+let isReMaterializable = 1 in {
+def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>;
+def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
+def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
+}
+
+// Scalar Arithmetic
+
+// Scalar three-register instruction that exists only for the D register
+// size. No selection pattern is attached here.
+class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
+  : NeonI_Scalar3Same<u, 0b11, opcode,
+      (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
+      asmop # " $Rd, $Rn, $Rm",
+      [],
+      NoItinerary>;
+
+// Scalar three-register instruction expanded for the B, H, S and D register
+// sizes (size field 0b00 .. 0b11). Commutable is applied to all four
+// variants; no selection patterns are attached here.
+multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
+                                        string asmop, bit Commutable = 0>
+{
+  let isCommutable = Commutable in {
+    def bbb : NeonI_Scalar3Same<u, 0b00, opcode,
+                (outs FPR8:$Rd), (ins FPR8:$Rn, FPR8:$Rm),
+                asmop # " $Rd, $Rn, $Rm",
+                [],
+                NoItinerary>;
+    def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
+                (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
+                asmop # " $Rd, $Rn, $Rm",
+                [],
+                NoItinerary>;
+    def sss : NeonI_Scalar3Same<u, 0b10, opcode,
+                (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
+                asmop # " $Rd, $Rn, $Rm",
+                [],
+                NoItinerary>;
+    def ddd : NeonI_Scalar3Same<u, 0b11, opcode,
+                (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
+                asmop # " $Rd, $Rn, $Rm",
+                [],
+                NoItinerary>;
+  }
+}
+
+// Selects a v1i64 binary operation onto a scalar D-register instruction:
+// both VPR64 sources are narrowed with EXTRACT_SUBREG (sub_64), INSTD is
+// applied, and the result is wrapped back with SUBREG_TO_REG.
+class Neon_Scalar_D_size_patterns<SDPatternOperator opnode, Instruction INSTD>
+ : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
+ (SUBREG_TO_REG (i64 0),
+ (INSTD (EXTRACT_SUBREG VPR64:$Rn, sub_64),
+ (EXTRACT_SUBREG VPR64:$Rm, sub_64)),
+ sub_64)>;
+
+
+// Scalar Integer Add
+let isCommutable = 1 in {
+def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
+}
+
+// Scalar Integer Sub
+def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
+
+// Pattern for Scalar Integer Add and Sub with D register
+def : Neon_Scalar_D_size_patterns<add, ADDddd>;
+def : Neon_Scalar_D_size_patterns<sub, SUBddd>;
+
+// Scalar Integer Saturating Add (Signed, Unsigned)
+// The trailing 1 marks the add forms commutable.
+defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
+defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
+
+// Scalar Integer Saturating Sub (Signed, Unsigned)
+// The trailing 0 leaves the sub forms non-commutable.
+defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
+defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
+
+// Patterns for Scalar Integer Saturating Add, Sub with D register only
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
+
+// Scalar Integer Shift Left (Signed, Unsigned)
+def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
+def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
+
+// Scalar Integer Saturating Shift Left (Signed, Unsigned)
+defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
+defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
+
+// Scalar Integer Rounding Shift Left (Signed, Unsigned)
+def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
+def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
+
+// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
+defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
+defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
+
+// Patterns for Scalar Integer Shift Left, Saturating Shift Left,
+// Rounding Shift Left, Rounding Saturating Shift Left with D register only
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
+// A generic v1i64 shl is selected to SSHL. Only one pattern may own this
+// DAG: a second pattern mapping shl to USHLddd would match exactly the same
+// input and could never be chosen, so it is not defined.
+def : Neon_Scalar_D_size_patterns<shl, SSHLddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
+
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// 64-bit vector bitcasts...
+// Every 64-bit vector type lives in VPR64, so a bitconvert between any two
+// of them selects to the same register with the new type and emits no
+// instruction. Patterns are grouped by source type.
+
+def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>;
+
+def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>;
+
+def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>;
+
+def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>;
+
+def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
+
+// ..and 128-bit vector bitcasts...
+// Same idea for the 128-bit types in VPR128: a bitconvert only re-types the
+// register. Patterns are grouped by source type.
+
+def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>;
+
+def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>;
+
+def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>;
+
+def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>;