AArch64: add initial NEON support

Patch by Ana Pazos.

- Completed implementation of instruction formats:
    AdvSIMD three same
    AdvSIMD modified immediate
    AdvSIMD scalar pairwise

- Completed implementation of instruction classes
  (some of the instructions in these classes
  belong to as-yet-unfinished instruction formats):
    Vector Arithmetic
    Vector Immediate
    Vector Pairwise Arithmetic

- Initial implementation of instruction formats:
    AdvSIMD scalar two-reg misc
    AdvSIMD scalar three same

- Initial implementation of instruction class:
    Scalar Arithmetic

- Initial clang changes to support ARMv8 intrinsics
  (a usage sketch follows the commit metadata below).
  Note: no clang changes for scalar intrinsic function name mangling yet.

- Comprehensive test cases for the added instructions,
  verifying codegen, encoding, decoding, diagnostics, and intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187567 91177308-0d34-0410-b5e6-96231b3b80d8
commit 87773c318fcee853fb34a80a10c4347d523bdafb (parent 691aa09)
Authored by Tim Northover
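
For context, here is a minimal sketch (an illustration, not part of this patch) of the kind of C-level NEON intrinsic these clang and backend changes target; with the patch applied, the vector add below should lower to the new FADDvvv instruction:

// Hedged illustration only: vaddq_f64 is the ACLE intrinsic for a
// 128-bit double-precision vector add; with NEON codegen enabled it
// should compile to a single "fadd v0.2d, v0.2d, v1.2d".
#include <arm_neon.h>

float64x2_t add2d(float64x2_t a, float64x2_t b) {
  return vaddq_f64(a, b);
}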

Showing 66 changed files with 12,503 additions and 41 deletions.

  1. +1 0  include/llvm/IR/Intrinsics.td
  2. +41 0 include/llvm/IR/IntrinsicsAArch64.td
  3. +1 1  lib/Target/AArch64/AArch64CallingConv.td
  4. +521 1 lib/Target/AArch64/AArch64ISelLowering.cpp
  5. +30 3 lib/Target/AArch64/AArch64ISelLowering.h
  6. +93 0 lib/Target/AArch64/AArch64InstrFormats.td
  7. +40 0 lib/Target/AArch64/AArch64InstrInfo.td
  8. +1,634 0 lib/Target/AArch64/AArch64InstrNEON.td
  9. +5 0 lib/Target/AArch64/AArch64MCInstLower.cpp
  10. +1 1  lib/Target/AArch64/AArch64RegisterInfo.td
  11. +2 4 lib/Target/AArch64/AArch64Subtarget.cpp
  12. +3 0  lib/Target/AArch64/AArch64Subtarget.h
  13. +120 20 lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
  14. +39 1 lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
  15. +81 0 lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
  16. +7 2 lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
  17. +1 1  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
  18. +66 0 lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
  19. +5 1 lib/Target/AArch64/Utils/AArch64BaseInfo.h
  20. +21 0 test/CodeGen/AArch64/complex-copy-noneon.ll
  21. +21 1 test/CodeGen/AArch64/inline-asm-constraints.ll
  22. +226 0 test/CodeGen/AArch64/neon-aba-abd.ll
  23. +92 0 test/CodeGen/AArch64/neon-add-pairwise.ll
  24. +132 0 test/CodeGen/AArch64/neon-add-sub.ll
  25. +574 0 test/CodeGen/AArch64/neon-bitcast.ll
  26. +594 0 test/CodeGen/AArch64/neon-bitwise-instructions.ll
  27. +1,982 0 test/CodeGen/AArch64/neon-compare-instructions.ll
  28. +56 0 test/CodeGen/AArch64/neon-facge-facgt.ll
  29. +112 0 test/CodeGen/AArch64/neon-fma.ll
  30. +54 0 test/CodeGen/AArch64/neon-frsqrt-frecp.ll
  31. +207 0 test/CodeGen/AArch64/neon-halving-add-sub.ll
  32. +310 0 test/CodeGen/AArch64/neon-max-min-pairwise.ll
  33. +310 0 test/CodeGen/AArch64/neon-max-min.ll
  34. +88 0 test/CodeGen/AArch64/neon-mla-mls.ll
  35. +205 0 test/CodeGen/AArch64/neon-mov.ll
  36. +181 0 test/CodeGen/AArch64/neon-mul-div.ll
  37. +105 0 test/CodeGen/AArch64/neon-rounding-halving-add.ll
  38. +138 0 test/CodeGen/AArch64/neon-rounding-shift.ll
  39. +274 0 test/CodeGen/AArch64/neon-saturating-add-sub.ll
  40. +138 0 test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
  41. +138 0 test/CodeGen/AArch64/neon-saturating-shift.ll
  42. +140 0 test/CodeGen/AArch64/neon-shift.ll
  43. +4 4 test/MC/AArch64/basic-a64-diagnostics.s
  44. +1 1  test/MC/AArch64/basic-a64-instructions.s
  45. +78 0 test/MC/AArch64/neon-aba-abd.s
  46. +35 0 test/MC/AArch64/neon-add-pairwise.s
  47. +82 0 test/MC/AArch64/neon-add-sub-instructions.s
  48. +60 0 test/MC/AArch64/neon-bitwise-instructions.s
  49. +405 0 test/MC/AArch64/neon-compare-instructions.s
  50. +1,207 0 test/MC/AArch64/neon-diagnostics.s
  51. +41 0 test/MC/AArch64/neon-facge-facgt.s
  52. +27 0 test/MC/AArch64/neon-frsqrt-frecp.s
  53. +74 0 test/MC/AArch64/neon-halving-add-sub.s
  54. +110 0 test/MC/AArch64/neon-max-min-pairwise.s
  55. +110 0 test/MC/AArch64/neon-max-min.s
  56. +61 0 test/MC/AArch64/neon-mla-mls-instructions.s
  57. +207 0 test/MC/AArch64/neon-mov.s
  58. +86 0 test/MC/AArch64/neon-mul-div-instructions.s
  59. +39 0 test/MC/AArch64/neon-rounding-halving-add.s
  60. +57 0 test/MC/AArch64/neon-rounding-shift.s
  61. +133 0 test/MC/AArch64/neon-saturating-add-sub.s
  62. +70 0 test/MC/AArch64/neon-saturating-rounding-shift.s
  63. +69 0 test/MC/AArch64/neon-saturating-shift.s
  64. +57 0 test/MC/AArch64/neon-shift.s
  65. +28 0 test/MC/AArch64/noneon-diagnostics.s
  66. +673 0 test/MC/Disassembler/AArch64/neon-instructions.txt
1  include/llvm/IR/Intrinsics.td
@@ -494,6 +494,7 @@ def int_convertuu : Intrinsic<[llvm_anyint_ty],
494 494 include "llvm/IR/IntrinsicsPowerPC.td"
495 495 include "llvm/IR/IntrinsicsX86.td"
496 496 include "llvm/IR/IntrinsicsARM.td"
  497 +include "llvm/IR/IntrinsicsAArch64.td"
497 498 include "llvm/IR/IntrinsicsXCore.td"
498 499 include "llvm/IR/IntrinsicsHexagon.td"
499 500 include "llvm/IR/IntrinsicsNVVM.td"
41 include/llvm/IR/IntrinsicsAArch64.td
... ... @@ -0,0 +1,41 @@
  1 +//===- IntrinsicsAArch64.td - Defines AArch64 intrinsics -----------*- tablegen -*-===//
  2 +//
  3 +// The LLVM Compiler Infrastructure
  4 +//
  5 +// This file is distributed under the University of Illinois Open Source
  6 +// License. See LICENSE.TXT for details.
  7 +//
  8 +//===----------------------------------------------------------------------===//
  9 +//
  10 +// This file defines all of the AArch64-specific intrinsics.
  11 +//
  12 +//===----------------------------------------------------------------------===//
  13 +
  14 +//===----------------------------------------------------------------------===//
  15 +// Advanced SIMD (NEON)
  16 +
  17 +let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
  18 +
  19 +// Vector Absolute Compare (Floating Point)
  20 +def int_aarch64_neon_vacgeq : Intrinsic<[llvm_v2i64_ty],
  21 + [llvm_v2f64_ty, llvm_v2f64_ty],
  22 + [IntrNoMem]>;
  23 +def int_aarch64_neon_vacgtq : Intrinsic<[llvm_v2i64_ty],
  24 + [llvm_v2f64_ty, llvm_v2f64_ty],
  25 + [IntrNoMem]>;
  26 +
  27 +// Vector maxNum (Floating Point)
  28 +def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;
  29 +
  30 +// Vector minNum (Floating Point)
  31 +def int_aarch64_neon_vminnm : Neon_2Arg_Intrinsic;
  32 +
  33 +// Vector Pairwise maxNum (Floating Point)
  34 +def int_aarch64_neon_vpmaxnm : Neon_2Arg_Intrinsic;
  35 +
  36 +// Vector Pairwise minNum (Floating Point)
  37 +def int_aarch64_neon_vpminnm : Neon_2Arg_Intrinsic;
  38 +
  39 +// Vector Multiply Extended (Floating Point)
  40 +def int_aarch64_neon_vmulx : Neon_2Arg_Intrinsic;
  41 +}
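
As a hedged sketch of how one of these definitions surfaces in C++ (assuming the usual TableGen-generated Intrinsic::aarch64_neon_vacgeq enumerator; this is not code from the patch), a frontend or pass could build a call to the FACGE intrinsic like this:

// Sketch under the assumption that TableGen emits an
// Intrinsic::aarch64_neon_vacgeq ID for the definition above; operand
// types must match the signature (v2i64 result, two v2f64 operands).
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

Value *emitVacgeq(IRBuilder<> &B, Module *M, Value *LHS, Value *RHS) {
  Function *F = Intrinsic::getDeclaration(M, Intrinsic::aarch64_neon_vacgeq);
  return B.CreateCall2(F, LHS, RHS); // yields a <2 x i64> compare mask
}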
2  lib/Target/AArch64/AArch64CallingConv.td
@@ -61,7 +61,7 @@ def CC_A64_APCS : CallingConv<[
61 61 // Vectors and Floating-point types.
62 62 CCIfType<[v2i8], CCBitConvertToType<f16>>,
63 63 CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>,
64   - CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType<f64>>,
  64 + CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64], CCBitConvertToType<f64>>,
65 65 CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
66 66 CCBitConvertToType<f128>>,
67 67
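
The v1i64 addition above just extends the existing rule that small vectors are passed in floating-point registers by pure bit reinterpretation. A minimal C++ analogy (illustration only, not LLVM code):

#include <cstdint>
#include <cstring>

// CCBitConvertToType<f64> changes how the value is *classified*, not
// its bits: a v1i64 argument travels in a D register exactly as if it
// were an f64 with the same bit pattern.
double asF64(uint64_t Bits) {
  double D;
  std::memcpy(&D, &Bits, sizeof D);
  return D;
}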
522 lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -42,6 +42,8 @@ static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
42 42 AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
43 43 : TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) {
44 44
  45 + const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
  46 +
45 47 // SIMD compares set the entire lane's bits to 1
46 48 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
47 49
@@ -53,6 +55,21 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
53 55 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
54 56 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
55 57
  58 + if (Subtarget->hasNEON()) {
  59 + // And the vectors
  60 + addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass);
  61 + addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass);
  62 + addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass);
  63 + addRegisterClass(MVT::v1i64, &AArch64::VPR64RegClass);
  64 + addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass);
  65 + addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass);
  66 + addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass);
  67 + addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass);
  68 + addRegisterClass(MVT::v2i64, &AArch64::VPR128RegClass);
  69 + addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass);
  70 + addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass);
  71 + }
  72 +
56 73 computeRegisterProperties();
57 74
58 75 // We combine OR nodes for bitfield and NEON BSL operations.
@@ -251,6 +268,31 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
251 268
252 269 setExceptionPointerRegister(AArch64::X0);
253 270 setExceptionSelectorRegister(AArch64::X1);
  271 +
  272 + if (Subtarget->hasNEON()) {
  273 + setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
  274 + setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
  275 + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
  276 + setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
  277 + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
  278 + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
  279 + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
  280 + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
  281 + setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
  282 + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  283 + setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
  284 +
  285 + setOperationAction(ISD::SETCC, MVT::v8i8, Custom);
  286 + setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
  287 + setOperationAction(ISD::SETCC, MVT::v4i16, Custom);
  288 + setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
  289 + setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
  290 + setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
  291 + setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
  292 + setOperationAction(ISD::SETCC, MVT::v2f32, Custom);
  293 + setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
  294 + setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
  295 + }
254 296 }
255 297
256 298 EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
@@ -777,7 +819,22 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
777 819 case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
778 820 case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
779 821
780   - default: return NULL;
  822 + case AArch64ISD::NEON_BSL:
  823 + return "AArch64ISD::NEON_BSL";
  824 + case AArch64ISD::NEON_MOVIMM:
  825 + return "AArch64ISD::NEON_MOVIMM";
  826 + case AArch64ISD::NEON_MVNIMM:
  827 + return "AArch64ISD::NEON_MVNIMM";
  828 + case AArch64ISD::NEON_FMOVIMM:
  829 + return "AArch64ISD::NEON_FMOVIMM";
  830 + case AArch64ISD::NEON_CMP:
  831 + return "AArch64ISD::NEON_CMP";
  832 + case AArch64ISD::NEON_CMPZ:
  833 + return "AArch64ISD::NEON_CMPZ";
  834 + case AArch64ISD::NEON_TST:
  835 + return "AArch64ISD::NEON_TST";
  836 + default:
  837 + return NULL;
781 838 }
782 839 }
783 840
@@ -2230,6 +2287,213 @@ AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
2230 2287 DAG.getConstant(A64CC::NE, MVT::i32));
2231 2288 }
2232 2289
  2290 +static SDValue LowerVectorSETCC(SDValue Op, SelectionDAG &DAG) {
  2291 + SDLoc DL(Op);
  2292 + SDValue LHS = Op.getOperand(0);
  2293 + SDValue RHS = Op.getOperand(1);
  2294 + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  2295 + EVT VT = Op.getValueType();
  2296 + bool Invert = false;
  2297 + SDValue Op0, Op1;
  2298 + unsigned Opcode;
  2299 +
  2300 + if (LHS.getValueType().isInteger()) {
  2301 +
  2302 + // Attempt to use Vector Integer Compare Mask Test instruction.
  2303 + // TST = icmp ne (and (op0, op1), zero).
  2304 + if (CC == ISD::SETNE) {
  2305 + if (((LHS.getOpcode() == ISD::AND) &&
  2306 + ISD::isBuildVectorAllZeros(RHS.getNode())) ||
  2307 + ((RHS.getOpcode() == ISD::AND) &&
  2308 + ISD::isBuildVectorAllZeros(LHS.getNode()))) {
  2309 +
  2310 + SDValue AndOp = (LHS.getOpcode() == ISD::AND) ? LHS : RHS;
  2311 + SDValue NewLHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(0));
  2312 + SDValue NewRHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(1));
  2313 + return DAG.getNode(AArch64ISD::NEON_TST, DL, VT, NewLHS, NewRHS);
  2314 + }
  2315 + }
  2316 +
  2317 + // Attempt to use Vector Integer Compare Mask against Zero instr (Signed).
  2318 + // Note: Compare against Zero does not support unsigned predicates.
  2319 + if ((ISD::isBuildVectorAllZeros(RHS.getNode()) ||
  2320 + ISD::isBuildVectorAllZeros(LHS.getNode())) &&
  2321 + !isUnsignedIntSetCC(CC)) {
  2322 +
  2323 + // If LHS is the zero value, swap operands and CondCode.
  2324 + if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
  2325 + CC = getSetCCSwappedOperands(CC);
  2326 + Op0 = RHS;
  2327 + } else
  2328 + Op0 = LHS;
  2329 +
  2330 + // Ensure valid CondCode for Compare Mask against Zero instruction:
  2331 + // EQ, GE, GT, LE, LT.
  2332 + if (ISD::SETNE == CC) {
  2333 + Invert = true;
  2334 + CC = ISD::SETEQ;
  2335 + }
  2336 +
  2337 + // Using constant type to differentiate integer and FP compares with zero.
  2338 + Op1 = DAG.getConstant(0, MVT::i32);
  2339 + Opcode = AArch64ISD::NEON_CMPZ;
  2340 +
  2341 + } else {
  2342 + // Attempt to use Vector Integer Compare Mask instr (Signed/Unsigned).
  2343 + // Ensure valid CondCode for Compare Mask instr: EQ, GE, GT, UGE, UGT.
  2344 + bool Swap = false;
  2345 + switch (CC) {
  2346 + default:
  2347 + llvm_unreachable("Illegal integer comparison.");
  2348 + case ISD::SETEQ:
  2349 + case ISD::SETGT:
  2350 + case ISD::SETGE:
  2351 + case ISD::SETUGT:
  2352 + case ISD::SETUGE:
  2353 + break;
  2354 + case ISD::SETNE:
  2355 + Invert = true;
  2356 + CC = ISD::SETEQ;
  2357 + break;
  2358 + case ISD::SETULT:
  2359 + case ISD::SETULE:
  2360 + case ISD::SETLT:
  2361 + case ISD::SETLE:
  2362 + Swap = true;
  2363 + CC = getSetCCSwappedOperands(CC);
  2364 + }
  2365 +
  2366 + if (Swap)
  2367 + std::swap(LHS, RHS);
  2368 +
  2369 + Opcode = AArch64ISD::NEON_CMP;
  2370 + Op0 = LHS;
  2371 + Op1 = RHS;
  2372 + }
  2373 +
  2374 + // Generate Compare Mask instr or Compare Mask against Zero instr.
  2375 + SDValue NeonCmp =
  2376 + DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
  2377 +
  2378 + if (Invert)
  2379 + NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
  2380 +
  2381 + return NeonCmp;
  2382 + }
  2383 +
  2384 + // Now handle Floating Point cases.
  2385 + // Attempt to use Vector Floating Point Compare Mask against Zero instruction.
  2386 + if (ISD::isBuildVectorAllZeros(RHS.getNode()) ||
  2387 + ISD::isBuildVectorAllZeros(LHS.getNode())) {
  2388 +
  2389 + // If LHS is the zero value, swap operands and CondCode.
  2390 + if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
  2391 + CC = getSetCCSwappedOperands(CC);
  2392 + Op0 = RHS;
  2393 + } else
  2394 + Op0 = LHS;
  2395 +
  2396 + // Using constant type to differentiate integer and FP compares with zero.
  2397 + Op1 = DAG.getConstantFP(0, MVT::f32);
  2398 + Opcode = AArch64ISD::NEON_CMPZ;
  2399 + } else {
  2400 + // Attempt to use Vector Floating Point Compare Mask instruction.
  2401 + Op0 = LHS;
  2402 + Op1 = RHS;
  2403 + Opcode = AArch64ISD::NEON_CMP;
  2404 + }
  2405 +
  2406 + SDValue NeonCmpAlt;
  2407 + // Some register compares have to be implemented with swapped CC and operands,
  2408 + // e.g.: OLT implemented as OGT with swapped operands.
  2409 + bool SwapIfRegArgs = false;
  2410 +
  2411 + // Ensure valid CondCode for FP Compare Mask against Zero instruction:
  2412 + // EQ, GE, GT, LE, LT.
  2413 + // And ensure valid CondCode for FP Compare Mask instruction: EQ, GE, GT.
  2414 + switch (CC) {
  2415 + default:
  2416 + llvm_unreachable("Illegal FP comparison");
  2417 + case ISD::SETUNE:
  2418 + case ISD::SETNE:
  2419 + Invert = true; // Fallthrough
  2420 + case ISD::SETOEQ:
  2421 + case ISD::SETEQ:
  2422 + CC = ISD::SETEQ;
  2423 + break;
  2424 + case ISD::SETOLT:
  2425 + case ISD::SETLT:
  2426 + CC = ISD::SETLT;
  2427 + SwapIfRegArgs = true;
  2428 + break;
  2429 + case ISD::SETOGT:
  2430 + case ISD::SETGT:
  2431 + CC = ISD::SETGT;
  2432 + break;
  2433 + case ISD::SETOLE:
  2434 + case ISD::SETLE:
  2435 + CC = ISD::SETLE;
  2436 + SwapIfRegArgs = true;
  2437 + break;
  2438 + case ISD::SETOGE:
  2439 + case ISD::SETGE:
  2440 + CC = ISD::SETGE;
  2441 + break;
  2442 + case ISD::SETUGE:
  2443 + Invert = true;
  2444 + CC = ISD::SETLT;
  2445 + SwapIfRegArgs = true;
  2446 + break;
  2447 + case ISD::SETULE:
  2448 + Invert = true;
  2449 + CC = ISD::SETGT;
  2450 + break;
  2451 + case ISD::SETUGT:
  2452 + Invert = true;
  2453 + CC = ISD::SETLE;
  2454 + SwapIfRegArgs = true;
  2455 + break;
  2456 + case ISD::SETULT:
  2457 + Invert = true;
  2458 + CC = ISD::SETGE;
  2459 + break;
  2460 + case ISD::SETUEQ:
  2461 + Invert = true; // Fallthrough
  2462 + case ISD::SETONE:
  2463 + // Expand this to (OGT | OLT).
  2464 + NeonCmpAlt =
  2465 + DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGT));
  2466 + CC = ISD::SETLT;
  2467 + SwapIfRegArgs = true;
  2468 + break;
  2469 + case ISD::SETUO:
  2470 + Invert = true; // Fallthrough
  2471 + case ISD::SETO:
  2472 + // Expand this to (OGE | OLT).
  2473 + NeonCmpAlt =
  2474 + DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGE));
  2475 + CC = ISD::SETLT;
  2476 + SwapIfRegArgs = true;
  2477 + break;
  2478 + }
  2479 +
  2480 + if (Opcode == AArch64ISD::NEON_CMP && SwapIfRegArgs) {
  2481 + CC = getSetCCSwappedOperands(CC);
  2482 + std::swap(Op0, Op1);
  2483 + }
  2484 +
  2485 + // Generate FP Compare Mask instr or FP Compare Mask against Zero instr
  2486 + SDValue NeonCmp = DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
  2487 +
  2488 + if (NeonCmpAlt.getNode())
  2489 + NeonCmp = DAG.getNode(ISD::OR, DL, VT, NeonCmp, NeonCmpAlt);
  2490 +
  2491 + if (Invert)
  2492 + NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
  2493 +
  2494 + return NeonCmp;
  2495 +}
  2496 +
2233 2497 // (SETCC lhs, rhs, condcode)
2234 2498 SDValue
2235 2499 AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
@@ -2239,6 +2503,9 @@ AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2239 2503 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2240 2504 EVT VT = Op.getValueType();
2241 2505
  2506 + if (VT.isVector())
  2507 + return LowerVectorSETCC(Op, DAG);
  2508 +
2242 2509 if (LHS.getValueType() == MVT::f128) {
2243 2510 // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS
2244 2511 // for the rest of the function (some i32 or i64 values).
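
A standalone C++ check (my illustration, not part of the patch) of the identity the unordered cases in LowerVectorSETCC rely on: with IEEE semantics, an unordered predicate equals the negation of the complementary ordered one, e.g. ULT == NOT(OGE), which is why the code sets Invert and flips CC:

#include <cassert>
#include <limits>

// Ordered compares are false when either operand is NaN, so negating
// OGE yields "less than, or unordered" -- exactly ULT.
static bool oge(double A, double B) { return A >= B; }
static bool ult(double A, double B) { return !oge(A, B); }

int main() {
  double NaN = std::numeric_limits<double>::quiet_NaN();
  assert(ult(1.0, 2.0));   // ordinary less-than
  assert(!ult(2.0, 1.0));
  assert(ult(NaN, 1.0));   // unordered operands satisfy ULT...
  assert(!oge(NaN, 1.0));  // ...because OGE is false for them
  return 0;
}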
@@ -2395,11 +2662,155 @@ AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
2395 2662 case ISD::SETCC: return LowerSETCC(Op, DAG);
2396 2663 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
2397 2664 case ISD::VASTART: return LowerVASTART(Op, DAG);
  2665 + case ISD::BUILD_VECTOR:
  2666 + return LowerBUILD_VECTOR(Op, DAG, getSubtarget());
2398 2667 }
2399 2668
2400 2669 return SDValue();
2401 2670 }
2402 2671
  2672 +/// Check if the specified splat value corresponds to a valid vector constant
  2673 +/// for a Neon instruction with a "modified immediate" operand (e.g., MOVI). If
  2674 +/// so, return the encoded 8-bit immediate and the OpCmode instruction fields
  2675 +/// values.
  2676 +static bool isNeonModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
  2677 + unsigned SplatBitSize, SelectionDAG &DAG,
  2678 + bool is128Bits, NeonModImmType type, EVT &VT,
  2679 + unsigned &Imm, unsigned &OpCmode) {
  2680 + switch (SplatBitSize) {
  2681 + default:
  2682 + llvm_unreachable("unexpected size for isNeonModifiedImm");
  2683 + case 8: {
  2684 + if (type != Neon_Mov_Imm)
  2685 + return false;
  2686 + assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
  2687 + // Neon movi per byte: Op=0, Cmode=1110.
  2688 + OpCmode = 0xe;
  2689 + Imm = SplatBits;
  2690 + VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
  2691 + break;
  2692 + }
  2693 + case 16: {
  2694 + // Neon move inst per halfword
  2695 + VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
  2696 + if ((SplatBits & ~0xff) == 0) {
  2697 + // Value = 0x00nn is 0x00nn LSL 0
  2698 + // movi: Op=0, Cmode=1000; mvni: Op=1, Cmode=1000
  2699 + // bic: Op=1, Cmode=1001; orr: Op=0, Cmode=1001
  2700 + // Op=x, Cmode=100y
  2701 + Imm = SplatBits;
  2702 + OpCmode = 0x8;
  2703 + break;
  2704 + }
  2705 + if ((SplatBits & ~0xff00) == 0) {
  2706 + // Value = 0xnn00 is 0x00nn LSL 8
  2707 + // movi: Op=0, Cmode=1010; mvni: Op=1, Cmode=1010
  2708 + // bic: Op=1, Cmode=1011; orr: Op=0, Cmode=1011
  2709 + // Op=x, Cmode=101x
  2710 + Imm = SplatBits >> 8;
  2711 + OpCmode = 0xa;
  2712 + break;
  2713 + }
  2714 + // can't handle any other
  2715 + return false;
  2716 + }
  2717 +
  2718 + case 32: {
  2719 + // First the LSL variants (MSL is unusable by some interested instructions).
  2720 +
  2721 + // Neon move instr per word, shift zeros
  2722 + VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
  2723 + if ((SplatBits & ~0xff) == 0) {
  2724 + // Value = 0x000000nn is 0x000000nn LSL 0
  2725 + // movi: Op=0, Cmode= 0000; mvni: Op=1, Cmode= 0000
  2726 + // bic: Op=1, Cmode= 0001; orr: Op=0, Cmode= 0001
  2727 + // Op=x, Cmode=000x
  2728 + Imm = SplatBits;
  2729 + OpCmode = 0;
  2730 + break;
  2731 + }
  2732 + if ((SplatBits & ~0xff00) == 0) {
  2733 + // Value = 0x0000nn00 is 0x000000nn LSL 8
  2734 + // movi: Op=0, Cmode= 0010; mvni: Op=1, Cmode= 0010
  2735 + // bic: Op=1, Cmode= 0011; orr : Op=0, Cmode= 0011
  2736 + // Op=x, Cmode=001x
  2737 + Imm = SplatBits >> 8;
  2738 + OpCmode = 0x2;
  2739 + break;
  2740 + }
  2741 + if ((SplatBits & ~0xff0000) == 0) {
  2742 + // Value = 0x00nn0000 is 0x000000nn LSL 16
  2743 + // movi: Op=0, Cmode= 0100; mvni: Op=1, Cmode= 0100
  2744 + // bic: Op=1, Cmode= 0101; orr: Op=0, Cmode= 0101
  2745 + // Op=x, Cmode=010x
  2746 + Imm = SplatBits >> 16;
  2747 + OpCmode = 0x4;
  2748 + break;
  2749 + }
  2750 + if ((SplatBits & ~0xff000000) == 0) {
  2751 + // Value = 0xnn000000 is 0x000000nn LSL 24
  2752 + // movi: Op=0, Cmode= 0110; mvni: Op=1, Cmode= 0110
  2753 + // bic: Op=1, Cmode= 0111; orr: Op=0, Cmode= 0111
  2754 + // Op=x, Cmode=011x
  2755 + Imm = SplatBits >> 24;
  2756 + OpCmode = 0x6;
  2757 + break;
  2758 + }
  2759 +
  2760 + // Now the MSL immediates.
  2761 +
  2762 + // Neon move instr per word, shift ones
  2763 + if ((SplatBits & ~0xffff) == 0 &&
  2764 + ((SplatBits | SplatUndef) & 0xff) == 0xff) {
  2765 + // Value = 0x0000nnff is 0x000000nn MSL 8
  2766 + // movi: Op=0, Cmode= 1100; mvni: Op=1, Cmode= 1100
  2767 + // Op=x, Cmode=1100
  2768 + Imm = SplatBits >> 8;
  2769 + OpCmode = 0xc;
  2770 + break;
  2771 + }
  2772 + if ((SplatBits & ~0xffffff) == 0 &&
  2773 + ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
  2774 + // Value = 0x00nnffff is 0x000000nn MSL 16
  2775 + // movi: Op=0, Cmode= 1101; mvni: Op=1, Cmode= 1101
  2776 + // Op=x, Cmode=1101
  2777 + Imm = SplatBits >> 16;
  2778 + OpCmode = 0xd;
  2779 + break;
  2780 + }
  2781 + // can't handle any other
  2782 + return false;
  2783 + }
  2784 +
  2785 + case 64: {
  2786 + if (type != Neon_Mov_Imm)
  2787 + return false;
  2788 + // Neon move instr bytemask, where each byte is either 0x00 or 0xff.
  2789 + // movi Op=1, Cmode=1110.
  2790 + OpCmode = 0x1e;
  2791 + uint64_t BitMask = 0xff;
  2792 + uint64_t Val = 0;
  2793 + unsigned ImmMask = 1;
  2794 + Imm = 0;
  2795 + for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
  2796 + if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
  2797 + Val |= BitMask;
  2798 + Imm |= ImmMask;
  2799 + } else if ((SplatBits & BitMask) != 0) {
  2800 + return false;
  2801 + }
  2802 + BitMask <<= 8;
  2803 + ImmMask <<= 1;
  2804 + }
  2805 + SplatBits = Val;
  2806 + VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
  2807 + break;
  2808 + }
  2809 + }
  2810 +
  2811 + return true;
  2812 +}
  2813 +
2403 2814 static SDValue PerformANDCombine(SDNode *N,
2404 2815 TargetLowering::DAGCombinerInfo &DCI) {
2405 2816
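
To make the SplatBitSize == 64 case of isNeonModifiedImm concrete, here is a small self-contained C++ re-derivation (an illustrative sketch, not the patch's code) of the bytemask encoding: every byte of the splat must be 0x00 or 0xff, and the eight per-byte mask bits become the 8-bit immediate:

#include <cassert>
#include <cstdint>

// Mirrors the 64-bit loop above: walk the eight bytes, setting one Imm
// bit per 0xff byte and rejecting any byte that is neither 0x00 nor 0xff.
static bool bytemaskImm(uint64_t Splat, unsigned &Imm) {
  Imm = 0;
  for (int Byte = 0; Byte < 8; ++Byte) {
    uint8_t B = (Splat >> (8 * Byte)) & 0xff;
    if (B == 0xff)
      Imm |= 1u << Byte;
    else if (B != 0x00)
      return false;
  }
  return true;
}

int main() {
  unsigned Imm;
  assert(bytemaskImm(0xff00ff00ff00ff00ULL, Imm) && Imm == 0xAA);
  assert(!bytemaskImm(0x1200ff00ff00ff00ULL, Imm)); // 0x12 byte rejected
  return 0;
}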
@@ -2725,6 +3136,7 @@ static SDValue PerformORCombine(SDNode *N,
2725 3136 const AArch64Subtarget *Subtarget) {
2726 3137
2727 3138 SelectionDAG &DAG = DCI.DAG;
  3139 + SDLoc DL(N);
2728 3140 EVT VT = N->getValueType(0);
2729 3141
2730 3142 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
@@ -2745,6 +3157,44 @@ static SDValue PerformORCombine(SDNode *N,
2745 3157 if (Res.getNode())
2746 3158 return Res;
2747 3159
  3160 + if (!Subtarget->hasNEON())
  3161 + return SDValue();
  3162 +
  3163 + // Attempt to use vector immediate-form BSL
  3164 + // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
  3165 +
  3166 + SDValue N0 = N->getOperand(0);
  3167 + if (N0.getOpcode() != ISD::AND)
  3168 + return SDValue();
  3169 +
  3170 + SDValue N1 = N->getOperand(1);
  3171 + if (N1.getOpcode() != ISD::AND)
  3172 + return SDValue();
  3173 +
  3174 + if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
  3175 + APInt SplatUndef;
  3176 + unsigned SplatBitSize;
  3177 + bool HasAnyUndefs;
  3178 + BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
  3179 + APInt SplatBits0;
  3180 + if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
  3181 + HasAnyUndefs) &&
  3182 + !HasAnyUndefs) {
  3183 + BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
  3184 + APInt SplatBits1;
  3185 + if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
  3186 + HasAnyUndefs) &&
  3187 + !HasAnyUndefs && SplatBits0 == ~SplatBits1) {
  3188 + // Canonicalize the vector type to make instruction selection simpler.
  3189 + EVT CanonicalVT = VT.is128BitVector() ? MVT::v16i8 : MVT::v8i8;
  3190 + SDValue Result = DAG.getNode(AArch64ISD::NEON_BSL, DL, CanonicalVT,
  3191 + N0->getOperand(1), N0->getOperand(0),
  3192 + N1->getOperand(0));
  3193 + return DAG.getNode(ISD::BITCAST, DL, VT, Result);
  3194 + }
  3195 + }
  3196 + }
  3197 +
2748 3198 return SDValue();
2749 3199 }
2750 3200
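
A quick C++ sanity check of the select identity behind this combine (illustration only): for a constant mask A, (B & A) | (C & ~A) takes B's bits where A is set and C's bits elsewhere, which is exactly what a lane-wise NEON BSL computes:

#include <cassert>
#include <cstdint>

// Scalar model of the vector BSL the combine forms when the two AND
// masks are complementary splat constants.
static uint32_t bsl(uint32_t A, uint32_t B, uint32_t C) {
  return (B & A) | (C & ~A);
}

int main() {
  // High bytes of each half come from B, low bytes from C.
  assert(bsl(0xFF00FF00u, 0x12345678u, 0x9ABCDEF0u) == 0x12BC56F0u);
  return 0;
}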
@@ -2819,6 +3269,76 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
2819 3269 return false;
2820 3270 }
2821 3271
  3272 +// If this is a case we can't handle, return null and let the default
  3273 +// expansion code take care of it.
  3274 +SDValue
  3275 +AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
  3276 + const AArch64Subtarget *ST) const {
  3277 +
  3278 + BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
  3279 + SDLoc DL(Op);
  3280 + EVT VT = Op.getValueType();
  3281 +
  3282 + APInt SplatBits, SplatUndef;
  3283 + unsigned SplatBitSize;
  3284 + bool HasAnyUndefs;
  3285 +
  3286 + // Note we favor lowering MOVI over MVNI.
  3287 + // This has implications on the definition of patterns in TableGen to select
  3288 + // BIC immediate instructions but not ORR immediate instructions.
  3289 + // If this lowering order is changed, TableGen patterns for BIC immediate and
  3290 + // ORR immediate instructions have to be updated.
  3291 + if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
  3292 + if (SplatBitSize <= 64) {
  3293 + // First attempt to use vector immediate-form MOVI
  3294 + EVT NeonMovVT;
  3295 + unsigned Imm = 0;
  3296 + unsigned OpCmode = 0;
  3297 +
  3298 + if (isNeonModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
  3299 + SplatBitSize, DAG, VT.is128BitVector(),
  3300 + Neon_Mov_Imm, NeonMovVT, Imm, OpCmode)) {
  3301 + SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
  3302 + SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
  3303 +
  3304 + if (ImmVal.getNode() && OpCmodeVal.getNode()) {
  3305 + SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MOVIMM, DL, NeonMovVT,
  3306 + ImmVal, OpCmodeVal);
  3307 + return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
  3308 + }
  3309 + }
  3310 +
  3311 + // Then attempt to use vector immediate-form MVNI
  3312 + uint64_t NegatedImm = (~SplatBits).getZExtValue();
  3313 + if (isNeonModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
  3314 + DAG, VT.is128BitVector(), Neon_Mvn_Imm, NeonMovVT,
  3315 + Imm, OpCmode)) {
  3316 + SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
  3317 + SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
  3318 + if (ImmVal.getNode() && OpCmodeVal.getNode()) {
  3319 + SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MVNIMM, DL, NeonMovVT,
  3320 + ImmVal, OpCmodeVal);
  3321 + return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
  3322 + }
  3323 + }
  3324 +
  3325 + // Attempt to use vector immediate-form FMOV
  3326 + if (((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) ||
  3327 + (VT == MVT::v2f64 && SplatBitSize == 64)) {
  3328 + APFloat RealVal(
  3329 + SplatBitSize == 32 ? APFloat::IEEEsingle : APFloat::IEEEdouble,
  3330 + SplatBits);
  3331 + uint32_t ImmVal;
  3332 + if (A64Imms::isFPImm(RealVal, ImmVal)) {
  3333 + SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
  3334 + return DAG.getNode(AArch64ISD::NEON_FMOVIMM, DL, VT, Val);
  3335 + }
  3336 + }
  3337 + }
  3338 + }
  3339 + return SDValue();
  3340 +}
  3341 +
2822 3342 AArch64TargetLowering::ConstraintType
2823 3343 AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
2824 3344 if (Constraint.size() == 1) {
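
To illustrate the MOVI-before-MVNI ordering that LowerBUILD_VECTOR documents, here is a sketch of the 32-bit LSL cases only (my simplified model of isNeonModifiedImm, with the MSL forms omitted): a splat like 0xffffff00 fits no MOVI byte window, but its complement does, so it lowers as MVNI:

#include <cassert>
#include <cstdint>

// Simplified model of the 32-bit LSL rows above: one 0xff-sized window
// shifted left by 0, 8, 16 or 24 bits.
static bool movi32(uint32_t V, unsigned &Imm, unsigned &OpCmode) {
  static const unsigned Cmodes[4] = {0x0, 0x2, 0x4, 0x6};
  for (int Shift = 0; Shift < 4; ++Shift) {
    uint32_t Mask = 0xffu << (8 * Shift);
    if ((V & ~Mask) == 0) {
      Imm = V >> (8 * Shift);
      OpCmode = Cmodes[Shift];
      return true;
    }
  }
  return false;
}

int main() {
  unsigned Imm, OpCmode;
  assert(!movi32(0xffffff00u, Imm, OpCmode)); // no MOVI encoding...
  assert(movi32(~0xffffff00u, Imm, OpCmode)); // ...but MVNI works:
  assert(Imm == 0xff && OpCmode == 0x0);      // ~V = 0x000000ff, LSL #0
  return 0;
}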
33 lib/Target/AArch64/AArch64ISelLowering.h
@@ -111,7 +111,28 @@ namespace AArch64ISD {
111 111 // created using the small memory model style: i.e. adrp/add or
112 112 // adrp/mem-op. This exists to prevent bare TargetAddresses which may never
113 113 // get selected.
114   - WrapperSmall
  114 + WrapperSmall,
  115 +
  116 + // Vector bitwise select
  117 + NEON_BSL,
  118 +
  119 + // Vector move immediate
  120 + NEON_MOVIMM,
  121 +
  122 + // Vector Move Inverted Immediate
  123 + NEON_MVNIMM,
  124 +
  125 + // Vector FP move immediate
  126 + NEON_FMOVIMM,
  127 +
  128 + // Vector compare
  129 + NEON_CMP,
  130 +
  131 + // Vector compare zero
  132 + NEON_CMPZ,
  133 +
  134 + // Vector compare bitwise test
  135 + NEON_TST
115 136 };
116 137 }
117 138
@@ -148,9 +169,11 @@ class AArch64TargetLowering : public TargetLowering {
148 169 SDLoc dl, SelectionDAG &DAG,
149 170 SmallVectorImpl<SDValue> &InVals) const;
150 171
151   - void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
152   - SDLoc DL, SDValue &Chain) const;
  172 + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
  173 + const AArch64Subtarget *ST) const;
153 174
  175 + void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL,
  176 + SDValue &Chain) const;
154 177
155 178 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
156 179 /// for tail call optimization. Targets which want to do tail call
@@ -253,6 +276,10 @@ class AArch64TargetLowering : public TargetLowering {
253 276 return &getTargetMachine().getSubtarget<AArch64Subtarget>();
254 277 }
255 278 };
  279 +enum NeonModImmType {
  280 + Neon_Mov_Imm,
  281 + Neon_Mvn_Imm
  282 +};
256 283 } // namespace llvm
257 284
258 285 #endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
93 lib/Target/AArch64/AArch64InstrFormats.td
@@ -959,3 +959,96 @@ class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4,
959 959 let Inst{4-0} = op4;
960 960 }
961 961
  962 +
  963 +//===----------------------------------------------------------------------===//
  964 +//
  965 +// Neon Instruction Format Definitions.
  966 +//
  967 +
  968 +let Predicates = [HasNEON] in {
  969 +
  970 +class NeonInstAlias<string Asm, dag Result, bit Emit = 0b1>
  971 + : InstAlias<Asm, Result, Emit> {
  972 +}
  973 +
  974 +// Format AdvSIMD 3 vector registers with same vector type
  975 +class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode,
  976 + dag outs, dag ins, string asmstr,
  977 + list<dag> patterns, InstrItinClass itin>
  978 + : A64InstRdnm<outs, ins, asmstr, patterns, itin>
  979 +{
  980 + let Inst{31} = 0b0;
  981 + let Inst{30} = q;
  982 + let Inst{29} = u;
  983 + let Inst{28-24} = 0b01110;
  984 + let Inst{23-22} = size;
  985 + let Inst{21} = 0b1;
  986 + // Inherit Rm in 20-16
  987 + let Inst{15-11} = opcode;
  988 + let Inst{10} = 0b1;
  989 + // Inherit Rn in 9-5
  990 + // Inherit Rd in 4-0
  991 +}
  992 +
  993 +// Format AdvSIMD 1 vector register with modified immediate
  994 +class NeonI_1VModImm<bit q, bit op,
  995 + dag outs, dag ins, string asmstr,
  996 + list<dag> patterns, InstrItinClass itin>
  997 + : A64InstRd<outs,ins, asmstr, patterns, itin>
  998 +{
  999 + bits<8> Imm;
  1000 + bits<4> cmode;
  1001 + let Inst{31} = 0b0;
  1002 + let Inst{30} = q;
  1003 + let Inst{29} = op;
  1004 + let Inst{28-19} = 0b0111100000;
  1005 + let Inst{15-12} = cmode;
  1006 + let Inst{11} = 0b0; // o2
  1007 + let Inst{10} = 1;
  1008 + // Inherit Rd in 4-0
  1009 + let Inst{18-16} = Imm{7-5}; // imm a:b:c
  1010 + let Inst{9-5} = Imm{4-0}; // imm d:e:f:g:h
  1011 +}
  1012 +
  1013 +// Format AdvSIMD 3 scalar registers with same type
  1014 +
  1015 +class NeonI_Scalar3Same<bit u, bits<2> size, bits<5> opcode,
  1016 + dag outs, dag ins, string asmstr,
  1017 + list<dag> patterns, InstrItinClass itin>
  1018 + : A64InstRdnm<outs, ins, asmstr, patterns, itin>
  1019 +{
  1020 + let Inst{31} = 0b0;
  1021 + let Inst{30} = 0b1;
  1022 + let Inst{29} = u;
  1023 + let Inst{28-24} = 0b11110;
  1024 + let Inst{23-22} = size;
  1025 + let Inst{21} = 0b1;
  1026 + // Inherit Rm in 20-16
  1027 + let Inst{15-11} = opcode;
  1028 + let Inst{10} = 0b1;
  1029 + // Inherit Rn in 9-5
  1030 + // Inherit Rd in 4-0
  1031 +}
  1032 +
  1033 +
  1034 +// Format AdvSIMD 2 vector registers miscellaneous
  1035 +class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode,
  1036 + dag outs, dag ins, string asmstr,
  1037 + list<dag> patterns, InstrItinClass itin>
  1038 + : A64InstRdn<outs, ins, asmstr, patterns, itin>
  1039 +{
  1040 + let Inst{31} = 0b0;
  1041 + let Inst{30} = q;
  1042 + let Inst{29} = u;
  1043 + let Inst{28-24} = 0b01110;
  1044 + let Inst{23-22} = size;
  1045 + let Inst{21-17} = 0b10000;
  1046 + let Inst{16-12} = opcode;
  1047 + let Inst{11-10} = 0b10;
  1048 +
  1049 + // Inherit Rn in 9-5
  1050 + // Inherit Rd in 4-0
  1051 +}
  1052 +
  1053 +}
  1054 +
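
As a cross-check of the NeonI_3VSame bit layout (my reconstruction from the let Inst{...} assignments above, not code from the patch), packing the fields by hand should reproduce the architectural encoding, e.g. 0x0e228420 for "add v0.8b, v1.8b, v2.8b":

#include <cassert>
#include <cstdint>

// Field packing copied from the NeonI_3VSame class: Inst{31}=0,
// Inst{30}=q, Inst{29}=u, Inst{28-24}=0b01110, Inst{23-22}=size,
// Inst{21}=1, Inst{20-16}=Rm, Inst{15-11}=opcode, Inst{10}=1,
// Inst{9-5}=Rn, Inst{4-0}=Rd.
static uint32_t encode3VSame(unsigned Q, unsigned U, unsigned Size,
                             unsigned Opcode, unsigned Rm, unsigned Rn,
                             unsigned Rd) {
  return (Q << 30) | (U << 29) | (0x0Eu << 24) | (Size << 22) |
         (1u << 21) | (Rm << 16) | (Opcode << 11) | (1u << 10) |
         (Rn << 5) | Rd;
}

int main() {
  // ADD (vector), 8B arrangement: q=0, u=0, size=0b00, opcode=0b10000.
  assert(encode3VSame(0, 0, 0, 0x10, /*Rm=*/2, /*Rn=*/1, /*Rd=*/0) ==
         0x0e228420u);
  return 0;
}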
40 lib/Target/AArch64/AArch64InstrInfo.td
@@ -11,6 +11,17 @@
11 11 //
12 12 //===----------------------------------------------------------------------===//
13 13
  14 +//===----------------------------------------------------------------------===//
  15 +// AArch64 Instruction Predicate Definitions.
  16 +//
  17 +def HasNEON : Predicate<"Subtarget->hasNEON()">,
  18 + AssemblerPredicate<"FeatureNEON", "neon">;
  19 +def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
  20 + AssemblerPredicate<"FeatureCrypto","crypto">;
  21 +
  22 +// Use fused MAC if more precision in FP computation is allowed.
  23 +def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
  24 + " FPOpFusion::Fast)">;
14 25 include "AArch64InstrFormats.td"
15 26
16 27 //===----------------------------------------------------------------------===//
@@ -2173,6 +2184,29 @@ def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>;
2173 2184 def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;
2174 2185 def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
2175 2186
  2187 +// Extra patterns for when we're allowed to optimise separate multiplication and
  2188 +// addition.
  2189 +let Predicates = [UseFusedMAC] in {
  2190 +def : Pat<(fadd FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)),
  2191 + (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
  2192 +def : Pat<(fsub FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)),
  2193 + (FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
  2194 +def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra),
  2195 + (FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
  2196 +def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)),
  2197 + (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
  2198 +
  2199 +def : Pat<(fadd FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)),
  2200 + (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
  2201 +def : Pat<(fsub FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)),
  2202 + (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
  2203 +def : Pat<(fsub (fmul FPR64:$Rn, FPR64:$Rm), FPR64:$Ra),
  2204 + (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
  2205 +def : Pat<(fsub (fneg FPR64:$Ra), (fmul FPR64:$Rn, FPR64:$Rm)),
  2206 + (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
  2207 +}
  2208 +
  2209 +
2176 2210 //===----------------------------------------------------------------------===//
2177 2211 // Floating-point <-> fixed-point conversion instructions
2178 2212 //===----------------------------------------------------------------------===//
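
These patterns are gated on UseFusedMAC because fusing changes results: fma rounds once where a separate multiply and add round twice. A tiny standalone demonstration (illustration only, not patch code):

#include <cassert>
#include <cmath>

int main() {
  // (1 + 2^-52) * (1 - 2^-52) = 1 - 2^-104 exactly. A separate fmul
  // rounds that product to 1.0, so fmul+fadd gives 0.0; fma keeps the
  // low-order term and returns -2^-104.
  double A = 1.0 + 0x1p-52, B = 1.0 - 0x1p-52, C = -1.0;
  double Separate = A * B + C;
  double Fused = std::fma(A, B, C);
  assert(Separate == 0.0);
  assert(Fused == -0x1p-104);
  return 0;
}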
@@ -5123,3 +5157,9 @@ defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm),
5123 5157
5124 5158 defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),
5125 5159 (i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>;
  5160 +
  5161 +//===----------------------------------------------------------------------===//
  5162 +// Advanced SIMD (NEON) Support
  5163 +//
  5164 +
  5165 +include "AArch64InstrNEON.td"
1,634 lib/Target/AArch64/AArch64InstrNEON.td
... ... @@ -0,0 +1,1634 @@
  1 +//===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
  2 +//
  3 +// The LLVM Compiler Infrastructure
  4 +//
  5 +// This file is distributed under the University of Illinois Open Source
  6 +// License. See LICENSE.TXT for details.
  7 +//
  8 +//===----------------------------------------------------------------------===//
  9 +//
  10 +// This file describes the AArch64 NEON instruction set.
  11 +//
  12 +//===----------------------------------------------------------------------===//
  13 +
  14 +//===----------------------------------------------------------------------===//
  15 +// NEON-specific DAG Nodes.
  16 +//===----------------------------------------------------------------------===//
  17 +def Neon_bsl : SDNode<"AArch64ISD::NEON_BSL", SDTypeProfile<1, 3,
  18 + [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
  19 + SDTCisSameAs<0, 3>]>>;
  20 +
  21 +// (outs Result), (ins Imm, OpCmode)
  22 +def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
  23 +
  24 +def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;
  25 +
  26 +def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;
  27 +
  28 +// (outs Result), (ins Imm)
  29 +def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
  30 + [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
  31 +
  32 +// (outs Result), (ins LHS, RHS, CondCode)
  33 +def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
  34 + [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
  35 +
  36 +// (outs Result), (ins LHS, 0/0.0 constant, CondCode)
  37 +def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
  38 + [SDTCisVec<0>, SDTCisVec<1>]>>;
  39 +
  40 +// (outs Result), (ins LHS, RHS)
  41 +def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
  42 + [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
  43 +
  44 +//===----------------------------------------------------------------------===//
  45 +// Multiclasses
  46 +//===----------------------------------------------------------------------===//
  47 +
  48 +multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
  49 + string asmop, SDPatternOperator opnode8B,
  50 + SDPatternOperator opnode16B,
  51 + bit Commutable = 0>
  52 +{
  53 + let isCommutable = Commutable in {
  54 + def _8B : NeonI_3VSame<0b0, u, size, opcode,
  55 + (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
  56 + asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
  57 + [(set (v8i8 VPR64:$Rd),
  58 + (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
  59 + NoItinerary>;
  60 +
  61 + def _16B : NeonI_3VSame<0b1, u, size, opcode,
  62 + (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
  63 + asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
  64 + [(set (v16i8 VPR128:$Rd),
  65 + (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
  66 + NoItinerary>;
  67 + }
  68 +
  69 +}
  70 +
  71 +multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
  72 + string asmop, SDPatternOperator opnode,
  73 + bit Commutable = 0>
  74 +{
  75 + let isCommutable = Commutable in {
  76 + def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
  77 + (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
  78 + asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
  79 + [(set (v4i16 VPR64:$Rd),
  80 + (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
  81 + NoItinerary>;
  82 +
  83 + def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
  84 + (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
  85 + asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
  86 + [(set (v8i16 VPR128:$Rd),
  87 + (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
  88 + NoItinerary>;
  89 +
  90 + def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
  91 + (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
  92 + asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
  93 + [(set (v2i32 VPR64:$Rd),
  94 + (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
  95 + NoItinerary>;
  96 +
  97 + def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
  98 + (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
  99 + asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
  100 + [(set (v4i32 VPR128:$Rd),
  101 + (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
  102 + NoItinerary>;
  103 + }
  104 +}
  105 +multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
  106 + string asmop, SDPatternOperator opnode,
  107 + bit Commutable = 0>
  108 + : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable>
  109 +{
  110 + let isCommutable = Commutable in {
  111 + def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
  112 + (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
  113 + asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
  114 + [(set (v8i8 VPR64:$Rd),
  115 + (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
  116 + NoItinerary>;
  117 +
  118 + def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
  119 + (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
  120 + asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
  121 + [(set (v16i8 VPR128:$Rd),
  122 + (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
  123 + NoItinerary>;
  124 + }
  125 +}
  126 +
  127 +multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
  128 + string asmop, SDPatternOperator opnode,
  129 + bit Commutable = 0>
  130 + : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable>
  131 +{
  132 + let isCommutable = Commutable in {
  133 + def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
  134 + (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
  135 + asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
  136 + [(set (v2i64 VPR128:$Rd),
  137 + (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
  138 + NoItinerary>;
  139 + }
  140 +}
  141 +
  142 +// Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
  143 +// but Result types can be integer or floating point types.
  144 +multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
  145 + string asmop, SDPatternOperator opnode2S,
  146 + SDPatternOperator opnode4S,
  147 + SDPatternOperator opnode2D,
  148 + ValueType ResTy2S, ValueType ResTy4S,
  149 + ValueType ResTy2D, bit Commutable = 0>
  150 +{
  151 + let isCommutable = Commutable in {
  152 + def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
  153 + (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
  154 + asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
  155 + [(set (ResTy2S VPR64:$Rd),
  156 + (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
  157 + NoItinerary>;
  158 +
  159 + def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
  160 + (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
  161 + asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
  162 + [(set (ResTy4S VPR128:$Rd),
  163 + (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
  164 + NoItinerary>;
  165 +
  166 + def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
  167 + (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
  168 + asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
  169 + [(set (ResTy2D VPR128:$Rd),
  170 + (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
  171 + NoItinerary>;
  172 + }
  173 +}
  174 +
  175 +//===----------------------------------------------------------------------===//
  176 +// Instruction Definitions
  177 +//===----------------------------------------------------------------------===//
  178 +
  179 +// Vector Arithmetic Instructions
  180 +
  181 +// Vector Add (Integer and Floating-Point)
  182 +
  183 +defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
  184 +defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
  185 + v2f32, v4f32, v2f64, 1>;
  186 +
  187 +// Vector Sub (Integer and Floating-Point)
  188 +
  189 +defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
  190 +defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
  191 + v2f32, v4f32, v2f64, 0>;
  192 +
  193 +// Vector Multiply (Integer and Floating-Point)
  194 +
  195 +defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
  196 +defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
  197 + v2f32, v4f32, v2f64, 1>;
  198 +
  199 +// Vector Multiply (Polynomial)
  200 +
  201 +defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
  202 + int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
  203 +
  204 +// Vector Multiply-accumulate and Multiply-subtract (Integer)
  205 +
  206 +// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and