Skip to content

Commit

Permalink
ARM v8.1a adds Advanced SIMD instructions for Rounding Double Multiply
Add/Subtract.
Browse files Browse the repository at this point in the history

The following instructions are added to AArch32 instruction set:

- VQRDMLAH: Vector Saturating Rounding Doubling Multiply Accumulate
            Returning High Half
- VQRDMLSH: Vector Saturating Rounding Doubling Multiply Subtract
            Returning High Half

The following instructions are added to AArch64 instruction set:

- SQRDMLAH: Signed Saturating Rounding Doubling Multiply Accumulate
            Returning High Half
- SQRDMLSH: Signed Saturating Rounding Doubling Multiply Subtract
            Returning High Half

This patch adds intrinsic and ACLE macro support for these instructions,
as well as corresponding tests.

Differential Revision: http://reviews.llvm.org/D14982

llvm-svn: 254250
  • Loading branch information
labrinea committed Nov 29, 2015
1 parent e14261a commit 502592c
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 0 deletions.
46 changes: 46 additions & 0 deletions clang/include/clang/Basic/arm_neon.td
Expand Up @@ -373,6 +373,10 @@ def OP_QDMLSLHi_LN : Op<(call "vqdmlsl", $p0, (call "vget_high", $p1),
(splat $p2, $p3))>;
def OP_QDMULH_LN : Op<(call "vqdmulh", $p0, (splat $p1, $p2))>;
def OP_QRDMULH_LN : Op<(call "vqrdmulh", $p0, (splat $p1, $p2))>;
// Accumulating forms of vqrdmulh, expressed as a composition of two existing
// intrinsic calls: "vqrdmulh" is applied to $p1 and $p2 first, and its result
// is then combined with the accumulator $p0 via "vqadd" (QRDMLAH) or
// "vqsub" (QRDMLSH).
// NOTE(review): because the product goes through vqrdmulh before the
// vqadd/vqsub, the intermediate value may saturate separately from the final
// accumulate -- confirm this matches the fused VQRDMLAH/SQRDMLAH instruction
// semantics for extreme inputs.
def OP_QRDMLAH : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, $p2))>;
def OP_QRDMLSH : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, $p2))>;
// Lane variants: same composition, but the second multiplicand is
// (splat $p2, $p3), i.e. $p2 splatted using the lane index operand $p3.
def OP_QRDMLAH_LN : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, (splat $p2, $p3)))>;
def OP_QRDMLSH_LN : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, (splat $p2, $p3)))>;
def OP_FMS_LN : Op<(call "vfma_lane", $p0, $p1, (op "-", $p2), $p3)>;
def OP_FMS_LNQ : Op<(call "vfma_laneq", $p0, $p1, (op "-", $p2), $p3)>;
def OP_TRN1 : Op<(shuffle $p0, $p1, (interleave (decimate mask0, 2),
Expand Down Expand Up @@ -473,6 +477,11 @@ def OP_SCALAR_QDMULL_LN : ScalarMulOp<"vqdmull">;
def OP_SCALAR_QDMULH_LN : ScalarMulOp<"vqdmulh">;
def OP_SCALAR_QRDMULH_LN : ScalarMulOp<"vqrdmulh">;

// Scalar by-lane accumulate/subtract forms of vqrdmulh. The lane is first
// extracted with "vget_lane" ($p2 indexed by $p3), multiplied with $p1 through
// "vqrdmulh", and the result is combined with the accumulator $p0 via
// "vqadd" (QRDMLAH) or "vqsub" (QRDMLSH).
// NOTE(review): the same op is used by both the _lane and _laneq scalar
// intrinsics; presumably the "vget_lane" call is resolved against the actual
// vector operand type -- confirm against the NEON emitter.
def OP_SCALAR_QRDMLAH_LN : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1,
(call "vget_lane", $p2, $p3)))>;
def OP_SCALAR_QRDMLSH_LN : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1,
(call "vget_lane", $p2, $p3)))>;

def OP_SCALAR_HALF_GET_LN : Op<(bitcast "float16_t",
(call "vget_lane",
(bitcast "int16x4_t", $p0), $p1))>;
Expand Down Expand Up @@ -514,6 +523,12 @@ def VMLS : IOpInst<"vmls", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLS>;
def VMLSL : SOpInst<"vmlsl", "wwdd", "csiUcUsUi", OP_MLSL>;
def VQDMULH : SInst<"vqdmulh", "ddd", "siQsQi">;
def VQRDMULH : SInst<"vqrdmulh", "ddd", "siQsQi">;

// Vector Saturating Rounding Doubling Multiply Accumulate/Subtract Returning
// High Half. Guarded by __ARM_FEATURE_QRDMX so the intrinsics are only
// declared when the target defines that macro. Both are built from
// OP_QRDMLAH / OP_QRDMLSH rather than mapping to a dedicated builtin.
let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in {
def VQRDMLAH : SOpInst<"vqrdmlah", "dddd", "siQsQi", OP_QRDMLAH>;
def VQRDMLSH : SOpInst<"vqrdmlsh", "dddd", "siQsQi", OP_QRDMLSH>;
}

def VQDMLAL : SInst<"vqdmlal", "wwdd", "si">;
def VQDMLSL : SInst<"vqdmlsl", "wwdd", "si">;
def VMULL : SInst<"vmull", "wdd", "csiUcUsUiPc">;
Expand Down Expand Up @@ -741,6 +756,12 @@ def VQDMULH_N : SInst<"vqdmulh_n", "dda", "siQsQi">;
def VQDMULH_LANE : SOpInst<"vqdmulh_lane", "ddgi", "siQsQi", OP_QDMULH_LN>;
def VQRDMULH_N : SInst<"vqrdmulh_n", "dda", "siQsQi">;
def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ddgi", "siQsQi", OP_QRDMULH_LN>;

// By-lane forms of vqrdmlah/vqrdmlsh, declared only when
// __ARM_FEATURE_QRDMX is defined. They expand through the
// OP_QRDMLAH_LN / OP_QRDMLSH_LN ops; the trailing "i" in the prototype string
// corresponds to the lane-index operand consumed by those ops.
let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in {
def VQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "dddgi", "siQsQi", OP_QRDMLAH_LN>;
def VQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "dddgi", "siQsQi", OP_QRDMLSH_LN>;
}

def VMLA_N : IOpInst<"vmla_n", "ddda", "siUsUifQsQiQUsQUiQf", OP_MLA_N>;
def VMLAL_N : SOpInst<"vmlal_n", "wwda", "siUsUi", OP_MLAL_N>;
def VQDMLAL_N : SInst<"vqdmlal_n", "wwda", "si">;
Expand Down Expand Up @@ -1160,6 +1181,11 @@ def VQDMULL_HIGH_LANEQ : SOpInst<"vqdmull_high_laneq", "wkki", "si",
def VQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ddji", "siQsQi", OP_QDMULH_LN>;
def VQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ddji", "siQsQi", OP_QRDMULH_LN>;

// "laneq" forms of vqrdmlah/vqrdmlsh. In addition to __ARM_FEATURE_QRDMX these
// require __aarch64__, so they are not declared for AArch32 targets. They
// reuse the same OP_QRDMLAH_LN / OP_QRDMLSH_LN expansions as the _lane forms;
// only the prototype string differs ("j" instead of "g" for the lane source).
let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "dddji", "siQsQi", OP_QRDMLAH_LN>;
def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "dddji", "siQsQi", OP_QRDMLSH_LN>;
}

// Note: d type implemented by SCALAR_VMULX_LANE
def VMULX_LANE : IOpInst<"vmulx_lane", "ddgi", "fQfQd", OP_MULX_LN>;
// Note: d type is implemented by SCALAR_VMULX_LANEQ
Expand Down Expand Up @@ -1405,6 +1431,16 @@ def SCALAR_SQDMULH : SInst<"vqdmulh", "sss", "SsSi">;
// Scalar Integer Saturating Rounding Doubling Multiply Half High
def SCALAR_SQRDMULH : SInst<"vqrdmulh", "sss", "SsSi">;

// Scalar vqrdmlah/vqrdmlsh intrinsics. Declared only when both
// __ARM_FEATURE_QRDMX and __aarch64__ are defined. They share the
// OP_QRDMLAH / OP_QRDMLSH expansions used by the vector forms, with scalar
// prototype ("ssss") and type ("SsSi") strings.
let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
////////////////////////////////////////////////////////////////////////////////
// Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
def SCALAR_SQRDMLAH : SOpInst<"vqrdmlah", "ssss", "SsSi", OP_QRDMLAH>;

////////////////////////////////////////////////////////////////////////////////
// Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
def SCALAR_SQRDMLSH : SOpInst<"vqrdmlsh", "ssss", "SsSi", OP_QRDMLSH>;
}

////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Multiply Extended
def SCALAR_FMULX : IInst<"vmulx", "sss", "SfSd">;
Expand Down Expand Up @@ -1606,6 +1642,16 @@ def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QD
def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QRDMULH_LN>;
def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QRDMULH_LN>;

// Scalar by-lane vqrdmlah/vqrdmlsh intrinsics. Declared only when both
// __ARM_FEATURE_QRDMX and __aarch64__ are defined. The _lane and _laneq
// records differ only in the prototype string ("d" vs "j" for the vector the
// lane is read from); both expand through the same OP_SCALAR_*_LN op.
let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
// Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "sssdi", "SsSi", OP_SCALAR_QRDMLAH_LN>;
def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "sssji", "SsSi", OP_SCALAR_QRDMLAH_LN>;

// Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "sssdi", "SsSi", OP_SCALAR_QRDMLSH_LN>;
def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "sssji", "SsSi", OP_SCALAR_QRDMLSH_LN>;
}

def SCALAR_VDUP_LANE : IInst<"vdup_lane", "sdi", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
}
10 changes: 10 additions & 0 deletions clang/lib/Basic/Targets.cpp
Expand Up @@ -4869,6 +4869,9 @@ class ARMTargetInfo : public TargetInfo {

if (Opts.UnsafeFPMath)
Builder.defineMacro("__ARM_FP_FAST", "1");

if (ArchKind == llvm::ARM::AK_ARMV8_1A)
Builder.defineMacro("__ARM_FEATURE_QRDMX", "1");
}

ArrayRef<Builtin::Info> getTargetBuiltins() const override {
Expand Down Expand Up @@ -5250,6 +5253,7 @@ class AArch64TargetInfo : public TargetInfo {
unsigned CRC;
unsigned Crypto;
unsigned Unaligned;
unsigned V8_1A;

static const Builtin::Info BuiltinInfo[];

Expand Down Expand Up @@ -5372,6 +5376,9 @@ class AArch64TargetInfo : public TargetInfo {
if (Unaligned)
Builder.defineMacro("__ARM_FEATURE_UNALIGNED", "1");

if (V8_1A)
Builder.defineMacro("__ARM_FEATURE_QRDMX", "1");

// All of the __sync_(bool|val)_compare_and_swap_(1|2|4|8) builtins work.
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
Expand All @@ -5397,6 +5404,7 @@ class AArch64TargetInfo : public TargetInfo {
CRC = 0;
Crypto = 0;
Unaligned = 1;
V8_1A = 0;

for (const auto &Feature : Features) {
if (Feature == "+neon")
Expand All @@ -5407,6 +5415,8 @@ class AArch64TargetInfo : public TargetInfo {
Crypto = 1;
if (Feature == "+strict-align")
Unaligned = 0;
if (Feature == "+v8.1a")
V8_1A = 1;
}

setDataLayoutString();
Expand Down
3 changes: 3 additions & 0 deletions clang/test/Preprocessor/aarch64-target-features.c
Expand Up @@ -71,6 +71,9 @@
// CHECK-NEON: __ARM_NEON 1
// CHECK-NEON: __ARM_NEON_FP 0xE

// RUN: %clang -target aarch64-none-eabi -march=armv8.1-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-V81A %s
// CHECK-V81A: __ARM_FEATURE_QRDMX 1

// RUN: %clang -target aarch64 -march=arm64 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-ARCH-NOT-ACCEPT %s
// RUN: %clang -target aarch64 -march=aarch64 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-ARCH-NOT-ACCEPT %s
// CHECK-ARCH-NOT-ACCEPT: error: the clang compiler does not support
Expand Down
1 change: 1 addition & 0 deletions clang/test/Preprocessor/arm-target-features.c
Expand Up @@ -407,4 +407,5 @@
// CHECK-V81A: __ARM_ARCH 8
// CHECK-V81A: __ARM_ARCH_8_1A__ 1
// CHECK-V81A: #define __ARM_ARCH_PROFILE 'A'
// CHECK-V81A: __ARM_FEATURE_QRDMX 1
// CHECK-V81A: #define __ARM_FP 0xE

0 comments on commit 502592c

Please sign in to comment.