303 changes: 1 addition & 302 deletions clang/include/clang/Basic/arm_neon.td
Original file line number Diff line number Diff line change
Expand Up @@ -11,309 +11,8 @@
// file will be generated. See ARM document DUI0348B.
//
//===----------------------------------------------------------------------===//
//
// Each intrinsic is a subclass of the Inst class. An intrinsic can either
// generate a __builtin_* call or it can expand to a set of generic operations.
//
// The operations are subclasses of Operation providing a list of DAGs, the
// last of which is the return value. The available DAG nodes are documented
// below.
//
//===----------------------------------------------------------------------===//

// The base Operation class. All operations must subclass this.
class Operation<list<dag> ops=[]> {
list<dag> Ops = ops;
bit Unavailable = 0;
}
// An operation that only contains a single DAG.
class Op<dag op> : Operation<[op]>;
// A shorter version of Operation - takes a list of DAGs. The last of these will
// be the return value.
class LOp<list<dag> ops> : Operation<ops>;

// These defs and classes are used internally to implement the SetTheory
// expansion and should be ignored.
foreach Index = 0-63 in
def sv##Index;
class MaskExpand;

//===----------------------------------------------------------------------===//
// Available operations
//===----------------------------------------------------------------------===//

// DAG arguments can either be operations (documented below) or variables.
// Variables are prefixed with '$'. There are variables for each input argument,
// with the name $pN, where N starts at zero. So the zero'th argument will be
// $p0, the first $p1 etc.

// op - Binary or unary operator, depending on the number of arguments. The
// operator itself is just treated as a raw string and is not checked.
// example: (op "+", $p0, $p1) -> "__p0 + __p1".
// (op "-", $p0) -> "-__p0"
def op;
// call - Invoke another intrinsic. The input types are type checked and
// disambiguated. If there is no intrinsic defined that takes
// the given types (or if there is a type ambiguity) an error is
// generated at tblgen time. The name of the intrinsic is the raw
// name as given to the Inst class (not mangled).
// example: (call "vget_high", $p0) -> "vgetq_high_s16(__p0)"
// (assuming $p0 has type int16x8_t).
def call;
// cast - Perform a cast to a different type. This gets emitted as a static
// C-style cast. For a pure reinterpret cast (T x = *(T*)&y), use
// "bitcast".
//
// The syntax is (cast MOD* VAL). The last argument is the value to
// cast, preceded by a sequence of type modifiers. The target type
// starts off as the type of VAL, and is modified by MOD in sequence.
// The available modifiers are:
// - $X - Take the type of parameter/variable X. For example:
// (cast $p0, $p1) would cast $p1 to the type of $p0.
// - "R" - The type of the return type.
// - A typedef string - A NEON or stdint.h type that is then parsed.
// for example: (cast "uint32x4_t", $p0).
// - "U" - Make the type unsigned.
// - "S" - Make the type signed.
// - "H" - Halve the number of lanes in the type.
// - "D" - Double the number of lanes in the type.
// - "8" - Convert type to an equivalent vector of 8-bit signed
// integers.
// example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return
// value is of type "int32x4_t").
// (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0
// has type float64x1_t or any other vector type of 64 bits).
// (cast "int32_t", $p2) -> "(int32_t)__p2"
def cast;
// bitcast - Same as "cast", except a reinterpret-cast is produced:
// (bitcast "T", $p0) -> "*(T*)&__p0".
// The VAL argument is saved to a temporary so it can be used
// as an l-value.
def bitcast;
// dup - Take a scalar argument and create a vector by duplicating it into
// all lanes. The type of the vector is the base type of the intrinsic.
// example: (dup $p1) -> "(uint32x2_t) {__p1, __p1}" (assuming the base type
// is uint32x2_t).
def dup;
// splat - Take a vector and a lane index, and return a vector of the same type
// containing repeated instances of the source vector at the lane index.
// example: (splat $p0, $p1) ->
// "__builtin_shufflevector(__p0, __p0, __p1, __p1, __p1, __p1)"
// (assuming __p0 has four elements).
def splat;
// save_temp - Create a temporary (local) variable. The variable takes a name
// based on the zero'th parameter and can be referenced using
// that name in subsequent DAGs in the same
// operation. The scope of a temp is the operation. If a variable
// with the given name already exists, an error will be given at
// tblgen time.
// example: [(save_temp $var, (call "foo", $p0)),
// (op "+", $var, $p1)] ->
// "int32x2_t __var = foo(__p0); return __var + __p1;"
def save_temp;
// name_replace - Return the name of the current intrinsic with the first
// argument replaced by the second argument. Raises an error if
// the first argument does not exist in the intrinsic name.
// example: (call (name_replace "_high_", "_"), $p0) (to call the non-high
// version of this intrinsic).
def name_replace;
// literal - Create a literal piece of code. The code is treated as a raw
// string, and must be given a type. The type is a stdint.h or
// NEON intrinsic type as given to (cast).
// example: (literal "int32_t", "0")
def literal;
// shuffle - Create a vector shuffle. The syntax is (shuffle ARG0, ARG1, MASK).
// The MASK argument is a set of elements. The elements are generated
// from the two special defs "mask0" and "mask1". "mask0" expands to
// the lane indices in sequence for ARG0, and "mask1" expands to
// the lane indices in sequence for ARG1. They can be used as-is, e.g.
//
// (shuffle $p0, $p1, mask0) -> $p0
// (shuffle $p0, $p1, mask1) -> $p1
//
// or, more usefully, they can be manipulated using the SetTheory
// operators plus some extra operators defined in the NEON emitter.
// The operators are described below.
// example: (shuffle $p0, $p1, (add (highhalf mask0), (highhalf mask1))) ->
// A concatenation of the high halves of the input vectors.
def shuffle;

// add, interleave, decimate: These set operators are vanilla SetTheory
// operators and take their normal definition.
def add;
def interleave;
def decimate;
// rotl - Rotate set left by a number of elements.
// example: (rotl mask0, 3) -> [3, 4, 5, 6, 0, 1, 2]
def rotl;
// rotr - Rotate set right by a number of elements.
// example: (rotr mask0, 3) -> [4, 5, 6, 0, 1, 2, 3]
def rotr;
// highhalf - Take only the high half of the input.
// example: (highhalf mask0) -> [4, 5, 6, 7] (assuming mask0 had 8 elements)
def highhalf;
// lowhalf - Take only the low half of the input.
// example: (lowhalf mask0) -> [0, 1, 2, 3] (assuming mask0 had 8 elements)
def lowhalf;
// rev - Perform a variable-width reversal of the elements. The zero'th argument
// is a width in bits to reverse. The lanes this maps to is determined
// based on the element width of the underlying type.
// example: (rev 32, mask0) -> [3, 2, 1, 0, 7, 6, 5, 4] (if 8-bit elements)
// example: (rev 32, mask0) -> [1, 0, 3, 2] (if 16-bit elements)
def rev;
// mask0 - The initial sequence of lanes for shuffle ARG0
def mask0 : MaskExpand;
// mask1 - The initial sequence of lanes for shuffle ARG1
def mask1 : MaskExpand;

def OP_NONE : Operation;
def OP_UNAVAILABLE : Operation {
let Unavailable = 1;
}

//===----------------------------------------------------------------------===//
// Instruction definitions
//===----------------------------------------------------------------------===//

// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and
// a sequence of typespecs.
//
// The name is the base name of the intrinsic, for example "vget_lane". This is
// then mangled by the tblgen backend to add type information ("vget_lane_s16").
//
// A typespec is a sequence of uppercase characters (modifiers) followed by one
// lowercase character. A typespec encodes a particular "base type" of the
// intrinsic.
//
// An example typespec is "Qs" - quad-size short - int16x8_t. The available
// typespec codes are given below.
//
// The string given to an Inst class is a sequence of typespecs. The intrinsic
// is instantiated for every typespec in the sequence. For example "sdQsQd".
//
// The prototype is a string that defines the return type of the intrinsic
// and the type of each argument. The return type and every argument gets a
// "modifier" that can change in some way the "base type" of the intrinsic.
//
// The modifier 'd' means "default" and does not modify the base type in any
// way. The available modifiers are given below.
//
// Typespecs
// ---------
// c: char
// s: short
// i: int
// l: long
// k: 128-bit long
// f: float
// h: half-float
// d: double
//
// Typespec modifiers
// ------------------
// S: scalar, only used for function mangling.
// U: unsigned
// Q: 128b
// H: 128b without mangling 'q'
// P: polynomial
//
// Prototype modifiers
// -------------------
// prototype: return (arg, arg, ...)
//
// v: void
// t: best-fit integer (int/poly args)
// x: signed integer (int/float args)
// u: unsigned integer (int/float args)
// f: float (int args)
// F: double (int args)
// H: half (int args)
// d: default
// g: default, ignore 'Q' size modifier.
// j: default, force 'Q' size modifier.
// w: double width elements, same num elts
// n: double width elements, half num elts
// h: half width elements, double num elts
// q: half width elements, quad num elts
// e: half width elements, double num elts, unsigned
// m: half width elements, same num elts
// i: constant int
// l: constant uint64
// s: scalar of element type
// z: scalar of half width element type, signed
// r: scalar of double width element type, signed
// a: scalar of element type (splat to vector type)
// b: scalar of unsigned integer/long type (int/float args)
// $: scalar of signed integer/long type (int/float args)
// y: scalar of float
// o: scalar of double
// k: default elt width, double num elts
// 2,3,4: array of default vectors
// B,C,D: array of default elts, force 'Q' size modifier.
// p: pointer type
// c: const pointer type

// Every intrinsic subclasses Inst.
class Inst <string n, string p, string t, Operation o> {
string Name = n;
string Prototype = p;
string Types = t;
string ArchGuard = "";

Operation Operation = o;
bit CartesianProductOfTypes = 0;
bit BigEndianSafe = 0;
bit isShift = 0;
bit isScalarShift = 0;
bit isScalarNarrowShift = 0;
bit isVCVT_N = 0;
// For immediate checks: the immediate will be assumed to specify the lane of
// a Q register. Only used for intrinsics which end up calling polymorphic
// builtins.
bit isLaneQ = 0;

// Certain intrinsics have different names than their representative
// instructions. This field allows us to handle this correctly when we
// are generating tests.
string InstName = "";

// Certain intrinsics even though they are not a WOpInst or LOpInst,
// generate a WOpInst/LOpInst instruction (see below for definition
// of a WOpInst/LOpInst). For testing purposes we need to know
// this. Ex: vset_lane which outputs vmov instructions.
bit isHiddenWInst = 0;
bit isHiddenLInst = 0;
}

// The following instruction classes are implemented via builtins.
// These declarations are used to generate Builtins.def:
//
// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8", "p8")
// IInst: Instruction with generic integer suffix (e.g., "i8")
// WInst: Instruction with only bit size suffix (e.g., "8")
class SInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
class IInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
class WInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}

// The following instruction classes are implemented via operators
// instead of builtins. As such these declarations are only used for
// the purpose of generating tests.
//
// SOpInst: Instruction with signed/unsigned suffix (e.g., "s8",
// "u8", "p8").
// IOpInst: Instruction with generic integer suffix (e.g., "i8").
// WOpInst: Instruction with bit size only suffix (e.g., "8").
// LOpInst: Logical instruction with no bit size suffix.
// NoTestOpInst: Intrinsic that has no corresponding instruction.
class SOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class IOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class WOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class LOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class NoTestOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}

//===----------------------------------------------------------------------===//
// Operations
//===----------------------------------------------------------------------===//
include "arm_neon_incl.td"

def OP_ADD : Op<(op "+", $p0, $p1)>;
def OP_ADDL : Op<(op "+", (call "vmovl", $p0), (call "vmovl", $p1))>;
Expand Down
313 changes: 313 additions & 0 deletions clang/include/clang/Basic/arm_neon_incl.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,313 @@
//===--- arm_neon_incl.td - ARM NEON compiler interface ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines data structures shared by arm_neon.td and arm_fp16.td.
// It contains base operation classes, operations, instructions, instruction
// modifiers, etc.
//
//===----------------------------------------------------------------------===//
//
// Each intrinsic is a subclass of the Inst class. An intrinsic can either
// generate a __builtin_* call or it can expand to a set of generic operations.
//
// The operations are subclasses of Operation providing a list of DAGs, the
// last of which is the return value. The available DAG nodes are documented
// below.
//
//===----------------------------------------------------------------------===//

// The base Operation class. All operations must subclass this.
class Operation<list<dag> ops=[]> {
// The DAGs that make up this operation; the last one is the return value.
list<dag> Ops = ops;
// Defaults to 0; OP_UNAVAILABLE overrides it to 1.
bit Unavailable = 0;
}
// An operation that only contains a single DAG.
class Op<dag op> : Operation<[op]>;
// A shorter version of Operation - takes a list of DAGs. The last of these will
// be the return value.
class LOp<list<dag> ops> : Operation<ops>;

// These defs and classes are used internally to implement the SetTheory
// expansion and should be ignored.
foreach Index = 0-63 in
def sv##Index;
class MaskExpand;

//===----------------------------------------------------------------------===//
// Available operations
//===----------------------------------------------------------------------===//

// DAG arguments can either be operations (documented below) or variables.
// Variables are prefixed with '$'. There are variables for each input argument,
// with the name $pN, where N starts at zero. So the zero'th argument will be
// $p0, the first $p1 etc.

// op - Binary or unary operator, depending on the number of arguments. The
// operator itself is just treated as a raw string and is not checked.
// example: (op "+", $p0, $p1) -> "__p0 + __p1".
// (op "-", $p0) -> "-__p0"
def op;
// call - Invoke another intrinsic. The input types are type checked and
// disambiguated. If there is no intrinsic defined that takes
// the given types (or if there is a type ambiguity) an error is
// generated at tblgen time. The name of the intrinsic is the raw
// name as given to the Inst class (not mangled).
// example: (call "vget_high", $p0) -> "vgetq_high_s16(__p0)"
// (assuming $p0 has type int16x8_t).
def call;
// cast - Perform a cast to a different type. This gets emitted as a static
// C-style cast. For a pure reinterpret cast (T x = *(T*)&y), use
// "bitcast".
//
// The syntax is (cast MOD* VAL). The last argument is the value to
// cast, preceded by a sequence of type modifiers. The target type
// starts off as the type of VAL, and is modified by MOD in sequence.
// The available modifiers are:
// - $X - Take the type of parameter/variable X. For example:
// (cast $p0, $p1) would cast $p1 to the type of $p0.
// - "R" - The type of the return type.
// - A typedef string - A NEON or stdint.h type that is then parsed.
// for example: (cast "uint32x4_t", $p0).
// - "U" - Make the type unsigned.
// - "S" - Make the type signed.
// - "H" - Halve the number of lanes in the type.
// - "D" - Double the number of lanes in the type.
// - "8" - Convert type to an equivalent vector of 8-bit signed
// integers.
// example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return
// value is of type "int32x4_t").
// (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0
// has type float64x1_t or any other vector type of 64 bits).
// (cast "int32_t", $p2) -> "(int32_t)__p2"
def cast;
// bitcast - Same as "cast", except a reinterpret-cast is produced:
// (bitcast "T", $p0) -> "*(T*)&__p0".
// The VAL argument is saved to a temporary so it can be used
// as an l-value.
def bitcast;
// dup - Take a scalar argument and create a vector by duplicating it into
// all lanes. The type of the vector is the base type of the intrinsic.
// example: (dup $p1) -> "(uint32x2_t) {__p1, __p1}" (assuming the base type
// is uint32x2_t).
def dup;
// splat - Take a vector and a lane index, and return a vector of the same type
// containing repeated instances of the source vector at the lane index.
// example: (splat $p0, $p1) ->
// "__builtin_shufflevector(__p0, __p0, __p1, __p1, __p1, __p1)"
// (assuming __p0 has four elements).
def splat;
// save_temp - Create a temporary (local) variable. The variable takes a name
// based on the zero'th parameter and can be referenced using
// that name in subsequent DAGs in the same
// operation. The scope of a temp is the operation. If a variable
// with the given name already exists, an error will be given at
// tblgen time.
// example: [(save_temp $var, (call "foo", $p0)),
// (op "+", $var, $p1)] ->
// "int32x2_t __var = foo(__p0); return __var + __p1;"
def save_temp;
// name_replace - Return the name of the current intrinsic with the first
// argument replaced by the second argument. Raises an error if
// the first argument does not exist in the intrinsic name.
// example: (call (name_replace "_high_", "_"), $p0) (to call the non-high
// version of this intrinsic).
def name_replace;
// literal - Create a literal piece of code. The code is treated as a raw
// string, and must be given a type. The type is a stdint.h or
// NEON intrinsic type as given to (cast).
// example: (literal "int32_t", "0")
def literal;
// shuffle - Create a vector shuffle. The syntax is (shuffle ARG0, ARG1, MASK).
// The MASK argument is a set of elements. The elements are generated
// from the two special defs "mask0" and "mask1". "mask0" expands to
// the lane indices in sequence for ARG0, and "mask1" expands to
// the lane indices in sequence for ARG1. They can be used as-is, e.g.
//
// (shuffle $p0, $p1, mask0) -> $p0
// (shuffle $p0, $p1, mask1) -> $p1
//
// or, more usefully, they can be manipulated using the SetTheory
// operators plus some extra operators defined in the NEON emitter.
// The operators are described below.
// example: (shuffle $p0, $p1, (add (highhalf mask0), (highhalf mask1))) ->
// A concatenation of the high halves of the input vectors.
def shuffle;

// add, interleave, decimate: These set operators are vanilla SetTheory
// operators and take their normal definition.
def add;
def interleave;
def decimate;
// rotl - Rotate set left by a number of elements.
// example: (rotl mask0, 3) -> [3, 4, 5, 6, 0, 1, 2]
def rotl;
// rotr - Rotate set right by a number of elements.
// example: (rotr mask0, 3) -> [4, 5, 6, 0, 1, 2, 3]
def rotr;
// highhalf - Take only the high half of the input.
// example: (highhalf mask0) -> [4, 5, 6, 7] (assuming mask0 had 8 elements)
def highhalf;
// lowhalf - Take only the low half of the input.
// example: (lowhalf mask0) -> [0, 1, 2, 3] (assuming mask0 had 8 elements)
def lowhalf;
// rev - Perform a variable-width reversal of the elements. The zero'th argument
// is a width in bits to reverse. The lanes this maps to is determined
// based on the element width of the underlying type.
// example: (rev 32, mask0) -> [3, 2, 1, 0, 7, 6, 5, 4] (if 8-bit elements)
// example: (rev 32, mask0) -> [1, 0, 3, 2] (if 16-bit elements)
def rev;
// mask0 - The initial sequence of lanes for shuffle ARG0
def mask0 : MaskExpand;
// mask1 - The initial sequence of lanes for shuffle ARG1
def mask1 : MaskExpand;

def OP_NONE : Operation;
def OP_UNAVAILABLE : Operation {
let Unavailable = 1;
}

//===----------------------------------------------------------------------===//
// Instruction definitions
//===----------------------------------------------------------------------===//

// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and
// a sequence of typespecs.
//
// The name is the base name of the intrinsic, for example "vget_lane". This is
// then mangled by the tblgen backend to add type information ("vget_lane_s16").
//
// A typespec is a sequence of uppercase characters (modifiers) followed by one
// lowercase character. A typespec encodes a particular "base type" of the
// intrinsic.
//
// An example typespec is "Qs" - quad-size short - int16x8_t. The available
// typespec codes are given below.
//
// The string given to an Inst class is a sequence of typespecs. The intrinsic
// is instantiated for every typespec in the sequence. For example "sdQsQd".
//
// The prototype is a string that defines the return type of the intrinsic
// and the type of each argument. The return type and every argument gets a
// "modifier" that can change in some way the "base type" of the intrinsic.
//
// The modifier 'd' means "default" and does not modify the base type in any
// way. The available modifiers are given below.
//
// Typespecs
// ---------
// c: char
// s: short
// i: int
// l: long
// k: 128-bit long
// f: float
// h: half-float
// d: double
//
// Typespec modifiers
// ------------------
// S: scalar, only used for function mangling.
// U: unsigned
// Q: 128b
// H: 128b without mangling 'q'
// P: polynomial
//
// Prototype modifiers
// -------------------
// prototype: return (arg, arg, ...)
//
// v: void
// t: best-fit integer (int/poly args)
// x: signed integer (int/float args)
// u: unsigned integer (int/float args)
// f: float (int args)
// F: double (int args)
// H: half (int args)
// d: default
// g: default, ignore 'Q' size modifier.
// j: default, force 'Q' size modifier.
// w: double width elements, same num elts
// n: double width elements, half num elts
// h: half width elements, double num elts
// q: half width elements, quad num elts
// e: half width elements, double num elts, unsigned
// m: half width elements, same num elts
// i: constant int
// l: constant uint64
// s: scalar of element type
// z: scalar of half width element type, signed
// r: scalar of double width element type, signed
// a: scalar of element type (splat to vector type)
// b: scalar of unsigned integer/long type (int/float args)
// $: scalar of signed integer/long type (int/float args)
// y: scalar of float
// o: scalar of double
// k: default elt width, double num elts
// 2,3,4: array of default vectors
// B,C,D: array of default elts, force 'Q' size modifier.
// p: pointer type
// c: const pointer type

// Every intrinsic subclasses Inst.
// Base record for a single intrinsic definition.
//   n - base (unmangled) name of the intrinsic, e.g. "vget_lane".
//   p - prototype string: a modifier for the return type followed by one
//       modifier per argument.
//   t - sequence of typespecs; the intrinsic is instantiated for each one.
//   o - the Operation to expand to (the builtin-backed SInst/IInst/WInst
//       classes pass OP_NONE).
class Inst <string n, string p, string t, Operation o> {
string Name = n;
string Prototype = p;
string Types = t;
// NOTE(review): presumably a guard expression wrapped around the generated
// definition; empty by default. Its use is not visible here - confirm in the
// emitter.
string ArchGuard = "";

Operation Operation = o;
// NOTE(review): the flags below all default to 0 and are interpreted by the
// tblgen backend; their exact semantics are not shown in this file.
bit CartesianProductOfTypes = 0;
bit BigEndianSafe = 0;
bit isShift = 0;
bit isScalarShift = 0;
bit isScalarNarrowShift = 0;
bit isVCVT_N = 0;
// For immediate checks: the immediate will be assumed to specify the lane of
// a Q register. Only used for intrinsics which end up calling polymorphic
// builtins.
bit isLaneQ = 0;

// Certain intrinsics have different names than their representative
// instructions. This field allows us to handle this correctly when we
// are generating tests.
string InstName = "";

// Certain intrinsics even though they are not a WOpInst or LOpInst,
// generate a WOpInst/LOpInst instruction (see below for definition
// of a WOpInst/LOpInst). For testing purposes we need to know
// this. Ex: vset_lane which outputs vmov instructions.
bit isHiddenWInst = 0;
bit isHiddenLInst = 0;
}

// The following instruction classes are implemented via builtins.
// These declarations are used to generate Builtins.def:
//
// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8", "p8")
// IInst: Instruction with generic integer suffix (e.g., "i8")
// WInst: Instruction with only bit size suffix (e.g., "8")
class SInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
class IInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
class WInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}

// The following instruction classes are implemented via operators
// instead of builtins. As such these declarations are only used for
// the purpose of generating tests.
//
// SOpInst: Instruction with signed/unsigned suffix (e.g., "s8",
// "u8", "p8").
// IOpInst: Instruction with generic integer suffix (e.g., "i8").
// WOpInst: Instruction with bit size only suffix (e.g., "8").
// LOpInst: Logical instruction with no bit size suffix.
// NoTestOpInst: Intrinsic that has no corresponding instruction.
class SOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class IOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class WOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class LOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class NoTestOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
2 changes: 2 additions & 0 deletions clang/lib/Basic/Targets/AArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,

if ((FPU & NeonMode) && HasFullFP16)
Builder.defineMacro("__ARM_FEATURE_FP16_VECTOR_ARITHMETIC", "1");
if (HasFullFP16)
Builder.defineMacro("__ARM_FEATURE_FP16_SCALAR_ARITHMETIC", "1");

switch (ArchKind) {
default:
Expand Down
217 changes: 215 additions & 2 deletions clang/lib/CodeGen/CGBuiltin.cpp

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions clang/lib/Headers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,12 @@ set(output_dir ${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION}/include)

# Generate arm_neon.h
clang_tablegen(arm_neon.h -gen-arm-neon
-I ${CLANG_SOURCE_DIR}/include/clang/Basic/
SOURCE ${CLANG_SOURCE_DIR}/include/clang/Basic/arm_neon.td)
# Generate arm_fp16.h
clang_tablegen(arm_fp16.h -gen-arm-fp16
-I ${CLANG_SOURCE_DIR}/include/clang/Basic/
SOURCE ${CLANG_SOURCE_DIR}/include/clang/Basic/arm_fp16.td)

set(out_files)
foreach( f ${files} ${cuda_wrapper_files} )
Expand All @@ -134,6 +139,11 @@ add_custom_command(OUTPUT ${output_dir}/arm_neon.h
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_BINARY_DIR}/arm_neon.h ${output_dir}/arm_neon.h
COMMENT "Copying clang's arm_neon.h...")
list(APPEND out_files ${output_dir}/arm_neon.h)
# Copy the generated arm_fp16.h from the current binary dir into ${output_dir}
# (only when its contents differ) and add the copy to out_files so that the
# clang-headers target depends on it.
add_custom_command(OUTPUT ${output_dir}/arm_fp16.h
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/arm_fp16.h
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_BINARY_DIR}/arm_fp16.h ${output_dir}/arm_fp16.h
COMMENT "Copying clang's arm_fp16.h...")
list(APPEND out_files ${output_dir}/arm_fp16.h)

add_custom_target(clang-headers ALL DEPENDS ${out_files})
set_target_properties(clang-headers PROPERTIES FOLDER "Misc")
Expand All @@ -144,6 +154,12 @@ install(
PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
DESTINATION lib${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION}/include)

# Install the generated arm_fp16.h into the clang resource include directory
# as part of the clang-headers component.
# NOTE(review): ${files} is listed here as well; if the preceding install()
# rule (not fully visible in this hunk) already installs ${files}, repeating it
# here is redundant and only arm_fp16.h needs to be listed - confirm.
install(
FILES ${files} ${CMAKE_CURRENT_BINARY_DIR}/arm_fp16.h
COMPONENT clang-headers
PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
DESTINATION lib${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION}/include)

install(
FILES ${cuda_wrapper_files}
COMPONENT clang-headers
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Headers/module.modulemap
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ module _Builtin_intrinsics [system] [extern_c] {
// Submodule exposing arm_neon.h and arm_fp16.h; both headers are only
// available when the `neon` feature requirement is satisfied.
// NOTE(review): arm_fp16.h is gated behind `requires neon` here - confirm that
// this is intended for the FP16 header.
explicit module neon {
requires neon
header "arm_neon.h"
header "arm_fp16.h"
export *
}
}
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1353,6 +1353,7 @@ bool Sema::CheckNeonBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
switch (BuiltinID) {
#define GET_NEON_OVERLOAD_CHECK
#include "clang/Basic/arm_neon.inc"
#include "clang/Basic/arm_fp16.inc"
#undef GET_NEON_OVERLOAD_CHECK
}

Expand Down Expand Up @@ -1404,6 +1405,7 @@ bool Sema::CheckNeonBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return false;
#define GET_NEON_IMMEDIATE_CHECK
#include "clang/Basic/arm_neon.inc"
#include "clang/Basic/arm_fp16.inc"
#undef GET_NEON_IMMEDIATE_CHECK
}

Expand Down
643 changes: 643 additions & 0 deletions clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c

Large diffs are not rendered by default.

148 changes: 147 additions & 1 deletion clang/utils/TableGen/NeonEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,11 @@ class NeonEmitter {
// run - Emit arm_neon.h.inc
void run(raw_ostream &o);

// runFP16 - Emit arm_fp16.h.inc
void runFP16(raw_ostream &o);

// runHeader - Emit all the __builtin prototypes used in arm_neon.h
// and arm_fp16.h
void runHeader(raw_ostream &o);

// runTests - Emit tests for all the Neon intrinsics.
Expand Down Expand Up @@ -852,6 +856,35 @@ void Type::applyModifier(char Mod) {
NumVectors = 0;
Float = true;
break;
case 'Y':
Bitwidth = ElementBitwidth = 16;
NumVectors = 0;
Float = true;
break;
case 'I':
Bitwidth = ElementBitwidth = 32;
NumVectors = 0;
Float = false;
Signed = true;
break;
case 'L':
Bitwidth = ElementBitwidth = 64;
NumVectors = 0;
Float = false;
Signed = true;
break;
case 'U':
Bitwidth = ElementBitwidth = 32;
NumVectors = 0;
Float = false;
Signed = false;
break;
case 'O':
Bitwidth = ElementBitwidth = 64;
NumVectors = 0;
Float = false;
Signed = false;
break;
case 'f':
Float = true;
ElementBitwidth = 32;
Expand Down Expand Up @@ -1010,7 +1043,7 @@ std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
}

/// Returns true if the prototype modifier \p Mod is one this emitter treats as
/// a floating-point modifier: 'F', 'f', 'H', 'Y' or 'I'.
///
/// Only the updated modifier set is tested; a leftover earlier `return` that
/// checked just 'F'/'f'/'H' would make 'Y' and 'I' unreachable.
///
/// NOTE(review): Type::applyModifier handles 'I' with Float = false (a 32-bit
/// signed scalar), so treating it as floating-point here looks questionable -
/// confirm the intent before relying on it.
static bool isFloatingPointProtoModifier(char Mod) {
  switch (Mod) {
  case 'F':
  case 'f':
  case 'H':
  case 'Y':
  case 'I':
    return true;
  default:
    return false;
  }
}

std::string Intrinsic::getBuiltinTypeStr() {
Expand Down Expand Up @@ -2420,12 +2453,125 @@ void NeonEmitter::run(raw_ostream &OS) {
OS << "#endif /* __ARM_NEON_H */\n";
}

/// runFP16 - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h
/// is comprised of type definitions and function declarations.
void NeonEmitter::runFP16(raw_ostream &OS) {
OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
"------------------------------"
"---===\n"
" *\n"
" * Permission is hereby granted, free of charge, to any person "
"obtaining a copy\n"
" * of this software and associated documentation files (the "
"\"Software\"), to deal\n"
" * in the Software without restriction, including without limitation "
"the rights\n"
" * to use, copy, modify, merge, publish, distribute, sublicense, "
"and/or sell\n"
" * copies of the Software, and to permit persons to whom the Software "
"is\n"
" * furnished to do so, subject to the following conditions:\n"
" *\n"
" * The above copyright notice and this permission notice shall be "
"included in\n"
" * all copies or substantial portions of the Software.\n"
" *\n"
" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
"EXPRESS OR\n"
" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
"MERCHANTABILITY,\n"
" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
"SHALL THE\n"
" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
"OTHER\n"
" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
"ARISING FROM,\n"
" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
"DEALINGS IN\n"
" * THE SOFTWARE.\n"
" *\n"
" *===-----------------------------------------------------------------"
"---"
"---===\n"
" */\n\n";

OS << "#ifndef __ARM_FP16_H\n";
OS << "#define __ARM_FP16_H\n\n";

OS << "#include <stdint.h>\n\n";

OS << "typedef __fp16 float16_t;\n";

OS << "#define __ai static inline __attribute__((__always_inline__, "
"__nodebug__))\n\n";

SmallVector<Intrinsic *, 128> Defs;
std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
for (auto *R : RV)
createIntrinsic(R, Defs);

for (auto *I : Defs)
I->indexBody();

std::stable_sort(
Defs.begin(), Defs.end(),
[](const Intrinsic *A, const Intrinsic *B) { return *A < *B; });

// Only emit a def when its requirements have been met.
// FIXME: This loop could be made faster, but it's fast enough for now.
bool MadeProgress = true;
std::string InGuard;
while (!Defs.empty() && MadeProgress) {
MadeProgress = false;

for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
I != Defs.end(); /*No step*/) {
bool DependenciesSatisfied = true;
for (auto *II : (*I)->getDependencies()) {
if (std::find(Defs.begin(), Defs.end(), II) != Defs.end())
DependenciesSatisfied = false;
}
if (!DependenciesSatisfied) {
// Try the next one.
++I;
continue;
}

// Emit #endif/#if pair if needed.
if ((*I)->getGuard() != InGuard) {
if (!InGuard.empty())
OS << "#endif\n";
InGuard = (*I)->getGuard();
if (!InGuard.empty())
OS << "#if " << InGuard << "\n";
}

// Actually generate the intrinsic code.
OS << (*I)->generate();

MadeProgress = true;
I = Defs.erase(I);
}
}
assert(Defs.empty() && "Some requirements were not satisfied!");
if (!InGuard.empty())
OS << "#endif\n";

OS << "\n";
OS << "#undef __ai\n\n";
OS << "#endif /* __ARM_FP16_H */\n";
}

namespace clang {

/// EmitNeon - Backend entry point for the "gen-arm-neon" action: construct a
/// NeonEmitter over \p Records and write the result of its run() to \p OS.
void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter Emitter(Records);
  Emitter.run(OS);
}

/// EmitFP16 - Backend entry point for the "gen-arm-fp16" action: construct a
/// NeonEmitter over \p Records and write the result of its runFP16() to
/// \p OS.
void EmitFP16(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter Emitter(Records);
  Emitter.runFP16(OS);
}

/// EmitNeonSema - Backend entry point for the "gen-arm-neon-sema" action:
/// construct a NeonEmitter over \p Records and write the result of its
/// runHeader() to \p OS.
void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter Emitter(Records);
  Emitter.runHeader(OS);
}
Expand Down
5 changes: 5 additions & 0 deletions clang/utils/TableGen/TableGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ enum ActionType {
GenClangCommentCommandInfo,
GenClangCommentCommandList,
GenArmNeon,
GenArmFP16,
GenArmNeonSema,
GenArmNeonTest,
GenAttrDocs,
Expand Down Expand Up @@ -139,6 +140,7 @@ cl::opt<ActionType> Action(
"Generate list of commands that are used in "
"documentation comments"),
clEnumValN(GenArmNeon, "gen-arm-neon", "Generate arm_neon.h for clang"),
clEnumValN(GenArmFP16, "gen-arm-fp16", "Generate arm_fp16.h for clang"),
clEnumValN(GenArmNeonSema, "gen-arm-neon-sema",
"Generate ARM NEON sema support for clang"),
clEnumValN(GenArmNeonTest, "gen-arm-neon-test",
Expand Down Expand Up @@ -250,6 +252,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenArmNeon:
EmitNeon(Records, OS);
break;
case GenArmFP16:
EmitFP16(Records, OS);
break;
case GenArmNeonSema:
EmitNeonSema(Records, OS);
break;
Expand Down
1 change: 1 addition & 0 deletions clang/utils/TableGen/TableGenBackends.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ void EmitClangCommentCommandInfo(RecordKeeper &Records, raw_ostream &OS);
void EmitClangCommentCommandList(RecordKeeper &Records, raw_ostream &OS);

// ARM NEON / FP16 backends, defined in NeonEmitter.cpp. Each one reads the
// parsed records and writes its generated text to OS:
//   EmitNeon     - "gen-arm-neon":      generate arm_neon.h for clang.
//   EmitFP16     - "gen-arm-fp16":      generate arm_fp16.h for clang.
//   EmitNeonSema - "gen-arm-neon-sema": generate ARM NEON sema support.
void EmitNeon(RecordKeeper &Records, raw_ostream &OS);
void EmitFP16(RecordKeeper &Records, raw_ostream &OS);
void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS);
void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS);
void EmitNeon2(RecordKeeper &Records, raw_ostream &OS);
Expand Down