Skip to content

Commit

Permalink
[AArch64] v8.3-a complex number support
Browse files Browse the repository at this point in the history
New instructions are added to AArch32 and AArch64 to aid
floating-point multiplication and addition of complex numbers,
where the complex numbers are packed in a vector register as a
pair of elements. The Imaginary part of the number is placed in the
more significant element, and the Real part of the number is placed
in the less significant element.

Differential Revision: https://reviews.llvm.org/D36792

llvm-svn: 312228
  • Loading branch information
sparker-arm committed Aug 31, 2017
1 parent e15300e commit 5f93464
Show file tree
Hide file tree
Showing 7 changed files with 531 additions and 0 deletions.
232 changes: 232 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrFormats.td
Expand Up @@ -9391,6 +9391,238 @@ multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
}
} // let Predicates = [HasNeon, HasRDM]

//----------------------------------------------------------------------------
// ARMv8.3 Complex ADD/MLA instructions
//----------------------------------------------------------------------------

class ComplexRotationOperand<int Angle, int Remainder, string Type>
: AsmOperandClass {
let PredicateMethod = "isComplexRotation<" # Angle # ", " # Remainder # ">";
let DiagnosticType = "InvalidComplexRotation" # Type;
let Name = "ComplexRotation" # Type;
}
def complexrotateop : Operand<i32> {
let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">;
let PrintMethod = "printComplexRotationOp<90, 0>";
}
def complexrotateopodd : Operand<i32> {
let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">;
let PrintMethod = "printComplexRotationOp<180, 90>";
}

let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
RegisterOperand regtype, Operand rottype,
string asm, string kind, list<dag> pattern>
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
"|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "", pattern>,
Sched<[WriteV]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
bits<1> rot;
let Inst{31} = 0;
let Inst{30} = Q;
let Inst{29} = U;
let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
let Inst{21} = 0;
let Inst{20-16} = Rm;
let Inst{15-13} = opcode;
// Non-tied version (FCADD) only has one rotation bit
let Inst{12} = rot;
let Inst{11} = 0;
let Inst{10} = 1;
let Inst{9-5} = Rn;
let Inst{4-0} = Rd;
}

multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype,
string asm, SDPatternOperator OpNode>{
let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in {
def v4f16 : BaseSIMDThreeSameVectorComplex<0, U, 0b01, opcode, V64, rottype,
asm, ".4h",
[(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
(v4f16 V64:$Rn),
(v4f16 V64:$Rm),
(rottype i32:$rot)))]>;

def v8f16 : BaseSIMDThreeSameVectorComplex<1, U, 0b01, opcode, V128, rottype,
asm, ".8h",
[(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd),
(v8f16 V128:$Rn),
(v8f16 V128:$Rm),
(rottype i32:$rot)))]>;
}

let Predicates = [HasV8_3a, HasNEON] in {
def v2f32 : BaseSIMDThreeSameVectorComplex<0, U, 0b10, opcode, V64, rottype,
asm, ".2s",
[(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
(v2f32 V64:$Rn),
(v2f32 V64:$Rm),
(rottype i32:$rot)))]>;

def v4f32 : BaseSIMDThreeSameVectorComplex<1, U, 0b10, opcode, V128, rottype,
asm, ".4s",
[(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
(v4f32 V128:$Rn),
(v4f32 V128:$Rm),
(rottype i32:$rot)))]>;

def v2f64 : BaseSIMDThreeSameVectorComplex<1, U, 0b11, opcode, V128, rottype,
asm, ".2d",
[(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd),
(v2f64 V128:$Rn),
(v2f64 V128:$Rm),
(rottype i32:$rot)))]>;
}
}

let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDThreeSameVectorTiedComplex<bit Q, bit U, bits<2> size,
bits<3> opcode,
RegisterOperand regtype,
Operand rottype, string asm,
string kind, list<dag> pattern>
: I<(outs regtype:$dst),
(ins regtype:$Rd, regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
"|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "$Rd = $dst", pattern>,
Sched<[WriteV]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
bits<2> rot;
let Inst{31} = 0;
let Inst{30} = Q;
let Inst{29} = U;
let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
let Inst{21} = 0;
let Inst{20-16} = Rm;
let Inst{15-13} = opcode;
let Inst{12-11} = rot;
let Inst{10} = 1;
let Inst{9-5} = Rn;
let Inst{4-0} = Rd;
}

multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode,
Operand rottype, string asm,
SDPatternOperator OpNode> {
let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in {
def v4f16 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b01, opcode, V64,
rottype, asm, ".4h",
[(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
(v4f16 V64:$Rn),
(v4f16 V64:$Rm),
(rottype i32:$rot)))]>;

def v8f16 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b01, opcode, V128,
rottype, asm, ".8h",
[(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd),
(v8f16 V128:$Rn),
(v8f16 V128:$Rm),
(rottype i32:$rot)))]>;
}

let Predicates = [HasV8_3a, HasNEON] in {
def v2f32 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b10, opcode, V64,
rottype, asm, ".2s",
[(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
(v2f32 V64:$Rn),
(v2f32 V64:$Rm),
(rottype i32:$rot)))]>;

def v4f32 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b10, opcode, V128,
rottype, asm, ".4s",
[(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
(v4f32 V128:$Rn),
(v4f32 V128:$Rm),
(rottype i32:$rot)))]>;

def v2f64 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b11, opcode, V128,
rottype, asm, ".2d",
[(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd),
(v2f64 V128:$Rn),
(v2f64 V128:$Rm),
(rottype i32:$rot)))]>;
}
}

let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDIndexedTiedComplex<bit Q, bit U, bit Scalar, bits<2> size,
bit opc1, bit opc2, RegisterOperand dst_reg,
RegisterOperand lhs_reg,
RegisterOperand rhs_reg, Operand vec_idx,
Operand rottype, string asm, string apple_kind,
string dst_kind, string lhs_kind,
string rhs_kind, list<dag> pattern>
: I<(outs dst_reg:$dst),
(ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx, rottype:$rot),
asm,
"{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind #
"$idx, $rot" # "|" # apple_kind #
"\t$Rd, $Rn, $Rm$idx, $rot}", "$Rd = $dst", pattern>,
Sched<[WriteV]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
bits<2> rot;

let Inst{31} = 0;
let Inst{30} = Q;
let Inst{29} = U;
let Inst{28} = Scalar;
let Inst{27-24} = 0b1111;
let Inst{23-22} = size;
// Bit 21 must be set by the derived class.
let Inst{20-16} = Rm;
let Inst{15} = opc1;
let Inst{14-13} = rot;
let Inst{12} = opc2;
// Bit 11 must be set by the derived class.
let Inst{10} = 0;
let Inst{9-5} = Rn;
let Inst{4-0} = Rd;
}

// The complex instructions index by pairs of elements, so the VectorIndexes
// don't match the lane types, and the index bits are different to the other
// classes.
multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
string asm, SDPatternOperator OpNode> {
let Predicates = [HasV8_3a,HasNEON,HasFullFP16] in {
def v4f16_indexed : BaseSIMDIndexedTiedComplex<0, 1, 0, 0b01, opc1, opc2, V64,
V64, V128, VectorIndexD, rottype, asm, ".4h", ".4h",
".4h", ".h", []> {
bits<1> idx;
let Inst{11} = 0;
let Inst{21} = idx{0};
}

def v8f16_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b01, opc1, opc2,
V128, V128, V128, VectorIndexS, rottype, asm, ".8h",
".8h", ".8h", ".h", []> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
}
} // Predicates = [HasV8_3a,HasNEON,HasFullFP16]

let Predicates = [HasV8_3a,HasNEON] in {
def v4f32_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b10, opc1, opc2,
V128, V128, V128, VectorIndexD, rottype, asm, ".4s",
".4s", ".4s", ".s", []> {
bits<1> idx;
let Inst{11} = idx{0};
let Inst{21} = 0;
}
} // Predicates = [HasV8_3a,HasNEON]
}

//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Expand Up @@ -460,6 +460,15 @@ let Predicates = [HasRCPC] in {
def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>;
}

// v8.3a complex add and multiply-accumulate. No predicate here, that is done
// inside the multiclass as the FP16 versions need different predicates.
defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
"fcmla", null_frag>;
defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
"fcadd", null_frag>;
defm FCMLA : SIMDIndexedTiedComplexHSD<1, 0, 1, complexrotateop, "fcmla",
null_frag>;

let Predicates = [HasV8_3a] in {
// v8.3a Pointer Authentication
let Uses = [LR], Defs = [LR] in {
Expand Down
29 changes: 29 additions & 0 deletions llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
Expand Up @@ -835,6 +835,17 @@ class AArch64Operand : public MCParsedAsmOperand {
AArch64MCRegisterClasses[AArch64::GPR64spRegClassID].contains(Reg.RegNum);
}

template<int64_t Angle, int64_t Remainder>
bool isComplexRotation() const {
if (!isImm()) return false;

const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
uint64_t Value = CE->getValue();

return (Value % Angle == Remainder && Value <= 270);
}

/// Is this a vector list with the type implicit (presumably attached to the
/// instruction itself)?
template <unsigned NumRegs> bool isImplicitlyTypedVectorList() const {
Expand Down Expand Up @@ -1527,6 +1538,18 @@ class AArch64Operand : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::createImm((~Value >> Shift) & 0xffff));
}

void addComplexRotationEvenOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(MCE->getValue() / 90));
}

void addComplexRotationOddOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm((MCE->getValue() - 90) / 180));
}

void print(raw_ostream &OS) const override;

static std::unique_ptr<AArch64Operand>
Expand Down Expand Up @@ -3402,6 +3425,10 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
return Error(Loc, "expected readable system register");
case Match_MSR:
return Error(Loc, "expected writable system register or pstate");
case Match_InvalidComplexRotationEven:
return Error(Loc, "complex rotation must be 0, 90, 180 or 270.");
case Match_InvalidComplexRotationOdd:
return Error(Loc, "complex rotation must be 90 or 270.");
case Match_MnemonicFail: {
std::string Suggestion = AArch64MnemonicSpellCheck(
((AArch64Operand &)*Operands[0]).getToken(),
Expand Down Expand Up @@ -3802,6 +3829,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidIndexS:
case Match_InvalidIndexD:
case Match_InvalidLabel:
case Match_InvalidComplexRotationEven:
case Match_InvalidComplexRotationOdd:
case Match_MSR:
case Match_MRS: {
if (ErrorInfo >= Operands.size())
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
Expand Up @@ -1331,3 +1331,12 @@ void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo,
uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal);
O << format("#%#016llx", Val);
}

template<int64_t Angle, int64_t Remainder>
void AArch64InstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
O << "#" << (Val * Angle) + Remainder;
}

3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
Expand Up @@ -158,6 +158,9 @@ class AArch64InstPrinter : public MCInstPrinter {
const MCSubtargetInfo &STI, raw_ostream &O);
void printSIMDType10Operand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
template<int64_t Angle, int64_t Remainder>
void printComplexRotationOp(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
template<unsigned size>
void printGPRSeqPairsClassOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
Expand Down

0 comments on commit 5f93464

Please sign in to comment.