Skip to content

Commit

Permalink
[ARM] Add ARMv8.2-A FP16 vector instructions
Browse files Browse the repository at this point in the history
ARMv8.2-A adds 16-bit floating point versions of all existing SIMD
floating-point instructions. This is an optional extension, so all of
these instructions require the FeatureFullFP16 subtarget feature.

Note that VFP without SIMD is not a valid combination for any version of
ARMv8-A, but I have ensured that these instructions all depend on both
FeatureNEON and FeatureFullFP16 for consistency.

Differential Revision: http://reviews.llvm.org/D15039

llvm-svn: 255764
  • Loading branch information
ostannard committed Dec 16, 2015
1 parent 25e241b commit 2de8c16
Show file tree
Hide file tree
Showing 10 changed files with 2,289 additions and 42 deletions.
384 changes: 356 additions & 28 deletions llvm/lib/Target/ARM/ARMInstrNEON.td

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions llvm/lib/Target/ARM/ARMRegisterInfo.td
Expand Up @@ -288,7 +288,7 @@ def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>;
// class.
// ARM requires only word alignment for double. Double-word alignment is more
// performant, though.
def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
(sequence "D%u", 0, 31)> {
// Allocate non-VFP2 registers D16-D31 first, and prefer even registers on
// Darwin platforms.
Expand All @@ -301,16 +301,16 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,

// Subset of DPR that are accessible with VFP2 (and so that also have
// 32-bit SPR subregs).
def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
(trunc DPR, 16)>;

// Subset of DPR which can be used as a source of NEON scalars for 16-bit
// operations
def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
(trunc DPR, 8)>;

// Generic 128-bit vector register class.
def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128,
(sequence "Q%u", 0, 15)> {
// Allocate non-VFP2 aliases Q8-Q15 first.
let AltOrders = [(rotl QPR, 8)];
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
Expand Up @@ -5643,9 +5643,11 @@ bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic,
// VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON
unsigned RegIdx = 3;
if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") &&
static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32") {
(static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32" ||
static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f16")) {
if (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f32")
(static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f32" ||
static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f16"))
RegIdx = 4;

if (static_cast<ARMOperand &>(*Operands[RegIdx]).isReg() &&
Expand Down
74 changes: 66 additions & 8 deletions llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
Expand Up @@ -5073,6 +5073,10 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,

static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
const FeatureBitset &featureBits =
((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
bool hasFullFP16 = featureBits[ARM::FeatureFullFP16];

unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
Expand All @@ -5083,10 +5087,35 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,

DecodeStatus S = MCDisassembler::Success;

// VMOVv2f32 is ambiguous with these decodings.
if (!(imm & 0x38) && cmode == 0xF) {
if (op == 1) return MCDisassembler::Fail;
Inst.setOpcode(ARM::VMOVv2f32);
// If the top 3 bits of imm are clear, this is a VMOV (immediate)
if (!(imm & 0x38)) {
if (cmode == 0xF) {
if (op == 1) return MCDisassembler::Fail;
Inst.setOpcode(ARM::VMOVv2f32);
}
if (hasFullFP16) {
if (cmode == 0xE) {
if (op == 1) {
Inst.setOpcode(ARM::VMOVv1i64);
} else {
Inst.setOpcode(ARM::VMOVv8i8);
}
}
if (cmode == 0xD) {
if (op == 1) {
Inst.setOpcode(ARM::VMVNv2i32);
} else {
Inst.setOpcode(ARM::VMOVv2i32);
}
}
if (cmode == 0xC) {
if (op == 1) {
Inst.setOpcode(ARM::VMVNv2i32);
} else {
Inst.setOpcode(ARM::VMOVv2i32);
}
}
}
return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
}

Expand All @@ -5103,6 +5132,10 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,

static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
const FeatureBitset &featureBits =
((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
bool hasFullFP16 = featureBits[ARM::FeatureFullFP16];

unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
Expand All @@ -5113,10 +5146,35 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,

DecodeStatus S = MCDisassembler::Success;

// VMOVv4f32 is ambiguous with these decodings.
if (!(imm & 0x38) && cmode == 0xF) {
if (op == 1) return MCDisassembler::Fail;
Inst.setOpcode(ARM::VMOVv4f32);
// If the top 3 bits of imm are clear, this is a VMOV (immediate)
if (!(imm & 0x38)) {
if (cmode == 0xF) {
if (op == 1) return MCDisassembler::Fail;
Inst.setOpcode(ARM::VMOVv4f32);
}
if (hasFullFP16) {
if (cmode == 0xE) {
if (op == 1) {
Inst.setOpcode(ARM::VMOVv2i64);
} else {
Inst.setOpcode(ARM::VMOVv16i8);
}
}
if (cmode == 0xD) {
if (op == 1) {
Inst.setOpcode(ARM::VMVNv4i32);
} else {
Inst.setOpcode(ARM::VMOVv4i32);
}
}
if (cmode == 0xC) {
if (op == 1) {
Inst.setOpcode(ARM::VMVNv4i32);
} else {
Inst.setOpcode(ARM::VMOVv4i32);
}
}
}
return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
}

Expand Down

0 comments on commit 2de8c16

Please sign in to comment.