diff --git a/llvm/include/llvm/MC/MCDecoder.h b/llvm/include/llvm/MC/MCDecoder.h index 70762a4a5ebae..459c8a6a5ea34 100644 --- a/llvm/include/llvm/MC/MCDecoder.h +++ b/llvm/include/llvm/MC/MCDecoder.h @@ -12,6 +12,7 @@ #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/Support/MathExtras.h" +#include #include namespace llvm::MCD { @@ -48,6 +49,15 @@ fieldFromInstruction(const InsnType &Insn, unsigned StartBit, return Insn.extractBitsAsZExtValue(NumBits, StartBit); } +template +uint64_t fieldFromInstruction(const std::bitset &Insn, unsigned StartBit, + unsigned NumBits) { + assert(StartBit + NumBits <= N && "Instruction field out of bounds!"); + assert(NumBits <= 64 && "Cannot support >64-bit extractions!"); + const std::bitset Mask(maskTrailingOnes(NumBits)); + return ((Insn >> StartBit) & Mask).to_ullong(); +} + // Helper function for inserting bits extracted from an encoded instruction into // an integer-typed field. template @@ -62,6 +72,13 @@ insertBits(IntType &field, IntType bits, unsigned startBit, unsigned numBits) { field |= bits << startBit; } +// InsnBitWidth is essentially a type trait used by the decoder emitter to query +// the supported bitwidth for a given type. By default, the value is 0, making +// it an invalid type for use as `InsnType` when instantiating the decoder. +// Individual targets are expected to provide specializations for these based +// on their usage. 
+template static constexpr uint32_t InsnBitWidth = 0; + } // namespace llvm::MCD #endif // LLVM_MC_MCDECODER_H diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index 619ff4e5c73c4..05295ae73be23 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -6,7 +6,8 @@ tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv) tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel) -tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler) +tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler + --specialize-decoders-per-bitwidth) tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info) tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM AMDGPUGenMCPseudoLowering.inc -gen-pseudo-lowering) diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 6a2beeed41dfd..80d194afa926b 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -38,6 +38,7 @@ #include "llvm/Support/Compiler.h" using namespace llvm; +using namespace llvm::MCD; #define DEBUG_TYPE "amdgpu-disassembler" @@ -446,6 +447,14 @@ static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm, #include "AMDGPUGenDisassemblerTables.inc" +// Define bitwidths for various types used to instantiate the decoder. 
+template <> static constexpr uint32_t llvm::MCD::InsnBitWidth = 32; +template <> static constexpr uint32_t llvm::MCD::InsnBitWidth = 64; +template <> +static constexpr uint32_t llvm::MCD::InsnBitWidth> = 96; +template <> +static constexpr uint32_t llvm::MCD::InsnBitWidth> = 128; + //===----------------------------------------------------------------------===// // //===----------------------------------------------------------------------===// @@ -498,26 +507,24 @@ template static inline T eatBytes(ArrayRef& Bytes) { return Res; } -static inline DecoderUInt128 eat12Bytes(ArrayRef &Bytes) { +static inline std::bitset<96> eat12Bytes(ArrayRef &Bytes) { + using namespace llvm::support::endian; assert(Bytes.size() >= 12); - uint64_t Lo = - support::endian::read(Bytes.data()); + std::bitset<96> Lo(read(Bytes.data())); Bytes = Bytes.slice(8); - uint64_t Hi = - support::endian::read(Bytes.data()); + std::bitset<96> Hi(read(Bytes.data())); Bytes = Bytes.slice(4); - return DecoderUInt128(Lo, Hi); + return (Hi << 64) | Lo; } -static inline DecoderUInt128 eat16Bytes(ArrayRef &Bytes) { +static inline std::bitset<128> eat16Bytes(ArrayRef &Bytes) { + using namespace llvm::support::endian; assert(Bytes.size() >= 16); - uint64_t Lo = - support::endian::read(Bytes.data()); + std::bitset<128> Lo(read(Bytes.data())); Bytes = Bytes.slice(8); - uint64_t Hi = - support::endian::read(Bytes.data()); + std::bitset<128> Hi(read(Bytes.data())); Bytes = Bytes.slice(8); - return DecoderUInt128(Lo, Hi); + return (Hi << 64) | Lo; } void AMDGPUDisassembler::decodeImmOperands(MCInst &MI, @@ -600,14 +607,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2 // encodings if (isGFX1250() && Bytes.size() >= 16) { - DecoderUInt128 DecW = eat16Bytes(Bytes); + std::bitset<128> DecW = eat16Bytes(Bytes); if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS)) break; Bytes = Bytes_.slice(0, 
MaxInstBytesNum); } if (isGFX11Plus() && Bytes.size() >= 12) { - DecoderUInt128 DecW = eat12Bytes(Bytes); + std::bitset<96> DecW = eat12Bytes(Bytes); if (isGFX11() && tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI, @@ -642,7 +649,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } else if (Bytes.size() >= 16 && STI.hasFeature(AMDGPU::FeatureGFX950Insts)) { - DecoderUInt128 DecW = eat16Bytes(Bytes); + std::bitset<128> DecW = eat16Bytes(Bytes); if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS)) break; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index f4d164bf10c3c..ded447b6f8d5a 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -32,44 +32,6 @@ class MCOperand; class MCSubtargetInfo; class Twine; -// Exposes an interface expected by autogenerated code in -// FixedLenDecoderEmitter -class DecoderUInt128 { -private: - uint64_t Lo = 0; - uint64_t Hi = 0; - -public: - DecoderUInt128() = default; - DecoderUInt128(uint64_t Lo, uint64_t Hi = 0) : Lo(Lo), Hi(Hi) {} - operator bool() const { return Lo || Hi; } - uint64_t extractBitsAsZExtValue(unsigned NumBits, - unsigned BitPosition) const { - assert(NumBits && NumBits <= 64); - assert(BitPosition < 128); - uint64_t Val; - if (BitPosition < 64) - Val = Lo >> BitPosition | Hi << 1 << (63 - BitPosition); - else - Val = Hi >> (BitPosition - 64); - return Val & ((uint64_t(2) << (NumBits - 1)) - 1); - } - DecoderUInt128 operator&(const DecoderUInt128 &RHS) const { - return DecoderUInt128(Lo & RHS.Lo, Hi & RHS.Hi); - } - DecoderUInt128 operator&(const uint64_t &RHS) const { - return *this & DecoderUInt128(RHS); - } - DecoderUInt128 operator~() const { return DecoderUInt128(~Lo, ~Hi); } - bool operator==(const DecoderUInt128 &RHS) { - return Lo == RHS.Lo && Hi == RHS.Hi; - } - bool operator!=(const 
DecoderUInt128 &RHS) { - return Lo != RHS.Lo || Hi != RHS.Hi; - } - bool operator!=(const int &RHS) { return *this != DecoderUInt128(RHS); } -}; - //===----------------------------------------------------------------------===// // AMDGPUDisassembler //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 47329b2c2f4d2..531238ae85029 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -7,7 +7,8 @@ tablegen(LLVM RISCVGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM RISCVGenCompressInstEmitter.inc -gen-compress-inst-emitter) tablegen(LLVM RISCVGenMacroFusion.inc -gen-macro-fusion-pred) tablegen(LLVM RISCVGenDAGISel.inc -gen-dag-isel) -tablegen(LLVM RISCVGenDisassemblerTables.inc -gen-disassembler) +tablegen(LLVM RISCVGenDisassemblerTables.inc -gen-disassembler + --specialize-decoders-per-bitwidth) tablegen(LLVM RISCVGenInstrInfo.inc -gen-instr-info) tablegen(LLVM RISCVGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM RISCVGenMCPseudoLowering.inc -gen-pseudo-lowering) diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index de1bdb4a8811c..c8b89f5192c3d 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -558,7 +558,7 @@ static DecodeStatus decodeXqccmpRlistS0(MCInst &Inst, uint32_t Imm, return decodeZcmpRlist(Inst, Imm, Address, Decoder); } -static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn, +static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint16_t Insn, uint64_t Address, const MCDisassembler *Decoder) { uint32_t Rs1 = fieldFromInstruction(Insn, 7, 5); @@ -701,6 +701,12 @@ static constexpr DecoderListEntry DecoderList32[]{ {DecoderTableZdinxRV32Only32, {}, "RV32-only Zdinx (Double in Integer)"}, }; +// Define bitwidths for 
various types used to instantiate the decoder. +template <> static constexpr uint32_t llvm::MCD::InsnBitWidth = 16; +template <> static constexpr uint32_t llvm::MCD::InsnBitWidth = 32; +// Use uint64_t to represent 48 bit instructions. +template <> static constexpr uint32_t llvm::MCD::InsnBitWidth = 48; + DecodeStatus RISCVDisassembler::getInstruction32(MCInst &MI, uint64_t &Size, ArrayRef Bytes, uint64_t Address, @@ -711,9 +717,7 @@ DecodeStatus RISCVDisassembler::getInstruction32(MCInst &MI, uint64_t &Size, } Size = 4; - // Use uint64_t to match getInstruction48. decodeInstruction is templated - // on the Insn type. - uint64_t Insn = support::endian::read32le(Bytes.data()); + uint32_t Insn = support::endian::read32le(Bytes.data()); for (const DecoderListEntry &Entry : DecoderList32) { if (!Entry.haveContainedFeatures(STI.getFeatureBits())) @@ -759,9 +763,7 @@ DecodeStatus RISCVDisassembler::getInstruction16(MCInst &MI, uint64_t &Size, } Size = 2; - // Use uint64_t to match getInstruction48. decodeInstruction is templated - // on the Insn type. 
- uint64_t Insn = support::endian::read16le(Bytes.data()); + uint16_t Insn = support::endian::read16le(Bytes.data()); for (const DecoderListEntry &Entry : DecoderList16) { if (!Entry.haveContainedFeatures(STI.getFeatureBits())) diff --git a/llvm/test/TableGen/DecoderEmitterBitwidthSpecialization.td b/llvm/test/TableGen/DecoderEmitterBitwidthSpecialization.td new file mode 100644 index 0000000000000..b4142e983ef77 --- /dev/null +++ b/llvm/test/TableGen/DecoderEmitterBitwidthSpecialization.td @@ -0,0 +1,175 @@ +// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s --check-prefix=CHECK-DEFAULT +// RUN: llvm-tblgen -gen-disassembler -specialize-decoders-per-bitwidth -I %p/../../include %s | FileCheck %s --check-prefix=CHECK-SPECIALIZE-NO-TABLE +// RUN: llvm-tblgen -gen-disassembler -specialize-decoders-per-bitwidth -use-fn-table-in-decode-to-mcinst -I %p/../../include %s | FileCheck %s --check-prefix=CHECK-SPECIALIZE-TABLE + + +include "llvm/Target/Target.td" + +def archInstrInfo : InstrInfo { } + +def arch : Target { + let InstructionSet = archInstrInfo; +} + +let Namespace = "arch" in { + def R0 : Register<"r0">; + def R1 : Register<"r1">; + def R2 : Register<"r2">; + def R3 : Register<"r3">; +} +def Regs : RegisterClass<"Regs", [i32], 32, (add R0, R1, R2, R3)>; + +// Bit 0 of the encoding determines the size (8 or 16 bits). +// Bits {3..1} define the number of operands encoded. +class Instruction8Bit : Instruction { + let Size = 1; + let OutOperandList = (outs); + field bits<8> Inst; + let Inst{0} = 0; + let Inst{3-1} = NumOps; +} + +class Instruction16Bit : Instruction { + let Size = 2; + let OutOperandList = (outs); + field bits<16> Inst; + let Inst{0} = 1; + let Inst{3-1} = NumOps; +} + +// Define instructions to generate 4 cases in decodeToMCInst. +// Each register operand needs 2 bits to encode. + +// An instruction with no inputs. 
+def Inst0 : Instruction8Bit<0> { + let Inst{7-4} = 0; + let InOperandList = (ins); + let AsmString = "Inst0"; +} + +// An instruction with a single input. +def Inst1 : Instruction8Bit<1> { + bits<2> r0; + let Inst{5-4} = r0; + let Inst{7-6} = 0; + let InOperandList = (ins Regs:$r0); + let AsmString = "Inst1"; +} + +// An instruction with two inputs. +def Inst2 : Instruction16Bit<2> { + bits<2> r0; + bits<2> r1; + let Inst{5-4} = r0; + let Inst{7-6} = r1; + let Inst{15-8} = 0; + let InOperandList = (ins Regs:$r0, Regs:$r1); + let AsmString = "Inst2"; +} + +// An instruction with three inputs. . +def Inst3 : Instruction16Bit<3> { + bits<2> r0; + bits<2> r1; + bits<2> r2; + let Inst{5-4} = r0; + let Inst{7-6} = r1; + let Inst{9-8} = r2; + let Inst{15-10} = 0; + let InOperandList = (ins Regs:$r0, Regs:$r1, Regs:$r2); + let AsmString = "Inst3"; +} + +// ----------------------------------------------------------------------------- +// In the default case, we emit a single decodeToMCinst function and DecodeIdx +// is shared across all bitwidths. + +// CHECK-DEFAULT-LABEL: DecoderTable8[25] +// CHECK-DEFAULT: DecodeIdx: 0 +// CHECK-DEFAULT: DecodeIdx: 1 +// CHECK-DEFAULT: }; + +// CHECK-DEFAULT-LABEL: DecoderTable16[25] +// CHECK-DEFAULT: DecodeIdx: 2 +// CHECK-DEFAULT: DecodeIdx: 3 +// CHECK-DEFAULT: }; + +// CHECK-DEFAULT-LABEL: template +// CHECK-DEFAULT-NEXT: static DecodeStatus decodeToMCInst +// CHECK-DEFAULT: case 0 +// CHECK-DEFAULT: case 1 +// CHECK-DEFAULT: case 2 +// CHECK-DEFAULT: case 3 + +// ----------------------------------------------------------------------------- +// When we specialize per bitwidth, we emit 2 decodeToMCInst functions and +// DecodeIdx is assigned per bit width. 
+ +// CHECK-SPECIALIZE-NO-TABLE-LABEL: DecoderTable8[25] +// CHECK-SPECIALIZE-NO-TABLE: DecodeIdx: 0 +// CHECK-SPECIALIZE-NO-TABLE: DecodeIdx: 1 +// CHECK-SPECIALIZE-NO-TABLE: }; + +// CHECK-SPECIALIZE-NO-TABLE-LABEL: template +// CHECK-SPECIALIZE-NO-TABLE-NEXT: static std::enable_if_t == 8, DecodeStatus> +// CHECK-SPECIALIZE-NO-TABLE-NEXT: decodeToMCInst +// CHECK-SPECIALIZE-NO-TABLE: case 0 +// CHECK-SPECIALIZE-NO-TABLE: case 1 + +// CHECK-SPECIALIZE-NO-TABLE-LABEL: DecoderTable16[25] +// CHECK-SPECIALIZE-NO-TABLE: DecodeIdx: 0 +// CHECK-SPECIALIZE-NO-TABLE: DecodeIdx: 1 +// CHECK-SPECIALIZE-NO-TABLE: }; + +// CHECK-SPECIALIZE-NO-TABLE-LABEL: template +// CHECK-SPECIALIZE-NO-TABLE-NEXT: static std::enable_if_t == 16, DecodeStatus> +// CHECK-SPECIALIZE-NO-TABLE-NEXT: decodeToMCInst +// CHECK-SPECIALIZE-NO-TABLE: case 0 +// CHECK-SPECIALIZE-NO-TABLE: case 1 + +// ----------------------------------------------------------------------------- +// Per bitwidth specialization with function table. + +// 8 bit decoder table, functions, and function table. 
+// CHECK-SPECIALIZE-TABLE-LABEL: DecoderTable8[25] +// CHECK-SPECIALIZE-TABLE: DecodeIdx: 0 +// CHECK-SPECIALIZE-TABLE: DecodeIdx: 1 +// CHECK-SPECIALIZE-TABLE: }; + +// CHECK-SPECIALIZE-TABLE-LABEL: template +// CHECK-SPECIALIZE-TABLE-NEXT: static std::enable_if_t == 8, DecodeStatus> +// CHECK-SPECIALIZE-TABLE-NEXT: decodeFn_8bit_0 + +// CHECK-SPECIALIZE-TABLE-LABEL: template +// CHECK-SPECIALIZE-TABLE-NEXT: static std::enable_if_t == 8, DecodeStatus> +// CHECK-SPECIALIZE-TABLE-NEXT: decodeFn_8bit_1 + +// CHECK-SPECIALIZE-TABLE-LABEL: template +// CHECK-SPECIALIZE-TABLE-NEXT: static std::enable_if_t == 8, DecodeStatus> +// CHECK-SPECIALIZE-TABLE-NEXT: decodeToMCInst +// CHECK-SPECIALIZE-TABLE-LABEL: static constexpr DecodeFnTy decodeFnTable[] = { +// CHECK-SPECIALIZE-TABLE-NEXT: decodeFn_8bit_0, +// CHECK-SPECIALIZE-TABLE-NEXT: decodeFn_8bit_1, +// CHECK-SPECIALIZE-TABLE-NEXT: }; + +// 16 bit decoder table, functions, and function table. +// CHECK-SPECIALIZE-TABLE-LABEL: DecoderTable16[25] +// CHECK-SPECIALIZE-TABLE: DecodeIdx: 0 +// CHECK-SPECIALIZE-TABLE: DecodeIdx: 1 +// CHECK-SPECIALIZE-TABLE: }; + +// CHECK-SPECIALIZE-TABLE-LABEL: template +// CHECK-SPECIALIZE-TABLE-NEXT: static std::enable_if_t == 16, DecodeStatus> +// CHECK-SPECIALIZE-TABLE-NEXT: decodeFn_16bit_0 + +// CHECK-SPECIALIZE-TABLE-LABEL: template +// CHECK-SPECIALIZE-TABLE-NEXT: static std::enable_if_t == 16, DecodeStatus> +// CHECK-SPECIALIZE-TABLE-NEXT: decodeFn_16bit_1 + +// CHECK-SPECIALIZE-TABLE-LABEL: template +// CHECK-SPECIALIZE-TABLE-NEXT: static std::enable_if_t == 16, DecodeStatus> +// CHECK-SPECIALIZE-TABLE-NEXT: decodeToMCInst +// CHECK-SPECIALIZE-TABLE-LABEL: static constexpr DecodeFnTy decodeFnTable[] = { +// CHECK-SPECIALIZE-TABLE-NEXT: decodeFn_16bit_0, +// CHECK-SPECIALIZE-TABLE-NEXT: decodeFn_16bit_1, +// CHECK-SPECIALIZE-TABLE-NEXT: }; diff --git a/llvm/test/TableGen/DecoderEmitterFnTable.td b/llvm/test/TableGen/DecoderEmitterFnTable.td index 7bed18c19a513..8929e6da716e6 
100644 --- a/llvm/test/TableGen/DecoderEmitterFnTable.td +++ b/llvm/test/TableGen/DecoderEmitterFnTable.td @@ -71,14 +71,14 @@ def Inst3 : TestInstruction { let AsmString = "Inst3"; } -// CHECK-LABEL: DecodeStatus decodeFn0(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete) -// CHECK-LABEL: DecodeStatus decodeFn1(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete) -// CHECK-LABEL: DecodeStatus decodeFn2(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete) -// CHECK-LABEL: DecodeStatus decodeFn3(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete) +// CHECK-LABEL: DecodeStatus decodeFn_0(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete) +// CHECK-LABEL: DecodeStatus decodeFn_1(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete) +// CHECK-LABEL: DecodeStatus decodeFn_2(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete) +// CHECK-LABEL: DecodeStatus decodeFn_3(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete) // CHECK-LABEL: decodeToMCInst(unsigned Idx, DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete) // CHECK: static constexpr DecodeFnTy decodeFnTable[] -// CHECK-NEXT: decodeFn0, -// CHECK-NEXT: decodeFn1, -// CHECK-NEXT: decodeFn2, -// CHECK-NEXT: decodeFn3, +// CHECK-NEXT: decodeFn_0, +// CHECK-NEXT: decodeFn_1, +// CHECK-NEXT: decodeFn_2, +// CHECK-NEXT: decodeFn_3, // CHECK: return decodeFnTable[Idx](S, insn, MI, Address, Decoder, DecodeComplete) diff --git a/llvm/test/TableGen/HwModeEncodeDecode3.td 
b/llvm/test/TableGen/HwModeEncodeDecode3.td index dbbf866f057e5..5e9ac7d17e45a 100644 --- a/llvm/test/TableGen/HwModeEncodeDecode3.td +++ b/llvm/test/TableGen/HwModeEncodeDecode3.td @@ -118,8 +118,6 @@ def unrelated: Instruction { // exact duplicates and could effectively be merged into one. // DECODER-LABEL: DecoderTable32 // DECODER-DAG: Opcode: bar -// DECODER-LABEL: DecoderTable64 -// DECODER-DAG: Opcode: fooTypeEncDefault:foo // DECODER-LABEL: DecoderTable_ModeA32 // DECODER-DAG: Opcode: fooTypeEncA:foo // DECODER-DAG: Opcode: bar @@ -138,13 +136,13 @@ def unrelated: Instruction { // DECODER-DAG: Opcode: unrelated // DECODER-LABEL: DecoderTableAlt_ModeC32 // DECODER-DAG: Opcode: unrelated +// DECODER-LABEL: DecoderTable64 +// DECODER-DAG: Opcode: fooTypeEncDefault:foo // Under the 'O1' optimization level, unnecessary duplicate tables will be eliminated, // reducing the four ‘Alt’ tables down to just one. // DECODER-SUPPRESS-O1-LABEL: DecoderTable32 // DECODER-SUPPRESS-O1-DAG: Opcode: bar -// DECODER-SUPPRESS-O1-LABEL: DecoderTable64 -// DECODER-SUPPRESS-O1-DAG: Opcode: fooTypeEncDefault:foo // DECODER-SUPPRESS-O1-LABEL: DecoderTable_ModeA32 // DECODER-SUPPRESS-O1-DAG: Opcode: fooTypeEncA:foo // DECODER-SUPPRESS-O1-DAG: Opcode: bar @@ -157,6 +155,8 @@ def unrelated: Instruction { // DECODER-SUPPRESS-O1-DAG: Opcode: bar // DECODER-SUPPRESS-O1-LABEL: DecoderTableAlt32 // DECODER-SUPPRESS-O1-DAG: Opcode: unrelated +// DECODER-SUPPRESS-O1-LABEL: DecoderTable64 +// DECODER-SUPPRESS-O1-DAG: Opcode: fooTypeEncDefault:foo // Under the 'O2' optimization condition, instructions possessing the 'EncodingByHwMode' // attribute will be extracted from their original DecoderNamespace and placed into their @@ -166,9 +166,6 @@ def unrelated: Instruction { // consider the interplay between HwMode and DecoderNamespace for their instructions. 
// DECODER-SUPPRESS-O2-LABEL: DecoderTable32 // DECODER-SUPPRESS-O2-DAG: Opcode: bar -// DECODER-SUPPRESS-O2-LABEL: DecoderTable64 -// DECODER-SUPPRESS-O2-NOT: Opcode: bar -// DECODER-SUPPRESS-O2-DAG: Opcode: fooTypeEncDefault:foo // DECODER-SUPPRESS-O2-LABEL: DecoderTable_ModeA32 // DECODER-SUPPRESS-O2-DAG: Opcode: fooTypeEncA:foo // DECODER-SUPPRESS-O2-NOT: Opcode: bar @@ -181,6 +178,9 @@ def unrelated: Instruction { // DECODER-SUPPRESS-O2-NOT: Opcode: bar // DECODER-SUPPRESS-O2-LABEL: DecoderTableAlt32 // DECODER-SUPPRESS-O2-DAG: Opcode: unrelated +// DECODER-SUPPRESS-O2-LABEL: DecoderTable64 +// DECODER-SUPPRESS-O2-NOT: Opcode: bar +// DECODER-SUPPRESS-O2-DAG: Opcode: fooTypeEncDefault:foo // For 'bar' and 'unrelated', we didn't assign any HwModes for them, // they should keep the same in the following four tables. diff --git a/llvm/test/TableGen/VarLenDecoder.td b/llvm/test/TableGen/VarLenDecoder.td index 769c5895ec3c1..10e254f7673e6 100644 --- a/llvm/test/TableGen/VarLenDecoder.td +++ b/llvm/test/TableGen/VarLenDecoder.td @@ -47,6 +47,12 @@ def FOO32 : MyVarInst { ); } +// Instruction length table +// CHECK: InstrLenTable +// CHECK: 27, +// CHECK-NEXT: 43, +// CHECK-NEXT: }; + // CHECK-SMALL: /* 0 */ MCD::OPC_ExtractField, 3, 5, // Inst{7-3} ... 
// CHECK-SMALL-NEXT: /* 3 */ MCD::OPC_FilterValue, 8, 4, 0, // Skip to: 11 // CHECK-SMALL-NEXT: /* 7 */ MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 0, // Opcode: FOO16 @@ -61,11 +67,6 @@ def FOO32 : MyVarInst { // CHECK-LARGE-NEXT: /* 14 */ MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 1, // Opcode: FOO32 // CHECK-LARGE-NEXT: }; -// Instruction length table -// CHECK: 27, -// CHECK-NEXT: 43, -// CHECK-NEXT: }; - // CHECK: case 0: // CHECK-NEXT: tmp = fieldFromInstruction(insn, 8, 3); // CHECK-NEXT: if (!Check(S, DecodeRegClassRegisterClass(MI, tmp, Address, Decoder))) { return MCDisassembler::Fail; } diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index e4992b9e9e725..354c2a788d5b1 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" @@ -93,6 +94,16 @@ static cl::opt UseFnTableInDecodeToMCInst( "of the generated code."), cl::init(false), cl::cat(DisassemblerEmitterCat)); +// Enabling this option requires use of different `InsnType` for different +// bitwidths and defining `InsnBitWidth` template specialization for the +// `InsnType` types used. Some common specializations are already defined in +// MCDecoder.h. +static cl::opt SpecializeDecodersPerBitwidth( + "specialize-decoders-per-bitwidth", + cl::desc("Specialize the generated `decodeToMCInst` function per bitwidth. 
" + "Helps reduce the code size."), + cl::init(false), cl::cat(DisassemblerEmitterCat)); + STATISTIC(NumEncodings, "Number of encodings considered"); STATISTIC(NumEncodingsLackingDisasm, "Number of encodings without disassembler info"); @@ -360,7 +371,8 @@ class DecoderEmitter { void emitPredicateFunction(formatted_raw_ostream &OS, PredicateSet &Predicates) const; void emitDecoderFunction(formatted_raw_ostream &OS, - DecoderSet &Decoders) const; + const DecoderSet &Decoders, + unsigned BucketBitWidth) const; // run - Output the code emitter void run(raw_ostream &o) const; @@ -930,7 +942,8 @@ void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS, } void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, - DecoderSet &Decoders) const { + const DecoderSet &Decoders, + unsigned BucketBitWidth) const { // The decoder function is just a big switch statement or a table of function // pointers based on the input decoder index. @@ -944,12 +957,32 @@ void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, "DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const " "MCDisassembler *Decoder, bool &DecodeComplete"; + // Print the name of the decode function to OS. + auto PrintDecodeFnName = [&OS, BucketBitWidth](unsigned DecodeIdx) { + OS << "decodeFn"; + if (BucketBitWidth != 0) { + OS << '_' << BucketBitWidth << "bit"; + } + OS << '_' << DecodeIdx; + }; + + // Print the template statement. + auto PrintTemplate = [&OS, BucketBitWidth]() { + OS << "template \n"; + OS << "static "; + if (BucketBitWidth != 0) + OS << "std::enable_if_t == " << BucketBitWidth + << ", DecodeStatus>\n"; + else + OS << "DecodeStatus "; + }; + if (UseFnTableInDecodeToMCInst) { // Emit a function for each case first. 
for (const auto &[Index, Decoder] : enumerate(Decoders)) { - OS << "template \n"; - OS << "static DecodeStatus decodeFn" << Index << "(" << DecodeParams - << ") {\n"; + PrintTemplate(); + PrintDecodeFnName(Index); + OS << "(" << DecodeParams << ") {\n"; OS << " using namespace llvm::MCD;\n"; OS << " " << TmpTypeDecl; OS << " [[maybe_unused]] TmpType tmp;\n"; @@ -960,9 +993,8 @@ void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, } OS << "// Handling " << Decoders.size() << " cases.\n"; - OS << "template \n"; - OS << "static DecodeStatus decodeToMCInst(unsigned Idx, " << DecodeParams - << ") {\n"; + PrintTemplate(); + OS << "decodeToMCInst(unsigned Idx, " << DecodeParams << ") {\n"; OS << " using namespace llvm::MCD;\n"; OS << " DecodeComplete = true;\n"; @@ -970,12 +1002,14 @@ void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, // Build a table of function pointers OS << " using DecodeFnTy = DecodeStatus (*)(" << DecodeParams << ");\n"; OS << " static constexpr DecodeFnTy decodeFnTable[] = {\n"; - for (size_t Index : llvm::seq(Decoders.size())) - OS << " decodeFn" << Index << ",\n"; + for (size_t Index : llvm::seq(Decoders.size())) { + OS << " "; + PrintDecodeFnName(Index); + OS << ",\n"; + } OS << " };\n"; OS << " if (Idx >= " << Decoders.size() << ")\n"; OS << " llvm_unreachable(\"Invalid decoder index!\");\n"; - OS << " return decodeFnTable[Idx](S, insn, MI, Address, Decoder, " "DecodeComplete);\n"; } else { @@ -2448,57 +2482,89 @@ namespace { )"; // Do extra bookkeeping for variable-length encodings. 
- std::vector InstrLen; bool IsVarLenInst = Target.hasVariableLengthEncodings(); unsigned MaxInstLen = 0; if (IsVarLenInst) { - InstrLen.resize(Target.getInstructions().size(), 0); + std::vector InstrLen(Target.getInstructions().size(), 0); for (const InstructionEncoding &Encoding : Encodings) { MaxInstLen = std::max(MaxInstLen, Encoding.getBitWidth()); InstrLen[Target.getInstrIntValue(Encoding.getInstruction()->TheDef)] = Encoding.getBitWidth(); } + + // For variable instruction, we emit an instruction length table to let the + // decoder know how long the instructions are. You can see example usage in + // M68k's disassembler. + emitInstrLenTable(OS, InstrLen); } - // Map of (namespace, hwmode, size) tuple to encoding IDs. - std::map, std::vector> - EncMap; + // Map of (bitwidth, namespace, hwmode) tuple to encoding IDs. + // It's organized as a nested map, with the (namespace, hwmode) as the key for + // the inner map and bitwidth as the key for the outer map. We use std::map + // for deterministic iteration order so that the code emitted is also + // deterministic. + using InnerKeyTy = std::pair; + using InnerMapTy = std::map>; + std::map EncMap; + for (const auto &[HwModeID, EncodingIDs] : EncodingIDsByHwMode) { for (unsigned EncodingID : EncodingIDs) { const InstructionEncoding &Encoding = Encodings[EncodingID]; - const Record *EncodingDef = Encoding.getRecord(); - unsigned Size = EncodingDef->getValueAsInt("Size"); + const unsigned BitWidth = + IsVarLenInst ? 
MaxInstLen : Encoding.getBitWidth(); StringRef DecoderNamespace = Encoding.getDecoderNamespace(); - EncMap[{DecoderNamespace, HwModeID, Size}].push_back(EncodingID); + EncMap[BitWidth][{DecoderNamespace, HwModeID}].push_back(EncodingID); } } + // Variable length instructions use the same `APInt` type for all instructions + // so we cannot specialize decoders based on instruction bitwidths (which + // requires using different `InsnType` for different bitwidths for the correct + // template specialization to kick in). + if (IsVarLenInst && SpecializeDecodersPerBitwidth) + PrintFatalError( + "Cannot specialize decoders for variable length instuctions"); + + // Entries in `EncMap` are already sorted by bitwidth. So bucketing per + // bitwidth can be done on-the-fly as we iterate over the map. DecoderTableInfo TableInfo; DecoderTableBuilder TableBuilder(Target, Encodings, TableInfo); unsigned OpcodeMask = 0; - for (const auto &[Key, EncodingIDs] : EncMap) { - auto [DecoderNamespace, HwModeID, Size] = Key; - const unsigned BitWidth = IsVarLenInst ? MaxInstLen : 8 * Size; - // Emit the decoder for this (namespace, hwmode, width) combination. - FilterChooser FC(Encodings, EncodingIDs); - - // The decode table is cleared for each top level decoder function. The - // predicates and decoders themselves, however, are shared across all - // decoders to give more opportunities for uniqueing. - TableInfo.Table.clear(); - TableBuilder.buildTable(FC); + for (const auto &[BitWidth, BWMap] : EncMap) { + for (const auto &[Key, EncodingIDs] : BWMap) { + auto [DecoderNamespace, HwModeID] = Key; + + // Emit the decoder for this (namespace, hwmode, width) combination. + FilterChooser FC(Encodings, EncodingIDs); + + // The decode table is cleared for each top level decoder function. The + // predicates and decoders themselves, however, are shared across + // different decoders to give more opportunities for uniqueing. 
+ // - If `SpecializeDecodersPerBitwidth` is enabled, decoders are shared + // across all decoder tables for a given bitwidth, else they are shared + // across all decoder tables. + // - predicates are shared across all decoder tables. + TableInfo.Table.clear(); + TableBuilder.buildTable(FC); + + // Print the table to the output stream. + OpcodeMask |= emitTable(OS, TableInfo.Table, DecoderNamespace, HwModeID, + BitWidth, EncodingIDs); + } - // Print the table to the output stream. - OpcodeMask |= emitTable(OS, TableInfo.Table, DecoderNamespace, HwModeID, - BitWidth, EncodingIDs); + // Each BitWidth gets its own decoders and decoder function if + // SpecializeDecodersPerBitwidth is enabled. + if (SpecializeDecodersPerBitwidth) { + emitDecoderFunction(OS, TableInfo.Decoders, BitWidth); + TableInfo.Decoders.clear(); + } } - // For variable instruction, we emit a instruction length table - // to let the decoder know how long the instructions are. - // You can see example usage in M68k's disassembler. - if (IsVarLenInst) - emitInstrLenTable(OS, InstrLen); + // Without per-bitwidth specialization, emit the single decoder function + // shared by all decoder tables (bucket bitwidth 0). + if (!SpecializeDecodersPerBitwidth) + emitDecoderFunction(OS, TableInfo.Decoders, 0); const bool HasCheckPredicate = OpcodeMask & @@ -2508,9 +2574,6 @@ namespace { if (HasCheckPredicate) emitPredicateFunction(OS, TableInfo.Predicates); - // Emit the decoder function. - emitDecoderFunction(OS, TableInfo.Decoders); - // Emit the main entry point for the decoder, decodeInstruction(). 
emitDecodeInstruction(OS, IsVarLenInst, OpcodeMask); diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/Disassembler/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/Disassembler/BUILD.gn index 11bc537936508..9cc98cd8642d6 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/Disassembler/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/Disassembler/BUILD.gn @@ -2,7 +2,10 @@ import("//llvm/utils/TableGen/tablegen.gni") tablegen("AMDGPUGenDisassemblerTables") { visibility = [ ":Disassembler" ] - args = [ "-gen-disassembler" ] + args = [ + "-gen-disassembler", + "-specialize-decoders-per-bitwidth", + ] td_file = "../AMDGPU.td" } diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/Disassembler/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/Disassembler/BUILD.gn index cb579221fd366..447a67af6be7b 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/Disassembler/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/Disassembler/BUILD.gn @@ -2,7 +2,10 @@ import("//llvm/utils/TableGen/tablegen.gni") tablegen("RISCVGenDisassemblerTables") { visibility = [ ":Disassembler" ] - args = [ "-gen-disassembler" ] + args = [ + "-gen-disassembler", + "-specialize-decoders-per-bitwidth", + ] td_file = "../RISCV.td" }