diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 9a635bbe5f85c..ca1232648a79b 100644 --- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -169,29 +169,12 @@ X86GenericDisassembler::X86GenericDisassembler( llvm_unreachable("Invalid CPU mode"); } -namespace { -struct Region { - ArrayRef Bytes; - uint64_t Base; - Region(ArrayRef Bytes, uint64_t Base) : Bytes(Bytes), Base(Base) {} -}; -} // end anonymous namespace - /// A callback function that wraps the readByte method from Region. /// /// @param Arg - The generic callback parameter. In this case, this should /// be a pointer to a Region. /// @param Byte - A pointer to the byte to be read. /// @param Address - The address to be read. -static int regionReader(const void *Arg, uint8_t *Byte, uint64_t Address) { - auto *R = static_cast(Arg); - ArrayRef Bytes = R->Bytes; - unsigned Index = Address - R->Base; - if (Bytes.size() <= Index) - return -1; - *Byte = Bytes[Index]; - return 0; -} /// logger - a callback function that wraps the operator<< method from /// raw_ostream. @@ -222,10 +205,9 @@ MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction( if (&VStream == &nulls()) LoggerFn = nullptr; // Disable logging completely if it's going to nulls(). - Region R(Bytes, Address); + std::pair, uint64_t> R(Bytes, Address); - int Ret = decodeInstruction(&InternalInstr, regionReader, (const void *)&R, - LoggerFn, (void *)&VStream, + int Ret = decodeInstruction(&InternalInstr, &R, LoggerFn, (void *)&VStream, (const void *)MII.get(), Address, fMode); if (Ret) { diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index bf97680c719d5..f7d7ebf1a6c40 100644 --- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "X86DisassemblerDecoder.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include /* for va_*() */ @@ -190,74 +191,34 @@ static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { return &INSTRUCTIONS_SYM[uid]; } -/* - * consumeByte - Uses the reader function provided by the user to consume one - * byte from the instruction's memory and advance the cursor. - * - * @param insn - The instruction with the reader function to use. The cursor - * for this instruction is advanced. - * @param byte - A pointer to a pre-allocated memory buffer to be populated - * with the data read. - * @return - 0 if the read was successful; nonzero otherwise. - */ -static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { - int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); - - if (!ret) - ++(insn->readerCursor); - - return ret; -} - -/* - * lookAtByte - Like consumeByte, but does not advance the cursor. - * - * @param insn - See consumeByte(). - * @param byte - See consumeByte(). - * @return - See consumeByte(). - */ -static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { - return insn->reader(insn->readerArg, byte, insn->readerCursor); +static bool peek(struct InternalInstruction *insn, uint8_t &byte) { + auto *r = static_cast, uint64_t> *>( + insn->readerArg); + uint64_t offset = insn->readerCursor - r->second; + if (offset >= r->first.size()) + return true; + byte = r->first[offset]; + return false; } static void unconsumeByte(struct InternalInstruction* insn) { insn->readerCursor--; } -#define CONSUME_FUNC(name, type) \ - static int name(struct InternalInstruction* insn, type* ptr) { \ - type combined = 0; \ - unsigned offset; \ - for (offset = 0; offset < sizeof(type); ++offset) { \ - uint8_t byte; \ - int ret = insn->reader(insn->readerArg, \ - &byte, \ - insn->readerCursor + offset); \ - if (ret) \ - return ret; \ - combined = combined | ((uint64_t)byte << (offset * 8)); \ - } \ - *ptr = combined; \ - insn->readerCursor += sizeof(type); \ - return 0; \ - } - -/* - * consume* - Use the reader function provided by the user to consume data - * values of various sizes from the instruction's memory and advance the - * cursor appropriately. These readers perform endian conversion. - * - * @param insn - See consumeByte(). - * @param ptr - A pointer to a pre-allocated memory of appropriate size to - * be populated with the data read. - * @return - See consumeByte(). - */ -CONSUME_FUNC(consumeInt8, int8_t) -CONSUME_FUNC(consumeInt16, int16_t) -CONSUME_FUNC(consumeInt32, int32_t) -CONSUME_FUNC(consumeUInt16, uint16_t) -CONSUME_FUNC(consumeUInt32, uint32_t) -CONSUME_FUNC(consumeUInt64, uint64_t) +template +static bool consume(InternalInstruction *insn, T &ptr) { + auto *r = static_cast, uint64_t> *>( + insn->readerArg); + uint64_t offset = insn->readerCursor - r->second; + if (offset + sizeof(T) > r->first.size()) + return true; + T ret = 0; + for (unsigned i = 0; i < sizeof(T); ++i) + ret |= (uint64_t)r->first[offset + i] << (i * 8); + ptr = ret; + insn->readerCursor += sizeof(T); + return false; +} /* * dbgprintf - Uses the logging function provided by the user to log a single @@ -307,7 +268,7 @@ static int readPrefixes(struct InternalInstruction* insn) { while (isPrefix) { /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */ - if (consumeByte(insn, &byte)) + if (consume(insn, byte)) break; /* @@ -317,7 +278,7 @@ static int readPrefixes(struct InternalInstruction* insn) { if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK break; - if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) { + if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) { /* * If the byte is 0xf2 or 0xf3, and any of the following conditions are * met: @@ -345,10 +306,10 @@ static int readPrefixes(struct InternalInstruction* insn) { if (isREX(insn, nextByte)) { uint8_t nnextByte; // Go to REX prefix after the current one - if (consumeByte(insn, &nnextByte)) + if (consume(insn, nnextByte)) return -1; // We should be able to read next byte after REX prefix - if (lookAtByte(insn, &nnextByte)) + if (peek(insn, nnextByte)) return -1; unconsumeByte(insn); } @@ -361,7 +322,7 @@ static int readPrefixes(struct InternalInstruction* insn) { case 0xf2: /* REPNE/REPNZ */ case 0xf3: /* REP or REPE/REPZ */ { uint8_t nextByte; - if (lookAtByte(insn, &nextByte)) + if (peek(insn, nextByte)) break; // TODO: // 1. There could be several 0x66 @@ -376,39 +337,27 @@ static int readPrefixes(struct InternalInstruction* insn) { break; } case 0x2e: /* CS segment override -OR- Branch not taken */ + insn->segmentOverride = SEG_OVERRIDE_CS; + break; case 0x36: /* SS segment override -OR- Branch taken */ - case 0x3e: /* DS segment override */ - case 0x26: /* ES segment override */ - case 0x64: /* FS segment override */ - case 0x65: /* GS segment override */ - switch (byte) { - case 0x2e: - insn->segmentOverride = SEG_OVERRIDE_CS; - break; - case 0x36: - insn->segmentOverride = SEG_OVERRIDE_SS; - break; - case 0x3e: - insn->segmentOverride = SEG_OVERRIDE_DS; - break; - case 0x26: - insn->segmentOverride = SEG_OVERRIDE_ES; - break; - case 0x64: - insn->segmentOverride = SEG_OVERRIDE_FS; - break; - case 0x65: - insn->segmentOverride = SEG_OVERRIDE_GS; - break; - default: - debug("Unhandled override"); - return -1; - } + insn->segmentOverride = SEG_OVERRIDE_SS; + break; + case 0x3e: /* DS segment override */ + insn->segmentOverride = SEG_OVERRIDE_DS; + break; + case 0x26: /* ES segment override */ + insn->segmentOverride = SEG_OVERRIDE_ES; + break; + case 0x64: /* FS segment override */ + insn->segmentOverride = SEG_OVERRIDE_FS; + break; + case 0x65: /* GS segment override */ + insn->segmentOverride = SEG_OVERRIDE_GS; break; - case 0x66: /* Operand-size override */ { + case 0x66: /* Operand-size override */ { uint8_t nextByte; insn->hasOpSize = true; - if (lookAtByte(insn, &nextByte)) + if (peek(insn, nextByte)) break; // 0x66 can't overwrite existing mandatory prefix and should be ignored if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte))) @@ -431,13 +380,12 @@ static int readPrefixes(struct InternalInstruction* insn) { if (byte == 0x62) { uint8_t byte1, byte2; - - if (consumeByte(insn, &byte1)) { + if (consume(insn, byte1)) { dbgprintf(insn, "Couldn't read second byte of EVEX prefix"); return -1; } - if (lookAtByte(insn, &byte2)) { + if (peek(insn, byte2)) { dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); return -1; } @@ -453,11 +401,11 @@ static int readPrefixes(struct InternalInstruction* insn) { if (insn->vectorExtensionType == TYPE_EVEX) { insn->vectorExtensionPrefix[0] = byte; insn->vectorExtensionPrefix[1] = byte1; - if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) { + if (consume(insn, insn->vectorExtensionPrefix[2])) { dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); return -1; } - if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) { + if (consume(insn, insn->vectorExtensionPrefix[3])) { dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix"); return -1; } @@ -477,8 +425,7 @@ static int readPrefixes(struct InternalInstruction* insn) { } } else if (byte == 0xc4) { uint8_t byte1; - - if (lookAtByte(insn, &byte1)) { + if (peek(insn, byte1)) { dbgprintf(insn, "Couldn't read second byte of VEX"); return -1; } @@ -490,8 +437,8 @@ static int readPrefixes(struct InternalInstruction* insn) { if (insn->vectorExtensionType == TYPE_VEX_3B) { insn->vectorExtensionPrefix[0] = byte; - consumeByte(insn, &insn->vectorExtensionPrefix[1]); - consumeByte(insn, &insn->vectorExtensionPrefix[2]); + consume(insn, insn->vectorExtensionPrefix[1]); + consume(insn, insn->vectorExtensionPrefix[2]); /* We simulate the REX prefix for simplicity's sake */ @@ -508,8 +455,7 @@ static int readPrefixes(struct InternalInstruction* insn) { } } else if (byte == 0xc5) { uint8_t byte1; - - if (lookAtByte(insn, &byte1)) { + if (peek(insn, byte1)) { dbgprintf(insn, "Couldn't read second byte of VEX"); return -1; } @@ -521,7 +467,7 @@ static int readPrefixes(struct InternalInstruction* insn) { if (insn->vectorExtensionType == TYPE_VEX_2B) { insn->vectorExtensionPrefix[0] = byte; - consumeByte(insn, &insn->vectorExtensionPrefix[1]); + consume(insn, insn->vectorExtensionPrefix[1]); if (insn->mode == MODE_64BIT) insn->rexPrefix = 0x40 @@ -541,8 +487,7 @@ static int readPrefixes(struct InternalInstruction* insn) { } } else if (byte == 0x8f) { uint8_t byte1; - - if (lookAtByte(insn, &byte1)) { + if (peek(insn, byte1)) { dbgprintf(insn, "Couldn't read second byte of XOP"); return -1; } @@ -554,8 +499,8 @@ static int readPrefixes(struct InternalInstruction* insn) { if (insn->vectorExtensionType == TYPE_XOP) { insn->vectorExtensionPrefix[0] = byte; - consumeByte(insn, &insn->vectorExtensionPrefix[1]); - consumeByte(insn, &insn->vectorExtensionPrefix[2]); + consume(insn, insn->vectorExtensionPrefix[1]); + consume(insn, insn->vectorExtensionPrefix[2]); /* We simulate the REX prefix for simplicity's sake */ @@ -579,7 +524,7 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->vectorExtensionPrefix[2]); } } else if (isREX(insn, byte)) { - if (lookAtByte(insn, &nextByte)) + if (peek(insn, nextByte)) return -1; insn->rexPrefix = byte; dbgprintf(insn, "Found REX prefix 0x%hhx", byte); @@ -618,11 +563,8 @@ static int readModRM(struct InternalInstruction* insn); /* * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of * extended or escape opcodes). - * - * @param insn - The instruction whose opcode is to be read. - * @return - 0 if the opcode could be read successfully; nonzero otherwise. */ -static int readOpcode(struct InternalInstruction* insn) { +static bool readOpcode(struct InternalInstruction* insn) { /* Determine the length of the primary opcode */ uint8_t current; @@ -636,74 +578,74 @@ static int readOpcode(struct InternalInstruction* insn) { default: dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)", mmFromEVEX2of4(insn->vectorExtensionPrefix[1])); - return -1; + return true; case VEX_LOB_0F: insn->opcodeType = TWOBYTE; - return consumeByte(insn, &insn->opcode); + return consume(insn, insn->opcode); case VEX_LOB_0F38: insn->opcodeType = THREEBYTE_38; - return consumeByte(insn, &insn->opcode); + return consume(insn, insn->opcode); case VEX_LOB_0F3A: insn->opcodeType = THREEBYTE_3A; - return consumeByte(insn, &insn->opcode); + return consume(insn, insn->opcode); } } else if (insn->vectorExtensionType == TYPE_VEX_3B) { switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) { default: dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); - return -1; + return true; case VEX_LOB_0F: insn->opcodeType = TWOBYTE; - return consumeByte(insn, &insn->opcode); + return consume(insn, insn->opcode); case VEX_LOB_0F38: insn->opcodeType = THREEBYTE_38; - return consumeByte(insn, &insn->opcode); + return consume(insn, insn->opcode); case VEX_LOB_0F3A: insn->opcodeType = THREEBYTE_3A; - return consumeByte(insn, &insn->opcode); + return consume(insn, insn->opcode); } } else if (insn->vectorExtensionType == TYPE_VEX_2B) { insn->opcodeType = TWOBYTE; - return consumeByte(insn, &insn->opcode); + return consume(insn, insn->opcode); } else if (insn->vectorExtensionType == TYPE_XOP) { switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) { default: dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); - return -1; + return true; case XOP_MAP_SELECT_8: insn->opcodeType = XOP8_MAP; - return consumeByte(insn, &insn->opcode); + return consume(insn, insn->opcode); case XOP_MAP_SELECT_9: insn->opcodeType = XOP9_MAP; - return consumeByte(insn, &insn->opcode); + return consume(insn, insn->opcode); case XOP_MAP_SELECT_A: insn->opcodeType = XOPA_MAP; - return consumeByte(insn, &insn->opcode); + return consume(insn, insn->opcode); } } - if (consumeByte(insn, ¤t)) - return -1; + if (consume(insn, current)) + return true; if (current == 0x0f) { dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); - if (consumeByte(insn, ¤t)) - return -1; + if (consume(insn, current)) + return true; if (current == 0x38) { dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); - if (consumeByte(insn, ¤t)) + if (consume(insn, current)) return -1; insn->opcodeType = THREEBYTE_38; } else if (current == 0x3a) { dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); - if (consumeByte(insn, ¤t)) + if (consume(insn, current)) return -1; insn->opcodeType = THREEBYTE_3A; @@ -712,10 +654,10 @@ static int readOpcode(struct InternalInstruction* insn) { // Consume operands before the opcode to comply with the 3DNow encoding if (readModRM(insn)) - return -1; + return true; - if (consumeByte(insn, ¤t)) - return -1; + if (consume(insn, current)) + return true; insn->opcodeType = THREEDNOW_MAP; } else { @@ -735,7 +677,7 @@ static int readOpcode(struct InternalInstruction* insn) { insn->opcode = current; - return 0; + return false; } /* @@ -1164,7 +1106,7 @@ static int readSIB(struct InternalInstruction* insn) { break; } - if (consumeByte(insn, &insn->sib)) + if (consume(insn, insn->sib)) return -1; index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); @@ -1233,17 +1175,17 @@ static int readDisplacement(struct InternalInstruction* insn) { insn->consumedDisplacement = false; break; case EA_DISP_8: - if (consumeInt8(insn, &d8)) + if (consume(insn, d8)) return -1; insn->displacement = d8; break; case EA_DISP_16: - if (consumeInt16(insn, &d16)) + if (consume(insn, d16)) return -1; insn->displacement = d16; break; case EA_DISP_32: - if (consumeInt32(insn, &d32)) + if (consume(insn, d32)) return -1; insn->displacement = d32; break; @@ -1268,7 +1210,7 @@ static int readModRM(struct InternalInstruction* insn) { if (insn->consumedModRM) return 0; - if (consumeByte(insn, &insn->modRM)) + if (consume(insn, insn->modRM)) return -1; insn->consumedModRM = true; @@ -1623,22 +1565,22 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) { switch (size) { case 1: - if (consumeByte(insn, &imm8)) + if (consume(insn, imm8)) return -1; insn->immediates[insn->numImmediatesConsumed] = imm8; break; case 2: - if (consumeUInt16(insn, &imm16)) + if (consume(insn, imm16)) return -1; insn->immediates[insn->numImmediatesConsumed] = imm16; break; case 4: - if (consumeUInt32(insn, &imm32)) + if (consume(insn, imm32)) return -1; insn->immediates[insn->numImmediatesConsumed] = imm32; break; case 8: - if (consumeUInt64(insn, &imm64)) + if (consume(insn, imm64)) return -1; insn->immediates[insn->numImmediatesConsumed] = imm64; break; @@ -1884,13 +1826,14 @@ static int readOperands(struct InternalInstruction* insn) { * @return - 0 if the instruction's memory could be read; nonzero if * not. */ -int llvm::X86Disassembler::decodeInstruction( - struct InternalInstruction *insn, byteReader_t reader, - const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, - uint64_t startLoc, DisassemblerMode mode) { +int llvm::X86Disassembler::decodeInstruction(struct InternalInstruction *insn, + const void *readerArg, + dlog_t logger, void *loggerArg, + const void *miiArg, + uint64_t startLoc, + DisassemblerMode mode) { memset(insn, 0, sizeof(struct InternalInstruction)); - insn->reader = reader; insn->readerArg = readerArg; insn->dlog = logger; insn->dlogArg = loggerArg; diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 7c0a42c019e35..a79fcec48fc6d 100644 --- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -446,12 +446,12 @@ enum SIBBase { }; /// Possible displacement types for effective-address computations. -typedef enum { +enum EADisplacement { EA_DISP_NONE, EA_DISP_8, EA_DISP_16, EA_DISP_32 -} EADisplacement; +}; /// All possible values of the reg field in the ModR/M byte. enum Reg { @@ -529,8 +529,6 @@ struct InstructionSpecifier { /// The x86 internal instruction, which is produced by the decoder. struct InternalInstruction { - // Reader interface (C) - byteReader_t reader; // Opaque value passed to the reader const void* readerArg; // The address of the next byte to read via the reader @@ -661,7 +659,6 @@ struct InternalInstruction { /// a buffer provided by the consumer. /// \param insn The buffer to store the instruction in. Allocated by the /// consumer. -/// \param reader The byteReader_t for the bytes to be read. /// \param readerArg An argument to pass to the reader for storing context /// specific to the consumer. May be NULL. /// \param logger The dlog_t to be used in printing status messages from the @@ -673,7 +670,6 @@ struct InternalInstruction { /// \param mode The mode (16-bit, 32-bit, 64-bit) to decode in. /// \return Nonzero if there was an error during decode, 0 otherwise. int decodeInstruction(InternalInstruction *insn, - byteReader_t reader, const void *readerArg, dlog_t logger, void *loggerArg,