Skip to content

Commit

Permalink
[RISCV] Support instruction sizes up to 176-bits in disassembler. (#9…
Browse files Browse the repository at this point in the history
…0371)

We don't have any instructions defined yet, but that we can still read the correct number of bytes when disassembling. This should better match GNU objdump behavior.
  • Loading branch information
topperc committed Apr 29, 2024
1 parent 99df06a commit 618adc7
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 5 deletions.
42 changes: 37 additions & 5 deletions llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -656,12 +656,44 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
raw_ostream &CS) const {
// TODO: This will need modification when supporting instruction set
// extensions with instructions > 32-bits (up to 176 bits wide).
// It's a 16 bit instruction if bit 0 and 1 are not 0b11.
if ((Bytes[0] & 0b11) != 0b11)
return getInstruction16(MI, Size, Bytes, Address, CS);

// It's a 32 bit instruction if bit 0 and 1 are 1.
if ((Bytes[0] & 0x3) == 0x3)
// It's a 32 bit instruction if bit 1:0 are 0b11(checked above) and bits 4:2
// are not 0b111.
if ((Bytes[0] & 0b1'1100) != 0b1'1100)
return getInstruction32(MI, Size, Bytes, Address, CS);

return getInstruction16(MI, Size, Bytes, Address, CS);
// 48-bit instructions are encoded as 0bxx011111.
if ((Bytes[0] & 0b11'1111) == 0b01'1111) {
Size = Bytes.size() >= 6 ? 6 : 0;
return MCDisassembler::Fail;
}

// 64-bit instructions are encoded as 0x0111111.
if ((Bytes[0] & 0b111'1111) == 0b011'1111) {
Size = Bytes.size() >= 8 ? 8 : 0;
return MCDisassembler::Fail;
}

// Remaining cases need to check a second byte.
if (Bytes.size() < 2) {
Size = 0;
return MCDisassembler::Fail;
}

// 80-bit through 176-bit instructions are encoded as 0bxnnnxxxx_x1111111.
// Where the number of bits is (80 + (nnn * 16)) for nnn != 0b111.
unsigned nnn = (Bytes[1] >> 4) & 0b111;
if (nnn != 0b111) {
Size = 10 + (nnn * 2);
if (Bytes.size() < Size)
Size = 0;
return MCDisassembler::Fail;
}

// Remaining encodings are reserved for > 176-bit instructions.
Size = 0;
return MCDisassembler::Fail;
}
29 changes: 29 additions & 0 deletions llvm/test/MC/RISCV/large-instructions.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# RUN: llvm-mc -filetype=obj -triple riscv32 < %s \
# RUN: | llvm-objdump -d - | FileCheck %s

# CHECK: 011f 4523 8967 <unknown>
.byte 0x1f, 0x01, 0x23, 0x45, 0x67, 0x89

# CHECK: 4523013f cdab8967 <unknown>
.byte 0x3f, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd

# CHECK: 007f 4523 8967 cdab feef <unknown>
.byte 0x7f, 0x00, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe

# CHECK: 4523107f cdab8967 badcfeef <unknown>
.byte 0x7f, 0x10, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba

# CHECK: 207f 4523 8967 cdab feef badc 7698 <unknown>
.byte 0x7f, 0x20, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76

# CHECK: 4523307f cdab8967 badcfeef 32547698 <unknown>
.byte 0x7f, 0x30, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32

# CHECK: 407f 4523 8967 cdab feef badc 7698 3254 1210 <unknown>
.byte 0x7f, 0x40, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12

# CHECK: 4523507f cdab8967 badcfeef 32547698 56341210 <unknown>
.byte 0x7f, 0x50, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12, 0x34, 0x56

# CHECK: 607f 4523 8967 cdab feef badc 7698 3254 1210 5634 9a78 <unknown>
.byte 0x7f, 0x60, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12, 0x34, 0x56, 0x78, 0x9a

0 comments on commit 618adc7

Please sign in to comment.