Skip to content

Commit

Permalink
Sleigh Preliminary Floating Point Support (#632)
Browse files Browse the repository at this point in the history
* re-enable broken float code

* try to cast floats as integers

* fix assignment for function calls

* actually fix calls

* make sure to set branch not taken

* add initial neon registers

* fix missing uint128 fields

* add more debug

* do armv8 since that's what we need right now

* add complete fpscr

* make FPSCR capital

* initialize return_pc

* fix build on linux (#633)

* undo superfluous change (#634)

* revert aarch64 linux changes for now

* fix off by 2 when defining INST_NEXT_PTR in patches for thumb

* fix off by 2

* fix typo

* fix typo

* use maybe float directly

* fix test

* add additional float types

Co-authored-by: William Tan <1284324+Ninja3047@users.noreply.github.com>
  • Loading branch information
2over12 and Ninja3047 committed Nov 17, 2022
1 parent d5c5035 commit 829ee4a
Show file tree
Hide file tree
Showing 10 changed files with 342 additions and 110 deletions.
56 changes: 56 additions & 0 deletions include/remill/Arch/AArch32/Runtime/State.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#pragma once

#include "remill/Arch/Runtime/Int.h"
#pragma clang diagnostic push
#pragma clang diagnostic fatal "-Wpadded"

Expand All @@ -26,6 +27,26 @@ struct Reg final {
alignas(4) uint32_t dword;
} __attribute__((packed));


struct NeonReg final {
union {
uint128_t qword;
struct {
uint64_t low_dword;
uint64_t high_dword;
} dwords;
struct {
uint32_t ll_word;
uint32_t lh_word;
uint32_t hl_word;
uint32_t hh_word;
} words;
};
} __attribute__((packed));

static_assert(sizeof(uint128_t) == sizeof(NeonReg),
"Invalid packing of NeonReg");

static_assert(sizeof(uint32_t) == sizeof(Reg), "Invalid packing of `Reg`.");
static_assert(0 == __builtin_offsetof(Reg, dword),
"Invalid packing of `Reg::dword`.");
Expand Down Expand Up @@ -105,10 +126,45 @@ struct alignas(8) SR final {
uint8_t _padding[2];
} __attribute__((packed));


// Ghidra maintains a uint32_t representing FPSCR that gets synced to NG, ZR,
// CY and OV, so we maintain this state too. Since we don't support Neon in our
// manual aarch32 semantics, those semantics leave this register untouched.
//
// `value` holds the 32-bit register; `_padding` rounds the struct up to the
// 16 bytes checked by the static_assert below.
struct FPSCR {
uint32_t value;
uint8_t _padding[12];
} __attribute__((packed));
static_assert(16 == sizeof(FPSCR), "Invalid packing of FPSCR");


// The bank of sixteen 128-bit NEON registers, `q0` through `q15`, stored
// back to back in index order. The struct is 16-byte aligned and packed; the
// static_assert below checks that it occupies exactly sixteen `uint128_t`s,
// i.e. that no padding has been inserted between or after the registers.
struct alignas(16) NeonBank {
NeonReg q0;
NeonReg q1;
NeonReg q2;
NeonReg q3;
NeonReg q4;
NeonReg q5;
NeonReg q6;
NeonReg q7;
NeonReg q8;
NeonReg q9;
NeonReg q10;
NeonReg q11;
NeonReg q12;
NeonReg q13;
NeonReg q14;
NeonReg q15;
} __attribute__((packed));

static_assert(sizeof(uint128_t) * 16 == sizeof(NeonBank),
"Invalid packing of NeonBank");


struct alignas(16) AArch32State : public ArchState {


GPR gpr; // 528 bytes.
NeonBank neon;
FPSCR fpscr;
SR sr;
uint64_t _0;

Expand Down
93 changes: 92 additions & 1 deletion lib/Arch/AArch32/Base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ void AArch32ArchBase::PopulateRegisterTable(void) const {

auto u32 = llvm::Type::getInt32Ty(*context);

auto u64 = llvm::Type::getInt64Ty(*context);

auto u128 = llvm::Type::getInt128Ty(*context);


// Yields, as a `uintptr_t`, the byte offset of the member expression `access`
// inside an object of `type`. It forms the member's address through a null
// `type *` and converts that address to an integer; no object is created.
// NOTE(review): presumably hand-rolled rather than using `offsetof` so that
// `access` may be a nested member path on a type `offsetof` would reject --
// confirm before replacing it.
#define OFFSET_OF(type, access) \
(reinterpret_cast<uintptr_t>(&reinterpret_cast<const volatile char &>( \
static_cast<type *>(nullptr)->access)))
Expand Down Expand Up @@ -101,6 +106,92 @@ void AArch32ArchBase::PopulateRegisterTable(void) const {
SUB_REG(LR, gpr.r14.dword, u32, R14);
SUB_REG(PC, gpr.r15.dword, u32, R15);

REG(Q0, neon.q0, u128);
REG(Q1, neon.q1, u128);
REG(Q2, neon.q2, u128);
REG(Q3, neon.q3, u128);
REG(Q4, neon.q4, u128);
REG(Q5, neon.q5, u128);
REG(Q6, neon.q6, u128);
REG(Q7, neon.q7, u128);
REG(Q8, neon.q8, u128);
REG(Q9, neon.q9, u128);
REG(Q10, neon.q10, u128);
REG(Q11, neon.q11, u128);
REG(Q12, neon.q12, u128);
REG(Q13, neon.q13, u128);
REG(Q14, neon.q14, u128);
REG(Q15, neon.q15, u128);

REG(FPSCR, fpscr.value, u32);

SUB_REG(D0, neon.q0.dwords.low_dword, u64, Q0);
SUB_REG(D1, neon.q0.dwords.high_dword, u64, Q0);
SUB_REG(D2, neon.q1.dwords.low_dword, u64, Q1);
SUB_REG(D3, neon.q1.dwords.high_dword, u64, Q1);
SUB_REG(D4, neon.q2.dwords.low_dword, u64, Q2);
SUB_REG(D5, neon.q2.dwords.high_dword, u64, Q2);
SUB_REG(D6, neon.q3.dwords.low_dword, u64, Q3);
SUB_REG(D7, neon.q3.dwords.high_dword, u64, Q3);
SUB_REG(D8, neon.q4.dwords.low_dword, u64, Q4);
SUB_REG(D9, neon.q4.dwords.high_dword, u64, Q4);
SUB_REG(D10, neon.q5.dwords.low_dword, u64, Q5);
SUB_REG(D11, neon.q5.dwords.high_dword, u64, Q5);
SUB_REG(D12, neon.q6.dwords.low_dword, u64, Q6);
SUB_REG(D13, neon.q6.dwords.high_dword, u64, Q6);
SUB_REG(D14, neon.q7.dwords.low_dword, u64, Q7);
SUB_REG(D15, neon.q7.dwords.high_dword, u64, Q7);
SUB_REG(D16, neon.q8.dwords.low_dword, u64, Q8);
SUB_REG(D17, neon.q8.dwords.high_dword, u64, Q8);
SUB_REG(D18, neon.q9.dwords.low_dword, u64, Q9);
SUB_REG(D19, neon.q9.dwords.high_dword, u64, Q9);
SUB_REG(D20, neon.q10.dwords.low_dword, u64, Q10);
SUB_REG(D21, neon.q10.dwords.high_dword, u64, Q10);
SUB_REG(D22, neon.q11.dwords.low_dword, u64, Q11);
SUB_REG(D23, neon.q11.dwords.high_dword, u64, Q11);
SUB_REG(D24, neon.q12.dwords.low_dword, u64, Q12);
SUB_REG(D25, neon.q12.dwords.high_dword, u64, Q12);
SUB_REG(D26, neon.q13.dwords.low_dword, u64, Q13);
SUB_REG(D27, neon.q13.dwords.high_dword, u64, Q13);
SUB_REG(D28, neon.q14.dwords.low_dword, u64, Q14);
SUB_REG(D29, neon.q14.dwords.high_dword, u64, Q14);
SUB_REG(D30, neon.q15.dwords.low_dword, u64, Q15);
SUB_REG(D31, neon.q15.dwords.high_dword, u64, Q15);

SUB_REG(S0, neon.q0.words.ll_word, u32, D0);
SUB_REG(S1, neon.q0.words.lh_word, u32, D0);
SUB_REG(S2, neon.q0.words.hl_word, u32, D1);
SUB_REG(S3, neon.q0.words.hh_word, u32, D1);
SUB_REG(S4, neon.q1.words.ll_word, u32, D2);
SUB_REG(S5, neon.q1.words.lh_word, u32, D2);
SUB_REG(S6, neon.q1.words.hl_word, u32, D3);
SUB_REG(S7, neon.q1.words.hh_word, u32, D3);
SUB_REG(S8, neon.q2.words.ll_word, u32, D4);
SUB_REG(S9, neon.q2.words.lh_word, u32, D4);
SUB_REG(S10, neon.q2.words.hl_word, u32, D5);
SUB_REG(S11, neon.q2.words.hh_word, u32, D5);
SUB_REG(S12, neon.q3.words.ll_word, u32, D6);
SUB_REG(S13, neon.q3.words.lh_word, u32, D6);
SUB_REG(S14, neon.q3.words.hl_word, u32, D7);
SUB_REG(S15, neon.q3.words.hh_word, u32, D7);
SUB_REG(S16, neon.q4.words.ll_word, u32, D8);
SUB_REG(S17, neon.q4.words.lh_word, u32, D8);
SUB_REG(S18, neon.q4.words.hl_word, u32, D9);
SUB_REG(S19, neon.q4.words.hh_word, u32, D9);
SUB_REG(S20, neon.q5.words.ll_word, u32, D10);
SUB_REG(S21, neon.q5.words.lh_word, u32, D10);
SUB_REG(S22, neon.q5.words.hl_word, u32, D11);
SUB_REG(S23, neon.q5.words.hh_word, u32, D11);
SUB_REG(S24, neon.q6.words.ll_word, u32, D12);
SUB_REG(S25, neon.q6.words.lh_word, u32, D12);
SUB_REG(S26, neon.q6.words.hl_word, u32, D13);
SUB_REG(S27, neon.q6.words.hh_word, u32, D13);
SUB_REG(S28, neon.q7.words.ll_word, u32, D14);
SUB_REG(S29, neon.q7.words.lh_word, u32, D14);
SUB_REG(S30, neon.q7.words.hl_word, u32, D15);
SUB_REG(S31, neon.q7.words.hh_word, u32, D15);


REG(N, sr.n, u8);
REG(C, sr.c, u8);
REG(Z, sr.z, u8);
Expand All @@ -114,7 +205,7 @@ void AArch32ArchBase::FinishLiftedFunctionInitialization(
llvm::Module *module, llvm::Function *bb_func) const {
const auto &dl = module->getDataLayout();
CHECK_EQ(sizeof(State), dl.getTypeAllocSize(StateStructType()))
<< "Mismatch between size of State type for x86/amd64 and what is in "
<< "Mismatch between size of State type for aarch32 and what is in "
<< "the bitcode module";

auto &context = module->getContext();
Expand Down
5 changes: 3 additions & 2 deletions lib/Arch/AArch32/Runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
Expand Down Expand Up @@ -46,13 +46,14 @@ function(add_runtime_helper target_name little_endian)
add_runtime(${target_name}
SOURCES ${ARMRUNTIME_SOURCEFILES}
ADDRESS_SIZE 32
DEFINITIONS "LITTLE_ENDIAN=${little_endian}" "REMILL_DISABLE_INT128=1"
DEFINITIONS "LITTLE_ENDIAN=${little_endian}"
BCFLAGS "${arch_flags}" "-std=${required_cpp_standard}"
INCLUDEDIRECTORIES "${REMILL_INCLUDE_DIR}" "${REMILL_SOURCE_DIR}"
INSTALLDESTINATION "${REMILL_INSTALL_SEMANTICS_DIR}"
ARCH arm

DEPENDENCIES
""
"${REMILL_LIB_DIR}/Arch/AArch32/Semantics/BINARY.cpp"
"${REMILL_LIB_DIR}/Arch/AArch32/Semantics/FLAGS.cpp"
"${REMILL_LIB_DIR}/Arch/AArch32/Semantics/COND.cpp"
Expand Down
8 changes: 8 additions & 0 deletions lib/Arch/Sleigh/Arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,7 @@ void SleighDecoder::ApplyFlowToInstruction(remill::Instruction &inst) const {
[&inst](const remill::Instruction::DirectFunctionCall &cat) {
// TODO(Ian) maybe add a return_to_flow for function call flows
inst.branch_not_taken_pc = inst.next_pc;
inst.branch_taken_pc = cat.taken_flow.known_target;
inst.category =
remill::Instruction::Category::kCategoryDirectFunctionCall;
},
Expand All @@ -430,29 +431,36 @@ void SleighDecoder::ApplyFlowToInstruction(remill::Instruction &inst) const {
// Visitor for the taken branch of a conditional instruction. There is one
// overload per flow kind; each one sets the matching conditional category on
// `inst` and records `inst.next_pc` as the not-taken (fall-through) PC. The
// two flow kinds that carry a known target (direct call, direct jump) also
// record that target as the taken PC.
auto conditional_applyer = Overload{
    [&inst](const remill::Instruction::DirectFunctionCall &call) -> void {
      inst.category = remill::Instruction::Category::
          kCategoryConditionalDirectFunctionCall;
      inst.branch_taken_pc = call.taken_flow.known_target;
      inst.branch_not_taken_pc = inst.next_pc;
    },
    [&inst](const remill::Instruction::IndirectFunctionCall &) -> void {
      inst.category = remill::Instruction::Category::
          kCategoryConditionalIndirectFunctionCall;
      inst.branch_not_taken_pc = inst.next_pc;
    },
    [&inst](const remill::Instruction::FunctionReturn &) -> void {
      inst.category =
          remill::Instruction::Category::kCategoryConditionalFunctionReturn;
      inst.branch_not_taken_pc = inst.next_pc;
    },
    [&inst](const remill::Instruction::AsyncHyperCall &) -> void {
      inst.category =
          remill::Instruction::Category::kCategoryConditionalAsyncHyperCall;
      inst.branch_not_taken_pc = inst.next_pc;
    },
    [&inst](const remill::Instruction::IndirectJump &) -> void {
      inst.category =
          remill::Instruction::Category::kCategoryConditionalIndirectJump;
      inst.branch_not_taken_pc = inst.next_pc;
    },
    [&inst](const remill::Instruction::DirectJump &jump) -> void {
      inst.category =
          remill::Instruction::Category::kCategoryConditionalBranch;
      inst.branch_taken_pc = jump.taken_flow.known_target;
      inst.branch_not_taken_pc = inst.next_pc;
    }};

std::visit(conditional_applyer, cat.taken_branch);
Expand Down
4 changes: 2 additions & 2 deletions lib/Arch/Sleigh/Thumb2Arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ namespace {
const size_t kThumbInstructionSize = 2;
}

//ARM7_le.sla"
// TODO(Ian): support different arm versions
SleighThumb2Decoder::SleighThumb2Decoder(const remill::Arch &arch)
: SleighDecoder(arch, "ARM7_le.sla", "ARMtTHUMB.pspec",
: SleighDecoder(arch, "ARM8_le.sla", "ARMtTHUMB.pspec",
{{"ISAModeSwitch", std::string(kThumbModeRegName)}},
{{"CY", "C"}, {"NG", "N"}, {"ZR", "Z"}, {"OV", "V"}}) {}

Expand Down
Loading

0 comments on commit 829ee4a

Please sign in to comment.