Skip to content

Commit

Permalink
[ORC] add lazy jit support for riscv64
Browse files Browse the repository at this point in the history
This adds resolver, indirection and trampoline stubs for riscv64,
allowing lazy compilation to work.

It assumes the hard-float extensions are present. I don't know the proper way to detect them, as Triple doesn't provide an interface to check for the RISC-V +f/+d ABI.

I am also not sure whether the orclazy tests should be enabled, because lli needs an additional -codemodel=medany for the tests to pass.

Reviewed By: lhames

Differential Revision: https://reviews.llvm.org/D122543
  • Loading branch information
alexfanqi committed Apr 10, 2022
1 parent c74d729 commit acb408f
Show file tree
Hide file tree
Showing 5 changed files with 228 additions and 0 deletions.
39 changes: 39 additions & 0 deletions llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
Expand Up @@ -330,6 +330,45 @@ class OrcMips64 {
JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
};

// @brief riscv64 support.
//
// RISC-V 64 supports lazy JITing.
//
// Describes the sizes of the riscv64 resolver, trampolines and indirect
// stubs, and declares the routines that write them into caller-provided
// working memory.
class OrcRiscv64 {
public:
  /// Size in bytes of one entry in the indirect-stub pointer block.
  static constexpr unsigned PointerSize = 8;
  /// Size in bytes of one trampoline (three instructions plus one padding
  /// word).
  static constexpr unsigned TrampolineSize = 16;
  /// Size in bytes of one indirect stub (three instructions plus one padding
  /// word).
  static constexpr unsigned StubSize = 16;
  /// Largest stub-to-pointer distance the stubs are expected to span. The
  /// literal is unsigned so that bit 31 is set without shifting a signed 1
  /// into the sign bit.
  static constexpr unsigned StubToPointerMaxDisplacement = 1U << 31;
  /// Size in bytes of the resolver block written by writeResolverCode,
  /// including the two 8-byte pointer slots at its end.
  static constexpr unsigned ResolverCodeSize = 0x148;

  /// Write the resolver code into the given memory. The user is
  /// responsible for allocating the memory and setting permissions.
  ///
  /// ReentryFnAddr should be the address of a function whose signature matches
  /// void* (*)(void *ReentryCtxAddr, void *TrampolineAddr). The ReentryCtxAddr
  /// argument of writeResolverCode will be passed as the first argument to
  /// the function at ReentryFnAddr, and the address of the trampoline that
  /// entered the resolver will be passed as the second. The resolver jumps to
  /// the address that the function returns.
  static void writeResolverCode(char *ResolverWorkingMem,
                                JITTargetAddress ResolverTargetAddress,
                                JITTargetAddress ReentryFnAddr,
                                JITTargetAddress ReentryCtxAddr);

  /// Write the requested number of trampolines into the given memory,
  /// which must be big enough to hold 1 pointer, plus NumTrampolines
  /// trampolines. The pointer (ResolverFnAddr) is stored after the last
  /// trampoline, and every trampoline loads it PC-relatively.
  static void writeTrampolines(char *TrampolineBlockWorkingMem,
                               JITTargetAddress TrampolineBlockTargetAddress,
                               JITTargetAddress ResolverFnAddr,
                               unsigned NumTrampolines);

  /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem.
  /// Stubs will be written as if linked at StubsBlockTargetAddress, with the
  /// Nth stub using the Nth pointer in memory starting at
  /// PointersBlockTargetAddress.
  static void writeIndirectStubsBlock(
      char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
      JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
};

} // end namespace orc
} // end namespace llvm

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
Expand Up @@ -261,6 +261,9 @@ EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) {
case Triple::mips64el:
return CreateWithABI<OrcMips64>(EPC);

case Triple::riscv64:
return CreateWithABI<OrcRiscv64>(EPC);

case Triple::x86_64:
if (TT.getOS() == Triple::OSType::Win32)
return CreateWithABI<OrcX86_64_Win32>(EPC);
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
Expand Up @@ -152,6 +152,11 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES,
return CCMgrT::Create(ES, ErrorHandlerAddress);
}

case Triple::riscv64: {
typedef orc::LocalJITCompileCallbackManager<orc::OrcRiscv64> CCMgrT;
return CCMgrT::Create(ES, ErrorHandlerAddress);
}

case Triple::x86_64: {
if (T.getOS() == Triple::OSType::Win32) {
typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_Win32> CCMgrT;
Expand Down Expand Up @@ -206,6 +211,12 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) {
orc::LocalIndirectStubsManager<orc::OrcMips64>>();
};

case Triple::riscv64:
return []() {
return std::make_unique<
orc::LocalIndirectStubsManager<orc::OrcRiscv64>>();
};

case Triple::x86_64:
if (T.getOS() == Triple::OSType::Win32) {
return [](){
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
Expand Up @@ -131,6 +131,10 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
case Triple::mips64el:
return LocalLazyCallThroughManager::Create<OrcMips64>(ES, ErrorHandlerAddr);

case Triple::riscv64:
return LocalLazyCallThroughManager::Create<OrcRiscv64>(ES,
ErrorHandlerAddr);

case Triple::x86_64:
if (T.getOS() == Triple::OSType::Win32)
return LocalLazyCallThroughManager::Create<OrcX86_64_Win32>(
Expand Down
171 changes: 171 additions & 0 deletions llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
Expand Up @@ -906,5 +906,176 @@ void OrcMips64::writeIndirectStubsBlock(
Stub[8 * I + 7] = 0x00000000; // nop
}
}

void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem,
JITTargetAddress ResolverTargetAddress,
JITTargetAddress ReentryFnAddr,
JITTargetAddress ReentryCtxAddr) {

const uint32_t ResolverCode[] = {
0xef810113, // 0x00: addi sp,sp,-264
0x00813023, // 0x04: sd s0,0(sp)
0x00913423, // 0x08: sd s1,8(sp)
0x01213823, // 0x0c: sd s2,16(sp)
0x01313c23, // 0x10: sd s3,24(sp)
0x03413023, // 0x14: sd s4,32(sp)
0x03513423, // 0x18: sd s5,40(sp)
0x03613823, // 0x1c: sd s6,48(sp)
0x03713c23, // 0x20: sd s7,56(sp)
0x05813023, // 0x24: sd s8,64(sp)
0x05913423, // 0x28: sd s9,72(sp)
0x05a13823, // 0x2c: sd s10,80(sp)
0x05b13c23, // 0x30: sd s11,88(sp)
0x06113023, // 0x34: sd ra,96(sp)
0x06a13423, // 0x38: sd a0,104(sp)
0x06b13823, // 0x3c: sd a1,112(sp)
0x06c13c23, // 0x40: sd a2,120(sp)
0x08d13023, // 0x44: sd a3,128(sp)
0x08e13423, // 0x48: sd a4,136(sp)
0x08f13823, // 0x4c: sd a5,144(sp)
0x09013c23, // 0x50: sd a6,152(sp)
0x0b113023, // 0x54: sd a7,160(sp)
0x0a813427, // 0x58: fsd fs0,168(sp)
0x0a913827, // 0x5c: fsd fs1,176(sp)
0x0b213c27, // 0x60: fsd fs2,184(sp)
0x0d313027, // 0x64: fsd fs3,192(sp)
0x0d413427, // 0x68: fsd fs4,200(sp)
0x0d513827, // 0x6c: fsd fs5,208(sp)
0x0d613c27, // 0x70: fsd fs6,216(sp)
0x0f713027, // 0x74: fsd fs7,224(sp)
0x0f813427, // 0x78: fsd fs8,232(sp)
0x0f913827, // 0x7c: fsd fs9,240(sp)
0x0fa13c27, // 0x80: fsd fs10,248(sp)
0x11b13027, // 0x84: fsd fs11,256(sp)
0x00000517, // 0x88: auipc a0,0x0
0x0b053503, // 0x8c: ld a0,176(a0) # 0x138
0x00030593, // 0x90: mv a1,t1
0xff458593, // 0x94: addi a1,a1,-12
0x00000617, // 0x98: auipc a2,0x0
0x0a863603, // 0x9c: ld a2,168(a2) # 0x140
0x000600e7, // 0xa0: jalr a2
0x00050293, // 0xa4: mv t0,a0
0x00013403, // 0xa8: ld s0,0(sp)
0x00813483, // 0xac: ld s1,8(sp)
0x01013903, // 0xb0: ld s2,16(sp)
0x01813983, // 0xb4: ld s3,24(sp)
0x02013a03, // 0xb8: ld s4,32(sp)
0x02813a83, // 0xbc: ld s5,40(sp)
0x03013b03, // 0xc0: ld s6,48(sp)
0x03813b83, // 0xc4: ld s7,56(sp)
0x04013c03, // 0xc8: ld s8,64(sp)
0x04813c83, // 0xcc: ld s9,72(sp)
0x05013d03, // 0xd0: ld s10,80(sp)
0x05813d83, // 0xd4: ld s11,88(sp)
0x06013083, // 0xd8: ld ra,96(sp)
0x06813503, // 0xdc: ld a0,104(sp)
0x07013583, // 0xe0: ld a1,112(sp)
0x07813603, // 0xe4: ld a2,120(sp)
0x08013683, // 0xe8: ld a3,128(sp)
0x08813703, // 0xec: ld a4,136(sp)
0x09013783, // 0xf0: ld a5,144(sp)
0x09813803, // 0xf4: ld a6,152(sp)
0x0a013883, // 0xf8: ld a7,160(sp)
0x0a813407, // 0xfc: fld fs0,168(sp)
0x0b013487, // 0x100: fld fs1,176(sp)
0x0b813907, // 0x104: fld fs2,184(sp)
0x0c013987, // 0x108: fld fs3,192(sp)
0x0c813a07, // 0x10c: fld fs4,200(sp)
0x0d013a87, // 0x110: fld fs5,208(sp)
0x0d813b07, // 0x114: fld fs6,216(sp)
0x0e013b87, // 0x118: fld fs7,224(sp)
0x0e813c07, // 0x11c: fld fs8,232(sp)
0x0f013c87, // 0x120: fld fs9,240(sp)
0x0f813d07, // 0x124: fld fs10,248(sp)
0x10013d87, // 0x128: fld fs11,256(sp)
0x10810113, // 0x12c: addi sp,sp,264
0x00028067, // 0x130: jr t0
0x12345678, // 0x134: padding to align at 8 byte
0x12345678, // 0x138: Lreentry_ctx_ptr:
0xdeadbeef, // 0x13c: .quad 0
0x98765432, // 0x140: Lreentry_fn_ptr:
0xcafef00d // 0x144: .quad 0
};

const unsigned ReentryCtxAddrOffset = 0x138;
const unsigned ReentryFnAddrOffset = 0x140;

memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
sizeof(uint64_t));
memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
sizeof(uint64_t));
}

// Emit NumTrampolines trampolines into TrampolineBlockWorkingMem and store
// ResolverAddr in the 8-byte slot that follows them. Each trampoline is
//
//   auipc t0, %hi(Lptr)     ; PC-relative address of the resolver slot
//   ld    t0, %lo(Lptr)(t0) ; load the resolver address
//   jalr  t1, t0            ; enter the resolver, linking through t1
//   <padding word>
//
// TrampolineBlockTargetAddress is not needed: the slot is addressed relative
// to each trampoline's own PC.
void OrcRiscv64::writeTrampolines(char *TrampolineBlockWorkingMem,
                                  JITTargetAddress TrampolineBlockTargetAddress,
                                  JITTargetAddress ResolverAddr,
                                  unsigned NumTrampolines) {
  // Instruction templates; the immediates are OR-ed in per trampoline.
  const uint32_t AuipcT0 = 0x00000297;   // auipc t0, 0
  const uint32_t LdT0ViaT0 = 0x0002b283; // ld t0, 0(t0)
  const uint32_t JalrT1T0 = 0x00028367;  // jalr t1, t0
  const uint32_t PadWord = 0xdeadface;   // padding

  // The resolver pointer lives directly after the last trampoline.
  const unsigned ResolverSlotOffset =
      alignTo(NumTrampolines * TrampolineSize, 8);
  memcpy(TrampolineBlockWorkingMem + ResolverSlotOffset, &ResolverAddr,
         sizeof(uint64_t));

  uint32_t *Out = reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);

  // Distance from the trampoline being written to the resolver slot; it
  // shrinks by one trampoline per iteration.
  unsigned Distance = ResolverSlotOffset;
  for (unsigned Remaining = NumTrampolines; Remaining != 0; --Remaining) {
    // Split the distance into the auipc upper part (rounded so that the
    // sign-extended 12-bit remainder lands on the slot) and the ld offset.
    const uint32_t Upper = (Distance + 0x800) & 0xFFFFF000;
    const uint32_t Lower = Distance - Upper;

    *Out++ = AuipcT0 | Upper;
    *Out++ = LdT0ViaT0 | ((Lower & 0xFFF) << 20);
    *Out++ = JalrT1T0;
    *Out++ = PadWord;

    Distance -= TrampolineSize;
  }
}

// Emit NumStubs indirect stubs into StubsBlockWorkingMem, encoded as if they
// were located at StubsBlockTargetAddress, where stub N jumps through pointer
// N of the block at PointersBlockTargetAddress.
void OrcRiscv64::writeIndirectStubsBlock(
    char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
    JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) {
  // Resulting layout (each stub is four 32-bit words):
  //
  //   .section __orc_stubs
  //   stub1:
  //     auipc t0, %hi(ptr1)      ; PC-relative load of ptr1
  //     ld    t0, %lo(ptr1)(t0)
  //     jr    t0                 ; jump to the address stored in ptr1
  //     .word 0xfeedbeef         ; padding word
  //   stub2:
  //     auipc t0, %hi(ptr2)      ; PC-relative load of ptr2
  //     ld    t0, %lo(ptr2)(t0)
  //     jr    t0                 ; jump to the address stored in ptr2
  //     .word 0xfeedbeef
  //
  //   ...
  //
  //   .section __orc_ptrs
  //   ptr1:
  //     .quad 0x0
  //   ptr2:
  //     .quad 0x0
  //
  //   ...

  assert(stubAndPointerRangesOk<OrcRiscv64>(
             StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
         "PointersBlock is out of range");

  // Instruction templates; the immediates are OR-ed in per stub.
  const uint32_t AuipcT0 = 0x00000297;   // auipc t0, 0
  const uint32_t LdT0ViaT0 = 0x0002b283; // ld t0, 0(t0)
  const uint32_t JrT0 = 0x00028067;      // jr t0
  const uint32_t PadWord = 0xfeedbeef;   // padding

  uint32_t *Out = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);

  // Displacement from the current stub to its pointer. Moving to the next
  // stub/pointer pair advances the pointer by PointerSize and the stub by
  // StubSize.
  uint64_t Displacement = PointersBlockTargetAddress - StubsBlockTargetAddress;
  for (unsigned Remaining = NumStubs; Remaining != 0; --Remaining) {
    // Upper part for auipc (rounded so the sign-extended low 12 bits reach
    // the pointer) and the remaining offset for ld.
    const uint32_t Upper = (Displacement + 0x800) & 0xFFFFF000;
    const uint32_t Lower = Displacement - Upper;

    *Out++ = AuipcT0 | Upper;
    *Out++ = LdT0ViaT0 | ((Lower & 0xFFF) << 20);
    *Out++ = JrT0;
    *Out++ = PadWord;

    Displacement += PointerSize;
    Displacement -= StubSize;
  }
}

} // End namespace orc.
} // End namespace llvm.

0 comments on commit acb408f

Please sign in to comment.