@@ -15,6 +15,14 @@ module core.thread;
public import core.time; // for Duration
import core.exception : onOutOfMemoryError;

version(LDC)
{
import ldc.attributes;
import ldc.llvmasm;

version(Windows) version = LDC_Windows;
}

version (OSX)
version = Darwin;
else version (iOS)
@@ -2490,8 +2498,6 @@ else
{
version (PPC)
{
import ldc.llvmasm;

// Nonvolatile registers, according to:
// System V Application Binary Interface
// PowerPC Processor Supplement, September 1995
@@ -2521,8 +2527,6 @@ else
}
else version (PPC64)
{
import ldc.llvmasm;

// Nonvolatile registers, according to:
// ELFv1: 64-bit PowerPC ELF ABI Supplement 1.9, July 2004
// ELFv2: Power Architecture, 64-Bit ELF V2 ABI Specification,
@@ -2553,8 +2557,6 @@ else
}
else version (AArch64)
{
import ldc.llvmasm;

// Callee-save registers, x19-x28 according to AAPCS64, section
// 5.1.1. Include x29 fp because it optionally can be a callee
// saved reg
@@ -2569,8 +2571,6 @@ else
}
else version (ARM)
{
import ldc.llvmasm;

// Callee-save registers, according to AAPCS, section 5.1.1.
// arm and thumb2 instructions
size_t[8] regs = void;
@@ -2579,8 +2579,6 @@ else
}
else version (MIPS32)
{
import ldc.llvmasm;

// Callee-save registers, according to MIPS Calling Convention
size_t[8] regs = void;
__asm(`.set noat;
@@ -2597,7 +2595,6 @@ else
}
else version (MIPS64)
{
import ldc.llvmasm;

// Callee-save registers, according to MIPSpro N32 ABI Handbook,
// chapter 2, table 2-1.
@@ -3348,12 +3345,10 @@ extern (C) @nogc nothrow
}


private void* getStackTop() nothrow @nogc
version (LDC)
{
version (LDC)
private void* getStackTop() nothrow @nogc @naked
{
import ldc.llvmasm;

/* The inline assembler is written in a style that the code can be
* inlined.
* The use of intrinsic llvm_frameaddress is a reasonable default for
@@ -3362,35 +3357,35 @@ private void* getStackTop() nothrow @nogc
*/
version (X86)
{
return __asm!(void *)("movl %esp, $0", "=r");
return __asm!(void*)("movl %esp, $0", "=r");
}
else version (X86_64)
{
return __asm!(void *)("movq %rsp, $0", "=r");
return __asm!(void*)("movq %rsp, $0", "=r");
}
else version (AArch64)
{
return __asm!(void *)("mov $0, sp", "=r");
return __asm!(void*)("mov $0, sp", "=r");
}
else version (ARM)
{
return __asm!(void *)("mov $0, sp", "=r");
return __asm!(void*)("mov $0, sp", "=r");
}
else version (PPC)
{
return __asm!(void *)("mr $0, 1", "=r");
return __asm!(void*)("mr $0, 1", "=r");
}
else version (PPC64)
{
return __asm!(void *)("mr $0, 1", "=r");
return __asm!(void*)("mr $0, 1", "=r");
}
else version (MIPS32)
{
return __asm!(void *)(".set noat; move $0, $$sp; .set at", "=r");
return __asm!(void*)(".set noat; move $0, $$sp; .set at", "=r");
}
else version (MIPS64)
{
return __asm!(void *)("move $0, $$sp", "=r");
return __asm!(void*)("move $0, $$sp", "=r");
}
else
{
@@ -3399,7 +3394,11 @@ private void* getStackTop() nothrow @nogc
return llvm_frameaddress(0);
}
}
else version (D_InlineAsm_X86)
}
else
private void* getStackTop() nothrow @nogc
{
version (D_InlineAsm_X86)
asm pure nothrow @nogc { naked; mov EAX, ESP; ret; }
else version (D_InlineAsm_X86_64)
asm pure nothrow @nogc { naked; mov RAX, RSP; ret; }
@@ -3410,22 +3409,24 @@ private void* getStackTop() nothrow @nogc
}


version(LDC_Windows)
{
// Returns the bottom (highest address) of the calling thread's stack by
// reading it out of the Windows Thread Information Block. Written as
// @naked LLVM inline asm so the read is a single instruction and is not
// perturbed by a frame of its own.
private void* getStackBottom() nothrow @nogc @naked
{
version(X86)
// Win32: TIB is addressed via %fs; offset 4 holds the stack base
// (NT_TIB.StackBase) — TODO confirm against winnt.h NT_TIB layout.
return __asm!(void*)("mov %fs:(4), $0", "=r");
else version(X86_64)
// Win64: TIB is addressed via %gs; the stack base lives at offset 8,
// passed in as the extra operand ($1 = 8) so the encoding stays compact.
return __asm!(void*)("mov %gs:0($1), $0", "=r,r", 8);
else
static assert(false, "Architecture not supported.");
}
}
else
private void* getStackBottom() nothrow @nogc
{
version (Windows)
{
version (LDC)
{
// Use LLVM inline assembler to enable inlining.
import ldc.llvmasm;
version (X86)
return __asm!(void*)("movl %fs:(4), $0", "=r");
else version (X86_64)
return __asm!(void*)("movq %gs:0($0), %rax", "={rax},r", 8);
else
static assert(false, "Architecture not supported.");
}
else version (D_InlineAsm_X86)
version (D_InlineAsm_X86)
asm pure nothrow @nogc { naked; mov EAX, FS:4; ret; }
else version(D_InlineAsm_X86_64)
asm pure nothrow @nogc
@@ -3850,7 +3851,7 @@ shared static this()

private
{
extern (C) void fiber_entryPoint() nothrow
extern (C) void fiber_entryPoint() nothrow /* LDC */ @assumeUsed
{
Fiber obj = Fiber.getThis();
assert( obj );
@@ -3879,6 +3880,122 @@ private
// Look above the definition of 'class Fiber' for some information about the implementation of this routine
version( AsmExternal )
extern (C) void fiber_switchContext( void** oldp, void* newp ) nothrow @nogc;
// LDC on Windows: hand-rolled fiber context switch in LLVM inline asm.
// Saves the callee-saved state of the current fiber onto its stack, stores
// the resulting stack pointer through `oldp`, switches %esp/%rsp to `newp`,
// restores the new fiber's saved state, and jumps to its resume address.
else version( LDC_Windows )
{
extern (C) void fiber_switchContext( void** oldp, void* newp ) nothrow @nogc @naked
{
version(X86)
{
// Must not be inlined: the code relies on its own call frame
// (return address on the stack, args at 8(%ebp)/12(%ebp)).
pragma(LDC_never_inline);

// Saves EBP/EDI/ESI/EBX plus the first three TIB fields read via
// %fs:(0)/(4)/(8) — presumably ExceptionList, StackBase, StackLimit
// (NT_TIB) — TODO confirm, so SEH and stack bounds travel with the
// fiber. The final `pop %ecx; jmp *%ecx` resumes the target fiber at
// its saved return address.
__asm(
`// save current stack state
push %ebp
mov %esp, %ebp
push %edi
push %esi
push %ebx
push %fs:(0)
push %fs:(4)
push %fs:(8)
push %eax
// store oldp again with more accurate address
mov 8(%ebp), %eax
mov %esp, (%eax)
// load newp to begin context switch
mov 12(%ebp), %esp
// load saved state from new stack
pop %eax
pop %fs:(8)
pop %fs:(4)
pop %fs:(0)
pop %ebx
pop %esi
pop %edi
pop %ebp
// 'return' to complete switch
pop %ecx
jmp *%ecx`,
"~{memory},~{ebp},~{esp},~{eax},~{ebx},~{ecx},~{esi},~{edi}"
);
}
else version(X86_64)
{
// This inline asm assumes a return address has been pushed onto the stack
// (and so a stack not aligned to 16 bytes).
pragma(LDC_never_inline);

// Win64 ABI: saves the integer callee-saved registers (rbp, r12-r15,
// rdi, rsi, rbx) and the callee-saved vector registers xmm6-xmm15
// (0xA0 = 160 bytes, aligned movdqa stores), plus three TIB fields
// read via %gs:(0)/8/16 — presumably ExceptionList, StackBase,
// StackLimit — TODO confirm. oldp arrives in %rcx and newp in %rdx
// (Win64 calling convention).
__asm(
`// save current stack state
push %rbp
mov %rsp, %rbp
push %r12
push %r13
push %r14
push %r15
push %rdi
push %rsi
// 7 registers = 56 bytes; stack is now aligned to 16 bytes
sub $$0xA0, %rsp
movdqa %xmm6, 0x90(%rsp)
movdqa %xmm7, 0x80(%rsp)
movdqa %xmm8, 0x70(%rsp)
movdqa %xmm9, 0x60(%rsp)
movdqa %xmm10, 0x50(%rsp)
movdqa %xmm11, 0x40(%rsp)
movdqa %xmm12, 0x30(%rsp)
movdqa %xmm13, 0x20(%rsp)
movdqa %xmm14, 0x10(%rsp)
movdqa %xmm15, (%rsp)
push %rbx
xor %rax, %rax
push %gs:(%rax)
push %gs:8(%rax)
push %gs:16(%rax)
// store oldp
mov %rsp, (%rcx)
// load newp to begin context switch
mov %rdx, %rsp
// load saved state from new stack
pop %gs:16(%rax)
pop %gs:8(%rax)
pop %gs:(%rax)
pop %rbx;
movdqa (%rsp), %xmm15
movdqa 0x10(%rsp), %xmm14
movdqa 0x20(%rsp), %xmm13
movdqa 0x30(%rsp), %xmm12
movdqa 0x40(%rsp), %xmm11
movdqa 0x50(%rsp), %xmm10
movdqa 0x60(%rsp), %xmm9
movdqa 0x70(%rsp), %xmm8
movdqa 0x80(%rsp), %xmm7
movdqa 0x90(%rsp), %xmm6
add $$0xA0, %rsp
pop %rsi
pop %rdi
pop %r15
pop %r14
pop %r13
pop %r12
pop %rbp
// 'return' to complete switch
pop %rcx
jmp *%rcx`,
"~{memory},~{rbp},~{rsp},~{rax},~{rbx},~{rcx},~{rsi},~{rdi},~{r12},~{r13},~{r14},~{r15}," ~
"~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"
);
}
else
// No LDC_Windows asm variant for other architectures.
static assert(false);
}
}
else
extern (C) void fiber_switchContext( void** oldp, void* newp ) nothrow @nogc
{
@@ -5072,15 +5189,26 @@ private:
__gshared static fp_t finalHandler = null;
if ( finalHandler is null )
{
static EXCEPTION_REGISTRATION* fs0() nothrow
version(LDC)
{
asm pure nothrow @nogc
static EXCEPTION_REGISTRATION* fs0() nothrow @naked
{
naked;
mov EAX, FS:[0];
ret;
return __asm!(EXCEPTION_REGISTRATION*)("mov %fs:(0), $0", "=r");
}
}
else
{
static EXCEPTION_REGISTRATION* fs0() nothrow
{
asm pure nothrow @nogc
{
naked;
mov EAX, FS:[0];
ret;
}
}
}

auto reg = fs0();
while ( reg.next != sehChainEnd ) reg = reg.next;

@@ -5117,15 +5245,31 @@ private:
// to be shifted by 8 bytes for the first call, as fiber_entryPoint
// is an actual function expecting a stack which is not aligned
// to 16 bytes.
static void trampoline()
version(LDC)
{
asm pure nothrow @nogc
static void trampoline() @naked
{
naked;
sub RSP, 32; // Shadow space (Win64 calling convention)
call fiber_entryPoint;
xor RCX, RCX; // This should never be reached, as
jmp RCX; // fiber_entryPoint must never return.
__asm(
`sub $$32, %rsp
call fiber_entryPoint
xor %rcx, %rcx
jmp *%rcx`,
"~{rsp},~{rcx}"
);
}
}
else
{
static void trampoline()
{
asm pure nothrow @nogc
{
naked;
sub RSP, 32; // Shadow space (Win64 calling convention)
call fiber_entryPoint;
xor RCX, RCX; // This should never be reached, as
jmp RCX; // fiber_entryPoint must never return.
}
}
}

@@ -10,6 +10,8 @@ import core.sys.windows.windows;
import core.exception : onOutOfMemoryError, OutOfMemoryError;
import core.stdc.stdlib : malloc, free, abort;
import core.stdc.string : memcpy;
import ldc.attributes;
import ldc.llvmasm;
import rt.util.container.common : xmalloc;

// pointers are image relative for Win64 versions
@@ -325,173 +327,171 @@ extern(C) terminate_handler set_terminate(terminate_handler new_handler);
terminate_handler old_terminate_handler; // explicitely per thread

// helper to access TLS from naked asm
size_t tlsUncaughtExceptions() nothrow
size_t tlsUncaughtExceptions() nothrow @assumeUsed
{
return exceptionStack.length;
}

auto tlsOldTerminateHandler() nothrow
auto tlsOldTerminateHandler() nothrow @assumeUsed
{
return old_terminate_handler;
}

void msvc_eh_terminate() nothrow
void msvc_eh_terminate() nothrow @naked
{
version(Win32)
{
asm nothrow
{
naked;
call tlsUncaughtExceptions;
cmp EAX, 1;
jle L_term;
__asm(
`call __D3ldc7eh_msvc21tlsUncaughtExceptionsFNbZk
cmp $$1, %eax
jle L_term
// hacking into the call chain to return EXCEPTION_EXECUTE_HANDLER
// as the return value of __FrameUnwindFilter so that
// __FrameUnwindToState continues with the next unwind block
// undo one level of exception frames from terminate()
mov EAX,FS:[0];
mov EAX,[EAX];
mov FS:[0], EAX;
mov %fs:(0), %eax
mov (%eax), %eax
mov %eax, %fs:(0)
// assume standard stack frames for callers
mov EAX,EBP; // frame pointer of terminate()
mov EAX,[EAX]; // frame pointer of __FrameUnwindFilter
mov ESP,EAX; // restore stack
pop EBP; // and frame pointer
mov EAX, 1; // return EXCEPTION_EXECUTE_HANDLER
ret;
mov %ebp, %eax // frame pointer of terminate()
mov (%eax), %eax // frame pointer of __FrameUnwindFilter
mov %eax, %esp // restore stack
pop %ebp // and frame pointer
mov $$1, %eax // return EXCEPTION_EXECUTE_HANDLER
ret
L_term:
call tlsOldTerminateHandler;
cmp EAX, 0;
je L_ret;
jmp EAX;
call __D3ldc7eh_msvc22tlsOldTerminateHandlerFNbNiNfZPFZv
cmp $$0, %eax
je L_ret
jmp *%eax
L_ret:
ret;
}
ret`,
"~{memory},~{flags},~{ebp},~{esp},~{eax}"
);
}
else
{
asm nothrow
{
naked;
push RBX; // align stack for better debuggability
call tlsUncaughtExceptions;
cmp RAX, 1;
jle L_term;
__asm(
`push %rbx // align stack for better debuggability
call _D3ldc7eh_msvc21tlsUncaughtExceptionsFNbZm
cmp $$1, %rax
jle L_term
// update stack and IP so we just continue in __FrameUnwindHandler
// NOTE: these checks can fail if you have breakpoints set at
// the respective code locations
mov RAX,[RSP+8]; // get return address
cmp byte ptr[RAX], 0xEB; // jmp?
jne noJump;
movsx RDX, byte ptr[RAX+1]; // follow jmp
lea RAX,[RAX+RDX+2];
mov 8(%rsp), %rax // get return address
cmpb $$0xEB, (%rax) // jmp?
jne noJump
movsbq 1(%rax), %rdx // follow jmp
lea 2(%rax,%rdx), %rax
noJump:
cmp byte ptr[RAX], 0xE8; // call abort?
jne L_term;
add RAX,5;
mov EDX,[RAX];
mov RBX, 0xFFFFFF;
and RDX, RBX;
cmp RDX, 0xC48348; // add ESP,nn (debug UCRT libs)
je L_addESP_found;
cmp DL, 0x90; // nop; (release libs)
jne L_term;
cmpb $$0xE8, (%rax) // call abort?
jne L_term
add $$5, %rax
mov (%rax), %edx
mov $$0xFFFFFF, %rbx
and %rbx, %rdx
cmp $$0xC48348, %rdx // add ESP,nn (debug UCRT libs)
je L_addESP_found
cmp $$0x90, %dl // nop; (release libs)
jne L_term
L_release_ucrt:
mov RDX,[RSP+8];
cmp word ptr[RDX-2], 0xD3FF; // call ebx?
sete BL; // if not, it's UCRT 10.0.14393.0
movzx RBX,BL;
mov RDX, 0x28; // release build of vcruntimelib
jmp L_retTerminate;
mov 8(%rsp), %rdx
cmpw $$0xD3FF, -2(%rdx) // call ebx?
sete %bl // if not, it's UCRT 10.0.14393.0
movzbq %bl, %rbx
mov $$0x28, %rdx // release build of vcruntimelib
jmp L_retTerminate
L_addESP_found:
xor RBX,RBX; // debug version: RBX not pushed inside terminate()
movzx RDX,byte ptr[RAX+3]; // read nn
xor %rbx, %rbx // debug version: RBX not pushed inside terminate()
movzbq 3(%rax), %rdx // read nn
cmp byte ptr [RAX+4], 0xC3; // ret?
jne L_term;
cmpb $$0xC3, 4(%rax) // ret?
jne L_term
L_retTerminate:
lea RDX,[RSP+RDX+0x10]; // RSP before returning from terminate()
lea 0x10(%rsp,%rdx), %rdx // RSP before returning from terminate()
mov RAX,[RDX]; // return address inside __FrameUnwindHandler
mov (%rdx), %rax // return address inside __FrameUnwindHandler
or RDX,RBX; // RDX aligned, save RBX == 0 for UCRT 10.0.14393.0, 1 otherwise
or %rbx, %rdx // RDX aligned, save RBX == 0 for UCRT 10.0.14393.0, 1 otherwise
cmp byte ptr [RAX-19], 0xEB; // skip back to default jump inside "switch" (libvcruntimed.lib)
je L_switchFound;
cmpb $$0xEB, -19(%rax) // skip back to default jump inside "switch" (libvcruntimed.lib)
je L_switchFound
cmp byte ptr [RAX-20], 0xEB; // skip back to default jump inside "switch" (vcruntime140d.dll)
je L_switchFound2;
cmpb $$0xEB, -20(%rax) // skip back to default jump inside "switch" (vcruntime140d.dll)
je L_switchFound2
mov RBX, 0xc48348c0333048ff; // dec [rax+30h]; xor eax,eax; add rsp,nn (libvcruntime.lib)
cmp RBX,[RAX-0x18];
je L_retFound;
mov $$0xC48348C0333048FF, %rbx // dec [rax+30h]; xor eax,eax; add rsp,nn (libvcruntime.lib)
cmp -0x18(%rax), %rbx
je L_retFound
cmp RBX,[RAX+0x29]; // dec [rax+30h]; xor eax,eax; add rsp,nn (vcruntime140.dll)
je L_retVC14_11;
cmp 0x29(%rax), %rbx // dec [rax+30h]; xor eax,eax; add rsp,nn (vcruntime140.dll)
je L_retVC14_11
cmp RBX,[RAX+0x1b]; // dec [rax+30h]; xor eax,eax; add rsp,nn (vcruntime140.dll 14.14.x.y)
jne L_term;
lea RAX, [RAX+0x20];
jmp L_retContinue;

L_retVC14_11: // vcruntime140 14.11.25415.0 or earlier
lea RAX, [RAX+0x2E];
L_retContinue: // vcruntime140 14.00.23026.0 or later?
cmp word ptr[RAX], 0x8348; // add rsp,nn?
je L_xorSkipped;
cmp 0x1B(%rax), %rbx // dec [rax+30h]; xor eax,eax; add rsp,nn (vcruntime140.dll 14.14.x.y)
jne L_term
lea 0x20(%rax), %rax
jmp L_retContinue
inc RAX; // vcruntime140 earlier than 14.00.23026.0?
jmp L_xorSkipped;
L_retVC14_11: // vcruntime140 14.11.25415.0 or earlier
lea 0x2E(%rax), %rax
L_retContinue: // vcruntime140 14.00.23026.0 or later?
cmpw $$0x8348, (%rax) // add rsp,nn?
je L_xorSkipped
inc %rax // vcruntime140 earlier than 14.00.23026.0?
jmp L_xorSkipped
L_retFound:
lea RAX, [RAX-19];
jmp L_xorSkipped;
lea -19(%rax), %rax
jmp L_xorSkipped
L_switchFound2:
dec RAX;
dec %rax
L_switchFound:
movsx RBX, byte ptr [RAX-18]; // follow jump
lea RAX, [RAX+RBX-17];
movsbq -18(%rax), %rbx // follow jump
lea -17(%rax,%rbx), %rax
cmp word ptr[RAX],0xC033; // xor EAX,EAX?
jne L_term;
cmpw $$0xC033, (%rax) // xor EAX,EAX?
jne L_term
add RAX,2;
add $$2, %rax
L_xorSkipped:
mov RBX, RDX; // extract UCRT marker from EDX
and RDX, ~1;
and RBX, 1;
mov %rdx, %rbx // extract UCRT marker from EDX
and $$~1, %rdx
and $$1, %rbx
cmovnz RBX,[RDX-8]; // restore RBX (pushed inside terminate())
cmovz RBX,[RSP]; // RBX not changed in terminate inside UCRT 10.0.14393.0
cmovnz -8(%rdx), %rbx // restore RBX (pushed inside terminate())
cmovz (%rsp), %rbx // RBX not changed in terminate inside UCRT 10.0.14393.0
lea RSP,[RDX+8];
push RAX; // new return after setting return value in __frameUnwindHandler
lea 8(%rdx), %rsp
push %rax // new return after setting return value in __frameUnwindHandler
call __processing_throw;
mov [RAX], 1;
call __processing_throw
movq $$1, (%rax)
//add RSP,0x68; // TODO: needs to be verified for different CRT builds
mov RAX,1; // return EXCEPTION_EXECUTE_HANDLER
ret;
//add $$0x68, %rsp // TODO: needs to be verified for different CRT builds
mov $$1, %rax // return EXCEPTION_EXECUTE_HANDLER
ret
L_term:
call tlsOldTerminateHandler;
pop RBX;
cmp RAX, 0;
je L_ret;
jmp RAX;
call _D3ldc7eh_msvc22tlsOldTerminateHandlerFNbNiNfZPFZv
pop %rbx
cmp $$0, %rax
je L_ret
jmp *%rax
L_ret:
ret;
}
ret`,
"~{memory},~{flags},~{rbp},~{rsp},~{rax},~{rbx},~{rdx}"
);
}
}

@@ -843,7 +843,12 @@ else
{
extern (D) void QueryPerformanceCounter(timer_t* ctr)
{
version (D_InlineAsm_X86)
version (LDC)
{
import ldc.intrinsics: llvm_readcyclecounter;
*ctr = llvm_readcyclecounter();
}
else version (D_InlineAsm_X86)
{
asm
{
@@ -866,11 +871,6 @@ else
ret ;
}
}
else version (LDC)
{
import ldc.intrinsics: llvm_readcyclecounter;
*ctr = llvm_readcyclecounter();
}
else
{
static assert(0);