/* -----------------------------------------------------------------------------
*
* (c) The GHC Team, 1998-2011
*
* Out-of-line primitive operations
*
* This file contains the implementations of all the primitive
* operations ("primops") which are not expanded inline. See
* ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
* this file contains code for most of those with the attribute
* out_of_line=True.
*
* Entry convention: the entry convention for a primop is that all the
* args are in Stg registers (R1, R2, etc.). This is to make writing
* the primops easier. (see compiler/codeGen/CgCallConv.hs).
*
* Return convention: results from a primop are generally returned
* using the ordinary unboxed tuple return convention. The C-- parser
* implements the RET_xxxx() macros to perform unboxed-tuple returns
* based on the prevailing return convention.
*
* This file is written in a subset of C--, extended with various
* features specific to GHC. It is compiled by GHC directly. For the
* syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
*
* ---------------------------------------------------------------------------*/
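/* Illustration (not part of this file): on the Haskell side these primops
 * surface through GHC.Prim/GHC.Exts and return unboxed tuples, matching the
 * return convention described above. A minimal sketch wrapping newByteArray#
 * (implemented by stg_newByteArrayzh below); the wrapper name and the MBA
 * type are hypothetical:
 *
 *   {-# LANGUAGE MagicHash, UnboxedTuples #-}
 *   import GHC.IO   (IO (..))
 *   import GHC.Exts (Int (..), MutableByteArray#, RealWorld, newByteArray#)
 *
 *   data MBA = MBA (MutableByteArray# RealWorld)
 *
 *   -- Allocate an n-byte mutable byte array in IO.
 *   newBA :: Int -> IO MBA
 *   newBA (I# n) = IO (\s -> case newByteArray# n s of
 *                              (# s', mba #) -> (# s', MBA mba #))
 */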
#include "Cmm.h"
#ifdef __PIC__
import pthread_mutex_lock;
import pthread_mutex_unlock;
#endif
import base_ControlziExceptionziBase_nestedAtomically_closure;
import EnterCriticalSection;
import LeaveCriticalSection;
import ghczmprim_GHCziTypes_False_closure;
#if defined(GhcUnregisterised) || !defined(mingw32_HOST_OS)
import sm_mutex;
#endif
/*-----------------------------------------------------------------------------
Array Primitives
Basically just new*Array - the others are all inline macros.
The size arg is always passed in R1, and the result returned in R1.
The slow entry point is for returning from a heap check; the saved
size argument must be re-loaded from the stack.
-------------------------------------------------------------------------- */
/* for objects that are *less* than the size of a word, make sure we
* round up to the nearest word for the size of the array.
*/
stg_newByteArrayzh
{
W_ words, payload_words, n, p;
MAYBE_GC(NO_PTRS,stg_newByteArrayzh);
n = R1;
payload_words = ROUNDUP_BYTES_TO_WDS(n);
words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) [];
TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrWords_bytes(p) = n;
RET_P(p);
}
#define BA_ALIGN 16
#define BA_MASK (BA_ALIGN-1)
stg_newPinnedByteArrayzh
{
W_ words, n, bytes, payload_words, p;
MAYBE_GC(NO_PTRS,stg_newPinnedByteArrayzh);
n = R1;
bytes = n;
/* payload_words is what we will tell the profiler we had to allocate */
payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
/* When we actually allocate memory, we need to allow space for the
header: */
bytes = bytes + SIZEOF_StgArrWords;
/* And we want to align to BA_ALIGN bytes, so we need to allow space
to shift up to BA_ALIGN - 1 bytes: */
bytes = bytes + BA_ALIGN - 1;
/* Now we convert to a number of words: */
words = ROUNDUP_BYTES_TO_WDS(bytes);
("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
/* Now we need to move p forward so that the payload is aligned
to BA_ALIGN bytes: */
p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);
SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrWords_bytes(p) = n;
RET_P(p);
}
stg_newAlignedPinnedByteArrayzh
{
W_ words, n, bytes, payload_words, p, alignment;
MAYBE_GC(NO_PTRS,stg_newAlignedPinnedByteArrayzh);
n = R1;
alignment = R2;
/* we always supply at least word-aligned memory, so there's no
need to allow extra space for alignment if the requirement is less
than a word. This also prevents mischief with alignment == 0. */
if (alignment <= SIZEOF_W) { alignment = 1; }
bytes = n;
/* payload_words is what we will tell the profiler we had to allocate */
payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
/* When we actually allocate memory, we need to allow space for the
header: */
bytes = bytes + SIZEOF_StgArrWords;
/* And we want to align to <alignment> bytes, so we need to allow space
to shift up to <alignment - 1> bytes: */
bytes = bytes + alignment - 1;
/* Now we convert to a number of words: */
words = ROUNDUP_BYTES_TO_WDS(bytes);
("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
/* Now we need to move p forward so that the payload is aligned
to <alignment> bytes. Note that we are assuming that
<alignment> is a power of 2, which is technically not guaranteed */
p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));
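/* Worked example (illustrative figures only): suppose SIZEOF_StgArrWords is
   16 bytes, <alignment> is 16, and allocatePinned returned p = 0x1008. The
   payload would start at 0x1018, which is 8 bytes past a 16-byte boundary;
   (-p - 16) & 15 = 8, so p becomes 0x1010 and the payload starts at the
   aligned address 0x1020. */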
SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrWords_bytes(p) = n;
RET_P(p);
}
stg_newArrayzh
{
W_ words, n, init, arr, p, size;
/* Args: R1 = words, R2 = initialisation value */
n = R1;
MAYBE_GC(R2_PTR,stg_newArrayzh);
// the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
// in the array, making sure we round up, and then rounding up to a whole
// number of words.
size = n + mutArrPtrsCardWords(n);
words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2];
TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
StgMutArrPtrs_ptrs(arr) = n;
StgMutArrPtrs_size(arr) = size;
// Initialise all elements of the array with the value in R2
init = R2;
p = arr + SIZEOF_StgMutArrPtrs;
for:
if (p < arr + WDS(words)) {
W_[p] = init;
p = p + WDS(1);
goto for;
}
// Initialise the mark bits with 0
for2:
if (p < arr + WDS(size)) {
W_[p] = 0;
p = p + WDS(1);
goto for2;
}
RET_P(arr);
}
stg_unsafeThawArrayzh
{
// SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
//
// A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
// normally doesn't. However, when we freeze a MUT_ARR_PTRS, we leave
// it on the mutable list for the GC to remove (removing something from
// the mutable list is not easy).
//
// So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
// when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
// to indicate that it is still on the mutable list.
//
// So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
// either it is on a mut_list, or it isn't. We adopt the convention that
// the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
// and MUT_ARR_PTRS_FROZEN otherwise. In fact it wouldn't matter if
// we put it on the mutable list more than once, but it would get scavenged
// multiple times during GC, which would be unnecessarily slow.
//
if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
recordMutable(R1, R1);
// must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
RET_P(R1);
} else {
SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
RET_P(R1);
}
}
stg_newArrayArrayzh
{
W_ words, n, arr, p, size;
/* Args: R1 = words */
n = R1;
MAYBE_GC(NO_PTRS,stg_newArrayArrayzh);
// the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
// in the array, making sure we round up, and then rounding up to a whole
// number of words.
size = n + mutArrPtrsCardWords(n);
words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [];
TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
StgMutArrPtrs_ptrs(arr) = n;
StgMutArrPtrs_size(arr) = size;
// Initialise all elements of the array with a pointer to the new array
p = arr + SIZEOF_StgMutArrPtrs;
for:
if (p < arr + WDS(words)) {
W_[p] = arr;
p = p + WDS(1);
goto for;
}
// Initialise the mark bits with 0
for2:
if (p < arr + WDS(size)) {
W_[p] = 0;
p = p + WDS(1);
goto for2;
}
RET_P(arr);
}
/* -----------------------------------------------------------------------------
MutVar primitives
-------------------------------------------------------------------------- */
stg_newMutVarzh
{
W_ mv;
/* Args: R1 = initialisation value */
ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh);
mv = Hp - SIZEOF_StgMutVar + WDS(1);
SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS);
StgMutVar_var(mv) = R1;
RET_P(mv);
}
stg_casMutVarzh
/* MutVar# s a -> a -> a -> State# s -> (# State#, Int#, a #) */
{
W_ mv, old, new, h;
mv = R1;
old = R2;
new = R3;
(h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var,
old, new) [];
if (h != old) {
RET_NP(1,h);
} else {
if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
}
RET_NP(0,h);
}
}
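/* A Haskell-level sketch (hypothetical helper, not RTS code) of wrapping the
 * casMutVar# primop implemented above. In the code above, flag 0 is returned
 * when the swap succeeded; anything else means it failed and the returned
 * value is what was actually in the MutVar. Note the comparison is by
 * pointer identity:
 *
 *   {-# LANGUAGE MagicHash, UnboxedTuples #-}
 *   import GHC.IO    (IO (..))
 *   import GHC.IORef (IORef (..))
 *   import GHC.STRef (STRef (..))
 *   import GHC.Exts  (casMutVar#)
 *
 *   casIORef :: IORef a -> a -> a -> IO (Bool, a)
 *   casIORef (IORef (STRef var)) old new = IO $ \s ->
 *     case casMutVar# var old new s of
 *       (# s', flag, seen #) -> case flag of
 *         0# -> (# s', (True,  seen) #)   -- swap succeeded
 *         _  -> (# s', (False, seen) #)   -- swap failed; 'seen' is current
 */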
stg_atomicModifyMutVarzh
{
W_ mv, f, z, x, y, r, h;
/* Args: R1 :: MutVar#, R2 :: a -> (a,b) */
/* If x is the current contents of the MutVar#, then
We want to make the new contents point to
(sel_0 (f x))
and the return value is
(sel_1 (f x))
obviously we can share (f x).
z = [stg_ap_2 f x] (max (HS + 2) MIN_UPD_SIZE)
y = [stg_sel_0 z] (max (HS + 1) MIN_UPD_SIZE)
r = [stg_sel_1 z] (max (HS + 1) MIN_UPD_SIZE)
*/
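/* A rough, non-atomic Haskell reference (illustrative only) for what this
 * primop computes; the code below instead builds z, y and r as heap thunks
 * and swaps y in with a CAS, so the exchange is atomic and (f x) is only
 * evaluated when the new contents or the result are demanded:
 *
 *   import Data.IORef (IORef, readIORef, writeIORef)
 *
 *   atomicModifyRef :: IORef a -> (a -> (a, b)) -> IO b
 *   atomicModifyRef ref f = do
 *     x <- readIORef ref
 *     let (a, b) = f x
 *     writeIORef ref a
 *     return b
 */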
#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif
#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif
#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, stg_atomicModifyMutVarzh);
mv = R1;
f = R2;
TICK_ALLOC_THUNK_2();
CCCS_ALLOC(THUNK_2_SIZE);
z = Hp - THUNK_2_SIZE + WDS(1);
SET_HDR(z, stg_ap_2_upd_info, CCCS);
LDV_RECORD_CREATE(z);
StgThunk_payload(z,0) = f;
TICK_ALLOC_THUNK_1();
CCCS_ALLOC(THUNK_1_SIZE);
y = z - THUNK_1_SIZE;
SET_HDR(y, stg_sel_0_upd_info, CCCS);
LDV_RECORD_CREATE(y);
StgThunk_payload(y,0) = z;
TICK_ALLOC_THUNK_1();
CCCS_ALLOC(THUNK_1_SIZE);
r = y - THUNK_1_SIZE;
SET_HDR(r, stg_sel_1_upd_info, CCCS);
LDV_RECORD_CREATE(r);
StgThunk_payload(r,0) = z;
retry:
x = StgMutVar_var(mv);
StgThunk_payload(z,1) = x;
#ifdef THREADED_RTS
(h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
if (h != x) { goto retry; }
#else
StgMutVar_var(mv) = y;
#endif
if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
}
RET_P(r);
}
/* -----------------------------------------------------------------------------
Weak Pointer Primitives
-------------------------------------------------------------------------- */
STRING(stg_weak_msg,"New weak pointer at %p\n")
stg_mkWeakzh
{
/* R1 = key
R2 = value
R3 = finalizer (or NULL)
*/
W_ w;
if (R3 == NULL) {
R3 = stg_NO_FINALIZER_closure;
}
ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh );
w = Hp - SIZEOF_StgWeak + WDS(1);
SET_HDR(w, stg_WEAK_info, CCCS);
// We don't care about cfinalizer here.
// Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
// something else?
StgWeak_key(w) = R1;
StgWeak_value(w) = R2;
StgWeak_finalizer(w) = R3;
StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;
ACQUIRE_LOCK(sm_mutex);
StgWeak_link(w) = W_[weak_ptr_list];
W_[weak_ptr_list] = w;
RELEASE_LOCK(sm_mutex);
IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
RET_P(w);
}
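/* Haskell-level view (sketch): this primop sits underneath
 * System.Mem.Weak.mkWeak. While the key stays reachable, deRefWeak returns
 * Just the value; once the key is garbage collected the weak pointer is
 * emptied and the finalizer runs:
 *
 *   import System.Mem.Weak (mkWeak, deRefWeak)
 *
 *   example :: IO ()
 *   example = do
 *     let key = "some key" :: String
 *     w <- mkWeak key (42 :: Int) (Just (putStrLn "finalized"))
 *     deRefWeak w >>= print        -- Just 42 while 'key' is still live
 */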
stg_mkWeakForeignEnvzh
{
/* R1 = key
R2 = value
R3 = finalizer
R4 = pointer
R5 = has environment (0 or 1)
R6 = environment
*/
W_ w, payload_words, words, p;
W_ key, val, fptr, ptr, flag, eptr;
key = R1;
val = R2;
fptr = R3;
ptr = R4;
flag = R5;
eptr = R6;
ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh );
w = Hp - SIZEOF_StgWeak + WDS(1);
SET_HDR(w, stg_WEAK_info, CCCS);
payload_words = 4;
words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
("ptr" p) = foreign "C" allocate(MyCapability() "ptr", words) [];
TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrWords_bytes(p) = WDS(payload_words);
StgArrWords_payload(p,0) = fptr;
StgArrWords_payload(p,1) = ptr;
StgArrWords_payload(p,2) = eptr;
StgArrWords_payload(p,3) = flag;
// We don't care about the value here.
// Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else?
StgWeak_key(w) = key;
StgWeak_value(w) = val;
StgWeak_finalizer(w) = stg_NO_FINALIZER_closure;
StgWeak_cfinalizer(w) = p;
ACQUIRE_LOCK(sm_mutex);
StgWeak_link(w) = W_[weak_ptr_list];
W_[weak_ptr_list] = w;
RELEASE_LOCK(sm_mutex);
IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
RET_P(w);
}
stg_finalizzeWeakzh
{
/* R1 = weak ptr
*/
W_ w, f, arr;
w = R1;
// already dead?
if (GET_INFO(w) == stg_DEAD_WEAK_info) {
RET_NP(0,stg_NO_FINALIZER_closure);
}
// kill it
#ifdef PROFILING
// @LDV profiling
// A weak pointer is inherently used, so we do not need to call
// LDV_recordDead_FILL_SLOP_DYNAMIC():
// LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
// or, LDV_recordDead():
// LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
// Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
// large as weak pointers, so there is no need to fill the slop, either.
// See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif
//
// Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
//
SET_INFO(w,stg_DEAD_WEAK_info);
LDV_RECORD_CREATE(w);
f = StgWeak_finalizer(w);
arr = StgWeak_cfinalizer(w);
StgDeadWeak_link(w) = StgWeak_link(w);
if (arr != stg_NO_FINALIZER_closure) {
foreign "C" runCFinalizer(StgArrWords_payload(arr,0),
StgArrWords_payload(arr,1),
StgArrWords_payload(arr,2),
StgArrWords_payload(arr,3)) [];
}
/* return the finalizer */
if (f == stg_NO_FINALIZER_closure) {
RET_NP(0,stg_NO_FINALIZER_closure);
} else {
RET_NP(1,f);
}
}
stg_deRefWeakzh
{
/* R1 = weak ptr */
W_ w, code, val;
w = R1;
if (GET_INFO(w) == stg_WEAK_info) {
code = 1;
val = StgWeak_value(w);
} else {
code = 0;
val = w;
}
RET_NP(code,val);
}
/* -----------------------------------------------------------------------------
Floating point operations.
-------------------------------------------------------------------------- */
stg_decodeFloatzuIntzh
{
W_ p;
F_ arg;
W_ mp_tmp1;
W_ mp_tmp_w;
STK_CHK_GEN( WDS(2), NO_PTRS, stg_decodeFloatzuIntzh );
mp_tmp1 = Sp - WDS(1);
mp_tmp_w = Sp - WDS(2);
/* arguments: F1 = Float# */
arg = F1;
/* Perform the operation */
foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
/* returns: (Int# (mantissa), Int# (exponent)) */
RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
}
stg_decodeDoublezu2Intzh
{
D_ arg;
W_ p;
W_ mp_tmp1;
W_ mp_tmp2;
W_ mp_result1;
W_ mp_result2;
STK_CHK_GEN( WDS(4), NO_PTRS, stg_decodeDoublezu2Intzh );
mp_tmp1 = Sp - WDS(1);
mp_tmp2 = Sp - WDS(2);
mp_result1 = Sp - WDS(3);
mp_result2 = Sp - WDS(4);
/* arguments: D1 = Double# */
arg = D1;
/* Perform the operation */
foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
mp_result1 "ptr", mp_result2 "ptr",
arg) [];
/* returns:
(Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
}
/* -----------------------------------------------------------------------------
* Concurrency primitives
* -------------------------------------------------------------------------- */
stg_forkzh
{
/* args: R1 = closure to spark */
MAYBE_GC(R1_PTR, stg_forkzh);
W_ closure;
W_ threadid;
closure = R1;
("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
RtsFlags_GcFlags_initialStkSize(RtsFlags),
closure "ptr") [];
/* start blocked if the current thread is blocked */
StgTSO_flags(threadid) = %lobits16(
TO_W_(StgTSO_flags(threadid)) |
TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));
foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
// context switch soon, but not immediately: we don't want every
// forkIO to force a context-switch.
Capability_context_switch(MyCapability()) = 1 :: CInt;
RET_P(threadid);
}
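/* The flag-copying above (TSO_BLOCKEX | TSO_INTERRUPTIBLE) is what makes a
 * thread forked with forkIO inherit its parent's masking state. An
 * illustrative Haskell check:
 *
 *   import Control.Concurrent (forkIO, threadDelay)
 *   import Control.Exception  (mask_, getMaskingState)
 *
 *   main :: IO ()
 *   main = mask_ $ do
 *     _ <- forkIO (getMaskingState >>= print)   -- MaskedInterruptible
 *     threadDelay 100000
 */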
stg_forkOnzh
{
/* args: R1 = cpu, R2 = closure to spark */
MAYBE_GC(R2_PTR, stg_forkOnzh);
W_ cpu;
W_ closure;
W_ threadid;
cpu = R1;
closure = R2;
("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
RtsFlags_GcFlags_initialStkSize(RtsFlags),
closure "ptr") [];
/* start blocked if the current thread is blocked */
StgTSO_flags(threadid) = %lobits16(
TO_W_(StgTSO_flags(threadid)) |
TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));
foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
// context switch soon, but not immediately: we don't want every
// forkIO to force a context-switch.
Capability_context_switch(MyCapability()) = 1 :: CInt;
RET_P(threadid);
}
stg_yieldzh
{
jump stg_yield_noregs;
}
stg_myThreadIdzh
{
/* no args. */
RET_P(CurrentTSO);
}
stg_labelThreadzh
{
/* args:
R1 = ThreadId#
R2 = Addr# */
#if defined(DEBUG) || defined(TRACING) || defined(DTRACE)
foreign "C" labelThread(MyCapability() "ptr", R1 "ptr", R2 "ptr") [];
#endif
jump %ENTRY_CODE(Sp(0));
}
stg_isCurrentThreadBoundzh
{
/* no args */
W_ r;
(r) = foreign "C" isThreadBound(CurrentTSO) [];
RET_N(r);
}
stg_threadStatuszh
{
/* args: R1 :: ThreadId# */
W_ tso;
W_ why_blocked;
W_ what_next;
W_ ret, cap, locked;
tso = R1;
what_next = TO_W_(StgTSO_what_next(tso));
why_blocked = TO_W_(StgTSO_why_blocked(tso));
// Note: these two reads are not atomic, so they might end up
// being inconsistent. It doesn't matter, since we
// only return one or the other. If we wanted to return the
// contents of block_info too, then we'd have to do some synchronisation.
if (what_next == ThreadComplete) {
ret = 16; // NB. magic, matches up with GHC.Conc.threadStatus
} else {
if (what_next == ThreadKilled) {
ret = 17;
} else {
ret = why_blocked;
}
}
cap = TO_W_(Capability_no(StgTSO_cap(tso)));
if ((TO_W_(StgTSO_flags(tso)) & TSO_LOCKED) != 0) {
locked = 1;
} else {
locked = 0;
}
RET_NNN(ret,cap,locked);
}
/* -----------------------------------------------------------------------------
* TVar primitives
* -------------------------------------------------------------------------- */
#define SP_OFF 0
// Catch retry frame ------------------------------------------------------------
INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
W_ unused1, W_ unused2,
#endif
W_ unused3, P_ unused4, P_ unused5)
{
W_ r, frame, trec, outer;
frame = Sp;
trec = StgTSO_trec(CurrentTSO);
outer = StgTRecHeader_enclosing_trec(trec);
(r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
if (r != 0) {
/* Succeeded (either first branch or second branch) */
StgTSO_trec(CurrentTSO) = outer;
Sp = Sp + SIZEOF_StgCatchRetryFrame;
jump %ENTRY_CODE(Sp(SP_OFF));
} else {
/* Did not commit: re-execute */
W_ new_trec;
("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
StgTSO_trec(CurrentTSO) = new_trec;
if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
R1 = StgCatchRetryFrame_alt_code(frame);
} else {
R1 = StgCatchRetryFrame_first_code(frame);
}
jump stg_ap_v_fast;
}
}
// Atomically frame ------------------------------------------------------------
INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
W_ unused1, W_ unused2,
#endif
P_ code, P_ next_invariant_to_check, P_ result)
{
W_ frame, trec, valid, next_invariant, q, outer;
frame = Sp;
trec = StgTSO_trec(CurrentTSO);
result = R1;
outer = StgTRecHeader_enclosing_trec(trec);
if (outer == NO_TREC) {
/* First time back at the atomically frame -- pick up invariants */
("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
StgAtomicallyFrame_next_invariant_to_check(frame) = q;
StgAtomicallyFrame_result(frame) = result;
} else {
/* Second/subsequent time back at the atomically frame -- abort the
* tx that's checking the invariant and move on to the next one */
StgTSO_trec(CurrentTSO) = outer;
q = StgAtomicallyFrame_next_invariant_to_check(frame);
StgInvariantCheckQueue_my_execution(q) = trec;
foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
/* Don't free trec -- it's linked from q and will be stashed in the
* invariant if we eventually commit. */
q = StgInvariantCheckQueue_next_queue_entry(q);
StgAtomicallyFrame_next_invariant_to_check(frame) = q;
trec = outer;
}
q = StgAtomicallyFrame_next_invariant_to_check(frame);
if (q != END_INVARIANT_CHECK_QUEUE) {
/* We can't commit yet: another invariant to check */
("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
StgTSO_trec(CurrentTSO) = trec;
next_invariant = StgInvariantCheckQueue_invariant(q);
R1 = StgAtomicInvariant_code(next_invariant);
jump stg_ap_v_fast;
} else {
/* We've got no more invariants to check, try to commit */
(valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
if (valid != 0) {
/* Transaction was valid: commit succeeded */
StgTSO_trec(CurrentTSO) = NO_TREC;
R1 = StgAtomicallyFrame_result(frame);
Sp = Sp + SIZEOF_StgAtomicallyFrame;
jump %ENTRY_CODE(Sp(SP_OFF));
} else {
/* Transaction was not valid: try again */
("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
StgTSO_trec(CurrentTSO) = trec;
StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
R1 = StgAtomicallyFrame_code(frame);
jump stg_ap_v_fast;
}
}
}
INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
W_ unused1, W_ unused2,
#endif
P_ code, P_ next_invariant_to_check, P_ result)
{
W_ frame, trec, valid;
frame = Sp;
/* The TSO is currently waiting: should we stop waiting? */
(valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
if (valid != 0) {
/* Previous attempt is still valid: no point trying again yet */
jump stg_block_noregs;
} else {
/* Previous attempt is no longer valid: try again */
("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
StgTSO_trec(CurrentTSO) = trec;
StgHeader_info(frame) = stg_atomically_frame_info;
R1 = StgAtomicallyFrame_code(frame);
jump stg_ap_v_fast;
}
}
// STM catch frame --------------------------------------------------------------
#define SP_OFF 0
/* Catch frames are very similar to update frames, but when entering
* one we just pop the frame off the stack and perform the correct
* kind of return to the activation record underneath us on the stack.
*/
INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
W_ unused1, W_ unused2,
#endif
P_ unused3, P_ unused4)
{
W_ r, frame, trec, outer;
frame = Sp;
trec = StgTSO_trec(CurrentTSO);
outer = StgTRecHeader_enclosing_trec(trec);
(r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
if (r != 0) {
/* Commit succeeded */
StgTSO_trec(CurrentTSO) = outer;
Sp = Sp + SIZEOF_StgCatchSTMFrame;
jump %ENTRY_CODE(Sp(SP_OFF));
} else {
/* Commit failed */
W_ new_trec;
("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
StgTSO_trec(CurrentTSO) = new_trec;
R1 = StgCatchSTMFrame_code(frame);
jump stg_ap_v_fast;
}
}
// Primop definition ------------------------------------------------------------
stg_atomicallyzh
{
W_ frame;
W_ old_trec;
W_ new_trec;
// stmStartTransaction may allocate
MAYBE_GC (R1_PTR, stg_atomicallyzh);
/* Args: R1 = m :: STM a */
STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, stg_atomicallyzh);
old_trec = StgTSO_trec(CurrentTSO);
/* Nested transactions are not allowed; raise an exception */
if (old_trec != NO_TREC) {
R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
jump stg_raisezh;
}
/* Set up the atomically frame */
Sp = Sp - SIZEOF_StgAtomicallyFrame;
frame = Sp;
SET_HDR(frame,stg_atomically_frame_info, CCCS);
StgAtomicallyFrame_code(frame) = R1;
StgAtomicallyFrame_result(frame) = NO_TREC;
StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
/* Start the memory transaction */
("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
StgTSO_trec(CurrentTSO) = new_trec;
/* Apply R1 to the realworld token */
jump stg_ap_v_fast;
}
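/* Haskell-level usage (sketch) of the machinery above: atomically runs an
 * STM action as a single transaction, and calling it again from inside a
 * transaction raises the nestedAtomically exception imported at the top of
 * this file:
 *
 *   import Control.Concurrent.STM
 *
 *   main :: IO ()
 *   main = do
 *     tv <- atomically (newTVar (0 :: Int))
 *     atomically (readTVar tv >>= writeTVar tv . (+ 1))
 *     atomically (readTVar tv) >>= print     -- 1
 */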
// A closure representing "atomically x". This is used when a thread
// inside a transaction receives an asynchronous exception; see #5866.
// It is somewhat similar to the stg_raise closure.
//
INFO_TABLE(stg_atomically,1,0,THUNK_1_0,"atomically","atomically")
{
R1 = StgThunk_payload(R1,0);
jump stg_atomicallyzh;
}
stg_catchSTMzh
{
W_ frame;
/* Args: R1 :: STM a */
/* Args: R2 :: Exception -> STM a */
STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, stg_catchSTMzh);
/* Set up the catch frame */
Sp = Sp - SIZEOF_StgCatchSTMFrame;
frame = Sp;
SET_HDR(frame, stg_catch_stm_frame_info, CCCS);
StgCatchSTMFrame_handler(frame) = R2;
StgCatchSTMFrame_code(frame) = R1;
/* Start a nested transaction to run the body of the try block in */
W_ cur_trec;
W_ new_trec;
cur_trec = StgTSO_trec(CurrentTSO);
("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
StgTSO_trec(CurrentTSO) = new_trec;
/* Apply R1 to the realworld token */
jump stg_ap_v_fast;
}
stg_catchRetryzh
{
W_ frame;
W_ new_trec;
W_ trec;
// stmStartTransaction may allocate
MAYBE_GC (R1_PTR & R2_PTR, stg_catchRetryzh);
/* Args: R1 :: STM a */
/* Args: R2 :: STM a */
STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, stg_catchRetryzh);
/* Start a nested transaction within which to run the first code */
trec = StgTSO_trec(CurrentTSO);
("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
StgTSO_trec(CurrentTSO) = new_trec;
/* Set up the catch-retry frame */
Sp = Sp - SIZEOF_StgCatchRetryFrame;
frame = Sp;
SET_HDR(frame, stg_catch_retry_frame_info, CCCS);
StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
StgCatchRetryFrame_first_code(frame) = R1;
StgCatchRetryFrame_alt_code(frame) = R2;
/* Apply R1 to the realworld token */
jump stg_ap_v_fast;
}
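/* Haskell-level view (sketch): orElse is built on the catchRetry# primop
 * above. 'orElse a b' runs a in a nested transaction; if a calls retry, its
 * effects are rolled back and b runs instead:
 *
 *   import Control.Concurrent.STM
 *
 *   takeEither :: TVar (Maybe a) -> TVar (Maybe a) -> STM a
 *   takeEither a b = grab a `orElse` grab b
 *     where grab tv = readTVar tv >>= maybe retry return
 */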
stg_retryzh
{
W_ frame_type;
W_ frame;
W_ trec;
W_ outer;
W_ r;
MAYBE_GC (NO_PTRS, stg_retryzh); // STM operations may allocate
// Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
SAVE_THREAD_STATE();
(frame_type) = foreign "C" findRetryFrameHelper(MyCapability(), CurrentTSO "ptr") [];
LOAD_THREAD_STATE();
frame = Sp;
trec = StgTSO_trec(CurrentTSO);
outer = StgTRecHeader_enclosing_trec(trec);
if (frame_type == CATCH_RETRY_FRAME) {
// The retry reaches a CATCH_RETRY_FRAME before the atomic frame
ASSERT(outer != NO_TREC);
// Abort the transaction attempting the current branch
foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
// Retry in the first branch: try the alternative
("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
StgTSO_trec(CurrentTSO) = trec;
StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
R1 = StgCatchRetryFrame_alt_code(frame);
jump stg_ap_v_fast;
} else {
// Retry in the alternative code: propagate the retry
StgTSO_trec(CurrentTSO) = outer;
Sp = Sp + SIZEOF_StgCatchRetryFrame;
goto retry_pop_stack;
}
}
// We've reached the ATOMICALLY_FRAME: attempt to wait
ASSERT(frame_type == ATOMICALLY_FRAME);
if (outer != NO_TREC) {
// We called retry while checking invariants, so abort the current
// invariant check (merging its TVar accesses into the parent's read
// set so we'll wait on them)
foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
trec = outer;
StgTSO_trec(CurrentTSO) = trec;
outer = StgTRecHeader_enclosing_trec(trec);
}
ASSERT(outer == NO_TREC);
(r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
if (r != 0) {
// Transaction was valid: stmWait put us on the TVars' queues, we now block
StgHeader_info(frame) = stg_atomically_waiting_frame_info;
Sp = frame;
// Fix up the stack in the unregisterised case: the return convention is different.
R3 = trec; // passing to stmWaitUnblock()
jump stg_block_stmwait;
} else {
// Transaction was not valid: retry immediately
("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
StgTSO_trec(CurrentTSO) = trec;
R1 = StgAtomicallyFrame_code(frame);
Sp = frame;
jump stg_ap_v_fast;
}
}
stg_checkzh
{
W_ trec, closure;
/* Args: R1 = invariant closure */
MAYBE_GC (R1_PTR, stg_checkzh);
trec = StgTSO_trec(CurrentTSO);
closure = R1;
foreign "C" stmAddInvariantToCheck(MyCapability() "ptr",
trec "ptr",
closure "ptr") [];
jump %ENTRY_CODE(Sp(0));
}
stg_newTVarzh
{
W_ tv;
W_ new_value;
/* Args: R1 = initialisation value */
MAYBE_GC (R1_PTR, stg_newTVarzh);
new_value = R1;
("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
RET_P(tv);
}
stg_readTVarzh
{
W_ trec;
W_ tvar;
W_ result;
/* Args: R1 = TVar closure */
MAYBE_GC (R1_PTR, stg_readTVarzh); // Call to stmReadTVar may allocate
trec = StgTSO_trec(CurrentTSO);
tvar = R1;
("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
RET_P(result);
}
stg_readTVarIOzh
{
W_ result;
again:
result = StgTVar_current_value(R1);
if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
goto again;
}
RET_P(result);
}
stg_writeTVarzh
{
W_ trec;
W_ tvar;
W_ new_value;
/* Args: R1 = TVar closure */
/* R2 = New value */
MAYBE_GC (R1_PTR & R2_PTR, stg_writeTVarzh); // Call to stmWriteTVar may allocate
trec = StgTSO_trec(CurrentTSO);
tvar = R1;
new_value = R2;
foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
jump %ENTRY_CODE(Sp(0));
}
/* -----------------------------------------------------------------------------
* MVar primitives
*
* take & putMVar work as follows. Firstly, an important invariant:
*
* If the MVar is full, then the blocking queue contains only
* threads blocked on putMVar, and if the MVar is empty then the
* blocking queue contains only threads blocked on takeMVar.
*
* takeMVar:
* MVar empty : then add ourselves to the blocking queue
* MVar full : remove the value from the MVar, and
* blocking queue empty : return
* blocking queue non-empty : perform the first blocked putMVar
* from the queue, and wake up the
* thread (MVar is now full again)
*
* putMVar is just the dual of the above algorithm.
*
* How do we "perform a putMVar"? Well, we have to fiddle around with
* the stack of the thread waiting to do the putMVar. See
* stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
* the stack layout, and the PerformPut and PerformTake macros below.
*
* It is important that a blocked take or put is woken up with the
* take/put already performed, because otherwise there would be a
* small window of vulnerability where the thread could receive an
* exception and never perform its take or put, and we'd end up with a
* deadlock.
*
* -------------------------------------------------------------------------- */
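/* A tiny Haskell-level illustration (sketch) of the hand-off described
 * above: the main thread's takeMVar blocks on the empty MVar and is woken,
 * with the take already performed, when the forked thread's putMVar arrives:
 *
 *   import Control.Concurrent
 *
 *   main :: IO ()
 *   main = do
 *     m <- newEmptyMVar
 *     _ <- forkIO (putMVar m "hello")
 *     takeMVar m >>= putStrLn
 */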
stg_isEmptyMVarzh
{
/* args: R1 = MVar closure */
if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
RET_N(1);
} else {
RET_N(0);
}
}
stg_newMVarzh
{
/* args: none */
W_ mvar;
ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, stg_newMVarzh );
mvar = Hp - SIZEOF_StgMVar + WDS(1);
SET_HDR(mvar,stg_MVAR_DIRTY_info,CCCS);
// MVARs start dirty: generation 0 has no mutable list
StgMVar_head(mvar) = stg_END_TSO_QUEUE_closure;
StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
RET_P(mvar);
}
#define PerformTake(stack, value) \
W_ sp; \
sp = StgStack_sp(stack); \
W_[sp + WDS(1)] = value; \
W_[sp + WDS(0)] = stg_gc_unpt_r1_info;
#define PerformPut(stack,lval) \
W_ sp; \
sp = StgStack_sp(stack) + WDS(3); \
StgStack_sp(stack) = sp; \
lval = W_[sp - WDS(1)];
stg_takeMVarzh
{
W_ mvar, val, info, tso, q;
/* args: R1 = MVar closure */
mvar = R1;
#if defined(THREADED_RTS)
("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
info = GET_INFO(mvar);
#endif
if (info == stg_MVAR_CLEAN_info) {
foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
}
/* If the MVar is empty, put ourselves on its blocking queue,
* and wait until we're woken up.
*/
if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
// Note [mvar-heap-check] We want to do the heap check in the
// branch here, to avoid the conditional in the common case.
// However, we've already locked the MVar above, so we better
// be careful to unlock it again if the heap check fails.
// Unfortunately we don't have an easy way to inject any code
// into the heap check generated by the code generator, so we
// have to do it in stg_gc_gen (see HeapStackCheck.cmm).
HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR, stg_takeMVarzh);
TICK_ALLOC_PRIM(SIZEOF_StgMVarTSOQueue, 0, 0);
CCCS_ALLOC(SIZEOF_StgMVarTSOQueue);
q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
StgMVarTSOQueue_tso(q) = CurrentTSO;
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
StgMVar_head(mvar) = q;
} else {
StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q;
foreign "C" recordClosureMutated(MyCapability() "ptr",
StgMVar_tail(mvar)) [];
}
StgTSO__link(CurrentTSO) = q;
StgTSO_block_info(CurrentTSO) = mvar;
StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
StgMVar_tail(mvar) = q;
R1 = mvar;
jump stg_block_takemvar;
}
/* we got the value... */
val = StgMVar_value(mvar);
q = StgMVar_head(mvar);
loop:
if (q == stg_END_TSO_QUEUE_closure) {
/* No further putMVars, MVar is now empty */
StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
unlockClosure(mvar, stg_MVAR_DIRTY_info);
RET_P(val);
}
if (StgHeader_info(q) == stg_IND_info ||
StgHeader_info(q) == stg_MSG_NULL_info) {
q = StgInd_indirectee(q);
goto loop;
}
// There are putMVar(s) waiting... wake up the first thread on the queue
tso = StgMVarTSOQueue_tso(q);
StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
}
ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
ASSERT(StgTSO_block_info(tso) == mvar);
// actually perform the putMVar for the thread that we just woke up
W_ stack;
stack = StgTSO_stackobj(tso);
PerformPut(stack, StgMVar_value(mvar));
// indicate that the MVar operation has now completed.
StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
// no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
unlockClosure(mvar, stg_MVAR_DIRTY_info);
RET_P(val);
}
stg_tryTakeMVarzh
{
W_ mvar, val, info, tso, q;
/* args: R1 = MVar closure */
mvar = R1;
#if defined(THREADED_RTS)
("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
info = GET_INFO(mvar);
#endif
/* If the MVar is empty, put ourselves on its blocking queue,
* and wait until we're woken up.
*/
if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
unlockClosure(mvar, info);
#endif
/* HACK: we need a pointer to pass back,
* so we abuse NO_FINALIZER_closure
*/
RET_NP(0, stg_NO_FINALIZER_closure);
}
if (info == stg_MVAR_CLEAN_info) {
foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
}
/* we got the value... */
val = StgMVar_value(mvar);
q = StgMVar_head(mvar);
loop:
if (q == stg_END_TSO_QUEUE_closure) {
/* No further putMVars, MVar is now empty */
StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
unlockClosure(mvar, stg_MVAR_DIRTY_info);
RET_NP(1, val);
}
if (StgHeader_info(q) == stg_IND_info ||
StgHeader_info(q) == stg_MSG_NULL_info) {
q = StgInd_indirectee(q);
goto loop;
}
// There are putMVar(s) waiting... wake up the first thread on the queue
tso = StgMVarTSOQueue_tso(q);
StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
}
ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
ASSERT(StgTSO_block_info(tso) == mvar);
// actually perform the putMVar for the thread that we just woke up
W_ stack;
stack = StgTSO_stackobj(tso);
PerformPut(stack, StgMVar_value(mvar));
// indicate that the MVar operation has now completed.
StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
// no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
unlockClosure(mvar, stg_MVAR_DIRTY_info);
RET_NP(1,val);
}
stg_putMVarzh
{
W_ mvar, val, info, tso, q;
/* args: R1 = MVar, R2 = value */
mvar = R1;
val = R2;
#if defined(THREADED_RTS)
("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
info = GET_INFO(mvar);
#endif
if (info == stg_MVAR_CLEAN_info) {
foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
}
if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
// see Note [mvar-heap-check] above
HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR & R2_PTR, stg_putMVarzh);
TICK_ALLOC_PRIM(SIZEOF_StgMVarTSOQueue, 0, 0);
CCCS_ALLOC(SIZEOF_StgMVarTSOQueue);
q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
StgMVarTSOQueue_tso(q) = CurrentTSO;
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
StgMVar_head(mvar) = q;
} else {
StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q;
foreign "C" recordClosureMutated(MyCapability() "ptr",
StgMVar_tail(mvar)) [];
}
StgTSO__link(CurrentTSO) = q;
StgTSO_block_info(CurrentTSO) = mvar;
StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
StgMVar_tail(mvar) = q;
R1 = mvar;
R2 = val;
jump stg_block_putmvar;
}
q = StgMVar_head(mvar);
loop:
if (q == stg_END_TSO_QUEUE_closure) {
/* No further takes, the MVar is now full. */
StgMVar_value(mvar) = val;
unlockClosure(mvar, stg_MVAR_DIRTY_info);
jump %ENTRY_CODE(Sp(0));
}
if (StgHeader_info(q) == stg_IND_info ||
StgHeader_info(q) == stg_MSG_NULL_info) {
q = StgInd_indirectee(q);
goto loop;
}
// There are takeMVar(s) waiting: wake up the first one
tso = StgMVarTSOQueue_tso(q);
StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
}
ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
ASSERT(StgTSO_block_info(tso) == mvar);
// actually perform the takeMVar
W_ stack;
stack = StgTSO_stackobj(tso);
PerformTake(stack, val);
// indicate that the MVar operation has now completed.
StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
if (TO_W_(StgStack_dirty(stack)) == 0) {
foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") [];
}
foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
unlockClosure(mvar, stg_MVAR_DIRTY_info);
jump %ENTRY_CODE(Sp(0));
}
stg_tryPutMVarzh
{
W_ mvar, val, info, tso, q;
/* args: R1 = MVar, R2 = value */
mvar = R1;
val = R2;
#if defined(THREADED_RTS)
("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
info = GET_INFO(mvar);
#endif
if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
unlockClosure(mvar, info);
#endif
RET_N(0);
}
if (info == stg_MVAR_CLEAN_info) {
foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
}
q = StgMVar_head(mvar);
loop:
if (q == stg_END_TSO_QUEUE_closure) {
/* No further takes, the MVar is now full. */
StgMVar_value(mvar) = val;
unlockClosure(mvar, stg_MVAR_DIRTY_info);
RET_N(1);
}
if (StgHeader_info(q) == stg_IND_info ||
StgHeader_info(q) == stg_MSG_NULL_info) {
q = StgInd_indirectee(q);
goto loop;
}
// There are takeMVar(s) waiting: wake up the first one
tso = StgMVarTSOQueue_tso(q);
StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
}
ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
ASSERT(StgTSO_block_info(tso) == mvar);
// actually perform the takeMVar
W_ stack;
stack = StgTSO_stackobj(tso);
PerformTake(stack, val);
// indicate that the MVar operation has now completed.
StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
if (TO_W_(StgStack_dirty(stack)) == 0) {
foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") [];
}
foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
unlockClosure(mvar, stg_MVAR_DIRTY_info);
RET_N(1);
}
/* -----------------------------------------------------------------------------
Stable pointer primitives
------------------------------------------------------------------------- */
stg_makeStableNamezh
{
W_ index, sn_obj;
ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, stg_makeStableNamezh );
(index) = foreign "C" lookupStableName(R1 "ptr") [];
/* Is there already a StableName for this heap object?
* stable_ptr_table is a pointer to an array of snEntry structs.
*/
if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
SET_HDR(sn_obj, stg_STABLE_NAME_info, CCCS);
StgStableName_sn(sn_obj) = index;
snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
} else {
sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
}
RET_P(sn_obj);
}
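/* Haskell-level view (sketch): this primop backs
 * System.Mem.StableName.makeStableName. Two stable names made from the same
 * already-evaluated object typically compare equal:
 *
 *   import Control.Exception      (evaluate)
 *   import System.Mem.StableName  (makeStableName)
 *
 *   main :: IO ()
 *   main = do
 *     let xs = [1 .. 10] :: [Int]
 *     _   <- evaluate (length xs)
 *     sn1 <- makeStableName xs
 *     sn2 <- makeStableName xs
 *     print (sn1 == sn2)           -- True: same object, same StableName
 */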
stg_makeStablePtrzh
{
/* Args: R1 = a */
W_ sp;
MAYBE_GC(R1_PTR, stg_makeStablePtrzh);
("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
RET_N(sp);
}
stg_deRefStablePtrzh
{
/* Args: R1 = the stable ptr */
W_ r, sp;
sp = R1;
r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
RET_P(r);
}
/* -----------------------------------------------------------------------------
Bytecode object primitives
------------------------------------------------------------------------- */
stg_newBCOzh
{
/* R1 = instrs
R2 = literals
R3 = ptrs
R4 = arity
R5 = bitmap array
*/
W_ bco, bitmap_arr, bytes, words;
bitmap_arr = R5;
words = BYTES_TO_WDS(SIZEOF_StgBCO) + BYTE_ARR_WDS(bitmap_arr);
bytes = WDS(words);
ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, stg_newBCOzh );
bco = Hp - bytes + WDS(1);
SET_HDR(bco, stg_BCO_info, CCCS);
StgBCO_instrs(bco) = R1;
StgBCO_literals(bco) = R2;
StgBCO_ptrs(bco) = R3;
StgBCO_arity(bco) = HALF_W_(R4);
StgBCO_size(bco) = HALF_W_(words);
// Copy the arity/bitmap info into the BCO
W_ i;
i = 0;
for:
if (i < BYTE_ARR_WDS(bitmap_arr)) {
StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
i = i + 1;
goto for;
}
RET_P(bco);
}
stg_mkApUpd0zh
{
// R1 = the BCO# for the AP
//
W_ ap;
// This function is *only* used to wrap zero-arity BCOs in an
// updatable wrapper (see ByteCodeLink.lhs). An AP thunk is always
// saturated and always points directly to a FUN or BCO.
ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
StgBCO_arity(R1) == HALF_W_(0));
HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, stg_mkApUpd0zh);
TICK_ALLOC_UP_THK(0, 0);
CCCS_ALLOC(SIZEOF_StgAP);
ap = Hp - SIZEOF_StgAP + WDS(1);
SET_HDR(ap, stg_AP_info, CCCS);
StgAP_n_args(ap) = HALF_W_(0);
StgAP_fun(ap) = R1;
RET_P(ap);
}
stg_unpackClosurezh
{
/* args: R1 = closure to analyze */
// TODO: Consider the absence of ptrs or nonptrs as a special case ?
W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
info = %GET_STD_INFO(UNTAG(R1));
// Some closures have non-standard layout, so we omit those here.
W_ type;
type = TO_W_(%INFO_TYPE(info));
switch [0 .. N_CLOSURE_TYPES] type {
case THUNK_SELECTOR : {
ptrs = 1;
nptrs = 0;
goto out;
}
case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1,
THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
ptrs = 0;
nptrs = 0;
goto out;
}
default: {
ptrs = TO_W_(%INFO_PTRS(info));
nptrs = TO_W_(%INFO_NPTRS(info));
goto out;
}}
out:
W_ ptrs_arr_sz, ptrs_arr_cards, nptrs_arr_sz;
nptrs_arr_sz = SIZEOF_StgArrWords + WDS(nptrs);
ptrs_arr_cards = mutArrPtrsCardWords(ptrs);
ptrs_arr_sz = SIZEOF_StgMutArrPtrs + WDS(ptrs) + WDS(ptrs_arr_cards);
ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, stg_unpackClosurezh);
W_ clos;
clos = UNTAG(R1);
ptrs_arr = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, CCCS);
StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards;
p = 0;
for:
if(p < ptrs) {
W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
p = p + 1;
goto for;
}
/* We can leave the card table uninitialised, since the array is
allocated in the nursery. The GC will fill it in if/when the array
is promoted. */
SET_HDR(nptrs_arr, stg_ARR_WORDS_info, CCCS);
StgArrWords_bytes(nptrs_arr) = WDS(nptrs);
p = 0;
for2:
if(p < nptrs) {
W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
p = p + 1;
goto for2;
}
RET_NPP(info, ptrs_arr, nptrs_arr);
}
/* -----------------------------------------------------------------------------
Thread I/O blocking primitives
-------------------------------------------------------------------------- */
/* Add a thread to the end of the blocked queue. (C-- version of the C
* macro in Schedule.h).
*/
#define APPEND_TO_BLOCKED_QUEUE(tso) \
ASSERT(StgTSO__link(tso) == END_TSO_QUEUE); \
if (W_[blocked_queue_hd] == END_TSO_QUEUE) { \
W_[blocked_queue_hd] = tso; \
} else { \
foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
} \
W_[blocked_queue_tl] = tso;
stg_waitReadzh
{
/* args: R1 */
#ifdef THREADED_RTS
foreign "C" barf("waitRead# on threaded RTS") never returns;
#else
ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
StgTSO_block_info(CurrentTSO) = R1;
// No locking - we're not going to use this interface in the
// threaded RTS anyway.
APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
jump stg_block_noregs;
#endif
}
stg_waitWritezh
{
/* args: R1 */
#ifdef THREADED_RTS
foreign "C" barf("waitWrite# on threaded RTS") never returns;
#else
ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
StgTSO_block_info(CurrentTSO) = R1;
// No locking - we're not going to use this interface in the
// threaded RTS anyway.
APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
jump stg_block_noregs;
#endif
}
STRING(stg_delayzh_malloc_str, "stg_delayzh")
stg_delayzh
{
#ifdef mingw32_HOST_OS
W_ ares;
CInt reqID;
#else
W_ t, prev, target;
#endif
#ifdef THREADED_RTS
foreign "C" barf("delay# on threaded RTS") never returns;
#else
/* args: R1 (microsecond delay amount) */
ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
#ifdef mingw32_HOST_OS
/* could probably allocate this on the heap instead */
("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
stg_delayzh_malloc_str);
(reqID) = foreign "C" addDelayRequest(R1);
StgAsyncIOResult_reqID(ares) = reqID;
StgAsyncIOResult_len(ares) = 0;
StgAsyncIOResult_errCode(ares) = 0;
StgTSO_block_info(CurrentTSO) = ares;
/* Having all async-blocked threads reside on the blocked_queue
* simplifies matters, so change the status to OnDoProc and put the
* delayed thread on the blocked_queue.
*/
StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
jump stg_block_async_void;
#else
W_ time;
(time) = foreign "C" getourtimeofday() [R1];
// getourtimeofday() returns a value in units of 10ms
// R1 is in microseconds, we need to (/ 10000), rounding up
target = time + 1 + (R1 + 10000-1) / 10000;
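// Worked example: for R1 = 25000 microseconds, (25000 + 10000-1) / 10000
// rounds up to 3 ticks, so target = time + 1 + 3 = time + 4 ticks of 10ms.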
StgTSO_block_info(CurrentTSO) = target;
/* Insert the new thread in the sleeping queue. */
prev = NULL;
t = W_[sleeping_queue];
while:
if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
prev = t;
t = StgTSO__link(t);
goto while;
}
StgTSO__link(CurrentTSO) = t;
if (prev == NULL) {
W_[sleeping_queue] = CurrentTSO;
} else {
foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
}
jump stg_block_noregs;
#endif
#endif /* !THREADED_RTS */
}
#ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "stg_asyncReadzh")
stg_asyncReadzh
{
W_ ares;
CInt reqID;
#ifdef THREADED_RTS
foreign "C" barf("asyncRead# on threaded RTS") never returns;
#else
/* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
/* could probably allocate this on the heap instead */
("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
stg_asyncReadzh_malloc_str)
[R1,R2,R3,R4];
(reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
StgAsyncIOResult_reqID(ares) = reqID;
StgAsyncIOResult_len(ares) = 0;
StgAsyncIOResult_errCode(ares) = 0;
StgTSO_block_info(CurrentTSO) = ares;
APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
jump stg_block_async;
#endif
}
STRING(stg_asyncWritezh_malloc_str, "stg_asyncWritezh")
stg_asyncWritezh
{
W_ ares;
CInt reqID;
#ifdef THREADED_RTS
foreign "C" barf("asyncWrite# on threaded RTS") never returns;
#else
/* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
stg_asyncWritezh_malloc_str)
[R1,R2,R3,R4];
(reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
StgAsyncIOResult_reqID(ares) = reqID;
StgAsyncIOResult_len(ares) = 0;
StgAsyncIOResult_errCode(ares) = 0;
StgTSO_block_info(CurrentTSO) = ares;
APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
jump stg_block_async;
#endif
}
STRING(stg_asyncDoProczh_malloc_str, "stg_asyncDoProczh")
stg_asyncDoProczh
{
W_ ares;
CInt reqID;
#ifdef THREADED_RTS
foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
#else
/* args: R1 = proc, R2 = param */
ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
/* could probably allocate this on the heap instead */
("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
stg_asyncDoProczh_malloc_str)
[R1,R2];
(reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
StgAsyncIOResult_reqID(ares) = reqID;
StgAsyncIOResult_len(ares) = 0;
StgAsyncIOResult_errCode(ares) = 0;
StgTSO_block_info(CurrentTSO) = ares;
APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
jump stg_block_async;
#endif
}
#endif
/* -----------------------------------------------------------------------------
* noDuplicate#
*
* noDuplicate# tries to ensure that none of the thunks under
* evaluation by the current thread are also under evaluation by
* another thread. It relies on *both* threads doing noDuplicate#;
* the second one will get blocked if they are duplicating some work.
*
* The idea is that noDuplicate# is used within unsafePerformIO to
* ensure that the IO operation is performed at most once.
* noDuplicate# calls threadPaused which acquires an exclusive lock on
* all the thunks currently under evaluation by the current thread.
*
* Consider the following scenario. There is a thunk A, whose
* evaluation requires evaluating thunk B, where thunk B is an
* unsafePerformIO. Two threads, 1 and 2, both enter A. Thread 2
* is pre-empted before it enters B, and claims A by blackholing it
* (in threadPaused). Thread 1 now enters B, and calls noDuplicate#.
*
* thread 1 thread 2
* +-----------+ +---------------+
* | -------+-----> A <-------+------- |
* | update | BLACKHOLE | marked_update |
* +-----------+ +---------------+
* | | | |
* ... ...
* | | +---------------+
* +-----------+
* | ------+-----> B
* | update | BLACKHOLE
* +-----------+
*
* At this point: A is a blackhole, owned by thread 2. noDuplicate#
* calls threadPaused, which walks up the stack and
* - claims B on behalf of thread 1
* - then it reaches the update frame for A, which it sees is already
* a BLACKHOLE and is therefore owned by another thread. Since
* thread 1 is duplicating work, the computation up to the update
* frame for A is suspended, including thunk B.
* - thunk B, which is an unsafePerformIO, has now been reverted to
* an AP_STACK which could be duplicated - BAD!
* - The solution is as follows: before calling threadPaused, we
* leave a frame on the stack (stg_noDuplicate_info) that will call
* noDuplicate# again if the current computation is suspended and
* restarted.
*
* See the test program in concurrent/prog003 for a way to demonstrate
* this. It needs to be run with +RTS -N3 or greater, and the bug
* only manifests occasionally (once every 10 runs or so).
* -------------------------------------------------------------------------- */
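/* Haskell-level connection (sketch): GHC.IO exposes this primop as
 * noDuplicate :: IO (), and unsafePerformIO is essentially
 *
 *   import GHC.IO (noDuplicate, unsafeDupablePerformIO)
 *
 *   unsafePerformIO :: IO a -> a
 *   unsafePerformIO m = unsafeDupablePerformIO (noDuplicate >> m)
 *
 * so the wrapped IO action is performed at most once even when two threads
 * race to force the same thunk.
 */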
INFO_TABLE_RET(stg_noDuplicate, RET_SMALL)
{
Sp_adj(1);
jump stg_noDuplicatezh;
}
stg_noDuplicatezh
{
STK_CHK_GEN( WDS(1), NO_PTRS, stg_noDuplicatezh );
// leave noDuplicate frame in case the current
// computation is suspended and restarted (see above).
Sp_adj(-1);
Sp(0) = stg_noDuplicate_info;
SAVE_THREAD_STATE();
ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
jump stg_threadFinished;
} else {
LOAD_THREAD_STATE();
ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
// remove the stg_noDuplicate frame if it is still there.
if (Sp(0) == stg_noDuplicate_info) {
Sp_adj(1);
}
jump %ENTRY_CODE(Sp(0));
}
}
/* -----------------------------------------------------------------------------
Misc. primitives
-------------------------------------------------------------------------- */
stg_getApStackValzh
{
W_ ap_stack, offset, val, ok;
/* args: R1 = AP_STACK, R2 = offset */
ap_stack = R1;
offset = R2;
if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
ok = 1;
val = StgAP_STACK_payload(ap_stack,offset);
} else {
ok = 0;
val = R1;
}
RET_NP(ok,val);
}
// Write the cost center stack of the first argument on stderr; return
// the second. Possibly only makes sense for already evaluated
// things?
stg_traceCcszh
{
W_ ccs;
#ifdef PROFILING
ccs = StgHeader_ccs(UNTAG(R1));
foreign "C" fprintCCS_stderr(ccs "ptr") [R2];
#endif
R1 = R2;
ENTER();
}
stg_getSparkzh
{
W_ spark;
#ifndef THREADED_RTS
RET_NP(0,ghczmprim_GHCziTypes_False_closure);
#else
(spark) = foreign "C" findSpark(MyCapability());
if (spark != 0) {
RET_NP(1,spark);
} else {
RET_NP(0,ghczmprim_GHCziTypes_False_closure);
}
#endif
}
stg_numSparkszh
{
W_ n;
#ifdef THREADED_RTS
(n) = foreign "C" dequeElements(Capability_sparks(MyCapability()));
#else
n = 0;
#endif
RET_N(n);
}
stg_traceEventzh
{
W_ msg;
msg = R1;
#if defined(TRACING) || defined(DEBUG)
foreign "C" traceUserMsg(MyCapability() "ptr", msg "ptr") [];
#elif defined(DTRACE)
W_ enabled;
// We should go through the macro HASKELLEVENT_USER_MSG_ENABLED from
// RtsProbes.h, but that header file includes unistd.h, which doesn't
// work in Cmm
#if !defined(solaris2_TARGET_OS)
(enabled) = foreign "C" __dtrace_isenabled$HaskellEvent$user__msg$v1() [];
#else
// Solaris' DTrace can't handle the
// __dtrace_isenabled$HaskellEvent$user__msg$v1
// call above. That call only tests whether the user__msg probe is
// enabled, and exists purely as a performance optimization. Since
// preparing for the probe is cheap, we skip the test on Solaris and
// unconditionally assume the probe is enabled here. Note that this does
// not mean the probe will actually fire at runtime: you still need to
// enable it by consuming it in your dtrace script, as with any other
// probe.
enabled = 1;
#endif
if (enabled != 0) {
foreign "C" dtraceUserMsgWrapper(MyCapability() "ptr", msg "ptr") [];
}
#endif
jump %ENTRY_CODE(Sp(0));
}