/*
* This software is part of the SBCL system. See the README file for
* more information.
*
* This software is derived from the CMU CL system, which was
* written at Carnegie Mellon University and released into the
* public domain. The software is in the public domain and is
* provided with absolutely no warranty. See the COPYING and CREDITS
* files for more information.
*/
#ifdef __linux__
#define _GNU_SOURCE // for pthread_setname_np()
#endif
#include "genesis/sbcl.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#ifndef LISP_FEATURE_WIN32
#include <sched.h>
#endif
#include <stddef.h>
#include <errno.h>
#include <sys/types.h>
#ifndef LISP_FEATURE_WIN32
#include <sys/wait.h>
#endif
#include "runtime.h"
#include "validate.h" /* for BINDING_STACK_SIZE etc */
#include "thread.h"
#include "genesis/thread.h"
#include "arch.h"
#include "target-arch-os.h"
#include "os.h"
#include "globals.h"
#include "genesis/cons.h"
#include "genesis/symbol.h"
#include "genesis/instance.h"
#include "genesis/vector.h"
#include "interr.h" /* for lose() */
#include "gc.h"
#include "pseudo-atomic.h"
#include "interrupt.h"
#include "lispregs.h"
#include "atomiclog.inc"
#ifdef LISP_FEATURE_SB_THREAD
#if defined LISP_FEATURE_OPENBSD || defined LISP_FEATURE_FREEBSD || defined LISP_FEATURE_DRAGONFLY
#include <pthread_np.h>
#endif
#ifdef LISP_FEATURE_SUNOS
#include <thread.h>
#endif
#endif
/* Number of bytes of thread-local storage reserved for lisp special-variable
 * bindings; the same fixed size is used for every thread. */
int dynamic_values_bytes = 4096 * sizeof(lispobj); // same for all threads
// exposed to lisp for pthread_create if not C_STACK_IS_CONTROL_STACK
os_vm_size_t thread_alien_stack_size = ALIEN_STACK_SIZE;
/* Head of the doubly-linked list of all lisp threads; guarded by all_threads_lock. */
struct thread *all_threads;
#ifdef LISP_FEATURE_SB_THREAD
/* Per-OS-thread reference to the current lisp thread structure, held either
 * in compiler-supported TLS or in a pthread-specific key. */
#ifdef LISP_FEATURE_GCC_TLS
__thread struct thread *current_thread;
#elif !defined LISP_FEATURE_WIN32
pthread_key_t current_thread = 0;
#endif
/* Locks guarding the all_threads list, the thread-struct recycle bin,
 * and entry into GC, respectively. */
#ifdef LISP_FEATURE_WIN32
CRITICAL_SECTION all_threads_lock;
static CRITICAL_SECTION recyclebin_lock;
static CRITICAL_SECTION in_gc_lock;
#else
pthread_mutex_t all_threads_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t recyclebin_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t in_gc_lock = PTHREAD_MUTEX_INITIALIZER;
#endif
#endif
#if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
/* Assembly entry point used only for the very first call into lisp. */
extern lispobj call_into_lisp_first_time(lispobj fun, lispobj *args, int nargs);
#endif
/* Push 'th' onto the front of the doubly-linked all_threads list.
 * Caller must hold all_threads_lock (or be single-threaded startup). */
static void
link_thread(struct thread *th)
{
    th->prev = 0;
    th->next = all_threads;
    if (all_threads) all_threads->prev = th;
    all_threads = th;
}
#ifdef LISP_FEATURE_SB_THREAD
/* Splice 'th' out of the doubly-linked all_threads list.
 * Caller must hold all_threads_lock. */
static void
unlink_thread(struct thread *th)
{
    struct thread *before = th->prev, *after = th->next;
    if (before)
        before->next = after;
    else
        all_threads = after;
    if (after)
        after->prev = before;
}
/* Not safe in general, but if your thread names are all
* simple-base-string and won't move, this is slightly ok */
char* vm_thread_name(struct thread* th)
{
if (!th) return "non-lisp";
struct thread_instance *lispthread = (void*)INSTANCE(th->lisp_thread);
lispobj name = lispthread->_name;
if (simple_base_string_p(name)) return vector_sap(name);
return "?";
}
#define get_thread_state(thread) \
(int)__sync_val_compare_and_swap(&thread->state_word.state, -1, -1)
#ifndef LISP_FEATURE_SB_SAFEPOINT
/* Atomically change 'thread's run state to 'state' and wake any threads
 * sleeping in thread_wait_until_not() on the state being left behind.
 * The state word is guarded by state_sem; the two "not_X" semaphores act
 * as wakeup channels with explicit waiter counts. */
void
set_thread_state(struct thread *thread,
                 char state,
                 bool signals_already_blocked) // for foreign thread
{
    struct extra_thread_data *semaphores = thread_extra_data(thread);
    int i, waitcount = 0;
    sigset_t old;
    // If we've already masked the blockable signals we can avoid two syscalls here.
    if (!signals_already_blocked)
        block_blockable_signals(&old);
    os_sem_wait(&semaphores->state_sem);
    if (thread->state_word.state != state) {
        /* Entering STOPPED or DEAD means we are no longer RUNNING:
         * release everyone waiting for "not running". */
        if ((STATE_STOPPED==state) ||
            (STATE_DEAD==state)) {
            waitcount = semaphores->state_not_running_waitcount;
            semaphores->state_not_running_waitcount = 0;
            for (i=0; i<waitcount; i++)
                os_sem_post(&semaphores->state_not_running_sem);
        }
        /* Entering RUNNING or DEAD means we are no longer STOPPED:
         * release everyone waiting for "not stopped". */
        if ((STATE_RUNNING==state) ||
            (STATE_DEAD==state)) {
            waitcount = semaphores->state_not_stopped_waitcount;
            semaphores->state_not_stopped_waitcount = 0;
            for (i=0; i<waitcount; i++)
                os_sem_post(&semaphores->state_not_stopped_sem);
        }
        thread->state_word.state = state;
    }
    os_sem_post(&semaphores->state_sem);
    if (!signals_already_blocked)
        thread_sigmask(SIG_SETMASK, &old, NULL);
}
// Wait until "thread's" state is something other than 'undesired_state'
// and return whatever the new state is.
int thread_wait_until_not(int undesired_state,
                          struct thread *thread)
{
    struct extra_thread_data *semaphores = thread_extra_data(thread);
    sigset_t old;
    os_sem_t *wait_sem;
    block_blockable_signals(&old);
  start:
    os_sem_wait(&semaphores->state_sem);
    /* "The following functions synchronize memory with respect to other threads:
     *  ... pthread_mutex_lock() ... "
     * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_11
     * But we still have to ensure no compiler reordering.
     */
    int ending_state = get_thread_state(thread);
    if (ending_state == undesired_state) {
        /* Register as a waiter on the matching semaphore *before* releasing
         * state_sem, so set_thread_state() cannot miss us. */
        switch (undesired_state) {
        case STATE_RUNNING:
            wait_sem = &semaphores->state_not_running_sem;
            semaphores->state_not_running_waitcount++;
            break;
        case STATE_STOPPED:
            wait_sem = &semaphores->state_not_stopped_sem;
            semaphores->state_not_stopped_waitcount++;
            break;
        default:
            lose("thread_wait_until_not: invalid argument %x", ending_state);
        }
    } else {
        wait_sem = NULL;
    }
    os_sem_post(&semaphores->state_sem);
    if (wait_sem) {
        /* Sleep until posted by set_thread_state(), then re-read the state;
         * it may have changed again, hence the loop. */
        os_sem_wait(wait_sem);
        goto start;
    }
    thread_sigmask(SIG_SETMASK, &old, NULL);
    return ending_state;
}
#endif /* sb-safepoint */
#endif /* sb-thread */
/* sb_GetTID: return a kernel-assigned identifier for the calling thread.
 * The mechanism varies by OS; where none exists, it is 0. */
#ifdef LISP_FEATURE_WIN32
#define sb_GetTID() GetCurrentThreadId()
#elif defined __linux__
// gettid() was added in glibc 2.30 but we support older glibc
int sb_GetTID() { return syscall(SYS_gettid); }
#elif defined __DragonFly__
#include <sys/lwp.h>
lwpid_t sb_GetTID() { return lwp_gettid(); }
#elif defined __FreeBSD__
#include <sys/thr.h>
int sb_GetTID()
{
    long id;
    thr_self(&id);
    // man thr_self(2) says: the thread identifier is an integer in the range
    // from PID_MAX + 2 (100001) to INT_MAX. So casting to int is safe.
    return (int)id;
}
#elif defined __OpenBSD__
int sb_GetTID()
{
    return getthrid();
}
#elif defined __APPLE__ && defined LISP_FEATURE_SB_THREAD
int sb_GetTID() {
    // NOTE(review): this yields a mach port number for the thread, not a
    // kernel TID in the Linux sense - unique per thread, which is what matters.
    return pthread_mach_thread_np(pthread_self());
}
#else
#define sb_GetTID() 0
#endif
/* Our futex-based lisp mutex needs an OS-assigned unique ID.
 * Why not use pthread_self? I think the reason is that that on linux,
 * the TID is 4 bytes, and the futex lock word is 4 bytes.
 * If the unique ID needed 8 bytes, there could be spurious aliasing
 * that would make the code behave incorrectly. */
static int get_nonzero_tid()
{
    int id = sb_GetTID();
#ifdef LISP_FEATURE_SB_FUTEX
    /* With futexes in use the TID must be valid (nonzero). Without them
     * (macOS etc) we neither need nor want this assertion. */
    gc_assert(id != 0);
#endif
    return id;
}
// Because creation is synchronized by *MAKE-THREAD-LOCK*
// we only need a single 'attributes' object.
#if defined LISP_FEATURE_SB_THREAD && !defined LISP_FEATURE_WIN32
pthread_attr_t new_lisp_thread_attr;
#define init_shared_attr_object() (pthread_attr_init(&new_lisp_thread_attr)==0)
#else
#define init_shared_attr_object() (1)
#endif
struct thread *alloc_thread_struct(void*);
#ifdef LISP_FEATURE_WIN32
#define ASSOCIATE_OS_THREAD(thread) \
DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), \
GetCurrentProcess(), (LPHANDLE)&thread->os_thread, 0, TRUE, \
DUPLICATE_SAME_ACCESS)
#elif defined LISP_FEATURE_GS_SEG
#include <asm/prctl.h>
#include <sys/prctl.h>
extern int arch_prctl(int code, unsigned long *addr);
#define ASSOCIATE_OS_THREAD(thread) arch_prctl(ARCH_SET_GS, (uword_t*)thread), \
thread->os_thread = thread_self()
#else
#define ASSOCIATE_OS_THREAD(thread) thread->os_thread = thread_self()
#endif
#ifndef LISP_FEATURE_SB_THREAD
# define ASSIGN_CURRENT_THREAD(dummy)
#elif defined LISP_FEATURE_GCC_TLS
# define ASSIGN_CURRENT_THREAD(x) current_thread = x
#elif !defined LISP_FEATURE_WIN32
# define ASSIGN_CURRENT_THREAD(x) pthread_setspecific(current_thread, x)
#else
# define ASSIGN_CURRENT_THREAD(x) TlsSetValue(OUR_TLS_INDEX, x)
#endif
#ifdef LISP_FEATURE_WIN32
// Need a function callable from assembly code, where the inline one won't do.
// Returns the current lisp 'struct thread*', exactly as get_sb_vm_thread() does.
void* read_current_thread() {
    return get_sb_vm_thread();
}
#endif
#if defined LISP_FEATURE_DARWIN && defined LISP_FEATURE_SB_THREAD
extern pthread_key_t ignore_stop_for_gc;
#endif
#if !defined COLLECT_GC_STATS && !defined STANDALONE_LDB && \
defined LISP_FEATURE_LINUX && defined LISP_FEATURE_SB_THREAD && defined LISP_FEATURE_64_BIT
#define COLLECT_GC_STATS
#endif
#ifdef COLLECT_GC_STATS
/* Accumulated stop-the-world (stw) and whole-GC durations; the /1000
 * scaling below implies these are kept in nanoseconds. */
__attribute__((unused)) static struct timespec gc_start_time;
__attribute__((unused)) static long stw_elapsed,
    stw_min_duration = LONG_MAX, stw_max_duration, stw_sum_duration,
    gc_min_duration = LONG_MAX, gc_max_duration, gc_sum_duration;
int show_gc_stats, n_gcs_done;
/* atexit() hook: print min/avg/max STW delay and GC pause in microseconds. */
static void summarize_gc_stats(void) {
    // TODO: also collect things like number of root pages,bytes scanned
    // and number of pages,bytes copied on average per GC cycle.
    if (show_gc_stats && n_gcs_done)
        fprintf(stderr,
                "\nGC: stw_delay=%ld,%ld,%ld \u00B5s (min,avg,max) pause=%ld,%ld,%ld \u00B5s (sum=%ld) over %d GCs\n",
                stw_min_duration/1000, stw_sum_duration/n_gcs_done/1000, stw_max_duration/1000,
                gc_min_duration/1000, gc_sum_duration/n_gcs_done/1000, gc_max_duration/1000,
                gc_sum_duration/1000, n_gcs_done);
}
/* Reset all accumulators (and enable reporting) in the child process. */
void reset_gc_stats() { // after sb-posix:fork
    stw_min_duration = LONG_MAX; stw_max_duration = stw_sum_duration = 0;
    gc_min_duration = LONG_MAX; gc_max_duration = gc_sum_duration = 0;
    n_gcs_done = 0;
    show_gc_stats = 1; // won't show if never called reset
}
#endif
#ifdef ATOMIC_LOGGING
#define THREAD_NAME_MAP_MAX 20 /* KLUDGE */
/* Fixed-size registry mapping a pthread handle to its strdup'ed name,
 * appended to (never shrunk) as threads start. */
struct {
    pthread_t thread;
    char *name; // strdup'ed
} thread_name_map[THREAD_NAME_MAP_MAX];
int thread_name_map_count;
/* Return the recorded name for 'pointer', or 0 if never registered. */
char* thread_name_from_pthread(pthread_t pointer){
    for (int idx = 0; idx < thread_name_map_count; ++idx) {
        if (thread_name_map[idx].thread == pointer)
            return thread_name_map[idx].name;
    }
    return 0;
}
#endif
/* Build the 'struct thread' for the initial (main) thread, do one-time
 * thread-subsystem setup (locks, TLS keys, guard pages), then call
 * 'function', which is the lisp toplevel and normally never returns. */
void create_main_lisp_thread(lispobj function) {
#ifdef LISP_FEATURE_WIN32
    InitializeCriticalSection(&all_threads_lock);
    InitializeCriticalSection(&recyclebin_lock);
    InitializeCriticalSection(&in_gc_lock);
#endif
    struct thread *th = alloc_thread_struct(0);
    if (!th || arch_os_thread_init(th)==0 || !init_shared_attr_object())
        lose("can't create initial thread");
    th->state_word.sprof_enable = 1;
#if defined LISP_FEATURE_SB_THREAD && !defined LISP_FEATURE_GCC_TLS && !defined LISP_FEATURE_WIN32
    pthread_key_create(&current_thread, 0);
#endif
#if defined LISP_FEATURE_DARWIN && defined LISP_FEATURE_SB_THREAD
    pthread_key_create(&ignore_stop_for_gc, 0);
#endif
#if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
    __attribute__((unused)) lispobj *args = NULL;
#endif
    ASSOCIATE_OS_THREAD(th);
    ASSIGN_CURRENT_THREAD(th);
#if defined THREADS_USING_GCSIGNAL && \
    (defined LISP_FEATURE_PPC || defined LISP_FEATURE_PPC64 || defined LISP_FEATURE_ARM64 || defined LISP_FEATURE_RISCV)
    /* SIG_STOP_FOR_GC defaults to blocked on PPC? */
    unblock_gc_stop_signal();
#endif
    link_thread(th);
    th->os_kernel_tid = get_nonzero_tid();
    /* Arm the hard and then the soft guard pages on each stack.
     * (win32 handles control-stack guarding by its own mechanism) */
#ifndef LISP_FEATURE_WIN32
    protect_control_stack_hard_guard_page(1, NULL);
#endif
    protect_binding_stack_hard_guard_page(1, NULL);
    protect_alien_stack_hard_guard_page(1, NULL);
#ifndef LISP_FEATURE_WIN32
    protect_control_stack_guard_page(1, NULL);
#endif
    protect_binding_stack_guard_page(1, NULL);
    protect_alien_stack_guard_page(1, NULL);
#if defined(LISP_FEATURE_DARWIN) && defined(LISP_FEATURE_X86_64)
    set_thread_stack(th->control_stack_end);
#endif
#ifdef COLLECT_GC_STATS
    atexit(summarize_gc_stats);
#endif
    /* WIN32 has a special stack arrangement, calling
     * call_into_lisp_first_time will put the new stack in the middle
     * of the current stack */
#if !(defined(LISP_FEATURE_WIN32) && !defined(OS_THREAD_STACK)) \
    && (defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64))
    call_into_lisp_first_time(function,args,0);
#else
    funcall0(function);
#endif
    // If we end up returning, clean up the initial thread.
#ifdef LISP_FEATURE_SB_THREAD
    unlink_thread(th);
#else
    all_threads = NULL;
#endif
    arch_os_thread_cleanup(th);
    ASSIGN_CURRENT_THREAD(NULL);
}
#ifdef LISP_FEATURE_SB_THREAD
/* Release all C-side memory owned by 'th': the optional malloc()ed arena
 * save area hanging off the extra data, then the single OS-level block
 * holding the stacks and the thread structure itself. */
void free_thread_struct(struct thread *th)
{
    struct extra_thread_data *extra_data = thread_extra_data(th);
    free(extra_data->arena_savearea); // free(NULL) is a no-op; no guard needed
    os_deallocate((os_vm_address_t) th->os_address, THREAD_STRUCT_SIZE);
}
/* Note: scribble must be stack-allocated */
/* Finish initializing 'th' from within its own OS thread: establish the
 * current-thread pointer, optionally arm stack guard pages per the bit
 * flags in 'guardp', and add the thread to all_threads. */
static void
init_new_thread(struct thread *th,
                init_thread_data __attribute__((unused)) *scribble,
                int guardp)
{
    ASSIGN_CURRENT_THREAD(th);
    if(arch_os_thread_init(th)==0) {
        /* FIXME: handle error */
        lose("arch_os_thread_init failed");
    }
/* Bit flags for 'guardp' selecting which stacks to guard. */
#define GUARD_CONTROL_STACK 1
#define GUARD_BINDING_STACK 2
#define GUARD_ALIEN_STACK 4
#ifndef LISP_FEATURE_WIN32
    if (guardp & GUARD_CONTROL_STACK)
        protect_control_stack_guard_page(1, NULL);
#endif
    if (guardp & GUARD_BINDING_STACK)
        protect_binding_stack_guard_page(1, NULL);
    if (guardp & GUARD_ALIEN_STACK)
        protect_alien_stack_guard_page(1, NULL);
    /* Since GC can only know about this thread from the all_threads
     * list and we're just adding this thread to it, there is no
     * danger of deadlocking even with SIG_STOP_FOR_GC blocked (which
     * it is not). */
#ifdef LISP_FEATURE_SB_SAFEPOINT
    csp_around_foreign_call(th) = (lispobj)scribble;
#endif
    __attribute__((unused)) int lock_ret = mutex_acquire(&all_threads_lock);
    gc_assert(lock_ret);
    link_thread(th);
    ignore_value(mutex_release(&all_threads_lock));
    /* Kludge: Changed the order of some steps between the safepoint/
     * non-safepoint versions of this code. Can we unify this more?
     */
#ifdef LISP_FEATURE_SB_SAFEPOINT
    /* Wait out any GC in progress, then mark this thread as gcing-safely. */
    WITH_GC_STATE_LOCK {
        gc_state_wait(GC_NONE);
    }
    push_gcing_safety(&scribble->safety);
#endif
}
/* Tear down 'th' from within its own OS thread: close its GC allocation
 * regions, mark it DEAD, remove it from all_threads, destroy its
 * semaphores, and finally clear the current-thread association. */
static void
unregister_thread(struct thread *th,
                  init_thread_data __attribute__((unused)) *scribble)
{
    block_blockable_signals(0);
    gc_close_thread_regions(th, LOCK_PAGE_TABLE|CONSUME_REMAINDER);
#ifdef LISP_FEATURE_SB_SAFEPOINT
    pop_gcing_safety(&scribble->safety);
#else
    /* This state change serves to "acknowledge" any stop-the-world
     * signal received while the STOP_FOR_GC signal is blocked */
    set_thread_state(th, STATE_DEAD, 1);
#endif
    /* SIG_STOP_FOR_GC is blocked and GC might be waiting for this
     * thread, but since we are either exiting lisp code as a lisp
     * thread that is dying, or exiting lisp code to return to
     * former status as a C thread, it won't wait long. */
    __attribute__((unused)) int lock_ret = mutex_acquire(&all_threads_lock);
    gc_assert(lock_ret);
    unlink_thread(th);
    lock_ret = mutex_release(&all_threads_lock);
    gc_assert(lock_ret);
    arch_os_thread_cleanup(th);
    __attribute__((unused)) struct extra_thread_data *semaphores = thread_extra_data(th);
#ifdef LISP_FEATURE_UNIX
    os_sem_destroy(&semaphores->sprof_sem);
#endif
#ifndef LISP_FEATURE_SB_SAFEPOINT
    os_sem_destroy(&semaphores->state_sem);
    os_sem_destroy(&semaphores->state_not_running_sem);
    os_sem_destroy(&semaphores->state_not_stopped_sem);
#endif
#if defined(LISP_FEATURE_WIN32)
    int i;
    for (i = 0; i<NUM_PRIVATE_EVENTS; ++i)
        CloseHandle(thread_private_events(th,i));
#endif
    /* Undo the association of the current pthread to its `struct thread',
     * such that we can call get_sb_vm_thread() later in this
     * thread and cleanly get back NULL. */
    /* FIXME: what if, after we blocked signals, someone uses INTERRUPT-THREAD
     * on this thread? It's no longer a lisp thread; I suspect the signal
     * will be redirected to a lisp thread.
     * Can anything else go wrong with other signals? Nothing else should
     * direct signals specifically to this thread. Per-process signals are ok
     * because the kernel picks a thread in which a signal isn't blocked */
    ASSIGN_CURRENT_THREAD(NULL);
}
/* this is the first thing that runs in the child (which is why the
 * silly calling convention). Basically it calls the user's requested
 * lisp function after doing arch_os_thread_init and whatever other
 * bookkeeping needs to be done
 */
#ifdef LISP_FEATURE_WIN32
__stdcall unsigned int new_thread_trampoline(LPVOID arg)
#else
void* new_thread_trampoline(void* arg)
#endif
{
    struct thread* th = arg;
    ASSOCIATE_OS_THREAD(th);
#ifdef LISP_FEATURE_SB_SAFEPOINT
    init_thread_data scribble;
    // This "scribble" thing is really quite pointless because the original sigset_t
    // was passed in the thread's startup info (unless no signals at all were blocked).
    // And when terminating, why does anyone care what the signal mask was???
    // Well, there's a big "however": '&scribble' is no mere pass-by-reference arg-
    // it is actually used as an approximation of the C stack pointer.
#define SCRIBBLE &scribble
#else
#define SCRIBBLE 0
#endif
    // 'th->lisp_thread' remains valid despite not being in all_threads
    // due to the pinning via *STARTING-THREADS*.
    struct thread_instance *lispthread = (void*)native_pointer(th->lisp_thread);
    /* Ephemeral (system-internal) threads don't count as user threads. */
    if (lispthread->_ephemeral_p == LISP_T) th->state_word.user_thread_p = 0;
#ifdef ATOMIC_LOGGING
    char* string = strdup((char*)VECTOR(name)->data); // FIXME: no such var as 'name'
    int index = __sync_fetch_and_add(&thread_name_map_count, 1);
    gc_assert(index < THREAD_NAME_MAP_MAX);
    thread_name_map[index].thread = pthread_self();
    thread_name_map[index].name = string;
#endif
    struct vector* startup_info = VECTOR(lispthread->startup_info); // 'lispthread' is pinned
    gc_assert(header_widetag(startup_info->header) == SIMPLE_VECTOR_WIDETAG);
    lispobj startfun = startup_info->data[0]; // 'startup_info' is pinned
    gc_assert(functionp(startfun));
    // GC can benefit from knowing the _effective_ end of the ambiguous root range.
    // Nothing at a higher address than &arg needs to be scanned for ambiguous roots.
    // For x86 + linux this optimization skips over about 800 words in the stack scan,
    // and for x86-64 it skip about 550 words as observed via:
    //   fprintf(stderr, "%d non-lisp stack words\n",
    //           (int)((lispobj*)th->control_stack_end - (lispobj*)&arg));
    // ADDRESS_SANITIZER doesn't allow this optimization.
    // Both of these assertions fail with the sanitizer enabled:
    //   gc_assert(th->control_stack_start <= (lispobj*)&arg
    //             && (lispobj*)&arg <= th->control_stack_end);
    //   gc_assert(th->control_stack_start <= (lispobj*)&startup_info
    //             && (lispobj*)&startup_info <= th->control_stack_end);
    // It seems to subvert the "&" and "*" operators in a way that only it understands,
    // while the stack pointer register is unperturbed.
    // (gencgc takes '&raise' for the current thread, but it disables the sanitizers)
    //
    // A stop-for-GC signal that hits after init_new_thread() releases the all_threads lock
    // and returns control here needs to see in the interrupt context a stack pointer
    // strictly below the computed th->control_stack_end. So make sure the value we pick
    // is strictly above any value of SP that the interrupt context could have.
#if defined LISP_FEATURE_C_STACK_IS_CONTROL_STACK && !defined ADDRESS_SANITIZER \
    && !defined LISP_FEATURE_SB_SAFEPOINT
    th->control_stack_end = (lispobj*)&arg + 1;
#endif
    th->os_kernel_tid = get_nonzero_tid();
    init_new_thread(th, SCRIBBLE, 0);
    // Passing the untagged pointer ensures 2 things:
    // - that the pinning mechanism works as designed, and not just by accident.
    // - that the initial stack does not contain a lisp pointer after it is not needed.
    //   (a regression test asserts that not even a THREAD instance is on the stack)
    funcall1(startfun, (lispobj)lispthread); // both pinned
    // Close the GC region and unlink from all_threads
    unregister_thread(th, SCRIBBLE);
    return 0;
}
// This receives a VECTOR-SAP
/* Set the externally-visible (OS-level) name of the calling thread where
 * the platform allows it and the name fits its length limit. 'name' points
 * into a lisp vector's data, so the vector header lives just below it. */
void sb_set_os_thread_name(char* name)
{
    __attribute__((unused)) struct vector* v = (void*)(name - offsetof(struct vector,data));
    /* Potentially set the externally-visible name of this thread,
     * and for a whole pile of crazy, look at get_max_thread_name_length_impl() in
     * https://github.com/llvm-mirror/llvm/blob/394ea6522c69c2668bf328fc923e1a11cd785265/lib/Support/Unix/Threading.inc
     * which among other things, suggests that Linux might not even have the syscall */
#ifdef LISP_FEATURE_LINUX
    /* "The thread name is a meaningful C language string, whose length is
     * restricted to 16 characters, including the terminating null byte ('\0').
     * The pthread_setname_np() function can fail with the following error:
     * ERANGE The length of the string ... exceeds the allowed limit." */
    if (vector_len(v) <= 15) pthread_setname_np(pthread_self(), name);
#endif
#ifdef LISP_FEATURE_NETBSD
    /* This constant is an upper bound on the length including the NUL.
     * Exceeding it will fail the call. It happens to be 32.
     * Also, don't want to printf-format a name containing a '%' */
    if (vector_len(v) < PTHREAD_MAX_NAMELEN_NP) pthread_setname_np(pthread_self(), "%s", name);
#endif
#if defined LISP_FEATURE_FREEBSD || defined LISP_FEATURE_OPENBSD
    /* Some places document that the length limit is either 16 or 32,
     * but my testing showed that 12.1 seems to accept any length */
    pthread_set_name_np(pthread_self(), name);
#endif
#if defined LISP_FEATURE_DARWIN && !defined LISP_FEATURE_AVOID_PTHREAD_SETNAME_NP
    /* Darwin's pthread_setname_np() names only the calling thread. */
    if (vector_len(v) < 64) pthread_setname_np(name);
#endif
}
#ifdef LISP_FEATURE_OS_THREAD_STACK
extern void* funcall1_switching_stack(void*, void *(*fun)(void *));
/* Thread entry point when the OS-provided stack is not used directly:
 * switch to the lisp-managed stack, then run the normal trampoline. */
void* new_thread_trampoline_switch_stack(void* th) {
    return funcall1_switching_stack(th, new_thread_trampoline);
}
#endif
/* Singly-linked (via 'next') list of dead thread structs awaiting reuse,
 * guarded by recyclebin_lock. */
static struct thread* recyclebin_threads;
/* Pop one entry from the recycle bin and return its os_address (the base
 * of the whole OS-allocated block), or 0 if the bin is empty. */
static struct thread* get_recyclebin_item()
{
    __attribute__((unused)) int rc = mutex_acquire(&recyclebin_lock);
    gc_assert(rc);
    struct thread* head = recyclebin_threads;
    if (head)
        recyclebin_threads = head->next;
    ignore_value(mutex_release(&recyclebin_lock));
    return head ? head->os_address : 0;
}
/* Push 'th' onto the recycle bin for later reuse or release. */
static void put_recyclebin_item(struct thread* th)
{
    __attribute__((unused)) int acquired = mutex_acquire(&recyclebin_lock);
    gc_assert(acquired);
    th->next = recyclebin_threads;
    recyclebin_threads = th;
    ignore_value(mutex_release(&recyclebin_lock));
}
/* Free every thread struct accumulated in the recycle bin, returning the
 * memory to the OS. Uses a try-lock so a recursive invocation (e.g. from
 * GC) simply skips the work instead of deadlocking. */
void empty_thread_recyclebin()
{
    if (!recyclebin_threads) return;
    sigset_t old;
    block_deferrable_signals(&old);
    // no big deal if already locked (recursive GC?)
    /* NOTE(review): TryEnterCriticalSection is presumably macro-mapped onto
     * pthread_mutex_trylock for non-Windows builds, since the matching
     * release below is the portable mutex_release() - confirm in thread.h. */
    if (TryEnterCriticalSection(&recyclebin_lock)) {
        struct thread* this = recyclebin_threads;
        while (this) {
            struct thread* next = this->next;
            free_thread_struct(this);
            this = next;
        }
        recyclebin_threads = 0;
        ignore_value(mutex_release(&recyclebin_lock));
    }
    thread_sigmask(SIG_SETMASK, &old, 0);
}
/* Adopt a foreign (non-lisp-created) thread so a callback can run lisp in
 * it: allocate or recycle a 'struct thread', discover the C stack bounds,
 * and register the thread via init_new_thread(). */
static void attach_os_thread(init_thread_data *scribble)
{
#ifndef LISP_FEATURE_WIN32 // native threads have no signal mask
    block_deferrable_signals(&scribble->oldset);
#endif
    void* recycled_memory = get_recyclebin_item();
    struct thread *th = alloc_thread_struct(recycled_memory);
#ifndef LISP_FEATURE_SB_SAFEPOINT
    /* new-lisp-thread-trampoline doesn't like when the GC signal is blocked */
    /* FIXME: could be done using a single call to pthread_sigmask
       together with blocking the deferrable signals above. */
    unblock_gc_stop_signal();
#endif
    th->os_kernel_tid = get_nonzero_tid();
    /* win32: While ASSOCIATE_OS_THREAD performs a relatively expensive DuplicateHandle(),
     * simplicity here is preferable to the complexity entailed by memoizing the handle
     * in a TLS slot and registering a waiter on the foreign thread to close to handle.
     * In contrast to the previous approach, the new handle is closed in detach_os_thread(),
     * and if C calls lisp again in this thread... then lather, rinse, repeat.
     * A benchmark based on 'fcb-threads.impure' shows that we're still 8x faster
     * at callback entry than the code as it was prior to git rev 91f86339b4 */
    ASSOCIATE_OS_THREAD(th);
#if !defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_C_STACK_IS_CONTROL_STACK)
    /* On windows, arch_os_thread_init will take care of finding the
     * stack. */
    void *stack_addr;
    size_t stack_size;
# ifdef LISP_FEATURE_OPENBSD
    stack_t stack;
    pthread_stackseg_np(th->os_thread, &stack);
    stack_size = stack.ss_size;
    stack_addr = (void*)((size_t)stack.ss_sp - stack_size);
# elif defined LISP_FEATURE_SUNOS
    stack_t stack;
    thr_stksegment(&stack);
    stack_size = stack.ss_size;
    stack_addr = (void*)((size_t)stack.ss_sp - stack_size);
# elif defined(LISP_FEATURE_DARWIN)
    stack_size = pthread_get_stacksize_np(th->os_thread);
    stack_addr = (char*)pthread_get_stackaddr_np(th->os_thread) - stack_size;
# else
    pthread_attr_t attr;
    pthread_attr_init(&attr);
#  if defined LISP_FEATURE_FREEBSD || defined LISP_FEATURE_DRAGONFLY
    pthread_attr_get_np(th->os_thread, &attr);
#  else
    /* local prototype: not declared by all libc headers we build against */
    int pthread_getattr_np(pthread_t, pthread_attr_t *);
    pthread_getattr_np(th->os_thread, &attr);
#  endif
    pthread_attr_getstack(&attr, &stack_addr, &stack_size);
    pthread_attr_destroy(&attr);
# endif
    th->control_stack_start = stack_addr;
    th->control_stack_end = (void *) (((uintptr_t) stack_addr) + stack_size);
#endif
    /* We don't protect the control stack when adopting a foreign thread
     * because we wouldn't know where to put the guard */
    init_new_thread(th, scribble,
                    /* recycled memory already had mprotect() done,
                     * so avoid 2 syscalls when possible */
                    recycled_memory ? 0 : GUARD_BINDING_STACK|GUARD_ALIEN_STACK);
}
/* Undo attach_os_thread(): unregister the thread, clear any pending
 * stop-for-GC signal, recycle the thread struct, restore the signal mask. */
static void detach_os_thread(init_thread_data *scribble)
{
    struct thread *th = get_sb_vm_thread();
#if defined(LISP_FEATURE_WIN32)
    CloseHandle((HANDLE)th->os_thread);
#endif
    unregister_thread(th, scribble);
    /* We have to clear a STOP_FOR_GC signal if pending. Consider:
     *  - on entry to unregister_thread, we block all signals
     *  - simultaneously some other thread decides that it needs to initiate a GC
     *  - that thread observes that this thread exists in all_threads and sends
     *    STOP_FOR_GC, so it becomes pending but undeliverable in this thread
     *  - immediately after blocking signals, we change state to DEAD,
     *    which allows the GCing thread to ignore this thread
     *    (it sees the state change criterion as having been satisfied)
     *  - the GCing thread releases the all_threads lock
     *  - this thread acquires the lock and removes itself from all_threads,
     *    and indicates that it is no longer a lisp thread
     *  - but STOP_FOR_GC is pending because it was in the blocked set.
     * Bad things happen unless we clear the pending GC signal.
     */
#if !defined LISP_FEATURE_SB_SAFEPOINT
    sigset_t pending;
    sigpending(&pending);
    if (sigismember(&pending, SIG_STOP_FOR_GC)) {
#ifdef LISP_FEATURE_DARWIN
        /* sigwait is not reliable on macOS, but sigsuspend is. It unfortunately
         * requires that the signal be delivered, so set a flag to ignore it.
         * If you don't believe the preceding statement, try enabling the other
         * branch of this #ifdef and running fcb-threads.impure.lisp which will
         * sporadically fail with "Can't handle sig31 in non-lisp thread".
         * So either sigpending was sometimes lying (hence we didn't try to clear
         * the signal), or else sigwait did not dequeue the signal. Clearly the
         * latter must be true, because if only the former were true, then we
         * would also see the test fail with sigsuspend */
        sigset_t blockmask;
        sigfillset(&blockmask);
        sigdelset(&blockmask, SIG_STOP_FOR_GC);
        pthread_setspecific(ignore_stop_for_gc, (void*)1);
        /* sigsuspend takes the mask of signals to block */
        sigsuspend(&blockmask);
        pthread_setspecific(ignore_stop_for_gc, 0);
        sigpending(&pending);
        if (sigismember(&pending, SIG_STOP_FOR_GC)) lose("clear stop-for-GC did not work");
#else
        __attribute__((unused)) int sig, rc;
        /* sigwait takes the mask of signals to allow through */
        rc = sigwait(&gc_sigset, &sig);
        gc_assert(rc == 0 && sig == SIG_STOP_FOR_GC);
#endif
    }
#endif
    put_recyclebin_item(th);
#ifndef LISP_FEATURE_WIN32 // native threads have no signal mask
    thread_sigmask(SIG_SETMASK, &scribble->oldset, 0);
#endif
}
#if defined(LISP_FEATURE_X86_64) && !defined(LISP_FEATURE_WIN32)
extern void funcall_alien_callback(lispobj arg1, lispobj arg2, lispobj arg0,
                                   struct thread* thread)
    __attribute__((sysv_abi));
#endif
/* This function's address is assigned into a static symbol's value slot,
 * so it has to look like a fixnum. lp#1991485 */
/* Common entry for alien callbacks: if the caller is not yet a lisp
 * thread, temporarily attach it (and detach afterwards); then dispatch
 * into lisp with GC permitted only at safepoints. */
void __attribute__((aligned(8)))
callback_wrapper_trampoline(
#if !(defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64))
    /* On the x86oid backends, the assembly wrapper happens to not pass
     * in ENTER_ALIEN_CALLBACK explicitly for safepoints. However, the
     * platforms with precise GC are tricky enough already, and I want
     * to minimize the read-time conditionals. For those platforms, I'm
     * only replacing funcall3 with callback_wrapper_trampoline while
     * keeping the arguments unchanged. --DFL */
    lispobj __attribute__((__unused__)) fun,
#endif
    lispobj arg0, lispobj arg1, lispobj arg2)
{
    struct thread* th = get_sb_vm_thread();
    if (!th) { /* callback invoked in non-lisp thread */
        init_thread_data scribble;
        attach_os_thread(&scribble);
        WITH_GC_AT_SAFEPOINTS_ONLY()
        {
            funcall3(StaticSymbolFunction(ENTER_FOREIGN_CALLBACK), arg0,arg1,arg2);
        }
        detach_os_thread(&scribble);
        return;
    }
#ifdef LISP_FEATURE_WIN32
    /* arg2 is the pointer to a return value, which sits on the stack */
    thread_extra_data(th)->carried_base_pointer = (os_context_register_t) *(((void**)arg2)-1);
#endif
    WITH_GC_AT_SAFEPOINTS_ONLY()
    {
#if defined(LISP_FEATURE_X86_64) && !defined(LISP_FEATURE_WIN32)
        funcall_alien_callback(arg1, arg2, arg0, th);
#else
        funcall3(StaticSymbolFunction(ENTER_ALIEN_CALLBACK), arg0,arg1,arg2);
#endif
    }
}
#endif /* LISP_FEATURE_SB_THREAD */
/* this is called from any other thread to create the new one, and
* initialize all parts of it that can be initialized from another
* thread
*
* The allocated memory will be laid out as depicted below.
* Left-to-right is in order of lowest to highest address:
*
* ______ spaces as obtained from OS
* / ___ aligned_spaces
* / /
* (0) (1) (2) (3) (4) (5) (6)
* | | CONTROL | BINDING | ALIEN | CSP | thread | |
* | | STACK | STACK | STACK | PAGE | structure | altstack |
* |...|------------------------------------------------------------|
* 2MiB 1MiB 1MiB (*) (**)
*
* | Lisp TLS | (**) altstack |
* |-----------------------------------|----------|--------------|
* | thread + struct + dynamically | extra | sigstack |
* | header thread assigned TLS | data | |
* +---------+-------------------------|----------+--------------|
* | | <--- TLS_SIZE words --> | ~1kb | 32*SIGSTKSZ |
* ^ thread base
*
* (1) = control stack start. default size shown
* (2) = binding stack start. size = BINDING_STACK_SIZE
* (3) = alien stack start. size = ALIEN_STACK_SIZE
* (4) = C safepoint page. size = BACKEND_PAGE_BYTES or 0
* (5) = per_thread_data. size = (THREAD_HEADER_SLOTS+TLS_SIZE) words
* (6) = arbitrarily-sized "extra" data and signal stack.
*
* (0) and (1) may coincide; (4) and (5) may coincide
*
* - Lisp TLS overlaps 'struct thread' so that the first N (~30) words
* have preassigned TLS indices.
*
* - "extra" data are not in 'struct thread' because placing them there
* makes it tough to calculate addresses in 'struct thread' from Lisp.
* (Every 'struct thread' slot has a known size)
*
 * On sb-safepoint builds, the page immediately before the thread base is used for the foreign-call safepoint.
*/
struct thread *
alloc_thread_struct(void* spaces) {
/* Allocate the thread structure in one fell swoop as there is no way to recover
* from failing to obtain contiguous memory. Note that the OS may have a smaller
* alignment granularity than BACKEND_PAGE_BYTES so we may have to adjust the
* result to make it conform to our guard page alignment requirement. */
bool zeroize_stack = 0;
if (spaces) {
// If reusing memory from a previously exited thread, start by removing
// some old junk from the stack. This is imperfect since we only clear a little
// at the top, but doing so enables diagnosing some garbage-retention issues
// using a fine-toothed comb. It would not be possible at all to diagnose
// if any newly started thread could refer a dead thread's heap objects.
zeroize_stack = 1;
} else {
spaces = os_alloc_gc_space(THREAD_STRUCT_CORE_SPACE_ID, MOVABLE,
NULL, THREAD_STRUCT_SIZE);
if (!spaces) return NULL;
}
/* Aligning up is safe as THREAD_STRUCT_SIZE has
* THREAD_ALIGNMENT_BYTES padding. */
char *aligned_spaces = PTR_ALIGN_UP(spaces, THREAD_ALIGNMENT_BYTES);
char* csp_page = aligned_spaces + thread_control_stack_size +
BINDING_STACK_SIZE + ALIEN_STACK_SIZE;
// Refer to the ASCII art in the block comment above
struct thread *th = (void*)(csp_page + THREAD_CSP_PAGE_SIZE
+ THREAD_HEADER_SLOTS*N_WORD_BYTES);
#ifdef LISP_FEATURE_SB_SAFEPOINT
// Out of caution I'm supposing that the last thread to use this memory
// might have left this page as read-only. Could it? I have no idea.
os_protect(csp_page, THREAD_CSP_PAGE_SIZE, OS_VM_PROT_READ|OS_VM_PROT_WRITE);
#endif
#ifdef LISP_FEATURE_SB_THREAD
memset(th, 0, sizeof *th);
lispobj* ptr = (lispobj*)(th + 1);
lispobj* end = (lispobj*)((char*)th + dynamic_values_bytes);
memset(ptr, NO_TLS_VALUE_MARKER & 0xFF, (char*)end-(char*)ptr);
th->tls_size = dynamic_values_bytes;
#endif
__attribute((unused)) lispobj* tls = (lispobj*)th;
#ifdef THREAD_T_NIL_CONSTANTS_SLOT
tls[THREAD_T_NIL_CONSTANTS_SLOT] = (NIL << 32) | LISP_T;
#endif
#ifdef THREAD_ALIEN_LINKAGE_TABLE_BASE_SLOT
tls[THREAD_ALIEN_LINKAGE_TABLE_BASE_SLOT] = (lispobj)ALIEN_LINKAGE_TABLE_SPACE_START;
#endif
#if defined LISP_FEATURE_X86_64 && defined LISP_FEATURE_LINUX
tls[THREAD_MSAN_XOR_CONSTANT_SLOT] = 0x500000000000;
#endif
#ifdef LAYOUT_OF_FUNCTION
tls[THREAD_FUNCTION_LAYOUT_SLOT] = LAYOUT_OF_FUNCTION << 32;
#endif
#ifdef THREAD_TEXT_CARD_MARKS_SLOT
extern unsigned int* text_page_touched_bits;
tls[THREAD_TEXT_SPACE_ADDR_SLOT] = TEXT_SPACE_START;
tls[THREAD_TEXT_CARD_COUNT_SLOT] = text_space_size / IMMOBILE_CARD_BYTES;
tls[THREAD_TEXT_CARD_MARKS_SLOT] = (lispobj)text_page_touched_bits;
#endif
th->os_address = spaces;
th->control_stack_start = (lispobj*)aligned_spaces;
th->binding_stack_start=
(lispobj*)((char*)th->control_stack_start+thread_control_stack_size);
th->control_stack_end = th->binding_stack_start;
if (zeroize_stack) {
#if GENCGC_IS_PRECISE
/* Clear the entire control stack. Without this I was able to induce a GC failure
* in a test which hammered on thread creation for hours. The control stack is
* scavenged before the heap, so a stale word could point to the start (or middle)
* of an object using a bad lowtag, for whatever object formerly was there.
* Then a wrong transport function would be called and (if it worked at all) would
* place a wrongly tagged FP into a word that might not be the base of an object.
* Assume for simplicity (as is true) that stacks grow upward if GENCGC_IS_PRECISE.
* This could just call scrub_thread_control_stack but the comment there says that
* it's a lame algorithm and only mostly right - it stops after (1<<12) words
* and checks if the next is nonzero, looping again if it isn't.
* There's no reason not to be exactly right here instead of probably right */
memset((char*)th->control_stack_start, 0,
// take off 2 pages because of the soft and hard guard pages
thread_control_stack_size - 2*os_vm_page_size);
#else
/* This is a little wasteful of cycles to pre-zero the pthread overhead (which in glibc
* resides at the highest stack addresses) comprising about 5kb, below which is the lisp
* stack. We don't need to zeroize above the lisp stack end, but we don't know exactly
* where that will be. Zeroizing more than necessary is conservative, and helps ensure
* that garbage retention from reused stacks does not pose a huge problem. */
memset((char*)th->control_stack_end - 16384, 0, 16384);
#endif
}
th->state_word.control_stack_guard_page_protected = 1;
th->alien_stack_start=
(lispobj*)((char*)th->binding_stack_start+BINDING_STACK_SIZE);
set_binding_stack_pointer(th,th->binding_stack_start);
th->this = th;
th->os_kernel_tid = 0;
th->os_thread = 0;
// Once allocated, the allocation profiling buffer sticks around.