/
gc.c
3532 lines (3048 loc) · 129 KB
/
gc.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* gc.c
* Copyright 1984-2017 Cisco Systems, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "sort.h"
#ifndef WIN32
#include <sys/wait.h>
#endif /* WIN32 */
#include "popcount.h"
#include <assert.h>
/*
GC Implementation
-----------------
The copying, sweeping, and marking operations that depend on
object's shape are mostly implemented in "mkgc.ss". That script
generates "gc-ocd.inc" (for modes where object counting and
backpointers are disabled), "gc-oce.inc", and "gc-par.inc". The
rest of the implementation here can still depend on representation
details, though, especially for pairs, weak pairs, and ephemerons.
GC Copying versus Marking
-------------------------
Generations range from 0 to `S_G.max_nonstatic_generation` plus a
static generation. After an object moves to the static generation,
it doesn't move anymore. In the case of code objects, relocations
may be discarded when the code object moves into a static
generation.
For the most part, collecting generations 0 through MAX_CG (= max
copied generation) to MIN_TG to MAX_TG (= target generation) means
copying objects from old segments into fresh segments generations
MIN_TG through MAX_TG. Note that MAX_TG is either the same as or
one larger than MAX_CG. For objects in generation 0 through MAX_CG,
the target generation is either one more than the current
generation or it's MIN_TG.
Objects might be marked [and swept] instead of copied [and swept]
as triggered by two possibilities: one or more objects on the
source segment are immobile (subsumes locked) or MAX_CG == MAX_TG
and the object is on a MAX_CG segment that hasn't been discovered as
sparse by a previous marking (non-copying) pass. Segments with
marked objects are promoted to the target generation.
As a special case, locking on `space_new` does not mark all objects
on that segment, because dirty-write handling cannot deal with
`space_new`; only locked objects stay on the old segment in that
case, and they have to be marked by looking at a list of locked
objects.
During a collection, the `old_space` flag is set on a segment if
objects are being copied out of it or marked on it; that is,
`old_space` is set if the segment starts out in one of the
generations 0 through mgc. If a segment is being marked instead of
copied, the `use_marks` bit is also set; note that the bit will not
be set for a `space_new` segment, and locked objects in that space
will be specially marked.
Marking an object means setting a bit in `marked_mask`, which is
allocated as needed. Any segment that ends up with a non-NULL
`marked_mask` is kept in its new generation at the end of
collection. If a marked object spans multiple segments, then
`marked_mask` is created across all of the segments. It's possible
for a segment to end up with `marked_mask` even though `use_marks`
was not set: a marked object spanned into the segment, or it's a
`space_new` segment with locked objects; in that case, other
objects will be copied out of the segment, because `use_marks` is
how relocation decides whether to copy or mark.
If an object is copied, then its first word is set to
`forward_marker` and its second word is set to the new address.
Obviously, that doesn't happen if an object is marked. So, to test
whether an object has been reached:
* the object must be in an `old_space` segment, otherwise it counts
as reached because it's in a generation older than MAX_CG;
* the object either starts with `forward_marker` or its mark bit is
set (and those are mutually exclusive).
Besides the one bit for the start of an object in the mark mask,
extra bits for the object content may be set as well. Those extra
bits tell the dirty-object sweeper which words in a previously
marked page should be swept and which should be skipped, so the
extra bits are only needed for impure objects in certain kinds of
spaces. Only every alternate word needs to be marked that way, so
half of the mark bits are usually irrelevant; the exception is that
flonums can be between normal object-start positions, so those mark
bits can matter, at least if we're preserving `eq?` on flonums (but
the bits are not relevant to dirty-object sweeping, since flonums
don't have pointer fields).
It's ok to sweep an object multiple times, but that's to be
avoided if possible.
Pending Ephemerons and Guardians
--------------------------------
Ephemerons and guardians act as a kind of "and": an object stays
reachable only if some other object (besides the
ephemeron/guardian itself) is reachable or not. Instead of
rechecking all guardians and ephemerons constantly, the collector
queues pending guardians and ephemerons on the segment where the
relevant object lives. If any object on that segment is discovered
to be reachable (i.e., copied or marked), the guardian/ephemeron is
put into a list of things to check again.
Parallel Collection
-------------------
Parallel mode runs `sweep_generation` concurrently in multiple
sweeper threads. It relies on a number of invariants:
* There are no attempts to take tc_mutex during sweeping. To the
degree that locking is needed (e.g., to allocate new segments),
the allocation mutex is used. No other locks can be taken while
that one is held.
Along similar lines, get_thread_context() must not be used,
because the sweeper threads are not the same as Scheme threads,
and a sweeper thread may temporarily adapt a different Scheme
thread context.
* To copy from or mark on a segment, a sweeper must own the
segment. A sweeper during sweeping may encounter a "remote"
reference to a segment that it doesn't own; in that case, it
registers the object containing the remote reference to be
re-swept by the sweeper that owns the target of the reference.
A segment is owned by the thread that originally allocated it.
When a GC starts, for old-space segments that are owned by
threads that do not have a corresponding sweeper, the segment is
moved to the main collecting thread's ownership.
Note that copying and marking are constrained so that they don't
have to recursively copy or mark. In some cases, this property
is achieved by not caring whether a reference goes to an old
copy or unmarked object; for example, a record type's size field
will be the same in both places, so either copy can be used to
determine a record size of copying. A record type's parent field
would not be available, however, since it can get overwritten
with forwarding information.
* An object that is marked does not count as "remote".
Sweepers might attempt to access marked-object information at
the same time that it is being updated by the owning sweeper.
It's ok if the non-owning sweepers get stale information;
they'll just send the referencing object to the owning thread
for re-sweeping. A write fence ensures that non-owning sweepers
do not inspect mark-bitmap bits that have not been initialized.
* Normally, a sweeper that encounters a remote reference can
continue sweeping and eventually register the remote re-sweep.
An object is swept by only one sweeper at a time; if multiple
remote references to different sweepers are discovered in an
object, it is sent to only one of the remote sweepers, and that
sweeper will eventually send on the object to the other sweeper.
At worst, each object is swept N times for N sweepers.
In rare cases, a sweeper cannot fully process an object, because
doing so would require inspecting a remote object. For example,
a record type's pointer mask or a stack frame's live-pointer
mask can be a bignum, and the bignum might be remote. In those
cases, the object might have to be sent back to the original
sweeper, and so on. In the worst case, the object can be swept
more than N times ---- but, again, this case rarely happens at
all, and sweeping more than N times is very unlikely.
* In counting/backtrace/measure mode, "parallel" collection can be
used to preserve object ownership, but no extra sweeper threads
are used. So, it is not really parallel, and counting and
backtrace operations do not need locks.
Counting needs to copy or mark a record-type or object-count
object as part of a copy or mark operation, which is otherwise
not allowed (but ok with counting, since it's not actually in
parallel). For that purpose, `relocate_pure_in_owner`
temporarily switches to the owning thread.
*/
/* locally defined functions */
static IGEN copy(thread_gc *tgc, ptr pp, seginfo *si, ptr *dest);
static IGEN mark_object(thread_gc *tgc, ptr pp, seginfo *si);
static void sweep(thread_gc *tgc, ptr p, IGEN from_g);
static void sweep_in_old(thread_gc *tgc, ptr p);
static void sweep_object_in_old(thread_gc *tgc, ptr p);
static IBOOL object_directly_refers_to_self(ptr p);
static ptr copy_stack(thread_gc *tgc, ptr old, iptr *length, iptr clength);
static void resweep_weak_pairs(thread_gc *tgc, seginfo *oldweakspacesegments);
static void forward_or_bwp(thread_gc *tgc, IGEN from_g, ptr *pp, ptr p);
static void sweep_generation(thread_gc *tgc);
static iptr sweep_from_stack(thread_gc *tgc);
static void enlarge_stack(thread_gc *tgc, ptr *stack, ptr *stack_start, ptr *stack_limit, uptr grow_at_least);
static uptr size_object(ptr p);
static iptr sweep_typed_object(thread_gc *tgc, ptr p, IGEN from_g);
static void sweep_symbol(thread_gc *tgc, ptr p, IGEN from_g);
static void sweep_port(thread_gc *tgc, ptr p, IGEN from_g);
static void sweep_thread(thread_gc *tgc, ptr p);
static void sweep_continuation(thread_gc *tgc, ptr p, IGEN from_g);
static void sweep_record(thread_gc *tgc, ptr x, IGEN from_g);
static IGEN sweep_dirty_record(thread_gc *tgc, ptr x, IGEN youngest);
static IGEN sweep_dirty_port(thread_gc *tgc, ptr x, IGEN youngest);
static IGEN sweep_dirty_symbol(thread_gc *tgc, ptr x, IGEN youngest);
static void sweep_code_object(thread_gc *tgc, ptr co, IGEN from_g);
static void record_dirty_segment(IGEN from_g, IGEN to_g, seginfo *si);
static void setup_sweep_dirty(thread_gc *tgc);
static uptr sweep_dirty_segments(thread_gc *tgc, seginfo **dirty_segments);
static void resweep_dirty_weak_pairs(thread_gc *tgc);
static void mark_untyped_data_object(thread_gc *tgc, ptr p, uptr len, seginfo *si);
static void add_pending_guardian(ptr gdn, ptr tconc);
static void add_trigger_guardians_to_recheck(ptr ls);
static void add_ephemeron_to_pending(thread_gc *tgc, ptr p);
static void add_trigger_ephemerons_to_pending(thread_gc *tgc, ptr p);
static void check_triggers(thread_gc *tgc, seginfo *si);
static void check_ephemeron(thread_gc *tgc, ptr pe);
static void check_pending_ephemerons(thread_gc *tgc);
static int check_dirty_ephemeron(thread_gc *tgc, ptr pe, int youngest);
static void finish_pending_ephemerons(thread_gc *tgc, seginfo *si);
static void init_fully_marked_mask(thread_gc *tgc, IGEN g);
static void copy_and_clear_list_bits(thread_gc *tgc, seginfo *oldspacesegments);
#ifdef ENABLE_OBJECT_COUNTS
static uptr total_size_so_far();
static uptr list_length(ptr ls);
#endif
static uptr target_generation_space_so_far(thread_gc *tgc);
#ifdef ENABLE_MEASURE
static void init_measure(thread_gc *tgc, IGEN min_gen, IGEN max_gen);
static void finish_measure();
static void measure(thread_gc *tgc, ptr p);
static void flush_measure_stack(thread_gc *tgc);
static void init_measure_mask(thread_gc *tgc, seginfo *si);
static void init_counting_mask(thread_gc *tgc, seginfo *si);
static void push_measure(thread_gc *tgc, ptr p);
static void measure_add_stack_size(ptr stack, uptr size);
static void add_ephemeron_to_pending_measure(thread_gc *tgc, ptr pe);
static void add_trigger_ephemerons_to_pending_measure(ptr pe);
static void check_ephemeron_measure(thread_gc *tgc, ptr pe);
static void check_pending_measure_ephemerons(thread_gc *tgc);
#endif
#ifdef ENABLE_PARALLEL
/* # define ENABLE_TIMING */
#endif
#ifdef ENABLE_TIMING
#include <sys/time.h>
static uptr get_real_time () {
struct timeval now;
gettimeofday(&now, NULL);
return ((uptr) now.tv_sec) * 1000 + ((uptr) now.tv_usec) / 1000;
}
static uptr get_cpu_time () {
struct timespec now;
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now);
return ((uptr) now.tv_sec) * 1000 + ((uptr) now.tv_nsec) / 1000000;
}
# define GET_REAL_TIME(x) uptr x = get_real_time()
# define GET_CPU_TIME(x) uptr x = get_cpu_time()
# define ACCUM_REAL_TIME(a, y, x) uptr y = get_real_time() - x; a += y
# define ACCUM_CPU_TIME(a, y, x) uptr y = get_cpu_time() - x; a += y
# define REPORT_TIME(e) e
static uptr collect_accum, all_accum, par_accum;
# define COUNT_SWEPT_BYTES(start, end) num_swept_bytes += ((uptr)TO_PTR(end) - (uptr)TO_PTR(start))
# define ADJUST_COUNTER(e) e
#else
# define GET_REAL_TIME(x) do { } while (0)
# define GET_CPU_TIME(x) do { } while (0)
# define ACCUM_REAL_TIME(a, y, x) do { } while (0)
# define ACCUM_CPU_TIME(a, y, x) do { } while (0)
# define REPORT_TIME(e) do { } while (0)
# define COUNT_SWEPT_BYTES(start, end) do { } while (0)
# define ADJUST_COUNTER(e) do { } while (0)
#endif
#if defined(MIN_TG) && defined(MAX_TG)
# if MIN_TG == MAX_TG
# define NO_DIRTY_NEWSPACE_POINTERS
# endif
#endif
/* #define DEBUG */
/* initialized and used each gc cycle. any others should be defined in globals.h */
static ptr tlcs_to_rehash;
static ptr conts_to_promote;
static ptr recheck_guardians_ls;
static seginfo *resweep_weak_segments;
#ifdef ENABLE_OBJECT_COUNTS
static int measure_all_enabled;
static uptr count_root_bytes;
#endif
/* max_cg: maximum copied generation, i.e., maximum generation subject to collection. max_cg >= 0 && max_cg <= static_generation.
* min_tg: minimum target generation. max_tg == 0 ? min_tg == 0 : min_tg > 0 && min_tg <= max_tg;
* max_tg: maximum target generation. max_tg == max_cg || max_tg == max_cg + 1.
* Objects in generation g are collected into generation MIN(max_tg, MAX(min_tg, g+1)).
*/
#if defined(MAX_CG) && defined(MIN_TG) && defined(MAX_TG)
#else
static IGEN MAX_CG, MIN_TG, MAX_TG;
#endif
#if defined(MIN_TG) && defined(MAX_TG) && (MIN_TG == MAX_TG)
# define TARGET_GENERATION(si) MIN_TG
# define compute_target_generation(g) MIN_TG
# define CONSTANT_TARGET_GENERATION
#else
# define TARGET_GENERATION(si) si->generation
FORCEINLINE IGEN compute_target_generation(IGEN g) {
  /* Objects in generation g are collected into MIN(MAX_TG, MAX(MIN_TG, g+1));
     see the comment above the MAX_CG/MIN_TG/MAX_TG declarations. */
  if (g == MAX_TG)
    return g;
  if (g < MIN_TG)
    return MIN_TG;
  return g + 1;
}
#endif
static octet *fully_marked_mask[static_generation+1];
static const int sweep_stack_min_size = 256;
#define push_sweep(p) do { \
if (tgc->sweep_stack == tgc->sweep_stack_limit) \
enlarge_stack(tgc, &tgc->sweep_stack, &tgc->sweep_stack_start, &tgc->sweep_stack_limit, ptr_bytes); \
*(ptr *)TO_VOIDP(tgc->sweep_stack) = p; \
tgc->sweep_stack = (ptr)((uptr)tgc->sweep_stack + ptr_bytes); \
} while (0)
#ifdef ENABLE_MEASURE
static uptr measure_total; /* updated by `measure` */
static IGEN min_measure_generation, max_measure_generation;
static ptr *measure_stack_start, *measure_stack, *measure_stack_limit;
static ptr measured_seginfos;
static ptr pending_measure_ephemerons;
#endif
#ifdef ENABLE_BACKREFERENCE
static ptr sweep_from;
# define BACKREFERENCES_ENABLED S_G.enable_object_backreferences
# define SET_SWEEP_FROM(p) if (S_G.enable_object_backreferences) sweep_from = p
# define WITH_TOP_BACKREFERENCE(v, e) SET_SWEEP_FROM(v); e; SET_SWEEP_FROM(Sfalse)
# define SET_BACKREFERENCE(p) sweep_from = p
# define PUSH_BACKREFERENCE(p) ptr old_sweep_from = sweep_from; SET_SWEEP_FROM(p);
# define POP_BACKREFERENCE() SET_SWEEP_FROM(old_sweep_from);
# define ADD_BACKREFERENCE_FROM(p, from_p, tg) do { \
IGEN TG = tg; \
if ((S_G.enable_object_backreferences) && (TG < static_generation)) \
S_G.gcbackreference[TG] = S_cons_in(tgc->tc, space_impure, TG, \
S_cons_in(tgc->tc, space_impure, TG, p, from_p), \
S_G.gcbackreference[TG]); \
} while (0)
# define ADD_BACKREFERENCE(p, tg) ADD_BACKREFERENCE_FROM(p, sweep_from, tg)
#else
# define BACKREFERENCES_ENABLED 0
# define WITH_TOP_BACKREFERENCE(v, e) e
# define SET_BACKREFERENCE(p) do { } while (0)
# define PUSH_BACKREFERENCE(p)
# define POP_BACKREFERENCE()
# define ADD_BACKREFERENCE_FROM(p, from_p, from_g)
# define ADD_BACKREFERENCE(p, from_g)
#endif
#if !defined(PTHREADS)
# undef ENABLE_PARALLEL
#endif
#ifdef ENABLE_PARALLEL
static int in_parallel_sweepers = 0;
#define HAS_SWEEPER_WRT(t_tc, tc) 1
# define GC_MUTEX_ACQUIRE() alloc_mutex_acquire()
# define GC_MUTEX_RELEASE() alloc_mutex_release()
/* shadows `tgc` binding in context: */
# define BLOCK_SET_THREAD(a_tgc) thread_gc *tgc = a_tgc
# define SEGMENT_IS_LOCAL(si, p) (((si)->creator == tgc) || marked(si, p) || !in_parallel_sweepers)
# define FLUSH_REMOTE_BLOCK thread_gc *remote_tgc = NULL;
# define RECORD_REMOTE(si) remote_tgc = si->creator
# define FLUSH_REMOTE(tgc, p) do { \
if (remote_tgc != NULL) \
push_remote_sweep(tgc, p, remote_tgc); \
} while (0)
# define ASSERT_EMPTY_FLUSH_REMOTE() do { \
if (remote_tgc != NULL) S_error_abort("non-empty remote flush"); \
} while (0);
static void setup_sweepers(thread_gc *tgc);
static void run_sweepers(void);
static void teardown_sweepers(void);
# define parallel_sweep_generation(tgc) run_sweepers()
# define parallel_sweep_dirty_and_generation(tgc) run_sweepers()
static void push_remote_sweep(thread_gc *tgc, ptr p, thread_gc *remote_tgc);
static void send_and_receive_remote_sweeps(thread_gc *tgc);
#define SWEEPER_NONE 0
#define SWEEPER_READY 1
#define SWEEPER_SWEEPING 2
#define SWEEPER_WAITING_FOR_WORK 3
typedef struct {
int status;
s_thread_cond_t done_cond, work_cond;
thread_gc *first_tgc, *last_tgc;
iptr num_swept_bytes;
#ifdef ENABLE_TIMING
int remotes_sent, remotes_received;
uptr step, sweep_accum;
#endif
} gc_sweeper;
static gc_sweeper sweepers[maximum_parallel_collect_threads+1];
static int num_sweepers;
# define PARALLEL_UNUSED UNUSED
# define NO_PARALLEL_UNUSED /* empty */
#else
#define HAS_SWEEPER_WRT(t_tc, tc) (t_tc == tc)
# define GC_MUTEX_ACQUIRE() do { } while (0)
# define GC_MUTEX_RELEASE() do { } while (0)
# define BLOCK_SET_THREAD(a_tgc) do { } while (0)
# define SEGMENT_IS_LOCAL(si, p) 1
# define FLUSH_REMOTE_BLOCK /* empty */
# define RECORD_REMOTE(si) do { } while (0)
# define FLUSH_REMOTE(tgc, p) do { } while (0)
# define ASSERT_EMPTY_FLUSH_REMOTE() do { } while (0)
# define setup_sweepers(tgc) do { } while (0)
# define parallel_sweep_generation(tgc) do { sweep_generation(tgc); } while (0)
# define parallel_sweep_dirty_and_generation(tgc) do { sweep_dirty(tgc); sweep_generation(tgc); } while (0)
# define send_and_receive_remote_sweeps(tgc) do { } while (0)
# define teardown_sweepers() do { } while (0)
static void sweep_dirty(thread_gc *tgc);
# define PARALLEL_UNUSED /* empty */
# define NO_PARALLEL_UNUSED UNUSED
#endif
#define SWEEP_NO_CHANGE 0
#define SWEEP_CHANGE_PROGRESS 1
#if ptr_alignment == 2
# define record_full_marked_mask 0x55
# define record_high_marked_bit 0x40
# define mask_bits_to_list_bits_mask(m) ((m) | ((m) << 1))
#elif ptr_alignment == 1
# define record_full_marked_mask 0xFF
# define record_high_marked_bit 0x80
# define mask_bits_to_list_bits_mask(m) (m)
#endif
#define segment_sufficiently_compact_bytes ((bytes_per_segment * 3) / 4)
#define chunk_sufficiently_compact(nsegs) ((nsegs) >> 2)
/* Values for a guardian entry's `pending` field when it's added to a
seginfo's pending list: */
enum {
GUARDIAN_PENDING_HOLD,
GUARDIAN_PENDING_FINAL
};
#ifdef ENABLE_OBJECT_COUNTS
/* Returns the number of pairs in the proper list `ls`.
   Declared `static` above (forward declaration); spell the definition
   `static` as well so declaration and definition agree. */
static uptr list_length(ptr ls) {
  uptr i = 0;
  while (ls != Snil) { ls = Scdr(ls); i += 1; }
  return i;
}
#endif
#define init_mask(tgc, dest, tg, init) do { \
octet *MASK; \
find_gc_room_voidp(tgc, space_data, tg, ptr_align(segment_bitmap_bytes), MASK); \
memset(MASK, init, segment_bitmap_bytes); \
STORE_FENCE(); \
dest = MASK; \
tgc->bitmask_overhead[tg] += ptr_align(segment_bitmap_bytes); \
} while (0)
#define marked(si, p) (si->marked_mask && (si->marked_mask[segment_bitmap_byte(p)] & segment_bitmap_bit(p)))
#ifdef NO_NEWSPACE_MARKS
# define new_marked(si, p) 0
# define CAN_MARK_AND(x) 0
#else
# define new_marked(si, p) marked(si, p)
# define CAN_MARK_AND(x) x
#endif
/* Lazily creates the shared all-ones mark bitmap for generation `g`.
   Guarded by the GC mutex (the allocation mutex in parallel mode), since
   multiple sweepers may race to create the mask for the same generation. */
static void init_fully_marked_mask(thread_gc *tgc, IGEN g) {
  GC_MUTEX_ACQUIRE();
  if (!fully_marked_mask[g]) {
    /* 0xFF => every bitmap bit set, i.e. every object position marked */
    init_mask(tgc, fully_marked_mask[g], g, 0xFF);
  }
  GC_MUTEX_RELEASE();
}
#ifdef PRESERVE_FLONUM_EQ
/* Records in a per-segment bitmap that the flonum `p` has been forwarded.
   Flonums get a separate bitmap (instead of a forward marker in the first
   word) because a flonum's payload occupies the words a forwarding marker
   would overwrite; used only when PRESERVE_FLONUM_EQ is enabled. */
static void flonum_set_forwarded(thread_gc *tgc, ptr p, seginfo *si) {
  if (!si->forwarded_flonums)
    /* allocate the bitmap on first use, initialized to all zeros */
    init_mask(tgc, si->forwarded_flonums, 0, 0);
  si->forwarded_flonums[segment_bitmap_byte(p)] |= segment_bitmap_bit(p);
}
/* Reports whether the flonum `p` was recorded as forwarded in `si`'s
   forwarded-flonums bitmap; a segment with no bitmap has no forwarded
   flonums. Used only when PRESERVE_FLONUM_EQ is enabled. */
static int flonum_is_forwarded_p(ptr p, seginfo *si) {
  if (!si->forwarded_flonums)
    return 0;
  return si->forwarded_flonums[segment_bitmap_byte(p)] & segment_bitmap_bit(p);
}
# define FLONUM_FWDADDRESS(p) *(ptr*)TO_VOIDP(UNTYPE(p, type_flonum))
# define FORWARDEDP(p, si) ((TYPEBITS(p) == type_flonum) ? flonum_is_forwarded_p(p, si) : (FWDMARKER(p) == forward_marker))
# define GET_FWDADDRESS(p) ((TYPEBITS(p) == type_flonum) ? FLONUM_FWDADDRESS(p) : FWDADDRESS(p))
#else
# define FORWARDEDP(p, si) (FWDMARKER(p) == forward_marker && TYPEBITS(p) != type_flonum)
# define GET_FWDADDRESS(p) FWDADDRESS(p)
#endif
#ifdef ENABLE_OBJECT_COUNTS
# define ELSE_MEASURE_NONOLDSPACE(p) \
else if (measure_all_enabled) \
push_measure(tgc, p);
#else
# define ELSE_MEASURE_NONOLDSPACE(p) /* empty */
#endif
/* use relocate_pure for newspace fields that can't point to younger
objects or where there's no need to track generations */
#define relocate_pure(ppp) do { \
ptr* PPP = ppp; ptr PP = *PPP; \
relocate_pure_help(PPP, PP); \
} while (0)
#define relocate_pure_help(ppp, pp) do { \
seginfo *SI; \
if (!FIXMEDIATE(pp) && (SI = MaybeSegInfo(ptr_get_segment(pp))) != NULL) { \
if (SI->old_space) \
relocate_pure_help_help(ppp, pp, SI); \
ELSE_MEASURE_NONOLDSPACE(pp) \
} \
} while (0)
#define relocate_pure_help_help(ppp, pp, si) do { \
if (SEGMENT_IS_LOCAL(si, pp)) { \
if (FORWARDEDP(pp, si)) \
*ppp = GET_FWDADDRESS(pp); \
else if (!new_marked(si, pp)) \
mark_or_copy_pure(ppp, pp, si); \
} else \
RECORD_REMOTE(si); \
} while (0)
#define relocate_code(pp, si) do { \
if (si->old_space) { \
if (SEGMENT_IS_LOCAL(si, pp)) { \
if (FWDMARKER(pp) == forward_marker) \
pp = GET_FWDADDRESS(pp); \
else if (!new_marked(si, pp)) \
mark_or_copy_pure(&pp, pp, si); \
} else \
RECORD_REMOTE(si); \
} ELSE_MEASURE_NONOLDSPACE(pp) \
} while (0)
#define mark_or_copy_pure(dest, p, si) do { \
if (CAN_MARK_AND(si->use_marks)) \
(void)mark_object(tgc, p, si); \
else \
(void)copy(tgc, p, si, dest); \
} while (0)
#define relocate_pure_now(ppp) do { \
FLUSH_REMOTE_BLOCK \
relocate_pure(ppp); \
ASSERT_EMPTY_FLUSH_REMOTE(); \
} while (0)
#if defined(ENABLE_PARALLEL) && defined(ENABLE_OBJECT_COUNTS)
/* Like relocate_pure_now, but when the referenced object is in old space,
   the relocation is performed on behalf of the thread_gc that owns the
   target's segment (needed in counting mode, where copying/marking must
   happen under the owning thread; see "Parallel Collection" above). */
static void do_relocate_pure_in_owner(thread_gc *tgc, ptr *ppp) {
  seginfo *si;
  ptr pp = *ppp;
  if (!FIXMEDIATE(pp)
      && (si = MaybeSegInfo(ptr_get_segment(pp))) != NULL
      && si->old_space) {
    /* BLOCK_SET_THREAD declares a local `tgc` that shadows the parameter,
       so relocate_pure_now below runs as the segment's creator */
    BLOCK_SET_THREAD(si->creator);
    relocate_pure_now(ppp);
  } else {
    relocate_pure_now(ppp);
  }
}
# define relocate_pure_in_owner(ppp) do_relocate_pure_in_owner(tgc, ppp)
#else
# define relocate_pure_in_owner(pp) relocate_pure(pp)
#endif
/* use relocate_impure for newspace fields that can point to younger objects */
#ifdef NO_DIRTY_NEWSPACE_POINTERS
# define relocate_impure_help(PPP, PP, FROM_G) do {(void)FROM_G; relocate_pure_help(PPP, PP);} while (0)
# define relocate_impure(PPP, FROM_G) do {(void)FROM_G; relocate_pure(PPP);} while (0)
# define NO_DIRTY_NEWSPACE_UNUSED UNUSED
#else /* !NO_DIRTY_NEWSPACE_POINTERS */
#define relocate_impure(ppp, from_g) do { \
ptr* PPP = ppp; ptr PP = *PPP; IGEN FROM_G = from_g; \
relocate_impure_help(PPP, PP, FROM_G); \
} while (0)
#define relocate_impure_help(ppp, pp, from_g) do { \
seginfo *SI; \
if (!FIXMEDIATE(pp) && (SI = MaybeSegInfo(ptr_get_segment(pp))) != NULL) { \
if (SI->old_space) \
relocate_impure_help_help(ppp, pp, from_g, SI); \
ELSE_MEASURE_NONOLDSPACE(pp) \
} \
} while (0)
#define relocate_impure_help_help(ppp, pp, from_g, si) do { \
IGEN __to_g; \
if (SEGMENT_IS_LOCAL(si, pp)) { \
if (FORWARDEDP(pp, si)) { \
*ppp = GET_FWDADDRESS(pp); \
__to_g = TARGET_GENERATION(si); \
} else if (!new_marked(si, pp)) { \
mark_or_copy_impure(__to_g, ppp, pp, from_g, si); \
} else { \
__to_g = TARGET_GENERATION(si); \
} \
if (__to_g < from_g) S_record_new_dirty_card(tgc, ppp, __to_g); \
} else \
RECORD_REMOTE(si); \
} while (0)
#define mark_or_copy_impure(to_g, dest, p, from_g, si) do { \
if (CAN_MARK_AND(si->use_marks)) \
to_g = mark_object(tgc, p, si); \
else \
to_g = copy(tgc, p, si, dest); \
} while (0)
# define NO_DIRTY_NEWSPACE_UNUSED /* empty */
#endif /* !NO_DIRTY_NEWSPACE_POINTERS */
#define relocate_dirty(PPP, YOUNGEST) do { \
seginfo *_si; ptr *_ppp = PPP, _pp = *_ppp; IGEN _pg; \
if (!FIXMEDIATE(_pp) && (_si = MaybeSegInfo(ptr_get_segment(_pp))) != NULL) { \
if (!_si->old_space) { \
_pg = _si->generation; \
} else { \
if (SEGMENT_IS_LOCAL(_si, _pp)) { \
if (FORWARDEDP(_pp, _si)) { \
*_ppp = GET_FWDADDRESS(_pp); \
_pg = TARGET_GENERATION(_si); \
} else if (new_marked(_si, _pp)) { \
_pg = TARGET_GENERATION(_si); \
} else if (CAN_MARK_AND(_si->use_marks)) { \
_pg = mark_object(tgc, _pp, _si); \
} else { \
_pg = copy(tgc, _pp, _si, _ppp); \
} \
} else { \
RECORD_REMOTE(_si); \
_pg = 0xff; \
} \
} \
if (_pg < YOUNGEST) YOUNGEST = _pg; \
} \
} while (0)
#define relocate_reference(ppp, from_g) do { \
ptr* rPPP = ppp; ptr rPP = *rPPP; \
if (!FOREIGN_REFERENCEP(rPP)) { \
*rPPP = S_reference_to_object(rPP); \
relocate_impure(rPPP, from_g); \
*rPPP = S_object_to_reference(*rPPP); \
} \
} while (0)
#define relocate_reference_dirty(ppp, YOUNGEST) do { \
ptr* rPPP = ppp; \
if (!FOREIGN_REFERENCEP(*rPPP)) { \
*rPPP = S_reference_to_object(*rPPP); \
relocate_dirty(rPPP, YOUNGEST); \
*rPPP = S_object_to_reference(*rPPP); \
} \
} while (0)
#ifdef ENABLE_OBJECT_COUNTS
# define is_counting_root(si, p) (si->counting_mask && (si->counting_mask[segment_bitmap_byte(p)] & segment_bitmap_bit(p)))
#endif
# define relocate_indirect(p) do { \
ptr _P = p; \
relocate_pure(&_P); \
} while (0)
# define relocate_reference_indirect(p) do { \
ptr _P = p; \
if (!FOREIGN_REFERENCEP(_P)) { \
_P = S_reference_to_object(_P); \
relocate_pure(&_P); \
} \
} while (0)
FORCEINLINE void check_triggers(thread_gc *tgc, seginfo *si) {
  /* Registering ephemerons and guardians to recheck at the
     granularity of a segment means that the worst-case complexity of
     GC is quadratic in the number of objects that fit into a segment
     (but that only happens if the objects are ephemeron keys that are
     reachable just through a chain via the value field of the same
     ephemerons). */
  if (!si->has_triggers)
    return;
  if (si->trigger_ephemerons) {
    add_trigger_ephemerons_to_pending(tgc, si->trigger_ephemerons);
    si->trigger_ephemerons = 0;
  }
  if (si->trigger_guardians) {
    add_trigger_guardians_to_recheck(si->trigger_guardians);
    si->trigger_guardians = 0;
  }
  si->has_triggers = 0;
}
#if defined(ENABLE_OBJECT_COUNTS)
# include "gc-oce.inc"
#elif defined(ENABLE_PARALLEL)
# include "gc-par.inc"
#else
# include "gc-ocd.inc"
#endif
/* sweep_in_old() is like sweep(), but the goal is to sweep the
object's content without copying the object itself, so we're sweep
an object while it's still in old space. If an object refers back
to itself, naively sweeping might copy the object while we're
trying to sweep the old copy, which interacts badly with the words
set to a forwarding marker and pointer. To handle that problem,
sweep_in_old() is allowed to copy the object, since the object
is going to get copied anyway. */
static void sweep_in_old(thread_gc *tgc, ptr p) {
  /* Detect all the cases when we need to give up on in-place
     sweeping: an object that refers immediately back to itself cannot
     be swept in place, because relocating the self-reference would
     overwrite the words being swept with a forwarding marker and
     pointer (see the comment above), so just relocate `p` now. */
  if (object_directly_refers_to_self(p)) {
    relocate_pure_now(&p);
    return;
  }
  /* We've determined that `p` won't refer immediately back to itself,
     so it's ok to sweep(), but only update `p` for pure relocations;
     impure ones must happen later, after `p` is potentially copied,
     so the card updates will be right. */
  sweep_object_in_old(tgc, p);
}
/* Sweep `p` as a dirty object, but only when it resides in
   space_new; objects in any other space are left alone. */
static void sweep_dirty_object_if_space_new(thread_gc *tgc, ptr p) {
  seginfo *seg = SegInfo(ptr_get_segment(p));
  if (seg->space != space_new)
    return;
  (void)sweep_dirty_object(tgc, p, 0);
}
/* Copy the stack object `old` into the target generation and return
   the new address.  `*length` is the stack's allocated size in bytes
   (possibly reduced here to trim headroom; the updated value is
   returned through the pointer), and `clength` is the number of bytes
   currently in use.  Stacks not in old space, and marked (rather than
   copied) stacks, are returned unchanged. */
static ptr copy_stack(thread_gc *tgc, ptr old, iptr *length, iptr clength) {
  iptr n, m; ptr new; IGEN newg;
  seginfo *si = SegInfo(ptr_get_segment(old));

  /* Don't copy non-oldspace stacks, since we may be sweeping a
     continuation that is older than target_generation. Doing so would
     be a waste of work anyway. */
  if (!si->old_space) return old;

  newg = TARGET_GENERATION(si);

  n = *length;

#ifndef NO_NEWSPACE_MARKS
  if (si->use_marks) {
    /* Mark-in-place mode: record the mark (and counts) but keep the
       stack at its current address. */
    if (!marked(si, old)) {
      mark_untyped_data_object(tgc, old, n, si);

#ifdef ENABLE_OBJECT_COUNTS
      S_G.countof[newg][countof_stack] += 1;
      S_G.bytesof[newg][countof_stack] += n;
#endif
    }

    return old;
  }
#endif

  /* reduce headroom created for excessively large frames (typically resulting from apply with long lists) */
  if (n != clength && n > default_stack_size && n > (m = clength + one_shot_headroom)) {
    /* shrink the copy to clength + one_shot_headroom bytes */
    *length = n = m;
  }

  n = ptr_align(n);
#ifdef ENABLE_OBJECT_COUNTS
  S_G.countof[newg][countof_stack] += 1;
  S_G.bytesof[newg][countof_stack] += n;
#endif /* ENABLE_OBJECT_COUNTS */

  if (n == 0) {
    return (ptr)0;
  } else {
    /* find_gc_room allocates n bytes in newg/space_data and sets `new` */
    find_gc_room(tgc, space_data, newg, type_untyped, n, new);
    n = ptr_align(clength);
    /* warning: stack may have been left non-double-aligned by split_and_resize */
    memcpy_aligned(TO_VOIDP(new), TO_VOIDP(old), n);

    /* also returning possibly updated value in *length */
    return new;
  }
}
/* NONSTATICINHEAP(si, x): filter that is true when `x` is a
   non-immediate whose segment exists and is not in the static
   generation; binds `si` to the segment info as a side effect. */
#define NONSTATICINHEAP(si, x) (!FIXMEDIATE(x) && (si = MaybeSegInfo(ptr_get_segment(x))) != NULL && si->generation != static_generation)
/* ALWAYSTRUE(si, x): filter that accepts every object; still binds
   `si` to the object's segment info for use by the caller. */
#define ALWAYSTRUE(si, x) (si = SegInfo(ptr_get_segment(x)), 1)

/* partition_guardians(LS, FILTER): walk the guardian list LS and move
   each entry onto one of three lists expected as locals in the
   enclosing scope:
     - pend_hold_ls:  the guarded object is already saved (not in old
       space, marked, or forwarded) or rejected by FILTER's heap test;
     - final_ls:      the object is not yet saved but its tconc is
       accessible, so the guardian is ready for finalization;
     - pend_final_ls: neither object nor tconc is known accessible yet.
   Also reads/writes the enclosing locals obj, next, tconc, and
   repoints forwarded objects/tconcs via GET_FWDADDRESS/FWDADDRESS. */
#define partition_guardians(LS, FILTER) do { \
    ptr ls; seginfo *si; \
    for (ls = LS; ls != Snil; ls = next) { \
      obj = GUARDIANOBJ(ls); \
      next = GUARDIANNEXT(ls); \
      if (FILTER(si, obj)) { \
        if (!si->old_space || new_marked(si, obj)) { \
          INITGUARDIANNEXT(ls) = pend_hold_ls; \
          pend_hold_ls = ls; \
        } else if (FORWARDEDP(obj, si)) { \
          INITGUARDIANOBJ(ls) = GET_FWDADDRESS(obj); \
          INITGUARDIANNEXT(ls) = pend_hold_ls; \
          pend_hold_ls = ls; \
        } else { \
          seginfo *t_si; \
          tconc = GUARDIANTCONC(ls); \
          t_si = SegInfo(ptr_get_segment(tconc)); \
          if (!t_si->old_space || new_marked(t_si, tconc)) { \
            INITGUARDIANNEXT(ls) = final_ls; \
            final_ls = ls; \
          } else if (FWDMARKER(tconc) == forward_marker) { \
            INITGUARDIANTCONC(ls) = FWDADDRESS(tconc); \
            INITGUARDIANNEXT(ls) = final_ls; \
            final_ls = ls; \
          } else { \
            INITGUARDIANNEXT(ls) = pend_final_ls; \
            pend_final_ls = ls; \
          } \
        } \
      } \
    } \
  } while (0)
/* A root recorded for object counting: the root object itself plus a
   flag indicating whether it is held weakly. */
typedef struct count_root_t {
  ptr p;      /* the root object */
  IBOOL weak; /* nonzero => treat this root as weak when counting */
} count_root_t;
ptr GCENTRY(ptr tc, ptr count_roots_ls) {
thread_gc *tgc = THREAD_GC(tc);
IGEN g; ISPC s;
seginfo *oldspacesegments, *oldweakspacesegments, *si, *nextsi;
ptr ls;
bucket_pointer_list *buckets_to_rebuild;
uptr pre_finalization_size, pre_phantom_bytes;
#ifdef ENABLE_OBJECT_COUNTS
ptr count_roots_counts = Snil;
iptr count_roots_len;
count_root_t *count_roots;
#endif
GET_REAL_TIME(astart);
S_thread_start_code_write(tc, MAX_TG, 0, NULL, 0);
/* flush instruction cache: effectively clear_code_mod but safer */
for (ls = S_threads; ls != Snil; ls = Scdr(ls)) {
ptr t_tc = (ptr)THREADTC(Scar(ls));
S_flush_instruction_cache(t_tc);
}
tlcs_to_rehash = Snil;
conts_to_promote = Snil;
#ifndef NO_DIRTY_NEWSPACE_POINTERS
S_G.new_dirty_cards = NULL;
#endif /* !NO_DIRTY_NEWSPACE_POINTERS */
S_G.must_mark_gen0 = 0;
setup_sweepers(tgc); /* maps threads to sweepers */
for (ls = S_threads; ls != Snil; ls = Scdr(ls)) {
ptr t_tc = (ptr)THREADTC(Scar(ls));
thread_gc *t_tgc = THREAD_GC(t_tc);
S_scan_dirty(TO_VOIDP(EAP(t_tc)), TO_VOIDP(REAL_EAP(t_tc)));
EAP(t_tc) = REAL_EAP(t_tc) = AP(t_tc) = (ptr)0;
/* clear thread-local allocation: */
for (g = 0; g <= MAX_CG; g++) {
for (s = 0; s <= max_real_space; s++) {
if (t_tgc->base_loc[g][s]) {
/* We close off, instead of just setting BASELOC to 0,
in case the page ends up getting marked, in which
case a terminator mark needed. */
S_close_off_thread_local_segment(t_tc, s, g);
}
}
}
if (!HAS_SWEEPER_WRT(t_tc, tc)) {
/* close off any current allocation in MAX_TG, and ensure that
end-of-segment markers are otherwise set (in case that's
needed for dirty-byte sweeping) */
for (s = 0; s <= max_real_space; s++) {
if (t_tgc->base_loc[MAX_TG][s])
S_close_off_thread_local_segment(t_tc, s, MAX_TG);
for (g = MAX_TG + 1; g <= static_generation; g++) {
ptr old = t_tgc->next_loc[g][s];
if (old != (ptr)0)
*(ptr*)TO_VOIDP(old) = forward_marker;
}
}
} else {
/* set up context for sweeping --- effectively remembering the current
allocation state so anything new is recognized as needing sweeping */
t_tgc->sweep_stack_start = t_tgc->sweep_stack = t_tgc->sweep_stack_limit = (ptr)0;
t_tgc->send_remote_sweep_stack_start = t_tgc->send_remote_sweep_stack = t_tgc->send_remote_sweep_stack_limit = (ptr)0;
t_tgc->receive_remote_sweep_stack_start = t_tgc->receive_remote_sweep_stack = t_tgc->receive_remote_sweep_stack_limit = (ptr)0;
t_tgc->bitmask_overhead[0] = 0;
for (g = MIN_TG; g <= MAX_TG; g++)
t_tgc->bitmask_overhead[g] = 0;
for (s = 0; s <= max_real_space; s++) {
/* need to save `next_loc` to ensure that dirty sweeping
doesn't overshoot into newly allocated objects */
t_tgc->orig_next_loc[s] = t_tgc->next_loc[MAX_TG][s];
t_tgc->sweep_loc[MAX_TG][s] = t_tgc->next_loc[MAX_TG][s];
for (g = MIN_TG; g <= MAX_TG; g++)
t_tgc->sweep_next[g][s] = NULL;
}
}
}
/* perform after ScanDirty */
if (S_checkheap) S_check_heap(0, MAX_CG);
#ifdef DEBUG
(void)printf("max_cg = %x; go? ", MAX_CG); (void)fflush(stdout); (void)getc(stdin);
#endif
resweep_weak_segments = NULL;
for (g = MIN_TG; g <= MAX_TG; g++) fully_marked_mask[g] = NULL;
/* set up generations to be copied */
for (g = 0; g <= MAX_CG; g++) {
S_G.bytes_of_generation[g] = 0;
for (s = 0; s <= max_real_space; s++) {
S_G.bytes_of_space[g][s] = 0;
S_G.bitmask_overhead[g] = 0;
}
}
/* reset phantom size in generations to be copied, even if counting is not otherwise enabled */
pre_phantom_bytes = 0;
for (g = 0; g <= MAX_CG; g++) {
pre_phantom_bytes += S_G.bytesof[g][countof_phantom];
S_G.bytesof[g][countof_phantom] = 0;
}
for (g = MIN_TG; g <= MAX_TG; g++) {
pre_phantom_bytes += S_G.bytesof[g][countof_phantom];
}
/* mark segments from which objects are to be copied or marked */
oldspacesegments = oldweakspacesegments = (seginfo *)NULL;