/
assem.lisp
2059 lines (1944 loc) · 94.7 KB
/
assem.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
;;;; scheduling assembler
;;;; This software is part of the SBCL system. See the README file for
;;;; more information.
;;;;
;;;; This software is derived from the CMU CL system, which was
;;;; written at Carnegie Mellon University and released into the
;;;; public domain. The software is in the public domain and is
;;;; provided with absolutely no warranty. See the COPYING and CREDITS
;;;; files for more information.
(in-package "SB-ASSEM")
;;;; assembly control parameters
;;; Only the scheduling assembler cares about this constant,
;;; but it has to be defined. If ASSEM-SCHEDULER-P is true and it hasn't
;;; been assigned, we'll get an error. It's OK if ASSEM-SCHEDULER-P is NIL
;;; and the constant _has_ already been defined. (This is weird but harmless)
(defconstant +assem-max-locations+
(if (boundp '+assem-max-locations+)
(symbol-value '+assem-max-locations+)
0))
;;;; Constants.
;;; ASSEMBLY-UNIT-BITS -- the number of bits in the minimum assembly
;;; unit, (also referred to as a ``byte''). Hopefully, different
;;; instruction sets won't require changing this.
(defconstant assembly-unit-bits 8)
(defconstant assembly-unit-mask (1- (ash 1 assembly-unit-bits)))
(deftype assembly-unit ()
`(unsigned-byte ,assembly-unit-bits))
;;; Some functions which accept assembly units can meaningfully accept
;;; signed values with the same number of bits and silently munge them
;;; into appropriate unsigned values. (This is handy behavior e.g.
;;; when assembling branch instructions on the X86.)
(deftype possibly-signed-assembly-unit ()
`(or assembly-unit
(signed-byte ,assembly-unit-bits)))
;;; the maximum alignment we can guarantee given the object format. If
;;; the loader only loads objects 8-byte aligned, we can't do any
;;; better than that ourselves.
(defconstant max-alignment 5)
(deftype alignment ()
`(integer 0 ,max-alignment))
;;;; the SEGMENT structure
;;; This structure holds the state of the assembler.
(defstruct (segment (:copier nil))
;; This is a vector where instructions are written.
;; It used to be an adjustable array, but we now do the array size
;; management manually for performance reasons (as of 2006-05-13 hairy
;; array operations are rather slow compared to simple ones).
(buffer (make-array 100 :element-type 'assembly-unit)
:type (simple-array assembly-unit 1))
(encoder-state)
;; whether or not to run the scheduler. Note: if the instruction
;; definitions were not compiled with the scheduler turned on, this
;; has no effect.
(run-scheduler nil)
;; what position does this correspond to? Initially, positions and
;; indexes are the same, but after we start collapsing choosers,
;; positions can change while indexes stay the same.
(current-posn 0 :type index)
(%current-index 0 :type index)
;; a list of all the annotations that have been output to this segment
(annotations nil :type list)
;; the subset of annotations which are of type ALIGNMENT-NOTE
(alignment-annotations)
;; a pointer to the last cons cell in the annotations list. This is
;; so we can quickly add things to the end of the annotations list.
(last-annotation nil :type list)
;; the number of bits of alignment at the last time we synchronized
(alignment max-alignment :type alignment)
;; number of bytes to subtract from all finalized positions such that
;; position 0 corresponds to CODE-INSTRUCTIONS of the code component
;; being assembled.
(header-skew 0 :type (member 0 #.sb-vm:n-word-bytes))
;; the position the last time we synchronized
(sync-posn 0 :type index)
;; a label at position 0
(origin (gen-label) :read-only t)
;; The posn and index everything ends at. This is not maintained
;; while the data is being generated, but is filled in after.
;; Basically, we copy CURRENT-POSN and CURRENT-INDEX so that we can
;; trash them while processing choosers and back-patches.
(final-posn 0 :type index)
(final-index 0 :type index)
;; *** State used by the scheduler during instruction queueing.
;;
;; a list of postits. These are accumulated between instructions.
(postits nil :type list)
;; ``Number'' for last instruction queued. Used only to supply insts
;; with unique sset-element-number's.
(inst-number 0 :type index)
;; SIMPLE-VECTORs mapping locations to the instruction that reads them and
;; instructions that write them
(readers (make-array +assem-max-locations+ :initial-element nil)
:type simple-vector)
(writers (make-array +assem-max-locations+ :initial-element nil)
:type simple-vector)
;; The number of additional cycles before the next control transfer,
;; or NIL if a control transfer hasn't been queued. When a delayed
;; branch is queued, this slot is set to the delay count.
(branch-countdown nil :type (or null (and fixnum unsigned-byte)))
;; *** These two slots are used both by the queuing noise and the
;; scheduling noise.
;;
;; All the instructions that are pending and don't have any
;; unresolved dependents. We don't list branches here even if they
;; would otherwise qualify. They are listed above.
(emittable-insts-sset (make-sset) :type sset)
;; list of queued branches. We handle these specially, because they
;; have to be emitted at a specific place (e.g. one slot before the
;; end of the block).
(queued-branches nil :type list)
;; *** state used by the scheduler during instruction scheduling
;;
;; the instructions who would have had a read dependent removed if
;; it were not for a delay slot. This is a list of lists. Each
;; element in the top level list corresponds to yet another cycle of
;; delay. Each element in the second level lists is a dotted pair,
;; holding the dependency instruction and the dependent to remove.
(delayed nil :type list)
;; The emittable insts again, except this time as a list sorted by depth.
(emittable-insts-queue nil :type list)
(fixup-notes)
;; Whether or not to collect dynamic statistics. This is just the same as
;; *COLLECT-DYNAMIC-STATISTICS* but is faster to reference.
#+sb-dyncount
(collect-dynamic-statistics nil))
(declaim (freeze-type segment))
(defprinter (segment :identity t))
(declaim (inline segment-current-index))
(defun segment-current-index (segment)
(segment-%current-index segment))
(defun (setf segment-current-index) (new-value segment)
(declare (type index new-value)
(type segment segment))
;; FIXME: It would be lovely to enforce this, but first FILL-IN will
;; need to be convinced to stop rolling SEGMENT-CURRENT-INDEX
;; backwards.
;;
;; Enforce an observed regularity which makes it easier to think
;; about what's going on in the (legacy) code: The segment never
;; shrinks. -- WHN the reverse engineer
#+nil (aver (>= new-value (segment-current-index segment)))
(let* ((buffer (segment-buffer segment))
(new-buffer-size (length buffer)))
(declare (type (simple-array (unsigned-byte 8)) buffer)
(type index new-buffer-size))
;; Make sure the array is big enough.
(when (<= new-buffer-size new-value)
(do ()
((> new-buffer-size new-value))
;; When we have to increase the size of the array, we want to
;; roughly double the vector length: that way growing the array
;; to size N conses only O(N) bytes in total.
(setf new-buffer-size (* 2 new-buffer-size)))
(let ((new-buffer (make-array new-buffer-size
:element-type '(unsigned-byte 8))))
(replace new-buffer buffer)
(setf (segment-buffer segment) new-buffer)))
;; Now that the array has the intended next free byte, we can point to it.
(setf (segment-%current-index segment) new-value)))
;;; Various functions (like BACK-PATCH-FUN or CHOOSER-WORST-CASE-FUN)
;;; aren't cleanly parameterized, but instead use
;;; SEGMENT-CURRENT-INDEX and/or SEGMENT-CURRENT-POSN as global
;;; variables. So code which calls such functions needs to modify
;;; SEGMENT-CURRENT-INDEX and SEGMENT-CURRENT-POSN. This is left over
;;; from the old new-assem.lisp C-style code, and so all the
;;; destruction happens to be done after other uses of these slots are
;;; done and things basically work. However, (1) it's fundamentally
;;; nasty, and (2) at least one thing doesn't work right: OpenMCL
;;; properly points out that SUBSEQ's indices aren't supposed to
;;; exceed its logical LENGTH, i.e. its FILL-POINTER, i.e.
;;; SEGMENT-CURRENT-INDEX.
;;;
;;; As a quick fix involving minimal modification of legacy code,
;;; we do such sets of SEGMENT-CURRENT-INDEX and SEGMENT-CURRENT-POSN
;;; using this macro, which restores 'em afterwards.
;;;
;;; FIXME: It'd probably be better to cleanly parameterize things like
;;; BACK-PATCH-FUN so we can avoid this nastiness altogether.
(defmacro with-modified-segment-index-and-posn ((segment index posn)
&body body)
(with-unique-names (n-segment old-index old-posn)
`(let* ((,n-segment ,segment)
(,old-index (segment-current-index ,n-segment))
(,old-posn (segment-current-posn ,n-segment)))
(unwind-protect
(progn
(setf (segment-current-index ,n-segment) ,index
(segment-current-posn ,n-segment) ,posn)
,@body)
(setf (segment-current-index ,n-segment) ,old-index
(segment-current-posn ,n-segment) ,old-posn)))))
;;;; structures/types used by the scheduler
(!def-boolean-attribute instruction
;; This attribute is set if the scheduler can freely flush this
;; instruction if it thinks it is not needed. Examples are NOP and
;; instructions that have no side effect not described by the
;; writes.
flushable
;; This attribute is set when an instruction can cause a control
;; transfer. For test instructions, the delay is used to determine
;; how many instructions follow the branch.
branch
;; This attribute indicates that this ``instruction'' can be
;; variable length, and therefore had better never be used in a
;; branch delay slot.
variable-length)
(defstruct (instruction
(:include sset-element)
(:conc-name inst-)
(:constructor make-instruction (number emitter attributes delay))
(:copier nil))
;; The function to envoke to actually emit this instruction. Gets called
;; with the segment as its one argument.
(emitter (missing-arg) :type (or null function))
;; The attributes of this instruction.
(attributes (instruction-attributes) :type sb-c:attributes)
;; Number of instructions or cycles of delay before additional
;; instructions can read our writes.
(delay 0 :type (and fixnum unsigned-byte))
;; the maximum number of instructions in the longest dependency
;; chain from this instruction to one of the independent
;; instructions. This is used as a heuristic at to which
;; instructions should be scheduled first.
(depth nil :type (or null (and fixnum unsigned-byte)))
;; Note: When trying remember which of the next four is which, note
;; that the ``read'' or ``write'' always refers to the dependent
;; (second) instruction.
;;
;; instructions whose writes this instruction tries to read
(read-dependencies (make-sset) :type sset :read-only t)
;; instructions whose writes or reads are overwritten by this instruction
(write-dependencies (make-sset) :type sset :read-only t)
;; instructions which write what we read or write
(write-dependents (make-sset) :type sset :read-only t)
;; instructions which read what we write
(read-dependents (make-sset) :type sset :read-only t))
(declaim (freeze-type instruction))
#+sb-show-assem (defvar *inst-ids* (make-hash-table :test 'eq))
#+sb-show-assem (defvar *next-inst-id* 0)
(defmethod print-object ((inst instruction) stream)
(print-unreadable-object (inst stream :type t :identity t)
#+sb-show-assem
(princ (ensure-gethash inst *inst-ids* (incf *next-inst-id*))
stream)
(format stream
#+sb-show-assem " emitter=~S" #-sb-show-assem "emitter=~S"
(let ((emitter (inst-emitter inst)))
(if emitter
(multiple-value-bind (lambda lexenv-p name)
(function-lambda-expression emitter)
(declare (ignore lambda lexenv-p))
name)
'<flushed>)))
(when (inst-depth inst)
(format stream ", depth=~W" (inst-depth inst)))))
#+sb-show-assem
(defun reset-inst-ids ()
(clrhash *inst-ids*)
(setf *next-inst-id* 0))
;;;
;;; Instructions are streamed into a section before (optionally combining
;;; sections and) assembling into a SEGMENT.
(defstruct (stmt (:constructor make-stmt (labels vop mnemonic operands)))
(labels)
(vop)
(mnemonic)
(operands)
(plist nil) ; put anything you want here for later passes such as instcombine
(prev nil)
(next nil))
(declaim (freeze-type stmt))
(defmethod print-object ((stmt stmt) stream)
(print-unreadable-object (stmt stream :type t :identity t)
(awhen (stmt-labels stmt)
(princ it stream)
(write-char #\space stream))
(princ (stmt-mnemonic stmt) stream)))
;;; A section is just a doubly-linked list of statements with a head and
;;; tail pointer to allow insertion anywhere,
;;; and a dummy head node to avoid special-casing an empty section.
(defun make-section ()
(let ((first (make-stmt nil nil :ignore nil)))
(cons first first)))
(defun section-start (section) (car section))
(defmacro section-tail (section) `(cdr ,section))
(defstruct asmstream
(data-section (make-section) :read-only t)
(code-section (make-section) :read-only t)
(elsewhere-section (make-section) :read-only t)
(data-origin-label (gen-label "data start") :read-only t)
(text-origin-label (gen-label "text start") :read-only t)
(elsewhere-label (gen-label "elsewhere start") :read-only t)
(inter-function-padding :normal :type (member :normal :nop))
;; for collecting unique "unboxed constants" prior to placing them
;; into the data section
(constant-table (make-hash-table :test #'equal) :read-only t)
(constant-vector (make-array 16 :adjustable t :fill-pointer 0) :read-only t)
;; for deterministic allocation profiler (or possibly other tooling)
;; that wants to monkey patch the instructions at runtime.
(alloc-points)
;; for shrinking the size of the code fixups, we can choose to emit at most one call
;; from a dynamic space code component to a given assembly routine. The call goes
;; through an extra indirection in the component.
;; This table is stored as an alist of (NAME . LABEL).
(indirection-table)
;; tracking where we last wrote an instruction so that SB-C::TRACE-INSTRUCTION
;; can print "in the {x} section" whenever it changes.
(tracing-state (list nil nil) :read-only t)) ; segment and vop
(declaim (freeze-type asmstream))
(defvar *asmstream*)
(declaim (type asmstream *asmstream*))
(defun get-allocation-points (asmstream)
;; Convert the label positions to a packed integer
;; Utilize PACK-CODE-FIXUP-LOCS to perform compression.
(awhen (mapcar 'label-posn (asmstream-alloc-points asmstream))
(sb-c:pack-code-fixup-locs it nil nil)))
;;; Insert STMT after PREDECESSOR.
(defun insert-stmt (stmt predecessor)
(let ((successor (stmt-next predecessor)))
(setf (stmt-next predecessor) stmt
(stmt-prev stmt) predecessor
(stmt-next stmt) successor)
(when successor
(stmt-prev successor) stmt))
stmt)
(defun delete-stmt (stmt)
(let ((prev (stmt-prev stmt))
(next (stmt-next stmt)))
(aver prev)
;; KLUDGE: we're not passing around the section, but instcombine only
;; runs on the code section, so check whether we're deleting the last
;; statement in that section, and fix the last pointer if so.
(let ((section (asmstream-code-section *asmstream*)))
(when (eq (section-tail section) stmt)
(setf (section-tail section) prev)))
(setf (stmt-next prev) next)
(when next
(setf (stmt-prev next) prev))))
(defun add-stmt-labels (statement more-labels)
(let ((list (nconc (ensure-list (stmt-labels statement))
(ensure-list more-labels))))
(setf (stmt-labels statement)
(if (singleton-p list) (car list) list))))
;;; This is used only to keep track of which vops emit which insts.
(defvar *current-vop*)
;;; Return the final statement emitted.
(defun emit (section &rest things)
;; each element of THINGS can be:
;; - a label
;; - a list (mnemonic . operands) for a machine instruction or assembler directive
;; - a function to emit a postit
(let ((last (section-tail section))
(vop (if (boundp '*current-vop*) *current-vop*)))
(dolist (thing things (setf (section-tail section) last))
(if (label-p thing) ; Accumulate multiple labels until the next instruction
(if (stmt-mnemonic last)
(setq last (insert-stmt (make-stmt thing vop nil nil) last))
(let ((old (stmt-labels last)) (new (list thing)))
(setf (stmt-labels last)
(if (label-p old) (cons old new) (nconc old new)))))
(multiple-value-bind (mnemonic operands)
(if (consp thing) (values (car thing) (cdr thing)) thing)
(unless (member mnemonic '(.align .byte .skip))
;; This automatically gets the .QWORD pseudo-op which we use on x86-64
;; to create jump tables, but it's sort of unfortunate that the mnemonic
;; is specific to that backend. It should probably be .LISPWORD instead.
;; Anyway, the good news is that jump tables flag all the labels as used.
(dolist (operand operands)
(if (label-p operand)
(setf (label-usedp operand) t)
;; backend decides what labels are used
(%mark-used-labels operand))))
(if (stmt-mnemonic last)
(setq last (insert-stmt (make-stmt nil vop mnemonic operands) last))
(setf (stmt-vop last) (or (stmt-vop last) vop)
(stmt-mnemonic last) mnemonic
(stmt-operands last) operands)))))))
#-(or x86-64 x86)
(defun %mark-used-labels (operand) ; default implementation
(declare (ignore operand)))
;;; This holds either the current section (if writing symbolic assembly)
;;; or current segment (if machine-encoding). Use ASSEMBLE to change it.
(defvar *current-destination*)
;;; This formerly had some absolutely bizarre behavior regarding a manually-specified
;;; list of labels. It was so confusing that I couldn't figure out either what it was
;;; designed to do, or what it did do, because they certainly weren't the same thing.
;;; My best guess is that if you needed labels which were not directly at the "spine"
;;; of the ASSEMBLE form, but nested within, you could force it to call GEN-LABEL
;;; for you, which is no different from calling GEN-LABEL _outside_ of the ASSEMBLE.
;;; (Which most people do anyway if they need to)
;;; But in anything more than a straightfoward usage, the expansion could be wrong.
;;; For example this fragment calls EMIT-LABEL on the same label instance twice:
#|
(sb-cltl2:macroexpand-all
'(assemble ()
B
(assemble (nil nil :labels (foo))
(inst pop rax-tn)
B
(assemble ()
B
(wat)))))
=>
(LET* ((B (GEN-LABEL)))
(SYMBOL-MACROLET ()
(EMIT-LABEL B)
(LET* ((B (GEN-LABEL)) (FOO (GEN-LABEL)))
(SYMBOL-MACROLET ((SB-ASSEM::..INHERITED-LABELS.. (B)))
(INST* 'POP RAX-TN)
(EMIT-LABEL B)
(LET* ()
(SYMBOL-MACROLET ((SB-ASSEM::..INHERITED-LABELS.. NIL))
(EMIT-LABEL B)
(WAT)))))))
|#
(defmacro assemble ((&optional dest vop) &body body &environment env)
"Execute BODY (as a progn) with DEST as the current section or segment."
(flet ((label-name-p (thing) (typep thing '(and symbol (not null)))))
(let ((inherited (multiple-value-bind (expansion expanded)
(#+sb-xc-host cl:macroexpand-1
#-sb-xc-host %macroexpand-1 '..inherited-labels.. env)
(if expanded expansion)))
(new-labels (sort (copy-list (remove-if-not #'label-name-p body)) #'string<)))
;; Compare for dups using STRING=. Two reasons to use that rather than EQ:
;; (1) the assembler input is generally string-like - consider that instruction
;; mnemonics are looked up by string even though written as symbols.
;; (2) the above SORT could yield an unpredictable result across build hosts
(unless (= (length (remove-duplicates new-labels :test #'string=))
(length new-labels))
(error "Repeated labels in ASSEMBLE body"))
(awhen (intersection inherited new-labels)
(style-warn "Shadowed asm labels ~S should be renamed not to conflict" it))
`(let* (,@(when dest
`((*current-destination*
,(case dest
(:code '(asmstream-code-section *asmstream*))
(:elsewhere '(asmstream-elsewhere-section *asmstream*))
(t dest)))))
,@(when vop `((*current-vop* ,vop)))
,@(mapcar (lambda (name) `(,name (gen-label)))
new-labels))
(symbol-macrolet ((..inherited-labels.. ,(append inherited new-labels)))
,@(mapcar (lambda (form)
(if (label-name-p form)
`(emit-label ,form)
form))
body))))))
(defun assembling-to-elsewhere-p ()
(eq *current-destination* (asmstream-elsewhere-section *asmstream*)))
;;;; the scheduler itself
(defmacro without-scheduling (() &body body)
"Execute BODY (as a PROGN) without scheduling any of the instructions
generated inside it. This is not protected by UNWIND-PROTECT, so
DO NOT use THROW or RETURN-FROM to escape from it."
`(let ((section. *current-destination*))
;; This is similar to bracketing the code with ".set noreorder"
;; and ".set reorder" in the MIPS assembler, except that we could
;; theoretically allow nesting (so only restore ".set reorder"
;; after the outermost), except that we don't allow it.
(emit section. '(.begin-without-scheduling))
,@body
(emit section. '(.end-without-scheduling))))
(defmacro note-dependencies ((segment inst) &body body)
(once-only ((segment segment) (inst inst))
`(macrolet ((reads (loc) `(note-read-dependency ,',segment ,',inst ,loc))
(writes (loc &rest keys)
`(note-write-dependency ,',segment ,',inst ,loc ,@keys)))
,@body)))
#.(unless assem-scheduler-p
'(defmacro schedule-pending-instructions (segment)
(declare (ignore segment))))
#.(when assem-scheduler-p
'(progn
(defun note-read-dependency (segment inst read)
(multiple-value-bind (loc-num size)
(sb-c:location-number read)
#+sb-show-assem (format *trace-output*
"~&~S reads ~S[~W for ~W]~%"
inst read loc-num size)
(when loc-num
;; Iterate over all the locations for this TN.
(do ((index loc-num (1+ index))
(end-loc (+ loc-num (or size 1))))
((>= index end-loc))
(declare (type (mod 2048) index end-loc))
(let ((writers (svref (segment-writers segment) index)))
(when writers
;; The inst that wrote the value we want to read must have
;; completed.
(let ((writer (car writers)))
(sset-adjoin writer (inst-read-dependencies inst))
(sset-adjoin inst (inst-read-dependents writer))
(sset-delete writer (segment-emittable-insts-sset segment))
;; And it must have been completed *after* all other
;; writes to that location. Actually, that isn't quite
;; true. Each of the earlier writes could be done
;; either before this last write, or after the read, but
;; we have no way of representing that.
(dolist (other-writer (cdr writers))
(sset-adjoin other-writer (inst-write-dependencies writer))
(sset-adjoin writer (inst-write-dependents other-writer))
(sset-delete other-writer
(segment-emittable-insts-sset segment))))
;; And we don't need to remember about earlier writes any
;; more. Shortening the writers list means that we won't
;; bother generating as many explicit arcs in the graph.
(setf (cdr writers) nil)))
(push inst (svref (segment-readers segment) index)))))
(values))
(defun note-write-dependency (segment inst write &key partially)
(multiple-value-bind (loc-num size)
(sb-c:location-number write)
#+sb-show-assem (format *trace-output*
"~&~S writes ~S[~W for ~W]~%"
inst write loc-num size)
(when loc-num
;; Iterate over all the locations for this TN.
(do ((index loc-num (1+ index))
(end-loc (+ loc-num (or size 1))))
((>= index end-loc))
(declare (type (mod 2048) index end-loc))
;; All previous reads of this location must have completed.
(dolist (prev-inst (svref (segment-readers segment) index))
(unless (eq prev-inst inst)
(sset-adjoin prev-inst (inst-write-dependencies inst))
(sset-adjoin inst (inst-write-dependents prev-inst))
(sset-delete prev-inst (segment-emittable-insts-sset segment))))
(when partially
;; All previous writes to the location must have completed.
(dolist (prev-inst (svref (segment-writers segment) index))
(sset-adjoin prev-inst (inst-write-dependencies inst))
(sset-adjoin inst (inst-write-dependents prev-inst))
(sset-delete prev-inst (segment-emittable-insts-sset segment)))
;; And we can forget about remembering them, because
;; depending on us is as good as depending on them.
(setf (svref (segment-writers segment) index) nil))
(push inst (svref (segment-writers segment) index)))))
(values))
;;; This routine is called by due to uses of the INST macro when the
;;; scheduler is turned on. The change to the dependency graph has
;;; already been computed, so we just have to check to see whether the
;;; basic block is terminated.
(defun queue-inst (segment inst)
#+sb-show-assem (format *trace-output* "~&queuing ~S~%" inst)
#+sb-show-assem (format *trace-output*
" reads ~S~% writes ~S~%"
(collect ((reads))
(do-sset-elements (read
(inst-read-dependencies inst))
(reads read))
(reads))
(collect ((writes))
(do-sset-elements (write
(inst-write-dependencies inst))
(writes write))
(writes)))
(aver (segment-run-scheduler segment))
(let ((countdown (segment-branch-countdown segment)))
(when countdown
(decf countdown)
(aver (not (instruction-attributep (inst-attributes inst)
variable-length))))
(cond ((instruction-attributep (inst-attributes inst) branch)
(unless countdown
(setf countdown (inst-delay inst)))
(push (cons countdown inst)
(segment-queued-branches segment)))
(t
(sset-adjoin inst (segment-emittable-insts-sset segment))))
(when countdown
(setf (segment-branch-countdown segment) countdown)
(when (zerop countdown)
(schedule-pending-instructions segment))))
(values))
;;; Emit all the pending instructions, and reset any state. This is
;;; called whenever we hit a label (i.e. an entry point of some kind)
;;; and when the user turns the scheduler off (otherwise, the queued
;;; instructions would sit there until the scheduler was turned back
;;; on, and emitted in the wrong place).
(defun schedule-pending-instructions (segment)
(aver (segment-run-scheduler segment))
;; Quick blow-out if nothing to do.
(when (and (sset-empty (segment-emittable-insts-sset segment))
(null (segment-queued-branches segment)))
(return-from schedule-pending-instructions
(values)))
#+sb-show-assem (format *trace-output*
"~&scheduling pending instructions..~%")
;; Note that any values live at the end of the block have to be
;; computed last.
(let ((emittable-insts (segment-emittable-insts-sset segment))
(writers (segment-writers segment)))
(dotimes (index (length writers))
(let* ((writer (svref writers index))
(inst (car writer))
(overwritten (cdr writer)))
(when writer
(when overwritten
(let ((write-dependencies (inst-write-dependencies inst)))
(dolist (other-inst overwritten)
(sset-adjoin inst (inst-write-dependents other-inst))
(sset-adjoin other-inst write-dependencies)
(sset-delete other-inst emittable-insts))))
;; If the value is live at the end of the block, we can't flush it.
(setf (instruction-attributep (inst-attributes inst) flushable)
nil)))))
;; Grovel through the entire graph in the forward direction finding
;; all the leaf instructions.
(labels ((grovel-inst (inst)
(let ((max 0))
(do-sset-elements (dep (inst-write-dependencies inst))
(let ((dep-depth (or (inst-depth dep) (grovel-inst dep))))
(when (> dep-depth max)
(setf max dep-depth))))
(do-sset-elements (dep (inst-read-dependencies inst))
(let ((dep-depth
(+ (or (inst-depth dep) (grovel-inst dep))
(inst-delay dep))))
(when (> dep-depth max)
(setf max dep-depth))))
(cond ((and (sset-empty (inst-read-dependents inst))
(instruction-attributep (inst-attributes inst)
flushable))
#+sb-show-assem (format *trace-output*
"flushing ~S~%"
inst)
(setf (inst-emitter inst) nil)
(setf (inst-depth inst) max))
(t
(setf (inst-depth inst) max))))))
(let ((emittable-insts nil)
(delayed nil))
(do-sset-elements (inst (segment-emittable-insts-sset segment))
(grovel-inst inst)
(if (zerop (inst-delay inst))
(push inst emittable-insts)
(setf delayed
(add-to-nth-list delayed inst (1- (inst-delay inst))))))
(setf (segment-emittable-insts-queue segment)
(sort emittable-insts #'> :key #'inst-depth))
(setf (segment-delayed segment) delayed))
(dolist (branch (segment-queued-branches segment))
(grovel-inst (cdr branch))))
#+sb-show-assem (format *trace-output*
"queued branches: ~S~%"
(segment-queued-branches segment))
#+sb-show-assem (format *trace-output*
"initially emittable: ~S~%"
(segment-emittable-insts-queue segment))
#+sb-show-assem (format *trace-output*
"initially delayed: ~S~%"
(segment-delayed segment))
;; Accumulate the results in reverse order. Well, actually, this
;; list will be in forward order, because we are generating the
;; reverse order in reverse.
(let ((results nil))
;; Schedule all the branches in their exact locations.
(let ((insts-from-end (segment-branch-countdown segment)))
(dolist (branch (segment-queued-branches segment))
(let ((inst (cdr branch)))
(dotimes (i (- (car branch) insts-from-end))
;; Each time through this loop we need to emit another
;; instruction. First, we check to see whether there is
;; any instruction that must be emitted before (i.e. must
;; come after) the branch inst. If so, emit it. Otherwise,
;; just pick one of the emittable insts. If there is
;; nothing to do, then emit a nop. ### Note: despite the
;; fact that this is a loop, it really won't work for
;; repetitions other than zero and one. For example, if
;; the branch has two dependents and one of them dpends on
;; the other, then the stuff that grabs a dependent could
;; easily grab the wrong one. But I don't feel like fixing
;; this because it doesn't matter for any of the
;; architectures we are using or plan on using.
(flet ((maybe-schedule-dependent (dependents)
(do-sset-elements (inst dependents)
;; If do-sset-elements enters the body, then there is a
;; dependent. Emit it.
(note-resolved-dependencies segment inst)
;; Remove it from the emittable insts.
(setf (segment-emittable-insts-queue segment)
(delete inst
(segment-emittable-insts-queue segment)
:test #'eq))
;; And if it was delayed, removed it from the delayed
;; list. This can happen if there is a load in a
;; branch delay slot.
(block scan-delayed
(do ((delayed (segment-delayed segment)
(cdr delayed)))
((null delayed))
(do ((prev nil cons)
(cons (car delayed) (cdr cons)))
((null cons))
(when (eq (car cons) inst)
(if prev
(setf (cdr prev) (cdr cons))
(setf (car delayed) (cdr cons)))
(return-from scan-delayed nil)))))
;; And return it.
(return inst))))
(let ((fill (or (maybe-schedule-dependent
(inst-read-dependents inst))
(maybe-schedule-dependent
(inst-write-dependents inst))
(schedule-one-inst segment t)
:nop)))
#+sb-show-assem (format *trace-output*
"filling branch delay slot with ~S~%"
fill)
(push fill results)))
(advance-one-inst segment)
(incf insts-from-end))
(note-resolved-dependencies segment inst)
(push inst results)
#+sb-show-assem (format *trace-output* "emitting ~S~%" inst)
(advance-one-inst segment))))
;; Keep scheduling stuff until we run out.
(loop
(let ((inst (schedule-one-inst segment nil)))
(unless inst
(return))
(push inst results)
(advance-one-inst segment)))
;; Now call the emitters, but turn the scheduler off for the duration.
(setf (segment-run-scheduler segment) nil)
(assemble (segment)
(dolist (inst results)
(if (eq inst :nop)
(sb-c:emit-nop segment)
(funcall (inst-emitter inst) segment))))
(setf (segment-run-scheduler segment) t))
;; Clear out any residue left over.
(setf (segment-inst-number segment) 0)
(setf (segment-queued-branches segment) nil)
(setf (segment-branch-countdown segment) nil)
(setf (segment-emittable-insts-sset segment) (make-sset))
(fill (segment-readers segment) nil)
(fill (segment-writers segment) nil)
;; That's all, folks.
(values))
;;; a utility for maintaining the segment-delayed list. We cdr down
;;; list n times (extending it if necessary) and then push thing on
;;; into the car of that cons cell.
(defun add-to-nth-list (list thing n)
(do ((cell (or list (setf list (list nil)))
(or (cdr cell) (setf (cdr cell) (list nil))))
(i n (1- i)))
((zerop i)
(push thing (car cell))
list)))
;;; Find the next instruction to schedule and return it after updating
;;; any dependency information. If we can't do anything useful right
;;; now, but there is more work to be done, return :NOP to indicate
;;; that a nop must be emitted. If we are all done, return NIL.
(defun schedule-one-inst (segment delay-slot-p)
(do ((prev nil remaining)
(remaining (segment-emittable-insts-queue segment) (cdr remaining)))
((null remaining))
(let ((inst (car remaining)))
(unless (and delay-slot-p
(instruction-attributep (inst-attributes inst)
variable-length))
;; We've got us a live one here. Go for it.
#+sb-show-assem (format *trace-output* "emitting ~S~%" inst)
;; Delete it from the list of insts.
(if prev
(setf (cdr prev) (cdr remaining))
(setf (segment-emittable-insts-queue segment)
(cdr remaining)))
;; Note that this inst has been emitted.
(note-resolved-dependencies segment inst)
;; And return.
(return-from schedule-one-inst
;; Are we wanting to flush this instruction?
(if (inst-emitter inst)
;; Nope, it's still a go. So return it.
inst
;; Yes, so pick a new one. We have to start
;; over, because note-resolved-dependencies
;; might have changed the emittable-insts-queue.
(schedule-one-inst segment delay-slot-p))))))
;; Nothing to do, so make something up.
(cond ((segment-delayed segment)
;; No emittable instructions, but we have more work to do. Emit
;; a NOP to fill in a delay slot.
#+sb-show-assem (format *trace-output* "emitting a NOP~%")
:nop)
(t
;; All done.
nil)))
;;; This function is called whenever an instruction has been
;;; scheduled, and we want to know what possibilities that opens up.
;;; So look at all the instructions that this one depends on, and
;;; remove this instruction from their dependents list. If we were the
;;; last dependent, then that dependency can be emitted now.
(defun note-resolved-dependencies (segment inst)
(aver (sset-empty (inst-read-dependents inst)))
(aver (sset-empty (inst-write-dependents inst)))
(do-sset-elements (dep (inst-write-dependencies inst))
;; These are the instructions who have to be completed before our
;; write fires. Doesn't matter how far before, just before.
(let ((dependents (inst-write-dependents dep)))
(sset-delete inst dependents)
(when (and (sset-empty dependents)
(sset-empty (inst-read-dependents dep)))
(insert-emittable-inst segment dep))))
(do-sset-elements (dep (inst-read-dependencies inst))
;; These are the instructions who write values we read. If there
;; is no delay, then just remove us from the dependent list.
;; Otherwise, record the fact that in n cycles, we should be
;; removed.
(if (zerop (inst-delay dep))
(let ((dependents (inst-read-dependents dep)))
(sset-delete inst dependents)
(when (and (sset-empty dependents)
(sset-empty (inst-write-dependents dep)))
(insert-emittable-inst segment dep)))
(setf (segment-delayed segment)
(add-to-nth-list (segment-delayed segment)
(cons dep inst)
(inst-delay dep)))))
(values))
;;; Process the next entry in segment-delayed. This is called whenever
;;; anyone emits an instruction.
(defun advance-one-inst (segment)
(let ((delayed-stuff (pop (segment-delayed segment))))
(dolist (stuff delayed-stuff)
(if (consp stuff)
(let* ((dependency (car stuff))
(dependent (cdr stuff))
(dependents (inst-read-dependents dependency)))
(sset-delete dependent dependents)
(when (and (sset-empty dependents)
(sset-empty (inst-write-dependents dependency)))
(insert-emittable-inst segment dependency)))
(insert-emittable-inst segment stuff)))))
;;; Note that inst is emittable by sticking it in the
;;; SEGMENT-EMITTABLE-INSTS-QUEUE list. We keep the emittable-insts
;;; sorted with the largest ``depths'' first. Except that if INST is a
;;; branch, don't bother. It will be handled correctly by the branch
;;; emitting code in SCHEDULE-PENDING-INSTRUCTIONS.
(defun insert-emittable-inst (segment inst)
(unless (instruction-attributep (inst-attributes inst) branch)
#+sb-show-assem (format *trace-output* "now emittable: ~S~%" inst)
(do ((my-depth (inst-depth inst))
(remaining (segment-emittable-insts-queue segment) (cdr remaining))
(prev nil remaining))
((or (null remaining) (> my-depth (inst-depth (car remaining))))
(if prev
(setf (cdr prev) (cons inst remaining))
(setf (segment-emittable-insts-queue segment)
(cons inst remaining))))))
(values))
)) ; end PROGN
;;;; structure used during output emission
;;; a constraint on how the output stream must be aligned
(defstruct (alignment-note (:include annotation)
(:conc-name alignment-)
(:predicate alignment-p)
(:constructor make-alignment (bits size pattern))
(:copier nil))
;; the minimum number of low-order bits that must be zero
(bits 0 :type alignment)
;; the amount of filler we are assuming this alignment op will take
(size 0 :type (integer 0 #.(1- (ash 1 max-alignment))))
;; the byte used as filling or :LONG-NOP, indicating to call EMIT-LONG-NOP
;; to emit a filling pattern
(pattern 0 :type (or possibly-signed-assembly-unit
(member :long-nop))))
;;; a reference to someplace that needs to be back-patched when
;;; we actually know what label positions, etc. are
(defstruct (back-patch (:include annotation)
(:constructor make-back-patch (size fun))
(:copier nil))
;; the area affected by this back-patch
(size 0 :type index :read-only t)
;; the function to use to generate the real data
(fun nil :type function :read-only t))
;;; This is similar to a BACK-PATCH, but also an indication that the
;;; amount of stuff output depends on label positions, etc.
;;; BACK-PATCHes can't change their mind about how much stuff to emit,
;;; but CHOOSERs can.
(defstruct (chooser (:include annotation)
(:constructor make-chooser
(size alignment maybe-shrink worst-case-fun))
(:copier nil))
;; the worst case size for this chooser. There is this much space
;; allocated in the output buffer.
(size 0 :type index :read-only t)
;; the worst case alignment this chooser is guaranteed to preserve
;; (Q: why can't we guarantee to preserve nothing, thus simplifying the API?)
(alignment 0 :type alignment :read-only t)
;; the function to call to determine if we can use a shorter
;; sequence. It returns NIL if nothing shorter can be used, or emits
;; that sequence and returns T.
(maybe-shrink nil :type function :read-only t)
;; the function to call to generate the worst case sequence. This is
;; used when nothing else can be condensed.
(worst-case-fun nil :type function :read-only t))
;;; This is used internally when we figure out a chooser or alignment
;;; doesn't really need as much space as we initially gave it.
(defstruct (filler (:include annotation)
(:constructor make-filler (bytes))
(:copier nil))
;; the number of bytes of filler here
(bytes 0 :type index))
(declaim (freeze-type annotation))
;;;; output functions
;;; interface: Emit the supplied BYTE to SEGMENT, growing SEGMENT if
;;; necessary.
(defun emit-byte (segment byte)
(declare (type segment segment))
(declare (type possibly-signed-assembly-unit byte))
(let ((old-index (segment-current-index segment)))
(incf (segment-current-index segment))
(setf (aref (segment-buffer segment) old-index)
(logand byte assembly-unit-mask)))
(incf (segment-current-posn segment))
(values))
;;; internal: Output AMOUNT bytes to SEGMENT, either copies of
;;; PATTERN (if that is an integer), or by calling EMIT-LONG-NOP
;;; (if PATTERN is :LONG-NOP).
(defun %emit-skip (segment amount &optional (pattern 0))
(declare (type segment segment)
(type index amount))
(etypecase pattern
(integer
(dotimes (i amount)
(emit-byte segment pattern)))
;; EMIT-LONG-NOP does not exist for most backends.
;; Better to get an ECASE error than undefined-function.
#+x86-64
((eql :long-nop)