-
Notifications
You must be signed in to change notification settings - Fork 5.4k
/
c1_Runtime1_x86.cpp
1622 lines (1383 loc) · 57.9 KB
/
c1_Runtime1_x86.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "c1/c1_Defs.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "c1/c1_Runtime1.hpp"
#include "ci/ciUtilities.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/universe.hpp"
#include "nativeInst_x86.hpp"
#include "oops/compiledICHolder.hpp"
#include "oops/oop.inline.hpp"
#include "prims/jvmtiExport.hpp"
#include "register_x86.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/macros.hpp"
#include "vmreg_x86.inline.hpp"
// Implementation of StubAssembler
int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) {
// setup registers
const Register thread = NOT_LP64(rdi) LP64_ONLY(r15_thread); // is callee-saved register (Visual C++ calling conventions)
assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, "registers must be different");
assert(oop_result1 != thread && metadata_result != thread, "registers must be different");
assert(args_size >= 0, "illegal args_size");
bool align_stack = false;
#ifdef _LP64
// At a method handle call, the stack may not be properly aligned
// when returning with an exception.
align_stack = (stub_id() == Runtime1::handle_exception_from_callee_id);
#endif
#ifdef _LP64
mov(c_rarg0, thread);
set_num_rt_args(0); // Nothing on stack
#else
set_num_rt_args(1 + args_size);
// push java thread (becomes first argument of C function)
get_thread(thread);
push(thread);
#endif // _LP64
int call_offset = -1;
if (!align_stack) {
set_last_Java_frame(thread, noreg, rbp, NULL);
} else {
address the_pc = pc();
call_offset = offset();
set_last_Java_frame(thread, noreg, rbp, the_pc);
andptr(rsp, -(StackAlignmentInBytes)); // Align stack
}
// do the call
call(RuntimeAddress(entry));
if (!align_stack) {
call_offset = offset();
}
// verify callee-saved register
#ifdef ASSERT
guarantee(thread != rax, "change this code");
push(rax);
{ Label L;
get_thread(rax);
cmpptr(thread, rax);
jcc(Assembler::equal, L);
int3();
stop("StubAssembler::call_RT: rdi not callee saved?");
bind(L);
}
pop(rax);
#endif
reset_last_Java_frame(thread, true);
// discard thread and arguments
NOT_LP64(addptr(rsp, num_rt_args()*BytesPerWord));
// check for pending exceptions
{ Label L;
cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
jcc(Assembler::equal, L);
// exception pending => remove activation and forward to exception handler
movptr(rax, Address(thread, Thread::pending_exception_offset()));
// make sure that the vm_results are cleared
if (oop_result1->is_valid()) {
movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
}
if (metadata_result->is_valid()) {
movptr(Address(thread, JavaThread::vm_result_2_offset()), NULL_WORD);
}
if (frame_size() == no_frame_size) {
leave();
jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
} else if (_stub_id == Runtime1::forward_exception_id) {
should_not_reach_here();
} else {
jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
}
bind(L);
}
// get oop results if there are any and reset the values in the thread
if (oop_result1->is_valid()) {
get_vm_result(oop_result1, thread);
}
if (metadata_result->is_valid()) {
get_vm_result_2(metadata_result, thread);
}
assert(call_offset >= 0, "Should be set");
return call_offset;
}
int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) {
#ifdef _LP64
mov(c_rarg1, arg1);
#else
push(arg1);
#endif // _LP64
return call_RT(oop_result1, metadata_result, entry, 1);
}
int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) {
#ifdef _LP64
if (c_rarg1 == arg2) {
if (c_rarg2 == arg1) {
xchgq(arg1, arg2);
} else {
mov(c_rarg2, arg2);
mov(c_rarg1, arg1);
}
} else {
mov(c_rarg1, arg1);
mov(c_rarg2, arg2);
}
#else
push(arg2);
push(arg1);
#endif // _LP64
return call_RT(oop_result1, metadata_result, entry, 2);
}
int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) {
#ifdef _LP64
// if there is any conflict use the stack
if (arg1 == c_rarg2 || arg1 == c_rarg3 ||
arg2 == c_rarg1 || arg1 == c_rarg3 ||
arg3 == c_rarg1 || arg1 == c_rarg2) {
push(arg3);
push(arg2);
push(arg1);
pop(c_rarg1);
pop(c_rarg2);
pop(c_rarg3);
} else {
mov(c_rarg1, arg1);
mov(c_rarg2, arg2);
mov(c_rarg3, arg3);
}
#else
push(arg3);
push(arg2);
push(arg1);
#endif // _LP64
return call_RT(oop_result1, metadata_result, entry, 3);
}
// Implementation of StubFrame
class StubFrame: public StackObj {
private:
StubAssembler* _sasm;
public:
StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments);
void load_argument(int offset_in_words, Register reg);
~StubFrame();
};
void StubAssembler::prologue(const char* name, bool must_gc_arguments) {
set_info(name, must_gc_arguments);
enter();
}
void StubAssembler::epilogue() {
leave();
ret(0);
}
#define __ _sasm->
StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) {
_sasm = sasm;
__ prologue(name, must_gc_arguments);
}
// load parameters that were stored with LIR_Assembler::store_parameter
// Note: offsets for store_parameter and load_argument must match
void StubFrame::load_argument(int offset_in_words, Register reg) {
__ load_parameter(offset_in_words, reg);
}
StubFrame::~StubFrame() {
__ epilogue();
}
#undef __
// Implementation of Runtime1
const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2;
const int xmm_regs_as_doubles_size_in_slots = FrameMap::nof_xmm_regs * 2;
// Stack layout for saving/restoring all the registers needed during a runtime
// call (this includes deoptimization)
// Note: note that users of this frame may well have arguments to some runtime
// while these values are on the stack. These positions neglect those arguments
// but the code in save_live_registers will take the argument count into
// account.
//
#ifdef _LP64
#define SLOT2(x) x,
#define SLOT_PER_WORD 2
#else
#define SLOT2(x)
#define SLOT_PER_WORD 1
#endif // _LP64
enum reg_save_layout {
// 64bit needs to keep stack 16 byte aligned. So we add some alignment dummies to make that
// happen and will assert if the stack size we create is misaligned
#ifdef _LP64
align_dummy_0, align_dummy_1,
#endif // _LP64
#ifdef _WIN64
// Windows always allocates space for it's argument registers (see
// frame::arg_reg_save_area_bytes).
arg_reg_save_1, arg_reg_save_1H, // 0, 4
arg_reg_save_2, arg_reg_save_2H, // 8, 12
arg_reg_save_3, arg_reg_save_3H, // 16, 20
arg_reg_save_4, arg_reg_save_4H, // 24, 28
#endif // _WIN64
xmm_regs_as_doubles_off, // 32
float_regs_as_doubles_off = xmm_regs_as_doubles_off + xmm_regs_as_doubles_size_in_slots, // 160
fpu_state_off = float_regs_as_doubles_off + float_regs_as_doubles_size_in_slots, // 224
// fpu_state_end_off is exclusive
fpu_state_end_off = fpu_state_off + (FPUStateSizeInWords / SLOT_PER_WORD), // 352
marker = fpu_state_end_off, SLOT2(markerH) // 352, 356
extra_space_offset, // 360
#ifdef _LP64
r15_off = extra_space_offset, r15H_off, // 360, 364
r14_off, r14H_off, // 368, 372
r13_off, r13H_off, // 376, 380
r12_off, r12H_off, // 384, 388
r11_off, r11H_off, // 392, 396
r10_off, r10H_off, // 400, 404
r9_off, r9H_off, // 408, 412
r8_off, r8H_off, // 416, 420
rdi_off, rdiH_off, // 424, 428
#else
rdi_off = extra_space_offset,
#endif // _LP64
rsi_off, SLOT2(rsiH_off) // 432, 436
rbp_off, SLOT2(rbpH_off) // 440, 444
rsp_off, SLOT2(rspH_off) // 448, 452
rbx_off, SLOT2(rbxH_off) // 456, 460
rdx_off, SLOT2(rdxH_off) // 464, 468
rcx_off, SLOT2(rcxH_off) // 472, 476
rax_off, SLOT2(raxH_off) // 480, 484
saved_rbp_off, SLOT2(saved_rbpH_off) // 488, 492
return_off, SLOT2(returnH_off) // 496, 500
reg_save_frame_size // As noted: neglects any parameters to runtime // 504
};
// Save off registers which might be killed by calls into the runtime.
// Tries to smart of about FP registers. In particular we separate
// saving and describing the FPU registers for deoptimization since we
// have to save the FPU registers twice if we describe them and on P4
// saving FPU registers which don't contain anything appears
// expensive. The deopt blob is the only thing which needs to
// describe FPU registers. In all other cases it should be sufficient
// to simply save their current value.
static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
bool save_fpu_registers = true) {
// In 64bit all the args are in regs so there are no additional stack slots
LP64_ONLY(num_rt_args = 0);
LP64_ONLY(assert((reg_save_frame_size * VMRegImpl::stack_slot_size) % 16 == 0, "must be 16 byte aligned");)
int frame_size_in_slots = reg_save_frame_size + num_rt_args; // args + thread
sasm->set_frame_size(frame_size_in_slots / VMRegImpl::slots_per_word);
// record saved value locations in an OopMap
// locations are offsets from sp after runtime call; num_rt_args is number of arguments in call, including thread
OopMap* map = new OopMap(frame_size_in_slots, 0);
map->set_callee_saved(VMRegImpl::stack2reg(rax_off + num_rt_args), rax->as_VMReg());
map->set_callee_saved(VMRegImpl::stack2reg(rcx_off + num_rt_args), rcx->as_VMReg());
map->set_callee_saved(VMRegImpl::stack2reg(rdx_off + num_rt_args), rdx->as_VMReg());
map->set_callee_saved(VMRegImpl::stack2reg(rbx_off + num_rt_args), rbx->as_VMReg());
map->set_callee_saved(VMRegImpl::stack2reg(rsi_off + num_rt_args), rsi->as_VMReg());
map->set_callee_saved(VMRegImpl::stack2reg(rdi_off + num_rt_args), rdi->as_VMReg());
#ifdef _LP64
map->set_callee_saved(VMRegImpl::stack2reg(r8_off + num_rt_args), r8->as_VMReg());
map->set_callee_saved(VMRegImpl::stack2reg(r9_off + num_rt_args), r9->as_VMReg());
map->set_callee_saved(VMRegImpl::stack2reg(r10_off + num_rt_args), r10->as_VMReg());
map->set_callee_saved(VMRegImpl::stack2reg(r11_off + num_rt_args), r11->as_VMReg());
map->set_callee_saved(VMRegImpl::stack2reg(r12_off + num_rt_args), r12->as_VMReg());
map->set_callee_saved(VMRegImpl::stack2reg(r13_off + num_rt_args), r13->as_VMReg());
map->set_callee_saved(VMRegImpl::stack2reg(r14_off + num_rt_args), r14->as_VMReg());
map->set_callee_saved(VMRegImpl::stack2reg(r15_off + num_rt_args), r15->as_VMReg());
// This is stupid but needed.
map->set_callee_saved(VMRegImpl::stack2reg(raxH_off + num_rt_args), rax->as_VMReg()->next());
map->set_callee_saved(VMRegImpl::stack2reg(rcxH_off + num_rt_args), rcx->as_VMReg()->next());
map->set_callee_saved(VMRegImpl::stack2reg(rdxH_off + num_rt_args), rdx->as_VMReg()->next());
map->set_callee_saved(VMRegImpl::stack2reg(rbxH_off + num_rt_args), rbx->as_VMReg()->next());
map->set_callee_saved(VMRegImpl::stack2reg(rsiH_off + num_rt_args), rsi->as_VMReg()->next());
map->set_callee_saved(VMRegImpl::stack2reg(rdiH_off + num_rt_args), rdi->as_VMReg()->next());
map->set_callee_saved(VMRegImpl::stack2reg(r8H_off + num_rt_args), r8->as_VMReg()->next());
map->set_callee_saved(VMRegImpl::stack2reg(r9H_off + num_rt_args), r9->as_VMReg()->next());
map->set_callee_saved(VMRegImpl::stack2reg(r10H_off + num_rt_args), r10->as_VMReg()->next());
map->set_callee_saved(VMRegImpl::stack2reg(r11H_off + num_rt_args), r11->as_VMReg()->next());
map->set_callee_saved(VMRegImpl::stack2reg(r12H_off + num_rt_args), r12->as_VMReg()->next());
map->set_callee_saved(VMRegImpl::stack2reg(r13H_off + num_rt_args), r13->as_VMReg()->next());
map->set_callee_saved(VMRegImpl::stack2reg(r14H_off + num_rt_args), r14->as_VMReg()->next());
map->set_callee_saved(VMRegImpl::stack2reg(r15H_off + num_rt_args), r15->as_VMReg()->next());
#endif // _LP64
int xmm_bypass_limit = FrameMap::nof_xmm_regs;
#ifdef _LP64
if (UseAVX < 3) {
xmm_bypass_limit = xmm_bypass_limit / 2;
}
#endif
if (save_fpu_registers) {
#ifndef _LP64
if (UseSSE < 2) {
int fpu_off = float_regs_as_doubles_off;
for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
VMReg fpu_name_0 = FrameMap::fpu_regname(n);
map->set_callee_saved(VMRegImpl::stack2reg(fpu_off + num_rt_args), fpu_name_0);
// %%% This is really a waste but we'll keep things as they were for now
if (true) {
map->set_callee_saved(VMRegImpl::stack2reg(fpu_off + 1 + num_rt_args), fpu_name_0->next());
}
fpu_off += 2;
}
assert(fpu_off == fpu_state_off, "incorrect number of fpu stack slots");
if (UseSSE == 1) {
int xmm_off = xmm_regs_as_doubles_off;
for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0);
xmm_off += 2;
}
assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
}
}
#endif // !LP64
if (UseSSE >= 2) {
int xmm_off = xmm_regs_as_doubles_off;
for (int n = 0; n < FrameMap::nof_xmm_regs; n++) {
if (n < xmm_bypass_limit) {
VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0);
// %%% This is really a waste but we'll keep things as they were for now
if (true) {
map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + 1 + num_rt_args), xmm_name_0->next());
}
}
xmm_off += 2;
}
assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
}
}
return map;
}
#define __ this->
void C1_MacroAssembler::save_live_registers_no_oop_map(bool save_fpu_registers) {
__ block_comment("save_live_registers");
__ pusha(); // integer registers
// assert(float_regs_as_doubles_off % 2 == 0, "misaligned offset");
// assert(xmm_regs_as_doubles_off % 2 == 0, "misaligned offset");
__ subptr(rsp, extra_space_offset * VMRegImpl::stack_slot_size);
#ifdef ASSERT
__ movptr(Address(rsp, marker * VMRegImpl::stack_slot_size), (int32_t)0xfeedbeef);
#endif
if (save_fpu_registers) {
#ifndef _LP64
if (UseSSE < 2) {
// save FPU stack
__ fnsave(Address(rsp, fpu_state_off * VMRegImpl::stack_slot_size));
__ fwait();
#ifdef ASSERT
Label ok;
__ cmpw(Address(rsp, fpu_state_off * VMRegImpl::stack_slot_size), StubRoutines::fpu_cntrl_wrd_std());
__ jccb(Assembler::equal, ok);
__ stop("corrupted control word detected");
__ bind(ok);
#endif
// Reset the control word to guard against exceptions being unmasked
// since fstp_d can cause FPU stack underflow exceptions. Write it
// into the on stack copy and then reload that to make sure that the
// current and future values are correct.
__ movw(Address(rsp, fpu_state_off * VMRegImpl::stack_slot_size), StubRoutines::fpu_cntrl_wrd_std());
__ frstor(Address(rsp, fpu_state_off * VMRegImpl::stack_slot_size));
// Save the FPU registers in de-opt-able form
int offset = 0;
for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
__ fstp_d(Address(rsp, float_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset));
offset += 8;
}
if (UseSSE == 1) {
// save XMM registers as float because double not supported without SSE2(num MMX == num fpu)
int offset = 0;
for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
__ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
offset += 8;
}
}
}
#endif // !_LP64
if (UseSSE >= 2) {
// save XMM registers
// XMM registers can contain float or double values, but this is not known here,
// so always save them as doubles.
// note that float values are _not_ converted automatically, so for float values
// the second word contains only garbage data.
int xmm_bypass_limit = FrameMap::nof_xmm_regs;
int offset = 0;
#ifdef _LP64
if (UseAVX < 3) {
xmm_bypass_limit = xmm_bypass_limit / 2;
}
#endif
for (int n = 0; n < xmm_bypass_limit; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
offset += 8;
}
}
}
// FPU stack must be empty now
NOT_LP64( __ verify_FPU(0, "save_live_registers"); )
}
#undef __
#define __ sasm->
static void restore_fpu(C1_MacroAssembler* sasm, bool restore_fpu_registers) {
#ifdef _LP64
if (restore_fpu_registers) {
// restore XMM registers
int xmm_bypass_limit = FrameMap::nof_xmm_regs;
if (UseAVX < 3) {
xmm_bypass_limit = xmm_bypass_limit / 2;
}
int offset = 0;
for (int n = 0; n < xmm_bypass_limit; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
__ movdbl(xmm_name, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset));
offset += 8;
}
}
#else
if (restore_fpu_registers) {
if (UseSSE >= 2) {
// restore XMM registers
int xmm_bypass_limit = FrameMap::nof_xmm_regs;
int offset = 0;
for (int n = 0; n < xmm_bypass_limit; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
__ movdbl(xmm_name, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset));
offset += 8;
}
} else if (UseSSE == 1) {
// restore XMM registers(num MMX == num fpu)
int offset = 0;
for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
__ movflt(xmm_name, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset));
offset += 8;
}
}
if (UseSSE < 2) {
__ frstor(Address(rsp, fpu_state_off * VMRegImpl::stack_slot_size));
} else {
// check that FPU stack is really empty
__ verify_FPU(0, "restore_live_registers");
}
} else {
// check that FPU stack is really empty
__ verify_FPU(0, "restore_live_registers");
}
#endif // _LP64
#ifdef ASSERT
{
Label ok;
__ cmpptr(Address(rsp, marker * VMRegImpl::stack_slot_size), (int32_t)0xfeedbeef);
__ jcc(Assembler::equal, ok);
__ stop("bad offsets in frame");
__ bind(ok);
}
#endif // ASSERT
__ addptr(rsp, extra_space_offset * VMRegImpl::stack_slot_size);
}
#undef __
#define __ this->
void C1_MacroAssembler::restore_live_registers(bool restore_fpu_registers) {
__ block_comment("restore_live_registers");
restore_fpu(this, restore_fpu_registers);
__ popa();
}
void C1_MacroAssembler::restore_live_registers_except_rax(bool restore_fpu_registers) {
__ block_comment("restore_live_registers_except_rax");
restore_fpu(this, restore_fpu_registers);
#ifdef _LP64
__ movptr(r15, Address(rsp, 0));
__ movptr(r14, Address(rsp, wordSize));
__ movptr(r13, Address(rsp, 2 * wordSize));
__ movptr(r12, Address(rsp, 3 * wordSize));
__ movptr(r11, Address(rsp, 4 * wordSize));
__ movptr(r10, Address(rsp, 5 * wordSize));
__ movptr(r9, Address(rsp, 6 * wordSize));
__ movptr(r8, Address(rsp, 7 * wordSize));
__ movptr(rdi, Address(rsp, 8 * wordSize));
__ movptr(rsi, Address(rsp, 9 * wordSize));
__ movptr(rbp, Address(rsp, 10 * wordSize));
// skip rsp
__ movptr(rbx, Address(rsp, 12 * wordSize));
__ movptr(rdx, Address(rsp, 13 * wordSize));
__ movptr(rcx, Address(rsp, 14 * wordSize));
__ addptr(rsp, 16 * wordSize);
#else
__ pop(rdi);
__ pop(rsi);
__ pop(rbp);
__ pop(rbx); // skip this value
__ pop(rbx);
__ pop(rdx);
__ pop(rcx);
__ addptr(rsp, BytesPerWord);
#endif // _LP64
}
#undef __
#define __ sasm->
static OopMap* save_live_registers(StubAssembler* sasm, int num_rt_args,
bool save_fpu_registers = true) {
__ save_live_registers_no_oop_map(save_fpu_registers);
return generate_oop_map(sasm, num_rt_args, save_fpu_registers);
}
static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) {
__ restore_live_registers(restore_fpu_registers);
}
static void restore_live_registers_except_rax(StubAssembler* sasm, bool restore_fpu_registers = true) {
sasm->restore_live_registers_except_rax(restore_fpu_registers);
}
void Runtime1::initialize_pd() {
// nothing to do
}
// Target: the entry point of the method that creates and posts the exception oop.
// has_argument: true if the exception needs arguments (passed on the stack because
// registers must be preserved).
OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) {
// Preserve all registers.
int num_rt_args = has_argument ? (2 + 1) : 1;
OopMap* oop_map = save_live_registers(sasm, num_rt_args);
// Now all registers are saved and can be used freely.
// Verify that no old value is used accidentally.
__ invalidate_registers(true, true, true, true, true, true);
// Registers used by this stub.
const Register temp_reg = rbx;
// Load arguments for exception that are passed as arguments into the stub.
if (has_argument) {
#ifdef _LP64
__ movptr(c_rarg1, Address(rbp, 2*BytesPerWord));
__ movptr(c_rarg2, Address(rbp, 3*BytesPerWord));
#else
__ movptr(temp_reg, Address(rbp, 3*BytesPerWord));
__ push(temp_reg);
__ movptr(temp_reg, Address(rbp, 2*BytesPerWord));
__ push(temp_reg);
#endif // _LP64
}
int call_offset = __ call_RT(noreg, noreg, target, num_rt_args - 1);
OopMapSet* oop_maps = new OopMapSet();
oop_maps->add_gc_map(call_offset, oop_map);
__ stop("should not reach here");
return oop_maps;
}
OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
__ block_comment("generate_handle_exception");
// incoming parameters
const Register exception_oop = rax;
const Register exception_pc = rdx;
// other registers used in this stub
const Register thread = NOT_LP64(rdi) LP64_ONLY(r15_thread);
// Save registers, if required.
OopMapSet* oop_maps = new OopMapSet();
OopMap* oop_map = NULL;
switch (id) {
case forward_exception_id:
// We're handling an exception in the context of a compiled frame.
// The registers have been saved in the standard places. Perform
// an exception lookup in the caller and dispatch to the handler
// if found. Otherwise unwind and dispatch to the callers
// exception handler.
oop_map = generate_oop_map(sasm, 1 /*thread*/);
// load and clear pending exception oop into RAX
__ movptr(exception_oop, Address(thread, Thread::pending_exception_offset()));
__ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
// load issuing PC (the return address for this stub) into rdx
__ movptr(exception_pc, Address(rbp, 1*BytesPerWord));
// make sure that the vm_results are cleared (may be unnecessary)
__ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
__ movptr(Address(thread, JavaThread::vm_result_2_offset()), NULL_WORD);
break;
case handle_exception_nofpu_id:
case handle_exception_id:
// At this point all registers MAY be live.
oop_map = save_live_registers(sasm, 1 /*thread*/, id != handle_exception_nofpu_id);
break;
case handle_exception_from_callee_id: {
// At this point all registers except exception oop (RAX) and
// exception pc (RDX) are dead.
const int frame_size = 2 /*BP, return address*/ NOT_LP64(+ 1 /*thread*/) WIN64_ONLY(+ frame::arg_reg_save_area_bytes / BytesPerWord);
oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0);
sasm->set_frame_size(frame_size);
WIN64_ONLY(__ subq(rsp, frame::arg_reg_save_area_bytes));
break;
}
default: ShouldNotReachHere();
}
#if !defined(_LP64) && defined(TIERED)
if (UseSSE < 2) {
// C2 can leave the fpu stack dirty
__ empty_FPU_stack();
}
#endif // !_LP64 && TIERED
// verify that only rax, and rdx is valid at this time
__ invalidate_registers(false, true, true, false, true, true);
// verify that rax, contains a valid exception
__ verify_not_null_oop(exception_oop);
// load address of JavaThread object for thread-local data
NOT_LP64(__ get_thread(thread);)
#ifdef ASSERT
// check that fields in JavaThread for exception oop and issuing pc are
// empty before writing to them
Label oop_empty;
__ cmpptr(Address(thread, JavaThread::exception_oop_offset()), (int32_t) NULL_WORD);
__ jcc(Assembler::equal, oop_empty);
__ stop("exception oop already set");
__ bind(oop_empty);
Label pc_empty;
__ cmpptr(Address(thread, JavaThread::exception_pc_offset()), 0);
__ jcc(Assembler::equal, pc_empty);
__ stop("exception pc already set");
__ bind(pc_empty);
#endif
// save exception oop and issuing pc into JavaThread
// (exception handler will load it from here)
__ movptr(Address(thread, JavaThread::exception_oop_offset()), exception_oop);
__ movptr(Address(thread, JavaThread::exception_pc_offset()), exception_pc);
// patch throwing pc into return address (has bci & oop map)
__ movptr(Address(rbp, 1*BytesPerWord), exception_pc);
// compute the exception handler.
// the exception oop and the throwing pc are read from the fields in JavaThread
int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc));
oop_maps->add_gc_map(call_offset, oop_map);
// rax: handler address
// will be the deopt blob if nmethod was deoptimized while we looked up
// handler regardless of whether handler existed in the nmethod.
// only rax, is valid at this time, all other registers have been destroyed by the runtime call
__ invalidate_registers(false, true, true, true, true, true);
// patch the return address, this stub will directly return to the exception handler
__ movptr(Address(rbp, 1*BytesPerWord), rax);
switch (id) {
case forward_exception_id:
case handle_exception_nofpu_id:
case handle_exception_id:
// Restore the registers that were saved at the beginning.
restore_live_registers(sasm, id != handle_exception_nofpu_id);
break;
case handle_exception_from_callee_id:
// WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP
// since we do a leave anyway.
// Pop the return address.
__ leave();
__ pop(rcx);
__ jmp(rcx); // jump to exception handler
break;
default: ShouldNotReachHere();
}
return oop_maps;
}
void Runtime1::generate_unwind_exception(StubAssembler *sasm) {
// incoming parameters
const Register exception_oop = rax;
// callee-saved copy of exception_oop during runtime call
const Register exception_oop_callee_saved = NOT_LP64(rsi) LP64_ONLY(r14);
// other registers used in this stub
const Register exception_pc = rdx;
const Register handler_addr = rbx;
const Register thread = NOT_LP64(rdi) LP64_ONLY(r15_thread);
// verify that only rax, is valid at this time
__ invalidate_registers(false, true, true, true, true, true);
#ifdef ASSERT
// check that fields in JavaThread for exception oop and issuing pc are empty
NOT_LP64(__ get_thread(thread);)
Label oop_empty;
__ cmpptr(Address(thread, JavaThread::exception_oop_offset()), 0);
__ jcc(Assembler::equal, oop_empty);
__ stop("exception oop must be empty");
__ bind(oop_empty);
Label pc_empty;
__ cmpptr(Address(thread, JavaThread::exception_pc_offset()), 0);
__ jcc(Assembler::equal, pc_empty);
__ stop("exception pc must be empty");
__ bind(pc_empty);
#endif
// clear the FPU stack in case any FPU results are left behind
NOT_LP64( __ empty_FPU_stack(); )
// save exception_oop in callee-saved register to preserve it during runtime calls
__ verify_not_null_oop(exception_oop);
__ movptr(exception_oop_callee_saved, exception_oop);
NOT_LP64(__ get_thread(thread);)
// Get return address (is on top of stack after leave).
__ movptr(exception_pc, Address(rsp, 0));
// search the exception handler address of the caller (using the return address)
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, exception_pc);
// rax: exception handler address of the caller
// Only RAX and RSI are valid at this time, all other registers have been destroyed by the call.
__ invalidate_registers(false, true, true, true, false, true);
// move result of call into correct register
__ movptr(handler_addr, rax);
// Restore exception oop to RAX (required convention of exception handler).
__ movptr(exception_oop, exception_oop_callee_saved);
// verify that there is really a valid exception in rax
__ verify_not_null_oop(exception_oop);
// get throwing pc (= return address).
// rdx has been destroyed by the call, so it must be set again
// the pop is also necessary to simulate the effect of a ret(0)
__ pop(exception_pc);
// continue at exception handler (return address removed)
// note: do *not* remove arguments when unwinding the
// activation since the caller assumes having
// all arguments on the stack when entering the
// runtime to determine the exception handler
// (GC happens at call site with arguments!)
// rax: exception oop
// rdx: throwing pc
// rbx: exception handler
__ jmp(handler_addr);
}
OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
// use the maximum number of runtime-arguments here because it is difficult to
// distinguish each RT-Call.
// Note: This number affects also the RT-Call in generate_handle_exception because
// the oop-map is shared for all calls.
const int num_rt_args = 2; // thread + dummy
DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
assert(deopt_blob != NULL, "deoptimization blob must have been created");
OopMap* oop_map = save_live_registers(sasm, num_rt_args);
#ifdef _LP64
const Register thread = r15_thread;
// No need to worry about dummy
__ mov(c_rarg0, thread);
#else
__ push(rax); // push dummy
const Register thread = rdi; // is callee-saved register (Visual C++ calling conventions)
// push java thread (becomes first argument of C function)
__ get_thread(thread);
__ push(thread);
#endif // _LP64
__ set_last_Java_frame(thread, noreg, rbp, NULL);
// do the call
__ call(RuntimeAddress(target));
OopMapSet* oop_maps = new OopMapSet();
oop_maps->add_gc_map(__ offset(), oop_map);
// verify callee-saved register
#ifdef ASSERT
guarantee(thread != rax, "change this code");
__ push(rax);
{ Label L;
__ get_thread(rax);
__ cmpptr(thread, rax);
__ jcc(Assembler::equal, L);
__ stop("StubAssembler::call_RT: rdi/r15 not callee saved?");
__ bind(L);
}
__ pop(rax);
#endif
__ reset_last_Java_frame(thread, true);
#ifndef _LP64
__ pop(rcx); // discard thread arg
__ pop(rcx); // discard dummy
#endif // _LP64
// check for pending exceptions
{ Label L;
__ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
__ jcc(Assembler::equal, L);
// exception pending => remove activation and forward to exception handler
__ testptr(rax, rax); // have we deoptimized?
__ jump_cc(Assembler::equal,
RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
// the deopt blob expects exceptions in the special fields of
// JavaThread, so copy and clear pending exception.
// load and clear pending exception
__ movptr(rax, Address(thread, Thread::pending_exception_offset()));
__ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
// check that there is really a valid exception
__ verify_not_null_oop(rax);
// load throwing pc: this is the return address of the stub
__ movptr(rdx, Address(rsp, return_off * VMRegImpl::stack_slot_size));
#ifdef ASSERT
// check that fields in JavaThread for exception oop and issuing pc are empty
Label oop_empty;
__ cmpptr(Address(thread, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
__ jcc(Assembler::equal, oop_empty);
__ stop("exception oop must be empty");
__ bind(oop_empty);
Label pc_empty;
__ cmpptr(Address(thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
__ jcc(Assembler::equal, pc_empty);
__ stop("exception pc must be empty");
__ bind(pc_empty);
#endif
// store exception oop and throwing pc to JavaThread
__ movptr(Address(thread, JavaThread::exception_oop_offset()), rax);
__ movptr(Address(thread, JavaThread::exception_pc_offset()), rdx);
restore_live_registers(sasm);
__ leave();
__ addptr(rsp, BytesPerWord); // remove return address from stack
// Forward the exception directly to deopt blob. We can blow no
// registers and must leave throwing pc on the stack. A patch may
// have values live in registers so the entry point with the
// exception in tls.
__ jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls()));
__ bind(L);
}
// Runtime will return true if the nmethod has been deoptimized during
// the patching process. In that case we must do a deopt reexecute instead.
Label cont;
__ testptr(rax, rax); // have we deoptimized?
__ jcc(Assembler::equal, cont); // no
// Will reexecute. Proper return address is already on the stack we just restore
// registers, pop all of our frame but the return address and jump to the deopt blob
restore_live_registers(sasm);
__ leave();
__ jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
__ bind(cont);
restore_live_registers(sasm);
__ leave();
__ ret(0);
return oop_maps;
}