@@ -3603,6 +3603,7 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
3603
3603
Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided. (dst == src) also possible.
3604
3604
address base = CompressedKlassPointers::base ();
3605
3605
int shift = CompressedKlassPointers::shift ();
3606
+ bool need_zero_extend = base != 0 ;
3606
3607
assert (UseCompressedClassPointers, " only for compressed klass ptrs" );
3607
3608
3608
3609
BLOCK_COMMENT (" cKlass encoder {" );
@@ -3619,28 +3620,76 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
3619
3620
bind (ok);
3620
3621
#endif
3621
3622
3622
- if (base != NULL ) {
3623
- unsigned int base_h = ((unsigned long )base)>>32 ;
3624
- unsigned int base_l = (unsigned int )((unsigned long )base);
3625
- if ((base_h != 0 ) && (base_l == 0 ) && VM_Version::has_HighWordInstr ()) {
3626
- lgr_if_needed (dst, current);
3627
- z_aih (dst, -((int )base_h)); // Base has no set bits in lower half.
3628
- } else if ((base_h == 0 ) && (base_l != 0 )) {
3629
- lgr_if_needed (dst, current);
3630
- z_agfi (dst, -(int )base_l);
3631
- } else {
3632
- load_const (Z_R0, base);
3633
- lgr_if_needed (dst, current);
3634
- z_sgr (dst, Z_R0);
3635
- }
3636
- current = dst;
3637
- }
3623
+ // Scale down the incoming klass pointer first.
3624
+ // We then can be sure we calculate an offset that fits into 32 bit.
3625
+ // More generally speaking: all subsequent calculations are purely 32-bit.
3638
3626
if (shift != 0 ) {
3639
3627
assert (LogKlassAlignmentInBytes == shift, " decode alg wrong" );
3640
3628
z_srlg (dst, current, shift);
3641
3629
current = dst;
3642
3630
}
3643
- lgr_if_needed (dst, current); // Move may be required (if neither base nor shift != 0).
3631
+
3632
+ if (base != NULL ) {
3633
+ // Use scaled-down base address parts to match scaled-down klass pointer.
3634
+ unsigned int base_h = ((unsigned long )base)>>(32 +shift);
3635
+ unsigned int base_l = (unsigned int )(((unsigned long )base)>>shift);
3636
+
3637
+ // General considerations:
3638
+ // - when calculating (current_h - base_h), all digits must cancel (become 0).
3639
+ // Otherwise, we would end up with a compressed klass pointer which doesn't
3640
+ // fit into 32-bit.
3641
+ // - Only bit#33 of the difference could potentially be non-zero. For that
3642
+ // to happen, (current_l < base_l) must hold. In this case, the subtraction
3643
+ // will create a borrow out of bit#32, nicely killing bit#33.
3644
+ // - With the above, we only need to consider current_l and base_l to
3645
+ // calculate the result.
3646
+ // - Both values are treated as unsigned. The unsigned subtraction is
3647
+ // replaced by adding (unsigned) the 2's complement of the subtrahend.
3648
+
3649
+ if (base_l == 0 ) {
3650
+ // - By theory, the calculation to be performed here (current_h - base_h) MUST
3651
+ // cancel all high-word bits. Otherwise, we would end up with an offset
3652
+ // (i.e. compressed klass pointer) that does not fit into 32 bit.
3653
+ // - current_l remains unchanged.
3654
+ // - Therefore, we can replace all calculation with just a
3655
+ // zero-extending load 32 to 64 bit.
3656
+ // - Even that can be replaced with a conditional load if dst != current.
3657
+ // (this is a local view. The shift step may have requested zero-extension).
3658
+ } else {
3659
+ if ((base_h == 0 ) && is_uimm (base_l, 31 )) {
3660
+ // If we happen to find that (base_h == 0), and that base_l is within the range
3661
+ // which can be represented by a signed int, then we can use 64bit signed add with
3662
+ // (-base_l) as 32bit signed immediate operand. The add will take care of the
3663
+ // upper 32 bits of the result, saving us the need of an extra zero extension.
3664
+ // For base_l to be in the required range, it must not have the most significant
3665
+ // bit (aka sign bit) set.
3666
+ lgr_if_needed (dst, current); // no zero/sign extension in this case!
3667
+ z_agfi (dst, -(int )base_l); // base_l must be passed as signed.
3668
+ need_zero_extend = false ;
3669
+ current = dst;
3670
+ } else {
3671
+ // To begin with, we may need to copy and/or zero-extend the register operand.
3672
+ // We have to calculate (current_l - base_l). Because there is no unsigend
3673
+ // subtract instruction with immediate operand, we add the 2's complement of base_l.
3674
+ if (need_zero_extend) {
3675
+ z_llgfr (dst, current);
3676
+ need_zero_extend = false ;
3677
+ } else {
3678
+ llgfr_if_needed (dst, current);
3679
+ }
3680
+ current = dst;
3681
+ z_alfi (dst, -base_l);
3682
+ }
3683
+ }
3684
+ }
3685
+
3686
+ if (need_zero_extend) {
3687
+ // We must zero-extend the calculated result. It may have some leftover bits in
3688
+ // the hi-word because we only did optimized calculations.
3689
+ z_llgfr (dst, current);
3690
+ } else {
3691
+ llgfr_if_needed (dst, current); // zero-extension while copying comes at no extra cost.
3692
+ }
3644
3693
3645
3694
BLOCK_COMMENT (" } cKlass encoder" );
3646
3695
}
0 commit comments