Skip to content

Commit

Permalink
[arm][arm64] Add an option to optimize write-barrier code for size
Browse files Browse the repository at this point in the history
This CL moves the write-barrier check out of each store site and into the write-barrier stub, which reduces binary size.

``` before
        ;; StoreInstanceField(v11 . start = v15)
0x101d0040c    b801b020               strw r0, [r1, #27]
0x101d00410    36000100               tbzw r0, #0, 0x101d00430
0x101d00414    385ff030               ldrb ip0, [r1, #-1]
0x101d00418    385ff011               ldrb ip1, [r0, #-1]
0x101d0041c    8a500a30               and ip0, ip1, ip0 lsr #2
0x101d00420    ea5c821f               ands zr, ip0, r28 lsr #32
0x101d00424    54000060               beq 0x101d00430
0x101d00428    f942cb5e               ldr lr, [r26, #1424]
0x101d0042c    d63f03c0               blr lr
        ;; ParallelMove r0 <- r4
0x101d00430    aa0403e0               mov r0, r4
        ;; StoreInstanceField(v11 . end = v17)
0x101d00434    b801f020               strw r0, [r1, #31]
0x101d00438    36000100               tbzw r0, #0, 0x101d00458
0x101d0043c    385ff030               ldrb ip0, [r1, #-1]
0x101d00440    385ff011               ldrb ip1, [r0, #-1]
0x101d00444    8a500a30               and ip0, ip1, ip0 lsr #2
0x101d00448    ea5c821f               ands zr, ip0, r28 lsr #32
0x101d0044c    54000060               beq 0x101d00458
0x101d00450    f942cb5e               ldr lr, [r26, #1424]
0x101d00454    d63f03c0               blr lr
    ```

``` after
        ;; StoreInstanceField(v11 . start = v15)
0x10a6003cc    b801b020               strw r0, [r1, #27]
0x10a6003d0    f942cb5e               ldr lr, [r26, #1424]
0x10a6003d4    d63f03c0               blr lr
        ;; ParallelMove r0 <- r4
0x10a6003d8    aa0403e0               mov r0, r4
        ;; StoreInstanceField(v11 . end = v17)
0x10a6003dc    b801f020               strw r0, [r1, #31]
0x10a6003e0    f942cb5e               ldr lr, [r26, #1424]
0x10a6003e4    d63f03c0               blr lr
```
  • Loading branch information
kanghuay committed Jun 14, 2022
1 parent fb58843 commit e3617f2
Show file tree
Hide file tree
Showing 8 changed files with 134 additions and 0 deletions.
4 changes: 4 additions & 0 deletions runtime/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ config("dart_config") {
defines += [ "DART_COMPRESSED_POINTERS" ]
}

if (dart_optimize_for_size) {
defines += [ "DART_OPTIMIZE_FOR_SIZE" ]
}

if (is_fuchsia) {
if (using_fuchsia_gn_sdk) {
lib_dirs = [ root_out_dir + "/lib" ]
Expand Down
3 changes: 3 additions & 0 deletions runtime/runtime_args.gni
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ declare_args() {

# Whether to use compressed pointers.
dart_use_compressed_pointers = false

# Whether to optimize code for size
dart_optimize_for_size = false
}

declare_args() {
Expand Down
21 changes: 21 additions & 0 deletions runtime/vm/compiler/assembler/assembler_arm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1781,13 +1781,16 @@ void Assembler::StoreIntoObject(Register object,
// store buffer (case 1) or add value to the marking stack (case 2).
// Compare UntaggedObject::StorePointer.
Label done;
#ifndef DART_OPTIMIZE_FOR_SIZE
if (can_be_smi == kValueCanBeSmi) {
BranchIfSmi(value, &done);
}
#endif
const bool preserve_lr = lr_state().LRContainsReturnAddress();
if (preserve_lr) {
SPILLS_LR_TO_FRAME(Push(LR));
}
#ifndef DART_OPTIMIZE_FOR_SIZE
CLOBBERS_LR({
ldrb(TMP, FieldAddress(object, target::Object::tags_offset()));
ldrb(LR, FieldAddress(value, target::Object::tags_offset()));
Expand All @@ -1796,11 +1799,14 @@ void Assembler::StoreIntoObject(Register object,
ldr(LR, Address(THR, target::Thread::write_barrier_mask_offset()));
tst(TMP, Operand(LR));
});
#endif
if (value != kWriteBarrierValueReg) {
// Unlikely. Only non-graph intrinsics.
// TODO(rmacnak): Shuffle registers in intrinsics.
Label restore_and_done;
#ifndef DART_OPTIMIZE_FOR_SIZE
b(&restore_and_done, ZERO);
#endif
Register objectForCall = object;
if (object != kWriteBarrierValueReg) {
Push(kWriteBarrierValueReg);
Expand All @@ -1821,7 +1827,11 @@ void Assembler::StoreIntoObject(Register object,
}
Bind(&restore_and_done);
} else {
#ifndef DART_OPTIMIZE_FOR_SIZE
generate_invoke_write_barrier_wrapper_(NE, object);
#else
generate_invoke_write_barrier_wrapper_(AL, object);
#endif
}
if (preserve_lr) {
RESTORES_LR_FROM_FRAME(Pop(LR));
Expand Down Expand Up @@ -1851,6 +1861,7 @@ void Assembler::StoreIntoArray(Register object,
// If so, call the WriteBarrier stub, which will either add object to the
// store buffer (case 1) or add value to the marking stack (case 2).
// Compare UntaggedObject::StorePointer.
#ifndef DART_OPTIMIZE_FOR_SIZE
Label done;
if (can_be_smi == kValueCanBeSmi) {
BranchIfSmi(value, &done);
Expand Down Expand Up @@ -1881,6 +1892,16 @@ void Assembler::StoreIntoArray(Register object,
RESTORES_LR_FROM_FRAME(Pop(LR));
}
Bind(&done);
#else
const bool preserve_lr = lr_state().LRContainsReturnAddress();
if (preserve_lr) {
SPILLS_LR_TO_FRAME(Push(LR));
}
generate_invoke_array_write_barrier_(AL);
if (preserve_lr) {
RESTORES_LR_FROM_FRAME(Pop(LR));
}
#endif
}

void Assembler::StoreIntoObjectOffset(Register object,
Expand Down
4 changes: 4 additions & 0 deletions runtime/vm/compiler/assembler/assembler_arm64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,7 @@ void Assembler::StoreBarrier(Register object,
// store buffer (case 1) or add value to the marking stack (case 2).
// Compare UntaggedObject::StorePointer.
Label done;
#ifndef DART_OPTIMIZE_FOR_SIZE
if (can_be_smi == kValueCanBeSmi) {
BranchIfSmi(value, &done);
}
Expand All @@ -1115,6 +1116,7 @@ void Assembler::StoreBarrier(Register object,
Operand(TMP, LSR, target::UntaggedObject::kBarrierOverlapShift));
tst(TMP, Operand(HEAP_BITS, LSR, 32));
b(&done, ZERO);
#endif

if (spill_lr) {
SPILLS_LR_TO_FRAME(Push(LR));
Expand Down Expand Up @@ -1186,6 +1188,7 @@ void Assembler::StoreIntoArrayBarrier(Register object,
// store buffer (case 1) or add value to the marking stack (case 2).
// Compare UntaggedObject::StorePointer.
Label done;
#ifndef DART_OPTIMIZE_FOR_SIZE
if (can_be_smi == kValueCanBeSmi) {
BranchIfSmi(value, &done);
}
Expand All @@ -1197,6 +1200,7 @@ void Assembler::StoreIntoArrayBarrier(Register object,
Operand(TMP, LSR, target::UntaggedObject::kBarrierOverlapShift));
tst(TMP, Operand(HEAP_BITS, LSR, 32));
b(&done, ZERO);
#endif
if (spill_lr) {
SPILLS_LR_TO_FRAME(Push(LR));
}
Expand Down
49 changes: 49 additions & 0 deletions runtime/vm/compiler/stub_code_compiler_arm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1568,12 +1568,46 @@ void StubCodeCompiler::GenerateWriteBarrierWrappersStub(Assembler* assembler) {

Register reg = static_cast<Register>(i);
intptr_t start = __ CodeSize();
#ifdef DART_OPTIMIZE_FOR_SIZE
Label done;
__ BranchIfSmi(kWriteBarrierValueReg, &done);
if (kWriteBarrierObjectReg == reg) {
SPILLS_LR_TO_FRAME(__ Push(LR));
} else {
SPILLS_LR_TO_FRAME(__ PushList((1 << LR) | (1 << kWriteBarrierObjectReg)));
}
CLOBBERS_LR({
__ ldrb(TMP, FieldAddress(reg, target::Object::tags_offset()));
__ ldrb(LR, FieldAddress(kWriteBarrierValueReg, target::Object::tags_offset()));
__ and_(TMP, LR,
Operand(TMP, LSR, target::UntaggedObject::kBarrierOverlapShift));
__ ldr(LR, Address(THR, target::Thread::write_barrier_mask_offset()));
__ tst(TMP, Operand(LR));
});
if (kWriteBarrierObjectReg != reg) {
__ mov(kWriteBarrierObjectReg, Operand(reg));
}
__ Call(Address(THR, target::Thread::write_barrier_entry_point_offset()), NE);
if (kWriteBarrierObjectReg == reg) {
RESTORES_LR_FROM_FRAME(
__ Pop(LR));
} else {
RESTORES_LR_FROM_FRAME(
__ PopList((1 << LR) | (1 << kWriteBarrierObjectReg)));
}
__ Bind(&done);
READS_RETURN_ADDRESS_FROM_LR(__ bx(LR));
if (kWriteBarrierObjectReg == reg) {
__ Breakpoint(); // padding
}
#else
SPILLS_LR_TO_FRAME(__ PushList((1 << LR) | (1 << kWriteBarrierObjectReg)));
__ mov(kWriteBarrierObjectReg, Operand(reg));
__ Call(Address(THR, target::Thread::write_barrier_entry_point_offset()));
RESTORES_LR_FROM_FRAME(
__ PopList((1 << LR) | (1 << kWriteBarrierObjectReg)));
READS_RETURN_ADDRESS_FROM_LR(__ bx(LR));
#endif
intptr_t end = __ CodeSize();

RELEASE_ASSERT(end - start == kStoreBufferWrapperSize);
Expand Down Expand Up @@ -1740,6 +1774,21 @@ void StubCodeCompiler::GenerateWriteBarrierStub(Assembler* assembler) {
}

// Emits the array write barrier stub (ARM).
//
// Without DART_OPTIMIZE_FOR_SIZE the stub consists only of the code produced
// by GenerateWriteBarrierStubHelper. With DART_OPTIMIZE_FOR_SIZE the stub is
// prefixed with the barrier filter, so that call sites can invoke it
// unconditionally: it reads the stored value from kWriteBarrierValueReg and
// the target object from kWriteBarrierObjectReg and returns early when no
// barrier work is required.
void StubCodeCompiler::GenerateArrayWriteBarrierStub(Assembler* assembler) {
#ifdef DART_OPTIMIZE_FOR_SIZE
// Return straight to the caller when (value & kSmiTagMask) == 0, i.e. the
// value carries the Smi tag and has no header to inspect.
__ tst(kWriteBarrierValueReg, Operand(kSmiTagMask));
READS_RETURN_ADDRESS_FROM_LR(__ bx(LR, EQ));
// LR is used as a scratch register by the check below, so the return address
// is saved on the stack first.
SPILLS_LR_TO_FRAME(__ Push(LR));
CLOBBERS_LR({
// TMP <- tags byte of the object, LR <- tags byte of the value.
__ ldrb(TMP, FieldAddress(kWriteBarrierObjectReg, target::Object::tags_offset()));
__ ldrb(LR, FieldAddress(kWriteBarrierValueReg, target::Object::tags_offset()));
// TMP <- value tags & (object tags >> kBarrierOverlapShift).
__ and_(TMP, LR,
Operand(TMP, LSR, target::UntaggedObject::kBarrierOverlapShift));
// Test the combined bits against the thread's write barrier mask; the
// resulting flags select between the early return and the helper below.
__ ldr(LR, Address(THR, target::Thread::write_barrier_mask_offset()));
__ tst(TMP, Operand(LR));
});
// Restore the return address. The conditional return that follows relies on
// the flags set by the tst above still being intact after this Pop.
RESTORES_LR_FROM_FRAME(__ Pop(LR));
// No barrier bits set: return to the caller without doing any work.
READS_RETURN_ADDRESS_FROM_LR(__ bx(LR, ZERO));
#endif
// Reached unconditionally in the default build, and only when the filter
// above did not return in the size-optimized build.
// NOTE(review): the meaning of the `true` argument is not visible here;
// presumably it selects the array flavour of the helper - confirm.
GenerateWriteBarrierStubHelper(assembler, true);
}

Expand Down
45 changes: 45 additions & 0 deletions runtime/vm/compiler/stub_code_compiler_arm64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1863,13 +1863,43 @@ void StubCodeCompiler::GenerateWriteBarrierWrappersStub(Assembler* assembler) {

Register reg = static_cast<Register>(i);
intptr_t start = __ CodeSize();
#ifdef DART_OPTIMIZE_FOR_SIZE
Label done;
__ BranchIfSmi(kWriteBarrierValueReg, &done);
__ ldr(TMP, FieldAddress(reg, target::Object::tags_offset(), kByte),
kUnsignedByte);
__ ldr(TMP2, FieldAddress(kWriteBarrierValueReg, target::Object::tags_offset(), kByte),
kUnsignedByte);
__ and_(TMP, TMP2,
Operand(TMP, LSR, target::UntaggedObject::kBarrierOverlapShift));
__ tst(TMP, Operand(HEAP_BITS, LSR, 32));
__ b(&done, ZERO);
if (kWriteBarrierObjectReg == reg) {
SPILLS_LR_TO_FRAME(__ Push(LR));
} else {
SPILLS_LR_TO_FRAME(__ PushPair(kWriteBarrierObjectReg, LR));
__ mov(kWriteBarrierObjectReg, reg);
}
__ Call(Address(THR, target::Thread::write_barrier_entry_point_offset()));
if (kWriteBarrierObjectReg == reg) {
RESTORES_LR_FROM_FRAME(__ Pop(LR));
} else {
RESTORES_LR_FROM_FRAME(__ PopPair(kWriteBarrierObjectReg, LR));
}
__ Bind(&done);
READS_RETURN_ADDRESS_FROM_LR(__ ret(LR));
if (kWriteBarrierObjectReg == reg) {
__ Breakpoint(); // padding
}
#else
SPILLS_LR_TO_FRAME(__ Push(LR));
__ Push(kWriteBarrierObjectReg);
__ mov(kWriteBarrierObjectReg, reg);
__ Call(Address(THR, target::Thread::write_barrier_entry_point_offset()));
__ Pop(kWriteBarrierObjectReg);
RESTORES_LR_FROM_FRAME(__ Pop(LR));
READS_RETURN_ADDRESS_FROM_LR(__ ret(LR));
#endif
intptr_t end = __ CodeSize();

RELEASE_ASSERT(end - start == kStoreBufferWrapperSize);
Expand Down Expand Up @@ -2047,6 +2077,21 @@ void StubCodeCompiler::GenerateWriteBarrierStub(Assembler* assembler) {
}

// Emits the array write barrier stub (ARM64).
//
// Without DART_OPTIMIZE_FOR_SIZE the stub consists only of the code produced
// by GenerateWriteBarrierStubHelper. With DART_OPTIMIZE_FOR_SIZE the stub is
// prefixed with the barrier filter, so that call sites can invoke it
// unconditionally: it reads the stored value from kWriteBarrierValueReg and
// the target object from kWriteBarrierObjectReg and returns early when no
// barrier work is required.
void StubCodeCompiler::GenerateArrayWriteBarrierStub(Assembler* assembler) {
#ifdef DART_OPTIMIZE_FOR_SIZE
// done: nothing to do, return to the caller.
// slow_case: barrier bits are set, continue into the helper-generated code.
Label done, slow_case;
// A Smi value has no header to inspect, so it never needs a barrier.
__ BranchIfSmi(kWriteBarrierValueReg, &done);
// TMP <- tags byte of the object, TMP2 <- tags byte of the value.
__ ldr(TMP, FieldAddress(kWriteBarrierObjectReg, target::Object::tags_offset(), kByte),
kUnsignedByte);
__ ldr(TMP2, FieldAddress(kWriteBarrierValueReg, target::Object::tags_offset(), kByte),
kUnsignedByte);
// TMP <- value tags & (object tags >> kBarrierOverlapShift).
__ and_(TMP, TMP2,
Operand(TMP, LSR, target::UntaggedObject::kBarrierOverlapShift));
// Test the combined bits against HEAP_BITS shifted right by 32.
__ tst(TMP, Operand(HEAP_BITS, LSR, 32));
__ b(&slow_case, NOT_ZERO);
__ Bind(&done);
// Only TMP/TMP2 were used as scratch, so LR still holds the return address.
READS_RETURN_ADDRESS_FROM_LR(__ ret(LR));
__ Bind(&slow_case);
#endif
// Reached unconditionally in the default build, and only via slow_case in the
// size-optimized build.
// NOTE(review): the meaning of the `true` argument is not visible here;
// presumably it selects the array flavour of the helper - confirm.
GenerateWriteBarrierStubHelper(assembler, true);
}

Expand Down
4 changes: 4 additions & 0 deletions runtime/vm/constants_arm.h
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,11 @@ constexpr int kNumberOfDartAvailableCpuRegs =
kNumberOfCpuRegisters - kNumberOfReservedCpuRegisters;
// No reason to prefer certain registers on ARM.
constexpr int kRegisterAllocationBias = 0;
// Size in bytes of each per-register write barrier wrapper on ARM. The wrapper
// generator verifies the emitted code size against this constant with a
// RELEASE_ASSERT, so it must be updated together with the wrapper code.
// The DART_OPTIMIZE_FOR_SIZE wrapper is larger because it also contains the
// barrier check that is otherwise emitted inline at each store.
#ifdef DART_OPTIMIZE_FOR_SIZE
const intptr_t kStoreBufferWrapperSize = 52;
#else
const intptr_t kStoreBufferWrapperSize = 24;
#endif
// Registers available to Dart that are not preserved by runtime calls.
const RegList kDartVolatileCpuRegs =
kDartAvailableCpuRegs & ~kAbiPreservedCpuRegs;
Expand Down
4 changes: 4 additions & 0 deletions runtime/vm/constants_arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,11 @@ const RegList kAbiVolatileFpuRegs =
R(V17) | R(V18) | R(V19) | R(V20) | R(V21) | R(V22) | R(V23) | R(V24) |
R(V25) | R(V26) | R(V27) | R(V28) | R(V29) | R(V30) | R(V31);

// Size in bytes of each per-register write barrier wrapper on ARM64. The
// wrapper generator verifies the emitted code size against this constant with
// a RELEASE_ASSERT, so it must be updated together with the wrapper code.
// The DART_OPTIMIZE_FOR_SIZE wrapper is larger because it also contains the
// barrier check that is otherwise emitted inline at each store.
#ifdef DART_OPTIMIZE_FOR_SIZE
constexpr int kStoreBufferWrapperSize = 48;
#else
constexpr int kStoreBufferWrapperSize = 32;
#endif

class CallingConventions {
public:
Expand Down

0 comments on commit e3617f2

Please sign in to comment.