Skip to content

Commit

Permalink
8270947: AArch64: C1: use zero_words to initialize all objects
Browse files Browse the repository at this point in the history
Reviewed-by: ngasson, adinn
  • Loading branch information
Andrew Haley committed Jul 30, 2021
1 parent cd7e30e commit 6c68ce2
Show file tree
Hide file tree
Showing 8 changed files with 28,831 additions and 165 deletions.
4 changes: 2 additions & 2 deletions src/hotspot/cpu/aarch64/aarch64.ad
Expand Up @@ -14983,12 +14983,12 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag
ins_pipe(pipe_class_memory);
%}

instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
%{
predicate((uint64_t)n->in(2)->get_long()
< (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
match(Set dummy (ClearArray cnt base));
effect(USE_KILL base);
effect(TEMP temp, USE_KILL base, KILL cr);

ins_cost(4 * INSN_COST);
format %{ "ClearArray $cnt, $base" %}
Expand Down
12 changes: 6 additions & 6 deletions src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp
Expand Up @@ -1127,8 +1127,8 @@ void LIRGenerator::do_NewInstance(NewInstance* x) {
CodeEmitInfo* info = state_for(x, x->state());
LIR_Opr reg = result_register_for(x->type());
new_instance(reg, x->klass(), x->is_unresolved(),
FrameMap::r2_oop_opr,
FrameMap::r5_oop_opr,
FrameMap::r10_oop_opr,
FrameMap::r11_oop_opr,
FrameMap::r4_oop_opr,
LIR_OprFact::illegalOpr,
FrameMap::r3_metadata_opr, info);
Expand All @@ -1143,8 +1143,8 @@ void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
length.load_item_force(FrameMap::r19_opr);

LIR_Opr reg = result_register_for(x->type());
LIR_Opr tmp1 = FrameMap::r2_oop_opr;
LIR_Opr tmp2 = FrameMap::r4_oop_opr;
LIR_Opr tmp1 = FrameMap::r10_oop_opr;
LIR_Opr tmp2 = FrameMap::r11_oop_opr;
LIR_Opr tmp3 = FrameMap::r5_oop_opr;
LIR_Opr tmp4 = reg;
LIR_Opr klass_reg = FrameMap::r3_metadata_opr;
Expand Down Expand Up @@ -1172,8 +1172,8 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
CodeEmitInfo* info = state_for(x, x->state());

LIR_Opr reg = result_register_for(x->type());
LIR_Opr tmp1 = FrameMap::r2_oop_opr;
LIR_Opr tmp2 = FrameMap::r4_oop_opr;
LIR_Opr tmp1 = FrameMap::r10_oop_opr;
LIR_Opr tmp2 = FrameMap::r11_oop_opr;
LIR_Opr tmp3 = FrameMap::r5_oop_opr;
LIR_Opr tmp4 = reg;
LIR_Opr klass_reg = FrameMap::r3_metadata_opr;
Expand Down
64 changes: 18 additions & 46 deletions src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -180,20 +180,24 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
}

// preserves obj, destroys len_in_bytes
void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {
//
// Scratch registers: t1 = r10, t2 = r11
//
void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2) {
assert(hdr_size_in_bytes >= 0, "header size must be positive or 0");
assert(t1 == r10 && t2 == r11, "must be");

Label done;

// len_in_bytes is positive and ptr sized
subs(len_in_bytes, len_in_bytes, hdr_size_in_bytes);
br(Assembler::EQ, done);

// Preserve obj
if (hdr_size_in_bytes)
add(obj, obj, hdr_size_in_bytes);
zero_memory(obj, len_in_bytes, t1);
if (hdr_size_in_bytes)
sub(obj, obj, hdr_size_in_bytes);
// zero_words() takes ptr in r10 and count in words in r11
mov(rscratch1, len_in_bytes);
lea(t1, Address(obj, hdr_size_in_bytes));
lsr(t2, rscratch1, LogBytesPerWord);
zero_words(t1, t2);

bind(done);
}
Expand All @@ -208,6 +212,7 @@ void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2,
initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB);
}

// Scratch registers: t1 = r10, t2 = r11
void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) {
assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
"con_size_in_bytes is not multiple of alignment");
Expand All @@ -218,45 +223,13 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register
if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
// clear rest of allocated space
const Register index = t2;
const int threshold = 16 * BytesPerWord; // approximate break even point for code size (see comments below)
if (var_size_in_bytes != noreg) {
mov(index, var_size_in_bytes);
initialize_body(obj, index, hdr_size_in_bytes, t1);
} else if (con_size_in_bytes <= threshold) {
// use explicit null stores
int i = hdr_size_in_bytes;
if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) {
str(zr, Address(obj, i));
i += BytesPerWord;
}
for (; i < con_size_in_bytes; i += 2 * BytesPerWord)
stp(zr, zr, Address(obj, i));
initialize_body(obj, index, hdr_size_in_bytes, t1, t2);
} else if (con_size_in_bytes > hdr_size_in_bytes) {
block_comment("zero memory");
// use loop to null out the fields

int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord;
mov(index, words / 8);

const int unroll = 8; // Number of str(zr) instructions we'll unroll
int remainder = words % unroll;
lea(rscratch1, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord));

Label entry_point, loop;
b(entry_point);

bind(loop);
sub(index, index, 1);
for (int i = -unroll; i < 0; i++) {
if (-i == remainder)
bind(entry_point);
str(zr, Address(rscratch1, i * wordSize));
}
if (remainder == 0)
bind(entry_point);
add(rscratch1, rscratch1, unroll * wordSize);
cbnz(index, loop);

con_size_in_bytes -= hdr_size_in_bytes;
lea(t1, Address(obj, hdr_size_in_bytes));
zero_words(t1, con_size_in_bytes / BytesPerWord);
}
}

Expand Down Expand Up @@ -291,8 +264,7 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1,
initialize_header(obj, klass, len, t1, t2);

// clear rest of allocated space
const Register len_zero = len;
initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero);
initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2);

membar(StoreStore);

Expand Down
6 changes: 3 additions & 3 deletions src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
* Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -48,7 +48,7 @@ using MacroAssembler::null_check;
);

void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2);
void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1);
void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2);

void float_cmp(bool is_float, int unordered_result,
FloatRegister f0, FloatRegister f1,
Expand Down
16 changes: 8 additions & 8 deletions src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -656,9 +656,9 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) {
Label slow_path;
Register obj_size = r2;
Register t1 = r19;
Register t2 = r4;
Register obj_size = r19;
Register t1 = r10;
Register t2 = r11;
assert_different_registers(klass, obj, obj_size, t1, t2);

__ stp(r19, zr, Address(__ pre(sp, -2 * wordSize)));
Expand Down Expand Up @@ -769,9 +769,9 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
// allocations.
// Otherwise, just go to the slow path.
if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) {
Register arr_size = r4;
Register t1 = r2;
Register t2 = r5;
Register arr_size = r5;
Register t1 = r10;
Register t2 = r11;
Label slow_path;
assert_different_registers(length, klass, obj, arr_size, t1, t2);

Expand Down Expand Up @@ -801,7 +801,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ andr(t1, t1, Klass::_lh_header_size_mask);
__ sub(arr_size, arr_size, t1); // body length
__ add(t1, t1, obj); // body start
__ initialize_body(t1, arr_size, 0, t2);
__ initialize_body(t1, arr_size, 0, t1, t2);
__ membar(Assembler::StoreStore);
__ verify_oop(obj);

Expand Down

3 comments on commit 6c68ce2

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@GoeLin
Copy link
Member

@GoeLin GoeLin commented on 6c68ce2 Oct 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/backport jdk17u-dev

@openjdk
Copy link

@openjdk openjdk bot commented on 6c68ce2 Oct 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@GoeLin the backport was successfully created on the branch GoeLin-backport-6c68ce2d in my personal fork of openjdk/jdk17u-dev. To create a pull request with this backport targeting openjdk/jdk17u-dev:master, just click the following link:

➡️ Create pull request

The title of the pull request is automatically filled in correctly and below you find a suggestion for the pull request body:

Hi all,

This pull request contains a backport of commit 6c68ce2d from the openjdk/jdk repository.

The commit being backported was authored by Andrew Haley on 30 Jul 2021 and was reviewed by Nick Gasson and Andrew Dinn.

Thanks!

If you need to update the source branch of the pull then run the following commands in a local clone of your personal fork of openjdk/jdk17u-dev:

$ git fetch https://github.com/openjdk-bots/jdk17u-dev GoeLin-backport-6c68ce2d:GoeLin-backport-6c68ce2d
$ git checkout GoeLin-backport-6c68ce2d
# make changes
$ git add paths/to/changed/files
$ git commit --message 'Describe additional changes made'
$ git push https://github.com/openjdk-bots/jdk17u-dev GoeLin-backport-6c68ce2d

Please sign in to comment.