Skip to content

Commit

Permalink
8326962: C2 SuperWord: cache VPointer
Browse files Browse the repository at this point in the history
Reviewed-by: chagedorn, kvn
  • Loading branch information
eme64 committed Apr 4, 2024
1 parent 2931458 commit f762637
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 52 deletions.
24 changes: 12 additions & 12 deletions src/hotspot/share/opto/superword.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -531,13 +531,13 @@ void SuperWord::find_adjacent_refs() {
set_align_to_ref(align_to_mem_ref);
}

VPointer align_to_ref_p(mem_ref, _vloop);
const VPointer& align_to_ref_p = vpointer(mem_ref);
// Set alignment relative to "align_to_ref" for all related memory operations.
for (int i = memops.size() - 1; i >= 0; i--) {
MemNode* s = memops.at(i)->as_Mem();
if (isomorphic(s, mem_ref) &&
(!_do_vector_loop || same_origin_idx(s, mem_ref))) {
VPointer p2(s, _vloop);
const VPointer& p2 = vpointer(s);
if (p2.comparable(align_to_ref_p)) {
int align = memory_alignment(s, iv_adjustment);
set_alignment(s, align);
Expand Down Expand Up @@ -593,11 +593,11 @@ MemNode* SuperWord::find_align_to_ref(Node_List &memops, int &idx) {
// Count number of comparable memory ops
for (uint i = 0; i < memops.size(); i++) {
MemNode* s1 = memops.at(i)->as_Mem();
VPointer p1(s1, _vloop);
const VPointer& p1 = vpointer(s1);
for (uint j = i+1; j < memops.size(); j++) {
MemNode* s2 = memops.at(j)->as_Mem();
if (isomorphic(s1, s2)) {
VPointer p2(s2, _vloop);
const VPointer& p2 = vpointer(s2);
if (p1.comparable(p2)) {
(*cmp_ct.adr_at(i))++;
(*cmp_ct.adr_at(j))++;
Expand All @@ -618,7 +618,7 @@ MemNode* SuperWord::find_align_to_ref(Node_List &memops, int &idx) {
if (s->is_Store()) {
int vw = vector_width_in_bytes(s);
assert(vw > 1, "sanity");
VPointer p(s, _vloop);
const VPointer& p = vpointer(s);
if ( cmp_ct.at(j) > max_ct ||
(cmp_ct.at(j) == max_ct &&
( vw > max_vw ||
Expand All @@ -641,7 +641,7 @@ MemNode* SuperWord::find_align_to_ref(Node_List &memops, int &idx) {
if (s->is_Load()) {
int vw = vector_width_in_bytes(s);
assert(vw > 1, "sanity");
VPointer p(s, _vloop);
const VPointer& p = vpointer(s);
if ( cmp_ct.at(j) > max_ct ||
(cmp_ct.at(j) == max_ct &&
( vw > max_vw ||
Expand Down Expand Up @@ -714,7 +714,7 @@ int SuperWord::get_vw_bytes_special(MemNode* s) {
//---------------------------get_iv_adjustment---------------------------
// Calculate loop's iv adjustment for this memory ops.
int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
VPointer align_to_ref_p(mem_ref, _vloop);
const VPointer& align_to_ref_p = vpointer(mem_ref);
int offset = align_to_ref_p.offset_in_bytes();
int scale = align_to_ref_p.scale_in_bytes();
int elt_size = align_to_ref_p.memory_size();
Expand Down Expand Up @@ -875,8 +875,8 @@ bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) const {

// Adjacent memory references must have the same base, be comparable
// and have the correct distance between them.
VPointer p1(s1->as_Mem(), _vloop);
VPointer p2(s2->as_Mem(), _vloop);
const VPointer& p1 = vpointer(s1->as_Mem());
const VPointer& p2 = vpointer(s2->as_Mem());
if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
int diff = p2.offset_in_bytes() - p1.offset_in_bytes();
return diff == data_size(s1);
Expand Down Expand Up @@ -1637,7 +1637,7 @@ const AlignmentSolution* SuperWord::pack_alignment_solution(const Node_List* pac
assert(pack != nullptr && (pack->at(0)->is_Load() || pack->at(0)->is_Store()), "only load/store packs");

const MemNode* mem_ref = pack->at(0)->as_Mem();
VPointer mem_ref_p(mem_ref, _vloop);
const VPointer& mem_ref_p = vpointer(mem_ref);
const CountedLoopEndNode* pre_end = _vloop.pre_loop_end();
assert(pre_end->stride_is_con(), "pre loop stride is constant");

Expand Down Expand Up @@ -3310,7 +3310,7 @@ int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
tty->print("SuperWord::memory_alignment within a vector memory reference for %d: ", s->_idx); s->dump();
}
#endif
VPointer p(s, _vloop);
const VPointer& p = vpointer(s);
if (!p.valid()) {
NOT_PRODUCT(if(is_trace_superword_alignment()) tty->print_cr("SuperWord::memory_alignment: VPointer p invalid, return bottom_align");)
return bottom_align;
Expand Down Expand Up @@ -3413,7 +3413,7 @@ void SuperWord::adjust_pre_loop_limit_to_align_main_loop_vectors() {
Node* orig_limit = pre_opaq->original_loop_limit();
assert(orig_limit != nullptr && igvn().type(orig_limit) != Type::TOP, "");

VPointer align_to_ref_p(align_to_ref, _vloop);
const VPointer& align_to_ref_p = vpointer(align_to_ref);
assert(align_to_ref_p.valid(), "sanity");

// For the main-loop, we want the address of align_to_ref to be memory aligned
Expand Down
17 changes: 11 additions & 6 deletions src/hotspot/share/opto/superword.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ class SuperWord : public ResourceObj {
// Decide if loop can eventually be vectorized, and what unrolling factor is required.
static void unrolling_analysis(const VLoop &vloop, int &local_loop_unroll_factor);

// VLoop Accessors
// VLoop accessors
// The three accessors below forward to the _vloop member.
// phase(): the PhaseIdealLoop returned by _vloop.phase().
PhaseIdealLoop* phase() const { return _vloop.phase(); }
// igvn(): the PhaseIterGVN reached through _vloop.phase()->igvn().
PhaseIterGVN& igvn() const { return _vloop.phase()->igvn(); }
// lpt(): the IdealLoopTree returned by _vloop.lpt().
IdealLoopTree* lpt() const { return _vloop.lpt(); }
Expand All @@ -434,7 +434,7 @@ class SuperWord : public ResourceObj {
// iv_stride(): the value of stride_con() on the loop node returned by cl().
int iv_stride() const { return cl()->stride_con(); }
// in_bb(): forwards the query for node n to _vloop.in_bb(n).
bool in_bb(const Node* n) const { return _vloop.in_bb(n); }

// VLoopReductions Accessors
// VLoopReductions accessors
// Forwards to the reductions submodule of _vloop_analyzer and returns its
// is_marked_reduction(n) answer unchanged.
bool is_marked_reduction(const Node* n) const {
return _vloop_analyzer.reductions().is_marked_reduction(n);
}
Expand All @@ -443,12 +443,12 @@ class SuperWord : public ResourceObj {
return _vloop_analyzer.reductions().is_marked_reduction_pair(n1, n2);
}

// VLoopMemorySlices Accessors
// VLoopMemorySlices accessors
// Forwards to the memory_slices submodule of _vloop_analyzer and returns its
// same_memory_slice(n1, n2) answer unchanged.
bool same_memory_slice(MemNode* n1, MemNode* n2) const {
return _vloop_analyzer.memory_slices().same_memory_slice(n1, n2);
}

// VLoopBody Accessors
// VLoopBody accessors
// Returns the node array exposed by the body submodule of _vloop_analyzer
// (i.e. _vloop_analyzer.body().body()), by const reference.
const GrowableArray<Node*>& body() const {
return _vloop_analyzer.body().body();
}
Expand All @@ -457,7 +457,7 @@ class SuperWord : public ResourceObj {
return _vloop_analyzer.body().bb_idx(n);
}

// VLoopTypes Accessors
// VLoopTypes accessors
// Forwards to the types submodule of _vloop_analyzer and returns its
// velt_type(n) result unchanged.
const Type* velt_type(Node* n) const {
return _vloop_analyzer.types().velt_type(n);
}
Expand All @@ -482,7 +482,7 @@ class SuperWord : public ResourceObj {
return _vloop_analyzer.types().vector_width_in_bytes(n);
}

// VLoopDependencyGraph Accessors
// VLoopDependencyGraph accessors
// Returns, by const reference, the VLoopDependencyGraph held by _vloop_analyzer.
const VLoopDependencyGraph& dependency_graph() const {
return _vloop_analyzer.dependency_graph();
}
Expand All @@ -495,6 +495,11 @@ class SuperWord : public ResourceObj {
return _vloop_analyzer.dependency_graph().mutually_independent(nodes);
}

// VLoopVPointer accessors
// Returns a const reference to the VPointer for mem, as provided by the
// vpointers submodule of _vloop_analyzer. No VPointer is constructed here;
// the lookup is delegated to _vloop_analyzer.vpointers().vpointer(mem).
const VPointer& vpointer(const MemNode* mem) const {
return _vloop_analyzer.vpointers().vpointer(mem);
}

#ifndef PRODUCT
// TraceAutoVectorization and TraceSuperWord
bool is_trace_superword_alignment() const {
Expand Down
3 changes: 2 additions & 1 deletion src/hotspot/share/opto/traceAutoVectorizationTag.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,13 @@
#include "utilities/stringUtils.hpp"

#define COMPILER_TRACE_AUTO_VECTORIZATION_TAG(flags) \
flags(POINTER_ANALYSIS, "Trace VPointer") \
flags(POINTER_ANALYSIS, "Trace VPointer (verbose)") \
flags(PRECONDITIONS, "Trace VLoop::check_preconditions") \
flags(LOOP_ANALYZER, "Trace VLoopAnalyzer::setup_submodules") \
flags(MEMORY_SLICES, "Trace VLoopMemorySlices") \
flags(BODY, "Trace VLoopBody") \
flags(TYPES, "Trace VLoopTypes") \
flags(POINTERS, "Trace VLoopPointers") \
flags(DEPENDENCY_GRAPH, "Trace VLoopDependencyGraph") \
flags(SW_ALIGNMENT, "Trace SuperWord alignment analysis") \
flags(SW_ADJACENT_MEMOPS, "Trace SuperWord::find_adjacent_refs") \
Expand Down
106 changes: 82 additions & 24 deletions src/hotspot/share/opto/vectorization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,19 @@
#include "opto/rootnode.hpp"
#include "opto/vectorization.hpp"

#ifndef PRODUCT
// Print a compact, fixed-width description of node n to tty:
//   - nullptr:          the value zero in the constant format "(%4d)",
//   - integer constant: "(%4d)" with the constant's value,
//   - any other node:   "[%4d]" with the node's _idx.
// All three forms use a 4-wide field, so output from consecutive calls
// lines up in columns.
static void print_con_or_idx(const Node* n) {
  if (n == nullptr) {
    // Use the same padded format as the ConI case; a hand-written literal
    // here would not match the field width of the other two branches.
    tty->print("(%4d)", 0);
  } else if (n->is_ConI()) {
    jint val = n->as_ConI()->get_int();
    tty->print("(%4d)", val);
  } else {
    tty->print("[%4d]", n->_idx);
  }
}
#endif

bool VLoop::check_preconditions() {
#ifndef PRODUCT
if (is_trace_preconditions()) {
Expand Down Expand Up @@ -161,11 +174,62 @@ VStatus VLoopAnalyzer::setup_submodules_helper() {

_types.compute_vector_element_type();

_vpointers.compute_vpointers();

_dependency_graph.construct();

return VStatus::make_success();
}

void VLoopVPointers::compute_vpointers() {
count_vpointers();
allocate_vpointers_array();
compute_and_cache_vpointers();
NOT_PRODUCT( if (_vloop.is_trace_vpointers()) { print(); } )
}

// Set _vpointers_length to the number of times _body.for_each_mem invokes
// its callback, i.e. one per visited memory node.
void VLoopVPointers::count_vpointers() {
  int num_mem_nodes = 0;
  _body.for_each_mem([&num_mem_nodes] (const MemNode*, int) {
    num_mem_nodes++;
  });
  _vpointers_length = num_mem_nodes;
}

void VLoopVPointers::allocate_vpointers_array() {
uint bytes = _vpointers_length * sizeof(VPointer);
_vpointers = (VPointer*)_arena->Amalloc(bytes);
}

// Fill the _vpointers array: for each (mem, bb_idx) pair reported by
// _body.for_each_mem, construct a VPointer in the next free array slot and
// record the slot index under bb_idx in _bb_idx_to_vpointer.
void VLoopVPointers::compute_and_cache_vpointers() {
  int next_slot = 0;
  _body.for_each_mem([&] (const MemNode* mem, int bb_idx) {
    // Placement new: build the object in the already-allocated slot.
    VPointer* slot = _vpointers + next_slot;
    ::new (slot) VPointer(mem, _vloop);
    _bb_idx_to_vpointer.at_put(bb_idx, next_slot);
    ++next_slot;
  });
}

// Look up the VPointer stored for mem.
// mem must be non-null and satisfy _vloop.in_bb(mem) (asserted). The lookup
// goes mem -> bb_idx (via _body) -> array index (via _bb_idx_to_vpointer),
// and the array index is asserted to lie in [0, _vpointers_length).
const VPointer& VLoopVPointers::vpointer(const MemNode* mem) const {
  assert(mem != nullptr && _vloop.in_bb(mem), "only mem in loop");

  const int bb_idx = _body.bb_idx(mem);
  const int pointers_idx = _bb_idx_to_vpointer.at(bb_idx);
  assert(0 <= pointers_idx && pointers_idx < _vpointers_length, "valid range");

  return _vpointers[pointers_idx];
}

#ifndef PRODUCT
// Print a header line, then one line per memory node visited by
// _body.for_each_mem: a short prefix followed by that node's VPointer::print().
void VLoopVPointers::print() const {
  tty->print_cr("\nVLoopVPointers::print:");

  auto print_entry = [&] (const MemNode* mem, int bb_idx) {
    // Look up first (this may assert), then emit the prefix and the entry.
    const VPointer& entry = vpointer(mem);
    tty->print(" ");
    entry.print();
  };
  _body.for_each_mem(print_entry);
}
#endif

// Construct the dependency graph:
// - Data-dependencies: implicit (taken from C2 node inputs).
// - Memory-dependencies:
Expand Down Expand Up @@ -193,15 +257,15 @@ void VLoopDependencyGraph::construct() {
MemNode* n1 = slice_nodes.at(j);
memory_pred_edges.clear();

VPointer p1(n1, _vloop);
const VPointer& p1 = _vpointers.vpointer(n1);
// For all memory nodes before it, check if we need to add a memory edge.
for (int k = slice_nodes.length() - 1; k > j; k--) {
MemNode* n2 = slice_nodes.at(k);

// Ignore Load-Load dependencies:
if (n1->is_Load() && n2->is_Load()) { continue; }

VPointer p2(n2, _vloop);
const VPointer& p2 = _vpointers.vpointer(n2);
if (!VPointer::not_equal(p1.cmp(p2))) {
// Possibly overlapping memory
memory_pred_edges.append(_body.bb_idx(n2));
Expand Down Expand Up @@ -723,19 +787,24 @@ void VPointer::maybe_add_to_invar(Node* new_invar, bool negate) {
_invar = register_if_new(add);
}

// Function for printing the fields of a VPointer
void VPointer::print() {
#ifndef PRODUCT
tty->print("base: [%d] adr: [%d] scale: %d offset: %d",
_base != nullptr ? _base->_idx : 0,
_adr != nullptr ? _adr->_idx : 0,
_scale, _offset);
if (_invar != nullptr) {
tty->print(" invar: [%d]", _invar->_idx);
}
tty->cr();
#endif
// Function for printing the fields of a VPointer
void VPointer::print() const {
tty->print("VPointer[mem: %4d %10s, ", _mem->_idx, _mem->Name());
tty->print("base: %4d, ", _base != nullptr ? _base->_idx : 0);
tty->print("adr: %4d, ", _adr != nullptr ? _adr->_idx : 0);

tty->print(" base");
print_con_or_idx(_base);

tty->print(" + offset(%4d)", _offset);

tty->print(" + invar");
print_con_or_idx(_invar);

tty->print_cr(" + scale(%4d) * iv]", _scale);
}
#endif

// Following are functions for tracing VPointer match
#ifndef PRODUCT
Expand Down Expand Up @@ -1502,17 +1571,6 @@ AlignmentSolution* AlignmentSolver::solve() const {
}

#ifdef ASSERT
static void print_con_or_idx(const Node* n) {
if (n == nullptr) {
tty->print("(0)");
} else if (n->is_ConI()) {
jint val = n->as_ConI()->get_int();
tty->print("(%d)", val);
} else {
tty->print("[%d]", n->_idx);
}
}

void AlignmentSolver::trace_start_solve() const {
if (is_trace()) {
tty->print(" vector mem_ref:");
Expand Down

1 comment on commit f762637

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.