Skip to content

Commit 885338b

Browse files
committed
8323582: C2 SuperWord AlignVector: misaligned vector memory access with unaligned native memory
Reviewed-by: roland, kvn
1 parent bb48b73 commit 885338b

27 files changed

+1061
-123
lines changed

src/hotspot/share/jvmci/vmStructs_jvmci.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -708,6 +708,7 @@
708708
declare_constant(Deoptimization::Reason_constraint) \
709709
declare_constant(Deoptimization::Reason_div0_check) \
710710
declare_constant(Deoptimization::Reason_loop_limit_check) \
711+
declare_constant(Deoptimization::Reason_auto_vectorization_check) \
711712
declare_constant(Deoptimization::Reason_type_checked_inlining) \
712713
declare_constant(Deoptimization::Reason_optimized_type_check) \
713714
declare_constant(Deoptimization::Reason_aliasing) \

src/hotspot/share/opto/c2_globals.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,12 @@
346346
develop(bool, TraceLoopUnswitching, false, \
347347
"Trace loop unswitching") \
348348
\
349+
product(bool, LoopMultiversioning, true, DIAGNOSTIC, \
350+
"Enable loop multiversioning (for speculative compilation)") \
351+
\
352+
develop(bool, TraceLoopMultiversioning, false, \
353+
"Trace loop multiversioning") \
354+
\
349355
product(bool, AllowVectorizeOnDemand, true, \
350356
"Globally suppress vectorization set in VectorizeMethod") \
351357
\

src/hotspot/share/opto/cfgnode.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ class IfNode : public MultiBranchNode {
428428
IfNode(Node* control, Node* bol, float p, float fcnt);
429429
IfNode(Node* control, Node* bol, float p, float fcnt, AssertionPredicateType assertion_predicate_type);
430430

431-
static IfNode* make_with_same_profile(IfNode* if_node_profile, Node* ctrl, BoolNode* bol);
431+
static IfNode* make_with_same_profile(IfNode* if_node_profile, Node* ctrl, Node* bol);
432432

433433
virtual int Opcode() const;
434434
virtual bool pinned() const { return true; }

src/hotspot/share/opto/classes.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ macro(OnSpinWait)
277277
macro(Opaque1)
278278
macro(OpaqueLoopInit)
279279
macro(OpaqueLoopStride)
280+
macro(OpaqueMultiversioning)
280281
macro(OpaqueZeroTripGuard)
281282
macro(OpaqueNotNull)
282283
macro(OpaqueInitializedAssertionPredicate)

src/hotspot/share/opto/graphKit.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4086,6 +4086,7 @@ void GraphKit::add_parse_predicates(int nargs) {
40864086
if (UseProfiledLoopPredicate) {
40874087
add_parse_predicate(Deoptimization::Reason_profile_predicate, nargs);
40884088
}
4089+
add_parse_predicate(Deoptimization::Reason_auto_vectorization_check, nargs);
40894090
// Loop Limit Check Predicate should be near the loop.
40904091
add_parse_predicate(Deoptimization::Reason_loop_limit_check, nargs);
40914092
}

src/hotspot/share/opto/ifnode.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ static Node* split_if(IfNode *iff, PhaseIterGVN *igvn) {
469469
return new ConINode(TypeInt::ZERO);
470470
}
471471

472-
IfNode* IfNode::make_with_same_profile(IfNode* if_node_profile, Node* ctrl, BoolNode* bol) {
472+
IfNode* IfNode::make_with_same_profile(IfNode* if_node_profile, Node* ctrl, Node* bol) {
473473
// Assert here that we only try to create a clone from an If node with the same profiling if that actually makes sense.
474474
// Some If node subtypes should not be cloned in this way. In theory, we should not clone BaseCountedLoopEndNodes.
475475
// But they can end up being used as normal If nodes when peeling a loop - they serve as zero-trip guard.
@@ -2177,6 +2177,7 @@ ParsePredicateNode::ParsePredicateNode(Node* control, Deoptimization::DeoptReaso
21772177
switch (deopt_reason) {
21782178
case Deoptimization::Reason_predicate:
21792179
case Deoptimization::Reason_profile_predicate:
2180+
case Deoptimization::Reason_auto_vectorization_check:
21802181
case Deoptimization::Reason_loop_limit_check:
21812182
break;
21822183
default:
@@ -2214,6 +2215,9 @@ void ParsePredicateNode::dump_spec(outputStream* st) const {
22142215
case Deoptimization::DeoptReason::Reason_profile_predicate:
22152216
st->print("Profiled Loop ");
22162217
break;
2218+
case Deoptimization::DeoptReason::Reason_auto_vectorization_check:
2219+
st->print("Auto_Vectorization_Check ");
2220+
break;
22172221
case Deoptimization::DeoptReason::Reason_loop_limit_check:
22182222
st->print("Loop Limit Check ");
22192223
break;

src/hotspot/share/opto/loopTransform.cpp

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,11 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
745745
cl->set_trip_count(cl->trip_count() - 1);
746746
if (cl->is_main_loop()) {
747747
cl->set_normal_loop();
748+
if (cl->is_multiversion()) {
749+
// Peeling also destroys the connection of the main loop
750+
// to the multiversion_if.
751+
cl->set_no_multiversion();
752+
}
748753
#ifndef PRODUCT
749754
if (PrintOpto && VerifyLoopOptimizations) {
750755
tty->print("Peeling a 'main' loop; resetting to 'normal' ");
@@ -1174,8 +1179,9 @@ bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional,
11741179
if (!bol->is_Bool()) {
11751180
assert(bol->is_OpaqueNotNull() ||
11761181
bol->is_OpaqueTemplateAssertionPredicate() ||
1177-
bol->is_OpaqueInitializedAssertionPredicate(),
1178-
"Opaque node of a non-null-check or an Assertion Predicate");
1182+
bol->is_OpaqueInitializedAssertionPredicate() ||
1183+
bol->is_OpaqueMultiversioning(),
1184+
"Opaque node of a non-null-check or an Assertion Predicate or Multiversioning");
11791185
continue;
11801186
}
11811187
if (bol->as_Bool()->_test._test == BoolTest::ne) {
@@ -3354,6 +3360,23 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
33543360
// Do nothing special to pre- and post- loops
33553361
if (cl->is_pre_loop() || cl->is_post_loop()) return true;
33563362

3363+
// With multiversioning, we create a fast_loop and a slow_loop, and a multiversion_if that
3364+
// decides which loop is taken at runtime. At first, the multiversion_if always takes the
3365+
// fast_loop, and we only optimize the fast_loop. Since we are not sure if we will ever use
3366+
// the slow_loop, we delay optimizations for it, so we do not waste compile time and code
3367+
// size. If we never change the condition of the multiversion_if, the slow_loop is eventually
3368+
// folded away after loop-opts. While optimizing the fast_loop, we may want to perform some
3369+
// speculative optimization, for which we need a runtime-check. We add this runtime-check
3370+
// condition to the multiversion_if. Now, it becomes possible to execute the slow_loop at
3371+
// runtime, and we resume optimizations for slow_loop ("un-delay" it).
3372+
// TL;DR: If the slow_loop is still in "delay" mode, check if the multiversion_if was changed
3373+
// and we should now resume optimizations for it.
3374+
if (cl->is_multiversion_delayed_slow_loop() &&
3375+
!phase->try_resume_optimizations_for_delayed_slow_loop(this)) {
3376+
// The slow_loop is still delayed, so hold off on further loop-opts for now.
3377+
return true;
3378+
}
3379+
33573380
// Compute loop trip count from profile data
33583381
compute_profile_trip_cnt(phase);
33593382

@@ -3413,6 +3436,12 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
34133436
if (!phase->may_require_nodes(estimate)) {
34143437
return false;
34153438
}
3439+
3440+
// We are going to add pre-loop and post-loop.
3441+
// But should we also multi-version for auto-vectorization speculative
3442+
// checks, i.e. fast and slow-paths?
3443+
phase->maybe_multiversion_for_auto_vectorization_runtime_checks(this, old_new);
3444+
34163445
phase->insert_pre_post_loops(this, old_new, peel_only);
34173446
}
34183447
// Adjust the pre- and main-loop limits to let the pre and post loops run

0 commit comments

Comments
 (0)