Skip to content

Commit a38582e

Browse files
author
Pengfei Li
committed
8311691: C2: Remove legacy code related to PostLoopMultiversioning
Reviewed-by: kvn, sviswanathan
1 parent 38f7412 commit a38582e

File tree

9 files changed

+44
-601
lines changed

9 files changed

+44
-601
lines changed

src/hotspot/share/compiler/compilerDefinitions.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -512,15 +512,6 @@ bool CompilerConfig::check_args_consistency(bool status) {
512512
FLAG_SET_CMDLINE(BackgroundCompilation, false);
513513
}
514514

515-
#ifdef COMPILER2
516-
if (PostLoopMultiversioning && !RangeCheckElimination) {
517-
if (!FLAG_IS_DEFAULT(PostLoopMultiversioning)) {
518-
warning("PostLoopMultiversioning disabled because RangeCheckElimination is disabled.");
519-
}
520-
FLAG_SET_CMDLINE(PostLoopMultiversioning, false);
521-
}
522-
#endif // COMPILER2
523-
524515
if (CompilerConfig::is_interpreter_only()) {
525516
if (UseCompiler) {
526517
if (!FLAG_IS_DEFAULT(UseCompiler)) {

src/hotspot/share/opto/c2_globals.hpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,9 +182,6 @@
182182
"Map number of unrolls for main loop via " \
183183
"Superword Level Parallelism analysis") \
184184
\
185-
product(bool, PostLoopMultiversioning, false, EXPERIMENTAL, \
186-
"Multi versioned post loops to eliminate range checks") \
187-
\
188185
notproduct(bool, TraceSuperWordLoopUnrollAnalysis, false, \
189186
"Trace what Superword Level Parallelism analysis applies") \
190187
\

src/hotspot/share/opto/loopTransform.cpp

Lines changed: 0 additions & 194 deletions
Original file line numberDiff line numberDiff line change
@@ -1888,55 +1888,6 @@ void PhaseIdealLoop::insert_vector_post_loop(IdealLoopTree *loop, Node_List &old
18881888
loop->record_for_igvn();
18891889
}
18901890

1891-
1892-
//-------------------------insert_scalar_rced_post_loop------------------------
1893-
// Insert a copy of the rce'd main loop as a post loop,
1894-
// We have not unrolled the main loop, so this is the right time to inject this.
1895-
// Later we will examine the partner of this post loop pair which still has range checks
1896-
// to inject code which tests at runtime whether the range checks are applicable.
1897-
void PhaseIdealLoop::insert_scalar_rced_post_loop(IdealLoopTree *loop, Node_List &old_new) {
1898-
if (!loop->_head->is_CountedLoop()) return;
1899-
1900-
CountedLoopNode *cl = loop->_head->as_CountedLoop();
1901-
1902-
// only process RCE'd main loops
1903-
if (!cl->is_main_loop() || loop->range_checks_present()) return;
1904-
1905-
#ifndef PRODUCT
1906-
if (TraceLoopOpts) {
1907-
tty->print("PostScalarRce ");
1908-
loop->dump_head();
1909-
}
1910-
#endif
1911-
C->set_major_progress();
1912-
1913-
// Find common pieces of the loop being guarded with pre & post loops
1914-
CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1915-
CountedLoopEndNode *main_end = main_head->loopexit();
1916-
// diagnostic to show loop end is not properly formed
1917-
assert(main_end->outcnt() == 2, "1 true, 1 false path only");
1918-
1919-
Node *incr = main_end->incr();
1920-
Node *limit = main_end->limit();
1921-
1922-
// In this case we throw away the result as we are not using it to connect anything else.
1923-
CountedLoopNode *post_head = nullptr;
1924-
insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_head);
1925-
copy_assertion_predicates_to_post_loop(main_head->skip_strip_mined(), post_head, incr, main_head->stride());
1926-
1927-
// It's difficult to be precise about the trip-counts
1928-
// for post loops. They are usually very short,
1929-
// so guess that unit vector trips is a reasonable value.
1930-
post_head->set_profile_trip_cnt(4.0);
1931-
post_head->set_is_rce_post_loop();
1932-
1933-
// Now force out all loop-invariant dominating tests. The optimizer
1934-
// finds some, but we _know_ they are all useless.
1935-
peeled_dom_test_elim(loop, old_new);
1936-
loop->record_for_igvn();
1937-
}
1938-
1939-
19401891
//------------------------------insert_post_loop-------------------------------
19411892
// Insert post loops. Add a post loop to the given loop passed.
19421893
Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree* loop, Node_List& old_new,
@@ -3198,143 +3149,6 @@ bool IdealLoopTree::compute_has_range_checks() const {
31983149
return false;
31993150
}
32003151

3201-
//-------------------------multi_version_post_loops----------------------------
3202-
// Check the range checks that remain, if simple, use the bounds to guard
3203-
// which version of the post loop we execute, one with range checks or one without
3204-
bool PhaseIdealLoop::multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop) {
3205-
bool multi_version_succeeded = false;
3206-
assert(RangeCheckElimination, "");
3207-
CountedLoopNode *legacy_cl = legacy_loop->_head->as_CountedLoop();
3208-
assert(legacy_cl->is_post_loop(), "");
3209-
3210-
// Check for existence of range checks using the unique instance to make a guard with
3211-
Unique_Node_List worklist;
3212-
for (uint i = 0; i < legacy_loop->_body.size(); i++) {
3213-
Node *iff = legacy_loop->_body[i];
3214-
int iff_opc = iff->Opcode();
3215-
if (iff_opc == Op_If || iff_opc == Op_RangeCheck) {
3216-
worklist.push(iff);
3217-
}
3218-
}
3219-
3220-
// Find RCE'd post loop so that we can stage its guard.
3221-
if (legacy_cl->is_canonical_loop_entry() == nullptr) {
3222-
return multi_version_succeeded;
3223-
}
3224-
Node* ctrl = legacy_cl->in(LoopNode::EntryControl);
3225-
Node* iffm = ctrl->in(0);
3226-
3227-
// Now we test that both the post loops are connected
3228-
Node* post_loop_region = iffm->in(0);
3229-
if (post_loop_region == nullptr) return multi_version_succeeded;
3230-
if (!post_loop_region->is_Region()) return multi_version_succeeded;
3231-
Node* covering_region = post_loop_region->in(RegionNode::Control+1);
3232-
if (covering_region == nullptr) return multi_version_succeeded;
3233-
if (!covering_region->is_Region()) return multi_version_succeeded;
3234-
Node* p_f = covering_region->in(RegionNode::Control);
3235-
if (p_f == nullptr) return multi_version_succeeded;
3236-
if (!p_f->is_IfFalse()) return multi_version_succeeded;
3237-
if (!p_f->in(0)->is_CountedLoopEnd()) return multi_version_succeeded;
3238-
CountedLoopEndNode* rce_loop_end = p_f->in(0)->as_CountedLoopEnd();
3239-
if (rce_loop_end == nullptr) return multi_version_succeeded;
3240-
CountedLoopNode* rce_cl = rce_loop_end->loopnode();
3241-
if (rce_cl == nullptr || !rce_cl->is_post_loop()) return multi_version_succeeded;
3242-
CountedLoopNode *known_rce_cl = rce_loop->_head->as_CountedLoop();
3243-
if (rce_cl != known_rce_cl) return multi_version_succeeded;
3244-
3245-
// Then we fetch the cover entry test
3246-
ctrl = rce_cl->in(LoopNode::EntryControl);
3247-
if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return multi_version_succeeded;
3248-
3249-
#ifndef PRODUCT
3250-
if (TraceLoopOpts) {
3251-
tty->print("PostMultiVersion\n");
3252-
rce_loop->dump_head();
3253-
legacy_loop->dump_head();
3254-
}
3255-
#endif
3256-
3257-
// Now fetch the limit we want to compare against
3258-
Node *limit = rce_cl->limit();
3259-
bool first_time = true;
3260-
3261-
// If we got this far, we identified the post loop which has been RCE'd and
3262-
// we have a work list. Now we will try to transform the if guard to cause
3263-
// the loop pair to be multi version executed with the determination left to runtime
3264-
// or the optimizer if full information is known about the given arrays at compile time.
3265-
Node *last_min = nullptr;
3266-
multi_version_succeeded = true;
3267-
while (worklist.size()) {
3268-
Node* rc_iffm = worklist.pop();
3269-
if (rc_iffm->is_If()) {
3270-
Node *rc_bolzm = rc_iffm->in(1);
3271-
if (rc_bolzm->is_Bool()) {
3272-
Node *rc_cmpzm = rc_bolzm->in(1);
3273-
if (rc_cmpzm->is_Cmp()) {
3274-
Node *rc_left = rc_cmpzm->in(2);
3275-
if (rc_left->Opcode() != Op_LoadRange) {
3276-
multi_version_succeeded = false;
3277-
break;
3278-
}
3279-
if (first_time) {
3280-
last_min = rc_left;
3281-
first_time = false;
3282-
} else {
3283-
Node *cur_min = new MinINode(last_min, rc_left);
3284-
last_min = cur_min;
3285-
_igvn.register_new_node_with_optimizer(last_min);
3286-
}
3287-
}
3288-
}
3289-
}
3290-
}
3291-
3292-
// All we have to do is update the limit of the rce loop
3293-
// with the min of our expression and the current limit.
3294-
// We will use this expression to replace the current limit.
3295-
if (last_min && multi_version_succeeded) {
3296-
Node *cur_min = new MinINode(last_min, limit);
3297-
_igvn.register_new_node_with_optimizer(cur_min);
3298-
Node *cmp_node = rce_loop_end->cmp_node();
3299-
_igvn.replace_input_of(cmp_node, 2, cur_min);
3300-
set_ctrl(cur_min, ctrl);
3301-
set_loop(cur_min, rce_loop->_parent);
3302-
3303-
legacy_cl->mark_is_multiversioned();
3304-
rce_cl->mark_is_multiversioned();
3305-
multi_version_succeeded = true;
3306-
3307-
C->set_major_progress();
3308-
}
3309-
3310-
return multi_version_succeeded;
3311-
}
3312-
3313-
//-------------------------poison_rce_post_loop--------------------------------
3314-
// Causes the rce'd post loop to be optimized away if multiversioning fails
3315-
void PhaseIdealLoop::poison_rce_post_loop(IdealLoopTree *rce_loop) {
3316-
CountedLoopNode *rce_cl = rce_loop->_head->as_CountedLoop();
3317-
Node* ctrl = rce_cl->in(LoopNode::EntryControl);
3318-
if (ctrl->is_IfTrue() || ctrl->is_IfFalse()) {
3319-
Node* iffm = ctrl->in(0);
3320-
if (iffm->is_If()) {
3321-
Node* cur_bool = iffm->in(1);
3322-
if (cur_bool->is_Bool()) {
3323-
Node* cur_cmp = cur_bool->in(1);
3324-
if (cur_cmp->is_Cmp()) {
3325-
BoolTest::mask new_test = BoolTest::gt;
3326-
BoolNode *new_bool = new BoolNode(cur_cmp, new_test);
3327-
_igvn.replace_node(cur_bool, new_bool);
3328-
_igvn._worklist.push(new_bool);
3329-
Node* left_op = cur_cmp->in(1);
3330-
_igvn.replace_input_of(cur_cmp, 2, left_op);
3331-
C->set_major_progress();
3332-
}
3333-
}
3334-
}
3335-
}
3336-
}
3337-
33383152
//------------------------------DCE_loop_body----------------------------------
33393153
// Remove simplistic dead code from loop body
33403154
void IdealLoopTree::DCE_loop_body() {
@@ -3864,14 +3678,6 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
38643678
phase->do_range_check(this, old_new);
38653679
}
38663680

3867-
if (should_unroll && !should_peel && PostLoopMultiversioning &&
3868-
Matcher::has_predicated_vectors()) {
3869-
// Try to setup multiversioning on main loops before they are unrolled
3870-
if (cl->is_main_loop() && (cl->unrolled_count() == 1)) {
3871-
phase->insert_scalar_rced_post_loop(this, old_new);
3872-
}
3873-
}
3874-
38753681
// Double loop body for unrolling. Adjust the minimum-trip test (will do
38763682
// twice as many iterations as before) and the main body limit (only do
38773683
// an even number of trips). If we are peeling, we might enable some RCE

src/hotspot/share/opto/loopnode.cpp

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4013,7 +4013,6 @@ void IdealLoopTree::dump_head() {
40134013
if (cl->is_post_loop()) tty->print(" post");
40144014
if (cl->is_vectorized_loop()) tty->print(" vector");
40154015
if (range_checks_present()) tty->print(" rc ");
4016-
if (cl->is_multiversioned()) tty->print(" multi ");
40174016
}
40184017
if (_has_call) tty->print(" has_call");
40194018
if (_has_sfpt) tty->print(" has_sfpt");
@@ -4653,29 +4652,7 @@ void PhaseIdealLoop::build_and_optimize() {
46534652
IdealLoopTree* lpt = iter.current();
46544653
if (lpt->is_counted()) {
46554654
CountedLoopNode *cl = lpt->_head->as_CountedLoop();
4656-
4657-
if (cl->is_rce_post_loop() && !cl->is_vectorized_loop()) {
4658-
assert(PostLoopMultiversioning, "multiversioning must be enabled");
4659-
// Check that the rce'd post loop is encountered first, multiversion after all
4660-
// major main loop optimization are concluded
4661-
if (!C->major_progress()) {
4662-
IdealLoopTree *lpt_next = lpt->_next;
4663-
if (lpt_next && lpt_next->is_counted()) {
4664-
CountedLoopNode *cl = lpt_next->_head->as_CountedLoop();
4665-
if (cl->is_post_loop() && lpt_next->range_checks_present()) {
4666-
if (!cl->is_multiversioned()) {
4667-
if (multi_version_post_loops(lpt, lpt_next) == false) {
4668-
// Cause the rce loop to be optimized away if we fail
4669-
cl->mark_is_multiversioned();
4670-
cl->set_slp_max_unroll(0);
4671-
poison_rce_post_loop(lpt);
4672-
}
4673-
}
4674-
}
4675-
}
4676-
sw.transform_loop(lpt, true);
4677-
}
4678-
} else if (cl->is_main_loop()) {
4655+
if (cl->is_main_loop()) {
46794656
if (!sw.transform_loop(lpt, true)) {
46804657
// Instigate more unrolling for optimization when vectorization fails.
46814658
if (cl->has_passed_slp()) {

src/hotspot/share/opto/loopnode.hpp

Lines changed: 6 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -72,16 +72,13 @@ class LoopNode : public RegionNode {
7272
DoUnrollOnly = 1<<9,
7373
VectorizedLoop = 1<<10,
7474
HasAtomicPostLoop = 1<<11,
75-
IsMultiversioned = 1<<12,
76-
StripMined = 1<<13,
77-
SubwordLoop = 1<<14,
78-
ProfileTripFailed = 1<<15,
79-
LoopNestInnerLoop = 1<<16,
80-
LoopNestLongOuterLoop = 1<<17};
75+
StripMined = 1<<12,
76+
SubwordLoop = 1<<13,
77+
ProfileTripFailed = 1<<14,
78+
LoopNestInnerLoop = 1<<15,
79+
LoopNestLongOuterLoop = 1<<16 };
8180
char _unswitch_count;
8281
enum { _unswitch_max=3 };
83-
char _postloop_flags;
84-
enum { RCEPostLoop = 1 };
8582

8683
// Expected trip count from profile data
8784
float _profile_trip_cnt;
@@ -93,7 +90,6 @@ class LoopNode : public RegionNode {
9390
bool is_inner_loop() const { return _loop_flags & InnerLoop; }
9491
void set_inner_loop() { _loop_flags |= InnerLoop; }
9592

96-
bool is_multiversioned() const { return _loop_flags & IsMultiversioned; }
9793
bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
9894
bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
9995
void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
@@ -110,7 +106,6 @@ class LoopNode : public RegionNode {
110106
void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
111107
void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
112108
void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
113-
void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
114109
void mark_strip_mined() { _loop_flags |= StripMined; }
115110
void clear_strip_mined() { _loop_flags &= ~StripMined; }
116111
void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
@@ -121,9 +116,6 @@ class LoopNode : public RegionNode {
121116
int unswitch_max() { return _unswitch_max; }
122117
int unswitch_count() { return _unswitch_count; }
123118

124-
int is_rce_post_loop() const { return _postloop_flags & RCEPostLoop; }
125-
void set_is_rce_post_loop() { _postloop_flags |= RCEPostLoop; }
126-
127119
void set_unswitch_count(int val) {
128120
assert (val <= unswitch_max(), "too many unswitches");
129121
_unswitch_count = val;
@@ -134,7 +126,7 @@ class LoopNode : public RegionNode {
134126

135127
LoopNode(Node *entry, Node *backedge)
136128
: RegionNode(3), _loop_flags(0), _unswitch_count(0),
137-
_postloop_flags(0), _profile_trip_cnt(COUNT_UNKNOWN) {
129+
_profile_trip_cnt(COUNT_UNKNOWN) {
138130
init_class_id(Class_Loop);
139131
init_req(EntryControl, entry);
140132
init_req(LoopBackControl, backedge);
@@ -322,8 +314,6 @@ class CountedLoopNode : public BaseCountedLoopNode {
322314
int node_count_before_unroll() { return _node_count_before_unroll; }
323315
void set_slp_max_unroll(int unroll_factor) { _slp_maximum_unroll_factor = unroll_factor; }
324316
int slp_max_unroll() const { return _slp_maximum_unroll_factor; }
325-
void set_slp_pack_count(int pack_count) { _slp_vector_pack_count = pack_count; }
326-
int slp_pack_count() const { return _slp_vector_pack_count; }
327317

328318
virtual LoopNode* skip_strip_mined(int expect_skeleton = 1);
329319
OuterStripMinedLoopNode* outer_loop() const;
@@ -1305,9 +1295,6 @@ class PhaseIdealLoop : public PhaseTransform {
13051295
CountedLoopNode* main_head, CountedLoopEndNode* main_end,
13061296
Node*& incr, Node* limit, CountedLoopNode*& post_head);
13071297

1308-
// Add an RCE'd post loop which we will multi-version adapt for run time test path usage
1309-
void insert_scalar_rced_post_loop( IdealLoopTree *loop, Node_List &old_new );
1310-
13111298
// Add a vector post loop between a vector main loop and the current post loop
13121299
void insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new);
13131300
// If Node n lives in the back_ctrl block, we clone a private version of n
@@ -1402,13 +1389,6 @@ class PhaseIdealLoop : public PhaseTransform {
14021389
// Eliminate range-checks and other trip-counter vs loop-invariant tests.
14031390
void do_range_check(IdealLoopTree *loop, Node_List &old_new);
14041391

1405-
// Process post loops which have range checks and try to build a multi-version
1406-
// guard to safely determine if we can execute the post loop which was RCE'd.
1407-
bool multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop);
1408-
1409-
// Cause the rce'd post loop to be optimized away; this happens if we cannot complete multiversioning
1410-
void poison_rce_post_loop(IdealLoopTree *rce_loop);
1411-
14121392
// Create a slow version of the loop by cloning the loop
14131393
// and inserting an if to select fast-slow versions.
14141394
// Return the inserted if.

0 commit comments

Comments
 (0)