@@ -69,7 +69,8 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
69
69
_nlist(arena(), 8, 0, NULL), // scratch list of nodes
70
70
_stk(arena(), 8, 0, NULL), // scratch stack of nodes
71
71
_lpt(NULL ), // loop tree node
72
- _lp(NULL ), // LoopNode
72
+ _lp(NULL ), // CountedLoopNode
73
+ _pre_loop_end(NULL ), // Pre loop CountedLoopEndNode
73
74
_bb(NULL ), // basic block
74
75
_iv(NULL ), // induction var
75
76
_race_possible(false ), // cases where SDMU is true
@@ -155,10 +156,15 @@ void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
155
156
156
157
if (cl->is_main_loop ()) {
157
158
// Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
158
- CountedLoopEndNode* pre_end = get_pre_loop_end (cl);
159
- if (pre_end == NULL ) return ;
160
- Node *pre_opaq1 = pre_end->limit ();
161
- if (pre_opaq1->Opcode () != Op_Opaque1) return ;
159
+ CountedLoopEndNode* pre_end = find_pre_loop_end (cl);
160
+ if (pre_end == NULL ) {
161
+ return ;
162
+ }
163
+ Node* pre_opaq1 = pre_end->limit ();
164
+ if (pre_opaq1->Opcode () != Op_Opaque1) {
165
+ return ;
166
+ }
167
+ set_pre_loop_end (pre_end);
162
168
}
163
169
164
170
init (); // initialize data structures
@@ -911,8 +917,7 @@ bool SuperWord::ref_is_alignable(SWPointer& p) {
911
917
if (!p.has_iv ()) {
912
918
return true ; // no induction variable
913
919
}
914
- CountedLoopEndNode* pre_end = get_pre_loop_end (lp ()->as_CountedLoop ());
915
- assert (pre_end != NULL , " we must have a correct pre-loop" );
920
+ CountedLoopEndNode* pre_end = pre_loop_end ();
916
921
assert (pre_end->stride_is_con (), " pre loop stride is constant" );
917
922
int preloop_stride = pre_end->stride_con ();
918
923
@@ -3431,21 +3436,19 @@ LoadNode::ControlDependency SuperWord::control_dependency(Node_List* p) {
3431
3436
// to align_to_ref will be a position zero in the vector.
3432
3437
// (iv + k) mod vector_align == 0
3433
3438
void SuperWord::align_initial_loop_index (MemNode* align_to_ref) {
3434
- CountedLoopNode *main_head = lp ()->as_CountedLoop ();
3435
- assert (main_head->is_main_loop (), " " );
3436
- CountedLoopEndNode* pre_end = get_pre_loop_end (main_head);
3437
- assert (pre_end != NULL , " we must have a correct pre-loop" );
3438
- Node *pre_opaq1 = pre_end->limit ();
3439
+ assert (lp ()->is_main_loop (), " " );
3440
+ CountedLoopEndNode* pre_end = pre_loop_end ();
3441
+ Node* pre_opaq1 = pre_end->limit ();
3439
3442
assert (pre_opaq1->Opcode () == Op_Opaque1, " " );
3440
- Opaque1Node * pre_opaq = (Opaque1Node*)pre_opaq1;
3441
- Node * lim0 = pre_opaq->in (1 );
3443
+ Opaque1Node* pre_opaq = (Opaque1Node*)pre_opaq1;
3444
+ Node* lim0 = pre_opaq->in (1 );
3442
3445
3443
3446
// Where we put new limit calculations
3444
- Node * pre_ctrl = pre_end-> loopnode ()->in (LoopNode::EntryControl);
3447
+ Node* pre_ctrl = pre_loop_head ()->in (LoopNode::EntryControl);
3445
3448
3446
3449
// Ensure the original loop limit is available from the
3447
3450
// pre-loop Opaque1 node.
3448
- Node * orig_limit = pre_opaq->original_loop_limit ();
3451
+ Node* orig_limit = pre_opaq->original_loop_limit ();
3449
3452
assert (orig_limit != NULL && _igvn.type (orig_limit) != Type::TOP, " " );
3450
3453
3451
3454
SWPointer align_to_ref_p (align_to_ref, this , NULL , false );
@@ -3596,7 +3599,7 @@ void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
3596
3599
3597
3600
// ----------------------------find_pre_loop_end---------------------------
3598
3601
// Find pre loop end from main loop. Returns null if none.
3599
- CountedLoopEndNode* SuperWord::get_pre_loop_end (CountedLoopNode* cl) {
3602
+ CountedLoopEndNode* SuperWord::find_pre_loop_end (CountedLoopNode* cl) const {
3600
3603
// The loop cannot be optimized if the graph shape at
3601
3604
// the loop entry is inappropriate.
3602
3605
if (!PhaseIdealLoop::is_canonical_loop_entry (cl)) {
@@ -3724,7 +3727,7 @@ SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool anal
3724
3727
// Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
3725
3728
Node* base = adr->in (AddPNode::Base);
3726
3729
// The base address should be loop invariant
3727
- if (! invariant (base)) {
3730
+ if (is_main_loop_member (base)) {
3728
3731
assert (!valid (), " base address is loop variant" );
3729
3732
return ;
3730
3733
}
@@ -3753,7 +3756,7 @@ SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool anal
3753
3756
break ; // stop looking at addp's
3754
3757
}
3755
3758
}
3756
- if (! invariant (adr)) {
3759
+ if (is_main_loop_member (adr)) {
3757
3760
assert (!valid (), " adr is loop variant" );
3758
3761
return ;
3759
3762
}
@@ -3783,12 +3786,23 @@ SWPointer::SWPointer(SWPointer* p) :
3783
3786
#endif
3784
3787
{}
3785
3788
3789
+ bool SWPointer::is_main_loop_member (Node* n) const {
3790
+ Node* n_c = phase ()->get_ctrl (n);
3791
+ return lpt ()->is_member (phase ()->get_loop (n_c));
3792
+ }
3786
3793
3787
- bool SWPointer::invariant (Node* n) {
3794
+ bool SWPointer::invariant (Node* n) const {
3788
3795
NOT_PRODUCT (Tracer::Depth dd;)
3789
- Node * n_c = phase ()->get_ctrl (n);
3796
+ Node* n_c = phase ()->get_ctrl (n);
3790
3797
NOT_PRODUCT (_tracer.invariant_1 (n, n_c);)
3791
- return !lpt ()->is_member (phase ()->get_loop (n_c));
3798
+ bool is_not_member = !is_main_loop_member (n);
3799
+ if (is_not_member && _slp->lp ()->is_main_loop ()) {
3800
+ // Check that n_c dominates the pre loop head node. If it does not, then we cannot use n as invariant for the pre loop
3801
+ // CountedLoopEndNode check because n_c is either part of the pre loop or between the pre and the main loop (illegal
3802
+ // invariant: Happens, for example, when n_c is a CastII node that prevents data nodes to flow above the main loop).
3803
+ return phase ()->is_dominator (n_c, _slp->pre_loop_head ());
3804
+ }
3805
+ return is_not_member;
3792
3806
}
3793
3807
3794
3808
// ------------------------scaled_iv_plus_offset--------------------
@@ -3851,7 +3865,7 @@ bool SWPointer::scaled_iv(Node* n) {
3851
3865
NOT_PRODUCT (_tracer.scaled_iv_3 (n, _scale);)
3852
3866
return true ;
3853
3867
}
3854
- if (_analyze_only && (invariant (n) == false )) {
3868
+ if (_analyze_only && (is_main_loop_member (n))) {
3855
3869
_nstack->push (n, _stack_idx++);
3856
3870
}
3857
3871
@@ -3937,7 +3951,7 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) {
3937
3951
return false ;
3938
3952
}
3939
3953
3940
- if (_analyze_only && ( invariant (n) == false )) {
3954
+ if (_analyze_only && is_main_loop_member (n )) {
3941
3955
_nstack->push (n, _stack_idx++);
3942
3956
}
3943
3957
if (opc == Op_AddI) {
@@ -3970,20 +3984,27 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) {
3970
3984
return true ;
3971
3985
}
3972
3986
}
3973
- if (invariant (n)) {
3987
+
3988
+ if (!is_main_loop_member (n)) {
3989
+ // 'n' is loop invariant. Skip range check dependent CastII nodes before checking if 'n' is dominating the pre loop.
3974
3990
if (opc == Op_ConvI2L) {
3975
3991
n = n->in (1 );
3976
3992
if (n->Opcode () == Op_CastII &&
3977
3993
n->as_CastII ()->has_range_check ()) {
3978
3994
// Skip range check dependent CastII nodes
3979
- assert (invariant (n), " sanity" );
3995
+ assert (! is_main_loop_member (n), " sanity" );
3980
3996
n = n->in (1 );
3981
3997
}
3998
+
3999
+ // Check if 'n' can really be used as invariant (not in main loop and dominating the pre loop).
4000
+ if (invariant (n)) {
4001
+ _negate_invar = negate;
4002
+ _invar = n;
4003
+ NOT_PRODUCT (_tracer.offset_plus_k_10 (n, _invar, _negate_invar, _offset);)
4004
+ return true ;
4005
+ }
3982
4006
}
3983
- _negate_invar = negate;
3984
- _invar = n;
3985
- NOT_PRODUCT (_tracer.offset_plus_k_10 (n, _invar, _negate_invar, _offset);)
3986
- return true ;
4007
+ return false ;
3987
4008
}
3988
4009
3989
4010
NOT_PRODUCT (_tracer.offset_plus_k_11 (n);)
@@ -4004,8 +4025,10 @@ void SWPointer::print() {
4004
4025
4005
4026
// ----------------------------tracing------------------------
4006
4027
#ifndef PRODUCT
4007
- void SWPointer::Tracer::print_depth () {
4008
- for (int ii = 0 ; ii<_depth; ++ii) tty->print (" " );
4028
+ void SWPointer::Tracer::print_depth () const {
4029
+ for (int ii = 0 ; ii < _depth; ++ii) {
4030
+ tty->print (" " );
4031
+ }
4009
4032
}
4010
4033
4011
4034
void SWPointer::Tracer::ctor_1 (Node* mem) {
@@ -4057,7 +4080,7 @@ void SWPointer::Tracer::ctor_6(Node* mem) {
4057
4080
}
4058
4081
}
4059
4082
4060
- void SWPointer::Tracer::invariant_1 (Node *n, Node *n_c) {
4083
+ void SWPointer::Tracer::invariant_1 (Node *n, Node *n_c) const {
4061
4084
if (_slp->do_vector_loop () && _slp->is_debug () && _slp->_lpt ->is_member (_slp->_phase ->get_loop (n_c)) != (int )_slp->in_bb (n)) {
4062
4085
int is_member = _slp->_lpt ->is_member (_slp->_phase ->get_loop (n_c));
4063
4086
int in_bb = _slp->in_bb (n);
0 commit comments