@@ -2987,52 +2987,101 @@ RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
2987
2987
return reg;
2988
2988
}
2989
2989
2990
- // ------------------------------ insert_cmpi_loop_exit -------------------------------------
2991
- // Clone a signed compare loop exit from an unsigned compare and
2992
- // insert it before the unsigned cmp on the stay-in-loop path.
2993
- // All new nodes inserted in the dominator tree between the original
2994
- // if and it's projections. The original if test is replaced with
2995
- // a constant to force the stay-in-loop path.
2990
+ // Idea
2991
+ // ----
2992
+ // Partial Peeling tries to rotate the loop in such a way that it can later be turned into a counted loop. Counted loops
2993
+ // require a signed loop exit test. When calling this method, we've only found a suitable unsigned test to partial peel
2994
+ // with. Therefore, we try to split off a signed loop exit test from the unsigned test such that it can be used as new
2995
+ // loop exit while keeping the unsigned test unchanged and preserving the same behavior as if we've used the unsigned
2996
+ // test alone instead:
2996
2997
//
2997
- // This is done to make sure that the original if and it's projections
2998
- // still dominate the same set of control nodes, that the ctrl() relation
2999
- // from data nodes to them is preserved, and that their loop nesting is
3000
- // preserved.
2998
+ // Before Partial Peeling:
2999
+ // Loop:
3000
+ // <peeled section>
3001
+ // Split off signed loop exit test
3002
+ // <-- CUT HERE -->
3003
+ // Unchanged unsigned loop exit test
3004
+ // <rest of unpeeled section>
3005
+ // goto Loop
3001
3006
//
3002
- // before
3003
- // if(i <u limit) unsigned compare loop exit
3007
+ // After Partial Peeling:
3008
+ // <cloned peeled section>
3009
+ // Cloned split off signed loop exit test
3010
+ // Loop:
3011
+ // Unchanged unsigned loop exit test
3012
+ // <rest of unpeeled section>
3013
+ // <peeled section>
3014
+ // Split off signed loop exit test
3015
+ // goto Loop
3016
+ //
3017
+ // Details
3018
+ // -------
3019
+ // Before:
3020
+ // if (i <u limit) Unsigned loop exit condition
3004
3021
// / |
3005
3022
// v v
3006
3023
// exit-proj stay-in-loop-proj
3007
3024
//
3008
- // after
3009
- // if(stay-in-loop-const) original if
3010
- // / |
3011
- // / v
3012
- // / if(i < limit) new signed test
3025
+ // Split off a signed loop exit test (i.e. with CmpI) from an unsigned loop exit test (i.e. with CmpU) and insert it
3026
+ // before the CmpU on the stay-in-loop path and keep both tests:
3027
+ //
3028
+ // if (i < limit) Signed loop exit test
3029
+ // / |
3030
+ // / if (i <u limit) Unsigned loop exit test
3013
3031
// / / |
3014
- // / / v
3015
- // / / if(i <u limit) new cloned unsigned test
3016
- // / / / |
3017
- // v v v |
3018
- // region |
3019
- // | |
3020
- // dum-if |
3021
- // / | |
3022
- // ether | |
3023
- // v v
3032
+ // v v v
3033
+ // exit-region stay-in-loop-proj
3034
+ //
3035
+ // Implementation
3036
+ // --------------
3037
+ // We need to make sure that the new signed loop exit test is properly inserted into the graph such that the unsigned
3038
+ // loop exit test still dominates the same set of control nodes, the ctrl() relation from data nodes to both loop
3039
+ // exit tests is preserved, and their loop nesting is correct.
3040
+ //
3041
+ // To achieve that, we clone the unsigned loop exit test completely (leave it unchanged), insert the signed loop exit
3042
+ // test above it and kill the original unsigned loop exit test by setting its condition to a constant
3043
+ // (i.e. stay-in-loop-const in graph below) such that IGVN can fold it later:
3044
+ //
3045
+ // if (stay-in-loop-const) Killed original unsigned loop exit test
3046
+ // / |
3047
+ // / v
3048
+ // / if (i < limit) Split off signed loop exit test
3049
+ // / / |
3050
+ // / / v
3051
+ // / / if (i <u limit) Cloned unsigned loop exit test
3052
+ // / / / |
3053
+ // v v v |
3054
+ // exit-region |
3055
+ // | |
3056
+ // dummy-if |
3057
+ // / | |
3058
+ // dead | |
3059
+ // v v
3024
3060
// exit-proj stay-in-loop-proj
3025
3061
//
3026
- IfNode* PhaseIdealLoop::insert_cmpi_loop_exit (IfNode* if_cmpu, IdealLoopTree *loop) {
3062
+ // Note: The dummy-if is inserted to create a region to merge the loop exits between the original to be killed unsigned
3063
+ // loop exit test and its exit projection while keeping the exit projection (also see insert_region_before_proj()).
3064
+ //
3065
+ // Requirements
3066
+ // ------------
3067
+ // Note that we can only split off a signed loop exit test from the unsigned loop exit test when the behavior is exactly
3068
+ // the same as before with only a single unsigned test. This is only possible if certain requirements are met.
3069
+ // Otherwise, we need to bail out (see comments in the code below).
3070
+ IfNode* PhaseIdealLoop::insert_cmpi_loop_exit (IfNode* if_cmpu, IdealLoopTree* loop) {
3027
3071
const bool Signed = true ;
3028
3072
const bool Unsigned = false ;
3029
3073
3030
3074
BoolNode* bol = if_cmpu->in (1 )->as_Bool ();
3031
- if (bol->_test ._test != BoolTest::lt) return nullptr ;
3075
+ if (bol->_test ._test != BoolTest::lt) {
3076
+ return nullptr ;
3077
+ }
3032
3078
CmpNode* cmpu = bol->in (1 )->as_Cmp ();
3033
- if (cmpu->Opcode () != Op_CmpU) return nullptr ;
3079
+ assert (cmpu->Opcode () == Op_CmpU, " must be unsigned comparison" );
3080
+
3034
3081
int stride = stride_of_possible_iv (if_cmpu);
3035
- if (stride == 0 ) return nullptr ;
3082
+ if (stride == 0 ) {
3083
+ return nullptr ;
3084
+ }
3036
3085
3037
3086
Node* lp_proj = stay_in_loop (if_cmpu, loop);
3038
3087
guarantee (lp_proj != nullptr , " null loop node" );
@@ -3044,22 +3093,101 @@ IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *lo
3044
3093
// We therefore can't add a single exit condition.
3045
3094
return nullptr ;
3046
3095
}
3047
- // The loop exit condition is !(i <u limit) ==> (i < 0 || i >= limit).
3048
- // Split out the exit condition (i < 0) for stride < 0 or (i >= limit) for stride > 0.
3049
- Node* limit = nullptr ;
3096
+ // The unsigned loop exit condition is
3097
+ // !(i <u limit)
3098
+ // = i >=u limit
3099
+ //
3100
+ // First, we note that for any x for which
3101
+ // 0 <= x <= INT_MAX
3102
+ // we can convert x to an unsigned int and still get the same guarantee:
3103
+ // 0 <= (uint) x <= INT_MAX = (uint) INT_MAX
3104
+ // 0 <=u (uint) x <=u INT_MAX = (uint) INT_MAX (LEMMA)
3105
+ //
3106
+ // With that in mind, if
3107
+ // limit >= 0 (COND)
3108
+ // then the unsigned loop exit condition
3109
+ // i >=u limit (ULE)
3110
+ // is equivalent to
3111
+ // i < 0 || i >= limit (SLE-full)
3112
+ // because either i is negative and therefore always greater than INT_MAX when converted to unsigned
3113
+ // (uint) i >=u INT_MAX >= limit >= 0
3114
+ // or otherwise
3115
+ // i >= limit >= 0
3116
+ // holds due to (LEMMA).
3117
+ //
3118
+ // For completeness, a counterexample with limit < 0:
3119
+ // Assume i = -3 and limit = -2:
3120
+ // i < 0
3121
+ // -3 < 0
3122
+ // is true and thus also "i < 0 || i >= limit". But
3123
+ // i >=u limit
3124
+ // -3 >=u -2
3125
+ // is false.
3126
+ Node* limit = cmpu->in (2 );
3127
+ const TypeInt* type_limit = _igvn.type (limit)->is_int ();
3128
+ if (type_limit->_lo < 0 ) {
3129
+ return nullptr ;
3130
+ }
3131
+
3132
+ // We prove below that we can extract a single signed loop exit condition from (SLE-full), depending on the stride:
3133
+ // stride < 0:
3134
+ // i < 0 (SLE = SLE-negative)
3135
+ // stride > 0:
3136
+ // i >= limit (SLE = SLE-positive)
3137
+ // such that we have the following graph before Partial Peeling with stride > 0 (similar for stride < 0):
3138
+ //
3139
+ // Loop:
3140
+ // <peeled section>
3141
+ // i >= limit (SLE-positive)
3142
+ // <-- CUT HERE -->
3143
+ // i >=u limit (ULE)
3144
+ // <rest of unpeeled section>
3145
+ // goto Loop
3146
+ //
3147
+ // We exit the loop if:
3148
+ // (SLE) is true OR (ULE) is true
3149
+ // However, if (SLE) is true then (ULE) also needs to be true to ensure the exact same behavior. Otherwise, we wrongly
3150
+ // exit a loop that should not have been exited if we did not apply Partial Peeling. More formally, we need to ensure:
3151
+ // (SLE) IMPLIES (ULE)
3152
+ // This indeed holds when (COND) is given:
3153
+ // - stride > 0:
3154
+ // i >= limit // (SLE = SLE-positive)
3155
+ // i >= limit >= 0 // (COND)
3156
+ // i >=u limit >= 0 // (LEMMA)
3157
+ // which is the unsigned loop exit condition (ULE).
3158
+ // - stride < 0:
3159
+ // i < 0 // (SLE = SLE-negative)
3160
+ // (uint) i >u INT_MAX // (NEG) all negative values are greater than INT_MAX when converted to unsigned
3161
+ // INT_MAX >= limit >= 0 // (COND)
3162
+ // INT_MAX >=u limit >= 0 // (LEMMA)
3163
+ // and thus from (NEG) and (LEMMA):
3164
+ // i >=u limit
3165
+ // which is the unsigned loop exit condition (ULE).
3166
+ //
3167
+ //
3168
+ // After Partial Peeling, we have the following structure for stride > 0 (similar for stride < 0):
3169
+ // <cloned peeled section>
3170
+ // i >= limit (SLE-positive)
3171
+ // Loop:
3172
+ // i >=u limit (ULE)
3173
+ // <rest of unpeeled section>
3174
+ // <peeled section>
3175
+ // i >= limit (SLE-positive)
3176
+ // goto Loop
3177
+ Node* rhs_cmpi;
3050
3178
if (stride > 0 ) {
3051
- limit = cmpu-> in ( 2 );
3179
+ rhs_cmpi = limit; // For i >= limit
3052
3180
} else {
3053
- limit = _igvn.makecon (TypeInt::ZERO);
3054
- set_ctrl (limit , C->root ());
3181
+ rhs_cmpi = _igvn.makecon (TypeInt::ZERO); // For i < 0
3182
+ set_ctrl (rhs_cmpi , C->root ());
3055
3183
}
3056
3184
// Create a new region on the exit path
3057
3185
RegionNode* reg = insert_region_before_proj (lp_exit);
3058
3186
guarantee (reg != nullptr , " null region node" );
3059
3187
3060
3188
// Clone the if-cmpu-true-false using a signed compare
3061
3189
BoolTest::mask rel_i = stride > 0 ? bol->_test ._test : BoolTest::ge;
3062
- ProjNode* cmpi_exit = insert_if_before_proj (cmpu->in (1 ), Signed, rel_i, limit , lp_continue);
3190
+ ProjNode* cmpi_exit = insert_if_before_proj (cmpu->in (1 ), Signed, rel_i, rhs_cmpi , lp_continue);
3063
3191
reg->add_req (cmpi_exit);
3064
3192
3065
3193
// Clone the if-cmpu-true-false
0 commit comments