@@ -2926,52 +2926,101 @@ RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
2926
2926
return reg;
2927
2927
}
2928
2928
2929
- // ------------------------------ insert_cmpi_loop_exit -------------------------------------
2930
- // Clone a signed compare loop exit from an unsigned compare and
2931
- // insert it before the unsigned cmp on the stay-in-loop path.
2932
- // All new nodes inserted in the dominator tree between the original
2933
- // if and it's projections. The original if test is replaced with
2934
- // a constant to force the stay-in-loop path.
2929
+ // Idea
2930
+ // ----
2931
+ // Partial Peeling tries to rotate the loop in such a way that it can later be turned into a counted loop. Counted loops
2932
+ // require a signed loop exit test. When calling this method, we've only found a suitable unsigned test to partial peel
2933
+ // with. Therefore, we try to split off a signed loop exit test from the unsigned test such that it can be used as new
2934
+ // loop exit while keeping the unsigned test unchanged and preserving the same behavior as if we've used the unsigned
2935
+ // test alone instead:
2935
2936
//
2936
- // This is done to make sure that the original if and it's projections
2937
- // still dominate the same set of control nodes, that the ctrl() relation
2938
- // from data nodes to them is preserved, and that their loop nesting is
2939
- // preserved.
2937
+ // Before Partial Peeling:
2938
+ // Loop:
2939
+ // <peeled section>
2940
+ // Split off signed loop exit test
2941
+ // <-- CUT HERE -->
2942
+ // Unchanged unsigned loop exit test
2943
+ // <rest of unpeeled section>
2944
+ // goto Loop
2940
2945
//
2941
- // before
2942
- // if(i <u limit) unsigned compare loop exit
2946
+ // After Partial Peeling:
2947
+ // <cloned peeled section>
2948
+ // Cloned split off signed loop exit test
2949
+ // Loop:
2950
+ // Unchanged unsigned loop exit test
2951
+ // <rest of unpeeled section>
2952
+ // <peeled section>
2953
+ // Split off signed loop exit test
2954
+ // goto Loop
2955
+ //
2956
+ // Details
2957
+ // -------
2958
+ // Before:
2959
+ // if (i <u limit) Unsigned loop exit condition
2943
2960
// / |
2944
2961
// v v
2945
2962
// exit-proj stay-in-loop-proj
2946
2963
//
2947
- // after
2948
- // if(stay-in-loop-const) original if
2949
- // / |
2950
- // / v
2951
- // / if(i < limit) new signed test
2964
+ // Split off a signed loop exit test (i.e. with CmpI) from an unsigned loop exit test (i.e. with CmpU) and insert it
2965
+ // before the CmpU on the stay-in-loop path and keep both tests:
2966
+ //
2967
+ // if (i <u limit) Signed loop exit test
2968
+ // / |
2969
+ // / if (i <u limit) Unsigned loop exit test
2952
2970
// / / |
2953
- // / / v
2954
- // / / if(i <u limit) new cloned unsigned test
2955
- // / / / |
2956
- // v v v |
2957
- // region |
2958
- // | |
2959
- // dum-if |
2960
- // / | |
2961
- // ether | |
2962
- // v v
2971
+ // v v v
2972
+ // exit-region stay-in-loop-proj
2973
+ //
2974
+ // Implementation
2975
+ // --------------
2976
+ // We need to make sure that the new signed loop exit test is properly inserted into the graph such that the unsigned
2977
+ // loop exit test still dominates the same set of control nodes, the ctrl() relation from data nodes to both loop
2978
+ // exit tests is preserved, and their loop nesting is correct.
2979
+ //
2980
+ // To achieve that, we clone the unsigned loop exit test completely (leave it unchanged), insert the signed loop exit
2981
+ // test above it and kill the original unsigned loop exit test by setting it's condition to a constant
2982
+ // (i.e. stay-in-loop-const in graph below) such that IGVN can fold it later:
2983
+ //
2984
+ // if (stay-in-loop-const) Killed original unsigned loop exit test
2985
+ // / |
2986
+ // / v
2987
+ // / if (i < limit) Split off signed loop exit test
2988
+ // / / |
2989
+ // / / v
2990
+ // / / if (i <u limit) Cloned unsigned loop exit test
2991
+ // / / / |
2992
+ // v v v |
2993
+ // exit-region |
2994
+ // | |
2995
+ // dummy-if |
2996
+ // / | |
2997
+ // dead | |
2998
+ // v v
2963
2999
// exit-proj stay-in-loop-proj
2964
3000
//
2965
- IfNode* PhaseIdealLoop::insert_cmpi_loop_exit (IfNode* if_cmpu, IdealLoopTree *loop) {
3001
+ // Note: The dummy-if is inserted to create a region to merge the loop exits between the original to be killed unsigned
3002
+ // loop exit test and its exit projection while keeping the exit projection (also see insert_region_before_proj()).
3003
+ //
3004
+ // Requirements
3005
+ // ------------
3006
+ // Note that we can only split off a signed loop exit test from the unsigned loop exit test when the behavior is exactly
3007
+ // the same as before with only a single unsigned test. This is only possible if certain requirements are met.
3008
+ // Otherwise, we need to bail out (see comments in the code below).
3009
+ IfNode* PhaseIdealLoop::insert_cmpi_loop_exit (IfNode* if_cmpu, IdealLoopTree* loop) {
2966
3010
const bool Signed = true ;
2967
3011
const bool Unsigned = false ;
2968
3012
2969
3013
BoolNode* bol = if_cmpu->in (1 )->as_Bool ();
2970
- if (bol->_test ._test != BoolTest::lt) return nullptr ;
3014
+ if (bol->_test ._test != BoolTest::lt) {
3015
+ return nullptr ;
3016
+ }
2971
3017
CmpNode* cmpu = bol->in (1 )->as_Cmp ();
2972
- if (cmpu->Opcode () != Op_CmpU) return nullptr ;
3018
+ assert (cmpu->Opcode () == Op_CmpU, " must be unsigned comparison" );
3019
+
2973
3020
int stride = stride_of_possible_iv (if_cmpu);
2974
- if (stride == 0 ) return nullptr ;
3021
+ if (stride == 0 ) {
3022
+ return nullptr ;
3023
+ }
2975
3024
2976
3025
Node* lp_proj = stay_in_loop (if_cmpu, loop);
2977
3026
guarantee (lp_proj != nullptr , " null loop node" );
@@ -2983,22 +3032,101 @@ IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *lo
2983
3032
// We therefore can't add a single exit condition.
2984
3033
return nullptr ;
2985
3034
}
2986
- // The loop exit condition is !(i <u limit) ==> (i < 0 || i >= limit).
2987
- // Split out the exit condition (i < 0) for stride < 0 or (i >= limit) for stride > 0.
2988
- Node* limit = nullptr ;
3035
+ // The unsigned loop exit condition is
3036
+ // !(i <u limit)
3037
+ // = i >=u limit
3038
+ //
3039
+ // First, we note that for any x for which
3040
+ // 0 <= x <= INT_MAX
3041
+ // we can convert x to an unsigned int and still get the same guarantee:
3042
+ // 0 <= (uint) x <= INT_MAX = (uint) INT_MAX
3043
+ // 0 <=u (uint) x <=u INT_MAX = (uint) INT_MAX (LEMMA)
3044
+ //
3045
+ // With that in mind, if
3046
+ // limit >= 0 (COND)
3047
+ // then the unsigned loop exit condition
3048
+ // i >=u limit (ULE)
3049
+ // is equivalent to
3050
+ // i < 0 || i >= limit (SLE-full)
3051
+ // because either i is negative and therefore always greater than MAX_INT when converting to unsigned
3052
+ // (uint) i >=u MAX_INT >= limit >= 0
3053
+ // or otherwise
3054
+ // i >= limit >= 0
3055
+ // holds due to (LEMMA).
3056
+ //
3057
+ // For completeness, a counterexample with limit < 0:
3058
+ // Assume i = -3 and limit = -2:
3059
+ // i < 0
3060
+ // -2 < 0
3061
+ // is true and thus also "i < 0 || i >= limit". But
3062
+ // i >=u limit
3063
+ // -3 >=u -2
3064
+ // is false.
3065
+ Node* limit = cmpu->in (2 );
3066
+ const TypeInt* type_limit = _igvn.type (limit)->is_int ();
3067
+ if (type_limit->_lo < 0 ) {
3068
+ return nullptr ;
3069
+ }
3070
+
3071
+ // We prove below that we can extract a single signed loop exit condition from (SLE-full), depending on the stride:
3072
+ // stride < 0:
3073
+ // i < 0 (SLE = SLE-negative)
3074
+ // stride > 0:
3075
+ // i >= limit (SLE = SLE-positive)
3076
+ // such that we have the following graph before Partial Peeling with stride > 0 (similar for stride < 0):
3077
+ //
3078
+ // Loop:
3079
+ // <peeled section>
3080
+ // i >= limit (SLE-positive)
3081
+ // <-- CUT HERE -->
3082
+ // i >=u limit (ULE)
3083
+ // <rest of unpeeled section>
3084
+ // goto Loop
3085
+ //
3086
+ // We exit the loop if:
3087
+ // (SLE) is true OR (ULE) is true
3088
+ // However, if (SLE) is true then (ULE) also needs to be true to ensure the exact same behavior. Otherwise, we wrongly
3089
+ // exit a loop that should not have been exited if we did not apply Partial Peeling. More formally, we need to ensure:
3090
+ // (SLE) IMPLIES (ULE)
3091
+ // This indeed holds when (COND) is given:
3092
+ // - stride > 0:
3093
+ // i >= limit // (SLE = SLE-positive)
3094
+ // i >= limit >= 0 // (COND)
3095
+ // i >=u limit >= 0 // (LEMMA)
3096
+ // which is the unsigned loop exit condition (ULE).
3097
+ // - stride < 0:
3098
+ // i < 0 // (SLE = SLE-negative)
3099
+ // (uint) i >u MAX_INT // (NEG) all negative values are greater than MAX_INT when converted to unsigned
3100
+ // MAX_INT >= limit >= 0 // (COND)
3101
+ // MAX_INT >=u limit >= 0 // (LEMMA)
3102
+ // and thus from (NEG) and (LEMMA):
3103
+ // i >=u limit
3104
+ // which is the unsigned loop exit condition (ULE).
3105
+ //
3106
+ //
3107
+ // After Partial Peeling, we have the following structure for stride > 0 (similar for stride < 0):
3108
+ // <cloned peeled section>
3109
+ // i >= limit (SLE-positive)
3110
+ // Loop:
3111
+ // i >=u limit (ULE)
3112
+ // <rest of unpeeled section>
3113
+ // <peeled section>
3114
+ // i >= limit (SLE-positive)
3115
+ // goto Loop
3116
+ Node* rhs_cmpi;
2989
3117
if (stride > 0 ) {
2990
- limit = cmpu-> in ( 2 );
3118
+ rhs_cmpi = limit; // For i >= limit
2991
3119
} else {
2992
- limit = _igvn.makecon (TypeInt::ZERO);
2993
- set_ctrl (limit , C->root ());
3120
+ rhs_cmpi = _igvn.makecon (TypeInt::ZERO); // For i < 0
3121
+ set_ctrl (rhs_cmpi , C->root ());
2994
3122
}
2995
3123
// Create a new region on the exit path
2996
3124
RegionNode* reg = insert_region_before_proj (lp_exit);
2997
3125
guarantee (reg != nullptr , " null region node" );
2998
3126
2999
3127
// Clone the if-cmpu-true-false using a signed compare
3000
3128
BoolTest::mask rel_i = stride > 0 ? bol->_test ._test : BoolTest::ge;
3001
- ProjNode* cmpi_exit = insert_if_before_proj (cmpu->in (1 ), Signed, rel_i, limit , lp_continue);
3129
+ ProjNode* cmpi_exit = insert_if_before_proj (cmpu->in (1 ), Signed, rel_i, rhs_cmpi , lp_continue);
3002
3130
reg->add_req (cmpi_exit);
3003
3131
3004
3132
// Clone the if-cmpu-true-false
0 commit comments