Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
3dd72b8
8307513: C2: intrinsify Math.max(long,long) and Math.min(long,long)
galderz Jul 8, 2024
e43b390
Add IR test
galderz Jul 18, 2024
f910739
Refactor inline methods to unify their implementations
galderz Jul 19, 2024
ce71a0e
Add math vectorized JMH benchmark
galderz Jul 23, 2024
8d66f7b
Rename benchmark class to MathLoopBench
galderz Aug 27, 2024
605a78a
Fix multi long tests to use long arrays
galderz Aug 27, 2024
1522e26
Implement cmovL as a jump+mov branch
galderz Sep 9, 2024
a64fcda
Switch movl to movq
galderz Sep 11, 2024
13ed872
Fix format of assembly for the movl to movq switch
galderz Sep 11, 2024
da720c5
Distribute values targeting a branch percentage
galderz Sep 12, 2024
0b71cb5
Fix min case to distribute numbers as per probability
galderz Sep 12, 2024
fe3aff4
Fix compilation error
galderz Sep 12, 2024
0047a4b
Add an intermediate % that is more representative of real life
galderz Sep 12, 2024
f622852
Skip single array benchmarks
galderz Sep 16, 2024
6fd8805
Add min/max benchmark that includes loops and reductions
galderz Sep 24, 2024
93799d5
Renamed benchmark methods
galderz Sep 24, 2024
c06e869
Multiply array value in reduction for vectorization to kick in
galderz Sep 25, 2024
28778c8
Remove previous benchmark effort
galderz Sep 27, 2024
bc648aa
Revert "Fix format of assembly for the movl to movq switch"
galderz Sep 27, 2024
7a07aa8
Revert "Switch movl to movq"
galderz Sep 27, 2024
16ae2a3
Revert "Implement cmovL as a jump+mov branch"
galderz Sep 27, 2024
3f712e2
Merge branch 'master' into topic.intrinsify-max-min-long
galderz Oct 17, 2024
6cc5484
Avoid creating result array in benchmark method
galderz Oct 9, 2024
c956012
Encapsulate benchmark state within an inner class
galderz Oct 10, 2024
0b19789
Add clipping range benchmark that uses min/max
galderz Oct 10, 2024
e669893
Restore previous benchmark iterations and default param size
galderz Oct 10, 2024
dcf6b54
Make state class non-final
galderz Oct 10, 2024
b19fc81
Double/Float tests only when avx enabled
galderz Oct 15, 2024
f6f0244
Renamed benchmark class
galderz Oct 17, 2024
0a8718e
Use same default size as in other vector reduction benchmarks
galderz Oct 17, 2024
aca0922
Merge branch 'master' into topic.intrinsify-max-min-long
galderz Dec 12, 2024
65e2e48
Add empty line
galderz Dec 17, 2024
c964c26
Add max reduction test
galderz Dec 17, 2024
cfe0239
Fix style
galderz Dec 17, 2024
7353a07
Adjust min/max identity IR test expectations after changes
galderz Dec 17, 2024
130b475
Added comment around the assertions
galderz Dec 17, 2024
4d4753f
Tests should also run on aarch64 asimd=true envs
galderz Dec 18, 2024
fb0f731
Fix license header
galderz Dec 18, 2024
c049198
Test can only run with 256 bit registers or bigger
galderz Jan 9, 2025
abbaf87
Make sure it runs with cpus with either avx512 or asimd
galderz Jan 13, 2025
94397d3
Fix copyright years
galderz Jan 17, 2025
f83d886
Renaming methods and variables and add docu on algorithms
galderz Jan 17, 2025
724a346
Fix typo
galderz Jan 17, 2025
a190ae6
Merge branch 'master' into topic.intrinsify-max-min-long
galderz Feb 7, 2025
d0e793a
Add simple reduction benchmarks on top of multiply ones
galderz Feb 17, 2025
38537fc
Add assertion comments
galderz Mar 7, 2025
1aa690d
Merge branch 'master' into topic.intrinsify-max-min-long
galderz Mar 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/hotspot/share/classfile/vmIntrinsics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ class methodHandle;
do_intrinsic(_minF, java_lang_Math, min_name, float2_float_signature, F_S) \
do_intrinsic(_maxD, java_lang_Math, max_name, double2_double_signature, F_S) \
do_intrinsic(_minD, java_lang_Math, min_name, double2_double_signature, F_S) \
do_intrinsic(_maxL, java_lang_Math, max_name, long2_long_signature, F_S) \
do_intrinsic(_minL, java_lang_Math, min_name, long2_long_signature, F_S) \
do_intrinsic(_roundD, java_lang_Math, round_name, double_long_signature, F_S) \
do_intrinsic(_roundF, java_lang_Math, round_name, float_int_signature, F_S) \
do_intrinsic(_dcopySign, java_lang_Math, copySign_name, double2_double_signature, F_S) \
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/share/opto/c2compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,8 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) {
case vmIntrinsics::_max:
case vmIntrinsics::_min_strict:
case vmIntrinsics::_max_strict:
case vmIntrinsics::_maxL:
case vmIntrinsics::_minL:
case vmIntrinsics::_arraycopy:
case vmIntrinsics::_arraySort:
case vmIntrinsics::_arrayPartition:
Expand Down
193 changes: 80 additions & 113 deletions src/hotspot/share/opto/library_call.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -691,17 +691,17 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case vmIntrinsics::_max:
case vmIntrinsics::_min_strict:
case vmIntrinsics::_max_strict:
return inline_min_max(intrinsic_id());

case vmIntrinsics::_maxF:
case vmIntrinsics::_minL:
case vmIntrinsics::_maxL:
case vmIntrinsics::_minF:
case vmIntrinsics::_maxD:
case vmIntrinsics::_maxF:
case vmIntrinsics::_minD:
case vmIntrinsics::_maxF_strict:
case vmIntrinsics::_maxD:
case vmIntrinsics::_minF_strict:
case vmIntrinsics::_maxD_strict:
case vmIntrinsics::_maxF_strict:
case vmIntrinsics::_minD_strict:
return inline_fp_min_max(intrinsic_id());
case vmIntrinsics::_maxD_strict:
return inline_min_max(intrinsic_id());

case vmIntrinsics::_VectorUnaryOp:
return inline_vector_nary_operation(1);
Expand Down Expand Up @@ -1942,7 +1942,78 @@ bool LibraryCallKit::inline_notify(vmIntrinsics::ID id) {

//----------------------------inline_min_max-----------------------------------
// Expands a min/max intrinsic call into a single Min*/Max* node.
//
// id selects both where the two operands are read from and which node class
// is created (MinI/MaxI, MinF/MaxF, MinD/MaxD or MinL/MaxL). The new node is
// passed through _gvn.transform() and installed with set_result().
// Returns true on every path that reaches the end of the function; ids that
// are not listed go to fatal_unexpected_iid().
bool LibraryCallKit::inline_min_max(vmIntrinsics::ID id) {
// NOTE(review): this statement calls generate_min_max(), whose declaration is
// among the two lines deleted from library_call.hpp in this same change. It
// looks like the pre-change body kept by the flattened diff view -- confirm
// before treating it as live code.
set_result(generate_min_max(id, argument(0), argument(1)));
Node* a = nullptr;
Node* b = nullptr;
Node* n = nullptr;
// Step 1: fetch the operands. The second operand is read from argument(1)
// when the signature size is 2, and from argument(2) when it is 4.
switch (id) {
case vmIntrinsics::_min:
case vmIntrinsics::_max:
case vmIntrinsics::_minF:
case vmIntrinsics::_maxF:
case vmIntrinsics::_minF_strict:
case vmIntrinsics::_maxF_strict:
case vmIntrinsics::_min_strict:
case vmIntrinsics::_max_strict:
// NOTE(review): the message names only minF/maxF, but _min/_max and their
// _strict variants share this case as well.
assert(callee()->signature()->size() == 2, "minF/maxF has 2 parameters of size 1 each.");
a = argument(0);
b = argument(1);
break;
case vmIntrinsics::_minD:
case vmIntrinsics::_maxD:
case vmIntrinsics::_minD_strict:
case vmIntrinsics::_maxD_strict:
assert(callee()->signature()->size() == 4, "minD/maxD has 2 parameters of size 2 each.");
// Double operands are additionally passed through round_double_node().
a = round_double_node(argument(0));
b = round_double_node(argument(2));
break;
case vmIntrinsics::_minL:
case vmIntrinsics::_maxL:
assert(callee()->signature()->size() == 4, "minL/maxL has 2 parameters of size 2 each.");
a = argument(0);
b = argument(2);
break;
default:
fatal_unexpected_iid(id);
break;
}

// Step 2: create the node for the requested operation. Plain and _strict ids
// of the same type map to the same node class.
switch (id) {
case vmIntrinsics::_min:
case vmIntrinsics::_min_strict:
n = new MinINode(a, b);
break;
case vmIntrinsics::_max:
case vmIntrinsics::_max_strict:
n = new MaxINode(a, b);
break;
case vmIntrinsics::_minF:
case vmIntrinsics::_minF_strict:
n = new MinFNode(a, b);
break;
case vmIntrinsics::_maxF:
case vmIntrinsics::_maxF_strict:
n = new MaxFNode(a, b);
break;
case vmIntrinsics::_minD:
case vmIntrinsics::_minD_strict:
n = new MinDNode(a, b);
break;
case vmIntrinsics::_maxD:
case vmIntrinsics::_maxD_strict:
n = new MaxDNode(a, b);
break;
// Unlike the other constructors used here, MinLNode/MaxLNode also take _gvn.C.
case vmIntrinsics::_minL:
n = new MinLNode(_gvn.C, a, b);
break;
case vmIntrinsics::_maxL:
n = new MaxLNode(_gvn.C, a, b);
break;
default:
fatal_unexpected_iid(id);
break;
}

// Step 3: transform the new node and publish it as the result of the call.
set_result(_gvn.transform(n));
return true;
}

Expand Down Expand Up @@ -2021,25 +2092,6 @@ bool LibraryCallKit::inline_math_unsignedMultiplyHigh() {
return true;
}

// Builds a MinINode or MaxINode over x0 and y0, chosen by id, and returns the
// node produced by _gvn.transform().
//
// Accepted ids: _min / _min_strict (MinINode) and _max / _max_strict
// (MaxINode). Any other id is handed to fatal_unexpected_iid(); result_val is
// still nullptr on that path.
Node*
LibraryCallKit::generate_min_max(vmIntrinsics::ID id, Node* x0, Node* y0) {
Node* result_val = nullptr;
switch (id) {
case vmIntrinsics::_min:
case vmIntrinsics::_min_strict:
result_val = _gvn.transform(new MinINode(x0, y0));
break;
case vmIntrinsics::_max:
case vmIntrinsics::_max_strict:
result_val = _gvn.transform(new MaxINode(x0, y0));
break;
default:
fatal_unexpected_iid(id);
break;
}
return result_val;
}

inline int
LibraryCallKit::classify_unsafe_addr(Node* &base, Node* &offset, BasicType type) {
const TypePtr* base_type = TypePtr::NULL_PTR;
Expand Down Expand Up @@ -4456,7 +4508,7 @@ bool LibraryCallKit::inline_array_copyOf(bool is_copyOfRange) {
if (!stopped()) {
// How many elements will we copy from the original?
// The answer is MinI(orig_tail, length).
Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length);
Node* moved = _gvn.transform(new MinINode(orig_tail, length));

// Generate a direct call to the right arraycopy function(s).
// We know the copy is disjoint but we might not know if the
Expand Down Expand Up @@ -8477,91 +8529,6 @@ bool LibraryCallKit::inline_character_compare(vmIntrinsics::ID id) {
return true;
}

//------------------------------inline_fp_min_max------------------------------
// Expands a float/double min or max intrinsic (plain and _strict ids) into a
// MinF/MaxF/MinD/MaxD node, transforms it through _gvn and sets it as the
// result of the call. Returns true on every path that reaches the end; ids
// that are not listed go to fatal_unexpected_iid().
bool LibraryCallKit::inline_fp_min_max(vmIntrinsics::ID id) {
// Everything between the comment delimiters below is disabled: the
// profile-based bail-out heuristic is kept as text only and never executes.
/* DISABLED BECAUSE METHOD DATA ISN'T COLLECTED PER CALL-SITE, SEE JDK-8015416.

// The intrinsic should be used only when the API branches aren't predictable,
// the last one performing the most important comparison. The following heuristic
// uses the branch statistics to eventually bail out if necessary.

ciMethodData *md = callee()->method_data();

if ( md != nullptr && md->is_mature() && md->invocation_count() > 0 ) {
ciCallProfile cp = caller()->call_profile_at_bci(bci());

if ( ((double)cp.count()) / ((double)md->invocation_count()) < 0.8 ) {
// Bail out if the call-site didn't contribute enough to the statistics.
return false;
}

uint taken = 0, not_taken = 0;

for (ciProfileData *p = md->first_data(); md->is_valid(p); p = md->next_data(p)) {
if (p->is_BranchData()) {
taken = ((ciBranchData*)p)->taken();
not_taken = ((ciBranchData*)p)->not_taken();
}
}

double balance = (((double)taken) - ((double)not_taken)) / ((double)md->invocation_count());
balance = balance < 0 ? -balance : balance;
if ( balance > 0.2 ) {
// Bail out if the most important branch is predictable enough.
return false;
}
}
*/

Node *a = nullptr;
Node *b = nullptr;
Node *n = nullptr;
// Step 1: fetch the operands. Float cases read argument(0) and argument(1);
// double cases read argument(0) and argument(2).
switch (id) {
case vmIntrinsics::_maxF:
case vmIntrinsics::_minF:
case vmIntrinsics::_maxF_strict:
case vmIntrinsics::_minF_strict:
assert(callee()->signature()->size() == 2, "minF/maxF has 2 parameters of size 1 each.");
a = argument(0);
b = argument(1);
break;
case vmIntrinsics::_maxD:
case vmIntrinsics::_minD:
case vmIntrinsics::_maxD_strict:
case vmIntrinsics::_minD_strict:
assert(callee()->signature()->size() == 4, "minD/maxD has 2 parameters of size 2 each.");
// Double operands are additionally passed through round_double_node().
a = round_double_node(argument(0));
b = round_double_node(argument(2));
break;
default:
fatal_unexpected_iid(id);
break;
}
// Step 2: create the node; plain and _strict ids share a node class.
switch (id) {
case vmIntrinsics::_maxF:
case vmIntrinsics::_maxF_strict:
n = new MaxFNode(a, b);
break;
case vmIntrinsics::_minF:
case vmIntrinsics::_minF_strict:
n = new MinFNode(a, b);
break;
case vmIntrinsics::_maxD:
case vmIntrinsics::_maxD_strict:
n = new MaxDNode(a, b);
break;
case vmIntrinsics::_minD:
case vmIntrinsics::_minD_strict:
n = new MinDNode(a, b);
break;
default:
fatal_unexpected_iid(id);
break;
}
// Step 3: transform the node and publish it as the result of the call.
set_result(_gvn.transform(n));
return true;
}

bool LibraryCallKit::inline_profileBoolean() {
Node* counts = argument(1);
const TypeAryPtr* ary = nullptr;
Expand Down
2 changes: 0 additions & 2 deletions src/hotspot/share/opto/library_call.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,6 @@ class LibraryCallKit : public GraphKit {
bool inline_math_subtractExactL(bool is_decrement);
bool inline_min_max(vmIntrinsics::ID id);
bool inline_notify(vmIntrinsics::ID id);
Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
// This returns Type::AnyPtr, RawPtr, or OopPtr.
int classify_unsafe_addr(Node* &base, Node* &offset, BasicType type);
Node* make_unsafe_address(Node*& base, Node* offset, BasicType type = T_ILLEGAL, bool can_cast = false);
Expand Down Expand Up @@ -354,7 +353,6 @@ class LibraryCallKit : public GraphKit {
bool inline_vectorizedMismatch();
bool inline_fma(vmIntrinsics::ID id);
bool inline_character_compare(vmIntrinsics::ID id);
bool inline_fp_min_max(vmIntrinsics::ID id);
bool inline_galoisCounterMode_AESCrypt();
Node* inline_galoisCounterMode_AESCrypt_predicate();

Expand Down
2 changes: 2 additions & 0 deletions src/java.base/share/classes/java/lang/Math.java
Original file line number Diff line number Diff line change
Expand Up @@ -2033,6 +2033,7 @@ public static int max(int a, int b) {
* @param b another argument.
* @return the larger of {@code a} and {@code b}.
*/
@IntrinsicCandidate
public static long max(long a, long b) {
// On a tie (a == b) the first argument is returned; both values are equal,
// so the choice makes no difference to the result.
return (a >= b) ? a : b;
}
Expand Down Expand Up @@ -2128,6 +2129,7 @@ public static int min(int a, int b) {
* @param b another argument.
* @return the smaller of {@code a} and {@code b}.
*/
@IntrinsicCandidate
public static long min(long a, long b) {
// On a tie (a == b) the first argument is returned; both values are equal,
// so the choice makes no difference to the result.
return (a <= b) ? a : b;
}
Expand Down
10 changes: 6 additions & 4 deletions test/hotspot/jtreg/compiler/c2/irTests/TestMinMaxIdentities.java
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,11 @@ public int intMaxMax(int a, int b) {

// Longs

// As Math.min/max(LL) is not intrinsified, it first needs to be transformed into CMoveL and then MinL/MaxL before
// As Math.min/max(LL) is not intrinsified in the backend, it first needs to be transformed into CMoveL and then MinL/MaxL before
// the identity can be matched. However, the outer min/max is not transformed into CMove because of the CMove cost model.
// As JDK-8307513 adds intrinsics for the methods, the tests will be updated then.
// JDK-8307513 adds intrinsics for the methods such that MinL/MaxL replace the ternary operations,
// and this enables identities to be matched.
// Note that before JDK-8307513 MinL/MaxL nodes were already present before macro expansion.

@Test
@IR(applyIfPlatform = { "riscv64", "false" }, phase = { CompilePhase.BEFORE_MACRO_EXPANSION }, counts = { IRNode.MIN_L, "1" })
Expand All @@ -123,13 +125,13 @@ public long longMinMin(long a, long b) {
}

@Test
// NOTE(review): two @IR expectations are shown here (exactly one MIN_L before
// macro expansion vs. no MIN_L/MAX_L at all). Presumably the first is the
// pre-change rule and the second its replacement -- confirm against the final file.
@IR(applyIfPlatform = { "riscv64", "false" }, phase = { CompilePhase.BEFORE_MACRO_EXPANSION }, counts = { IRNode.MIN_L, "1" })
@IR(failOn = { IRNode.MIN_L, IRNode.MAX_L })
public long longMinMax(long a, long b) {
// max(a, b) is never smaller than a, so the outer min always yields a.
return Math.min(a, Math.max(a, b));
}

@Test
// NOTE(review): two @IR expectations are shown here (exactly one MAX_L before
// macro expansion vs. no MIN_L/MAX_L at all). Presumably the first is the
// pre-change rule and the second its replacement -- confirm against the final file.
@IR(applyIfPlatform = { "riscv64", "false" }, phase = { CompilePhase.BEFORE_MACRO_EXPANSION }, counts = { IRNode.MAX_L, "1" })
@IR(failOn = { IRNode.MIN_L, IRNode.MAX_L })
public long longMaxMin(long a, long b) {
// min(a, b) is never larger than a, so the outer max always yields a.
return Math.max(a, Math.min(a, b));
}
Expand Down
Loading