Skip to content
Closed
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
3dd72b8
8307513: C2: intrinsify Math.max(long,long) and Math.min(long,long)
galderz Jul 8, 2024
e43b390
Add IR test
galderz Jul 18, 2024
f910739
Refactor inline methods to unify their implementations
galderz Jul 19, 2024
ce71a0e
Add math vectorized JMH benchmark
galderz Jul 23, 2024
8d66f7b
Rename benchmark class to MathLoopBench
galderz Aug 27, 2024
605a78a
Fix multi long tests to use long arrays
galderz Aug 27, 2024
1522e26
Implement cmovL as a jump+mov branch
galderz Sep 9, 2024
a64fcda
Switch movl to movq
galderz Sep 11, 2024
13ed872
Fix format of assembly for the movl to movq switch
galderz Sep 11, 2024
da720c5
Distribute values targeting a branch percentage
galderz Sep 12, 2024
0b71cb5
Fix min case to distribute numbers as per probability
galderz Sep 12, 2024
fe3aff4
Fix compilation error
galderz Sep 12, 2024
0047a4b
Add an intermediate % that is more representative of real life
galderz Sep 12, 2024
f622852
Skip single array benchmarks
galderz Sep 16, 2024
6fd8805
Add min/max benchmark that includes loops and reductions
galderz Sep 24, 2024
93799d5
Renamed benchmark methods
galderz Sep 24, 2024
c06e869
Multiply array value in reduction for vectorization to kick in
galderz Sep 25, 2024
28778c8
Remove previous benchmark effort
galderz Sep 27, 2024
bc648aa
Revert "Fix format of assembly for the movl to movq switch"
galderz Sep 27, 2024
7a07aa8
Revert "Switch movl to movq"
galderz Sep 27, 2024
16ae2a3
Revert "Implement cmovL as a jump+mov branch"
galderz Sep 27, 2024
3f712e2
Merge branch 'master' into topic.intrinsify-max-min-long
galderz Oct 17, 2024
6cc5484
Avoid creating result array in benchmark method
galderz Oct 9, 2024
c956012
Encapsulate benchmark state within an inner class
galderz Oct 10, 2024
0b19789
Add clipping range benchmark that uses min/max
galderz Oct 10, 2024
e669893
Restore previous benchmark iterations and default param size
galderz Oct 10, 2024
dcf6b54
Make state class non-final
galderz Oct 10, 2024
b19fc81
Double/Float tests only when avx enabled
galderz Oct 15, 2024
f6f0244
Renamed benchmark class
galderz Oct 17, 2024
0a8718e
Use same default size as in other vector reduction benchmarks
galderz Oct 17, 2024
aca0922
Merge branch 'master' into topic.intrinsify-max-min-long
galderz Dec 12, 2024
65e2e48
Add empty line
galderz Dec 17, 2024
c964c26
Add max reduction test
galderz Dec 17, 2024
cfe0239
Fix style
galderz Dec 17, 2024
7353a07
Adjust min/max identity IR test expectations after changes
galderz Dec 17, 2024
130b475
Added comment around the assertions
galderz Dec 17, 2024
4d4753f
Tests should also run on aarch64 asimd=true envs
galderz Dec 18, 2024
fb0f731
Fix license header
galderz Dec 18, 2024
c049198
Test can only run with 256 bit registers or bigger
galderz Jan 9, 2025
abbaf87
Make sure it runs with cpus with either avx512 or asimd
galderz Jan 13, 2025
94397d3
Fix copyright years
galderz Jan 17, 2025
f83d886
Renaming methods and variables and add docu on algorithms
galderz Jan 17, 2025
724a346
Fix typo
galderz Jan 17, 2025
a190ae6
Merge branch 'master' into topic.intrinsify-max-min-long
galderz Feb 7, 2025
d0e793a
Add simple reduction benchmarks on top of multiply ones
galderz Feb 17, 2025
38537fc
Add assertion comments
galderz Mar 7, 2025
1aa690d
Merge branch 'master' into topic.intrinsify-max-min-long
galderz Mar 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions src/hotspot/cpu/x86/x86_64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -6407,11 +6407,17 @@ instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

ins_cost(200); // XXX
format %{ "cmovq$cop $dst, $src\t# signed, long" %}
format %{ "jn$cop skip\t# signed cmove long\n\t"
"movq $dst, $src\n"
"skip:" %}
ins_encode %{
__ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
Label Lskip;
// Invert sense of branch from sense of CMOV
__ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
__ movq($dst$$Register, $src$$Register);
__ bind(Lskip);
%}
ins_pipe(pipe_cmov_reg); // XXX
ins_pipe(pipe_slow);
%}

instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
Expand Down
218 changes: 80 additions & 138 deletions src/hotspot/share/opto/library_call.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -687,21 +687,17 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case vmIntrinsics::_max:
case vmIntrinsics::_min_strict:
case vmIntrinsics::_max_strict:
return inline_min_max(intrinsic_id());

case vmIntrinsics::_maxF:
case vmIntrinsics::_minL:
case vmIntrinsics::_maxL:
case vmIntrinsics::_minF:
case vmIntrinsics::_maxD:
case vmIntrinsics::_maxF:
case vmIntrinsics::_minD:
case vmIntrinsics::_maxF_strict:
case vmIntrinsics::_maxD:
case vmIntrinsics::_minF_strict:
case vmIntrinsics::_maxD_strict:
case vmIntrinsics::_maxF_strict:
case vmIntrinsics::_minD_strict:
return inline_fp_min_max(intrinsic_id());

case vmIntrinsicID::_maxL:
case vmIntrinsicID::_minL:
return inline_long_min_max(intrinsic_id());
case vmIntrinsics::_maxD_strict:
return inline_min_max(intrinsic_id());

case vmIntrinsics::_VectorUnaryOp:
return inline_vector_nary_operation(1);
Expand Down Expand Up @@ -1940,7 +1936,78 @@ bool LibraryCallKit::inline_notify(vmIntrinsics::ID id) {

//----------------------------inline_min_max-----------------------------------
bool LibraryCallKit::inline_min_max(vmIntrinsics::ID id) {
set_result(generate_min_max(id, argument(0), argument(1)));
Node *a = nullptr;
Node *b = nullptr;
Node *n = nullptr;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you are touching this, then you might as well fix the style.

Suggested change
Node *a = nullptr;
Node *b = nullptr;
Node *n = nullptr;
Node* a = nullptr;
Node* b = nullptr;
Node* n = nullptr;

switch (id) {
case vmIntrinsics::_min:
case vmIntrinsics::_max:
case vmIntrinsics::_minF:
case vmIntrinsics::_maxF:
case vmIntrinsics::_minF_strict:
case vmIntrinsics::_maxF_strict:
case vmIntrinsics::_min_strict:
case vmIntrinsics::_max_strict:
assert(callee()->signature()->size() == 2, "minF/maxF has 2 parameters of size 1 each.");
a = argument(0);
b = argument(1);
break;
case vmIntrinsics::_minD:
case vmIntrinsics::_maxD:
case vmIntrinsics::_minD_strict:
case vmIntrinsics::_maxD_strict:
assert(callee()->signature()->size() == 4, "minD/maxD has 2 parameters of size 2 each.");
a = round_double_node(argument(0));
b = round_double_node(argument(2));
break;
case vmIntrinsics::_minL:
case vmIntrinsics::_maxL:
assert(callee()->signature()->size() == 4, "minL/maxL has 2 parameters of size 2 each.");
a = argument(0);
b = argument(2);
break;
default:
fatal_unexpected_iid(id);
break;
}

switch (id) {
case vmIntrinsics::_min:
case vmIntrinsics::_min_strict:
n = new MinINode(a, b);
break;
case vmIntrinsics::_max:
case vmIntrinsics::_max_strict:
n = new MaxINode(a, b);
break;
case vmIntrinsics::_minF:
case vmIntrinsics::_minF_strict:
n = new MinFNode(a, b);
break;
case vmIntrinsics::_maxF:
case vmIntrinsics::_maxF_strict:
n = new MaxFNode(a, b);
break;
case vmIntrinsics::_minD:
case vmIntrinsics::_minD_strict:
n = new MinDNode(a, b);
break;
case vmIntrinsics::_maxD:
case vmIntrinsics::_maxD_strict:
n = new MaxDNode(a, b);
break;
case vmIntrinsics::_minL:
n = new MinLNode(_gvn.C, a, b);
break;
case vmIntrinsics::_maxL:
n = new MaxLNode(_gvn.C, a, b);
break;
default:
fatal_unexpected_iid(id);
break;
}

set_result(_gvn.transform(n));
return true;
}

Expand Down Expand Up @@ -2019,25 +2086,6 @@ bool LibraryCallKit::inline_math_unsignedMultiplyHigh() {
return true;
}

// Build the int min/max ideal node selected by `id` from operands x0 and y0
// and return it after running it through _gvn.transform().
//   _min / _min_strict -> MinINode
//   _max / _max_strict -> MaxINode
// Any other intrinsic id is reported via fatal_unexpected_iid(); should that
// call return, nullptr is handed back to the caller.
Node*
LibraryCallKit::generate_min_max(vmIntrinsics::ID id, Node* x0, Node* y0) {
  switch (id) {
    case vmIntrinsics::_min:
    case vmIntrinsics::_min_strict:
      return _gvn.transform(new MinINode(x0, y0));

    case vmIntrinsics::_max:
    case vmIntrinsics::_max_strict:
      return _gvn.transform(new MaxINode(x0, y0));

    default:
      fatal_unexpected_iid(id);
      return nullptr;
  }
}

inline int
LibraryCallKit::classify_unsafe_addr(Node* &base, Node* &offset, BasicType type) {
const TypePtr* base_type = TypePtr::NULL_PTR;
Expand Down Expand Up @@ -4381,7 +4429,7 @@ bool LibraryCallKit::inline_array_copyOf(bool is_copyOfRange) {
if (!stopped()) {
// How many elements will we copy from the original?
// The answer is MinI(orig_tail, length).
Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length);
Node* moved = _gvn.transform(new MinINode(orig_tail, length));

// Generate a direct call to the right arraycopy function(s).
// We know the copy is disjoint but we might not know if the
Expand Down Expand Up @@ -8153,112 +8201,6 @@ bool LibraryCallKit::inline_character_compare(vmIntrinsics::ID id) {
return true;
}

//------------------------------inline_fp_min_max------------------------------
// Intrinsic expansion for the float/double min and max ids (including the
// *_strict variants). Picks up the two operands from the call's arguments,
// creates the matching Min/Max F/D node, and publishes the GVN-transformed
// node as the intrinsic's result. Always returns true.
bool LibraryCallKit::inline_fp_min_max(vmIntrinsics::ID id) {
  /* DISABLED BECAUSE METHOD DATA ISN'T COLLECTED PER CALL-SITE, SEE JDK-8015416.

  // The intrinsic should be used only when the API branches aren't predictable,
  // the last one performing the most important comparison. The following heuristic
  // uses the branch statistics to eventually bail out if necessary.

  ciMethodData *md = callee()->method_data();

  if ( md != nullptr && md->is_mature() && md->invocation_count() > 0 ) {
    ciCallProfile cp = caller()->call_profile_at_bci(bci());

    if ( ((double)cp.count()) / ((double)md->invocation_count()) < 0.8 ) {
      // Bail out if the call-site didn't contribute enough to the statistics.
      return false;
    }

    uint taken = 0, not_taken = 0;

    for (ciProfileData *p = md->first_data(); md->is_valid(p); p = md->next_data(p)) {
      if (p->is_BranchData()) {
        taken = ((ciBranchData*)p)->taken();
        not_taken = ((ciBranchData*)p)->not_taken();
      }
    }

    double balance = (((double)taken) - ((double)not_taken)) / ((double)md->invocation_count());
    balance = balance < 0 ? -balance : balance;
    if ( balance > 0.2 ) {
      // Bail out if the most important branch is predictable enough.
      return false;
    }
  }
  */

  // Step 1: fetch the operands. The float variants read argument slots 0 and 1;
  // the double variants read slots 0 and 2 and pass each operand through
  // round_double_node().
  Node* lhs = nullptr;
  Node* rhs = nullptr;
  switch (id) {
    case vmIntrinsics::_minF:
    case vmIntrinsics::_maxF:
    case vmIntrinsics::_minF_strict:
    case vmIntrinsics::_maxF_strict:
      assert(callee()->signature()->size() == 2, "minF/maxF has 2 parameters of size 1 each.");
      lhs = argument(0);
      rhs = argument(1);
      break;

    case vmIntrinsics::_minD:
    case vmIntrinsics::_maxD:
    case vmIntrinsics::_minD_strict:
    case vmIntrinsics::_maxD_strict:
      assert(callee()->signature()->size() == 4, "minD/maxD has 2 parameters of size 2 each.");
      lhs = round_double_node(argument(0));
      rhs = round_double_node(argument(2));
      break;

    default:
      fatal_unexpected_iid(id);
      break;
  }

  // Step 2: create the node kind that corresponds to the intrinsic id.
  Node* result = nullptr;
  switch (id) {
    case vmIntrinsics::_minF:
    case vmIntrinsics::_minF_strict:
      result = new MinFNode(lhs, rhs);
      break;

    case vmIntrinsics::_maxF:
    case vmIntrinsics::_maxF_strict:
      result = new MaxFNode(lhs, rhs);
      break;

    case vmIntrinsics::_minD:
    case vmIntrinsics::_minD_strict:
      result = new MinDNode(lhs, rhs);
      break;

    case vmIntrinsics::_maxD:
    case vmIntrinsics::_maxD_strict:
      result = new MaxDNode(lhs, rhs);
      break;

    default:
      fatal_unexpected_iid(id);
      break;
  }

  set_result(_gvn.transform(result));
  return true;
}

//------------------------------inline_long_min_max------------------------------
// Intrinsic expansion for the long min/max ids (_minL / _maxL). Reads the two
// operands from argument slots 0 and 2, creates a MinLNode or MaxLNode, and
// publishes the GVN-transformed node as the intrinsic's result. Any other id
// is reported via fatal_unexpected_iid(). Always returns true.
bool LibraryCallKit::inline_long_min_max(vmIntrinsics::ID id) {
  assert(callee()->signature()->size() == 4, "minL/maxL has 2 parameters of size 2 each.");

  Node* lhs = argument(0);
  Node* rhs = argument(2);

  Node* result = nullptr;
  if (id == vmIntrinsics::_minL) {
    result = new MinLNode(_gvn.C, lhs, rhs);
  } else if (id == vmIntrinsics::_maxL) {
    result = new MaxLNode(_gvn.C, lhs, rhs);
  } else {
    fatal_unexpected_iid(id);
  }

  set_result(_gvn.transform(result));
  return true;
}

bool LibraryCallKit::inline_profileBoolean() {
Node* counts = argument(1);
const TypeAryPtr* ary = nullptr;
Expand Down
3 changes: 0 additions & 3 deletions src/hotspot/share/opto/library_call.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,6 @@ class LibraryCallKit : public GraphKit {
bool inline_math_subtractExactL(bool is_decrement);
bool inline_min_max(vmIntrinsics::ID id);
bool inline_notify(vmIntrinsics::ID id);
Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
// This returns Type::AnyPtr, RawPtr, or OopPtr.
int classify_unsafe_addr(Node* &base, Node* &offset, BasicType type);
Node* make_unsafe_address(Node*& base, Node* offset, BasicType type = T_ILLEGAL, bool can_cast = false);
Expand Down Expand Up @@ -337,8 +336,6 @@ class LibraryCallKit : public GraphKit {
bool inline_vectorizedMismatch();
bool inline_fma(vmIntrinsics::ID id);
bool inline_character_compare(vmIntrinsics::ID id);
bool inline_fp_min_max(vmIntrinsics::ID id);
bool inline_long_min_max(vmIntrinsics::ID id);
bool inline_galoisCounterMode_AESCrypt();
Node* inline_galoisCounterMode_AESCrypt_predicate();

Expand Down
Loading