Skip to content

Commit 9f05c41

Browse files
Sandhya Viswanathan, Rahul Kandu, Razvan Lupusoru, magicus, DamonFool
committed
8265783: Create a separate library for x86 Intel SVML assembly intrinsics
Co-authored-by: Sandhya Viswanathan <sviswanathan@openjdk.org> Co-authored-by: Rahul Kandu <rkandu@openjdk.org> Co-authored-by: Razvan Lupusoru <rlupusoru@openjdk.org> Co-authored-by: Magnus Ihse Bursie <ihse@openjdk.org> Co-authored-by: Jie Fu <jiefu@openjdk.org> Co-authored-by: Ahmet Akkas <ahmet.akkas@intel.com> Co-authored-by: Marius Cornea <marius.cornea@intel.com> Reviewed-by: erikj, kvn, psandoz
1 parent e27c4d4 commit 9f05c41

File tree

119 files changed

+415949
-187
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

119 files changed

+415949
-187
lines changed
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#
2+
# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
3+
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
#
5+
# This code is free software; you can redistribute it and/or modify it
6+
# under the terms of the GNU General Public License version 2 only, as
7+
# published by the Free Software Foundation. Oracle designates this
8+
# particular file as subject to the "Classpath" exception as provided
9+
# by Oracle in the LICENSE file that accompanied this code.
10+
#
11+
# This code is distributed in the hope that it will be useful, but WITHOUT
12+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14+
# version 2 for more details (a copy is included in the LICENSE file that
15+
# accompanied this code).
16+
#
17+
# You should have received a copy of the GNU General Public License version
18+
# 2 along with this work; if not, write to the Free Software Foundation,
19+
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20+
#
21+
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22+
# or visit www.oracle.com if you need additional information or have any
23+
# questions.
24+
#
25+
26+
include LibCommon.gmk
27+
28+
################################################################################
29+
30+
# Set up the build of the "svml" JDK library, which holds the x86 Intel SVML
# assembly intrinsics. The library is only configured when the target OS is
# Linux or Windows and the target CPU is x86_64; for every other target the
# whole section is skipped and nothing is appended to TARGETS.
ifeq ($(call isTargetOs, linux windows)+$(call isTargetCpu, x86_64), true+true)
31+
$(eval $(call SetupJdkLibrary, BUILD_LIBSVML, \
32+
NAME := svml, \
33+
CFLAGS := $(CFLAGS_JDKLIB), \
34+
LDFLAGS := $(LDFLAGS_JDKLIB) \
35+
$(call SET_SHARED_LIBRARY_ORIGIN), \
36+
LDFLAGS_windows := -defaultlib:msvcrt, \
37+
))
38+
39+
TARGETS += $(BUILD_LIBSVML)
40+
endif
41+
42+
################################################################################

src/hotspot/cpu/aarch64/aarch64.ad

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2428,6 +2428,16 @@ const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
24282428
return new TypeVectMask(elemTy, length);
24292429
}
24302430

2431+
// Vector calling convention not yet implemented.
2432+
const bool Matcher::supports_vector_calling_convention(void) {
2433+
return false;
2434+
}
2435+
2436+
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2437+
Unimplemented();
2438+
return OptoRegPair(0, 0);
2439+
}
2440+
24312441
const int Matcher::float_pressure(int default_pressure_threshold) {
24322442
return default_pressure_threshold;
24332443
}

src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -864,6 +864,13 @@ static int c_calling_convention_priv(const BasicType *sig_bt,
864864
return stk_args;
865865
}
866866

867+
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
868+
uint num_bits,
869+
uint total_args_passed) {
870+
Unimplemented();
871+
return 0;
872+
}
873+
867874
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
868875
VMRegPair *regs,
869876
VMRegPair *regs2,

src/hotspot/cpu/arm/arm.ad

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -991,6 +991,16 @@ const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
991991
return NULL;
992992
}
993993

994+
// Vector calling convention not yet implemented.
995+
const bool Matcher::supports_vector_calling_convention(void) {
996+
return false;
997+
}
998+
999+
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1000+
Unimplemented();
1001+
return OptoRegPair(0, 0);
1002+
}
1003+
9941004
const int Matcher::float_pressure(int default_pressure_threshold) {
9951005
return default_pressure_threshold;
9961006
}

src/hotspot/cpu/arm/sharedRuntime_arm.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,13 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
354354
return slot;
355355
}
356356

357+
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
358+
uint num_bits,
359+
uint total_args_passed) {
360+
Unimplemented();
361+
return 0;
362+
}
363+
357364
int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
358365
VMRegPair *regs,
359366
int total_args_passed) {

src/hotspot/cpu/ppc/ppc.ad

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2183,6 +2183,16 @@ const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
21832183
return NULL;
21842184
}
21852185

2186+
// Vector calling convention not yet implemented.
2187+
const bool Matcher::supports_vector_calling_convention(void) {
2188+
return false;
2189+
}
2190+
2191+
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2192+
Unimplemented();
2193+
return OptoRegPair(0, 0);
2194+
}
2195+
21862196
const int Matcher::float_pressure(int default_pressure_threshold) {
21872197
return default_pressure_threshold;
21882198
}

src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,13 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
917917
}
918918
#endif // COMPILER2
919919

920+
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
921+
uint num_bits,
922+
uint total_args_passed) {
923+
Unimplemented();
924+
return 0;
925+
}
926+
920927
static address gen_c2i_adapter(MacroAssembler *masm,
921928
int total_args_passed,
922929
int comp_args_on_stack,

src/hotspot/cpu/s390/s390.ad

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1544,6 +1544,16 @@ const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
15441544
return NULL;
15451545
}
15461546

1547+
// Vector calling convention not yet implemented.
1548+
const bool Matcher::supports_vector_calling_convention(void) {
1549+
return false;
1550+
}
1551+
1552+
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1553+
Unimplemented();
1554+
return OptoRegPair(0, 0);
1555+
}
1556+
15471557
const int Matcher::float_pressure(int default_pressure_threshold) {
15481558
return default_pressure_threshold;
15491559
}

src/hotspot/cpu/s390/sharedRuntime_s390.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -852,6 +852,13 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
852852
return align_up(stk, 2);
853853
}
854854

855+
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
856+
uint num_bits,
857+
uint total_args_passed) {
858+
Unimplemented();
859+
return 0;
860+
}
861+
855862
////////////////////////////////////////////////////////////////////////
856863
//
857864
// Argument shufflers

src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1044,6 +1044,13 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
10441044
return stack;
10451045
}
10461046

1047+
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
1048+
uint num_bits,
1049+
uint total_args_passed) {
1050+
Unimplemented();
1051+
return 0;
1052+
}
1053+
10471054
// A simple move of integer like type
10481055
static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
10491056
if (src.first()->is_stack()) {

src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,6 +1149,31 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
11491149
return stk_args;
11501150
}
11511151

1152+
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
1153+
uint num_bits,
1154+
uint total_args_passed) {
1155+
assert(num_bits == 64 || num_bits == 128 || num_bits == 256 || num_bits == 512,
1156+
"only certain vector sizes are supported for now");
1157+
1158+
static const XMMRegister VEC_ArgReg[32] = {
1159+
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
1160+
xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
1161+
xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23,
1162+
xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31
1163+
};
1164+
1165+
uint stk_args = 0;
1166+
uint fp_args = 0;
1167+
1168+
for (uint i = 0; i < total_args_passed; i++) {
1169+
VMReg vmreg = VEC_ArgReg[fp_args++]->as_VMReg();
1170+
int next_val = num_bits == 64 ? 1 : (num_bits == 128 ? 3 : (num_bits == 256 ? 7 : 15));
1171+
regs[i].set_pair(vmreg->next(next_val), vmreg);
1172+
}
1173+
1174+
return stk_args;
1175+
}
1176+
11521177
void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
11531178
// We always ignore the frame_slots arg and just use the space just below frame pointer
11541179
// which by this time is free to use

src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7002,6 +7002,67 @@ address generate_avx_ghash_processBlocks() {
70027002
StubRoutines::_montgomerySquare
70037003
= CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
70047004
}
7005+
7006+
// Get svml stub routine addresses
7007+
void *libsvml = NULL;
7008+
char ebuf[1024];
7009+
libsvml = os::dll_load(JNI_LIB_PREFIX "svml" JNI_LIB_SUFFIX, ebuf, sizeof ebuf);
7010+
if (libsvml != NULL) {
7011+
// SVML method naming convention
7012+
// All the methods are named as __svml_op<T><N>_ha_<VV>
7013+
// Where:
7014+
// ha stands for high accuracy
7015+
// <T> is optional to indicate float/double
7016+
// Set to f for vector float operation
7017+
// Omitted for vector double operation
7018+
// <N> is the number of elements in the vector
7019+
// 1, 2, 4, 8, 16
7020+
// e.g. 128 bit float vector has 4 float elements
7021+
// <VV> indicates the avx/sse level:
7022+
// z0 is AVX512, l9 is AVX2, e9 is AVX1 and ex is for SSE2
7023+
// e.g. __svml_expf16_ha_z0 is the method for computing 16 element vector float exp using AVX 512 insns
7024+
// __svml_exp8_ha_z0 is the method for computing 8 element vector double exp using AVX 512 insns
7025+
7026+
log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "svml" JNI_LIB_SUFFIX, p2i(libsvml));
7027+
if (UseAVX > 2) {
7028+
for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
7029+
int vop = VectorSupport::VECTOR_OP_SVML_START + op;
7030+
if ((!VM_Version::supports_avx512dq()) &&
7031+
(vop == VectorSupport::VECTOR_OP_LOG || vop == VectorSupport::VECTOR_OP_LOG10 || vop == VectorSupport::VECTOR_OP_POW)) {
7032+
continue;
7033+
}
7034+
snprintf(ebuf, sizeof(ebuf), "__svml_%sf16_ha_z0", VectorSupport::svmlname[op]);
7035+
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libsvml, ebuf);
7036+
7037+
snprintf(ebuf, sizeof(ebuf), "__svml_%s8_ha_z0", VectorSupport::svmlname[op]);
7038+
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libsvml, ebuf);
7039+
}
7040+
}
7041+
const char* avx_sse_str = (UseAVX >= 2) ? "l9" : ((UseAVX == 1) ? "e9" : "ex");
7042+
for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
7043+
int vop = VectorSupport::VECTOR_OP_SVML_START + op;
7044+
if (vop == VectorSupport::VECTOR_OP_POW) {
7045+
continue;
7046+
}
7047+
snprintf(ebuf, sizeof(ebuf), "__svml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7048+
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libsvml, ebuf);
7049+
7050+
snprintf(ebuf, sizeof(ebuf), "__svml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7051+
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libsvml, ebuf);
7052+
7053+
snprintf(ebuf, sizeof(ebuf), "__svml_%sf8_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7054+
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libsvml, ebuf);
7055+
7056+
snprintf(ebuf, sizeof(ebuf), "__svml_%s1_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7057+
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libsvml, ebuf);
7058+
7059+
snprintf(ebuf, sizeof(ebuf), "__svml_%s2_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7060+
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libsvml, ebuf);
7061+
7062+
snprintf(ebuf, sizeof(ebuf), "__svml_%s4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7063+
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libsvml, ebuf);
7064+
}
7065+
}
70057066
#endif // COMPILER2
70067067

70077068
if (UseVectorizedMismatchIntrinsic) {

src/hotspot/cpu/x86/x86.ad

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1707,6 +1707,11 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
17071707
}
17081708
}
17091709
break;
1710+
case Op_CallLeafVector:
1711+
if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
1712+
return false;
1713+
}
1714+
break;
17101715
case Op_AddReductionVI:
17111716
if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
17121717
return false;
@@ -1954,6 +1959,10 @@ const int Matcher::min_vector_size(const BasicType bt) {
19541959
int max_size = max_vector_size(bt);
19551960
// Min size which can be loaded into vector is 4 bytes.
19561961
int size = (type2aelembytes(bt) == 1) ? 4 : 2;
1962+
// Support for calling svml double64 vectors
1963+
if (bt == T_DOUBLE) {
1964+
size = 1;
1965+
}
19571966
return MIN2(size,max_size);
19581967
}
19591968

src/hotspot/cpu/x86/x86_32.ad

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1398,6 +1398,16 @@ uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
13981398

13991399
//=============================================================================
14001400

1401+
// Vector calling convention not supported.
1402+
const bool Matcher::supports_vector_calling_convention() {
1403+
return false;
1404+
}
1405+
1406+
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1407+
Unimplemented();
1408+
return OptoRegPair(0, 0);
1409+
}
1410+
14011411
// Is this branch offset short enough that a short branch can be used?
14021412
//
14031413
// NOTE: If the platform does not provide any short branch variants, then

src/hotspot/cpu/x86/x86_64.ad

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,9 @@ int MachCallDynamicJavaNode::ret_addr_offset()
463463

464464
int MachCallRuntimeNode::ret_addr_offset() {
465465
int offset = 13; // movq r10,#addr; callq (r10)
466-
offset += clear_avx_size();
466+
if (this->ideal_Opcode() != Op_CallLeafVector) {
467+
offset += clear_avx_size();
468+
}
467469
return offset;
468470
}
469471

@@ -1692,6 +1694,23 @@ uint MachUEPNode::size(PhaseRegAlloc* ra_) const
16921694

16931695
//=============================================================================
16941696

1697+
const bool Matcher::supports_vector_calling_convention(void) {
1698+
if (EnableVectorSupport && UseVectorStubs) {
1699+
return true;
1700+
}
1701+
return false;
1702+
}
1703+
1704+
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1705+
assert(EnableVectorSupport && UseVectorStubs, "sanity");
1706+
int lo = XMM0_num;
1707+
int hi = XMM0b_num;
1708+
if (ideal_reg == Op_VecX) hi = XMM0d_num;
1709+
else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1710+
else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1711+
return OptoRegPair(hi, lo);
1712+
}
1713+
16951714
// Is this branch offset short enough that a short branch can be used?
16961715
//
16971716
// NOTE: If the platform does not provide any short branch variants, then
@@ -12997,6 +13016,18 @@ instruct CallLeafDirect(method meth)
1299713016
ins_pipe(pipe_slow);
1299813017
%}
1299913018

// Leaf runtime call that takes vector arguments; made without a safepoint.
// Matches the CallLeafVector ideal node and uses the Java_To_Runtime encoding.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
13030+
1300013031
//
1300113032
instruct CallNativeDirect(method meth)
1300213033
%{

src/hotspot/cpu/zero/sharedRuntime_zero.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,10 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
125125
ShouldNotCallThis();
126126
return 0;
127127
}
128+
129+
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
130+
uint num_bits,
131+
uint total_args_passed) {
132+
ShouldNotCallThis();
133+
return 0;
134+
}

src/hotspot/share/adlc/formssel.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,8 @@ Form::CallType InstructForm::is_ideal_call() const {
419419
idx = 0;
420420
if(_matrule->find_type("CallLeafNoFP",idx)) return Form::JAVA_LEAF;
421421
idx = 0;
422+
if(_matrule->find_type("CallLeafVector",idx)) return Form::JAVA_LEAF;
423+
idx = 0;
422424
if(_matrule->find_type("CallNative",idx)) return Form::JAVA_NATIVE;
423425
idx = 0;
424426

0 commit comments

Comments
 (0)