From 4eaca6b8b63aeef56c3bb4dd2996f232c414d0e1 Mon Sep 17 00:00:00 2001 From: Mohamed Issa Date: Tue, 24 Jun 2025 14:27:44 -0700 Subject: [PATCH 1/3] Check for special values first in x86_64 cbrt intrinsic --- .../cpu/x86/stubGenerator_x86_64_cbrt.cpp | 52 +++++-------------- 1 file changed, 13 insertions(+), 39 deletions(-) diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp index da60a9be27633..bb77c2935963d 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp @@ -46,6 +46,11 @@ // /******************************************************************************/ +ATTRIBUTE_ALIGNED(4) static const juint _ABS_MASK[] = +{ + 4294967295, 2147483647 +}; + ATTRIBUTE_ALIGNED(4) static const juint _SIG_MASK[] = { 0, 1032192 @@ -188,10 +193,10 @@ address StubGenerator::generate_libmCbrt() { StubCodeMark mark(this, stub_id); address start = __ pc(); - Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1; - Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1; + Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1; Label B1_1, B1_2, B1_4; + address ABS_MASK = (address)_ABS_MASK; address SIG_MASK = (address)_SIG_MASK; address EXP_MASK = (address)_EXP_MASK; address EXP_MSK2 = (address)_EXP_MSK2; @@ -208,8 +213,12 @@ address StubGenerator::generate_libmCbrt() { __ enter(); // required for proper stackwalking of RuntimeStub frame __ bind(B1_1); - __ subq(rsp, 24); - __ movsd(Address(rsp), xmm0); + __ ucomisd(xmm0, ExternalAddress(ZERON), r11 /*rscratch*/); + __ jcc(Assembler::zero, L_2TAG_PACKET_1_0_1); // Branch only if x is +/- zero or NaN + __ movq(xmm1, xmm0); + __ andpd(xmm1, ExternalAddress(ABS_MASK), r11 /*rscratch*/); + __ ucomisd(xmm1, ExternalAddress(INF), r11 /*rscratch*/); + __ jcc(Assembler::equal, B1_4); // Branch only if x is +/- INF __ bind(B1_2); __ movq(xmm7, xmm0); @@ 
-228,8 +237,6 @@ address StubGenerator::generate_libmCbrt() { __ andl(rdx, rax); __ cmpl(rdx, 0); __ jcc(Assembler::equal, L_2TAG_PACKET_0_0_1); // Branch only if |x| is denormalized - __ cmpl(rdx, 524032); - __ jcc(Assembler::equal, L_2TAG_PACKET_1_0_1); // Branch only if |x| is INF or NaN __ shrl(rdx, 8); __ shrq(r9, 8); __ andpd(xmm2, xmm0); @@ -297,8 +304,6 @@ address StubGenerator::generate_libmCbrt() { __ andl(rdx, rax); __ shrl(rdx, 8); __ shrq(r9, 8); - __ cmpl(rdx, 0); - __ jcc(Assembler::equal, L_2TAG_PACKET_3_0_1); // Branch only if |x| is zero __ andpd(xmm2, xmm0); __ andpd(xmm0, xmm5); __ orpd(xmm3, xmm2); @@ -322,41 +327,10 @@ address StubGenerator::generate_libmCbrt() { __ psllq(xmm7, 52); __ jmp(L_2TAG_PACKET_2_0_1); - __ bind(L_2TAG_PACKET_3_0_1); - __ cmpq(r9, 0); - __ jcc(Assembler::notEqual, L_2TAG_PACKET_4_0_1); // Branch only if x is negative zero - __ xorpd(xmm0, xmm0); - __ jmp(B1_4); - - __ bind(L_2TAG_PACKET_4_0_1); - __ movsd(xmm0, ExternalAddress(ZERON), r11 /*rscratch*/); - __ jmp(B1_4); - __ bind(L_2TAG_PACKET_1_0_1); - __ movl(rax, Address(rsp, 4)); - __ movl(rdx, Address(rsp)); - __ movl(rcx, rax); - __ andl(rcx, 2147483647); - __ cmpl(rcx, 2146435072); - __ jcc(Assembler::above, L_2TAG_PACKET_5_0_1); // Branch only if |x| is NaN - __ cmpl(rdx, 0); - __ jcc(Assembler::notEqual, L_2TAG_PACKET_5_0_1); // Branch only if |x| is NaN - __ cmpl(rax, 2146435072); - __ jcc(Assembler::notEqual, L_2TAG_PACKET_6_0_1); // Branch only if x is negative INF - __ movsd(xmm0, ExternalAddress(INF), r11 /*rscratch*/); - __ jmp(B1_4); - - __ bind(L_2TAG_PACKET_6_0_1); - __ movsd(xmm0, ExternalAddress(NEG_INF), r11 /*rscratch*/); - __ jmp(B1_4); - - __ bind(L_2TAG_PACKET_5_0_1); - __ movsd(xmm0, Address(rsp)); __ addsd(xmm0, xmm0); - __ movq(Address(rsp, 8), xmm0); __ bind(B1_4); - __ addq(rsp, 24); __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); From 59201ed96c1533ce8722c56688631d84c89d9a90 Mon Sep 17 00:00:00 2001 From: 
Mohamed Issa Date: Tue, 24 Jun 2025 15:55:11 -0700 Subject: [PATCH 2/3] Make absolute mask memory constant 16 byte aligned for compatibility with andpd instruction --- src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp index bb77c2935963d..aeee1b800fe82 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp @@ -46,7 +46,7 @@ // /******************************************************************************/ -ATTRIBUTE_ALIGNED(4) static const juint _ABS_MASK[] = +ATTRIBUTE_ALIGNED(16) static const juint _ABS_MASK[] = { 4294967295, 2147483647 }; From 615169d8aa679c665ac4c5ad30ea011505e503b7 Mon Sep 17 00:00:00 2001 From: Mohamed Issa Date: Thu, 26 Jun 2025 18:36:29 -0700 Subject: [PATCH 3/3] Ensure ABS_MASK is a 128-bit memory sized location and only use equal enum for UCOMISD checks --- src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp index aeee1b800fe82..6faa2081fb27e 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp @@ -46,9 +46,10 @@ // /******************************************************************************/ +/* Bit mask that clears the sign bit of a double: 0x7FFFFFFFFFFFFFFF in the lower 64 bits, zero in the upper 64 bits */ ATTRIBUTE_ALIGNED(16) static const juint _ABS_MASK[] = { - 4294967295, 2147483647 + 4294967295, 2147483647, 0, 0 }; ATTRIBUTE_ALIGNED(4) static const juint _SIG_MASK[] = @@ -214,7 +215,7 @@ address StubGenerator::generate_libmCbrt() { __ bind(B1_1); __ ucomisd(xmm0, ExternalAddress(ZERON), r11 /*rscratch*/); - __ jcc(Assembler::zero, L_2TAG_PACKET_1_0_1); // Branch only if x is +/- zero or NaN + __
jcc(Assembler::equal, L_2TAG_PACKET_1_0_1); // Branch only if x is +/- zero or NaN __ movq(xmm1, xmm0); __ andpd(xmm1, ExternalAddress(ABS_MASK), r11 /*rscratch*/); __ ucomisd(xmm1, ExternalAddress(INF), r11 /*rscratch*/);