Skip to content

Commit 9a104e0

Browse files
Sandhya Viswanathan authored and shipilev committed
8318562: Computational test more than 2x slower when AVX instructions are used
Reviewed-by: shade
Backport-of: 0881f2b0c43870ed10b1166d04cef9832e58629e
1 parent 5ed1926 commit 9a104e0

File tree

4 files changed

+246
-3
lines changed

4 files changed

+246
-3
lines changed

src/hotspot/cpu/x86/macroAssembler_x86.cpp

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2026,6 +2026,92 @@ void MacroAssembler::cmpoop(Register src1, jobject src2) {
20262026
}
20272027
#endif
20282028

2029+
// Emits cvtss2sd dst, src (register source).
// With AVX enabled and dst distinct from src, dst is first zeroed via
// xorpd(dst, dst). When dst == src the zeroing is skipped, since it would
// destroy the input operand.
// NOTE(review): presumably the zeroing removes the conversion's dependency on
// the stale contents of dst (JDK-8318562) -- confirm against the bug report.
void MacroAssembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  const bool clear_dst_first = (UseAVX > 0) && (dst != src);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtss2sd(dst, src);
}
2035+
2036+
// Emits cvtss2sd dst, [src] (memory source).
// With AVX enabled, dst is first zeroed via xorpd(dst, dst).
// NOTE(review): presumably the zeroing removes the conversion's dependency on
// the stale contents of dst (JDK-8318562) -- confirm against the bug report.
void MacroAssembler::cvtss2sd(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtss2sd(dst, src);
}
2042+
2043+
// Emits cvtsd2ss dst, src (register source).
// With AVX enabled and dst distinct from src, dst is first zeroed via
// xorps(dst, dst). When dst == src the zeroing is skipped, since it would
// destroy the input operand.
// NOTE(review): presumably the zeroing removes the conversion's dependency on
// the stale contents of dst (JDK-8318562) -- confirm against the bug report.
void MacroAssembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  const bool clear_dst_first = (UseAVX > 0) && (dst != src);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsd2ss(dst, src);
}
2049+
2050+
// Emits cvtsd2ss dst, [src] (memory source).
// With AVX enabled, dst is first zeroed via xorps(dst, dst).
// NOTE(review): presumably the zeroing removes the conversion's dependency on
// the stale contents of dst (JDK-8318562) -- confirm against the bug report.
void MacroAssembler::cvtsd2ss(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsd2ss(dst, src);
}
2056+
2057+
// Emits cvtsi2sdl dst, src (general-purpose register source).
// With AVX enabled, dst is first zeroed via xorpd(dst, dst).
// NOTE(review): presumably the zeroing removes the conversion's dependency on
// the stale contents of dst (JDK-8318562) -- confirm against the bug report.
void MacroAssembler::cvtsi2sdl(XMMRegister dst, Register src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtsi2sdl(dst, src);
}
2063+
2064+
// Emits cvtsi2sdl dst, [src] (memory source).
// With AVX enabled, dst is first zeroed via xorpd(dst, dst).
// NOTE(review): presumably the zeroing removes the conversion's dependency on
// the stale contents of dst (JDK-8318562) -- confirm against the bug report.
void MacroAssembler::cvtsi2sdl(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtsi2sdl(dst, src);
}
2070+
2071+
// Emits cvtsi2ssl dst, src (general-purpose register source).
// With AVX enabled, dst is first zeroed via xorps(dst, dst).
// NOTE(review): presumably the zeroing removes the conversion's dependency on
// the stale contents of dst (JDK-8318562) -- confirm against the bug report.
void MacroAssembler::cvtsi2ssl(XMMRegister dst, Register src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsi2ssl(dst, src);
}
2077+
2078+
// Emits cvtsi2ssl dst, [src] (memory source).
// With AVX enabled, dst is first zeroed via xorps(dst, dst).
// NOTE(review): presumably the zeroing removes the conversion's dependency on
// the stale contents of dst (JDK-8318562) -- confirm against the bug report.
void MacroAssembler::cvtsi2ssl(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsi2ssl(dst, src);
}
2084+
2085+
#ifdef _LP64
2086+
// Emits cvtsi2sdq dst, src (general-purpose register source).
// With AVX enabled, dst is first zeroed via xorpd(dst, dst).
// NOTE(review): presumably the zeroing removes the conversion's dependency on
// the stale contents of dst (JDK-8318562) -- confirm against the bug report.
void MacroAssembler::cvtsi2sdq(XMMRegister dst, Register src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtsi2sdq(dst, src);
}
2092+
2093+
// Emits cvtsi2sdq dst, [src] (memory source).
// With AVX enabled, dst is first zeroed via xorpd(dst, dst).
// NOTE(review): presumably the zeroing removes the conversion's dependency on
// the stale contents of dst (JDK-8318562) -- confirm against the bug report.
void MacroAssembler::cvtsi2sdq(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtsi2sdq(dst, src);
}
2099+
2100+
// Emits cvtsi2ssq dst, src (general-purpose register source).
// With AVX enabled, dst is first zeroed via xorps(dst, dst).
// NOTE(review): presumably the zeroing removes the conversion's dependency on
// the stale contents of dst (JDK-8318562) -- confirm against the bug report.
void MacroAssembler::cvtsi2ssq(XMMRegister dst, Register src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsi2ssq(dst, src);
}
2106+
2107+
// Emits cvtsi2ssq dst, [src] (memory source).
// With AVX enabled, dst is first zeroed via xorps(dst, dst).
// NOTE(review): presumably the zeroing removes the conversion's dependency on
// the stale contents of dst (JDK-8318562) -- confirm against the bug report.
void MacroAssembler::cvtsi2ssq(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsi2ssq(dst, src);
}
2113+
#endif // _LP64
2114+
20292115
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
20302116
if (reachable(adr)) {
20312117
lock();

src/hotspot/cpu/x86/macroAssembler_x86.hpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -752,8 +752,23 @@ class MacroAssembler: public Assembler {
752752

753753
void cmpxchgptr(Register reg, Address adr);
754754

755-
void locked_cmpxchgptr(Register reg, AddressLiteral adr);
755+
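// cvt instructions: MacroAssembler wrappers around the Assembler conversions
// of the same name. When UseAVX > 0 they zero dst (xorps/xorpd) before the
// conversion; the XMM-to-XMM forms skip the zeroing when dst == src.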
756+
void cvtss2sd(XMMRegister dst, XMMRegister src);
757+
void cvtss2sd(XMMRegister dst, Address src);
758+
void cvtsd2ss(XMMRegister dst, XMMRegister src);
759+
void cvtsd2ss(XMMRegister dst, Address src);
760+
void cvtsi2sdl(XMMRegister dst, Register src);
761+
void cvtsi2sdl(XMMRegister dst, Address src);
762+
void cvtsi2ssl(XMMRegister dst, Register src);
763+
void cvtsi2ssl(XMMRegister dst, Address src);
764+
#ifdef _LP64
765+
void cvtsi2sdq(XMMRegister dst, Register src);
766+
void cvtsi2sdq(XMMRegister dst, Address src);
767+
void cvtsi2ssq(XMMRegister dst, Register src);
768+
void cvtsi2ssq(XMMRegister dst, Address src);
769+
#endif
756770

771+
void locked_cmpxchgptr(Register reg, AddressLiteral adr);
757772

758773
void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
759774
void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32)) NOT_LP64(imull(dst, src, imm32)); }

src/hotspot/cpu/x86/x86_64.ad

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10582,7 +10582,7 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
1058210582
instruct convF2D_reg_reg(regD dst, regF src)
1058310583
%{
1058410584
match(Set dst (ConvF2D src));
10585-
10585+
effect(TEMP dst);
1058610586
format %{ "cvtss2sd $dst, $src" %}
1058710587
ins_encode %{
1058810588
__ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
@@ -10604,7 +10604,7 @@ instruct convF2D_reg_mem(regD dst, memory src)
1060410604
instruct convD2F_reg_reg(regF dst, regD src)
1060510605
%{
1060610606
match(Set dst (ConvD2F src));
10607-
10607+
effect(TEMP dst);
1060810608
format %{ "cvtsd2ss $dst, $src" %}
1060910609
ins_encode %{
1061010610
__ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/*
2+
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
package org.openjdk.bench.vm.compiler;
25+
26+
import org.openjdk.jmh.annotations.Benchmark;
27+
import org.openjdk.jmh.annotations.BenchmarkMode;
28+
import org.openjdk.jmh.annotations.Fork;
29+
import org.openjdk.jmh.annotations.Measurement;
30+
import org.openjdk.jmh.annotations.Mode;
31+
import org.openjdk.jmh.annotations.OutputTimeUnit;
32+
import org.openjdk.jmh.annotations.Scope;
33+
import org.openjdk.jmh.annotations.Setup;
34+
import org.openjdk.jmh.annotations.State;
35+
import org.openjdk.jmh.annotations.Warmup;
36+
37+
import java.util.concurrent.TimeUnit;
38+
39+
@State(Scope.Thread)
40+
@BenchmarkMode(Mode.AverageTime)
41+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
42+
@Warmup(iterations = 5, time = 5, timeUnit = TimeUnit.SECONDS)
43+
@Measurement(iterations = 5, time = 5, timeUnit = TimeUnit.SECONDS)
44+
@Fork(value = 3)
45+
public class ComputePI {
46+
47+
@Benchmark
48+
public double compute_pi_int_dbl() {
49+
double pi = 4.0;
50+
boolean sign = false;
51+
52+
for (int i = 3; i < 1000; i += 2) {
53+
if (sign) {
54+
pi += 4.0 / i;
55+
} else {
56+
pi -= 4.0 / i;
57+
}
58+
sign = !sign;
59+
}
60+
return pi;
61+
}
62+
63+
@Benchmark
64+
public double compute_pi_int_flt() {
65+
float pi = 4.0f;
66+
boolean sign = false;
67+
68+
for (int i = 3; i < 1000; i += 2) {
69+
if (sign) {
70+
pi += 4.0f / i;
71+
} else {
72+
pi -= 4.0f / i;
73+
}
74+
sign = !sign;
75+
}
76+
return pi;
77+
}
78+
79+
@Benchmark
80+
public double compute_pi_long_dbl() {
81+
double pi = 4.0;
82+
boolean sign = false;
83+
84+
for (long i = 3; i < 1000; i += 2) {
85+
if (sign) {
86+
pi += 4.0 / i;
87+
} else {
88+
pi -= 4.0 / i;
89+
}
90+
sign = !sign;
91+
}
92+
return pi;
93+
}
94+
95+
@Benchmark
96+
public double compute_pi_long_flt() {
97+
float pi = 4.0f;
98+
boolean sign = false;
99+
100+
for (long i = 3; i < 1000; i += 2) {
101+
if (sign) {
102+
pi += 4.0f / i;
103+
} else {
104+
pi -= 4.0f / i;
105+
}
106+
sign = !sign;
107+
}
108+
return pi;
109+
}
110+
111+
@Benchmark
112+
public double compute_pi_flt_dbl() {
113+
double pi = 4.0;
114+
boolean sign = false;
115+
116+
for (float i = 3.0f; i < 1000.0f; i += 2.0f) {
117+
if (sign) {
118+
pi += 4.0 / i;
119+
} else {
120+
pi -= 4.0 / i;
121+
}
122+
sign = !sign;
123+
}
124+
return pi;
125+
}
126+
127+
@Benchmark
128+
public double compute_pi_dbl_flt() {
129+
float pi = 4.0f;
130+
boolean sign = false;
131+
132+
for (float i = 3.0f; i < 1000.0f; i += 2.0f) {
133+
if (sign) {
134+
pi += 4.0f / i;
135+
} else {
136+
pi -= 4.0f / i;
137+
}
138+
sign = !sign;
139+
}
140+
return pi;
141+
}
142+
}

0 commit comments

Comments
 (0)