Skip to content

Commit 0941f2c

Browse files
Sandhya Viswanathan authored and shipilev committed
8318562: Computational test more than 2x slower when AVX instructions are used
Backport-of: 0881f2b0c43870ed10b1166d04cef9832e58629e
1 parent e7bd0ab commit 0941f2c

File tree

4 files changed

+247
-2
lines changed

4 files changed

+247
-2
lines changed

src/hotspot/cpu/x86/macroAssembler_x86.cpp

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1878,6 +1878,92 @@ void MacroAssembler::cmpoop(Register src1, jobject src2, Register rscratch) {
18781878
}
18791879
#endif
18801880

1881+
// Emit cvtss2sd (scalar float -> scalar double), register source.
// When AVX is enabled, dst is first XOR-ed with itself (zeroed), but only if
// it is a different register from src: zeroing an aliased dst would wipe out
// the value about to be converted.
// NOTE(review): the clear is presumably there to drop the dependency on dst's
// previous contents (JDK-8318562, AVX slowdown) - confirm against the bug.
void MacroAssembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  const bool clear_dst_first = (UseAVX > 0) && (dst != src);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtss2sd(dst, src);
}
1887+
1888+
// Emit cvtss2sd (scalar float -> scalar double), memory source.
// When AVX is enabled, dst is XOR-ed with itself (zeroed) before the
// conversion is emitted; no aliasing check is made for a memory operand.
// NOTE(review): presumably breaks the dependency on dst's old value
// (JDK-8318562) - confirm against the bug.
void MacroAssembler::cvtss2sd(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtss2sd(dst, src);
}
1894+
1895+
// Emit cvtsd2ss (scalar double -> scalar float), register source.
// When AVX is enabled, dst is first XOR-ed with itself (zeroed) via xorps,
// but only if it is a different register from src: zeroing an aliased dst
// would wipe out the value about to be converted.
// NOTE(review): presumably breaks the dependency on dst's old value
// (JDK-8318562) - confirm against the bug.
void MacroAssembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  const bool clear_dst_first = (UseAVX > 0) && (dst != src);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsd2ss(dst, src);
}
1901+
1902+
// Emit cvtsd2ss (scalar double -> scalar float), memory source.
// When AVX is enabled, dst is XOR-ed with itself (zeroed) via xorps before
// the conversion is emitted; no aliasing check is made for a memory operand.
// NOTE(review): presumably breaks the dependency on dst's old value
// (JDK-8318562) - confirm against the bug.
void MacroAssembler::cvtsd2ss(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsd2ss(dst, src);
}
1908+
1909+
// Emit cvtsi2sdl (32-bit integer -> scalar double), general-register source.
// When AVX is enabled, dst is XOR-ed with itself (zeroed) via xorpd before
// the conversion is emitted. src is a Register, not an XMMRegister, so it is
// not compared against dst.
// NOTE(review): presumably breaks the dependency on dst's old value
// (JDK-8318562) - confirm against the bug.
void MacroAssembler::cvtsi2sdl(XMMRegister dst, Register src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtsi2sdl(dst, src);
}
1915+
1916+
// Emit cvtsi2sdl (32-bit integer -> scalar double), memory source.
// When AVX is enabled, dst is XOR-ed with itself (zeroed) via xorpd before
// the conversion is emitted.
// NOTE(review): presumably breaks the dependency on dst's old value
// (JDK-8318562) - confirm against the bug.
void MacroAssembler::cvtsi2sdl(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtsi2sdl(dst, src);
}
1922+
1923+
// Emit cvtsi2ssl (32-bit integer -> scalar float), general-register source.
// When AVX is enabled, dst is XOR-ed with itself (zeroed) via xorps before
// the conversion is emitted. src is a Register, not an XMMRegister, so it is
// not compared against dst.
// NOTE(review): presumably breaks the dependency on dst's old value
// (JDK-8318562) - confirm against the bug.
void MacroAssembler::cvtsi2ssl(XMMRegister dst, Register src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsi2ssl(dst, src);
}
1929+
1930+
// Emit cvtsi2ssl (32-bit integer -> scalar float), memory source.
// When AVX is enabled, dst is XOR-ed with itself (zeroed) via xorps before
// the conversion is emitted.
// NOTE(review): presumably breaks the dependency on dst's old value
// (JDK-8318562) - confirm against the bug.
void MacroAssembler::cvtsi2ssl(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsi2ssl(dst, src);
}
1936+
1937+
#ifdef _LP64
1938+
// Emit cvtsi2sdq (64-bit integer -> scalar double), general-register source.
// Only compiled under _LP64. When AVX is enabled, dst is XOR-ed with itself
// (zeroed) via xorpd before the conversion is emitted. src is a Register,
// not an XMMRegister, so it is not compared against dst.
// NOTE(review): presumably breaks the dependency on dst's old value
// (JDK-8318562) - confirm against the bug.
void MacroAssembler::cvtsi2sdq(XMMRegister dst, Register src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtsi2sdq(dst, src);
}
1944+
1945+
// Emit cvtsi2sdq (64-bit integer -> scalar double), memory source.
// Only compiled under _LP64. When AVX is enabled, dst is XOR-ed with itself
// (zeroed) via xorpd before the conversion is emitted.
// NOTE(review): presumably breaks the dependency on dst's old value
// (JDK-8318562) - confirm against the bug.
void MacroAssembler::cvtsi2sdq(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtsi2sdq(dst, src);
}
1951+
1952+
// Emit cvtsi2ssq (64-bit integer -> scalar float), general-register source.
// Only compiled under _LP64. When AVX is enabled, dst is XOR-ed with itself
// (zeroed) via xorps before the conversion is emitted. src is a Register,
// not an XMMRegister, so it is not compared against dst.
// NOTE(review): presumably breaks the dependency on dst's old value
// (JDK-8318562) - confirm against the bug.
void MacroAssembler::cvtsi2ssq(XMMRegister dst, Register src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsi2ssq(dst, src);
}
1958+
1959+
// Emit cvtsi2ssq (64-bit integer -> scalar float), memory source.
// Only compiled under _LP64. When AVX is enabled, dst is XOR-ed with itself
// (zeroed) via xorps before the conversion is emitted.
// NOTE(review): presumably breaks the dependency on dst's old value
// (JDK-8318562) - confirm against the bug.
void MacroAssembler::cvtsi2ssq(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsi2ssq(dst, src);
}
1965+
#endif // _LP64
1966+
18811967
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr, Register rscratch) {
18821968
assert(rscratch != noreg || always_reachable(adr), "missing");
18831969

src/hotspot/cpu/x86/macroAssembler_x86.hpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -799,6 +799,23 @@ class MacroAssembler: public Assembler {
799799

800800
void cmpxchgptr(Register reg, Address adr);
801801

802+
803+
// cvt instructions
// MacroAssembler-level versions of the scalar conversion instructions with the
// same names in Assembler. As defined in macroAssembler_x86.cpp, each one
// clears dst first (xorpd for the *2sd / *sd* forms, xorps for the *2ss / *ss*
// forms) when UseAVX > 0 and then emits the Assembler instruction. The
// XMMRegister-source overloads of cvtss2sd/cvtsd2ss skip the clear when
// dst == src.
804+
void cvtss2sd(XMMRegister dst, XMMRegister src);
805+
void cvtss2sd(XMMRegister dst, Address src);
806+
void cvtsd2ss(XMMRegister dst, XMMRegister src);
807+
void cvtsd2ss(XMMRegister dst, Address src);
808+
void cvtsi2sdl(XMMRegister dst, Register src);
809+
void cvtsi2sdl(XMMRegister dst, Address src);
810+
void cvtsi2ssl(XMMRegister dst, Register src);
811+
void cvtsi2ssl(XMMRegister dst, Address src);
812+
// The 64-bit integer source forms exist only on 64-bit builds.
#ifdef _LP64
813+
void cvtsi2sdq(XMMRegister dst, Register src);
814+
void cvtsi2sdq(XMMRegister dst, Address src);
815+
void cvtsi2ssq(XMMRegister dst, Register src);
816+
void cvtsi2ssq(XMMRegister dst, Address src);
817+
#endif
818+
802819
void locked_cmpxchgptr(Register reg, AddressLiteral adr, Register rscratch = noreg);
803820

804821
void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }

src/hotspot/cpu/x86/x86_64.ad

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10940,7 +10940,7 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
1094010940
instruct convF2D_reg_reg(regD dst, regF src)
1094110941
%{
1094210942
match(Set dst (ConvF2D src));
10943-
10943+
effect(TEMP dst);
1094410944
format %{ "cvtss2sd $dst, $src" %}
1094510945
ins_encode %{
1094610946
__ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
@@ -10962,7 +10962,7 @@ instruct convF2D_reg_mem(regD dst, memory src)
1096210962
instruct convD2F_reg_reg(regF dst, regD src)
1096310963
%{
1096410964
match(Set dst (ConvD2F src));
10965-
10965+
effect(TEMP dst);
1096610966
format %{ "cvtsd2ss $dst, $src" %}
1096710967
ins_encode %{
1096810968
__ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/*
2+
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
package org.openjdk.bench.vm.compiler;
25+
26+
import org.openjdk.jmh.annotations.Benchmark;
27+
import org.openjdk.jmh.annotations.BenchmarkMode;
28+
import org.openjdk.jmh.annotations.Fork;
29+
import org.openjdk.jmh.annotations.Measurement;
30+
import org.openjdk.jmh.annotations.Mode;
31+
import org.openjdk.jmh.annotations.OutputTimeUnit;
32+
import org.openjdk.jmh.annotations.Scope;
33+
import org.openjdk.jmh.annotations.Setup;
34+
import org.openjdk.jmh.annotations.State;
35+
import org.openjdk.jmh.annotations.Warmup;
36+
37+
import java.util.concurrent.TimeUnit;
38+
39+
@State(Scope.Thread)
40+
@BenchmarkMode(Mode.AverageTime)
41+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
42+
@Warmup(iterations = 5, time = 5, timeUnit = TimeUnit.SECONDS)
43+
@Measurement(iterations = 5, time = 5, timeUnit = TimeUnit.SECONDS)
44+
@Fork(value = 3)
45+
public class ComputePI {
46+
47+
@Benchmark
48+
public double compute_pi_int_dbl() {
49+
double pi = 4.0;
50+
boolean sign = false;
51+
52+
for (int i = 3; i < 1000; i += 2) {
53+
if (sign) {
54+
pi += 4.0 / i;
55+
} else {
56+
pi -= 4.0 / i;
57+
}
58+
sign = !sign;
59+
}
60+
return pi;
61+
}
62+
63+
@Benchmark
64+
public double compute_pi_int_flt() {
65+
float pi = 4.0f;
66+
boolean sign = false;
67+
68+
for (int i = 3; i < 1000; i += 2) {
69+
if (sign) {
70+
pi += 4.0f / i;
71+
} else {
72+
pi -= 4.0f / i;
73+
}
74+
sign = !sign;
75+
}
76+
return pi;
77+
}
78+
79+
@Benchmark
80+
public double compute_pi_long_dbl() {
81+
double pi = 4.0;
82+
boolean sign = false;
83+
84+
for (long i = 3; i < 1000; i += 2) {
85+
if (sign) {
86+
pi += 4.0 / i;
87+
} else {
88+
pi -= 4.0 / i;
89+
}
90+
sign = !sign;
91+
}
92+
return pi;
93+
}
94+
95+
@Benchmark
96+
public double compute_pi_long_flt() {
97+
float pi = 4.0f;
98+
boolean sign = false;
99+
100+
for (long i = 3; i < 1000; i += 2) {
101+
if (sign) {
102+
pi += 4.0f / i;
103+
} else {
104+
pi -= 4.0f / i;
105+
}
106+
sign = !sign;
107+
}
108+
return pi;
109+
}
110+
111+
@Benchmark
112+
public double compute_pi_flt_dbl() {
113+
double pi = 4.0;
114+
boolean sign = false;
115+
116+
for (float i = 3.0f; i < 1000.0f; i += 2.0f) {
117+
if (sign) {
118+
pi += 4.0 / i;
119+
} else {
120+
pi -= 4.0 / i;
121+
}
122+
sign = !sign;
123+
}
124+
return pi;
125+
}
126+
127+
@Benchmark
128+
public double compute_pi_dbl_flt() {
129+
float pi = 4.0f;
130+
boolean sign = false;
131+
132+
for (float i = 3.0f; i < 1000.0f; i += 2.0f) {
133+
if (sign) {
134+
pi += 4.0f / i;
135+
} else {
136+
pi -= 4.0f / i;
137+
}
138+
sign = !sign;
139+
}
140+
return pi;
141+
}
142+
}

0 commit comments

Comments
 (0)