Skip to content

Commit 0881f2b

Browse files
author
Sandhya Viswanathan
committed
8318562: Computational test more than 2x slower when AVX instructions are used
Reviewed-by: kvn
1 parent 9b372e2 commit 0881f2b

File tree

4 files changed

+247
-2
lines changed

4 files changed

+247
-2
lines changed

src/hotspot/cpu/x86/macroAssembler_x86.cpp

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1871,6 +1871,92 @@ void MacroAssembler::cmpoop(Register src1, jobject src2, Register rscratch) {
18711871
}
18721872
#endif
18731873

1874+
// Emit cvtss2sd dst, src (register source).
// When AVX is in use, dst is zeroed with xorpd beforehand -- presumably to
// break the dependency on dst's previous contents that the AVX encoding of
// the scalar convert carries (NOTE(review): confirm against the Intel
// optimization manual).
void MacroAssembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  // Zeroing must be skipped when dst aliases src: it would destroy the input.
  const bool clear_dst_first = (UseAVX > 0) && (dst != src);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtss2sd(dst, src);
}
1880+
1881+
// Emit cvtss2sd dst, [src] (memory source).
// With AVX in use, dst is zeroed with xorpd first; the source is in memory,
// so there is no aliasing with dst to guard against.
void MacroAssembler::cvtss2sd(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtss2sd(dst, src);
}
1887+
1888+
// Emit cvtsd2ss dst, src (register source).
// When AVX is in use, dst is zeroed with xorps beforehand -- presumably to
// break the dependency on dst's previous contents (NOTE(review): confirm
// against the Intel optimization manual).
void MacroAssembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  // Zeroing must be skipped when dst aliases src: it would destroy the input.
  const bool clear_dst_first = (UseAVX > 0) && (dst != src);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsd2ss(dst, src);
}
1894+
1895+
// Emit cvtsd2ss dst, [src] (memory source).
// With AVX in use, dst is zeroed with xorps first; the source is in memory,
// so there is no aliasing with dst to guard against.
void MacroAssembler::cvtsd2ss(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsd2ss(dst, src);
}
1901+
1902+
// Emit cvtsi2sdl dst, src (general-purpose register source).
// With AVX in use, dst is zeroed with xorpd ahead of the conversion. The
// source is not an XMM register, so it cannot alias dst.
void MacroAssembler::cvtsi2sdl(XMMRegister dst, Register src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtsi2sdl(dst, src);
}
1908+
1909+
// Emit cvtsi2sdl dst, [src] (memory source).
// With AVX in use, dst is zeroed with xorpd ahead of the conversion.
void MacroAssembler::cvtsi2sdl(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtsi2sdl(dst, src);
}
1915+
1916+
// Emit cvtsi2ssl dst, src (general-purpose register source).
// With AVX in use, dst is zeroed with xorps ahead of the conversion. The
// source is not an XMM register, so it cannot alias dst.
void MacroAssembler::cvtsi2ssl(XMMRegister dst, Register src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsi2ssl(dst, src);
}
1922+
1923+
// Emit cvtsi2ssl dst, [src] (memory source).
// With AVX in use, dst is zeroed with xorps ahead of the conversion.
void MacroAssembler::cvtsi2ssl(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsi2ssl(dst, src);
}
1929+
1930+
#ifdef _LP64
1931+
// Emit cvtsi2sdq dst, src (general-purpose register source).
// With AVX in use, dst is zeroed with xorpd ahead of the conversion. The
// source is not an XMM register, so it cannot alias dst.
void MacroAssembler::cvtsi2sdq(XMMRegister dst, Register src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtsi2sdq(dst, src);
}
1937+
1938+
// Emit cvtsi2sdq dst, [src] (memory source).
// With AVX in use, dst is zeroed with xorpd ahead of the conversion.
void MacroAssembler::cvtsi2sdq(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorpd(dst, dst);
  }
  Assembler::cvtsi2sdq(dst, src);
}
1944+
1945+
// Emit cvtsi2ssq dst, src (general-purpose register source).
// With AVX in use, dst is zeroed with xorps ahead of the conversion. The
// source is not an XMM register, so it cannot alias dst.
void MacroAssembler::cvtsi2ssq(XMMRegister dst, Register src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsi2ssq(dst, src);
}
1951+
1952+
// Emit cvtsi2ssq dst, [src] (memory source).
// With AVX in use, dst is zeroed with xorps ahead of the conversion.
void MacroAssembler::cvtsi2ssq(XMMRegister dst, Address src) {
  const bool clear_dst_first = (UseAVX > 0);
  if (clear_dst_first) {
    xorps(dst, dst);
  }
  Assembler::cvtsi2ssq(dst, src);
}
1958+
#endif // _LP64
1959+
18741960
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr, Register rscratch) {
18751961
assert(rscratch != noreg || always_reachable(adr), "missing");
18761962

src/hotspot/cpu/x86/macroAssembler_x86.hpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,23 @@ class MacroAssembler: public Assembler {
800800

801801
void cmpxchgptr(Register reg, Address adr);
802802

803+
804+
// Scalar conversion (cvt) instructions.
// These overloads hide the Assembler members of the same name. Their
// definitions in macroAssembler_x86.cpp zero dst (xorps/xorpd dst, dst) when
// UseAVX > 0 and then forward to the corresponding Assembler:: emitter.
// The XMM-to-XMM forms skip the zeroing when dst == src.
805+
void cvtss2sd(XMMRegister dst, XMMRegister src);
806+
void cvtss2sd(XMMRegister dst, Address src);
807+
void cvtsd2ss(XMMRegister dst, XMMRegister src);
808+
void cvtsd2ss(XMMRegister dst, Address src);
809+
void cvtsi2sdl(XMMRegister dst, Register src);
810+
void cvtsi2sdl(XMMRegister dst, Address src);
811+
void cvtsi2ssl(XMMRegister dst, Register src);
812+
void cvtsi2ssl(XMMRegister dst, Address src);
813+
// The 'q' variants below are only declared for _LP64 builds.
#ifdef _LP64
814+
void cvtsi2sdq(XMMRegister dst, Register src);
815+
void cvtsi2sdq(XMMRegister dst, Address src);
816+
void cvtsi2ssq(XMMRegister dst, Register src);
817+
void cvtsi2ssq(XMMRegister dst, Address src);
818+
#endif // _LP64
819+
803820
void locked_cmpxchgptr(Register reg, AddressLiteral adr, Register rscratch = noreg);
804821

805822
// Multiply dst by src. The body wraps imulq in LP64_ONLY and imull in
// NOT_LP64, so (going by the macro names) exactly one of the two is emitted:
// the 64-bit form on LP64 builds, the 32-bit form otherwise.
void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }

src/hotspot/cpu/x86/x86_64.ad

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10149,7 +10149,7 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
1014910149
instruct convF2D_reg_reg(regD dst, regF src)
1015010150
%{
1015110151
match(Set dst (ConvF2D src));
10152-
10152+
effect(TEMP dst);
1015310153
format %{ "cvtss2sd $dst, $src" %}
1015410154
ins_encode %{
1015510155
__ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
@@ -10171,7 +10171,7 @@ instruct convF2D_reg_mem(regD dst, memory src)
1017110171
instruct convD2F_reg_reg(regF dst, regD src)
1017210172
%{
1017310173
match(Set dst (ConvD2F src));
10174-
10174+
effect(TEMP dst);
1017510175
format %{ "cvtsd2ss $dst, $src" %}
1017610176
ins_encode %{
1017710177
__ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
test/micro/org/openjdk/bench/vm/compiler/ComputePI.java

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/*
2+
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
package org.openjdk.bench.vm.compiler;
25+
26+
import org.openjdk.jmh.annotations.Benchmark;
27+
import org.openjdk.jmh.annotations.BenchmarkMode;
28+
import org.openjdk.jmh.annotations.Fork;
29+
import org.openjdk.jmh.annotations.Measurement;
30+
import org.openjdk.jmh.annotations.Mode;
31+
import org.openjdk.jmh.annotations.OutputTimeUnit;
32+
import org.openjdk.jmh.annotations.Scope;
33+
import org.openjdk.jmh.annotations.Setup;
34+
import org.openjdk.jmh.annotations.State;
35+
import org.openjdk.jmh.annotations.Warmup;
36+
37+
import java.util.concurrent.TimeUnit;
38+
39+
@State(Scope.Thread)
40+
@BenchmarkMode(Mode.AverageTime)
41+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
42+
@Warmup(iterations = 5, time = 5, timeUnit = TimeUnit.SECONDS)
43+
@Measurement(iterations = 5, time = 5, timeUnit = TimeUnit.SECONDS)
44+
@Fork(value = 3)
45+
public class ComputePI {
46+
47+
@Benchmark
48+
public double compute_pi_int_dbl() {
49+
double pi = 4.0;
50+
boolean sign = false;
51+
52+
for (int i = 3; i < 1000; i += 2) {
53+
if (sign) {
54+
pi += 4.0 / i;
55+
} else {
56+
pi -= 4.0 / i;
57+
}
58+
sign = !sign;
59+
}
60+
return pi;
61+
}
62+
63+
@Benchmark
64+
public double compute_pi_int_flt() {
65+
float pi = 4.0f;
66+
boolean sign = false;
67+
68+
for (int i = 3; i < 1000; i += 2) {
69+
if (sign) {
70+
pi += 4.0f / i;
71+
} else {
72+
pi -= 4.0f / i;
73+
}
74+
sign = !sign;
75+
}
76+
return pi;
77+
}
78+
79+
@Benchmark
80+
public double compute_pi_long_dbl() {
81+
double pi = 4.0;
82+
boolean sign = false;
83+
84+
for (long i = 3; i < 1000; i += 2) {
85+
if (sign) {
86+
pi += 4.0 / i;
87+
} else {
88+
pi -= 4.0 / i;
89+
}
90+
sign = !sign;
91+
}
92+
return pi;
93+
}
94+
95+
@Benchmark
96+
public double compute_pi_long_flt() {
97+
float pi = 4.0f;
98+
boolean sign = false;
99+
100+
for (long i = 3; i < 1000; i += 2) {
101+
if (sign) {
102+
pi += 4.0f / i;
103+
} else {
104+
pi -= 4.0f / i;
105+
}
106+
sign = !sign;
107+
}
108+
return pi;
109+
}
110+
111+
@Benchmark
112+
public double compute_pi_flt_dbl() {
113+
double pi = 4.0;
114+
boolean sign = false;
115+
116+
for (float i = 3.0f; i < 1000.0f; i += 2.0f) {
117+
if (sign) {
118+
pi += 4.0 / i;
119+
} else {
120+
pi -= 4.0 / i;
121+
}
122+
sign = !sign;
123+
}
124+
return pi;
125+
}
126+
127+
@Benchmark
128+
public double compute_pi_dbl_flt() {
129+
float pi = 4.0f;
130+
boolean sign = false;
131+
132+
for (float i = 3.0f; i < 1000.0f; i += 2.0f) {
133+
if (sign) {
134+
pi += 4.0f / i;
135+
} else {
136+
pi -= 4.0f / i;
137+
}
138+
sign = !sign;
139+
}
140+
return pi;
141+
}
142+
}

0 commit comments

Comments
 (0)