Skip to content

Commit

Permalink
8295276: AArch64: Add backend support for half float conversion intrinsics
Browse files Browse the repository at this point in the history

This patch adds aarch64 backend support for library intrinsics that
implement conversions between half-precision and single-precision
floats.

The following benchmarks were run to assess the performance of this patch:

org.openjdk.bench.java.math.Fp16ConversionBenchmark.floatToFloat16
org.openjdk.bench.java.math.Fp16ConversionBenchmark.float16ToFloat

The performance (ops/ms) gain with the patch on an ARM NEON machine is
shown below:

  Benchmark                                      Gain
  Fp16ConversionBenchmark.float16ToFloat         3.42
  Fp16ConversionBenchmark.floatToFloat16         5.85
  • Loading branch information
Bhavana-Kilambi committed Oct 20, 2022
1 parent 9b97162 commit 121f543
Show file tree
Hide file tree
Showing 4 changed files with 658 additions and 624 deletions.
26 changes: 26 additions & 0 deletions src/hotspot/cpu/aarch64/aarch64.ad
Expand Up @@ -14578,6 +14578,32 @@ instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
ins_pipe(fp_f2l);
%}

// Single-precision to half-precision conversion rule: matches the ideal
// ConvF2HF node, taking the float in FP register $src and producing the
// result in general-purpose register $dst.
// $tmp is declared TEMP because the converted value is first produced in an
// FP register and only afterwards moved across to $dst.
instruct convF2HF_reg_reg(iRegINoSp dst, vRegF src, vRegF tmp) %{
match(Set dst (ConvF2HF src));
format %{ "fcvt $tmp, $src\t# convert single to half precision\n\t"
"smov $dst, $tmp\t# move result from $tmp to $dst"
%}
effect(TEMP tmp);
ins_encode %{
// Step 1: narrow the single-precision value in $src to half precision in $tmp.
__ fcvtsh($tmp$$FloatRegister, $src$$FloatRegister);
// Step 2: move H lane 0 of $tmp into the integer register $dst.
// NOTE(review): smov is presumably the sign-extending lane move, so $dst
// would hold the half-float bits as a sign-extended 16-bit value - confirm.
__ smov($dst$$Register, $tmp$$FloatRegister, __ H, 0);
%}
ins_pipe(pipe_slow);
%}

// Half-precision to single-precision conversion rule: matches the ideal
// ConvHF2F node, taking the half-float bits in general-purpose register $src
// and producing the float result in FP register $dst.
// $tmp is declared TEMP because the source bits are first moved into an FP
// register before the conversion instruction can operate on them.
instruct convHF2F_reg_reg(vRegF dst, iRegINoSp src, vRegF tmp) %{
match(Set dst (ConvHF2F src));
format %{ "mov $tmp, $src\t# move source from $src to $tmp\n\t"
"fcvt $dst, $tmp\t# convert half to single precision"
%}
effect(TEMP tmp);
ins_encode %{
// Step 1: insert the value of $src into H lane 0 of $tmp.
__ mov($tmp$$FloatRegister, __ H, 0, $src$$Register);
// Step 2: widen the half-precision value in $tmp to single precision in $dst.
__ fcvths($dst$$FloatRegister, $tmp$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}

instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
match(Set dst (ConvI2F src));

Expand Down
36 changes: 19 additions & 17 deletions src/hotspot/cpu/aarch64/assembler_aarch64.hpp
Expand Up @@ -1907,31 +1907,33 @@ void mvnw(Register Rd, Register Rm,
#undef INSN

// Floating-point data-processing (1 source)
void data_processing(unsigned op31, unsigned type, unsigned opcode,
void data_processing(unsigned type, unsigned opcode,
FloatRegister Vd, FloatRegister Vn) {
starti;
f(op31, 31, 29);
f(0b000, 31, 29);
f(0b11110, 28, 24);
f(type, 23, 22), f(1, 21), f(opcode, 20, 15), f(0b10000, 14, 10);
rf(Vn, 5), rf(Vd, 0);
}

#define INSN(NAME, op31, type, opcode) \
#define INSN(NAME, type, opcode) \
void NAME(FloatRegister Vd, FloatRegister Vn) { \
data_processing(op31, type, opcode, Vd, Vn); \
}

INSN(fmovs, 0b000, 0b00, 0b000000);
INSN(fabss, 0b000, 0b00, 0b000001);
INSN(fnegs, 0b000, 0b00, 0b000010);
INSN(fsqrts, 0b000, 0b00, 0b000011);
INSN(fcvts, 0b000, 0b00, 0b000101); // Single-precision to double-precision

INSN(fmovd, 0b000, 0b01, 0b000000);
INSN(fabsd, 0b000, 0b01, 0b000001);
INSN(fnegd, 0b000, 0b01, 0b000010);
INSN(fsqrtd, 0b000, 0b01, 0b000011);
INSN(fcvtd, 0b000, 0b01, 0b000100); // Double-precision to single-precision
data_processing(type, opcode, Vd, Vn); \
}

INSN(fmovs, 0b00, 0b000000);
INSN(fabss, 0b00, 0b000001);
INSN(fnegs, 0b00, 0b000010);
INSN(fsqrts, 0b00, 0b000011);
INSN(fcvts, 0b00, 0b000101); // Single-precision to double-precision
INSN(fcvths, 0b11, 0b000100); // Half-precision to single-precision
INSN(fcvtsh, 0b00, 0b000111); // Single-precision to half-precision

INSN(fmovd, 0b01, 0b000000);
INSN(fabsd, 0b01, 0b000001);
INSN(fnegd, 0b01, 0b000010);
INSN(fsqrtd, 0b01, 0b000011);
INSN(fcvtd, 0b01, 0b000100); // Double-precision to single-precision

private:
void _fcvt_narrow_extend(FloatRegister Vd, SIMD_Arrangement Ta,
Expand Down
8 changes: 6 additions & 2 deletions test/hotspot/gtest/aarch64/aarch64-asmtest.py
Expand Up @@ -957,7 +957,9 @@ def cstr(self):
class FloatInstruction(Instruction):

def aname(self):
if (self._name.endswith("s") | self._name.endswith("d")):
if (self._name in ["fcvtsh", "fcvths"]):
return self._name[:len(self._name)-2]
elif (self._name.endswith("s") | self._name.endswith("d")):
return self._name[:len(self._name)-1]
else:
return self._name
Expand Down Expand Up @@ -1012,6 +1014,8 @@ def __init__(self, args):
elif not self._isPredicated and (name in ["and", "eor", "orr", "bic"]):
self._width = RegVariant(3, 3)
self._bitwiseop = True
elif name == "revb":
self._width = RegVariant(1, 3)
else:
self._width = RegVariant(0, 3)

Expand Down Expand Up @@ -1458,7 +1462,7 @@ def generate(kind, names):

generate(TwoRegFloatOp,
[["fmovs", "ss"], ["fabss", "ss"], ["fnegs", "ss"], ["fsqrts", "ss"],
["fcvts", "ds"],
["fcvts", "ds"], ["fcvtsh", "hs"], ["fcvths", "sh"],
["fmovd", "dd"], ["fabsd", "dd"], ["fnegd", "dd"], ["fsqrtd", "dd"],
["fcvtd", "sd"],
])
Expand Down

0 comments on commit 121f543

Please sign in to comment.