Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 193 additions & 1 deletion src/hotspot/cpu/aarch64/aarch64.ad
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2014, 2024, Red Hat, Inc. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
Expand Down Expand Up @@ -2296,6 +2296,26 @@ bool Matcher::match_rule_supported(int opcode) {
return false;
}
break;
case Op_FmaHF:
// UseFMA flag also needs to be checked along with FEAT_FP16
if (!UseFMA || !is_feat_fp16_supported()) {
return false;
}
break;
case Op_AddHF:
case Op_SubHF:
case Op_MulHF:
case Op_DivHF:
case Op_MinHF:
case Op_MaxHF:
case Op_SqrtHF:
// Half-precision floating point scalar operations require FEAT_FP16
// to be available. FEAT_FP16 is enabled if both "fphp" and "asimdhp"
// features are supported.
if (!is_feat_fp16_supported()) {
return false;
}
break;
}

return true; // Per default match rules are supported.
Expand Down Expand Up @@ -4599,6 +4619,15 @@ operand immF0()
interface(CONST_INTER);
%}

// Half Float (FP16) Immediate
// Constant operand that matches a ConH node. It is consumed by rules that
// take an "immH" argument (e.g. a half-float constant load).
operand immH()
%{
match(ConH);
// Matching this operand adds no cost of its own.
op_cost(0);
// The format is intentionally left empty.
format %{ %}
interface(CONST_INTER);
%}

//
operand immFPacked()
%{
Expand Down Expand Up @@ -6942,6 +6971,21 @@ instruct loadConD(vRegD dst, immD con) %{
ins_pipe(fp_load_constant_d);
%}

// Load Half Float Constant
// The constant is fetched from the constant table with "ldrs", i.e. as a
// 32-bit word into a 32-bit FP register. Only the low 16 bits of that word
// carry the half-float value; the upper 16 bits are zero.
instruct loadConH(vRegF dst, immH con) %{
  match(Set dst con);
  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: half float=$con\n\t"
  %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    __ ldrs(dst_reg, $constantaddress($con));
  %}
  ins_pipe(fp_load_constant_s);
%}

// Store Instructions

// Store Byte
Expand Down Expand Up @@ -13606,6 +13650,17 @@ instruct bits_reverse_L(iRegLNoSp dst, iRegL src)
// ============================================================================
// Floating Point Arithmetic Instructions

// Scalar half-precision add: matches (AddHF src1 src2) and emits "faddh".
instruct addHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddHF src1 src2));
  format %{ "faddh $dst, $src1, $src2" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs    = as_FloatRegister($src1$$reg);
    FloatRegister rhs    = as_FloatRegister($src2$$reg);
    __ faddh(result, lhs, rhs);
  %}
  ins_pipe(fp_dop_reg_reg_s);
%}

instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
match(Set dst (AddF src1 src2));

Expand Down Expand Up @@ -13636,6 +13691,17 @@ instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
ins_pipe(fp_dop_reg_reg_d);
%}

// Scalar half-precision subtract: matches (SubHF src1 src2) and emits "fsubh".
instruct subHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubHF src1 src2));
  format %{ "fsubh $dst, $src1, $src2" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs    = as_FloatRegister($src1$$reg);
    FloatRegister rhs    = as_FloatRegister($src2$$reg);
    __ fsubh(result, lhs, rhs);
  %}
  ins_pipe(fp_dop_reg_reg_s);
%}

instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
match(Set dst (SubF src1 src2));

Expand Down Expand Up @@ -13666,6 +13732,17 @@ instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
ins_pipe(fp_dop_reg_reg_d);
%}

// Scalar half-precision multiply: matches (MulHF src1 src2) and emits "fmulh".
instruct mulHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulHF src1 src2));
  format %{ "fmulh $dst, $src1, $src2" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs    = as_FloatRegister($src1$$reg);
    FloatRegister rhs    = as_FloatRegister($src2$$reg);
    __ fmulh(result, lhs, rhs);
  %}
  ins_pipe(fp_dop_reg_reg_s);
%}

instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
match(Set dst (MulF src1 src2));

Expand Down Expand Up @@ -13696,6 +13773,20 @@ instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
ins_pipe(fp_dop_reg_reg_d);
%}

// Half-precision fused multiply-add: src1 * src2 + src3.
// Note the operand order in the match rule: the addend (src3) comes first,
// followed by the (Binary src1 src2) multiplicand pair.
instruct maddHF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  match(Set dst (FmaHF src3 (Binary src1 src2)));
  format %{ "fmaddh $dst, $src1, $src2, $src3" %}
  ins_encode %{
    // This rule must only be selected when FMA usage is enabled.
    assert(UseFMA, "Needs FMA instructions support.");
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister mul_a  = as_FloatRegister($src1$$reg);
    FloatRegister mul_b  = as_FloatRegister($src2$$reg);
    FloatRegister addend = as_FloatRegister($src3$$reg);
    __ fmaddh(result, mul_a, mul_b, addend);
  %}
  ins_pipe(pipe_class_default);
%}

// src1 * src2 + src3
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
match(Set dst (FmaF src3 (Binary src1 src2)));
Expand Down Expand Up @@ -13837,6 +13928,29 @@ instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zer
ins_pipe(pipe_class_default);
%}

// Math.max(HH)H (half-precision float)
// Matches (MaxHF src1 src2) and emits a single "fmaxh".
instruct maxHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MaxHF src1 src2));
  format %{ "fmaxh $dst, $src1, $src2" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs    = as_FloatRegister($src1$$reg);
    FloatRegister rhs    = as_FloatRegister($src2$$reg);
    __ fmaxh(result, lhs, rhs);
  %}
  ins_pipe(fp_dop_reg_reg_s);
%}

// Math.min(HH)H (half-precision float)
// Matches (MinHF src1 src2) and emits a single "fminh".
instruct minHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MinHF src1 src2));
  format %{ "fminh $dst, $src1, $src2" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs    = as_FloatRegister($src1$$reg);
    FloatRegister rhs    = as_FloatRegister($src2$$reg);
    __ fminh(result, lhs, rhs);
  %}
  ins_pipe(fp_dop_reg_reg_s);
%}

// Math.max(FF)F
instruct maxF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
Expand Down Expand Up @@ -13894,6 +14008,16 @@ instruct minD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
ins_pipe(fp_dop_reg_reg_d);
%}

// Scalar half-precision divide: matches (DivHF src1 src2) and emits "fdivh".
instruct divHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivHF src1 src2));
  format %{ "fdivh $dst, $src1, $src2" %}
  ins_encode %{
    FloatRegister quotient = as_FloatRegister($dst$$reg);
    FloatRegister dividend = as_FloatRegister($src1$$reg);
    FloatRegister divisor  = as_FloatRegister($src2$$reg);
    __ fdivh(quotient, dividend, divisor);
  %}
  ins_pipe(fp_div_s);
%}

instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
match(Set dst (DivF src1 src2));
Expand Down Expand Up @@ -14067,6 +14191,16 @@ instruct sqrtF_reg(vRegF dst, vRegF src) %{
ins_pipe(fp_div_d);
%}

// Scalar half-precision square root: matches (SqrtHF src) and emits "fsqrth".
instruct sqrtHF_reg(vRegF dst, vRegF src) %{
  match(Set dst (SqrtHF src));
  format %{ "fsqrth $dst, $src" %}
  ins_encode %{
    FloatRegister result  = as_FloatRegister($dst$$reg);
    FloatRegister operand = as_FloatRegister($src$$reg);
    __ fsqrth(result, operand);
  %}
  ins_pipe(fp_div_s);
%}

// Math.rint, floor, ceil
instruct roundD_reg(vRegD dst, vRegD src, immI rmode) %{
match(Set dst (RoundDoubleMode src rmode));
Expand Down Expand Up @@ -17116,6 +17250,64 @@ instruct expandBitsL_memcon(iRegINoSp dst, memory8 mem, immL mask,
ins_pipe(pipe_slow);
%}

//----------------------------- Reinterpret ----------------------------------
// Move a half-precision float held in a floating point register into a
// general purpose register (ReinterpretHF2S). Implemented with "smov" on
// H element 0 of the source register.
instruct reinterpretHF2S(iRegINoSp dst, vRegF src) %{
  match(Set dst (ReinterpretHF2S src));
  format %{ "reinterpretHF2S $dst, $src" %}
  ins_encode %{
    Register      dst_gpr = $dst$$Register;
    FloatRegister src_fpr = $src$$FloatRegister;
    __ smov(dst_gpr, src_fpr, __ H, 0);
  %}
  ins_pipe(pipe_slow);
%}

// Move a half-precision float held in a general purpose register into a
// floating point register (ReinterpretS2HF). Implemented with a "mov" into
// H element 0 of the destination register.
instruct reinterpretS2HF(vRegF dst, iRegINoSp src) %{
  match(Set dst (ReinterpretS2HF src));
  format %{ "reinterpretS2HF $dst, $src" %}
  ins_encode %{
    FloatRegister dst_fpr = $dst$$FloatRegister;
    Register      src_gpr = $src$$Register;
    __ mov(dst_fpr, __ H, 0, src_gpr);
  %}
  ins_pipe(pipe_slow);
%}

// Fused rule for ReinterpretS2HF (ConvF2HF src).
// Matched separately, the two nodes would expand to three instructions
// (two for ConvF2HF, one for ReinterpretS2HF):
//   fcvt $tmp1_fpr, $src_fpr    // Convert float to half-precision float
//   mov  $tmp2_gpr, $tmp1_fpr   // Move half-precision float in FPR to a GPR
//   mov  $dst_fpr,  $tmp2_gpr   // Move the result from a GPR to an FPR
// The FPR->GPR move of ConvF2HF and the GPR->FPR move of ReinterpretS2HF
// cancel each other out, so this pattern emits only the conversion:
//   fcvt $dst, $src             // Convert float to half-precision float
instruct convF2HFAndS2HF(vRegF dst, vRegF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    FloatRegister half_out  = $dst$$FloatRegister;
    FloatRegister single_in = $src$$FloatRegister;
    __ fcvtsh(half_out, single_in);
  %}
  ins_pipe(pipe_slow);
%}

// Fused rule for ConvHF2F (ReinterpretHF2S src).
// Matched separately, the two nodes would expand to three instructions
// (one for ReinterpretHF2S, two for ConvHF2F):
//   mov  $tmp1_gpr, $src_fpr    // Move the half-precision float from an FPR to a GPR
//   mov  $tmp2_fpr, $tmp1_gpr   // Move the same value from GPR to an FPR
//   fcvt $dst_fpr,  $tmp2_fpr   // Convert the half-precision float to 32-bit float
// Since the input (src) already lives in an FPR, which is what the fcvths
// instruction needs, both moves can be dropped and only the conversion is
// emitted:
//   fcvt $dst, $src             // Convert half-precision float to a 32-bit float
instruct convHF2SAndHF2F(vRegF dst, vRegF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    FloatRegister single_out = $dst$$FloatRegister;
    FloatRegister half_in    = $src$$FloatRegister;
    __ fcvths(single_out, half_in);
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
Expand Down
80 changes: 57 additions & 23 deletions src/hotspot/cpu/aarch64/assembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2032,6 +2032,8 @@ void mvnw(Register Rd, Register Rm,
INSN(fsqrtd, 0b01, 0b000011);
INSN(fcvtd, 0b01, 0b000100); // Double-precision to single-precision

INSN(fsqrth, 0b11, 0b000011); // Half-precision sqrt

private:
void _fcvt_narrow_extend(FloatRegister Vd, SIMD_Arrangement Ta,
FloatRegister Vn, SIMD_Arrangement Tb, bool do_extend) {
Expand Down Expand Up @@ -2059,37 +2061,68 @@ void mvnw(Register Rd, Register Rm,
#undef INSN

// Floating-point data-processing (2 source)
void data_processing(unsigned op31, unsigned type, unsigned opcode,
void data_processing(unsigned op31, unsigned type, unsigned opcode, unsigned op21,
FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {
starti;
f(op31, 31, 29);
f(0b11110, 28, 24);
f(type, 23, 22), f(1, 21), f(opcode, 15, 10);
f(type, 23, 22), f(op21, 21), f(opcode, 15, 10);
rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
}

#define INSN(NAME, op31, type, opcode) \
#define INSN(NAME, op31, type, opcode, op21) \
void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) { \
data_processing(op31, type, opcode, Vd, Vn, Vm); \
}

INSN(fabds, 0b011, 0b10, 0b110101);
INSN(fmuls, 0b000, 0b00, 0b000010);
INSN(fdivs, 0b000, 0b00, 0b000110);
INSN(fadds, 0b000, 0b00, 0b001010);
INSN(fsubs, 0b000, 0b00, 0b001110);
INSN(fmaxs, 0b000, 0b00, 0b010010);
INSN(fmins, 0b000, 0b00, 0b010110);
INSN(fnmuls, 0b000, 0b00, 0b100010);

INSN(fabdd, 0b011, 0b11, 0b110101);
INSN(fmuld, 0b000, 0b01, 0b000010);
INSN(fdivd, 0b000, 0b01, 0b000110);
INSN(faddd, 0b000, 0b01, 0b001010);
INSN(fsubd, 0b000, 0b01, 0b001110);
INSN(fmaxd, 0b000, 0b01, 0b010010);
INSN(fmind, 0b000, 0b01, 0b010110);
INSN(fnmuld, 0b000, 0b01, 0b100010);
data_processing(op31, type, opcode, op21, Vd, Vn, Vm); \
}

INSN(fmuls, 0b000, 0b00, 0b000010, 0b1);
INSN(fdivs, 0b000, 0b00, 0b000110, 0b1);
INSN(fadds, 0b000, 0b00, 0b001010, 0b1);
INSN(fsubs, 0b000, 0b00, 0b001110, 0b1);
INSN(fmaxs, 0b000, 0b00, 0b010010, 0b1);
INSN(fmins, 0b000, 0b00, 0b010110, 0b1);
INSN(fnmuls, 0b000, 0b00, 0b100010, 0b1);

INSN(fmuld, 0b000, 0b01, 0b000010, 0b1);
INSN(fdivd, 0b000, 0b01, 0b000110, 0b1);
INSN(faddd, 0b000, 0b01, 0b001010, 0b1);
INSN(fsubd, 0b000, 0b01, 0b001110, 0b1);
INSN(fmaxd, 0b000, 0b01, 0b010010, 0b1);
INSN(fmind, 0b000, 0b01, 0b010110, 0b1);
INSN(fnmuld, 0b000, 0b01, 0b100010, 0b1);

// Half-precision floating-point instructions
INSN(fmulh, 0b000, 0b11, 0b000010, 0b1);
INSN(fdivh, 0b000, 0b11, 0b000110, 0b1);
INSN(faddh, 0b000, 0b11, 0b001010, 0b1);
INSN(fsubh, 0b000, 0b11, 0b001110, 0b1);
INSN(fmaxh, 0b000, 0b11, 0b010010, 0b1);
INSN(fminh, 0b000, 0b11, 0b010110, 0b1);
INSN(fnmulh, 0b000, 0b11, 0b100010, 0b1);
#undef INSN

// Advanced SIMD scalar three same
//
// Each generated emitter NAME(Vd, Vn, Vm) writes these fields:
//   bits 31:30 = 0b01, bit 29 = U, bits 28:24 = 0b11110,
//   bits 23:22 = size, bit 21 = 1, bits 15:11 = opcode, bit 10 = 1,
//   with Vm, Vn, Vd placed via rf() at positions 16, 5 and 0.
#define INSN(NAME, U, size, opcode)                                     \
  void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {     \
    starti;                                                             \
    f(0b01, 31, 30);                                                    \
    f(U, 29);                                                           \
    f(0b11110, 28, 24);                                                 \
    f(size, 23, 22);                                                    \
    f(1, 21);                                                           \
    rf(Vm, 16);                                                         \
    f(opcode, 15, 11);                                                  \
    f(1, 10);                                                           \
    rf(Vn, 5);                                                          \
    rf(Vd, 0);                                                          \
  }

  // Floating-point Absolute Difference (single-precision)
  INSN(fabds, 0b1, 0b10, 0b11010);
  // Floating-point Absolute Difference (double-precision)
  INSN(fabdd, 0b1, 0b11, 0b11010);

#undef INSN

// Advanced SIMD scalar three same FP16
//
// Each generated emitter NAME(Vd, Vn, Vm) writes these fields:
//   bits 31:30 = 0b01, bit 29 = U, bits 28:24 = 0b11110, bit 23 = a,
//   bits 22:21 = 0b10, bits 15:14 = 0b00, bits 13:11 = opcode, bit 10 = 1,
//   with Vm, Vn, Vd placed via rf() at positions 16, 5 and 0.
#define INSN(NAME, U, a, opcode)                                        \
  void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {     \
    starti;                                                             \
    f(0b01, 31, 30);                                                    \
    f(U, 29);                                                           \
    f(0b11110, 28, 24);                                                 \
    f(a, 23);                                                           \
    f(0b10, 22, 21);                                                    \
    rf(Vm, 16);                                                         \
    f(0b00, 15, 14);                                                    \
    f(opcode, 13, 11);                                                  \
    f(1, 10);                                                           \
    rf(Vn, 5);                                                          \
    rf(Vd, 0);                                                          \
  }

  // Floating-point Absolute Difference (half-precision float)
  INSN(fabdh, 0b1, 0b1, 0b010);

#undef INSN

Expand Down Expand Up @@ -2120,6 +2153,7 @@ void mvnw(Register Rd, Register Rm,
INSN(fnmaddd, 0b000, 0b01, 1, 0);
INSN(fnmsub, 0b000, 0b01, 1, 1);

INSN(fmaddh, 0b000, 0b11, 0, 0); // half-precision fused multiply-add (scalar)
#undef INSN

// Floating-point conditional select
Expand Down
Loading