Skip to content

Commit 98d75f1

Browse files
Bhavana Kilambi, nick-arm
Bhavana Kilambi
authored and committed
8299038: Add AArch64 backend support for auto-vectorized FP16 conversions
Reviewed-by: xgong, ngasson
1 parent cac72a6 commit 98d75f1

File tree

6 files changed

+209
-77
lines changed

6 files changed

+209
-77
lines changed

src/hotspot/cpu/aarch64/aarch64_vector.ad

+48-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//
2-
// Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
3-
// Copyright (c) 2020, 2022, Arm Limited. All rights reserved.
2+
// Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
3+
// Copyright (c) 2020, 2023, Arm Limited. All rights reserved.
44
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
//
66
// This code is free software; you can redistribute it and/or modify it
@@ -4159,6 +4159,52 @@ instruct vcvtDtoF_gt64b(vReg dst, vReg src, vReg tmp) %{
41594159
ins_pipe(pipe_slow);
41604160
%}
41614161

4162+
// VectorCastHF2F
4163+
4164+
instruct vcvtHFtoF(vReg dst, vReg src) %{
4165+
match(Set dst (VectorCastHF2F src));
4166+
format %{ "vcvtHFtoF $dst, $src" %}
4167+
ins_encode %{
4168+
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
4169+
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
4170+
// 4HF to 4F
4171+
__ fcvtl($dst$$FloatRegister, __ T4S, $src$$FloatRegister, __ T4H);
4172+
} else {
4173+
assert(UseSVE > 0, "must be sve");
4174+
__ sve_vector_extend($dst$$FloatRegister, __ S, $src$$FloatRegister, __ H);
4175+
__ sve_fcvt($dst$$FloatRegister, __ S, ptrue, $dst$$FloatRegister, __ H);
4176+
}
4177+
%}
4178+
ins_pipe(pipe_slow);
4179+
%}
4180+
4181+
// VectorCastF2HF
4182+
4183+
instruct vcvtFtoHF_neon(vReg dst, vReg src) %{
4184+
predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
4185+
match(Set dst (VectorCastF2HF src));
4186+
format %{ "vcvtFtoHF_neon $dst, $src\t# 4F to 4HF" %}
4187+
ins_encode %{
4188+
// 4F to 4HF
4189+
__ fcvtn($dst$$FloatRegister, __ T4H, $src$$FloatRegister, __ T4S);
4190+
%}
4191+
ins_pipe(pipe_slow);
4192+
%}
4193+
4194+
instruct vcvtFtoHF_sve(vReg dst, vReg src, vReg tmp) %{
4195+
predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
4196+
match(Set dst (VectorCastF2HF src));
4197+
effect(TEMP_DEF dst, TEMP tmp);
4198+
format %{ "vcvtFtoHF_sve $dst, $src\t# KILL $tmp" %}
4199+
ins_encode %{
4200+
assert(UseSVE > 0, "must be sve");
4201+
__ sve_fcvt($dst$$FloatRegister, __ H, ptrue, $src$$FloatRegister, __ S);
4202+
__ sve_vector_narrow($dst$$FloatRegister, __ H,
4203+
$dst$$FloatRegister, __ S, $tmp$$FloatRegister);
4204+
%}
4205+
ins_pipe(pipe_slow);
4206+
%}
4207+
41624208
// ------------------------------ Replicate ------------------------------------
41634209

41644210
// replicate from reg

src/hotspot/cpu/aarch64/aarch64_vector_ad.m4

+48-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//
2-
// Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
3-
// Copyright (c) 2020, 2022, Arm Limited. All rights reserved.
2+
// Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
3+
// Copyright (c) 2020, 2023, Arm Limited. All rights reserved.
44
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
//
66
// This code is free software; you can redistribute it and/or modify it
@@ -2731,6 +2731,52 @@ instruct vcvtDtoF_gt64b(vReg dst, vReg src, vReg tmp) %{
27312731
ins_pipe(pipe_slow);
27322732
%}
27332733

2734+
// VectorCastHF2F
2735+
2736+
instruct vcvtHFtoF(vReg dst, vReg src) %{
2737+
match(Set dst (VectorCastHF2F src));
2738+
format %{ "vcvtHFtoF $dst, $src" %}
2739+
ins_encode %{
2740+
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
2741+
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
2742+
// 4HF to 4F
2743+
__ fcvtl($dst$$FloatRegister, __ T4S, $src$$FloatRegister, __ T4H);
2744+
} else {
2745+
assert(UseSVE > 0, "must be sve");
2746+
__ sve_vector_extend($dst$$FloatRegister, __ S, $src$$FloatRegister, __ H);
2747+
__ sve_fcvt($dst$$FloatRegister, __ S, ptrue, $dst$$FloatRegister, __ H);
2748+
}
2749+
%}
2750+
ins_pipe(pipe_slow);
2751+
%}
2752+
2753+
// VectorCastF2HF
2754+
2755+
instruct vcvtFtoHF_neon(vReg dst, vReg src) %{
2756+
predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
2757+
match(Set dst (VectorCastF2HF src));
2758+
format %{ "vcvtFtoHF_neon $dst, $src\t# 4F to 4HF" %}
2759+
ins_encode %{
2760+
// 4F to 4HF
2761+
__ fcvtn($dst$$FloatRegister, __ T4H, $src$$FloatRegister, __ T4S);
2762+
%}
2763+
ins_pipe(pipe_slow);
2764+
%}
2765+
2766+
instruct vcvtFtoHF_sve(vReg dst, vReg src, vReg tmp) %{
2767+
predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
2768+
match(Set dst (VectorCastF2HF src));
2769+
effect(TEMP_DEF dst, TEMP tmp);
2770+
format %{ "vcvtFtoHF_sve $dst, $src\t# KILL $tmp" %}
2771+
ins_encode %{
2772+
assert(UseSVE > 0, "must be sve");
2773+
__ sve_fcvt($dst$$FloatRegister, __ H, ptrue, $src$$FloatRegister, __ S);
2774+
__ sve_vector_narrow($dst$$FloatRegister, __ H,
2775+
$dst$$FloatRegister, __ S, $tmp$$FloatRegister);
2776+
%}
2777+
ins_pipe(pipe_slow);
2778+
%}
2779+
27342780
// ------------------------------ Replicate ------------------------------------
27352781

27362782
dnl REPLICATE_INT($1, $2, $3 )

src/hotspot/cpu/aarch64/assembler_aarch64.hpp

+23-3
Original file line numberDiff line numberDiff line change
@@ -3943,9 +3943,29 @@ void sve_fcm(Condition cond, PRegister Pd, SIMD_RegVariant T,
39433943
starti;
39443944
assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q &&
39453945
T_src != T_dst, "invalid register variant");
3946-
guarantee(T_src != H && T_dst != H, "half-precision unsupported");
3947-
f(0b01100101, 31, 24), f(0b11, 23, 22), f(0b0010, 21, 18);
3948-
f(T_dst, 17, 16), f(0b101, 15, 13);
3946+
// The encodings of fields op1 (bits 17-16) and op2 (bits 23-22)
3947+
// depend on T_src and T_dst as given below -
3948+
// +-----+------+---------------------------------------------+
3949+
// | op2 | op1 | Instruction Details |
3950+
// +-----+------+---------------------------------------------+
3951+
// | 10 | 01 | FCVT - half-precision to single-precision |
3952+
// | 11 | 01 | FCVT - half-precision to double-precision |
3953+
// | 10 | 00 | FCVT - single-precision to half-precision |
3954+
// | 11 | 11 | FCVT - single-precision to double-precision |
3955+
// | 11 | 00 | FCVT - double-precision to half-precision |
3956+
// | 11 | 10 | FCVT - double-precision to single-precision |
3957+
// +-----+------+---------------------------------------------+
3958+
int op1 = 0b00;
3959+
int op2 = (T_src == D || T_dst == D) ? 0b11 : 0b10;
3960+
if (T_src == H) {
3961+
op1 = 0b01;
3962+
} else if (T_dst == S) {
3963+
op1 = 0b10;
3964+
} else if (T_dst == D) {
3965+
op1 = 0b11;
3966+
}
3967+
f(0b01100101, 31, 24), f(op2, 23, 22), f(0b0010, 21, 18);
3968+
f(op1, 17, 16), f(0b101, 15, 13);
39493969
pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
39503970
}
39513971

test/hotspot/gtest/aarch64/aarch64-asmtest.py

+4
Original file line numberDiff line numberDiff line change
@@ -1772,6 +1772,10 @@ def generate(kind, names):
17721772
["scvtf", "__ sve_scvtf(z6, __ H, p3, z1, __ H);", "scvtf\tz6.h, p3/m, z1.h"],
17731773
["fcvt", "__ sve_fcvt(z5, __ D, p3, z4, __ S);", "fcvt\tz5.d, p3/m, z4.s"],
17741774
["fcvt", "__ sve_fcvt(z1, __ S, p3, z0, __ D);", "fcvt\tz1.s, p3/m, z0.d"],
1775+
["fcvt", "__ sve_fcvt(z5, __ S, p3, z4, __ H);", "fcvt\tz5.s, p3/m, z4.h"],
1776+
["fcvt", "__ sve_fcvt(z1, __ H, p3, z0, __ S);", "fcvt\tz1.h, p3/m, z0.s"],
1777+
["fcvt", "__ sve_fcvt(z5, __ D, p3, z4, __ H);", "fcvt\tz5.d, p3/m, z4.h"],
1778+
["fcvt", "__ sve_fcvt(z1, __ H, p3, z0, __ D);", "fcvt\tz1.h, p3/m, z0.d"],
17751779
["fcvtzs", "__ sve_fcvtzs(z19, __ D, p2, z1, __ D);", "fcvtzs\tz19.d, p2/m, z1.d"],
17761780
["fcvtzs", "__ sve_fcvtzs(z9, __ S, p1, z8, __ S);", "fcvtzs\tz9.s, p1/m, z8.s"],
17771781
["fcvtzs", "__ sve_fcvtzs(z1, __ S, p2, z0, __ D);", "fcvtzs\tz1.s, p2/m, z0.d"],

test/hotspot/gtest/aarch64/asmtest.out.h

+26-21
Original file line numberDiff line numberDiff line change
@@ -915,6 +915,10 @@
915915
__ sve_scvtf(z6, __ H, p3, z1, __ H); // scvtf z6.h, p3/m, z1.h
916916
__ sve_fcvt(z5, __ D, p3, z4, __ S); // fcvt z5.d, p3/m, z4.s
917917
__ sve_fcvt(z1, __ S, p3, z0, __ D); // fcvt z1.s, p3/m, z0.d
918+
__ sve_fcvt(z5, __ S, p3, z4, __ H); // fcvt z5.s, p3/m, z4.h
919+
__ sve_fcvt(z1, __ H, p3, z0, __ S); // fcvt z1.h, p3/m, z0.s
920+
__ sve_fcvt(z5, __ D, p3, z4, __ H); // fcvt z5.d, p3/m, z4.h
921+
__ sve_fcvt(z1, __ H, p3, z0, __ D); // fcvt z1.h, p3/m, z0.d
918922
__ sve_fcvtzs(z19, __ D, p2, z1, __ D); // fcvtzs z19.d, p2/m, z1.d
919923
__ sve_fcvtzs(z9, __ S, p1, z8, __ S); // fcvtzs z9.s, p1/m, z8.s
920924
__ sve_fcvtzs(z1, __ S, p2, z0, __ D); // fcvtzs z1.s, p2/m, z0.d
@@ -1245,30 +1249,30 @@
12451249
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
12461250
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
12471251
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
1248-
0x14000000, 0x17ffffd7, 0x14000405, 0x94000000,
1249-
0x97ffffd4, 0x94000402, 0x3400000a, 0x34fffa2a,
1250-
0x34007fea, 0x35000008, 0x35fff9c8, 0x35007f88,
1251-
0xb400000b, 0xb4fff96b, 0xb4007f2b, 0xb500001d,
1252-
0xb5fff91d, 0xb5007edd, 0x10000013, 0x10fff8b3,
1253-
0x10007e73, 0x90000013, 0x36300016, 0x3637f836,
1254-
0x36307df6, 0x3758000c, 0x375ff7cc, 0x37587d8c,
1252+
0x14000000, 0x17ffffd7, 0x14000409, 0x94000000,
1253+
0x97ffffd4, 0x94000406, 0x3400000a, 0x34fffa2a,
1254+
0x3400806a, 0x35000008, 0x35fff9c8, 0x35008008,
1255+
0xb400000b, 0xb4fff96b, 0xb4007fab, 0xb500001d,
1256+
0xb5fff91d, 0xb5007f5d, 0x10000013, 0x10fff8b3,
1257+
0x10007ef3, 0x90000013, 0x36300016, 0x3637f836,
1258+
0x36307e76, 0x3758000c, 0x375ff7cc, 0x37587e0c,
12551259
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
12561260
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
12571261
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
12581262
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
1259-
0x54007b60, 0x54000001, 0x54fff541, 0x54007b01,
1260-
0x54000002, 0x54fff4e2, 0x54007aa2, 0x54000002,
1261-
0x54fff482, 0x54007a42, 0x54000003, 0x54fff423,
1262-
0x540079e3, 0x54000003, 0x54fff3c3, 0x54007983,
1263-
0x54000004, 0x54fff364, 0x54007924, 0x54000005,
1264-
0x54fff305, 0x540078c5, 0x54000006, 0x54fff2a6,
1265-
0x54007866, 0x54000007, 0x54fff247, 0x54007807,
1266-
0x54000008, 0x54fff1e8, 0x540077a8, 0x54000009,
1267-
0x54fff189, 0x54007749, 0x5400000a, 0x54fff12a,
1268-
0x540076ea, 0x5400000b, 0x54fff0cb, 0x5400768b,
1269-
0x5400000c, 0x54fff06c, 0x5400762c, 0x5400000d,
1270-
0x54fff00d, 0x540075cd, 0x5400000e, 0x54ffefae,
1271-
0x5400756e, 0x5400000f, 0x54ffef4f, 0x5400750f,
1263+
0x54007be0, 0x54000001, 0x54fff541, 0x54007b81,
1264+
0x54000002, 0x54fff4e2, 0x54007b22, 0x54000002,
1265+
0x54fff482, 0x54007ac2, 0x54000003, 0x54fff423,
1266+
0x54007a63, 0x54000003, 0x54fff3c3, 0x54007a03,
1267+
0x54000004, 0x54fff364, 0x540079a4, 0x54000005,
1268+
0x54fff305, 0x54007945, 0x54000006, 0x54fff2a6,
1269+
0x540078e6, 0x54000007, 0x54fff247, 0x54007887,
1270+
0x54000008, 0x54fff1e8, 0x54007828, 0x54000009,
1271+
0x54fff189, 0x540077c9, 0x5400000a, 0x54fff12a,
1272+
0x5400776a, 0x5400000b, 0x54fff0cb, 0x5400770b,
1273+
0x5400000c, 0x54fff06c, 0x540076ac, 0x5400000d,
1274+
0x54fff00d, 0x5400764d, 0x5400000e, 0x54ffefae,
1275+
0x540075ee, 0x5400000f, 0x54ffef4f, 0x5400758f,
12721276
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
12731277
0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f,
12741278
0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf,
@@ -1434,7 +1438,8 @@
14341438
0x05733820, 0x05b238a4, 0x05f138e6, 0x0570396a,
14351439
0x65d0a001, 0x65d6a443, 0x65d4a826, 0x6594ac26,
14361440
0x6554ac26, 0x6556ac26, 0x6552ac26, 0x65cbac85,
1437-
0x65caac01, 0x65dea833, 0x659ca509, 0x65d8a801,
1441+
0x65caac01, 0x6589ac85, 0x6588ac01, 0x65c9ac85,
1442+
0x65c8ac01, 0x65dea833, 0x659ca509, 0x65d8a801,
14381443
0x65dcac01, 0x655cb241, 0x0520a1e0, 0x0521a601,
14391444
0x052281e0, 0x05238601, 0x04a14026, 0x042244a6,
14401445
0x046344a6, 0x04a444a6, 0x04e544a7, 0x0568aca7,
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -26,70 +26,81 @@
2626
* @bug 8294588
2727
* @summary Auto-vectorize Float.floatToFloat16, Float.float16ToFloat APIs
2828
* @requires vm.compiler2.enabled
29-
* @requires os.simpleArch == "x64"
29+
* @requires (os.simpleArch == "x64" & (vm.cpu.features ~= ".*avx512f.*" | vm.cpu.features ~= ".*f16c.*")) | os.arch == "aarch64"
3030
* @library /test/lib /
3131
* @run driver compiler.vectorization.TestFloatConversionsVector
3232
*/
3333

3434
package compiler.vectorization;
3535

3636
import compiler.lib.ir_framework.*;
37+
import jdk.test.lib.Asserts;
3738

3839
public class TestFloatConversionsVector {
39-
private static final int ARRLEN = 1024;
40-
private static final int ITERS = 11000;
41-
private static float [] finp;
42-
private static short [] sout;
43-
private static short [] sinp;
44-
private static float [] fout;
40+
private static final int ARRLEN = 1024;
41+
private static final int ITERS = 11000;
42+
private static float [] finp;
43+
private static short [] sout;
44+
private static short [] sinp;
45+
private static float [] fout;
4546

46-
public static void main(String args[]) {
47-
TestFramework.runWithFlags("-XX:-TieredCompilation",
48-
"-XX:CompileThresholdScaling=0.3");
49-
System.out.println("PASSED");
50-
}
47+
public static void main(String args[]) {
48+
TestFramework.runWithFlags("-XX:-TieredCompilation",
49+
"-XX:CompileThresholdScaling=0.3");
50+
System.out.println("PASSED");
51+
}
5152

52-
@Test
53-
@IR(counts = {IRNode.VECTOR_CAST_F2HF, "> 0"}, applyIfCPUFeatureOr = {"avx512f", "true", "f16c", "true"})
54-
public void test_float_float16(short[] sout, float[] finp) {
55-
for (int i = 0; i < finp.length; i++) {
56-
sout[i] = Float.floatToFloat16(finp[i]);
57-
}
58-
}
53+
@Test
54+
@IR(counts = {IRNode.VECTOR_CAST_F2HF, "> 0"})
55+
public void test_float_float16(short[] sout, float[] finp) {
56+
for (int i = 0; i < finp.length; i++) {
57+
sout[i] = Float.floatToFloat16(finp[i]);
58+
}
59+
}
5960

60-
@Run(test = {"test_float_float16"}, mode = RunMode.STANDALONE)
61-
public void kernel_test_float_float16() {
62-
finp = new float[ARRLEN];
63-
sout = new short[ARRLEN];
61+
@Run(test = {"test_float_float16"}, mode = RunMode.STANDALONE)
62+
public void kernel_test_float_float16() {
63+
finp = new float[ARRLEN];
64+
sout = new short[ARRLEN];
6465

65-
for (int i = 0; i < ARRLEN; i++) {
66-
finp[i] = (float) i * 1.4f;
67-
}
66+
for (int i = 0; i < ARRLEN; i++) {
67+
finp[i] = (float) i * 1.4f;
68+
}
6869

69-
for (int i = 0; i < ITERS; i++) {
70-
test_float_float16(sout, finp);
71-
}
72-
}
70+
for (int i = 0; i < ITERS; i++) {
71+
test_float_float16(sout, finp);
72+
}
7373

74-
@Test
75-
@IR(counts = {IRNode.VECTOR_CAST_HF2F, "> 0"}, applyIfCPUFeatureOr = {"avx512f", "true", "f16c", "true"})
76-
public void test_float16_float(float[] fout, short[] sinp) {
77-
for (int i = 0; i < sinp.length; i++) {
78-
fout[i] = Float.float16ToFloat(sinp[i]);
79-
}
80-
}
74+
// Verifying the result
75+
for (int i = 0; i < ARRLEN; i++) {
76+
Asserts.assertEquals(Float.floatToFloat16(finp[i]), sout[i]);
77+
}
78+
}
8179

82-
@Run(test = {"test_float16_float"}, mode = RunMode.STANDALONE)
83-
public void kernel_test_float16_float() {
84-
sinp = new short[ARRLEN];
85-
fout = new float[ARRLEN];
80+
@Test
81+
@IR(counts = {IRNode.VECTOR_CAST_HF2F, "> 0"})
82+
public void test_float16_float(float[] fout, short[] sinp) {
83+
for (int i = 0; i < sinp.length; i++) {
84+
fout[i] = Float.float16ToFloat(sinp[i]);
85+
}
86+
}
8687

87-
for (int i = 0; i < ARRLEN; i++) {
88-
sinp[i] = (short)i;
89-
}
88+
@Run(test = {"test_float16_float"}, mode = RunMode.STANDALONE)
89+
public void kernel_test_float16_float() {
90+
sinp = new short[ARRLEN];
91+
fout = new float[ARRLEN];
9092

91-
for (int i = 0; i < ITERS; i++) {
92-
test_float16_float(fout , sinp);
93-
}
94-
}
93+
for (int i = 0; i < ARRLEN; i++) {
94+
sinp[i] = (short)i;
95+
}
96+
97+
for (int i = 0; i < ITERS; i++) {
98+
test_float16_float(fout, sinp);
99+
}
100+
101+
// Verifying the result
102+
for (int i = 0; i < ARRLEN; i++) {
103+
Asserts.assertEquals(Float.float16ToFloat(sinp[i]), fout[i]);
104+
}
105+
}
95106
}

0 commit comments

Comments
 (0)