Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8262355: Support for AVX-512 opmask register allocation. #2768

Closed
wants to merge 23 commits into from
Closed
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
9e1c3e0
8262355: Support for AVX-512 opmask register allocation.
Feb 28, 2021
69003aa
8262355: Fix for AARCH64 build failure.
Feb 28, 2021
ffacbc6
8262355: Creating a new ideal type TypeVectMask for mask generating n…
Mar 4, 2021
4eaa3d8
8262355: Some synthetic changes for cleanup.
Mar 4, 2021
4fadca5
8262355 : Review comments resolution and deopt handling for mask gene…
Mar 10, 2021
f82741e
8262355: Fix for hs-minimal and windows build failures.
Mar 10, 2021
72f02a9
8262355: Fix for windows build failure.
Mar 10, 2021
a46b2bf
8262355: Removing object re-materialization handling for mask-generat…
Mar 11, 2021
df16ac0
8262355: Review comments resolution.
Mar 16, 2021
fcf2cce
8262355: Fix build failure
Mar 16, 2021
847fdce
8262355: Review comments resolutions.
Mar 17, 2021
f1748bf
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8262355
Mar 17, 2021
29e889e
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8262355
Mar 18, 2021
7751342
8262355: Review comments resolution
Mar 18, 2021
661fbda
8262355: Extending Type::isa_vect and Type::is_vect routines to TypeV…
Mar 21, 2021
8a05fbb
8262355: Review comments resolution.
Mar 24, 2021
837428d
8262355: Adding missed safety check.
Mar 25, 2021
13e791a
8262355: Updating copywriter for edited files.
Mar 28, 2021
eeea2d7
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8262355
Mar 29, 2021
5aa0730
8262355: Review comments resolutions.
Apr 1, 2021
366641a
8262355: Fix AARCH64 build issue
Apr 1, 2021
d6bec3d
Merge http://github.com/openjdk/jdk into JDK-8262355
Apr 1, 2021
b9810d2
8262355: Rebasing patch, 32bit clean-up.
Apr 2, 2021
File filter
Filter file types
Jump to
Jump to file
Failed to load files.

Always

Just for now

@@ -467,22 +467,22 @@ reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
// ----------------------------
// SVE Predicate Registers
// ----------------------------
reg_def P0 (SOC, SOC, Op_RegVMask, 0, p0->as_VMReg());
reg_def P1 (SOC, SOC, Op_RegVMask, 1, p1->as_VMReg());
reg_def P2 (SOC, SOC, Op_RegVMask, 2, p2->as_VMReg());
reg_def P3 (SOC, SOC, Op_RegVMask, 3, p3->as_VMReg());
reg_def P4 (SOC, SOC, Op_RegVMask, 4, p4->as_VMReg());
reg_def P5 (SOC, SOC, Op_RegVMask, 5, p5->as_VMReg());
reg_def P6 (SOC, SOC, Op_RegVMask, 6, p6->as_VMReg());
reg_def P7 (SOC, SOC, Op_RegVMask, 7, p7->as_VMReg());
reg_def P8 (SOC, SOC, Op_RegVMask, 8, p8->as_VMReg());
reg_def P9 (SOC, SOC, Op_RegVMask, 9, p9->as_VMReg());
reg_def P10 (SOC, SOC, Op_RegVMask, 10, p10->as_VMReg());
reg_def P11 (SOC, SOC, Op_RegVMask, 11, p11->as_VMReg());
reg_def P12 (SOC, SOC, Op_RegVMask, 12, p12->as_VMReg());
reg_def P13 (SOC, SOC, Op_RegVMask, 13, p13->as_VMReg());
reg_def P14 (SOC, SOC, Op_RegVMask, 14, p14->as_VMReg());
reg_def P15 (SOC, SOC, Op_RegVMask, 15, p15->as_VMReg());
reg_def P0 (SOC, SOC, Op_RegVectMask, 0, p0->as_VMReg());
reg_def P1 (SOC, SOC, Op_RegVectMask, 1, p1->as_VMReg());
reg_def P2 (SOC, SOC, Op_RegVectMask, 2, p2->as_VMReg());
reg_def P3 (SOC, SOC, Op_RegVectMask, 3, p3->as_VMReg());
reg_def P4 (SOC, SOC, Op_RegVectMask, 4, p4->as_VMReg());
reg_def P5 (SOC, SOC, Op_RegVectMask, 5, p5->as_VMReg());
reg_def P6 (SOC, SOC, Op_RegVectMask, 6, p6->as_VMReg());
reg_def P7 (SOC, SOC, Op_RegVectMask, 7, p7->as_VMReg());
reg_def P8 (SOC, SOC, Op_RegVectMask, 8, p8->as_VMReg());
reg_def P9 (SOC, SOC, Op_RegVectMask, 9, p9->as_VMReg());
reg_def P10 (SOC, SOC, Op_RegVectMask, 10, p10->as_VMReg());
reg_def P11 (SOC, SOC, Op_RegVectMask, 11, p11->as_VMReg());
reg_def P12 (SOC, SOC, Op_RegVectMask, 12, p12->as_VMReg());
reg_def P13 (SOC, SOC, Op_RegVectMask, 13, p13->as_VMReg());
reg_def P14 (SOC, SOC, Op_RegVectMask, 14, p14->as_VMReg());
reg_def P15 (SOC, SOC, Op_RegVectMask, 15, p15->as_VMReg());

// ----------------------------
// Special Registers
@@ -2439,6 +2439,14 @@ const bool Matcher::has_predicated_vectors(void) {
return UseSVE > 0;
}

// Register mask used for predicate registers: hands back the address of
// _PR_REG_mask.
const RegMask* Matcher::predicate_reg_mask(void) {
  const RegMask* predicate_mask = &_PR_REG_mask;
  return predicate_mask;
}

// Builds the ideal type for a predicate (mask) value: a freshly allocated
// TypeVectMask constructed from the element type and the lane count.
//   elemTy - element type passed through to TypeVectMask
//   length - length passed through to TypeVectMask
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
  const TypeVect* mask_type = new TypeVectMask(elemTy, length);
  return mask_type;
}

bool Matcher::supports_vector_variable_shifts(void) {
return true;
}
@@ -5601,7 +5609,7 @@ operand vRegD_V31()
operand pRegGov()
%{
constraint(ALLOC_IN_RC(gov_pr));
match(RegVMask);
match(RegVectMask);
op_cost(0);
format %{ %}
interface(REG_INTER);
@@ -1,5 +1,5 @@
//
// Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -993,6 +993,14 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}

// Predicate register mask hook. has_predicated_vectors() in this file returns
// false, and this hook correspondingly has no mask to offer: it returns NULL.
const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}

// Predicate register type hook. Both arguments are ignored and NULL is
// returned; has_predicated_vectors() in this file returns false.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return NULL;
}

This comment has been minimized.

@nsjian

nsjian Mar 17, 2021
Contributor

Since you have changed this file, I think you need to update the copyright header year from 2020 to 2021. (And all the other files you touched.)


bool Matcher::supports_vector_variable_shifts(void) {
return VM_Version::has_simd();
}
@@ -2156,6 +2156,14 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}

// Predicate register mask hook. has_predicated_vectors() in this file returns
// false, and this hook correspondingly has no mask to offer: it returns NULL.
const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}

// Predicate register type hook. Both arguments are ignored and NULL is
// returned; has_predicated_vectors() in this file returns false.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return NULL;
}

bool Matcher::supports_vector_variable_shifts(void) {
return false; // not supported
}
@@ -1546,6 +1546,14 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}

// Predicate register mask hook. has_predicated_vectors() in this file returns
// false, and this hook correspondingly has no mask to offer: it returns NULL.
const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}

// Predicate register type hook. Both arguments are ignored and NULL is
// returned; has_predicated_vectors() in this file returns false.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return NULL;
}

bool Matcher::supports_vector_variable_shifts(void) {
return false; // not supported
}
@@ -2452,6 +2452,22 @@ void Assembler::kmovwl(KRegister dst, Address src) {
emit_operand((Register)dst, src);
}

// kmovw, store form: emits the instruction that moves opmask register 'src'
// to the memory operand 'dst'. Asserts that EVEX is supported.
void Assembler::kmovwl(Address dst, KRegister src) {
assert(VM_Version::supports_evex(), "");
// The mark is created before any byte of the instruction is emitted and
// stays alive until the end of this function.
InstructionMark im(this);
// 128-bit vector length, W bit clear, legacy (VEX) mode, no mask register, no VL dependence.
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
// Prefix for the memory-operand form: no SIMD prefix, 0F opcode map.
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
// Opcode byte 0x91.
emit_int8((unsigned char)0x91);
// Operand bytes for 'dst'; the mask register is cast to Register because
// emit_operand is called with a Register here.
emit_operand((Register)src, dst);
}

// kmovw, register-to-register form: copies opmask register 'src' into 'dst'.
//
// KMOVW belongs to the AVX-512 Foundation instructions, so plain EVEX support
// is sufficient, which is also what the memory-store kmovwl overload in this
// file asserts. Requiring AVX512BW here would trip the assert in debug builds
// on EVEX hardware that lacks BW, even though the emitted instruction is legal
// there. AVX512BW is only needed for the d/q variants.
void Assembler::kmovwl(KRegister dst, KRegister src) {
  assert(VM_Version::supports_evex(), "");
  // 128-bit vector length, W bit clear, legacy (VEX) mode, no mask register, no VL dependence.
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  // Prefix with no SIMD prefix byte in the 0F opcode map; 'encode' carries the
  // register fields for the ModRM byte.
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  // Opcode 0x90 followed by a register-direct ModRM byte (mod bits = 11).
  emit_int16((unsigned char)0x90, (0xC0 | encode));
}

void Assembler::kmovdl(KRegister dst, Register src) {
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -1459,6 +1459,8 @@ class Assembler : public AbstractAssembler {
void kmovwl(KRegister dst, Register src);
void kmovwl(KRegister dst, Address src);
void kmovwl(Register dst, KRegister src);
void kmovwl(Address dst, KRegister src);
void kmovwl(KRegister dst, KRegister src);
void kmovdl(KRegister dst, Register src);
void kmovdl(Register dst, KRegister src);
void kmovql(KRegister dst, KRegister src);
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -49,18 +49,18 @@ inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vle
}
}

void C2_MacroAssembler::setvectmask(Register dst, Register src) {
void C2_MacroAssembler::setvectmask(Register dst, Register src, KRegister mask) {
guarantee(PostLoopMultiversioning, "must be");
Assembler::movl(dst, 1);
Assembler::shlxl(dst, dst, src);
Assembler::decl(dst);
Assembler::kmovdl(k1, dst);
Assembler::kmovdl(mask, dst);
Assembler::movl(dst, src);
}

void C2_MacroAssembler::restorevectmask() {
void C2_MacroAssembler::restorevectmask(KRegister mask) {
guarantee(PostLoopMultiversioning, "must be");
Assembler::knotwl(k1, k0);
Assembler::knotwl(mask, k0);
}

#if INCLUDE_RTM_OPT
@@ -1893,10 +1893,11 @@ void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMReg
reduce4L(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
}

void C2_MacroAssembler::genmask(Register dst, Register len, Register temp) {
void C2_MacroAssembler::genmask(KRegister dst, Register len, Register temp) {
assert(ArrayCopyPartialInlineSize <= 64,"");
mov64(dst, -1L);
bzhiq(dst, dst, len);
mov64(temp, -1L);
bzhiq(temp, temp, len);
kmovql(dst, temp);
}
#endif // _LP64

@@ -2154,7 +2155,8 @@ void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask
}
}

void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
XMMRegister vtmp1, XMMRegister vtmp2, KRegister mask) {
switch(vlen) {
case 4:
assert(vtmp1 != xnoreg, "required.");
@@ -2192,14 +2194,13 @@ void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegist
break;
case 64:
{
KRegister ktemp = k2; // Use a hardcoded temp due to no k register allocation.
assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.");
evpcmpeqb(ktemp, src1, src2, Assembler::AVX_512bit);
evpcmpeqb(mask, src1, src2, Assembler::AVX_512bit);
if (bt == BoolTest::ne) {
ktestql(ktemp, ktemp);
ktestql(mask, mask);
} else {
assert(bt == BoolTest::overflow, "required");
kortestql(ktemp, ktemp);
kortestql(mask, mask);
}
}
break;
@@ -2916,7 +2917,7 @@ void C2_MacroAssembler::load_next_elements(Register elem1, Register elem2, Regis
// Compare strings, used for char[] and byte[].
void C2_MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
XMMRegister vec1, int ae) {
XMMRegister vec1, int ae, KRegister mask) {
ShortBranchVerifier sbv(this);
Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
Label COMPARE_WIDE_VECTORS_LOOP_FAILED; // used only _LP64 && AVX3
@@ -3069,12 +3070,12 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop
if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
evmovdquq(vec1, Address(str1, result, scale), Assembler::AVX_512bit);
evpcmpeqb(k7, vec1, Address(str2, result, scale), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
evpcmpeqb(mask, vec1, Address(str2, result, scale), Assembler::AVX_512bit); // mask == 11..11, if operands equal, otherwise mask has some 0
} else {
vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_512bit);
evpcmpeqb(k7, vec1, Address(str2, result, scale2), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
evpcmpeqb(mask, vec1, Address(str2, result, scale2), Assembler::AVX_512bit); // mask == 11..11, if operands equal, otherwise mask has some 0
}
kortestql(k7, k7);
kortestql(mask, mask);
jcc(Assembler::aboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED); // miscompare
addptr(result, stride2x2); // update since we already compared at this addr
subl(cnt2, stride2x2); // and sub the size too
@@ -3258,7 +3259,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,

bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);

kmovql(cnt1, k7);
kmovql(cnt1, mask);
notq(cnt1);
bsfq(cnt2, cnt1);
if (ae != StrIntrinsicNode::LL) {
@@ -3307,7 +3308,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
// }
void C2_MacroAssembler::has_negatives(Register ary1, Register len,
Register result, Register tmp1,
XMMRegister vec1, XMMRegister vec2) {
XMMRegister vec1, XMMRegister vec2, KRegister mask1, KRegister mask2) {
// rsi: byte array
// rcx: len
// rax: result
@@ -3339,8 +3340,8 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,

bind(test_64_loop);
// Check whether our 64 elements of size byte contain negatives
evpcmpgtb(k2, vec2, Address(ary1, len, Address::times_1), Assembler::AVX_512bit);
kortestql(k2, k2);
evpcmpgtb(mask1, vec2, Address(ary1, len, Address::times_1), Assembler::AVX_512bit);
kortestql(mask1, mask1);
jcc(Assembler::notZero, TRUE_LABEL);

addptr(len, 64);
@@ -3357,7 +3358,7 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
mov64(tmp3_aliased, 0xFFFFFFFFFFFFFFFF);
shlxq(tmp3_aliased, tmp3_aliased, tmp1);
notq(tmp3_aliased);
kmovql(k3, tmp3_aliased);
kmovql(mask2, tmp3_aliased);
#else
Label k_init;
jmp(k_init);
@@ -3382,11 +3383,11 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
lea(len, InternalAddress(tmp));
// create mask to test for negative byte inside a vector
evpbroadcastb(vec1, tmp1, Assembler::AVX_512bit);
evpcmpgtb(k3, vec1, Address(len, 0), Assembler::AVX_512bit);
evpcmpgtb(mask2, vec1, Address(len, 0), Assembler::AVX_512bit);

#endif
evpcmpgtb(k2, k3, vec2, Address(ary1, 0), Assembler::AVX_512bit);
ktestq(k2, k3);
evpcmpgtb(mask1, mask2, vec2, Address(ary1, 0), Assembler::AVX_512bit);
ktestq(mask1, mask2);
jcc(Assembler::notZero, TRUE_LABEL);

jmp(FALSE_LABEL);
@@ -3513,7 +3514,7 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
// Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register result, Register chr,
XMMRegister vec1, XMMRegister vec2, bool is_char) {
XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask) {
ShortBranchVerifier sbv(this);
Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE;

@@ -3576,8 +3577,8 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop

evmovdquq(vec1, Address(ary1, limit, Address::times_1), Assembler::AVX_512bit);
evpcmpeqb(k7, vec1, Address(ary2, limit, Address::times_1), Assembler::AVX_512bit);
kortestql(k7, k7);
evpcmpeqb(mask, vec1, Address(ary2, limit, Address::times_1), Assembler::AVX_512bit);
kortestql(mask, mask);
jcc(Assembler::aboveEqual, FALSE_LABEL); // miscompare
addptr(limit, 64); // update since we already compared at this addr
cmpl(limit, -64);
@@ -3594,8 +3595,8 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
//
addptr(result, -64); // it is safe, bc we just came from this area
evmovdquq(vec1, Address(ary1, result, Address::times_1), Assembler::AVX_512bit);
evpcmpeqb(k7, vec1, Address(ary2, result, Address::times_1), Assembler::AVX_512bit);
kortestql(k7, k7);
evpcmpeqb(mask, vec1, Address(ary2, result, Address::times_1), Assembler::AVX_512bit);
kortestql(mask, mask);
jcc(Assembler::aboveEqual, FALSE_LABEL); // miscompare

jmp(TRUE_LABEL);
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -31,8 +31,8 @@
Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);

// special instructions for EVEX
void setvectmask(Register dst, Register src);
void restorevectmask();
void setvectmask(Register dst, Register src, KRegister mask);
void restorevectmask(KRegister mask);

// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
// See full description in macroAssembler_x86.cpp.
@@ -131,7 +131,7 @@

// vector test
void vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg);
XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg, KRegister mask = knoreg);

// blend
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
@@ -146,7 +146,7 @@
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void genmask(Register dst, Register len, Register temp);
void genmask(KRegister dst, Register len, Register temp);
#endif // _LP64

// dst = reduce(op, src2) using vtmp as temps
@@ -244,17 +244,17 @@
// Compare strings.
void string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
XMMRegister vec1, int ae);
XMMRegister vec1, int ae, KRegister mask = knoreg);

// Search for Non-ASCII character (Negative byte value) in a byte array,
// return true if it has any and false otherwise.
void has_negatives(Register ary1, Register len,
Register result, Register tmp1,
XMMRegister vec1, XMMRegister vec2);
XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg);

// Compare char[] or byte[] arrays.
void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register result, Register chr,
XMMRegister vec1, XMMRegister vec2, bool is_char);
XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask = knoreg);

#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP
ProTip! Use n and p to navigate between commits in a pull request.