Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8262355: Support for AVX-512 opmask register allocation. #2768

Closed
wants to merge 23 commits into from
Closed
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
9e1c3e0
8262355: Support for AVX-512 opmask register allocation.
Feb 28, 2021
69003aa
8262355: Fix for AARCH64 build failure.
Feb 28, 2021
ffacbc6
8262355: Creating a new ideal type TypeVectMask for mask generating n…
Mar 4, 2021
4eaa3d8
8262355: Some synthetic changes for cleanup.
Mar 4, 2021
4fadca5
8262355 : Review comments resolution and deopt handling for mask gene…
Mar 10, 2021
f82741e
8262355: Fix for hs-minimal and windows build failures.
Mar 10, 2021
72f02a9
8262355: Fix for windows build failure.
Mar 10, 2021
a46b2bf
8262355: Removing object re-materialization handling for mask-generat…
Mar 11, 2021
df16ac0
8262355: Review comments resolution.
Mar 16, 2021
fcf2cce
8262355: Fix build failure
Mar 16, 2021
847fdce
8262355: Review comments resolutions.
Mar 17, 2021
f1748bf
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8262355
Mar 17, 2021
29e889e
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8262355
Mar 18, 2021
7751342
8262355: Review comments resolution
Mar 18, 2021
661fbda
8262355: Extending Type::isa_vect and Type::is_vect routines to TypeV…
Mar 21, 2021
8a05fbb
8262355: Review comments resolution.
Mar 24, 2021
837428d
8262355: Adding missed safety check.
Mar 25, 2021
13e791a
8262355: Updating copywriter for edited files.
Mar 28, 2021
eeea2d7
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8262355
Mar 29, 2021
5aa0730
8262355: Review comments resolutions.
Apr 1, 2021
366641a
8262355: Fix AARCH64 build issue
Apr 1, 2021
d6bec3d
Merge http://github.com/openjdk/jdk into JDK-8262355
Apr 1, 2021
b9810d2
8262355: Rebasing patch, 32bit clean-up.
Apr 2, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/hotspot/cpu/aarch64/aarch64.ad
Expand Up @@ -2439,6 +2439,14 @@ const bool Matcher::has_predicated_vectors(void) {
return UseSVE > 0;
}

// Register mask from which C2 may allocate vector-predicate (mask) values.
// On AArch64 this is the SVE predicate (P) register file — presumably only
// meaningful when UseSVE > 0 (see has_predicated_vectors above); confirm
// callers guard on that.
const RegMask* Matcher::predicate_reg_mask(void) {
return &_PR_REG_mask;
}

// Build the ideal type used for a mask-generating vector node: a
// TypeVectMask describing 'length' lanes of element type 'elemTy'.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return new TypeVectMask(elemTy, length);
}

// AArch64 supports vector shifts where the per-lane shift amount comes
// from a vector register (variable shifts), so always true here.
bool Matcher::supports_vector_variable_shifts(void) {
return true;
}
Expand Down
10 changes: 9 additions & 1 deletion src/hotspot/cpu/arm/arm.ad
@@ -1,5 +1,5 @@
//
// Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -993,6 +993,14 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}

// No predicate/mask register file on this platform (has_predicated_vectors
// returns false above), so there is no mask for the allocator to use.
const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}

// No predicate registers on this platform: no ideal mask type exists,
// so return NULL regardless of element type and length.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return NULL;
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you have changed this file, I think you need to update the copyright header year from 2020 to 2021. (And all the other files you touched.)


// Variable (per-lane) vector shifts are available only when the CPU has
// SIMD support; defer to the runtime feature probe.
bool Matcher::supports_vector_variable_shifts(void) {
return VM_Version::has_simd();
}
Expand Down
8 changes: 8 additions & 0 deletions src/hotspot/cpu/ppc/ppc.ad
Expand Up @@ -2160,6 +2160,14 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}

// PPC has no predicate/mask register file (has_predicated_vectors returns
// false above); return NULL so no mask registers are allocated.
const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}

// No predicate registers on PPC: there is no ideal mask type, so return
// NULL regardless of element type and length.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return NULL;
}

// Per-lane variable vector shifts are not implemented for PPC.
bool Matcher::supports_vector_variable_shifts(void) {
return false; // not supported
}
Expand Down
8 changes: 8 additions & 0 deletions src/hotspot/cpu/s390/s390.ad
Expand Up @@ -1546,6 +1546,14 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}

// s390 has no predicate/mask register file (has_predicated_vectors returns
// false above); return NULL so no mask registers are allocated.
const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}

// No predicate registers on s390: there is no ideal mask type, so return
// NULL regardless of element type and length.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return NULL;
}

// Per-lane variable vector shifts are not implemented for s390.
bool Matcher::supports_vector_variable_shifts(void) {
return false; // not supported
}
Expand Down
61 changes: 31 additions & 30 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -49,18 +49,18 @@ inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vle
}
}

void C2_MacroAssembler::setvectmask(Register dst, Register src) {
void C2_MacroAssembler::setvectmask(Register dst, Register src, KRegister mask) {
guarantee(PostLoopMultiversioning, "must be");
Assembler::movl(dst, 1);
Assembler::shlxl(dst, dst, src);
Assembler::decl(dst);
Assembler::kmovdl(k1, dst);
Assembler::kmovdl(mask, dst);
Assembler::movl(dst, src);
}

void C2_MacroAssembler::restorevectmask() {
void C2_MacroAssembler::restorevectmask(KRegister mask) {
guarantee(PostLoopMultiversioning, "must be");
Assembler::knotwl(k1, k0);
Assembler::knotwl(mask, k0);
}

#if INCLUDE_RTM_OPT
Expand Down Expand Up @@ -1893,10 +1893,11 @@ void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMReg
reduce4L(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
}

void C2_MacroAssembler::genmask(Register dst, Register len, Register temp) {
void C2_MacroAssembler::genmask(KRegister dst, Register len, Register temp) {
assert(ArrayCopyPartialInlineSize <= 64,"");
mov64(dst, -1L);
bzhiq(dst, dst, len);
mov64(temp, -1L);
bzhiq(temp, temp, len);
kmovql(dst, temp);
}
#endif // _LP64

Expand Down Expand Up @@ -2154,7 +2155,8 @@ void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask
}
}

void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
XMMRegister vtmp1, XMMRegister vtmp2, KRegister mask) {
switch(vlen) {
case 4:
assert(vtmp1 != xnoreg, "required.");
Expand Down Expand Up @@ -2192,14 +2194,13 @@ void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegist
break;
case 64:
{
KRegister ktemp = k2; // Use a hardcoded temp due to no k register allocation.
assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.");
evpcmpeqb(ktemp, src1, src2, Assembler::AVX_512bit);
evpcmpeqb(mask, src1, src2, Assembler::AVX_512bit);
if (bt == BoolTest::ne) {
ktestql(ktemp, ktemp);
ktestql(mask, mask);
} else {
assert(bt == BoolTest::overflow, "required");
kortestql(ktemp, ktemp);
kortestql(mask, mask);
}
}
break;
Expand Down Expand Up @@ -2916,7 +2917,7 @@ void C2_MacroAssembler::load_next_elements(Register elem1, Register elem2, Regis
// Compare strings, used for char[] and byte[].
void C2_MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
XMMRegister vec1, int ae) {
XMMRegister vec1, int ae, KRegister mask) {
ShortBranchVerifier sbv(this);
Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
Label COMPARE_WIDE_VECTORS_LOOP_FAILED; // used only _LP64 && AVX3
Expand Down Expand Up @@ -3069,12 +3070,12 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop
if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
evmovdquq(vec1, Address(str1, result, scale), Assembler::AVX_512bit);
evpcmpeqb(k7, vec1, Address(str2, result, scale), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
evpcmpeqb(mask, vec1, Address(str2, result, scale), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
} else {
vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_512bit);
evpcmpeqb(k7, vec1, Address(str2, result, scale2), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
evpcmpeqb(mask, vec1, Address(str2, result, scale2), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
}
kortestql(k7, k7);
kortestql(mask, mask);
jcc(Assembler::aboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED); // miscompare
addptr(result, stride2x2); // update since we already compared at this addr
subl(cnt2, stride2x2); // and sub the size too
Expand Down Expand Up @@ -3258,7 +3259,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,

bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);

kmovql(cnt1, k7);
kmovql(cnt1, mask);
notq(cnt1);
bsfq(cnt2, cnt1);
if (ae != StrIntrinsicNode::LL) {
Expand Down Expand Up @@ -3307,7 +3308,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
// }
void C2_MacroAssembler::has_negatives(Register ary1, Register len,
Register result, Register tmp1,
XMMRegister vec1, XMMRegister vec2) {
XMMRegister vec1, XMMRegister vec2, KRegister mask1, KRegister mask2) {
// rsi: byte array
// rcx: len
// rax: result
Expand Down Expand Up @@ -3339,8 +3340,8 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,

bind(test_64_loop);
// Check whether our 64 elements of size byte contain negatives
evpcmpgtb(k2, vec2, Address(ary1, len, Address::times_1), Assembler::AVX_512bit);
kortestql(k2, k2);
evpcmpgtb(mask1, vec2, Address(ary1, len, Address::times_1), Assembler::AVX_512bit);
kortestql(mask1, mask1);
jcc(Assembler::notZero, TRUE_LABEL);

addptr(len, 64);
Expand All @@ -3357,7 +3358,7 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
mov64(tmp3_aliased, 0xFFFFFFFFFFFFFFFF);
shlxq(tmp3_aliased, tmp3_aliased, tmp1);
notq(tmp3_aliased);
kmovql(k3, tmp3_aliased);
kmovql(mask2, tmp3_aliased);
#else
Label k_init;
jmp(k_init);
Expand All @@ -3382,11 +3383,11 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
lea(len, InternalAddress(tmp));
// create mask to test for negative byte inside a vector
evpbroadcastb(vec1, tmp1, Assembler::AVX_512bit);
evpcmpgtb(k3, vec1, Address(len, 0), Assembler::AVX_512bit);
evpcmpgtb(mask2, vec1, Address(len, 0), Assembler::AVX_512bit);

#endif
evpcmpgtb(k2, k3, vec2, Address(ary1, 0), Assembler::AVX_512bit);
ktestq(k2, k3);
evpcmpgtb(mask1, mask2, vec2, Address(ary1, 0), Assembler::AVX_512bit);
ktestq(mask1, mask2);
jcc(Assembler::notZero, TRUE_LABEL);

jmp(FALSE_LABEL);
Expand Down Expand Up @@ -3513,7 +3514,7 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
// Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register result, Register chr,
XMMRegister vec1, XMMRegister vec2, bool is_char) {
XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask) {
ShortBranchVerifier sbv(this);
Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE;

Expand Down Expand Up @@ -3576,8 +3577,8 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop

evmovdquq(vec1, Address(ary1, limit, Address::times_1), Assembler::AVX_512bit);
evpcmpeqb(k7, vec1, Address(ary2, limit, Address::times_1), Assembler::AVX_512bit);
kortestql(k7, k7);
evpcmpeqb(mask, vec1, Address(ary2, limit, Address::times_1), Assembler::AVX_512bit);
kortestql(mask, mask);
jcc(Assembler::aboveEqual, FALSE_LABEL); // miscompare
addptr(limit, 64); // update since we already compared at this addr
cmpl(limit, -64);
Expand All @@ -3594,8 +3595,8 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
//
addptr(result, -64); // it is safe, bc we just came from this area
evmovdquq(vec1, Address(ary1, result, Address::times_1), Assembler::AVX_512bit);
evpcmpeqb(k7, vec1, Address(ary2, result, Address::times_1), Assembler::AVX_512bit);
kortestql(k7, k7);
evpcmpeqb(mask, vec1, Address(ary2, result, Address::times_1), Assembler::AVX_512bit);
kortestql(mask, mask);
jcc(Assembler::aboveEqual, FALSE_LABEL); // miscompare

jmp(TRUE_LABEL);
Expand Down
16 changes: 8 additions & 8 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -31,8 +31,8 @@
Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);

// special instructions for EVEX
void setvectmask(Register dst, Register src);
void restorevectmask();
void setvectmask(Register dst, Register src, KRegister mask);
void restorevectmask(KRegister mask);

// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
// See full desription in macroAssembler_x86.cpp.
Expand Down Expand Up @@ -131,7 +131,7 @@

// vector test
void vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg);
XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg, KRegister mask = knoreg);

// blend
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
Expand All @@ -146,7 +146,7 @@
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void genmask(Register dst, Register len, Register temp);
void genmask(KRegister dst, Register len, Register temp);
#endif // _LP64

// dst = reduce(op, src2) using vtmp as temps
Expand Down Expand Up @@ -244,17 +244,17 @@
// Compare strings.
void string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
XMMRegister vec1, int ae);
XMMRegister vec1, int ae, KRegister mask = knoreg);

// Search for Non-ASCII character (Negative byte value) in a byte array,
// return true if it has any and false otherwise.
void has_negatives(Register ary1, Register len,
Register result, Register tmp1,
XMMRegister vec1, XMMRegister vec2);
XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg);

// Compare char[] or byte[] arrays.
void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register result, Register chr,
XMMRegister vec1, XMMRegister vec2, bool is_char);
XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask = knoreg);

#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP
4 changes: 2 additions & 2 deletions src/hotspot/cpu/x86/c2_init_x86.cpp
Expand Up @@ -30,7 +30,7 @@

// processor dependent initialization for i486

LP64_ONLY(extern void reg_mask_init();)
extern void reg_mask_init();

void Compile::pd_compiler2_init() {
guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" );
Expand Down Expand Up @@ -61,5 +61,5 @@ void Compile::pd_compiler2_init() {
OptoReg::invalidate(i);
}
}
LP64_ONLY(reg_mask_init();)
reg_mask_init();
}