Commit

8262355: Support for AVX-512 opmask register allocation.
Reviewed-by: vlivanov, njian, kvn
Jatin Bhateja committed Apr 4, 2021
1 parent 0780666 commit f084bd2
Showing 41 changed files with 1,569 additions and 294 deletions.
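For context: AVX-512 provides eight opmask registers (k0-k7) that hold one predicate bit per vector lane. Before this change, C2 treated them as fixed scratch registers (k1, k2, and k7 are hardcoded in the code below); this commit exposes them, together with AArch64's SVE predicate registers, to the register allocator. A minimal sketch of how a mask is produced and consumed, using standard AVX-512 intrinsics (illustrative only, not part of the patch):

#include <immintrin.h>

// A compare writes its per-lane result into an opmask register; masked
// operations then consume it. With allocation support, C2 can keep several
// such masks live at once instead of round-tripping through hardcoded kN.
__m512i clamp_negatives_to_zero(__m512i v) {
  __m512i zero = _mm512_setzero_si512();
  __mmask16 neg = _mm512_cmpgt_epi32_mask(zero, v); // lanes where v < 0
  return _mm512_mask_mov_epi32(v, neg, zero);       // zero exactly those lanes
}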
42 changes: 25 additions & 17 deletions src/hotspot/cpu/aarch64/aarch64.ad
@@ -467,22 +467,22 @@ reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
// ----------------------------
// SVE Predicate Registers
// ----------------------------
-reg_def P0 (SOC, SOC, Op_RegVMask, 0, p0->as_VMReg());
-reg_def P1 (SOC, SOC, Op_RegVMask, 1, p1->as_VMReg());
-reg_def P2 (SOC, SOC, Op_RegVMask, 2, p2->as_VMReg());
-reg_def P3 (SOC, SOC, Op_RegVMask, 3, p3->as_VMReg());
-reg_def P4 (SOC, SOC, Op_RegVMask, 4, p4->as_VMReg());
-reg_def P5 (SOC, SOC, Op_RegVMask, 5, p5->as_VMReg());
-reg_def P6 (SOC, SOC, Op_RegVMask, 6, p6->as_VMReg());
-reg_def P7 (SOC, SOC, Op_RegVMask, 7, p7->as_VMReg());
-reg_def P8 (SOC, SOC, Op_RegVMask, 8, p8->as_VMReg());
-reg_def P9 (SOC, SOC, Op_RegVMask, 9, p9->as_VMReg());
-reg_def P10 (SOC, SOC, Op_RegVMask, 10, p10->as_VMReg());
-reg_def P11 (SOC, SOC, Op_RegVMask, 11, p11->as_VMReg());
-reg_def P12 (SOC, SOC, Op_RegVMask, 12, p12->as_VMReg());
-reg_def P13 (SOC, SOC, Op_RegVMask, 13, p13->as_VMReg());
-reg_def P14 (SOC, SOC, Op_RegVMask, 14, p14->as_VMReg());
-reg_def P15 (SOC, SOC, Op_RegVMask, 15, p15->as_VMReg());
+reg_def P0 (SOC, SOC, Op_RegVectMask, 0, p0->as_VMReg());
+reg_def P1 (SOC, SOC, Op_RegVectMask, 1, p1->as_VMReg());
+reg_def P2 (SOC, SOC, Op_RegVectMask, 2, p2->as_VMReg());
+reg_def P3 (SOC, SOC, Op_RegVectMask, 3, p3->as_VMReg());
+reg_def P4 (SOC, SOC, Op_RegVectMask, 4, p4->as_VMReg());
+reg_def P5 (SOC, SOC, Op_RegVectMask, 5, p5->as_VMReg());
+reg_def P6 (SOC, SOC, Op_RegVectMask, 6, p6->as_VMReg());
+reg_def P7 (SOC, SOC, Op_RegVectMask, 7, p7->as_VMReg());
+reg_def P8 (SOC, SOC, Op_RegVectMask, 8, p8->as_VMReg());
+reg_def P9 (SOC, SOC, Op_RegVectMask, 9, p9->as_VMReg());
+reg_def P10 (SOC, SOC, Op_RegVectMask, 10, p10->as_VMReg());
+reg_def P11 (SOC, SOC, Op_RegVectMask, 11, p11->as_VMReg());
+reg_def P12 (SOC, SOC, Op_RegVectMask, 12, p12->as_VMReg());
+reg_def P13 (SOC, SOC, Op_RegVectMask, 13, p13->as_VMReg());
+reg_def P14 (SOC, SOC, Op_RegVectMask, 14, p14->as_VMReg());
+reg_def P15 (SOC, SOC, Op_RegVectMask, 15, p15->as_VMReg());

// ----------------------------
// Special Registers
@@ -2439,6 +2439,14 @@ const bool Matcher::has_predicated_vectors(void) {
return UseSVE > 0;
}

const RegMask* Matcher::predicate_reg_mask(void) {
return &_PR_REG_mask;
}

const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return new TypeVectMask(elemTy, length);
}

bool Matcher::supports_vector_variable_shifts(void) {
return true;
}
@@ -5601,7 +5609,7 @@ operand vRegD_V31()
operand pRegGov()
%{
constraint(ALLOC_IN_RC(gov_pr));
-match(RegVMask);
+match(RegVectMask);
op_cost(0);
format %{ %}
interface(REG_INTER);
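Taken together, the aarch64.ad changes rename the ideal type Op_RegVMask to Op_RegVectMask and implement the two new Matcher hooks, so shared C2 code can discover a port's predicate register file. A hedged sketch of a caller (the function and its use of TypeInt::BOOL as the mask element type are assumptions, not from this commit):

const TypeVect* make_mask_type(int num_lanes) {
  if (!Matcher::has_predicated_vectors()) return NULL;  // port has no predicates
  const RegMask* prs = Matcher::predicate_reg_mask();   // e.g. p0..p15 on SVE
  assert(prs != NULL, "predicated port must expose its register class");
  return Matcher::predicate_reg_type(TypeInt::BOOL, num_lanes);
}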
10 changes: 9 additions & 1 deletion src/hotspot/cpu/arm/arm.ad
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -993,6 +993,14 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}

const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}

const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return NULL;
}

bool Matcher::supports_vector_variable_shifts(void) {
return VM_Version::has_simd();
}
8 changes: 8 additions & 0 deletions src/hotspot/cpu/ppc/ppc.ad
@@ -2156,6 +2156,14 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}

const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}

const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return NULL;
}

bool Matcher::supports_vector_variable_shifts(void) {
return false; // not supported
}
8 changes: 8 additions & 0 deletions src/hotspot/cpu/s390/s390.ad
@@ -1546,6 +1546,14 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}

const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}

const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return NULL;
}

bool Matcher::supports_vector_variable_shifts(void) {
return false; // not supported
}
16 changes: 16 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.cpp
@@ -2452,6 +2452,22 @@ void Assembler::kmovwl(KRegister dst, Address src) {
emit_operand((Register)dst, src);
}

void Assembler::kmovwl(Address dst, KRegister src) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x91);
emit_operand((Register)src, dst);
}

void Assembler::kmovwl(KRegister dst, KRegister src) {
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int16((unsigned char)0x90, (0xC0 | encode));
}

void Assembler::kmovdl(KRegister dst, Register src) {
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
Expand Down
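The two kmovwl overloads added above round out the move set a register allocator needs for 16-bit opmasks: spill to memory, reload, and register-to-register copy. A usage sketch (hypothetical helpers; the stack-slot addressing is an assumption for illustration):

void spill_k(MacroAssembler* masm, KRegister k, int off) {
  masm->kmovwl(Address(rsp, off), k);  // new overload: store mask to a stack slot
}
void reload_k(MacroAssembler* masm, KRegister k, int off) {
  masm->kmovwl(k, Address(rsp, off));  // existing overload: load mask from memory
}
void copy_k(MacroAssembler* masm, KRegister dst, KRegister src) {
  masm->kmovwl(dst, src);              // new overload: k-to-k copy
}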
2 changes: 2 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.hpp
@@ -1459,6 +1459,8 @@ class Assembler : public AbstractAssembler {
void kmovwl(KRegister dst, Register src);
void kmovwl(KRegister dst, Address src);
void kmovwl(Register dst, KRegister src);
void kmovwl(Address dst, KRegister src);
void kmovwl(KRegister dst, KRegister src);
void kmovdl(KRegister dst, Register src);
void kmovdl(Register dst, KRegister src);
void kmovql(KRegister dst, KRegister src);
61 changes: 31 additions & 30 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -49,18 +49,18 @@ inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vle
}
}

-void C2_MacroAssembler::setvectmask(Register dst, Register src) {
+void C2_MacroAssembler::setvectmask(Register dst, Register src, KRegister mask) {
guarantee(PostLoopMultiversioning, "must be");
Assembler::movl(dst, 1);
Assembler::shlxl(dst, dst, src);
Assembler::decl(dst);
-Assembler::kmovdl(k1, dst);
+Assembler::kmovdl(mask, dst);
Assembler::movl(dst, src);
}

-void C2_MacroAssembler::restorevectmask() {
+void C2_MacroAssembler::restorevectmask(KRegister mask) {
guarantee(PostLoopMultiversioning, "must be");
-Assembler::knotwl(k1, k0);
+Assembler::knotwl(mask, k0);
}
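What the pair above computes, now with the opmask chosen by the register allocator instead of hardcoded k1 (worked example; the value 5 is illustrative):

// setvectmask(dst, src, mask) with src = 5:
//   movl(dst, 1)         -> dst = 1
//   shlxl(dst, dst, src) -> dst = 1 << 5 = 0b100000
//   decl(dst)            -> dst = 0b011111 (low 5 bits set)
//   kmovdl(mask, dst)    -> opmask 'mask' enables lanes 0..4
//   movl(dst, src)       -> dst restored to the element count
// restorevectmask(mask) then re-enables all lanes by writing ~k0 into 'mask'.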

#if INCLUDE_RTM_OPT
@@ -1893,10 +1893,11 @@ void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMReg
reduce4L(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
}

-void C2_MacroAssembler::genmask(Register dst, Register len, Register temp) {
+void C2_MacroAssembler::genmask(KRegister dst, Register len, Register temp) {
assert(ArrayCopyPartialInlineSize <= 64,"");
-mov64(dst, -1L);
-bzhiq(dst, dst, len);
+mov64(temp, -1L);
+bzhiq(temp, temp, len);
+kmovql(dst, temp);
}
#endif // _LP64
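genmask now targets an allocated opmask register, so the bit pattern is built in a GPR and moved across (worked example; len = 5 is illustrative):

// genmask(dst, len, temp) with len = 5:
//   mov64(temp, -1L)       -> temp = 0xFFFFFFFFFFFFFFFF
//   bzhiq(temp, temp, len) -> temp = 0x1F (bits at index >= 5 cleared)
//   kmovql(dst, temp)      -> 64-lane opmask with lanes 0..4 active
// The extra GPR 'temp' exists because 'dst' is now a KRegister, which BZHI
// cannot write directly.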

@@ -2154,7 +2155,8 @@ void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask
}
}

-void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
+void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
+XMMRegister vtmp1, XMMRegister vtmp2, KRegister mask) {
switch(vlen) {
case 4:
assert(vtmp1 != xnoreg, "required.");
@@ -2192,14 +2194,13 @@ void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegist
break;
case 64:
{
-KRegister ktemp = k2; // Use a hardcoded temp due to no k register allocation.
assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.");
-evpcmpeqb(ktemp, src1, src2, Assembler::AVX_512bit);
+evpcmpeqb(mask, src1, src2, Assembler::AVX_512bit);
if (bt == BoolTest::ne) {
-ktestql(ktemp, ktemp);
+ktestql(mask, mask);
} else {
assert(bt == BoolTest::overflow, "required");
-kortestql(ktemp, ktemp);
+kortestql(mask, mask);
}
}
break;
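Background on the two mask tests above, both of which compare the mask with itself (stated from the instruction definitions, not patch code):

//   kortestql(m, m): OR = m,  so ZF = (m == 0) and CF = (m == all-ones)
//   ktestql(m, m):   AND = m, so ZF = (m == 0)
// After evpcmpeqb, bit i of 'mask' is set iff byte lane i compared equal:
// ZF answers "no lane equal" (BoolTest::ne), CF answers "all 64 lanes equal"
// (BoolTest::overflow).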
@@ -2916,7 +2917,7 @@ void C2_MacroAssembler::load_next_elements(Register elem1, Register elem2, Regis
// Compare strings, used for char[] and byte[].
void C2_MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
-XMMRegister vec1, int ae) {
+XMMRegister vec1, int ae, KRegister mask) {
ShortBranchVerifier sbv(this);
Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
Label COMPARE_WIDE_VECTORS_LOOP_FAILED; // used only _LP64 && AVX3
@@ -3069,12 +3070,12 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop
if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
evmovdquq(vec1, Address(str1, result, scale), Assembler::AVX_512bit);
-evpcmpeqb(k7, vec1, Address(str2, result, scale), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
+evpcmpeqb(mask, vec1, Address(str2, result, scale), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
} else {
vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_512bit);
-evpcmpeqb(k7, vec1, Address(str2, result, scale2), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
+evpcmpeqb(mask, vec1, Address(str2, result, scale2), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
}
-kortestql(k7, k7);
+kortestql(mask, mask);
jcc(Assembler::aboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED); // miscompare
addptr(result, stride2x2); // update since we already compared at this addr
subl(cnt2, stride2x2); // and sub the size too
@@ -3258,7 +3259,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,

bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);

-kmovql(cnt1, k7);
+kmovql(cnt1, mask);
notq(cnt1);
bsfq(cnt2, cnt1);
if (ae != StrIntrinsicNode::LL) {
@@ -3307,7 +3308,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
// }
void C2_MacroAssembler::has_negatives(Register ary1, Register len,
Register result, Register tmp1,
-XMMRegister vec1, XMMRegister vec2) {
+XMMRegister vec1, XMMRegister vec2, KRegister mask1, KRegister mask2) {
// rsi: byte array
// rcx: len
// rax: result
@@ -3339,8 +3340,8 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,

bind(test_64_loop);
// Check whether our 64 elements of size byte contain negatives
-evpcmpgtb(k2, vec2, Address(ary1, len, Address::times_1), Assembler::AVX_512bit);
-kortestql(k2, k2);
+evpcmpgtb(mask1, vec2, Address(ary1, len, Address::times_1), Assembler::AVX_512bit);
+kortestql(mask1, mask1);
jcc(Assembler::notZero, TRUE_LABEL);

addptr(len, 64);
@@ -3357,7 +3358,7 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
mov64(tmp3_aliased, 0xFFFFFFFFFFFFFFFF);
shlxq(tmp3_aliased, tmp3_aliased, tmp1);
notq(tmp3_aliased);
-kmovql(k3, tmp3_aliased);
+kmovql(mask2, tmp3_aliased);
#else
Label k_init;
jmp(k_init);
@@ -3382,11 +3383,11 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
lea(len, InternalAddress(tmp));
// create mask to test for negative byte inside a vector
evpbroadcastb(vec1, tmp1, Assembler::AVX_512bit);
-evpcmpgtb(k3, vec1, Address(len, 0), Assembler::AVX_512bit);
+evpcmpgtb(mask2, vec1, Address(len, 0), Assembler::AVX_512bit);

#endif
-evpcmpgtb(k2, k3, vec2, Address(ary1, 0), Assembler::AVX_512bit);
-ktestq(k2, k3);
+evpcmpgtb(mask1, mask2, vec2, Address(ary1, 0), Assembler::AVX_512bit);
+ktestq(mask1, mask2);
jcc(Assembler::notZero, TRUE_LABEL);

jmp(FALSE_LABEL);
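How the tail test above works on the LP64 path (a reading of the code; the remaining count of 13 is an illustrative value):

// Suppose 13 bytes remain:
//   mov64(tmp3, -1); shlxq(tmp3, tmp3, tmp1); notq(tmp3) -> tmp3 = 0x1FFF
//   kmovql(mask2, tmp3)   -> mask2 enables only the 13 valid byte lanes
//   evpcmpgtb(mask1, mask2, vec2, mem) -> negative-byte bits, pre-masked by mask2
//   ktestq(mask1, mask2)  -> ZF == 0 iff some valid tail byte is negative,
//                            taking the branch to TRUE_LABEL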
@@ -3513,7 +3514,7 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
// Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register result, Register chr,
-XMMRegister vec1, XMMRegister vec2, bool is_char) {
+XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask) {
ShortBranchVerifier sbv(this);
Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE;

@@ -3576,8 +3577,8 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop

evmovdquq(vec1, Address(ary1, limit, Address::times_1), Assembler::AVX_512bit);
-evpcmpeqb(k7, vec1, Address(ary2, limit, Address::times_1), Assembler::AVX_512bit);
-kortestql(k7, k7);
+evpcmpeqb(mask, vec1, Address(ary2, limit, Address::times_1), Assembler::AVX_512bit);
+kortestql(mask, mask);
jcc(Assembler::aboveEqual, FALSE_LABEL); // miscompare
addptr(limit, 64); // update since we already compared at this addr
cmpl(limit, -64);
@@ -3594,8 +3595,8 @@ //
//
addptr(result, -64); // it is safe, bc we just came from this area
evmovdquq(vec1, Address(ary1, result, Address::times_1), Assembler::AVX_512bit);
-evpcmpeqb(k7, vec1, Address(ary2, result, Address::times_1), Assembler::AVX_512bit);
-kortestql(k7, k7);
+evpcmpeqb(mask, vec1, Address(ary2, result, Address::times_1), Assembler::AVX_512bit);
+kortestql(mask, mask);
jcc(Assembler::aboveEqual, FALSE_LABEL); // miscompare

jmp(TRUE_LABEL);
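The loop above uses the recurring AVX-512 equality idiom (string_compare uses the same one):

//   evpcmpeqb(mask, a, b)  -> bit i set iff byte i of the operands matches
//   kortestql(mask, mask)  -> CF = 1 iff mask is all-ones (all 64 bytes equal)
//   jcc(aboveEqual, FAIL)  -> aboveEqual tests CF == 0, i.e. at least one
//                             byte differed (miscompare)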
16 changes: 8 additions & 8 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -31,8 +31,8 @@
Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);

// special instructions for EVEX
-void setvectmask(Register dst, Register src);
-void restorevectmask();
+void setvectmask(Register dst, Register src, KRegister mask);
+void restorevectmask(KRegister mask);

// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
// See full desription in macroAssembler_x86.cpp.
@@ -131,7 +131,7 @@

// vector test
void vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
-XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg);
+XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg, KRegister mask = knoreg);

// blend
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
@@ -146,7 +146,7 @@
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
-void genmask(Register dst, Register len, Register temp);
+void genmask(KRegister dst, Register len, Register temp);
#endif // _LP64

// dst = reduce(op, src2) using vtmp as temps
@@ -244,17 +244,17 @@
// Compare strings.
void string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
-XMMRegister vec1, int ae);
+XMMRegister vec1, int ae, KRegister mask = knoreg);

// Search for Non-ASCII character (Negative byte value) in a byte array,
// return true if it has any and false otherwise.
void has_negatives(Register ary1, Register len,
Register result, Register tmp1,
-XMMRegister vec1, XMMRegister vec2);
+XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg);

// Compare char[] or byte[] arrays.
void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register result, Register chr,
-XMMRegister vec1, XMMRegister vec2, bool is_char);
+XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask = knoreg);

#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP
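A note on the knoreg defaults above (a reading of the API, not patch text): legacy SSE/AVX2 paths never touch the mask, so pre-existing call sites compile unchanged, while AVX-512 paths thread through an allocator-chosen register:

// Hypothetical call sites:
//   arrays_equals(true, a1, a2, limit, res, chr, v1, v2, /*is_char*/false);        // AVX2: mask = knoreg
//   arrays_equals(true, a1, a2, limit, res, chr, v1, v2, /*is_char*/false, ktmp);  // AVX-512: ktmp from the RA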
