Skip to content

Commit c371782

Browse files
Jose Ricardo Ziviani, TheRealMDoerr
authored and committed
8255553: [PPC64] Introduce and use setbc and setnbc P10 instructions
Reviewed-by: mdoerr
1 parent 3b9c5a3 commit c371782

File tree

7 files changed

+86
-118
lines changed

7 files changed

+86
-118
lines changed

src/hotspot/cpu/ppc/assembler_ppc.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,9 @@ class Assembler : public AbstractAssembler {
341341
MCRXRX_OPCODE = (31u << OPCODE_SHIFT | 576u << 1),
342342
SETB_OPCODE = (31u << OPCODE_SHIFT | 128u << 1),
343343

344+
SETBC_OPCODE = (31u << OPCODE_SHIFT | 384u << 1),
345+
SETNBC_OPCODE = (31u << OPCODE_SHIFT | 448u << 1),
346+
344347
// condition register logic instructions
345348
CRAND_OPCODE = (19u << OPCODE_SHIFT | 257u << 1),
346349
CRNAND_OPCODE = (19u << OPCODE_SHIFT | 225u << 1),
@@ -1693,6 +1696,12 @@ class Assembler : public AbstractAssembler {
16931696
inline void mcrxrx(ConditionRegister cra);
16941697
inline void setb( Register d, ConditionRegister cra);
16951698

1699+
// >= Power10
1700+
inline void setbc( Register d, int biint);
1701+
inline void setbc( Register d, ConditionRegister cr, Condition cc);
1702+
inline void setnbc(Register d, int biint);
1703+
inline void setnbc(Register d, ConditionRegister cr, Condition cc);
1704+
16961705
// Special purpose registers
16971706
// Exception Register
16981707
inline void mtxer(Register s1);

src/hotspot/cpu/ppc/assembler_ppc.inline.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,17 @@ inline void Assembler::mcrxrx(ConditionRegister cra)
384384
inline void Assembler::setb(Register d, ConditionRegister cra)
385385
{ emit_int32(SETB_OPCODE | rt(d) | bfa(cra)); }
386386

387+
inline void Assembler::setbc(Register d, int biint)
388+
{ emit_int32(SETBC_OPCODE | rt(d) | bi(biint)); }
389+
inline void Assembler::setbc(Register d, ConditionRegister cr, Condition cc) {
390+
setbc(d, bi0(cr, cc));
391+
}
392+
inline void Assembler::setnbc(Register d, int biint)
393+
{ emit_int32(SETNBC_OPCODE | rt(d) | bi(biint)); }
394+
inline void Assembler::setnbc(Register d, ConditionRegister cr, Condition cc) {
395+
setnbc(d, bi0(cr, cc));
396+
}
397+
387398
// Special purpose registers
388399
// Exception Register
389400
inline void Assembler::mtxer(Register s1) { emit_int32(MTXER_OPCODE | rs(s1)); }

src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1527,7 +1527,6 @@ void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2,
15271527

15281528
void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){
15291529
const Register Rdst = dst->as_register();
1530-
Label done;
15311530
if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) {
15321531
bool is_unordered_less = (code == lir_ucmp_fd2i);
15331532
if (left->is_single_fpu()) {
@@ -1537,18 +1536,13 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op
15371536
} else {
15381537
ShouldNotReachHere();
15391538
}
1540-
__ li(Rdst, is_unordered_less ? -1 : 1);
1541-
__ bso(CCR0, done);
1539+
__ set_cmpu3(Rdst, is_unordered_less); // is_unordered_less ? -1 : 1
15421540
} else if (code == lir_cmp_l2i) {
15431541
__ cmpd(CCR0, left->as_register_lo(), right->as_register_lo());
1542+
__ set_cmp3(Rdst); // set result as follows: <: -1, =: 0, >: 1
15441543
} else {
15451544
ShouldNotReachHere();
15461545
}
1547-
__ mfcr(R0); // set bit 32..33 as follows: <: 0b10, =: 0b00, >: 0b01
1548-
__ srwi(Rdst, R0, 30);
1549-
__ srawi(R0, R0, 31);
1550-
__ orr(Rdst, R0, Rdst); // set result as follows: <: -1, =: 0, >: 1
1551-
__ bind(done);
15521546
}
15531547

15541548

src/hotspot/cpu/ppc/macroAssembler_ppc.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,11 @@ class MacroAssembler: public Assembler {
155155
//
156156
// branch, jump
157157
//
158+
// set dst to -1, 0, +1 from the compare result in CCR0: "greater than" sets dst to 1,
159+
// "less than" sets dst to -1, otherwise (neither bit set) dst is set to 0. Kills R0.
160+
void inline set_cmp3(Register dst);
161+
// like set_cmp3, but an unordered compare result in CCR0 yields (treat_unordered_like_less ? -1 : +1)
162+
void inline set_cmpu3(Register dst, bool treat_unordered_like_less);
158163

159164
inline void pd_patch_instruction(address branch, address target, const char* file, int line);
160165
NOT_PRODUCT(static void pd_print_patched_instruction(address branch);)

src/hotspot/cpu/ppc/macroAssembler_ppc.inline.hpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,31 @@ inline bool MacroAssembler::is_bc_far_variant3_at(address instruction_addr) {
236236
is_endgroup(instruction_2);
237237
}
238238

239+
// set dst to -1, 0, +1 as follows: if CCR0bi is "greater than", dst is set to 1,
240+
// if CCR0bi is "equal", dst is set to 0, otherwise it's set to -1.
241+
inline void MacroAssembler::set_cmp3(Register dst) {
242+
assert_different_registers(dst, R0);
243+
// P10, prefer using setbc intructions
244+
if (VM_Version::has_brw()) {
245+
setbc(R0, CCR0, Assembler::greater); // Set 1 to R0 if CCR0bi is "greater than", otherwise 0
246+
setnbc(dst, CCR0, Assembler::less); // Set -1 to dst if CCR0bi is "less than", otherwise 0
247+
} else {
248+
mfcr(R0); // copy CR register to R0
249+
srwi(dst, R0, 30); // copy the first two bits to dst
250+
srawi(R0, R0, 31); // move the first bit to last position - sign extended
251+
}
252+
orr(dst, dst, R0); // dst | R0 will be -1, 0, or +1
253+
}
254+
255+
// set dst to (treat_unordered_like_less ? -1 : +1)
256+
inline void MacroAssembler::set_cmpu3(Register dst, bool treat_unordered_like_less) {
257+
if (treat_unordered_like_less) {
258+
cror(CCR0, Assembler::less, CCR0, Assembler::summary_overflow); // treat unordered like less
259+
} else {
260+
cror(CCR0, Assembler::greater, CCR0, Assembler::summary_overflow); // treat unordered like greater
261+
}
262+
set_cmp3(dst);
263+
}
239264

240265
// Convenience bc_far versions
241266
inline void MacroAssembler::blt_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs1, bi0(crx, less), L, optimize); }

src/hotspot/cpu/ppc/ppc.ad

Lines changed: 30 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -11385,90 +11385,20 @@ instruct testL_reg_imm(flagsRegCR0 cr0, iRegLsrc src1, uimmL16 src2, immL_0 zero
1138511385
ins_pipe(pipe_class_compare);
1138611386
%}
1138711387

11388-
instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsRegSrc crx) %{
11389-
// no match-rule, false predicate
11390-
effect(DEF dst, USE crx);
11391-
predicate(false);
11388+
// Manifest a CmpL3 result in an integer register.
11389+
instruct cmpL3_reg_reg(iRegIdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
11390+
match(Set dst (CmpL3 src1 src2));
11391+
effect(KILL cr0);
11392+
ins_cost(DEFAULT_COST * 5);
11393+
size(VM_Version::has_brw() ? 16 : 20);
1139211394

11393-
ins_variable_size_depending_on_alignment(true);
11395+
format %{ "cmpL3_reg_reg $dst, $src1, $src2" %}
1139411396

11395-
format %{ "cmovI $crx, $dst, -1, 0, +1" %}
11396-
// Worst case is branch + move + branch + move + stop, no stop without scheduler.
11397-
size(16);
1139811397
ins_encode %{
11399-
Label done;
11400-
// li(Rdst, 0); // equal -> 0
11401-
__ beq($crx$$CondRegister, done);
11402-
__ li($dst$$Register, 1); // greater -> +1
11403-
__ bgt($crx$$CondRegister, done);
11404-
__ li($dst$$Register, -1); // unordered or less -> -1
11405-
__ bind(done);
11406-
%}
11407-
ins_pipe(pipe_class_compare);
11408-
%}
11409-
11410-
instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsRegSrc crx) %{
11411-
// no match-rule, false predicate
11412-
effect(DEF dst, USE crx);
11413-
predicate(false);
11414-
11415-
format %{ "CmovI $crx, $dst, -1, 0, +1 \t// postalloc expanded" %}
11416-
postalloc_expand %{
11417-
//
11418-
// replaces
11419-
//
11420-
// region crx
11421-
// \ |
11422-
// dst=cmovI_conIvalueMinus1_conIvalue0_conIvalue1
11423-
//
11424-
// with
11425-
//
11426-
// region
11427-
// \
11428-
// dst=loadConI16(0)
11429-
// |
11430-
// ^ region crx
11431-
// | \ |
11432-
// dst=cmovI_conIvalueMinus1_conIvalue1
11433-
//
11434-
11435-
// Create new nodes.
11436-
MachNode *m1 = new loadConI16Node();
11437-
MachNode *m2 = new cmovI_conIvalueMinus1_conIvalue1Node();
11438-
11439-
// inputs for new nodes
11440-
m1->add_req(n_region);
11441-
m2->add_req(n_region, n_crx);
11442-
m2->add_prec(m1);
11443-
11444-
// operands for new nodes
11445-
m1->_opnds[0] = op_dst;
11446-
m1->_opnds[1] = new immI16Oper(0);
11447-
m2->_opnds[0] = op_dst;
11448-
m2->_opnds[1] = op_crx;
11449-
11450-
// registers for new nodes
11451-
ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
11452-
ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
11453-
11454-
// Insert new nodes.
11455-
nodes->push(m1);
11456-
nodes->push(m2);
11457-
%}
11458-
%}
11459-
11460-
// Manifest a CmpL3 result in an integer register. Very painful.
11461-
// This is the test to avoid.
11462-
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
11463-
instruct cmpL3_reg_reg_ExEx(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
11464-
match(Set dst (CmpL3 src1 src2));
11465-
ins_cost(DEFAULT_COST*5+BRANCH_COST);
11466-
11467-
expand %{
11468-
flagsReg tmp1;
11469-
cmpL_reg_reg(tmp1, src1, src2);
11470-
cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(dst, tmp1);
11398+
__ cmpd(CCR0, $src1$$Register, $src2$$Register);
11399+
__ set_cmp3($dst$$Register);
1147111400
%}
11401+
ins_pipe(pipe_class_default);
1147211402
%}
1147311403

1147411404
// Implicit range checks.
@@ -11791,15 +11721,19 @@ instruct cmpF_reg_reg_Ex(flagsReg crx, regF src1, regF src2) %{
1179111721
%}
1179211722

1179311723
// Compare float, generate -1,0,1
11794-
instruct cmpF3_reg_reg_ExEx(iRegIdst dst, regF src1, regF src2) %{
11724+
instruct cmpF3_reg_reg(iRegIdst dst, regF src1, regF src2, flagsRegCR0 cr0) %{
1179511725
match(Set dst (CmpF3 src1 src2));
11796-
ins_cost(DEFAULT_COST*5+BRANCH_COST);
11726+
effect(KILL cr0);
11727+
ins_cost(DEFAULT_COST * 6);
11728+
size(VM_Version::has_brw() ? 20 : 24);
1179711729

11798-
expand %{
11799-
flagsReg tmp1;
11800-
cmpFUnordered_reg_reg(tmp1, src1, src2);
11801-
cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(dst, tmp1);
11730+
format %{ "cmpF3_reg_reg $dst, $src1, $src2" %}
11731+
11732+
ins_encode %{
11733+
__ fcmpu(CCR0, $src1$$FloatRegister, $src2$$FloatRegister);
11734+
__ set_cmpu3($dst$$Register, true); // C2 requires unordered to get treated like less
1180211735
%}
11736+
ins_pipe(pipe_class_default);
1180311737
%}
1180411738

1180511739
instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
@@ -11871,15 +11805,19 @@ instruct cmpD_reg_reg_Ex(flagsReg crx, regD src1, regD src2) %{
1187111805
%}
1187211806

1187311807
// Compare double, generate -1,0,1
11874-
instruct cmpD3_reg_reg_ExEx(iRegIdst dst, regD src1, regD src2) %{
11808+
instruct cmpD3_reg_reg(iRegIdst dst, regD src1, regD src2, flagsRegCR0 cr0) %{
1187511809
match(Set dst (CmpD3 src1 src2));
11876-
ins_cost(DEFAULT_COST*5+BRANCH_COST);
11810+
effect(KILL cr0);
11811+
ins_cost(DEFAULT_COST * 6);
11812+
size(VM_Version::has_brw() ? 20 : 24);
1187711813

11878-
expand %{
11879-
flagsReg tmp1;
11880-
cmpDUnordered_reg_reg(tmp1, src1, src2);
11881-
cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(dst, tmp1);
11814+
format %{ "cmpD3_reg_reg $dst, $src1, $src2" %}
11815+
11816+
ins_encode %{
11817+
__ fcmpu(CCR0, $src1$$FloatRegister, $src2$$FloatRegister);
11818+
__ set_cmpu3($dst$$Register, true); // C2 requires unordered to get treated like less
1188211819
%}
11820+
ins_pipe(pipe_class_default);
1188311821
%}
1188411822

1188511823
// Compare char

src/hotspot/cpu/ppc/templateTable_ppc_64.cpp

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1591,10 +1591,7 @@ void TemplateTable::lcmp() {
15911591
__ pop_l(Rscratch); // first operand, deeper in stack
15921592

15931593
__ cmpd(CCR0, Rscratch, R17_tos); // compare
1594-
__ mfcr(R17_tos); // set bit 32..33 as follows: <: 0b10, =: 0b00, >: 0b01
1595-
__ srwi(Rscratch, R17_tos, 30);
1596-
__ srawi(R17_tos, R17_tos, 31);
1597-
__ orr(R17_tos, Rscratch, R17_tos); // set result as follows: <: -1, =: 0, >: 1
1594+
__ set_cmp3(R17_tos); // set result as follows: <: -1, =: 0, >: 1
15981595
}
15991596

16001597
// fcmpl/fcmpg and dcmpl/dcmpg bytecodes
@@ -1611,21 +1608,10 @@ void TemplateTable::float_cmp(bool is_float, int unordered_result) {
16111608
__ pop_d(Rfirst);
16121609
}
16131610

1614-
Label Lunordered, Ldone;
16151611
__ fcmpu(CCR0, Rfirst, Rsecond); // compare
1616-
if (unordered_result) {
1617-
__ bso(CCR0, Lunordered);
1618-
}
1619-
__ mfcr(R17_tos); // set bit 32..33 as follows: <: 0b10, =: 0b00, >: 0b01
1620-
__ srwi(Rscratch, R17_tos, 30);
1621-
__ srawi(R17_tos, R17_tos, 31);
1622-
__ orr(R17_tos, Rscratch, R17_tos); // set result as follows: <: -1, =: 0, >: 1
1623-
if (unordered_result) {
1624-
__ b(Ldone);
1625-
__ bind(Lunordered);
1626-
__ load_const_optimized(R17_tos, unordered_result);
1627-
}
1628-
__ bind(Ldone);
1612+
// if unordered_result is 1, treat an unordered compare like 'greater than'; if it is -1, like 'less than'
1613+
assert(unordered_result == 1 || unordered_result == -1, "unordered_result can be either 1 or -1");
1614+
__ set_cmpu3(R17_tos, unordered_result != 1);
16291615
}
16301616

16311617
// Branch_conditional which takes TemplateTable::Condition.

0 commit comments

Comments
 (0)