Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8259822: [PPC64] Support the prefixed instruction format added in POWER10 #2095

Closed
wants to merge 11 commits into from
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2020 SAP SE. All rights reserved.
* Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2021 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -220,6 +220,12 @@ class Assembler : public AbstractAssembler {
SPR_0_4_SHIFT = 16u, // SPR_0_4 field in bits 16 -- 20
RS_SHIFT = 21u, // RS field in bits 21 -- 25
OPCODE_SHIFT = 26u, // opcode in bits 26 -- 31

// Shift counts in prefix word
PRE_TYPE_SHIFT = 24u, // Prefix type in bits 24 -- 25
PRE_ST1_SHIFT = 23u, // ST1 field in bits 23 -- 23
PRE_R_SHIFT = 20u, // R-bit in bits 20 -- 20
PRE_ST4_SHIFT = 20u, // ST4 field in bits 20 -- 23
};

enum opcdxos_masks {
@@ -796,6 +802,28 @@ class Assembler : public AbstractAssembler {

};

enum opcdeos_mask {
  // Primary opcode field of a prefix word.
  PREFIX_OPCODE_MASK = 63u << OPCODE_SHIFT,
  // Primary opcode field together with the two-bit prefix type field.
  PREFIX_OPCODE_TYPE_MASK = PREFIX_OPCODE_MASK | (3u << PRE_TYPE_SHIFT),
  // Opcode, type and ST fields. Types 00/10 carry a one-bit ST field,
  // types 01/11 carry a four-bit ST field.
  PREFIX_OPCODE_TYPEx0_MASK = PREFIX_OPCODE_TYPE_MASK | (0x1u << PRE_ST1_SHIFT),
  PREFIX_OPCODE_TYPEx1_MASK = PREFIX_OPCODE_TYPE_MASK | (0xFu << PRE_ST4_SHIFT),

  // Per-instruction masks (prefix word and suffix word).
  PADDI_PREFIX_OPCODE_MASK = PREFIX_OPCODE_TYPEx0_MASK,
  PADDI_SUFFIX_OPCODE_MASK = ADDI_OPCODE_MASK,
};

enum opcdeos {
  // Primary opcode value (1) carried by every prefix word.
  PREFIX_PRIMARY_OPCODE = 1u << OPCODE_SHIFT,

  // Prefixed addi/li: the prefix word uses prefix type 2, the suffix word
  // reuses the ordinary addi opcode.
  PADDI_PREFIX_OPCODE = (2u << PRE_TYPE_SHIFT) | PREFIX_PRIMARY_OPCODE,
  PADDI_SUFFIX_OPCODE = ADDI_OPCODE,
};

// Trap instructions TO bits
enum trap_to_bits {
// single bits
@@ -1081,6 +1109,20 @@ class Assembler : public AbstractAssembler {
// Inverse accessors for the BO and BI fields: each forwards the instruction word x
// and a fixed bit range (BO: 6..10, BI: 11..15) to inv_opp_u_field.
static int inv_bo_field(int x) { return inv_opp_u_field(x, 10, 6); }
static int inv_bi_field(int x) { return inv_opp_u_field(x, 15, 11); }

// For extended opcodes (prefixed instructions) introduced with Power 10
// Inverse accessors for prefix-word fields; each forwards a fixed bit range to
// inv_opp_u_field:
//   inv_r_eo  - bit 11      (R bit)
//   inv_type  - bits 6..7   (prefix type)
//   inv_st_x0 - bit 8       (one-bit ST field)
//   inv_st_x1 - bits 8..11  (four-bit ST field)
// NOTE(review): these return long although they take an int instruction word.
static long inv_r_eo( int x) { return inv_opp_u_field(x, 11, 11); }
static long inv_type( int x) { return inv_opp_u_field(x, 7, 6); }
static long inv_st_x0( int x) { return inv_opp_u_field(x, 8, 8); }
static long inv_st_x1( int x) { return inv_opp_u_field(x, 11, 8); }

// - 8LS:D/MLS:D Formats
// Inverse accessor for d0: forwards bit range 14..31 (18 bits) to inv_opp_u_field.
static long inv_d0_eo( long x) { return inv_opp_u_field(x, 31, 14); }

// - 8RR:XX4/8RR:D Formats
// Inverse accessors, each forwarding a fixed bit range to inv_opp_u_field:
//   inv_imm0_eo - bits 16..31 (16 bits)
//   inv_uimm_eo - bits 29..31 (3 bits)
//   inv_imm_eo  - bits 24..31 (8 bits)
static long inv_imm0_eo(int x) { return inv_opp_u_field(x, 31, 16); }
static long inv_uimm_eo(int x) { return inv_opp_u_field(x, 31, 29); }
static long inv_imm_eo( int x) { return inv_opp_u_field(x, 31, 24); }

#define opp_u_field(x, hi_bit, lo_bit) u_field(x, 31-(lo_bit), 31-(hi_bit))
#define opp_s_field(x, hi_bit, lo_bit) s_field(x, 31-(lo_bit), 31-(hi_bit))

@@ -1202,6 +1244,24 @@ class Assembler : public AbstractAssembler {
// Field encoders: vcmp_rc places x into bit 21, xxsplt_uim places x into bits 14..15,
// both via opp_u_field.
static int vcmp_rc( int x) { return opp_u_field(x, 21, 21); } // for vcmp* instructions
static int xxsplt_uim(int x) { return opp_u_field(x, 15, 14); } // for xxsplt* instructions

// For extended opcodes (prefixed instructions) introduced with Power 10
// Field encoders for the prefix word; each places x into a fixed bit range via
// opp_u_field:
//   r_eo  - bit 11      (R bit)
//   type  - bits 6..7   (prefix type)
//   st_x0 - bit 8       (one-bit ST field)
//   st_x1 - bits 8..11  (four-bit ST field)
static long r_eo( int x) { return opp_u_field(x, 11, 11); }
static long type( int x) { return opp_u_field(x, 7, 6); }
static long st_x0( int x) { return opp_u_field(x, 8, 8); }
static long st_x1( int x) { return opp_u_field(x, 11, 8); }

// - 8LS:D/MLS:D Formats
// The immediate x is split into two fields:
//   d0_eo - bits 16..33 of x (masked to 18 bits), placed into bits 14..31
//   d1_eo - the low 16 bits of x, placed into bits 16..31
// Both mask x first, so bits of x above bit 33 are dropped without a check here.
// s0_eo/s1_eo are plain aliases of d0_eo/d1_eo.
static long d0_eo( long x) { return opp_u_field((x >> 16) & 0x3FFFF, 31, 14); }
static long d1_eo( long x) { return opp_u_field(x & 0xFFFF, 31, 16); }
static long s0_eo( long x) { return d0_eo(x); }
static long s1_eo( long x) { return d1_eo(x); }

// - 8RR:XX4/8RR:D Formats
// Encodes the upper 16 bits of the 32-bit immediate x into bits 16..31.
// The shifted value is masked to 16 bits: for a negative x the arithmetic right
// shift is sign-extended and would not fit the unsigned field otherwise. This
// matches d0_eo, which also masks its shifted operand.
static long imm0_eo( int x) { return opp_u_field((x >> 16) & 0xFFFF, 31, 16); }
// imm1_eo places the low 16 bits of x into bits 16..31; uimm_eo places x into
// bits 29..31 (3 bits); imm_eo places x into bits 24..31 (8 bits).
static long imm1_eo( int x) { return opp_u_field(x & 0xFFFF, 31, 16); }
static long uimm_eo( int x) { return opp_u_field(x, 31, 29); }
static long imm_eo( int x) { return opp_u_field(x, 31, 24); }

//static int xo1( int x) { return opp_u_field(x, 29, 21); }// is contained in our opcodes
//static int xo2( int x) { return opp_u_field(x, 30, 21); }// is contained in our opcodes
//static int xo3( int x) { return opp_u_field(x, 30, 22); }// is contained in our opcodes
@@ -1301,9 +1361,15 @@ class Assembler : public AbstractAssembler {
// PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions
inline void addi( Register d, Register a, int si16);
inline void addis(Register d, Register a, int si16);

// Prefixed add immediate, introduced by POWER10
inline void paddi(Register d, Register a, long si34, bool r);
inline void pli( Register d, long si34);

private:
inline void addi_r0ok( Register d, Register a, int si16);
inline void addis_r0ok(Register d, Register a, int si16);
inline void paddi_r0ok(Register d, Register a, long si34, bool r);
public:
inline void addic_( Register d, Register a, int si16);
inline void subfic( Register d, Register a, int si16);
@@ -130,6 +130,17 @@ inline void Assembler::divw_( Register d, Register a, Register b) { emit_int32(
// divwu / divwu_: both emit DIVWU_OPCODE with oe(0); they differ only in rc(0) vs rc(1).
inline void Assembler::divwu( Register d, Register a, Register b) { emit_int32(DIVWU_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
inline void Assembler::divwu_( Register d, Register a, Register b) { emit_int32(DIVWU_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }

// Prefixed instructions, introduced by POWER10
inline void Assembler::paddi(Register d, Register a, long si34, bool r = false) {
assert(a != R0 || r, "r0 not allowed, unless R is set (CIA relative)");
paddi_r0ok( d, a, si34, r);

This comment has been minimized.

@CoreyAshford

CoreyAshford Feb 1, 2021

The space after the ( isn't needed here, since it's not aligning with a similar call above or below.

This comment has been minimized.

@TheRealMDoerr

TheRealMDoerr Mar 23, 2021
Contributor

Right, please remove extra spaces.

}

// Emits the two-word prefixed add-immediate (prefix word first, then suffix word)
// without rejecting a == R0; paddi() performs that check and pli() relies on R0.
//   d    - target register
//   a    - base register (R0 is permitted here)
//   si34 - signed 34-bit immediate, split into d0 (prefix) and d1 (suffix)
//   r    - R bit of the prefix word
inline void Assembler::paddi_r0ok(Register d, Register a, long si34, bool r = false) {
  // d0_eo/d1_eo mask the immediate, so an out-of-range value would otherwise be
  // truncated silently and a wrong constant encoded.
  assert(is_simm(si34, 34), "si34 out of range for prefixed addi");
  // Power ISA 3.1: the instruction form is invalid if R = 1 and RA != 0.
  assert(!r || a == R0, "R bit set requires RA to be R0");
  emit_int32(PADDI_PREFIX_OPCODE | r_eo(r) | d0_eo(si34));
  emit_int32(PADDI_SUFFIX_OPCODE | rt(d) | ra(a) | d1_eo(si34));
}

// Fixed-Point Arithmetic Instructions with Overflow detection
// addo / addo_: both emit ADD_OPCODE with oe(1); they differ only in rc(0) vs rc(1).
inline void Assembler::addo( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::addo_( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
@@ -168,6 +179,9 @@ inline void Assembler::lis( Register d, int si16) { Assembler::addi
// addir: addi with the immediate and base register swapped in the argument list.
// subi: addi with the negated immediate.
inline void Assembler::addir(Register d, int si16, Register a) { Assembler::addi(d, a, si16); }
inline void Assembler::subi( Register d, Register a, int si16) { Assembler::addi(d, a, -si16); }

// Prefixed load immediate, introduced by POWER10.
// Implemented as the prefixed add-immediate with base register R0 and the R bit
// clear, hence the R0-permitting emitter is called directly.
inline void Assembler::pli(Register d, long si34) {
  paddi_r0ok(d, R0, si34, false);
}

// PPC 1, section 3.3.9, Fixed-Point Compare Instructions
// cmpi compares register a with the 16-bit signed immediate si16 (encoded via simm);
// cmp compares register a with register b. f selects the condition register field,
// l is encoded via l10.
inline void Assembler::cmpi( ConditionRegister f, int l, Register a, int si16) { emit_int32( CMPI_OPCODE | bf(f) | l10(l) | ra(a) | simm(si16,16)); }
inline void Assembler::cmp( ConditionRegister f, int l, Register a, Register b) { emit_int32( CMP_OPCODE | bf(f) | l10(l) | ra(a) | rb(b)); }
@@ -1155,7 +1155,41 @@ static int cc_to_biint(int cc, int flags_reg) {
// is the number of bytes (not instructions) which will be inserted before
// the instruction. The padding must match the size of a NOP instruction.

// Currently not used on this platform.
// A prefixed instruction occupies two words and must not cross a 64-byte
// boundary (see Section 1.6 of Power ISA Version 3.1).
// Returns the number of padding bytes to insert before an instruction that would
// start at current_offset: the size of one nop if needed, otherwise 0.
static int compute_prefix_padding(int current_offset) {
  assert(PowerArchitecturePPC64 >= 10 && (CodeEntryAlignment & 63) == 0,
         "Code buffer must be aligned to a multiple of 64 bytes");
  // If the second word would begin exactly on a 64-byte boundary, the first word
  // sits in the last slot before it and the instruction would straddle the boundary.
  const bool second_word_on_boundary = is_aligned(current_offset + BytesPerInstWord, 64);
  return second_word_on_boundary ? BytesPerInstWord : 0;
}

// Padding for the loadConI32 node: bytes of nop needed before current_offset so the
// prefixed instruction does not cross a 64-byte boundary (see compute_prefix_padding).
int loadConI32Node::compute_padding(int current_offset) const {
return compute_prefix_padding(current_offset);
}

// Padding for the loadConL34 node: bytes of nop needed before current_offset so the
// prefixed instruction does not cross a 64-byte boundary (see compute_prefix_padding).
int loadConL34Node::compute_padding(int current_offset) const {
return compute_prefix_padding(current_offset);
}

// Padding for the addI_reg_imm32 node: bytes of nop needed before current_offset so the
// prefixed instruction does not cross a 64-byte boundary (see compute_prefix_padding).
int addI_reg_imm32Node::compute_padding(int current_offset) const {
return compute_prefix_padding(current_offset);
}

// Padding for the addL_reg_imm34 node: bytes of nop needed before current_offset so the
// prefixed instruction does not cross a 64-byte boundary (see compute_prefix_padding).
int addL_reg_imm34Node::compute_padding(int current_offset) const {
return compute_prefix_padding(current_offset);
}

// Padding for the addP_reg_imm34 node: bytes of nop needed before current_offset so the
// prefixed instruction does not cross a 64-byte boundary (see compute_prefix_padding).
int addP_reg_imm34Node::compute_padding(int current_offset) const {
return compute_prefix_padding(current_offset);
}

// Padding for the cmprb_Whitespace_reg_reg_prefixed node, whose first emitted
// instruction is a prefixed PLI: bytes of nop needed before current_offset so that
// instruction does not cross a 64-byte boundary (see compute_prefix_padding).
int cmprb_Whitespace_reg_reg_prefixedNode::compute_padding(int current_offset) const {
return compute_prefix_padding(current_offset);
}


//=============================================================================

@@ -1896,7 +1930,7 @@ uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {

#ifndef PRODUCT
// Prints the textual form of this nop node, including the number of nops (_count).
// Nops are inserted both for loop alignment and ahead of prefixed instructions, so a
// single message covering both cases is printed; the superseded loop-only message is
// dropped so the node is not described twice.
void MachNopNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("NOP \t// %d nops to pad for loops or prefixed instructions.", _count);
}
#endif

@@ -4041,6 +4075,15 @@ operand immIhi16() %{
interface(CONST_INTER);
%}

// Integer Immediate: 32-bit immediate for prefixed addi and load/store.
// Matches any ConI, but only when PowerArchitecturePPC64 >= 10; instructs that take
// this operand are therefore restricted to POWER10 and newer by the operand itself.
operand immI32() %{
predicate(PowerArchitecturePPC64 >= 10);
op_cost(0);
match(ConI);
format %{ %}
interface(CONST_INTER);
%}

operand immInegpow2() %{
predicate(is_power_of_2(-(juint)(n->get_int())));
match(ConI);
@@ -4282,6 +4325,15 @@ operand immL32() %{
interface(CONST_INTER);
%}

// Long Immediate: 34-bit, immediate field in prefixed addi and load/store.
// Matches a ConL only when PowerArchitecturePPC64 >= 10 and the value fits a signed
// 34-bit field (Assembler::is_simm(..., 34)).
operand immL34() %{
predicate(PowerArchitecturePPC64 >= 10 && Assembler::is_simm(n->get_long(), 34));
match(ConL);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}

// Long Immediate: 64-bit, where highest 16 bits are not 0x0000.
operand immLhighest16() %{
predicate((n->get_long() & 0xffff000000000000L) != 0L && (n->get_long() & 0x0000ffffffffffffL) == 0L);
@@ -5816,6 +5868,23 @@ instruct loadConI32_lo16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
ins_pipe(pipe_class_default);
%}

// Load a 32-bit int constant with a single prefixed PLI (two words, 8 bytes).
instruct loadConI32(iRegIdst dst, immI32 src) %{
match(Set dst src);
// This instruct is valid only on Power 10 and up. An explicit predicate cannot be
// used here: ADLC rejects it with "ADLC does not support instruction chain rules
// with predicates". The restriction is enforced by the predicate of the immI32
// operand instead, so the line below stays commented out.
// predicate(PowerArchitecturePPC64 >= 10);
ins_cost(DEFAULT_COST+1);

format %{ "PLI $dst, $src" %}
size(8);
ins_encode %{
// A prefixed instruction must not start in the last word of a 64-byte block
// ((pc & 0x3c) == 0x3c), because it would then cross the 64-byte boundary.
assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
__ pli($dst$$Register, $src$$constant);
%}
ins_pipe(pipe_class_default);
ins_alignment(2);
%}

instruct loadConI_Ex(iRegIdst dst, immI src) %{
match(Set dst src);
ins_cost(DEFAULT_COST*2);
@@ -5887,6 +5956,24 @@ instruct loadConL32_Ex(iRegLdst dst, immL32 src) %{
%}
%}

// Load 34-bit long constant using prefixed addi. No constant pool entries required.
instruct loadConL34(iRegLdst dst, immL34 src) %{
match(Set dst src);
// This macro is valid only in Power 10 and up, but adding the following predicate here
// caused a build error, so we comment it out for now.
// predicate(PowerArchitecturePPC64 >= 10);
ins_cost(DEFAULT_COST+1);

This comment has been minimized.

@CoreyAshford

CoreyAshford Jan 21, 2021

There's no predicate for >= POWER10. I can see how this works because of the immL34 operand having its own predicate, but in later instructs, e.g. addL_reg_imm34 even though the operand is immI32, you still add the explicit predicate.

I'd rather there be an explicit POWER10 predicate in this instruct.

This comment has been minimized.

@kazunoriogata

kazunoriogata Jan 26, 2021
Author

If predicate is added, adlc fails with an error message: "Syntax Error: :ADLC does not support instruction chain rules with predicates" I think addL_reg_imm34 allows predicate because it is not called from other rules. Is it better to leave some comments? (BTW, immI32 is only for POWER10 or higher. POWER9 version uses immI16 or immI16hi.)

This comment has been minimized.

@CoreyAshford

CoreyAshford Jan 27, 2021

Hmm, I'm confused. I don't see any other reference to loadConL34 in ppc.ad.

This comment has been minimized.

@CoreyAshford

CoreyAshford Feb 1, 2021

I wish we knew why this predicate is causing an issue, but I guess it's not important because the operand type provides sufficient limiting of the instruct.

This comment has been minimized.

@TheRealMDoerr

TheRealMDoerr Mar 23, 2021
Contributor

Ok, I think we can't use a predicate for Set dst src. It's fine to have it in the operand.
Would be nice to describe this in the comment.


format %{ "PLI $dst, $src \t// long" %}
size(8);
ins_encode %{
assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
__ pli($dst$$Register, $src$$constant);
%}
ins_pipe(pipe_class_default);
ins_alignment(2);
%}

// Load long constant 0x????000000000000.
instruct loadConLhighest16_Ex(iRegLdst dst, immLhighest16 src) %{
match(Set dst src);
@@ -8521,6 +8608,21 @@ instruct addI_reg_immhi16(iRegIdst dst, iRegIsrc src1, immIhi16 src2) %{
ins_pipe(pipe_class_default);
%}

// Immediate Addition using prefixed addi
// Adds a 32-bit int immediate to src1 with one 8-byte PADDI. Selected only when
// PowerArchitecturePPC64 >= 10 (explicit predicate below; immI32 requires the same).
instruct addI_reg_imm32(iRegIdst dst, iRegIsrc src1, immI32 src2) %{
match(Set dst (AddI src1 src2));
predicate(PowerArchitecturePPC64 >= 10);
ins_cost(DEFAULT_COST+1);
format %{ "PADDI $dst, $src1, $src2" %}
size(8);
ins_encode %{
// A prefixed instruction must not start in the last word of a 64-byte block
// ((pc & 0x3c) == 0x3c), because it would then cross the 64-byte boundary.
assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
__ paddi($dst$$Register, $src1$$Register, $src2$$constant);
%}
ins_pipe(pipe_class_default);
ins_alignment(2);
%}

// Long Addition
instruct addL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
match(Set dst (AddL src1 src2));
@@ -8595,6 +8697,23 @@ instruct addL_reg_immhi16(iRegLdst dst, iRegLsrc src1, immL32hi16 src2) %{
ins_pipe(pipe_class_default);
%}

// Long Immediate Addition using prefixed addi
// No constant pool entries required.
// Adds a signed 34-bit long immediate to src1 with one 8-byte PADDI. Selected only
// when PowerArchitecturePPC64 >= 10 (explicit predicate below; immL34 requires the same).
instruct addL_reg_imm34(iRegLdst dst, iRegLsrc src1, immL34 src2) %{
match(Set dst (AddL src1 src2));
predicate(PowerArchitecturePPC64 >= 10);
ins_cost(DEFAULT_COST+1);

format %{ "PADDI $dst, $src1, $src2" %}
size(8);
ins_encode %{
// A prefixed instruction must not start in the last word of a 64-byte block
// ((pc & 0x3c) == 0x3c), because it would then cross the 64-byte boundary.
assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
__ paddi($dst$$Register, $src1$$Register, $src2$$constant);
%}
ins_pipe(pipe_class_default);
ins_alignment(2);
%}

// Pointer Register Addition
instruct addP_reg_reg(iRegPdst dst, iRegP_N2P src1, iRegLsrc src2) %{
match(Set dst (AddP src1 src2));
@@ -8632,6 +8751,23 @@ instruct addP_reg_immhi16(iRegPdst dst, iRegP_N2P src1, immL32hi16 src2) %{
ins_pipe(pipe_class_default);
%}

// Pointer Immediate Addition using prefixed addi
// No constant pool entries required.
// Adds a signed 34-bit long immediate to the pointer src1 with one 8-byte PADDI.
// Selected only when PowerArchitecturePPC64 >= 10 (explicit predicate below; immL34
// requires the same).
instruct addP_reg_imm34(iRegPdst dst, iRegP_N2P src1, immL34 src2) %{
match(Set dst (AddP src1 src2));
predicate(PowerArchitecturePPC64 >= 10);
ins_cost(DEFAULT_COST+1);

format %{ "PADDI $dst, $src1, $src2" %}
size(8);
ins_encode %{
// A prefixed instruction must not start in the last word of a 64-byte block
// ((pc & 0x3c) == 0x3c), because it would then cross the 64-byte boundary.
assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
__ paddi($dst$$Register, $src1$$Register, $src2$$constant);
%}
ins_pipe(pipe_class_default);
ins_alignment(2);
%}

//---------------------
// Subtraction Instructions

@@ -11934,6 +12070,7 @@ instruct cmprb_UpperCase_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, fla

instruct cmprb_Whitespace_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
match(Set dst (Whitespace src1));
predicate(PowerArchitecturePPC64 <= 9);
effect(TEMP src2, TEMP crx);
ins_cost(4 * DEFAULT_COST);

@@ -11953,6 +12090,29 @@ instruct cmprb_Whitespace_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, fl
ins_pipe(pipe_class_default);
%}

// Power 10 version, using prefixed addi to load 32-bit constant
// src2 and crx are declared TEMP: src2 receives the range constant 0x201C0D09 and
// crx receives the result of the range compare, which SETB then moves into dst.
// size(16): the prefixed PLI takes 8 bytes, leaving 8 bytes for CMPRB and SETB.
instruct cmprb_Whitespace_reg_reg_prefixed(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
match(Set dst (Whitespace src1));
predicate(PowerArchitecturePPC64 >= 10);
effect(TEMP src2, TEMP crx);
ins_cost(3 * DEFAULT_COST);

format %{ "PLI $src2, 0x201C0D09\n\t"
"CMPRB $crx, 1, $src1, $src2\n\t"
"SETB $dst, $crx" %}
size(16);
ins_encode %{
// 0x09 to 0x0D, 0x1C to 0x20
// The leading PLI is prefixed, so it must not start in the last word of a 64-byte
// block ((pc & 0x3c) == 0x3c), because it would then cross the 64-byte boundary.
assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
__ pli($src2$$Register, 0x201C0D09);
// compare src with ranges 0x09 to 0x0D and 0x1C to 0x20
__ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
__ setb($dst$$Register, $crx$$CondRegister);
%}
ins_pipe(pipe_class_default);
ins_alignment(2);
%}

//----------Branches---------------------------------------------------------
// Jump

ProTip! Use n and p to navigate between commits in a pull request.