Skip to content
Permalink
Browse files
8259822: [PPC64] Support the prefixed instruction format added in POWER10

Reviewed-by: cashford, mdoerr
  • Loading branch information
Kazunori Ogata authored and TheRealMDoerr committed Apr 9, 2021
1 parent a45733f commit f7a6c63ad30a67b34868118a4a88ed100e8bd769
Showing with 244 additions and 4 deletions.
  1. +68 −2 src/hotspot/cpu/ppc/assembler_ppc.hpp
  2. +14 −0 src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
  3. +162 −2 src/hotspot/cpu/ppc/ppc.ad
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2020 SAP SE. All rights reserved.
* Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2021 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -221,6 +221,12 @@ class Assembler : public AbstractAssembler {
SPR_0_4_SHIFT = 16u, // SPR_0_4 field in bits 16 -- 20
RS_SHIFT = 21u, // RS field in bits 21 -- 25
OPCODE_SHIFT = 26u, // opcode in bits 26 -- 31

// Shift counts in prefix word
// (bit ranges below are written low -- high, like the other shift comments in this enum)
PRE_TYPE_SHIFT = 24u, // Prefix type in bits 24 -- 25
PRE_ST1_SHIFT = 23u, // ST1 field in bits 23 -- 23
PRE_R_SHIFT = 20u, // R-bit in bits 20 -- 20
PRE_ST4_SHIFT = 20u, // ST4 field in bits 20 -- 23 (its lowest bit shares the position of the R-bit)
};

enum opcdxos_masks {
@@ -797,6 +803,28 @@ class Assembler : public AbstractAssembler {

};

enum opcdeos_mask {
  // Selects the 6-bit primary opcode of a prefix word.
  PREFIX_OPCODE_MASK = (0x3Fu << OPCODE_SHIFT),
  // Selects the primary opcode together with the 2-bit prefix type.
  PREFIX_OPCODE_TYPE_MASK = PREFIX_OPCODE_MASK | (0x3u << PRE_TYPE_SHIFT),
  // Opcode + type + subtype (ST) field:
  //  - types 00/10 carry a 1-bit ST field (ST1),
  //  - types 01/11 carry a 4-bit ST field (ST4).
  PREFIX_OPCODE_TYPEx0_MASK = PREFIX_OPCODE_TYPE_MASK | (0x1u << PRE_ST1_SHIFT),
  PREFIX_OPCODE_TYPEx1_MASK = PREFIX_OPCODE_TYPE_MASK | (0xFu << PRE_ST4_SHIFT),

  // Per-instruction masks (prefix word and suffix word).
  PADDI_PREFIX_OPCODE_MASK = PREFIX_OPCODE_TYPEx0_MASK,
  PADDI_SUFFIX_OPCODE_MASK = ADDI_OPCODE_MASK,
};

// Opcode bit patterns for prefixed (two-word) instructions.
enum opcdeos {
// Primary opcode value 1 in the opcode field of the prefix word.
PREFIX_PRIMARY_OPCODE = (1u << OPCODE_SHIFT),

// Prefixed addi/li
// Prefix word: primary opcode 1 with prefix type 2 (binary 10).
PADDI_PREFIX_OPCODE = PREFIX_PRIMARY_OPCODE | (2u << PRE_TYPE_SHIFT),
// Suffix word: reuses the opcode of the non-prefixed addi.
PADDI_SUFFIX_OPCODE = ADDI_OPCODE,
};

// Trap instructions TO bits
enum trap_to_bits {
// single bits
@@ -1082,6 +1110,20 @@ class Assembler : public AbstractAssembler {
static int inv_bo_field(int x) { return inv_opp_u_field(x, 10, 6); }
static int inv_bi_field(int x) { return inv_opp_u_field(x, 15, 11); }

// For extended opcodes (prefixed instructions) introduced with Power 10
// Field decoders for prefixed instruction words. Each one forwards to
// inv_opp_u_field with the same (hi_bit, lo_bit) pair used by the matching
// encoder (r_eo, type, st_x0, st_x1, ...), so it presumably extracts that field
// from an instruction word -- NOTE(review): inv_opp_u_field is not shown here.
static long inv_r_eo( int x) { return inv_opp_u_field(x, 11, 11); }
static long inv_type( int x) { return inv_opp_u_field(x, 7, 6); }
static long inv_st_x0( int x) { return inv_opp_u_field(x, 8, 8); }
static long inv_st_x1( int x) { return inv_opp_u_field(x, 11, 8); }

// - 8LS:D/MLS:D Formats
// Covers only the 18-bit d0 field (bits 14..31). The encoder d0_eo() shifts its
// argument right by 16 first, so the result here is the raw field, not the
// immediate -- NOTE(review): callers presumably shift it back; confirm.
static long inv_d0_eo( long x) { return inv_opp_u_field(x, 31, 14); }

// - 8RR:XX4/8RR:D Formats
static long inv_imm0_eo(int x) { return inv_opp_u_field(x, 31, 16); }
static long inv_uimm_eo(int x) { return inv_opp_u_field(x, 31, 29); }
static long inv_imm_eo( int x) { return inv_opp_u_field(x, 31, 24); }

#define opp_u_field(x, hi_bit, lo_bit) u_field(x, 31-(lo_bit), 31-(hi_bit))
#define opp_s_field(x, hi_bit, lo_bit) s_field(x, 31-(lo_bit), 31-(hi_bit))

@@ -1203,6 +1245,24 @@ class Assembler : public AbstractAssembler {
static int vcmp_rc( int x) { return opp_u_field(x, 21, 21); } // for vcmp* instructions
static int xxsplt_uim(int x) { return opp_u_field(x, 15, 14); } // for xxsplt* instructions

// For extended opcodes (prefixed instructions) introduced with Power 10
// Field encoders for prefixed instruction words. The (hi_bit, lo_bit) arguments
// are bit positions that opp_u_field converts to u_field positions via 31 - bit.
static long r_eo( int x) { return opp_u_field(x, 11, 11); }
static long type( int x) { return opp_u_field(x, 7, 6); }
static long st_x0( int x) { return opp_u_field(x, 8, 8); }
static long st_x1( int x) { return opp_u_field(x, 11, 8); }

// - 8LS:D/MLS:D Formats
// A displacement/immediate is split across the two words: d0_eo takes the
// 18 bits above the low 16 (used for the prefix word by paddi_r0ok), d1_eo
// takes the low 16 bits (used for the suffix word). Both mask their input, so
// negative values are reduced to the field width before encoding.
static long d0_eo( long x) { return opp_u_field((x >> 16) & 0x3FFFF, 31, 14); }
static long d1_eo( long x) { return opp_u_field(x & 0xFFFF, 31, 16); }
static long s0_eo( long x) { return d0_eo(x); }
static long s1_eo( long x) { return d1_eo(x); }

// - 8RR:XX4/8RR:D Formats
// imm0_eo/imm1_eo split a 32-bit immediate into its upper and lower 16 bits.
// The upper half must be masked like the lower half: for a negative x,
// x >> 16 is sign-extended and would not fit the 16-bit field.
static long imm0_eo( int x) { return opp_u_field((x >> 16) & 0xFFFF, 31, 16); }
static long imm1_eo( int x) { return opp_u_field(x & 0xFFFF, 31, 16); }
static long uimm_eo( int x) { return opp_u_field(x, 31, 29); }
static long imm_eo( int x) { return opp_u_field(x, 31, 24); }

//static int xo1( int x) { return opp_u_field(x, 29, 21); }// is contained in our opcodes
//static int xo2( int x) { return opp_u_field(x, 30, 21); }// is contained in our opcodes
//static int xo3( int x) { return opp_u_field(x, 30, 22); }// is contained in our opcodes
@@ -1302,9 +1362,15 @@ class Assembler : public AbstractAssembler {
// PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions
inline void addi( Register d, Register a, int si16);
inline void addis(Register d, Register a, int si16);

// Prefixed add immediate, introduced by POWER10
// - si34: immediate; the encoders d0_eo/d1_eo split it into an 18-bit part for
//   the prefix word and a 16-bit part for the suffix word.
// - r:    value of the R bit ("CIA relative" per the assert in the inline
//   definition). The inline definition supplies the default r = false.
// pli(d, si34) loads the immediate by emitting the same encoding with a = R0
// and r = false.
inline void paddi(Register d, Register a, long si34, bool r);
inline void pli( Register d, long si34);

private:
inline void addi_r0ok( Register d, Register a, int si16);
inline void addis_r0ok(Register d, Register a, int si16);
inline void paddi_r0ok(Register d, Register a, long si34, bool r);
public:
inline void addic_( Register d, Register a, int si16);
inline void subfic( Register d, Register a, int si16);
@@ -131,6 +131,17 @@ inline void Assembler::divw_( Register d, Register a, Register b) { emit_int32(
inline void Assembler::divwu( Register d, Register a, Register b) { emit_int32(DIVWU_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
inline void Assembler::divwu_( Register d, Register a, Register b) { emit_int32(DIVWU_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }

// Prefixed instructions, introduced by POWER10
// paddi d, a, si34, r: prefixed add immediate (two instruction words).
//   d    - target register
//   a    - base register; must not be R0 unless r is set (use pli to load a
//          plain immediate)
//   si34 - immediate, split over the prefix and suffix words by paddi_r0ok
//   r    - R bit (CIA relative); defaults to false
inline void Assembler::paddi(Register d, Register a, long si34, bool r = false) {
  assert(a != R0 || r, "r0 not allowed, unless R is set (CIA relative)");
  // Power ISA 3.1: with R = 1 the RA field has to be 0, any other register
  // yields an invalid instruction form. Reject it here instead of emitting it.
  assert(!r || a == R0, "R is set (CIA relative), so RA must be r0");
  paddi_r0ok( d, a, si34, r);
}

// Emits the two words of a prefixed addi without checking the base register,
// so a == R0 is accepted (pli relies on this).
inline void Assembler::paddi_r0ok(Register d, Register a, long si34, bool r = false) {
  // Prefix word: opcode/type bits, the R bit and the upper part of the immediate.
  const long prefix_word = PADDI_PREFIX_OPCODE | r_eo(r) | d0_eo(si34);
  // Suffix word: addi opcode, both registers and the low 16 bits of the immediate.
  const long suffix_word = PADDI_SUFFIX_OPCODE | rt(d) | ra(a) | d1_eo(si34);

  emit_int32(prefix_word);
  emit_int32(suffix_word);
}

// Fixed-Point Arithmetic Instructions with Overflow detection
inline void Assembler::addo( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::addo_( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
@@ -169,6 +180,9 @@ inline void Assembler::lis( Register d, int si16) { Assembler::addi
inline void Assembler::addir(Register d, int si16, Register a) { Assembler::addi(d, a, si16); }
inline void Assembler::subi( Register d, Register a, int si16) { Assembler::addi(d, a, -si16); }

// Prefixed load immediate (POWER10): encoded as a prefixed addi with
// base register R0 and the R bit cleared.
inline void Assembler::pli(Register d, long si34) {
  paddi_r0ok(d, R0, si34, false);
}

// PPC 1, section 3.3.9, Fixed-Point Compare Instructions
inline void Assembler::cmpi( ConditionRegister f, int l, Register a, int si16) { emit_int32( CMPI_OPCODE | bf(f) | l10(l) | ra(a) | simm(si16,16)); }
inline void Assembler::cmp( ConditionRegister f, int l, Register a, Register b) { emit_int32( CMP_OPCODE | bf(f) | l10(l) | ra(a) | rb(b)); }
@@ -1155,7 +1155,41 @@ static int cc_to_biint(int cc, int flags_reg) {
// is the number of bytes (not instructions) which will be inserted before
// the instruction. The padding must match the size of a NOP instruction.

// Currently not used on this platform.
// A prefixed instruction occupies two words and must not cross a 64-byte
// boundary (see Section 1.6 of Power ISA Version 3.1). Returns the number of
// padding bytes to insert before an instruction starting at current_offset:
// one nop (BytesPerInstWord) if its first word would be the last word of a
// 64-byte block, otherwise 0.
static int compute_prefix_padding(int current_offset) {
  assert(PowerArchitecturePPC64 >= 10 && (CodeEntryAlignment & 63) == 0,
         "Code buffer must be aligned to a multiple of 64 bytes");
  // The second word would start exactly on the next 64-byte boundary.
  const bool suffix_starts_new_block = is_aligned(current_offset + BytesPerInstWord, 64);
  return suffix_starts_new_block ? BytesPerInstWord : 0;
}

// compute_padding() overrides for the nodes of every instruct in this file that
// starts with a prefixed instruction (pli/paddi). Each one simply forwards to
// compute_prefix_padding(), which returns the size of one nop when the
// instruction would otherwise cross a 64-byte boundary, and 0 otherwise.
int loadConI32Node::compute_padding(int current_offset) const {
return compute_prefix_padding(current_offset);
}

int loadConL34Node::compute_padding(int current_offset) const {
return compute_prefix_padding(current_offset);
}

int addI_reg_imm32Node::compute_padding(int current_offset) const {
return compute_prefix_padding(current_offset);
}

int addL_reg_imm34Node::compute_padding(int current_offset) const {
return compute_prefix_padding(current_offset);
}

int addP_reg_imm34Node::compute_padding(int current_offset) const {
return compute_prefix_padding(current_offset);
}

// The prefixed instruction (pli) is the first instruction of this node's encoding.
int cmprb_Whitespace_reg_reg_prefixedNode::compute_padding(int current_offset) const {
return compute_prefix_padding(current_offset);
}


//=============================================================================

@@ -1893,7 +1927,7 @@ uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {

#ifndef PRODUCT
// Prints the listing text for a nop-padding node (compiled only when PRODUCT
// is not defined). Only the current wording is printed; the earlier
// "... to pad for loops." statement was superseded and keeping both would
// emit the NOP text twice.
void MachNopNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("NOP \t// %d nops to pad for loops or prefixed instructions.", _count);
}
#endif

@@ -4016,6 +4050,15 @@ operand immIhi16() %{
interface(CONST_INTER);
%}

// Integer Immediate: 32-bit immediate for prefixed addi and load/store.
// Matches every ConI, but only when PowerArchitecturePPC64 >= 10. Unlike immL34
// there is no value-range check in the predicate.
operand immI32() %{
predicate(PowerArchitecturePPC64 >= 10);
op_cost(0);
match(ConI);
format %{ %}
interface(CONST_INTER);
%}

operand immInegpow2() %{
predicate(is_power_of_2(-(juint)(n->get_int())));
match(ConI);
@@ -4257,6 +4300,15 @@ operand immL32() %{
interface(CONST_INTER);
%}

// Long Immediate: 34-bit, immediate field in prefixed addi and load/store.
// Matches a ConL only when PowerArchitecturePPC64 >= 10 and the value passes
// Assembler::is_simm(value, 34).
operand immL34() %{
predicate(PowerArchitecturePPC64 >= 10 && Assembler::is_simm(n->get_long(), 34));
match(ConL);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}

// Long Immediate: 64-bit, where highest 16 bits are not 0x0000.
operand immLhighest16() %{
predicate((n->get_long() & 0xffff000000000000L) != 0L && (n->get_long() & 0x0000ffffffffffffL) == 0L);
@@ -5791,6 +5843,23 @@ instruct loadConI32_lo16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
ins_pipe(pipe_class_default);
%}

// Load a 32-bit integer constant with a single prefixed PLI (8 bytes).
// The Power 10 restriction comes from the immI32 operand's predicate.
// loadConI32Node::compute_padding() requests a nop in front of this node when
// the instruction would otherwise cross a 64-byte boundary.
instruct loadConI32(iRegIdst dst, immI32 src) %{
match(Set dst src);
// This macro is valid only in Power 10 and up, but adding the following predicate here
// caused a build error, so we comment it out for now.
// predicate(PowerArchitecturePPC64 >= 10);
ins_cost(DEFAULT_COST+1);

format %{ "PLI $dst, $src" %}
size(8);
ins_encode %{
// The prefix word must not sit in the last word (offset 0x3c) of a 64-byte block.
assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
__ pli($dst$$Register, $src$$constant);
%}
ins_pipe(pipe_class_default);
ins_alignment(2);
%}

instruct loadConI_Ex(iRegIdst dst, immI src) %{
match(Set dst src);
ins_cost(DEFAULT_COST*2);
@@ -5862,6 +5931,24 @@ instruct loadConL32_Ex(iRegLdst dst, immL32 src) %{
%}
%}

// Load 34-bit long constant using prefixed addi. No constant pool entries required.
// The Power 10 restriction and the 34-bit range check come from the immL34
// operand's predicate. loadConL34Node::compute_padding() requests a nop in
// front of this node when the instruction would otherwise cross a 64-byte boundary.
instruct loadConL34(iRegLdst dst, immL34 src) %{
match(Set dst src);
// This macro is valid only in Power 10 and up, but adding the following predicate here
// caused a build error, so we comment it out for now.
// predicate(PowerArchitecturePPC64 >= 10);
ins_cost(DEFAULT_COST+1);

format %{ "PLI $dst, $src \t// long" %}
size(8);
ins_encode %{
// The prefix word must not sit in the last word (offset 0x3c) of a 64-byte block.
assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
__ pli($dst$$Register, $src$$constant);
%}
ins_pipe(pipe_class_default);
ins_alignment(2);
%}

// Load long constant 0x????000000000000.
instruct loadConLhighest16_Ex(iRegLdst dst, immLhighest16 src) %{
match(Set dst src);
@@ -8474,6 +8561,21 @@ instruct addI_reg_immhi16(iRegIdst dst, iRegIsrc src1, immIhi16 src2) %{
ins_pipe(pipe_class_default);
%}

// Immediate Addition using prefixed addi
// dst = src1 + src2 for an int register and an immI32 constant, emitted as one
// 8-byte PADDI. Only selected when PowerArchitecturePPC64 >= 10.
// paddi() is called without the R argument, so its default (false) applies.
instruct addI_reg_imm32(iRegIdst dst, iRegIsrc src1, immI32 src2) %{
match(Set dst (AddI src1 src2));
predicate(PowerArchitecturePPC64 >= 10);
ins_cost(DEFAULT_COST+1);
format %{ "PADDI $dst, $src1, $src2" %}
size(8);
ins_encode %{
// The prefix word must not sit in the last word (offset 0x3c) of a 64-byte block.
assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
__ paddi($dst$$Register, $src1$$Register, $src2$$constant);
%}
ins_pipe(pipe_class_default);
ins_alignment(2);
%}

// Long Addition
instruct addL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
match(Set dst (AddL src1 src2));
@@ -8548,6 +8650,23 @@ instruct addL_reg_immhi16(iRegLdst dst, iRegLsrc src1, immL32hi16 src2) %{
ins_pipe(pipe_class_default);
%}

// Long Immediate Addition using prefixed addi
// No constant pool entries required.
// dst = src1 + src2 for a long register and an immL34 constant, emitted as one
// 8-byte PADDI. Only selected when PowerArchitecturePPC64 >= 10.
instruct addL_reg_imm34(iRegLdst dst, iRegLsrc src1, immL34 src2) %{
match(Set dst (AddL src1 src2));
predicate(PowerArchitecturePPC64 >= 10);
ins_cost(DEFAULT_COST+1);

format %{ "PADDI $dst, $src1, $src2" %}
size(8);
ins_encode %{
// The prefix word must not sit in the last word (offset 0x3c) of a 64-byte block.
assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
__ paddi($dst$$Register, $src1$$Register, $src2$$constant);
%}
ins_pipe(pipe_class_default);
ins_alignment(2);
%}

// Pointer Register Addition
instruct addP_reg_reg(iRegPdst dst, iRegP_N2P src1, iRegLsrc src2) %{
match(Set dst (AddP src1 src2));
@@ -8585,6 +8704,23 @@ instruct addP_reg_immhi16(iRegPdst dst, iRegP_N2P src1, immL32hi16 src2) %{
ins_pipe(pipe_class_default);
%}

// Pointer Immediate Addition using prefixed addi
// No constant pool entries required.
// dst = src1 + src2 for a pointer register and an immL34 constant, emitted as
// one 8-byte PADDI. Only selected when PowerArchitecturePPC64 >= 10.
instruct addP_reg_imm34(iRegPdst dst, iRegP_N2P src1, immL34 src2) %{
match(Set dst (AddP src1 src2));
predicate(PowerArchitecturePPC64 >= 10);
ins_cost(DEFAULT_COST+1);

format %{ "PADDI $dst, $src1, $src2" %}
size(8);
ins_encode %{
// The prefix word must not sit in the last word (offset 0x3c) of a 64-byte block.
assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
__ paddi($dst$$Register, $src1$$Register, $src2$$constant);
%}
ins_pipe(pipe_class_default);
ins_alignment(2);
%}

//---------------------
// Subtraction Instructions

@@ -11887,6 +12023,7 @@ instruct cmprb_UpperCase_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, fla

instruct cmprb_Whitespace_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
match(Set dst (Whitespace src1));
predicate(PowerArchitecturePPC64 <= 9);
effect(TEMP src2, TEMP crx);
ins_cost(4 * DEFAULT_COST);

@@ -11906,6 +12043,29 @@ instruct cmprb_Whitespace_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, fl
ins_pipe(pipe_class_default);
%}

// Power 10 version, using prefixed addi to load 32-bit constant
// Same match rule as cmprb_Whitespace_reg_reg, which is restricted to
// PowerArchitecturePPC64 <= 9. src2 and crx are temporaries (see effect()).
// Size: PLI (8 bytes) + CMPRB (4) + SETB (4) = 16 bytes.
instruct cmprb_Whitespace_reg_reg_prefixed(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
match(Set dst (Whitespace src1));
predicate(PowerArchitecturePPC64 >= 10);
effect(TEMP src2, TEMP crx);
ins_cost(3 * DEFAULT_COST);

format %{ "PLI $src2, 0x201C0D09\n\t"
"CMPRB $crx, 1, $src1, $src2\n\t"
"SETB $dst, $crx" %}
size(16);
ins_encode %{
// 0x09 to 0x0D, 0x1C to 0x20
// The prefixed PLI comes first; its prefix word must not sit in the last word
// (offset 0x3c) of a 64-byte block.
assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
__ pli($src2$$Register, 0x201C0D09);
// compare src with ranges 0x09 to 0x0D and 0x1C to 0x20
__ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
__ setb($dst$$Register, $crx$$CondRegister);
%}
ins_pipe(pipe_class_default);
ins_alignment(2);
%}

//----------Branches---------------------------------------------------------
// Jump

1 comment on commit f7a6c63

@openjdk-notifier

This comment has been minimized.

Copy link

@openjdk-notifier openjdk-notifier bot commented on f7a6c63 Apr 9, 2021

Please sign in to comment.