Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
Browse files
8248190: Enable Power10 system and implement new byte-reverse instructions

Reviewed-by: mdoerr, stuefe
  • Loading branch information
jrziviani authored and Michihiro Horie committed Aug 25, 2020
1 parent 7d0afd2 commit c3296c4698387cef4a48ab4beafdc2f46fc6001f
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 13 deletions.
@@ -436,6 +436,10 @@ class Assembler : public AbstractAssembler {
NAND_OPCODE = (31u << OPCODE_SHIFT | 476u << XO_21_30_SHIFT), // X-FORM
NOR_OPCODE = (31u << OPCODE_SHIFT | 124u << XO_21_30_SHIFT), // X-FORM

// Byte reverse opcodes (introduced with Power10).
// X-form encodings: primary opcode 31 plus an extended opcode that is placed
// with the same named shift the other X-form opcodes in this enum use.
BRH_OPCODE = (31u << OPCODE_SHIFT | 219u << XO_21_30_SHIFT), // X-FORM
BRW_OPCODE = (31u << OPCODE_SHIFT | 155u << XO_21_30_SHIFT), // X-FORM
BRD_OPCODE = (31u << OPCODE_SHIFT | 187u << XO_21_30_SHIFT), // X-FORM

// opcodes only used for floating arithmetic
FADD_OPCODE = (63u << OPCODE_SHIFT | 21u << 1),
@@ -1568,6 +1572,11 @@ class Assembler : public AbstractAssembler {
// testbit with condition register
inline void testbitdi(ConditionRegister cr, Register a, Register s, int ui6);

// Byte reverse instructions (introduced with Power10).
// Each one emits a single 32-bit instruction that reads register s and
// writes register a. Per the Power ISA v3.1 definitions:
//   brh - swaps the two bytes inside every halfword of s
//   brw - reverses the four bytes inside every word of s
//   brd - reverses all eight bytes of the doubleword in s
// They do not exist on older CPUs, so code generation must be guarded.
inline void brh( Register a, Register s);
inline void brw( Register a, Register s);
inline void brd( Register a, Register s);

// rotate instructions
inline void rotldi( Register a, Register s, int n);
inline void rotrdi( Register a, Register s, int n);
@@ -287,6 +287,11 @@ inline void Assembler::testbitdi(ConditionRegister cr, Register a, Register s, i
}
}

// Byte reverse instructions (introduced with Power10)
// brh: emits one 32-bit word made of BRH_OPCODE combined with the register
// fields produced by rta(a) and rs(s).
inline void Assembler::brh(Register a, Register s) {
  emit_int32(BRH_OPCODE | rta(a) | rs(s));
}
// brw: emits one 32-bit word made of BRW_OPCODE combined with the register
// fields produced by rta(a) and rs(s).
inline void Assembler::brw(Register a, Register s) {
  emit_int32(BRW_OPCODE | rta(a) | rs(s));
}
// brd: emits one 32-bit word made of BRD_OPCODE combined with the register
// fields produced by rta(a) and rs(s).
inline void Assembler::brd(Register a, Register s) {
  emit_int32(BRD_OPCODE | rta(a) | rs(s));
}

// rotate instructions
inline void Assembler::rotldi( Register a, Register s, int n) { Assembler::rldicl(a, s, n, 0); }
inline void Assembler::rotrdi( Register a, Register s, int n) { Assembler::rldicl(a, s, 64-n, 0); }
@@ -84,8 +84,9 @@ define_pd_global(intx, InitArrayShortSize, 9*BytesPerLong);
constraint) \
\
product(uintx, PowerArchitecturePPC64, 0, \
"CPU Version: x for PowerX. Currently recognizes Power5 to " \
"Power8. Default is 0. Newer CPUs will be recognized as Power8.") \
"Specify the PowerPC family version in use. If not provided, " \
"HotSpot will determine it automatically. Host family version " \
"is the maximum value allowed (instructions are not emulated).") \
\
product(bool, SuperwordUseVSX, false, \
"Use Power8 VSX instructions for superword optimization.") \
@@ -112,6 +113,9 @@ define_pd_global(intx, InitArrayShortSize, 9*BytesPerLong);
"Use load instructions for stack banging.") \
\
/* special instructions */ \
/* Power10 brh/brw/brd. Off by default; VM_Version::initialize() */ \
/* turns it on when PowerArchitecturePPC64 >= 10 and the flag    */ \
/* was not set explicitly.                                       */ \
product(bool, UseByteReverseInstructions, false, \
"Use byte reverse instructions.") \
\
product(bool, UseVectorByteReverseInstructionsPPC64, false, \
"Use Power9 xxbr* vector byte reverse instructions.") \
\
@@ -13718,6 +13718,7 @@ instruct insrwi(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
// Just slightly faster than java implementation.
instruct bytes_reverse_int_Ex(iRegIdst dst, iRegIsrc src) %{
match(Set dst (ReverseBytesI src));
predicate(!UseByteReverseInstructions);
ins_cost(7*DEFAULT_COST);

expand %{
@@ -13758,8 +13759,23 @@ instruct bytes_reverse_int_vec(iRegIdst dst, iRegIsrc src, vecX tmpV) %{
ins_pipe(pipe_class_default);
%}

// Matches ReverseBytesI and implements it with a single BRW instruction.
// Only selectable when UseByteReverseInstructions is set; the expanded
// variant bytes_reverse_int_Ex carries the negated predicate.
instruct bytes_reverse_int(iRegIdst dst, iRegIsrc src) %{
match(Set dst (ReverseBytesI src));
predicate(UseByteReverseInstructions);
ins_cost(DEFAULT_COST);
// Exactly one 4-byte instruction is emitted below.
size(4);

format %{ "BRW $dst, $src" %}

ins_encode %{
__ brw($dst$$Register, $src$$Register);
%}
ins_pipe(pipe_class_default);
%}

instruct bytes_reverse_long_Ex(iRegLdst dst, iRegLsrc src) %{
match(Set dst (ReverseBytesL src));
predicate(!UseByteReverseInstructions);
ins_cost(15*DEFAULT_COST);

expand %{
@@ -13815,8 +13831,23 @@ instruct bytes_reverse_long_vec(iRegLdst dst, iRegLsrc src, vecX tmpV) %{
ins_pipe(pipe_class_default);
%}

// Matches ReverseBytesL and implements it with a single BRD instruction.
// Only selectable when UseByteReverseInstructions is set; the expanded
// variant bytes_reverse_long_Ex carries the negated predicate.
instruct bytes_reverse_long(iRegLdst dst, iRegLsrc src) %{
match(Set dst (ReverseBytesL src));
predicate(UseByteReverseInstructions);
ins_cost(DEFAULT_COST);
// Exactly one 4-byte instruction is emitted below.
size(4);

format %{ "BRD $dst, $src" %}

ins_encode %{
__ brd($dst$$Register, $src$$Register);
%}
ins_pipe(pipe_class_default);
%}

instruct bytes_reverse_ushort_Ex(iRegIdst dst, iRegIsrc src) %{
match(Set dst (ReverseBytesUS src));
predicate(!UseByteReverseInstructions);
ins_cost(2*DEFAULT_COST);

expand %{
@@ -13828,8 +13859,23 @@ instruct bytes_reverse_ushort_Ex(iRegIdst dst, iRegIsrc src) %{
%}
%}

// Matches ReverseBytesUS and implements it with a single BRH instruction.
// Only selectable when UseByteReverseInstructions is set; the expanded
// variant bytes_reverse_ushort_Ex carries the negated predicate.
// NOTE(review): unlike bytes_reverse_short there is no extension step after
// BRH. Per the Power ISA, BRH swaps bytes within every halfword, so bits above
// the low 16 are not cleared here; presumably this relies on $src already
// being a zero-extended char value - confirm.
instruct bytes_reverse_ushort(iRegIdst dst, iRegIsrc src) %{
match(Set dst (ReverseBytesUS src));
predicate(UseByteReverseInstructions);
ins_cost(DEFAULT_COST);
// Exactly one 4-byte instruction is emitted below.
size(4);

format %{ "BRH $dst, $src" %}

ins_encode %{
__ brh($dst$$Register, $src$$Register);
%}
ins_pipe(pipe_class_default);
%}

instruct bytes_reverse_short_Ex(iRegIdst dst, iRegIsrc src) %{
match(Set dst (ReverseBytesS src));
predicate(!UseByteReverseInstructions);
ins_cost(3*DEFAULT_COST);

expand %{
@@ -13843,6 +13889,22 @@ instruct bytes_reverse_short_Ex(iRegIdst dst, iRegIsrc src) %{
%}
%}

// Matches ReverseBytesS and implements it as BRH followed by EXTSH.
// Only selectable when UseByteReverseInstructions is set; the expanded
// variant bytes_reverse_short_Ex carries the negated predicate.
instruct bytes_reverse_short(iRegIdst dst, iRegIsrc src) %{
match(Set dst (ReverseBytesS src));
predicate(UseByteReverseInstructions);
ins_cost(DEFAULT_COST);
// Two 4-byte instructions are emitted below.
size(8);

format %{ "BRH $dst, $src\n\t"
"EXTSH $dst, $dst" %}

ins_encode %{
// Swap the bytes first, then sign-extend the swapped halfword in place so
// the result is a signed short.
__ brh($dst$$Register, $src$$Register);
__ extsh($dst$$Register, $dst$$Register);
%}
ins_pipe(pipe_class_default);
%}

// Load Integer reversed byte order
instruct loadI_reversed(iRegIdst dst, indirect mem) %{
match(Set dst (ReverseBytesI (LoadI mem)));
@@ -67,7 +67,9 @@ void VM_Version::initialize() {

// If PowerArchitecturePPC64 hasn't been specified explicitly determine from features.
if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) {
if (VM_Version::has_darn()) {
if (VM_Version::has_brw()) {
FLAG_SET_ERGO(PowerArchitecturePPC64, 10);
} else if (VM_Version::has_darn()) {
FLAG_SET_ERGO(PowerArchitecturePPC64, 9);
} else if (VM_Version::has_lqarx()) {
FLAG_SET_ERGO(PowerArchitecturePPC64, 8);
@@ -84,12 +86,13 @@ void VM_Version::initialize() {

bool PowerArchitecturePPC64_ok = false;
switch (PowerArchitecturePPC64) {
case 9: if (!VM_Version::has_darn() ) break;
case 8: if (!VM_Version::has_lqarx() ) break;
case 7: if (!VM_Version::has_popcntw()) break;
case 6: if (!VM_Version::has_cmpb() ) break;
case 5: if (!VM_Version::has_popcntb()) break;
case 0: PowerArchitecturePPC64_ok = true; break;
case 10: if (!VM_Version::has_brw() ) break;
case 9: if (!VM_Version::has_darn() ) break;
case 8: if (!VM_Version::has_lqarx() ) break;
case 7: if (!VM_Version::has_popcntw()) break;
case 6: if (!VM_Version::has_cmpb() ) break;
case 5: if (!VM_Version::has_popcntb()) break;
case 0: PowerArchitecturePPC64_ok = true; break;
default: break;
}
guarantee(PowerArchitecturePPC64_ok, "PowerArchitecturePPC64 cannot be set to "
@@ -156,12 +159,23 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseVectorByteReverseInstructionsPPC64, false);
}
}

// Byte reverse instructions need Power10: enable them by default there and
// reject an explicit request on anything older.
if (PowerArchitecturePPC64 >= 10) {
// Only apply the ergonomic default if the user did not set the flag.
if (FLAG_IS_DEFAULT(UseByteReverseInstructions)) {
FLAG_SET_ERGO(UseByteReverseInstructions, true);
}
} else {
// The flag defaults to false, so it can only be true here on request.
if (UseByteReverseInstructions) {
warning("UseByteReverseInstructions specified, but needs at least Power10.");
FLAG_SET_DEFAULT(UseByteReverseInstructions, false);
}
}
#endif

// Create and print feature-string.
char buf[(num_features+1) * 16]; // Max 16 chars per feature.
jio_snprintf(buf, sizeof(buf),
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
(has_fsqrt() ? " fsqrt" : ""),
(has_isel() ? " isel" : ""),
(has_lxarxeh() ? " lxarxeh" : ""),
@@ -179,7 +193,8 @@ void VM_Version::initialize() {
(has_stdbrx() ? " stdbrx" : ""),
(has_vshasig() ? " sha" : ""),
(has_tm() ? " rtm" : ""),
(has_darn() ? " darn" : "")
(has_darn() ? " darn" : ""),
(has_brw() ? " brw" : "")
// Make sure number of %s matches num_features!
);
_features_string = os::strdup(buf);
@@ -835,6 +850,7 @@ void VM_Version::determine_features() {
a->vshasigmaw(VR0, VR1, 1, 0xF); // code[16] -> vshasig
// rtm is determined by OS
a->darn(R7); // code[17] -> darn
a->brw(R5, R6); // code[18] -> brw
a->blr();

// Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@@ -888,6 +904,7 @@ void VM_Version::determine_features() {
if (code[feature_cntr++]) features |= vshasig_m;
// feature rtm_m is determined by OS
if (code[feature_cntr++]) features |= darn_m;
if (code[feature_cntr++]) features |= brw_m;

// Print the detection code.
if (PrintAssembly) {
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2019 SAP SE. All rights reserved.
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2020 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -51,6 +51,7 @@ class VM_Version: public Abstract_VM_Version {
vshasig,
rtm,
darn,
brw,
num_features // last entry to count features
};
enum Feature_Flag_Set {
@@ -74,6 +75,7 @@ class VM_Version: public Abstract_VM_Version {
vshasig_m = (1 << vshasig),
rtm_m = (1 << rtm ),
darn_m = (1 << darn ),
brw_m = (1 << brw ),
all_features_m = (unsigned long)-1
};

@@ -119,6 +121,7 @@ class VM_Version: public Abstract_VM_Version {
static bool has_vshasig() { return (_features & vshasig_m) != 0; }
static bool has_tm() { return (_features & rtm_m) != 0; }
static bool has_darn() { return (_features & darn_m) != 0; }
// True when the brw_m bit is set in _features (set by determine_features() when its brw probe succeeds).
static bool has_brw() { return (_features & brw_m) != 0; }

static bool has_mtfprd() { return has_vpmsumb(); } // alias for P8

0 comments on commit c3296c4

Please sign in to comment.