Skip to content
Permalink
Browse files

8234160: Enable optimized mitigation for Intel jcc erratum in C2

Reviewed-by: thartmann, vlivanov, pliden
  • Loading branch information
fisk committed Feb 24, 2020
1 parent 0f21211 commit ccdde497287175933982a5907d1bb8611c0e15a8
@@ -0,0 +1,149 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "c2_intelJccErratum_x86.hpp"
#include "opto/cfgnode.hpp"
#include "opto/compile.hpp"
#include "opto/machnode.hpp"
#include "opto/node.hpp"
#include "opto/regalloc.hpp"
#include "utilities/align.hpp"
#include "utilities/debug.hpp"

// Maps an address to the index of the 32 byte window that contains it.
// Two addresses lie in the same window exactly when their indices are equal.
uintptr_t IntelJccErratum::boundary(uintptr_t addr) {
  const uintptr_t window_size = 32;
  return addr / window_size;
}

// Tells whether the byte range starting at start_pc and ending at end_pc
// leaves the 32 byte window it started in, i.e. whether start_pc and end_pc
// map to different windows. Because end_pc itself is tested, a range whose
// end lands exactly on a 32 byte boundary is reported as well.
//
// The distance between the two addresses must not exceed largest_jcc_size();
// this is checked by an assert.
bool IntelJccErratum::is_crossing_or_ending_at_32_byte_boundary(uintptr_t start_pc, uintptr_t end_pc) {
  const int jcc_size = int(end_pc - start_pc);
  assert(jcc_size <= largest_jcc_size(), "invalid jcc size: %d", jcc_size);
  const bool same_window = (boundary(start_pc) == boundary(end_pc));
  return !same_window;
}

// Decides whether a mach node has to be treated as a jcc erratum branch.
//
// block      - the block that holds the node
// node       - the mach node under inspection
// node_index - position of the node inside the block
//
// A node qualifies when it is a call that is not a Java call, or when it is
// the last node of its block.
bool IntelJccErratum::is_jcc_erratum_branch(const Block* block, const MachNode* node, uint node_index) {
  const bool is_non_java_call = node->is_MachCall() && !node->is_MachCallJava();
  return is_non_java_call || node_index == (block->number_of_nodes() - 1);
}

// Marks the node with Flag_intel_jcc_erratum and reports the node's size.
// The flag is set before the size is queried, as in the original ordering.
int IntelJccErratum::jcc_erratum_taint_node(MachNode* node, PhaseRegAlloc* regalloc) {
  node->add_flag(Node::Flag_intel_jcc_erratum);
  const int tainted_size = node->size(regalloc);
  return tainted_size;
}

// Walks every node of every block in the CFG and tags the mach nodes that are
// jcc erratum branches (see is_jcc_erratum_branch) with Flag_intel_jcc_erratum.
// If such a branch is neither a return nor a call and consumes the mach node
// that immediately precedes it, that preceding node is tagged too, since the
// two instructions might get macro fused.
//
// C        - the current compilation (not used by this function)
// cfg      - the control flow graph whose blocks are scanned
// regalloc - register allocator, needed to query node sizes
//
// Returns the sum of the sizes of all tagged nodes, which serves as a
// conservative estimate of the nops that may be needed.
int IntelJccErratum::tag_affected_machnodes(Compile* C, PhaseCFG* cfg, PhaseRegAlloc* regalloc) {
  ResourceMark rm;
  int nop_size = 0;
  // Most recently visited mach node that was not itself a jcc erratum branch;
  // NULL when there is no such candidate for fusion.
  MachNode* last_m = NULL;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    const Block* const block = cfg->get_block(i);
    for (uint j = 0; j < block->number_of_nodes(); ++j) {
      const Node* const node = block->get_node(j);
      if (!node->is_Mach()) {
        continue;
      }
      MachNode* m = node->as_Mach();
      if (!is_jcc_erratum_branch(block, m, j)) {
        // Ordinary mach node; remember it as a potential fusion partner.
        last_m = m;
        continue;
      }

      // Found a root jcc erratum branch, flag it as problematic
      nop_size += jcc_erratum_taint_node(m, regalloc);

      // We might fuse a problematic jcc erratum branch with a preceding
      // ALU instruction - we must catch such problematic macro fusions
      // and flag the ALU instruction as problematic too.
      // Without a candidate (last_m == NULL) there is nothing to fuse with;
      // checking this up front also keeps a NULL input slot from matching a
      // NULL last_m and being dereferenced while tainting.
      if (last_m != NULL && !m->is_MachReturn() && !m->is_MachCall()) {
        for (uint k = 1; k < m->req(); ++k) {
          if (m->in(k) == last_m) {
            // Flag fused conditions too
            nop_size += jcc_erratum_taint_node(last_m, regalloc);
          }
        }
      }
      last_m = NULL;
    }
  }
  return nop_size;
}

// Computes the number of padding bytes to place in front of a tagged mach node
// so that it does not cross or end at a 32 byte boundary.
//
// current_offset - offset at which the node would be emitted without padding
// mach           - the tagged node
// block          - the block holding the node
// index_in_block - position of the node inside the block
// regalloc       - register allocator, needed to query node sizes
//
// If the node directly following in the block is tagged as well, the two are
// treated as one unit and the size of both nodes is taken into account.
// Returns 0 when no padding is needed or when the unit is larger than
// largest_jcc_size(); otherwise returns the distance to the next 32 byte
// boundary.
int IntelJccErratum::compute_padding(uintptr_t current_offset, const MachNode* mach, Block* block, uint index_in_block, PhaseRegAlloc* regalloc) {
  int jcc_size = mach->size(regalloc);
  if (index_in_block < block->number_of_nodes() - 1) {
    Node* next = block->get_node(index_in_block + 1);
    if (next->is_Mach() && (next->as_Mach()->flags() & Node::Flag_intel_jcc_erratum)) {
      // The unit spans this node plus the tagged node that follows it, so it
      // is the follower's size that has to be added here.
      jcc_size += next->as_Mach()->size(regalloc);
    }
  }
  if (jcc_size > largest_jcc_size()) {
    // Let's not try fixing this for nodes that seem unreasonably large
    return 0;
  }
  if (is_crossing_or_ending_at_32_byte_boundary(current_offset, current_offset + jcc_size)) {
    return int(align_up(current_offset, 32) - current_offset);
  }
  return 0;
}

#define __ _masm.

uintptr_t IntelJccErratumAlignment::pc() {
return (uintptr_t)__ pc();
}

// Opens an alignment scope for a code sequence of at most jcc_size bytes that
// the caller emits next through masm.
//
// Nothing is emitted unless VM_Version::has_intel_jcc_erratum() holds. While
// the compiler is only measuring sizes (in_scratch_emit_size), jcc_size bytes
// of nops are emitted as a conservative stand-in for the real padding.
// Otherwise nops are emitted only if the sequence would cross or end at a
// 32 byte boundary, and the recorded start pc is moved past them.
IntelJccErratumAlignment::IntelJccErratumAlignment(MacroAssembler& masm, int jcc_size) :
    _masm(masm),
    _start_pc(pc()) {
  if (VM_Version::has_intel_jcc_erratum()) {
    if (Compile::current()->in_scratch_emit_size()) {
      // When we measure the size of this 32 byte alignment, we apply a conservative guess.
      __ nop(jcc_size);
    } else if (IntelJccErratum::is_crossing_or_ending_at_32_byte_boundary(_start_pc, _start_pc + jcc_size)) {
      // The affected branch might get slowed down by micro code mitigations
      // as it could be susceptible to the erratum. Place nops until the next
      // 32 byte boundary to make sure the branch will be cached.
      const int alignment_nops = (int)(align_up(_start_pc, 32) - _start_pc);
      __ nop(alignment_nops);
      // The guarded sequence now starts after the padding.
      _start_pc = pc();
    }
  }
}

// Closes the alignment scope. On affected CPUs, and only when real code (not a
// size measurement) was emitted, asserts that the code emitted since the
// recorded start pc neither crosses nor ends at a 32 byte boundary, i.e. that
// the jcc_size estimate given to the constructor was good enough.
IntelJccErratumAlignment::~IntelJccErratumAlignment() {
  const bool emitted_real_code = VM_Version::has_intel_jcc_erratum() &&
                                 !Compile::current()->in_scratch_emit_size();
  if (emitted_real_code) {
    assert(!IntelJccErratum::is_crossing_or_ending_at_32_byte_boundary(_start_pc, pc()), "Invalid jcc_size estimate");
  }
}
@@ -0,0 +1,68 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#ifndef CPU_X86_INTELJCCERRATUM_X86_HPP
#define CPU_X86_INTELJCCERRATUM_X86_HPP

#include "memory/allocation.hpp"
#include "utilities/globalDefinitions.hpp"

class Block;
class Compile;
class MachNode;
class MacroAssembler;
class PhaseCFG;
class PhaseRegAlloc;

// Static helpers for mitigating the Intel jcc erratum in C2: tagging the
// affected mach nodes and computing the padding that keeps them from crossing
// or ending at a 32 byte boundary.
class IntelJccErratum : public AllStatic {
 private:
  // Compute which 32 byte boundary an address corresponds to
  static uintptr_t boundary(uintptr_t addr);

  // Tags the node with Flag_intel_jcc_erratum and returns the node's size.
  static int jcc_erratum_taint_node(MachNode* node, PhaseRegAlloc* regalloc);

 public:
  // True when start_pc and end_pc correspond to different 32 byte boundaries.
  static bool is_crossing_or_ending_at_32_byte_boundary(uintptr_t start_pc, uintptr_t end_pc);

  // True when the node at node_index in block must be handled as a jcc erratum branch.
  static bool is_jcc_erratum_branch(const Block* block, const MachNode* node, uint node_index);

  // Analyze JCC erratum branches. Affected nodes get tagged with Flag_intel_jcc_erratum.
  // The function returns a conservative estimate of all required nops on all mach nodes.
  static int tag_affected_machnodes(Compile* C, PhaseCFG* cfg, PhaseRegAlloc* regalloc);

  // Computes the exact padding for a mach node
  static int compute_padding(uintptr_t current_offset, const MachNode* mach, Block* block, uint index_in_block, PhaseRegAlloc* regalloc);

  // Upper limit, in bytes, on the size of a sequence that gets padded.
  static int largest_jcc_size() { return 20; }
};

// Scope object placed around a short code sequence emitted through a
// MacroAssembler. The constructor takes the expected size of the sequence
// (jcc_size) and the destructor runs when the scope ends.
class IntelJccErratumAlignment {
 private:
  // Assembler the guarded sequence is emitted with. Declared before _start_pc
  // because _start_pc is initialized by calling pc().
  MacroAssembler& _masm;
  // Address at which the guarded sequence starts.
  uintptr_t _start_pc;

  // Current pc of _masm as an integer address.
  uintptr_t pc();

 public:
  IntelJccErratumAlignment(MacroAssembler& masm, int jcc_size);
  ~IntelJccErratumAlignment();
};

#endif // CPU_X86_INTELJCCERRATUM_X86_HPP

@@ -1,5 +1,5 @@
//
// Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -30,16 +30,29 @@ source_hpp %{

source %{

#include "c2_intelJccErratum_x86.hpp"

// Emits the load barrier check for a reference held in `ref`: the reference is
// tested against the thread-local address bad mask (read via r15_thread) and,
// if any masked bit is set, control branches to the entry of a stub created
// for `node`. The stub's continuation label is bound right after the check.
//
// NOTE(review): this listing appears to be a rendered diff without +/- markers.
// The unscoped testptr/jcc pair looks like the removed pre-change code and the
// braced pair wrapped in IntelJccErratumAlignment its replacement - confirm
// against the actual file before relying on this text.
static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) {
ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak);
__ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
__ jcc(Assembler::notZero, *stub->entry());
{
// The scope covers the test and the conditional jump as one unit; 10 is the
// size passed as the jcc_size estimate for the pair.
IntelJccErratumAlignment intel_alignment(_masm, 10 /* jcc_size */);
__ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
__ jcc(Assembler::notZero, *stub->entry());
}
__ bind(*stub->continuation());
}

// NOTE(review): this listing appears to be a rendered diff without +/- markers.
// The z_load_barrier_slow_path signature and the unscoped jmp look like the
// removed pre-change code; z_load_barrier_cmpxchg and the two braced scopes
// look like the replacement - confirm against the actual file.
//
// z_load_barrier_cmpxchg: creates a non-weak load barrier stub for `node`,
// tests `ref` against the thread-local address bad mask and jumps to `good`
// when no masked bit is set; otherwise jumps to the stub entry. The stub's
// continuation label is bound at the end.
static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
static void z_load_barrier_cmpxchg(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, Label& good) {
ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */);
__ jmp(*stub->entry());
{
// Test plus conditional jump, guarded as one unit with a 10 byte estimate.
IntelJccErratumAlignment intel_alignment(_masm, 10 /* jcc_size */);
__ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
__ jcc(Assembler::zero, good);
}
{
// The unconditional jump to the stub gets its own scope with a 5 byte estimate.
IntelJccErratumAlignment intel_alignment(_masm, 5 /* jcc_size */);
__ jmp(*stub->entry());
}
__ bind(*stub->continuation());
}

@@ -101,9 +114,7 @@ instruct zCompareAndExchangeP(memory mem, rax_RegP oldval, rRegP newval, rRegP t
__ cmpxchgptr($newval$$Register, $mem$$Address);
if (barrier_data() != ZLoadBarrierElided) {
Label good;
__ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
__ jcc(Assembler::zero, good);
z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register);
z_load_barrier_cmpxchg(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register, good);
__ movptr($oldval$$Register, $tmp$$Register);
__ lock();
__ cmpxchgptr($newval$$Register, $mem$$Address);
@@ -133,9 +144,7 @@ instruct zCompareAndSwapP(rRegI res, memory mem, rRegP newval, rRegP tmp, rFlags
__ cmpxchgptr($newval$$Register, $mem$$Address);
if (barrier_data() != ZLoadBarrierElided) {
Label good;
__ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
__ jcc(Assembler::zero, good);
z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register);
z_load_barrier_cmpxchg(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register, good);
__ movptr($oldval$$Register, $tmp$$Register);
__ lock();
__ cmpxchgptr($newval$$Register, $mem$$Address);
@@ -41,6 +41,7 @@
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

// Address of instruction which causes SEGV
@@ -720,6 +721,8 @@ void VM_Version::get_processor_features() {
}
}

_has_intel_jcc_erratum = compute_has_intel_jcc_erratum();

char buf[256];
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
cores_per_cpu(), threads_per_core(),
@@ -1698,6 +1701,70 @@ bool VM_Version::use_biased_locking() {
return UseBiasedLocking;
}

bool VM_Version::compute_has_intel_jcc_erratum() {
if (!is_intel_family_core()) {
// Only Intel CPUs are affected.
return false;
}
// The following table of affected CPUs is based on the following document released by Intel:
// https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
switch (_model) {
case 0x8E:
// 06_8EH | 9 | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Amber Lake Y
// 06_8EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake U
// 06_8EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake U 23e
// 06_8EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake Y
// 06_8EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake U43e
// 06_8EH | B | 8th Generation Intel® Core™ Processors based on microarchitecture code name Whiskey Lake U
// 06_8EH | C | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Amber Lake Y
// 06_8EH | C | 10th Generation Intel® Core™ Processor Family based on microarchitecture code name Comet Lake U42
// 06_8EH | C | 8th Generation Intel® Core™ Processors based on microarchitecture code name Whiskey Lake U
return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
case 0x4E:
// 06_4E | 3 | 6th Generation Intel® Core™ Processors based on microarchitecture code name Skylake U
// 06_4E | 3 | 6th Generation Intel® Core™ Processor Family based on microarchitecture code name Skylake U23e
// 06_4E | 3 | 6th Generation Intel® Core™ Processors based on microarchitecture code name Skylake Y
return _stepping == 0x3;
case 0x55:
// 06_55H | 4 | Intel® Xeon® Processor D Family based on microarchitecture code name Skylake D, Bakerville
// 06_55H | 4 | Intel® Xeon® Scalable Processors based on microarchitecture code name Skylake Server
// 06_55H | 4 | Intel® Xeon® Processor W Family based on microarchitecture code name Skylake W
// 06_55H | 4 | Intel® Core™ X-series Processors based on microarchitecture code name Skylake X
// 06_55H | 4 | Intel® Xeon® Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
// 06_55 | 7 | 2nd Generation Intel® Xeon® Scalable Processors based on microarchitecture code name Cascade Lake (server)
return _stepping == 0x4 || _stepping == 0x7;
case 0x5E:
// 06_5E | 3 | 6th Generation Intel® Core™ Processor Family based on microarchitecture code name Skylake H
// 06_5E | 3 | 6th Generation Intel® Core™ Processor Family based on microarchitecture code name Skylake S
return _stepping == 0x3;
case 0x9E:
// 06_9EH | 9 | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake G
// 06_9EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake H
// 06_9EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake S
// 06_9EH | 9 | Intel® Core™ X-series Processors based on microarchitecture code name Kaby Lake X
// 06_9EH | 9 | Intel® Xeon® Processor E3 v6 Family Kaby Lake Xeon E3
// 06_9EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake H
// 06_9EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S
// 06_9EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
// 06_9EH | A | Intel® Xeon® Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
// 06_9EH | A | Intel® Xeon® Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
// 06_9EH | B | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S (4+2)
// 06_9EH | B | Intel® Celeron® Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
// 06_9EH | D | 9th Generation Intel® Core™ Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
// 06_9EH | D | 9th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S (8+2)
return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
case 0xA6:
// 06_A6H | 0 | 10th Generation Intel® Core™ Processor Family based on microarchitecture code name Comet Lake U62
return _stepping == 0x0;
case 0xAE:
// 06_AEH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
return _stepping == 0xA;
default:
// If we are running on another intel machine not recognized in the table, we are okay.
return false;
}
}

// On Xen, the cpuid instruction returns
// eax / registers[0]: Version of Xen
// ebx / registers[1]: chars 'XenV'

0 comments on commit ccdde49

Please sign in to comment.