8257692: Using both heap and native segments can degrade performance
Reviewed-by: jvernee, psandoz, vlivanov
mcimadamore committed Dec 11, 2020
1 parent 1f0ac20 commit e367491
Showing 8 changed files with 224 additions and 9 deletions.
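For context, here is a minimal sketch (not part of this commit; names and sizes are arbitrary) of the access pattern the change targets: the same MemoryAccess accessors are used against both an on-heap and an off-heap segment, so the shared access site underneath sees mixed 'base' objects (a byte[] versus null) and its type profile gets polluted. Run with --add-modules jdk.incubator.foreign.

import jdk.incubator.foreign.MemoryAccess;
import jdk.incubator.foreign.MemorySegment;

class PollutedAccessSketch {
    public static void main(String[] args) {
        MemorySegment heapSeg = MemorySegment.ofArray(new byte[4096]);        // backing base: byte[]
        try (MemorySegment nativeSeg = MemorySegment.allocateNative(4096)) {  // backing base: null (raw address)
            int sum = 0;
            for (int i = 0; i < 1024; i++) {
                // same static accessors, two segment kinds
                MemoryAccess.setIntAtIndex(heapSeg, i, i);
                MemoryAccess.setIntAtIndex(nativeSeg, i, i);
                sum += MemoryAccess.getIntAtIndex(heapSeg, i);
                sum += MemoryAccess.getIntAtIndex(nativeSeg, i);
            }
            System.out.println(sum);
        }
    }
}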
1 change: 1 addition & 0 deletions src/hotspot/share/classfile/vmSymbols.hpp
@@ -353,6 +353,7 @@
/* Panama Support */ \
template(jdk_internal_invoke_NativeEntryPoint, "jdk/internal/invoke/NativeEntryPoint") \
template(jdk_internal_invoke_NativeEntryPoint_signature, "Ljdk/internal/invoke/NativeEntryPoint;") \
template(jdk_incubator_foreign_MemoryAccess, "jdk/incubator/foreign/MemoryAccess") \
\
/* Support for JVMCI */ \
JVMCI_VM_SYMBOLS_DO(template, do_alias) \
16 changes: 16 additions & 0 deletions src/hotspot/share/oops/methodData.cpp
@@ -1599,6 +1599,18 @@ bool MethodData::profile_unsafe(const methodHandle& m, int bci) {
return false;
}

bool MethodData::profile_memory_access(const methodHandle& m, int bci) {
Bytecode_invoke inv(m , bci);
if (inv.is_invokestatic()) {
if (inv.klass() == vmSymbols::jdk_incubator_foreign_MemoryAccess()) {
if (inv.name()->starts_with("get") || inv.name()->starts_with("set")) {
return true;
}
}
}
return false;
}

int MethodData::profile_arguments_flag() {
return TypeProfileLevel % 10;
}
@@ -1628,6 +1640,10 @@ bool MethodData::profile_arguments_for_invoke(const methodHandle& m, int bci) {
return true;
}

if (profile_memory_access(m, bci)) {
return true;
}

assert(profile_arguments_jsr292_only(), "inconsistent");
return profile_jsr292(m, bci);
}
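As a rough Java-level illustration of the call shapes the new profile_memory_access() check matches (the class and method below are hypothetical; only the MemoryAccess calls come from the incubator API): static invocations on jdk/incubator/foreign/MemoryAccess whose name starts with "get" or "set" now have their arguments type-profiled, alongside the Unsafe calls handled by profile_unsafe() above.

import jdk.incubator.foreign.MemoryAccess;
import jdk.incubator.foreign.MemorySegment;

class ProfiledShapes {
    // Both calls compile to invokestatic on jdk/incubator/foreign/MemoryAccess with
    // names starting with "set"/"get", so they match the check added above.
    static int touch(MemorySegment segment) {
        MemoryAccess.setIntAtOffset(segment, 16L, 42);
        return MemoryAccess.getIntAtOffset(segment, 16L);
    }
}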
1 change: 1 addition & 0 deletions src/hotspot/share/oops/methodData.hpp
@@ -2151,6 +2151,7 @@ class MethodData : public Metadata {

static bool profile_jsr292(const methodHandle& m, int bci);
static bool profile_unsafe(const methodHandle& m, int bci);
static bool profile_memory_access(const methodHandle& m, int bci);
static int profile_arguments_flag();
static bool profile_all_arguments();
static bool profile_arguments_for_invoke(const methodHandle& m, int bci);
@@ -32,26 +32,26 @@
* This proxy interface is required to allow instances of the {@code MemorySegment} interface (which is defined inside
* an incubating module) to be accessed from the memory access var handles.
*/
public interface MemorySegmentProxy {
public abstract class MemorySegmentProxy {
/**
* Check that memory access is within spatial bounds and that access is compatible with segment access modes.
* @throws UnsupportedOperationException if underlying segment has incompatible access modes (e.g. attempting to write
* a read-only segment).
* @throws IndexOutOfBoundsException if access is out-of-bounds.
*/
void checkAccess(long offset, long length, boolean readOnly);
long unsafeGetOffset();
Object unsafeGetBase();
boolean isSmall();
ScopedMemoryAccess.Scope scope();
public abstract void checkAccess(long offset, long length, boolean readOnly);
public abstract long unsafeGetOffset();
public abstract Object unsafeGetBase();
public abstract boolean isSmall();
public abstract ScopedMemoryAccess.Scope scope();

/* Helper functions for offset computations. These are required so that we can avoid issuing long opcodes
* (e.g. LMUL, LADD) when we're operating on 'small' segments (segments whose length can be expressed with an int).
* C2 BCE code is very sensitive to the kind of opcode being emitted, and this workaround allows us to rescue
* BCE when working with small segments. This workaround should be dropped when JDK-8223051 is resolved.
*/

static long addOffsets(long op1, long op2, MemorySegmentProxy segmentProxy) {
public static long addOffsets(long op1, long op2, MemorySegmentProxy segmentProxy) {
if (segmentProxy.isSmall()) {
// force ints for BCE
if (op1 > Integer.MAX_VALUE || op2 > Integer.MAX_VALUE
@@ -74,7 +74,7 @@ static long addOffsets(long op1, long op2, MemorySegmentProxy segmentProxy) {
}
}

static long multiplyOffsets(long op1, long op2, MemorySegmentProxy segmentProxy) {
public static long multiplyOffsets(long op1, long op2, MemorySegmentProxy segmentProxy) {
if (segmentProxy.isSmall()) {
if (op1 > Integer.MAX_VALUE || op2 > Integer.MAX_VALUE
|| op1 < Integer.MIN_VALUE || op2 < Integer.MIN_VALUE) {
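The following is a simplified, standalone restatement of the trick described in the comment above (not the exact library code; a plain boolean stands in for segmentProxy.isSmall() and error handling is reduced to a single exception): when a segment's length fits in an int, offsets are narrowed so C2 sees int rather than long arithmetic, which keeps bounds-check elimination effective.

// Sketch only: mirrors the shape of MemorySegmentProxy.addOffsets.
static long addOffsetsSketch(long op1, long op2, boolean smallSegment) {
    if (smallSegment) {
        // force ints for BCE: both operands are expected to fit in an int
        if (op1 > Integer.MAX_VALUE || op2 > Integer.MAX_VALUE
                || op1 < Integer.MIN_VALUE || op2 < Integer.MIN_VALUE) {
            throw new IndexOutOfBoundsException("offset out of int range");
        }
        return Math.addExact((int) op1, (int) op2);   // int add (IADD), BCE-friendly
    } else {
        return Math.addExact(op1, op2);               // long add (LADD) for large segments
    }
}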
@@ -325,3 +325,6 @@ public class ScopedMemoryAccess {
}
// typed-ops here

// Note: all the accessor methods defined below take advantage of argument type profiling
// (see src/hotspot/share/oops/methodData.cpp) which greatly enhances performance when the same accessor
// method is used repeatedly with different 'base' objects.
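A hypothetical illustration of what the note means by different 'base' objects (the method below is not in the JDK; it only shows the shape of such a shared access site): every accessor ultimately performs an Unsafe-style access taking an Object base plus a long offset, and one hot call site may see byte[] bases (heap segments) as well as null bases (native segments). Argument type profiling records that mix so the JIT can specialize instead of keeping a generic path.

// Hypothetical shape of a shared access site; 'unsafe' is assumed to be a
// sun.misc.Unsafe instance obtained reflectively, similar to Utils.unsafe in
// the benchmark further down.
static int getIntShared(sun.misc.Unsafe unsafe, Object base, long offset) {
    // 'base' may be a byte[] (heap segment) or null (native segment).
    return unsafe.getInt(base, offset);
}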
@@ -88,6 +88,10 @@ private static VarHandle unalignedHandle(ValueLayout elementLayout, Class<?> carrier) {
return MemoryHandles.varHandle(carrier, 1, elementLayout.order());
}

// Note: all the accessor methods defined below take advantage of argument type profiling
// (see src/hotspot/share/oops/methodData.cpp) which greatly enhances performance when the same accessor
// method is used repeatedly with different segment kinds (e.g. on-heap vs. off-heap).

/**
* Reads a byte from given segment and offset.
*
@@ -53,7 +53,7 @@
* are defined for each memory segment kind, see {@link NativeMemorySegmentImpl}, {@link HeapMemorySegmentImpl} and
* {@link MappedMemorySegmentImpl}.
*/
public abstract class AbstractMemorySegmentImpl implements MemorySegment, MemorySegmentProxy {
public abstract class AbstractMemorySegmentImpl extends MemorySegmentProxy implements MemorySegment {

private static final ScopedMemoryAccess SCOPED_MEMORY_ACCESS = ScopedMemoryAccess.getScopedMemoryAccess();

@@ -0,0 +1,190 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.jdk.incubator.foreign;

import jdk.incubator.foreign.MemoryAccess;
import jdk.incubator.foreign.MemoryLayout;
import jdk.incubator.foreign.MemorySegment;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import sun.misc.Unsafe;

import java.lang.invoke.VarHandle;
import java.util.concurrent.TimeUnit;

import static jdk.incubator.foreign.MemoryLayout.PathElement.sequenceElement;
import static jdk.incubator.foreign.MemoryLayouts.JAVA_INT;

@BenchmarkMode(Mode.AverageTime)
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@State(org.openjdk.jmh.annotations.Scope.Thread)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Fork(value = 3, jvmArgsAppend = { "--add-modules=jdk.incubator.foreign" })
public class LoopOverPollutedSegments {

static final int ELEM_SIZE = 1_000_000;
static final int CARRIER_SIZE = (int) JAVA_INT.byteSize();
static final int ALLOC_SIZE = ELEM_SIZE * CARRIER_SIZE;

static final Unsafe unsafe = Utils.unsafe;

MemorySegment nativeSegment, heapSegmentBytes, heapSegmentFloats;
byte[] arr;
long addr;

static final VarHandle intHandle = MemoryLayout.ofSequence(JAVA_INT).varHandle(int.class, MemoryLayout.PathElement.sequenceElement());


@Setup
public void setup() {
addr = unsafe.allocateMemory(ALLOC_SIZE);
for (int i = 0; i < ELEM_SIZE; i++) {
unsafe.putInt(addr + (i * 4), i);
}
arr = new byte[ALLOC_SIZE];
nativeSegment = MemorySegment.allocateNative(ALLOC_SIZE, 4);
heapSegmentBytes = MemorySegment.ofArray(new byte[ALLOC_SIZE]);
heapSegmentFloats = MemorySegment.ofArray(new float[ELEM_SIZE]);

for (int rep = 0 ; rep < 5 ; rep++) {
for (int i = 0; i < ELEM_SIZE; i++) {
unsafe.putInt(arr, Unsafe.ARRAY_BYTE_BASE_OFFSET + (i * 4), i);
MemoryAccess.setIntAtIndex(nativeSegment, i, i);
MemoryAccess.setFloatAtIndex(nativeSegment, i, i);
intHandle.set(nativeSegment, (long)i, i);
MemoryAccess.setIntAtIndex(heapSegmentBytes, i, i);
MemoryAccess.setFloatAtIndex(heapSegmentBytes, i, i);
intHandle.set(heapSegmentBytes, (long)i, i);
MemoryAccess.setIntAtIndex(heapSegmentFloats, i, i);
MemoryAccess.setFloatAtIndex(heapSegmentFloats, i, i);
intHandle.set(heapSegmentFloats, (long)i, i);
}
}
}

@TearDown
public void tearDown() {
nativeSegment.close();
heapSegmentBytes = null;
heapSegmentFloats = null;
arr = null;
unsafe.freeMemory(addr);
}

@Benchmark
public int native_segment_VH() {
int sum = 0;
for (int k = 0; k < ELEM_SIZE; k++) {
intHandle.set(nativeSegment, (long)k, k + 1);
int v = (int) intHandle.get(nativeSegment, (long)k);
sum += v;
}
return sum;
}

@Benchmark
public int native_segment_static() {
int sum = 0;
for (int k = 0; k < ELEM_SIZE; k++) {
MemoryAccess.setIntAtOffset(nativeSegment, k, k + 1);
int v = MemoryAccess.getIntAtOffset(nativeSegment, k);
sum += v;
}
return sum;
}

@Benchmark
public int heap_segment_ints_VH() {
int sum = 0;
for (int k = 0; k < ELEM_SIZE; k++) {
intHandle.set(heapSegmentBytes, (long)k, k + 1);
int v = (int) intHandle.get(heapSegmentBytes, (long)k);
sum += v;
}
return sum;
}

@Benchmark
public int heap_segment_ints_static() {
int sum = 0;
for (int k = 0; k < ELEM_SIZE; k++) {
MemoryAccess.setIntAtOffset(heapSegmentBytes, k, k + 1);
int v = MemoryAccess.getIntAtOffset(heapSegmentBytes, k);
sum += v;
}
return sum;
}

@Benchmark
public int heap_segment_floats_VH() {
int sum = 0;
for (int k = 0; k < ELEM_SIZE; k++) {
intHandle.set(heapSegmentFloats, (long)k, k + 1);
int v = (int)intHandle.get(heapSegmentFloats, (long)k);
sum += v;
}
return sum;
}

@Benchmark
public int heap_segment_floats_static() {
int sum = 0;
for (int k = 0; k < ELEM_SIZE; k++) {
MemoryAccess.setIntAtOffset(heapSegmentFloats, k, k + 1);
int v = MemoryAccess.getIntAtOffset(heapSegmentFloats, k);
sum += v;
}
return sum;
}

@Benchmark
public int heap_unsafe() {
int sum = 0;
for (int k = 0; k < ALLOC_SIZE; k += 4) {
unsafe.putInt(arr, k + Unsafe.ARRAY_BYTE_BASE_OFFSET, k + 1);
int v = unsafe.getInt(arr, k + Unsafe.ARRAY_BYTE_BASE_OFFSET);
sum += v;
}
return sum;
}

@Benchmark
public int native_unsafe() {
int sum = 0;
for (int k = 0; k < ALLOC_SIZE; k += 4) {
unsafe.putInt(addr + k, k + 1);
int v = unsafe.getInt(addr + k);
sum += v;
}
return sum;
}
}
