Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
214 changes: 214 additions & 0 deletions src/java.base/share/classes/jdk/internal/foreign/BufferStack.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

package jdk.internal.foreign;

import jdk.internal.misc.CarrierThreadLocal;
import jdk.internal.vm.annotation.ForceInline;

import java.lang.foreign.Arena;
import java.lang.foreign.MemoryLayout;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.SegmentAllocator;
import java.lang.ref.Reference;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Consumer;

/**
* A buffer stack that allows efficient reuse of memory segments. This is useful in cases
* where temporary memory is needed.
* <p>
* Use the factories {@code BufferStack.of(...)} to create new instances of this class.
* <p>
* Note: The reused segments are neither zeroed out before nor after re-use.
*/
public final class BufferStack {

// Capacity (in bytes) of each per-carrier-thread backing segment.
private final long byteSize;
// Alignment (in bytes) of each per-carrier-thread backing segment.
private final long byteAlignment;
// Lazily-initialized, per-carrier-thread recycling state (lock + slicing allocator).
private final CarrierThreadLocal<PerThread> tl;

private BufferStack(long byteSize, long byteAlignment) {
    this.byteSize = byteSize;
    this.byteAlignment = byteAlignment;
    // Deliberately an anonymous class rather than a lambda: per the review
    // discussion, a lambda here had adverse startup-performance effects.
    this.tl = new CarrierThreadLocal<>() {
        @Override
        protected BufferStack.PerThread initialValue() {
            return BufferStack.PerThread.of(byteSize, byteAlignment);
        }
    };
}

/**
 * {@return a new Arena that tries to serve allocations of {@code byteSize} and
 * {@code byteAlignment} by recycling this BufferStack's internal memory}
 *
 * @param byteSize      number of bytes to reserve from this BufferStack's internal memory
 * @param byteAlignment alignment to use for the reservation
 */
@ForceInline
public Arena pushFrame(long byteSize, long byteAlignment) {
    // Delegate to the state associated with the current carrier thread.
    final PerThread perThread = tl.get();
    return perThread.pushFrame(byteSize, byteAlignment);
}

/**
 * {@return a new Arena that tries to serve allocations of {@code byteSize}
 * (with no alignment constraint) by recycling this BufferStack's internal memory}
 *
 * @param byteSize number of bytes to reserve from this BufferStack's internal memory
 */
@ForceInline
public Arena pushFrame(long byteSize) {
    // An alignment of 1 byte imposes no alignment constraint.
    return pushFrame(byteSize, 1);
}

/**
 * {@return a new Arena that tries to serve an allocation described by the
 * given {@code layout} by recycling this BufferStack's internal memory}
 *
 * @param layout layout whose size and alignment are to be reserved
 */
@ForceInline
public Arena pushFrame(MemoryLayout layout) {
    final long size = layout.byteSize();
    final long alignment = layout.byteAlignment();
    return pushFrame(size, alignment);
}

/**
 * {@return a debug-friendly textual representation of this BufferStack}
 */
@Override
public String toString() {
    return "BufferStack[byteSize=" + byteSize
            + ", byteAlignment=" + byteAlignment
            + "]";
}

private record PerThread(ReentrantLock lock,
Arena arena,
SlicingAllocator stack,
CleanupAction cleanupAction) {

@ForceInline
public Arena pushFrame(long size, long byteAlignment) {
    // Only virtual threads can share this carrier-local state with other
    // threads, so only they need the lock; a virtual thread that already
    // holds the lock (nested frames) must not try to acquire it again.
    boolean needsLock = Thread.currentThread().isVirtual() && !lock.isHeldByCurrentThread();
    if (needsLock && !lock.tryLock()) {
        // Rare: another virtual thread on the same carrier competed for acquisition.
        return Arena.ofConfined();
    }
    if (!stack.canAllocate(size, byteAlignment)) {
        // Not enough recyclable memory left; release the lock (if taken
        // above) and fall back to a plain confined arena.
        if (needsLock) lock.unlock();
        return Arena.ofConfined();
    }
    // The Frame takes ownership of the lock (when held) and releases it on close().
    return new Frame(needsLock, size, byteAlignment);
}

static PerThread of(long byteSize, long byteAlignment) {
final Arena arena = Arena.ofAuto();
return new PerThread(new ReentrantLock(),
arena,
new SlicingAllocator(arena.allocate(byteSize, byteAlignment)),
new CleanupAction(arena));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any reason why you didn't use a lambda here?

Copy link
Contributor

@mcimadamore mcimadamore May 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, not a big fan of records here -- it seems that many implementation details such as cleanup action, lock and slicing allocator are "leaked out" to the caller, that is now responsible to set things up correctly. I think a PerThread class with a constructor taking arena, size, alignment would make the code less coupled and more readable.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And, if you do that, you then don't need the of factory -- clients can just use new

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any reason why you didn't use a lambda here?

I also think that CleanupAction should be changed to lambda

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using an anonymous class for the cleanup action had very adverse effects on performance. I didn't want to use a lambda for startup performance reasons.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If using lambda affects performance, how about using anonymous classes?

            return new PerThread(new ReentrantLock(),
                    arena,
                    new SlicingAllocator(arena.allocate(byteSize, byteAlignment)),
                    new Consumer<MemorySegment>() {
                        @Override
                        public void accept(MemorySegment memorySegment) {
                            Reference.reachabilityFence(arena);
                        }});

Copy link
Member

@liach liach May 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anonymous classes also captures outer variables and break cleaner/GC. The only safe measures are local enums or records, which never capture outer variables (anonymous classes cannot be enum or records)

}

// Installed as the cleanup action of each frame's reinterpreted segment:
// the reachability fence keeps the automatic source arena strongly
// reachable while any recycled segment is still alive. A record (rather
// than a lambda or anonymous class) avoids both accidental captures and
// the startup cost noted in the review discussion.
private record CleanupAction(Arena arena) implements Consumer<MemorySegment> {
    @Override
    public void accept(MemorySegment memorySegment) {
        Reference.reachabilityFence(arena);
    }
}

// An Arena view over a reservation at the top of the per-thread stack.
// Frames must be closed in LIFO order; close() rewinds the stack to the
// offset recorded at construction.
private final class Frame implements Arena {

    // Whether pushFrame() acquired the carrier lock on behalf of this
    // frame; if so, close() is responsible for releasing it.
    private final boolean locked;
    // Stack offset to restore when this frame is closed.
    private final long parentOffset;
    // Offset just past this frame's reservation; used to detect
    // out-of-order close() calls.
    private final long topOfStack;
    // Confined arena providing thread-confinement and liveness checks
    // for segments allocated from this frame.
    private final Arena confinedArena;
    // Slices the reserved segment to serve individual allocations.
    private final SegmentAllocator frame;

    @SuppressWarnings("restricted")
    @ForceInline
    public Frame(boolean locked, long byteSize, long byteAlignment) {
        this.locked = locked;
        this.parentOffset = stack.currentOffset();
        final MemorySegment frameSegment = stack.allocate(byteSize, byteAlignment);
        this.topOfStack = stack.currentOffset();
        this.confinedArena = Arena.ofConfined();
        // The cleanup action will keep the original automatic `arena` (from which
        // the reusable segment is first allocated) alive even if this Frame
        // becomes unreachable but there are reachable segments still alive.
        this.frame = new SlicingAllocator(frameSegment.reinterpret(confinedArena, cleanupAction));
    }

    @ForceInline
    private void assertOrder() {
        // If the stack top moved, a nested frame is still open (or was
        // closed out of order).
        if (topOfStack != stack.currentOffset())
            throw new IllegalStateException("Out of order access: frame not top-of-stack");
    }

    @ForceInline
    @Override
    @SuppressWarnings("restricted")
    public MemorySegment allocate(long byteSize, long byteAlignment) {
        // Make sure we are on the right thread and not closed
        MemorySessionImpl.toMemorySession(confinedArena).checkValidState();
        return frame.allocate(byteSize, byteAlignment);
    }

    @ForceInline
    @Override
    public MemorySegment.Scope scope() {
        return confinedArena.scope();
    }

    @ForceInline
    @Override
    public void close() {
        assertOrder();
        // the Arena::close method is called "early" as it checks thread
        // confinement and crucially before any mutation of the internal
        // state takes place.
        confinedArena.close();
        stack.resetTo(parentOffset);
        if (locked) {
            lock.unlock();
        }
    }
}
}
Comment on lines +103 to +194
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
private record PerThread(ReentrantLock lock,
Arena arena,
SlicingAllocator stack,
CleanupAction cleanupAction) {
@ForceInline
public Arena pushFrame(long size, long byteAlignment) {
boolean needsLock = Thread.currentThread().isVirtual() && !lock.isHeldByCurrentThread();
if (needsLock && !lock.tryLock()) {
// Rare: another virtual thread on the same carrier competed for acquisition.
return Arena.ofConfined();
}
if (!stack.canAllocate(size, byteAlignment)) {
if (needsLock) lock.unlock();
return Arena.ofConfined();
}
return new Frame(needsLock, size, byteAlignment);
}
static PerThread of(long byteSize, long byteAlignment) {
final Arena arena = Arena.ofAuto();
return new PerThread(new ReentrantLock(),
arena,
new SlicingAllocator(arena.allocate(byteSize, byteAlignment)),
new CleanupAction(arena));
}
private record CleanupAction(Arena arena) implements Consumer<MemorySegment> {
@Override
public void accept(MemorySegment memorySegment) {
Reference.reachabilityFence(arena);
}
}
private final class Frame implements Arena {
private final boolean locked;
private final long parentOffset;
private final long topOfStack;
private final Arena confinedArena;
private final SegmentAllocator frame;
@SuppressWarnings("restricted")
@ForceInline
public Frame(boolean locked, long byteSize, long byteAlignment) {
this.locked = locked;
this.parentOffset = stack.currentOffset();
final MemorySegment frameSegment = stack.allocate(byteSize, byteAlignment);
this.topOfStack = stack.currentOffset();
this.confinedArena = Arena.ofConfined();
// The cleanup action will keep the original automatic `arena` (from which
// the reusable segment is first allocated) alive even if this Frame
// becomes unreachable but there are reachable segments still alive.
this.frame = new SlicingAllocator(frameSegment.reinterpret(confinedArena, cleanupAction));
}
@ForceInline
private void assertOrder() {
if (topOfStack != stack.currentOffset())
throw new IllegalStateException("Out of order access: frame not top-of-stack");
}
@ForceInline
@Override
@SuppressWarnings("restricted")
public MemorySegment allocate(long byteSize, long byteAlignment) {
// Make sure we are on the right thread and not closed
MemorySessionImpl.toMemorySession(confinedArena).checkValidState();
return frame.allocate(byteSize, byteAlignment);
}
@ForceInline
@Override
public MemorySegment.Scope scope() {
return confinedArena.scope();
}
@ForceInline
@Override
public void close() {
assertOrder();
// the Arena::close method is called "early" as it checks thread
// confinement and crucially before any mutation of the internal
// state takes place.
confinedArena.close();
stack.resetTo(parentOffset);
if (locked) {
lock.unlock();
}
}
}
}
private record PerThread(ReentrantLock lock,
Arena arena,
SlicingAllocator stack,
CleanupAction cleanupAction) {
@SuppressWarnings("restricted")
@ForceInline
public Arena pushFrame(long size, long byteAlignment) {
boolean needsLock = Thread.currentThread().isVirtual() && !lock.isHeldByCurrentThread();
if (needsLock && !lock.tryLock()) {
// Rare: another virtual thread on the same carrier competed for acquisition.
return Arena.ofConfined();
}
if (!stack.canAllocate(size, byteAlignment)) {
if (needsLock) lock.unlock();
return Arena.ofConfined();
}
long parentOffset = stack.currentOffset();
final MemorySegment frameSegment = stack.allocate(size, byteAlignment);
long topOfStack = stack.currentOffset();
Arena confinedArena = Arena.ofConfined();
// return new Frame(needsLock, size, byte
// The cleanup action will keep the original automatic `arena` (from which
// the reusable segment is first allocated) alive even if this Frame
// becomes unreachable but there are reachable segments still alive.
return new Frame(this, needsLock, parentOffset, topOfStack, confinedArena, new SlicingAllocator(frameSegment.reinterpret(confinedArena, cleanupAction)));
}
static PerThread of(long byteSize, long byteAlignment) {
final Arena arena = Arena.ofAuto();
return new PerThread(new ReentrantLock(),
arena,
new SlicingAllocator(arena.allocate(byteSize, byteAlignment)),
new CleanupAction(arena));
}
private record CleanupAction(Arena arena) implements Consumer<MemorySegment> {
@Override
public void accept(MemorySegment memorySegment) {
Reference.reachabilityFence(arena);
}
}
}
private record Frame(PerThread thread, boolean locked, long parentOffset, long topOfStack, Arena confinedArena, SegmentAllocator frame) implements Arena {
@ForceInline
private void assertOrder() {
if (topOfStack != thread.stack.currentOffset())
throw new IllegalStateException("Out of order access: frame not top-of-stack");
}
@ForceInline
@Override
@SuppressWarnings("restricted")
public MemorySegment allocate(long byteSize, long byteAlignment) {
// Make sure we are on the right thread and not closed
MemorySessionImpl.toMemorySession(confinedArena).checkValidState();
return frame.allocate(byteSize, byteAlignment);
}
@ForceInline
@Override
public MemorySegment.Scope scope() {
return confinedArena.scope();
}
@ForceInline
@Override
public void close() {
assertOrder();
// the Arena::close method is called "early" as it checks thread
// confinement and crucially before any mutation of the internal
// state takes place.
confinedArena.close();
thread.stack.resetTo(parentOffset);
if (locked) {
thread.lock.unlock();
}
}
}

Same as the suggestion above — the code has changed since then, so I updated and remade the suggestion, this time splitting it into two records. What do you think?


/**
 * {@return a new BufferStack that recycles {@code byteSize} bytes of
 * per-carrier-thread memory aligned to {@code byteAlignment}}
 *
 * @param byteSize      size (in bytes) of the internal backing memory; must be non-negative
 * @param byteAlignment alignment of the internal backing memory; must be a power of two
 * @throws IllegalArgumentException if {@code byteSize} is negative, or if
 *         {@code byteAlignment} is negative or not a power of two
 */
public static BufferStack of(long byteSize, long byteAlignment) {
    if (byteSize < 0) {
        throw new IllegalArgumentException("Negative byteSize: " + byteSize);
    }
    if (byteAlignment < 0) {
        throw new IllegalArgumentException("Negative byteAlignment: " + byteAlignment);
    }
    // Fail fast here: Arena::allocate requires a power-of-two alignment, so a
    // zero or non-power-of-two value would otherwise be rejected only lazily,
    // on the first per-thread use of this BufferStack.
    if (Long.bitCount(byteAlignment) != 1) {
        throw new IllegalArgumentException("byteAlignment must be a power of two: " + byteAlignment);
    }
    return new BufferStack(byteSize, byteAlignment);
}

/**
 * {@return a new BufferStack that recycles {@code byteSize} bytes of
 * per-carrier-thread memory with no alignment constraint}
 *
 * @param byteSize size (in bytes) of the internal backing memory; must be non-negative
 * @throws IllegalArgumentException if {@code byteSize} is negative
 */
public static BufferStack of(long byteSize) {
    // Delegate so byteSize is validated consistently with of(long, long);
    // previously a negative size bypassed validation and failed only on
    // first per-thread use.
    return of(byteSize, 1);
}

/**
 * {@return a new BufferStack whose per-carrier-thread memory has the size and
 * alignment of the given {@code layout}}
 *
 * @param layout layout describing the internal backing memory (non-null)
 */
public static BufferStack of(MemoryLayout layout) {
    // The accessor calls below implicitly null-check `layout`.
    final long size = layout.byteSize();
    final long alignment = layout.byteAlignment();
    return of(size, alignment);
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -38,6 +38,22 @@ public SlicingAllocator(MemorySegment segment) {
this.segment = segment;
}

// Returns the current stack pointer: the offset into the backing segment at
// which the next allocation would start. Callers (e.g. BufferStack frames)
// record this value and later pass it to resetTo to rewind the allocator.
public long currentOffset() {
    return sp;
}

/**
 * Rewinds this allocator's stack pointer to {@code offset}, making the
 * memory above it available for reuse.
 *
 * @param offset new stack pointer; must lie in {@code [0, sp]}
 * @throws IllegalArgumentException if {@code offset} is outside {@code [0, sp]}
 */
public void resetTo(long offset) {
    final boolean outOfRange = offset < 0 || offset > sp;
    if (outOfRange) {
        throw new IllegalArgumentException(String.format("offset %d should be in [0, %d] ", offset, sp));
    }
    this.sp = offset;
}

/**
 * {@return whether a slice of {@code byteSize} bytes aligned to
 * {@code byteAlignment} still fits in the backing segment}
 */
public boolean canAllocate(long byteSize, long byteAlignment) {
    final long base = segment.address();
    // Align the absolute address of the stack pointer, then translate back
    // to an offset relative to the segment's base.
    final long alignedStart = Utils.alignUp(base + sp, byteAlignment) - base;
    return alignedStart + byteSize <= segment.byteSize();
}

MemorySegment trySlice(long byteSize, long byteAlignment) {
long min = segment.address();
long start = Utils.alignUp(min + sp, byteAlignment) - min;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
*/
package jdk.internal.foreign.abi;

import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.JavaLangInvokeAccess;
import jdk.internal.access.SharedSecrets;
import jdk.internal.foreign.BufferStack;
import jdk.internal.foreign.CABI;
import jdk.internal.foreign.abi.AbstractLinker.UpcallStubFactory;
import jdk.internal.foreign.abi.aarch64.linux.LinuxAArch64Linker;
Expand Down Expand Up @@ -390,26 +390,12 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) {
: chunkOffset;
}

public static Arena newBoundedArena(long size) {
return new Arena() {
final Arena arena = Arena.ofConfined();
final SegmentAllocator slicingAllocator = SegmentAllocator.slicingAllocator(arena.allocate(size));

@Override
public Scope scope() {
return arena.scope();
}
private static final int LINKER_STACK_SIZE = Integer.getInteger("jdk.internal.foreign.LINKER_STACK_SIZE", 256);
private static final BufferStack LINKER_STACK = BufferStack.of(LINKER_STACK_SIZE, 1);

@Override
public void close() {
arena.close();
}

@Override
public MemorySegment allocate(long byteSize, long byteAlignment) {
return slicingAllocator.allocate(byteSize, byteAlignment);
}
};
@ForceInline
public static Arena newBoundedArena(long size) {
    // Recycles the carrier-thread-local LINKER_STACK memory; when the stack
    // cannot satisfy the request, pushFrame falls back to a plain confined
    // arena. NOTE(review): 8-byte alignment — presumably sized for the
    // largest primitive slot used by linker buffers; confirm against callers.
    return LINKER_STACK.pushFrame(size, 8);
}

public static Arena newEmptyArena() {
Expand Down
2 changes: 2 additions & 0 deletions test/jdk/ProblemList.txt
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,8 @@ jdk/jfr/jvm/TestWaste.java 8282427 generic-

# jdk_foreign

java/foreign/TestBufferStackStress.java 8350455 macosx-all

############################################################################
# Client manual tests

Expand Down
Loading