Skip to content

Commit

Permalink
[WebAssembly] Support for atomic fences
Browse files Browse the repository at this point in the history
Summary:
This adds support for translating the LLVM IR fence instruction. We
convert a singlethread fence to a pseudo compiler barrier, which becomes
0 instructions in the final binary, and a multithread fence to an
idempotent atomicrmw instruction on a memory address.

Reviewers: dschuff, jfb, sunfish, tlively

Subscribers: sbc100, jgravelle-google, llvm-commits

Differential Revision: https://reviews.llvm.org/D50277

llvm-svn: 361884
  • Loading branch information
aheejin committed May 28, 2019
1 parent 81748ba commit 5514658
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 4 deletions.
4 changes: 4 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
Expand Up @@ -369,6 +369,10 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer->AddBlankLine();
}
break;
case WebAssembly::COMPILER_FENCE:
// This is a compiler barrier that prevents instruction reordering during
// backend compilation, and should not be emitted.
break;
case WebAssembly::EXTRACT_EXCEPTION_I32:
case WebAssembly::EXTRACT_EXCEPTION_I32_S:
// These are pseudo instructions that simulate popping values from stack.
Expand Down
97 changes: 93 additions & 4 deletions llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
Expand Up @@ -77,14 +77,103 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
return;
}

// A few custom selection cases. If we need WebAssembly-specific selection,
// uncomment this block and add corresponding case statements.
/*
// Few custom selection stuff.
SDLoc DL(Node);
MachineFunction &MF = CurDAG->getMachineFunction();
switch (Node->getOpcode()) {
case ISD::ATOMIC_FENCE: {
if (!MF.getSubtarget<WebAssemblySubtarget>().hasAtomics())
break;

uint64_t SyncScopeID =
cast<ConstantSDNode>(Node->getOperand(2).getNode())->getZExtValue();
switch (SyncScopeID) {
case SyncScope::SingleThread: {
// We lower a single-thread fence to a pseudo compiler barrier instruction
// preventing instruction reordering. This will not be emitted in the final
// binary.
MachineSDNode *Fence =
CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
DL, // debug loc
MVT::Other, // outchain type
Node->getOperand(0) // inchain
);
ReplaceNode(Node, Fence);
CurDAG->RemoveDeadNode(Node);
return;
}

case SyncScope::System: {
// For non-emscripten systems, we have not decided on what we should
// translate fences to yet.
if (!Subtarget->getTargetTriple().isOSEmscripten())
report_fatal_error(
"ATOMIC_FENCE is not yet supported in non-emscripten OSes");

// Wasm does not have a fence instruction, but because all atomic
// instructions in wasm are sequentially consistent, we translate a
// fence to an idempotent atomic RMW instruction to a linear memory
// address. This is to ensure a fence also prevents reordering of
// non-atomic instructions in the VM. Even though LLVM IR's fence
// instruction does not say anything about its relationship with
// non-atomic instructions, we think this is more user-friendly.
//
// While any address can work, here we use the value stored in the
// __stack_pointer wasm global because there's a high chance that area is
// in cache.
//
// So the selected instructions will be in the form of:
// %addr = global.get $__stack_pointer
// %0 = i32.const 0
// i32.atomic.rmw.or %addr, %0
SDValue StackPtrSym = CurDAG->getTargetExternalSymbol(
"__stack_pointer", TLI->getPointerTy(CurDAG->getDataLayout()));
MachineSDNode *GetGlobal =
CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, // opcode
DL, // debug loc
MVT::i32, // result type
StackPtrSym // __stack_pointer symbol
);

SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
auto *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getUnknownStack(MF),
// FIXME Volatile isn't really correct, but currently all LLVM
// atomic instructions are treated as volatiles in the backend, so
// we should be consistent.
MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
MachineMemOperand::MOStore,
4, 4, AAMDNodes(), nullptr, SyncScope::System,
AtomicOrdering::SequentiallyConsistent);
MachineSDNode *Const0 =
CurDAG->getMachineNode(WebAssembly::CONST_I32, DL, MVT::i32, Zero);
MachineSDNode *AtomicRMW = CurDAG->getMachineNode(
WebAssembly::ATOMIC_RMW_OR_I32, // opcode
DL, // debug loc
MVT::i32, // result type
MVT::Other, // outchain type
{
Zero, // alignment
Zero, // offset
SDValue(GetGlobal, 0), // __stack_pointer
SDValue(Const0, 0), // OR with 0 to make it idempotent
Node->getOperand(0) // inchain
});

CurDAG->setNodeMemRefs(AtomicRMW, {MMO});
ReplaceUses(SDValue(Node, 0), SDValue(AtomicRMW, 1));
CurDAG->RemoveDeadNode(Node);
return;
}
default:
llvm_unreachable("Unknown scope!");
}
}

default:
break;
}
*/

// Select the default instruction.
SelectCode(Node);
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
Expand Up @@ -887,3 +887,13 @@ defm : TerRMWTruncExtPattern<
ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
ATOMIC_RMW32_U_CMPXCHG_I64>;

//===----------------------------------------------------------------------===//
// Atomic fences
//===----------------------------------------------------------------------===//

// A compiler fence instruction that prevents reordering of instructions.
// It is a pseudo instruction (isPseudo) with no operands and no results, and
// it is marked hasSideEffects, presumably so that the backend does not move
// other instructions across it or delete it as dead.
let Defs = [ARGUMENTS] in {
let isPseudo = 1, hasSideEffects = 1 in
defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
} // Defs = [ARGUMENTS]
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/WebAssembly/atomic-fence.ll
@@ -0,0 +1,47 @@
; RUN: llc < %s | FileCheck %s --check-prefix NOATOMIC
; RUN: not llc < %s -mtriple=wasm32-unknown-unknown -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN
; RUN: not llc < %s -mtriple=wasm32-unknown-wasi -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN
; RUN: llc < %s -mtriple=wasm32-unknown-emscripten -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext | FileCheck %s

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

; NOEMSCRIPTEN: LLVM ERROR: ATOMIC_FENCE is not yet supported in non-emscripten OSes

; A multithread fence turns into 'global.get $__stack_pointer' followed by an
; idempotent atomicrmw instruction.
; CHECK-LABEL: multithread_fence:
; CHECK: global.get $push[[SP:[0-9]+]]=, __stack_pointer
; CHECK-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0
; CHECK-NEXT: i32.atomic.rmw.or $drop=, 0($pop[[SP]]), $pop[[ZERO]]
; NOATOMIC-NOT: i32.atomic.rmw.or
define void @multithread_fence() {
fence seq_cst
ret void
}

; Fences with weaker memory orderings than seq_cst should be treated the same
; because atomic memory accesses in wasm are sequentially consistent.
; CHECK-LABEL: multithread_weak_fence:
; CHECK: global.get $push{{.+}}=, __stack_pointer
; CHECK: i32.atomic.rmw.or
; CHECK: i32.atomic.rmw.or
; CHECK: i32.atomic.rmw.or
define void @multithread_weak_fence() {
fence acquire
fence release
fence acq_rel
ret void
}

; A singlethread fence becomes a compiler_fence instruction, a pseudo instruction
; that acts as a compiler barrier. The barrier should not be emitted to the .s file.
; CHECK-LABEL: singlethread_fence:
; CHECK-NOT: compiler_fence
define void @singlethread_fence() {
fence syncscope("singlethread") seq_cst
fence syncscope("singlethread") acquire
fence syncscope("singlethread") release
fence syncscope("singlethread") acq_rel
ret void
}

0 comments on commit 5514658

Please sign in to comment.