Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions clang/include/clang/CIR/Dialect/IR/CIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -4899,6 +4899,11 @@ def CIR_AtomicFetchKind : CIR_I32EnumAttr<
I32EnumAttrCase<"Min", 7, "min">
]>;

// Synchronization scope for atomic operations. `single_thread` carries
// signal-fence semantics (lowered to LLVM syncscope("singlethread")), while
// `system` is the ordinary cross-thread scope (lowered with an empty LLVM
// syncscope string, i.e. no explicit scope on the instruction).
def CIR_SyncScopeKind : CIR_I32EnumAttr<"SyncScopeKind", "sync scope kind", [
I32EnumAttrCase<"SingleThread", 0, "single_thread">,
I32EnumAttrCase<"System", 1, "system">
]>;

def CIR_AtomicFetchOp : CIR_Op<"atomic.fetch", [
AllTypesMatch<["result", "val"]>,
TypesMatchWith<"type of 'val' must match the pointee type of 'ptr'",
Expand Down Expand Up @@ -5122,6 +5127,36 @@ def CIR_AtomicClearOp : CIR_Op<"atomic.clear"> {
}];
}

// Fence op for the C/C++ atomic fence builtins; lowered to an LLVM `fence`
// instruction. The optional syncscope distinguishes thread fences (system)
// from signal fences (single_thread).
// NOTE(review): sibling atomic ops use an `Op` suffix (CIR_AtomicFetchOp,
// CIR_AtomicClearOp); consider renaming to CIR_AtomicFenceOp for consistency.
def CIR_AtomicFence : CIR_Op<"atomic.fence"> {
let summary = "Atomic thread fence";
let description = [{
C/C++ Atomic thread fence synchronization primitive. Implements the builtin
`__atomic_thread_fence` which enforces memory ordering constraints across
threads within the specified synchronization scope.

This handles all variations including:
- `__atomic_thread_fence`
- `__atomic_signal_fence`
- `__c11_atomic_thread_fence`
- `__c11_atomic_signal_fence`

Example:
```mlir
cir.atomic.fence syncscope(system) seq_cst
cir.atomic.fence syncscope(single_thread) seq_cst
```
}];

// When syncscope is absent, the lowering emits no LLVM syncscope at all
// (see getLLVMSyncScope in LowerToLLVM.cpp).
let arguments = (ins
Arg<CIR_MemOrder, "memory order">:$ordering,
OptionalAttr<CIR_SyncScopeKind>:$syncscope
);

let assemblyFormat = [{
(`syncscope` `(` $syncscope^ `)`)? $ordering attr-dict
}];
}

//===----------------------------------------------------------------------===//
// BlockAddressOp
//===----------------------------------------------------------------------===//
Expand Down
28 changes: 28 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,28 @@ static RValue emitBuiltinBitOp(CIRGenFunction &cgf, const CallExpr *e,
return RValue::get(result);
}

/// Emit a cir.atomic.fence for the __atomic_thread_fence /
/// __atomic_signal_fence family of builtins.
///
/// \param cgf       the current function's emission state.
/// \param expr      the builtin call; argument 0 is the memory-order operand.
/// \param syncScope System for thread fences, SingleThread for signal fences.
/// \returns a null mlir::Value -- these builtins produce no result.
static mlir::Value makeAtomicFenceValue(CIRGenFunction &cgf,
                                        const CallExpr *expr,
                                        cir::SyncScopeKind syncScope) {
  CIRGenBuilderTy &builder = cgf.getBuilder();
  mlir::Value orderingVal = cgf.emitScalarExpr(expr->getArg(0));

  // Only compile-time-constant memory orders are supported so far; a runtime
  // ordering would require emitting a switch over the valid orderings.
  // (atomicVariableOrdering must be declared in MissingFeatures.h.)
  auto constOrdering = orderingVal.getDefiningOp<cir::ConstantOp>();
  if (!constOrdering) {
    assert(!cir::MissingFeatures::atomicVariableOrdering());
    return {};
  }

  // A constant ordering that is not an integer attribute means the operand is
  // malformed; emit no fence rather than asserting in that case.
  if (auto constOrderingAttr = constOrdering.getValueAttr<cir::IntAttr>()) {
    auto ordering = static_cast<cir::MemOrder>(constOrderingAttr.getUInt());
    cir::AtomicFence::create(
        builder, cgf.getLoc(expr->getSourceRange()), ordering,
        cir::SyncScopeKindAttr::get(&cgf.getMLIRContext(), syncScope));
  }

  return {};
}

RValue CIRGenFunction::emitRotate(const CallExpr *e, bool isRotateLeft) {
mlir::Value input = emitScalarExpr(e->getArg(0));
mlir::Value amount = emitScalarExpr(e->getArg(1));
Expand Down Expand Up @@ -612,6 +634,12 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
builder.createIsFPClass(loc, v, cir::FPClassTest(test)),
convertType(e->getType())));
}
case Builtin::BI__atomic_thread_fence:
return RValue::get(
makeAtomicFenceValue(*this, e, cir::SyncScopeKind::System));
case Builtin::BI__atomic_signal_fence:
return RValue::get(
makeAtomicFenceValue(*this, e, cir::SyncScopeKind::SingleThread));
}

// If this is an alias for a lib function (e.g. __builtin_sin), emit
Expand Down
22 changes: 22 additions & 0 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -750,6 +750,15 @@ getLLVMMemOrder(std::optional<cir::MemOrder> memorder) {
llvm_unreachable("unknown memory order");
}

/// Map an optional CIR sync scope onto the string form used by the LLVM
/// dialect fence: "singlethread" for single-thread (signal-fence) scope,
/// "" for system scope, and no value at all when no scope was specified.
static std::optional<llvm::StringRef>
getLLVMSyncScope(std::optional<cir::SyncScopeKind> syncScope) {
  if (!syncScope.has_value())
    return std::nullopt;
  if (*syncScope == cir::SyncScopeKind::SingleThread)
    return "singlethread";
  return "";
}

mlir::LogicalResult CIRToLLVMAtomicCmpXchgOpLowering::matchAndRewrite(
cir::AtomicCmpXchgOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
Expand Down Expand Up @@ -826,6 +835,19 @@ mlir::LogicalResult CIRToLLVMAtomicClearOpLowering::matchAndRewrite(
return mlir::success();
}

mlir::LogicalResult CIRToLLVMAtomicFenceLowering::matchAndRewrite(
    cir::AtomicFence op, OpAdaptor adaptor,
    mlir::ConversionPatternRewriter &rewriter) const {
  // Translate the CIR memory order into the LLVM dialect equivalent.
  const mlir::LLVM::AtomicOrdering llvmOrdering =
      getLLVMMemOrder(adaptor.getOrdering());

  // Build the LLVM fence and carry over the optional synchronization scope
  // ("singlethread" for signal fences, "" for system scope).
  auto llvmFence =
      mlir::LLVM::FenceOp::create(rewriter, op.getLoc(), llvmOrdering);
  llvmFence.setSyncscope(getLLVMSyncScope(adaptor.getSyncscope()));

  rewriter.replaceOp(op, llvmFence);
  return mlir::success();
}

static mlir::LLVM::AtomicBinOp
getLLVMAtomicBinOp(cir::AtomicFetchKind k, bool isInt, bool isSignedInt) {
switch (k) {
Expand Down
181 changes: 181 additions & 0 deletions clang/test/CIR/CodeGen/atomic-thread-fence.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s

// Aggregate used by the fence tests below: `value` exercises a plain int
// member store after a fence, `ptr` an atomic pointer-sized load.
struct Data {
int value;
void *ptr;
};

typedef struct Data *DataPtr;

// A seq_cst thread fence becomes cir.atomic.fence with the `system` scope,
// which lowers to a plain `fence seq_cst` (no explicit syncscope) in LLVM IR,
// matching classic codegen (OGCG).
void applyThreadFence() {
__atomic_thread_fence(__ATOMIC_SEQ_CST);
// CIR-LABEL: @applyThreadFence
// CIR: cir.atomic.fence syncscope(system) seq_cst
// CIR: cir.return

// LLVM-LABEL: @applyThreadFence
// LLVM: fence seq_cst
// LLVM: ret void

// OGCG-LABEL: @applyThreadFence
// OGCG: fence seq_cst
// OGCG: ret void
}

// A signal fence takes the `single_thread` scope in CIR and lowers to
// fence syncscope("singlethread") in LLVM IR, matching classic codegen.
void applySignalFence() {
__atomic_signal_fence(__ATOMIC_SEQ_CST);
// CIR-LABEL: @applySignalFence
// CIR: cir.atomic.fence syncscope(single_thread) seq_cst
// CIR: cir.return

// LLVM-LABEL: @applySignalFence
// LLVM: fence syncscope("singlethread") seq_cst
// LLVM: ret void

// OGCG-LABEL: @applySignalFence
// OGCG: fence syncscope("singlethread") seq_cst
// OGCG: ret void
}

// Checks that the thread fence is emitted before the following member store,
// i.e. the fence and the surrounding ordinary code keep their source order.
void modifyWithThreadFence(DataPtr d) {
__atomic_thread_fence(__ATOMIC_SEQ_CST);
d->value = 42;
// CIR-LABEL: @modifyWithThreadFence
// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
// CIR: cir.atomic.fence syncscope(system) seq_cst
// CIR: %[[VAL_42:.*]] = cir.const #cir.int<42> : !s32i
// CIR: %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
// CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][0] {name = "value"} : !cir.ptr<!rec_Data> -> !cir.ptr<!s32i>
// CIR: cir.store{{.*}} %[[VAL_42]], %[[DATA_VALUE]] : !s32i, !cir.ptr<!s32i>
// CIR: cir.return

// LLVM-LABEL: @modifyWithThreadFence
// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
// LLVM: fence seq_cst
// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
// LLVM: store i32 42, ptr %[[DATA_VALUE]], align 8
// LLVM: ret void

// OGCG-LABEL: @modifyWithThreadFence
// OGCG: %[[DATA:.*]] = alloca ptr, align 8
// OGCG: fence seq_cst
// OGCG: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// OGCG: %[[DATA_VALUE:.*]] = getelementptr inbounds nuw %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
// OGCG: store i32 42, ptr %[[DATA_VALUE]], align 8
// OGCG: ret void
}

// Signal-fence variant of modifyWithThreadFence: the fence must precede the
// member store and take the single_thread scope.
// Fix: the FileCheck variable previously named VAL_42 captures the constant
// 24 here; renamed to VAL_24 so the name matches what it binds.
void modifyWithSignalFence(DataPtr d) {
__atomic_signal_fence(__ATOMIC_SEQ_CST);
d->value = 24;
// CIR-LABEL: @modifyWithSignalFence
// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
// CIR: cir.atomic.fence syncscope(single_thread) seq_cst
// CIR: %[[VAL_24:.*]] = cir.const #cir.int<24> : !s32i
// CIR: %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
// CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][0] {name = "value"} : !cir.ptr<!rec_Data> -> !cir.ptr<!s32i>
// CIR: cir.store{{.*}} %[[VAL_24]], %[[DATA_VALUE]] : !s32i, !cir.ptr<!s32i>
// CIR: cir.return

// LLVM-LABEL: @modifyWithSignalFence
// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
// LLVM: fence syncscope("singlethread") seq_cst
// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
// LLVM: store i32 24, ptr %[[DATA_VALUE]], align 8
// LLVM: ret void

// OGCG-LABEL: @modifyWithSignalFence
// OGCG: %[[DATA:.*]] = alloca ptr, align 8
// OGCG: fence syncscope("singlethread") seq_cst
// OGCG: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// OGCG: %[[DATA_VALUE:.*]] = getelementptr inbounds nuw %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
// OGCG: store i32 24, ptr %[[DATA_VALUE]], align 8
// OGCG: ret void
}

// Checks a thread fence followed by an atomic pointer load: the fence must
// come before the atomic load / temp-store sequence that __atomic_load_n
// expands to.
void loadWithThreadFence(DataPtr d) {
__atomic_thread_fence(__ATOMIC_SEQ_CST);
__atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
// CIR-LABEL: @loadWithThreadFence
// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
// CIR: %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"] {alignment = 8 : i64}
// CIR: cir.atomic.fence syncscope(system) seq_cst
// CIR: %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
// CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][1] {name = "ptr"} : !cir.ptr<!rec_Data> -> !cir.ptr<!cir.ptr<!void>>
// CIR: %[[CASTED_DATA_VALUE:.*]] = cir.cast bitcast %[[DATA_VALUE]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
// CIR: %[[CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
// CIR: %[[ATOMIC_LOAD:.*]] = cir.load{{.*}} atomic(seq_cst) %[[CASTED_DATA_VALUE]] : !cir.ptr<!u64i>, !u64i
// CIR: cir.store{{.*}} %[[ATOMIC_LOAD]], %[[CASTED_ATOMIC_TEMP]] : !u64i, !cir.ptr<!u64i>
// CIR: %[[DOUBLE_CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[CASTED_ATOMIC_TEMP]] : !cir.ptr<!u64i> -> !cir.ptr<!cir.ptr<!void>>
// CIR: %[[ATOMIC_LOAD_PTR:.*]] = cir.load{{.*}} %[[DOUBLE_CASTED_ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
// CIR: cir.return

// LLVM-LABEL: @loadWithThreadFence
// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
// LLVM: %[[DATA_TEMP:.*]] = alloca ptr, i64 1, align 8
// LLVM: fence seq_cst
// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
// LLVM: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
// LLVM: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
// LLVM: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
// LLVM: ret void

// OGCG-LABEL: @loadWithThreadFence
// OGCG: %[[DATA:.*]] = alloca ptr, align 8
// OGCG: %[[DATA_TEMP:.*]] = alloca ptr, align 8
// OGCG: fence seq_cst
// OGCG: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// OGCG: %[[DATA_VALUE:.*]] = getelementptr inbounds nuw %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
// OGCG: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
// OGCG: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
// OGCG: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
// OGCG: ret void
}

// Signal-fence variant of loadWithThreadFence.
// Fix: the LLVM/OGCG lines used `%[[DATA_TEMP_LOAD]]` (a *use*) without ever
// defining the variable in this CHECK-LABEL block — it referred to the
// capture from the previous function and only matched by SSA-numbering
// coincidence. Changed to definitions `%[[DATA_TEMP_LOAD:.*]]`.
void loadWithSignalFence(DataPtr d) {
__atomic_signal_fence(__ATOMIC_SEQ_CST);
__atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
// CIR-LABEL: @loadWithSignalFence
// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
// CIR: %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"] {alignment = 8 : i64}
// CIR: cir.atomic.fence syncscope(single_thread) seq_cst
// CIR: %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
// CIR: %[[DATA_PTR:.*]] = cir.get_member %[[LOAD_DATA]][1] {name = "ptr"} : !cir.ptr<!rec_Data> -> !cir.ptr<!cir.ptr<!void>>
// CIR: %[[CASTED_DATA_PTR:.*]] = cir.cast bitcast %[[DATA_PTR]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
// CIR: %[[CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
// CIR: %[[ATOMIC_LOAD:.*]] = cir.load{{.*}} atomic(seq_cst) %[[CASTED_DATA_PTR]] : !cir.ptr<!u64i>, !u64i
// CIR: cir.store{{.*}} %[[ATOMIC_LOAD]], %[[CASTED_ATOMIC_TEMP]] : !u64i, !cir.ptr<!u64i>
// CIR: %[[DOUBLE_CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[CASTED_ATOMIC_TEMP]] : !cir.ptr<!u64i> -> !cir.ptr<!cir.ptr<!void>>
// CIR: %[[LOAD_ATOMIC_TEMP:.*]] = cir.load{{.*}} %[[DOUBLE_CASTED_ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
// CIR: cir.return

// LLVM-LABEL: @loadWithSignalFence
// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
// LLVM: %[[DATA_TEMP:.*]] = alloca ptr, i64 1, align 8
// LLVM: fence syncscope("singlethread") seq_cst
// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
// LLVM: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
// LLVM: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
// LLVM: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
// LLVM: ret void

// OGCG-LABEL: @loadWithSignalFence
// OGCG: %[[DATA:.*]] = alloca ptr, align 8
// OGCG: %[[DATA_TEMP:.*]] = alloca ptr, align 8
// OGCG: fence syncscope("singlethread") seq_cst
// OGCG: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// OGCG: %[[DATA_VALUE:.*]] = getelementptr inbounds nuw %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
// OGCG: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
// OGCG: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
// OGCG: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
// OGCG: ret void
}