14 changes: 6 additions & 8 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -1194,13 +1194,14 @@ def AtomicReadOp : OpenMP_Op<"atomic.read", [AllTypesMatch<["x", "v"]>]> {

let arguments = (ins OpenMP_PointerLikeType:$x,
OpenMP_PointerLikeType:$v,
TypeAttr:$element_type,
DefaultValuedOptionalAttr<I64Attr, "0">:$hint_val,
OptionalAttr<MemoryOrderKindAttr>:$memory_order_val);
let assemblyFormat = [{
$v `=` $x
oilist( `memory_order` `(` custom<ClauseAttr>($memory_order_val) `)`
| `hint` `(` custom<SynchronizationHint>($hint_val) `)`)
`:` type($x) attr-dict
`:` type($x) `,` $element_type attr-dict
}];
let hasVerifier = 1;
let extraClassDeclaration = [{
Expand Down Expand Up @@ -1535,11 +1536,7 @@ def ReductionDeclareOp : OpenMP_Op<"reduction.declare", [Symbol,
// 2.19.5.4 reduction clause
//===----------------------------------------------------------------------===//

def ReductionOp : OpenMP_Op<"reduction", [
TypesMatchWith<"value types matches accumulator element type",
"accumulator", "operand",
"$_self.cast<::mlir::omp::PointerLikeType>().getElementType()">
]> {
def ReductionOp : OpenMP_Op<"reduction"> {
let summary = "reduction construct";
let description = [{
Indicates the value that is produced by the current reduction-participating
Expand All @@ -1549,8 +1546,9 @@ def ReductionOp : OpenMP_Op<"reduction", [
}];

let arguments= (ins AnyType:$operand, OpenMP_PointerLikeType:$accumulator);
let assemblyFormat =
"$operand `,` $accumulator attr-dict `:` type($accumulator)";
let assemblyFormat = [{
$operand `,` $accumulator attr-dict `:` type($operand) `,` type($accumulator)
}];
let hasVerifier = 1;
}

Expand Down
4 changes: 3 additions & 1 deletion mlir/include/mlir/Dialect/OpenMP/OpenMPTypeInterfaces.td
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ def OpenMP_PointerLikeTypeInterface : TypeInterface<"PointerLikeType"> {

let methods = [
InterfaceMethod<
/*description=*/"Returns the pointee type.",
/*description=*/[{
Returns the pointee type or null if the pointer has no pointee type
}],
/*retTy=*/"::mlir::Type",
/*methodName=*/"getElementType"
>,
Expand Down
340 changes: 226 additions & 114 deletions mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp

Large diffs are not rendered by default.

14 changes: 8 additions & 6 deletions mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ static LogicalResult verifyReductionVarList(Operation *op,
if (!accumulators.insert(accum).second)
return op->emitOpError() << "accumulator variable used more than once";

Type varType = accum.getType().cast<PointerLikeType>();
Type varType = accum.getType();
auto symbolRef = std::get<1>(args).cast<SymbolRefAttr>();
auto decl =
SymbolTable::lookupNearestSymbolFrom<ReductionDeclareOp>(op, symbolRef);
Expand Down Expand Up @@ -932,7 +932,8 @@ LogicalResult ReductionDeclareOp::verifyRegions() {
"arguments of the same type";
auto ptrType = atomicReductionEntryBlock.getArgumentTypes()[0]
.dyn_cast<PointerLikeType>();
if (!ptrType || ptrType.getElementType() != getType())
if (!ptrType ||
(ptrType.getElementType() && ptrType.getElementType() != getType()))
return emitOpError() << "expects atomic reduction region arguments to "
"be accumulators containing the reduction type";
return success();
Expand Down Expand Up @@ -1115,8 +1116,9 @@ LogicalResult AtomicWriteOp::verify() {
"memory-order must not be acq_rel or acquire for atomic writes");
}
}
if (getAddress().getType().cast<PointerLikeType>().getElementType() !=
getValue().getType())
Type elementType =
getAddress().getType().cast<PointerLikeType>().getElementType();
if (elementType && elementType != getValue().getType())
return emitError("address must dereference to value type");
return verifySynchronizationHint(*this, getHintVal());
}
Expand Down Expand Up @@ -1166,8 +1168,8 @@ LogicalResult AtomicUpdateOp::verify() {
if (getRegion().getNumArguments() != 1)
return emitError("the region must accept exactly one argument");

if (getX().getType().cast<PointerLikeType>().getElementType() !=
getRegion().getArgument(0).getType()) {
Type elementType = getX().getType().cast<PointerLikeType>().getElementType();
if (elementType && elementType != getRegion().getArgument(0).getType()) {
return emitError("the type of the operand must be a pointer type whose "
"element type is the same as that of the region argument");
}
Expand Down
39 changes: 13 additions & 26 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -759,10 +759,8 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::IRBuilderBase::InsertPointGuard guard(builder);
builder.restoreIP(allocaIP);
for (unsigned i = 0; i < numReductions; ++i) {
auto reductionType =
loop.getReductionVars()[i].getType().cast<LLVM::LLVMPointerType>();
llvm::Value *var = builder.CreateAlloca(
moduleTranslation.convertType(reductionType.getElementType()));
moduleTranslation.convertType(reductionDecls[i].getType()));
privateReductionVariables.push_back(var);
reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
}
Expand Down Expand Up @@ -893,14 +891,11 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
if (owningAtomicReductionGens[i])
atomicGen = owningAtomicReductionGens[i];
auto reductionType =
loop.getReductionVars()[i].getType().cast<LLVM::LLVMPointerType>();
llvm::Value *variable =
moduleTranslation.lookupValue(loop.getReductionVars()[i]);
reductionInfos.push_back(
{moduleTranslation.convertType(reductionType.getElementType()),
variable, privateReductionVariables[i], owningReductionGens[i],
atomicGen});
{moduleTranslation.convertType(reductionDecls[i].getType()), variable,
privateReductionVariables[i], owningReductionGens[i], atomicGen});
}

// The call to createReductions below expects the block to have a
Expand Down Expand Up @@ -1041,15 +1036,13 @@ convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,

llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrderVal());
llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
Type xTy =
readOp.getX().getType().cast<omp::PointerLikeType>().getElementType();
llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
Type vTy =
readOp.getV().getType().cast<omp::PointerLikeType>().getElementType();
llvm::OpenMPIRBuilder::AtomicOpValue V = {
v, moduleTranslation.convertType(vTy), false, false};
llvm::OpenMPIRBuilder::AtomicOpValue X = {
x, moduleTranslation.convertType(xTy), false, false};

llvm::Type *elementType =
moduleTranslation.convertType(readOp.getElementType());

llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
return success();
}
Expand Down Expand Up @@ -1120,10 +1113,8 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
: innerUpdateOp.getOperand(0));
llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
llvm::Value *llvmX = moduleTranslation.lookupValue(opInst.getX());
LLVM::LLVMPointerType mlirXType =
opInst.getX().getType().cast<LLVM::LLVMPointerType>();
llvm::Type *llvmXElementType =
moduleTranslation.convertType(mlirXType.getElementType());
llvm::Type *llvmXElementType = moduleTranslation.convertType(
opInst.getRegion().getArgument(0).getType());
llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
/*isSigned=*/false,
/*isVolatile=*/false};
Expand Down Expand Up @@ -1208,12 +1199,8 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
llvm::Value *llvmV =
moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
auto mlirXType = atomicCaptureOp.getAtomicReadOp()
.getX()
.getType()
.cast<LLVM::LLVMPointerType>();
llvm::Type *llvmXElementType =
moduleTranslation.convertType(mlirXType.getElementType());
llvm::Type *llvmXElementType = moduleTranslation.convertType(
atomicCaptureOp.getAtomicReadOp().getElementType());
llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
/*isSigned=*/false,
/*isVolatile=*/false};
Expand Down
4 changes: 2 additions & 2 deletions mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: mlir-opt %s -convert-async-to-llvm | FileCheck %s
// RUN: mlir-opt %s -convert-async-to-llvm='use-opaque-pointers=1' | FileCheck %s

// CHECK-LABEL: @coro_id
func.func @coro_id() {
// CHECK: %0 = llvm.mlir.constant(0 : i32) : i32
// CHECK: %1 = llvm.mlir.null : !llvm.ptr<i8>
// CHECK: %1 = llvm.mlir.null : !llvm.ptr
// CHECK: %2 = llvm.intr.coro.id %0, %1, %1, %1 : !llvm.token
%0 = async.coro.id
return
Expand Down
10 changes: 4 additions & 6 deletions mlir/test/Conversion/AsyncToLLVM/convert-runtime-to-llvm.mlir
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -convert-async-to-llvm | FileCheck %s --dump-input=always
// RUN: mlir-opt %s -convert-async-to-llvm='use-opaque-pointers=1' | FileCheck %s --dump-input=always

// CHECK-LABEL: @create_token
func.func @create_token() {
Expand All @@ -9,7 +9,7 @@ func.func @create_token() {

// CHECK-LABEL: @create_value
func.func @create_value() {
// CHECK: %[[NULL:.*]] = llvm.mlir.null : !llvm.ptr<f32>
// CHECK: %[[NULL:.*]] = llvm.mlir.null : !llvm.ptr
// CHECK: %[[OFFSET:.*]] = llvm.getelementptr %[[NULL]][1]
// CHECK: %[[SIZE:.*]] = llvm.ptrtoint %[[OFFSET]]
// CHECK: %[[VALUE:.*]] = call @mlirAsyncRuntimeCreateValue(%[[SIZE]])
Expand Down Expand Up @@ -152,8 +152,7 @@ func.func @store() {
// CHECK: %[[VALUE:.*]] = call @mlirAsyncRuntimeCreateValue
%1 = async.runtime.create : !async.value<f32>
// CHECK: %[[P0:.*]] = call @mlirAsyncRuntimeGetValueStorage(%[[VALUE]])
// CHECK: %[[P1:.*]] = llvm.bitcast %[[P0]] : !llvm.ptr<i8> to !llvm.ptr<f32>
// CHECK: llvm.store %[[CST]], %[[P1]]
// CHECK: llvm.store %[[CST]], %[[P0]] : f32, !llvm.ptr
async.runtime.store %0, %1 : !async.value<f32>
return
}
Expand All @@ -163,8 +162,7 @@ func.func @load() -> f32 {
// CHECK: %[[VALUE:.*]] = call @mlirAsyncRuntimeCreateValue
%0 = async.runtime.create : !async.value<f32>
// CHECK: %[[P0:.*]] = call @mlirAsyncRuntimeGetValueStorage(%[[VALUE]])
// CHECK: %[[P1:.*]] = llvm.bitcast %[[P0]] : !llvm.ptr<i8> to !llvm.ptr<f32>
// CHECK: %[[VALUE:.*]] = llvm.load %[[P1]]
// CHECK: %[[VALUE:.*]] = llvm.load %[[P0]] : !llvm.ptr -> f32
%1 = async.runtime.load %0 : !async.value<f32>
// CHECK: return %[[VALUE]] : f32
return %1 : f32
Expand Down
27 changes: 12 additions & 15 deletions mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -split-input-file -async-to-async-runtime -convert-async-to-llvm | FileCheck %s
// RUN: mlir-opt %s -split-input-file -async-to-async-runtime -convert-async-to-llvm='use-opaque-pointers=1' | FileCheck %s

// CHECK-LABEL: reference_counting
func.func @reference_counting(%arg0: !async.token) {
Expand Down Expand Up @@ -35,7 +35,7 @@ func.func @execute_no_async_args(%arg0: f32, %arg1: memref<1xf32>) {

// Function outlined from the async.execute operation.
// CHECK-LABEL: func private @async_execute_fn(%arg0: f32, %arg1: memref<1xf32>)
// CHECK-SAME: -> !llvm.ptr<i8>
// CHECK-SAME: -> !llvm.ptr

// Create token for return op, and mark a function as a coroutine.
// CHECK: %[[RET:.*]] = call @mlirAsyncRuntimeCreateToken()
Expand Down Expand Up @@ -97,7 +97,7 @@ func.func @nested_async_execute(%arg0: f32, %arg1: f32, %arg2: memref<1xf32>) {

// Function outlined from the inner async.execute operation.
// CHECK-LABEL: func private @async_execute_fn(%arg0: f32, %arg1: memref<1xf32>)
// CHECK-SAME: -> !llvm.ptr<i8>
// CHECK-SAME: -> !llvm.ptr
// CHECK: %[[RET_0:.*]] = call @mlirAsyncRuntimeCreateToken()
// CHECK: %[[HDL_0:.*]] = llvm.intr.coro.begin
// CHECK: call @mlirAsyncRuntimeExecute
Expand All @@ -108,7 +108,7 @@ func.func @nested_async_execute(%arg0: f32, %arg1: f32, %arg2: memref<1xf32>) {

// Function outlined from the outer async.execute operation.
// CHECK-LABEL: func private @async_execute_fn_0(%arg0: f32, %arg1: memref<1xf32>, %arg2: f32)
// CHECK-SAME: -> !llvm.ptr<i8>
// CHECK-SAME: -> !llvm.ptr
// CHECK: %[[RET_1:.*]] = call @mlirAsyncRuntimeCreateToken()
// CHECK: %[[HDL_1:.*]] = llvm.intr.coro.begin

Expand Down Expand Up @@ -147,7 +147,7 @@ func.func @async_execute_token_dependency(%arg0: f32, %arg1: memref<1xf32>) {

// Function outlined from the first async.execute operation.
// CHECK-LABEL: func private @async_execute_fn(%arg0: f32, %arg1: memref<1xf32>)
// CHECK-SAME: -> !llvm.ptr<i8>
// CHECK-SAME: -> !llvm.ptr
// CHECK: %[[RET_0:.*]] = call @mlirAsyncRuntimeCreateToken()
// CHECK: %[[HDL_0:.*]] = llvm.intr.coro.begin
// CHECK: call @mlirAsyncRuntimeExecute
Expand All @@ -156,8 +156,8 @@ func.func @async_execute_token_dependency(%arg0: f32, %arg1: memref<1xf32>) {
// CHECK: call @mlirAsyncRuntimeEmplaceToken(%[[RET_0]])

// Function outlined from the second async.execute operation with dependency.
// CHECK-LABEL: func private @async_execute_fn_0(%arg0: !llvm.ptr<i8>, %arg1: f32, %arg2: memref<1xf32>)
// CHECK-SAME: -> !llvm.ptr<i8>
// CHECK-LABEL: func private @async_execute_fn_0(%arg0: !llvm.ptr, %arg1: f32, %arg2: memref<1xf32>)
// CHECK-SAME: -> !llvm.ptr
// CHECK: %[[RET_1:.*]] = call @mlirAsyncRuntimeCreateToken()
// CHECK: %[[HDL_1:.*]] = llvm.intr.coro.begin

Expand Down Expand Up @@ -200,7 +200,7 @@ func.func @async_group_await_all(%arg0: f32, %arg1: memref<1xf32>) {
}

// Function outlined from the async.execute operation.
// CHECK: func private @async_execute_fn_0(%arg0: !llvm.ptr<i8>)
// CHECK: func private @async_execute_fn_0(%arg0: !llvm.ptr)
// CHECK: %[[RET_1:.*]] = call @mlirAsyncRuntimeCreateToken()
// CHECK: %[[HDL_1:.*]] = llvm.intr.coro.begin

Expand All @@ -227,8 +227,7 @@ func.func @execute_and_return_f32() -> f32 {
}

// CHECK: %[[STORAGE:.*]] = call @mlirAsyncRuntimeGetValueStorage(%[[RET]]#1)
// CHECK: %[[ST_F32:.*]] = llvm.bitcast %[[STORAGE]]
// CHECK: %[[LOADED:.*]] = llvm.load %[[ST_F32]] : !llvm.ptr<f32>
// CHECK: %[[LOADED:.*]] = llvm.load %[[STORAGE]] : !llvm.ptr -> f32
%0 = async.await %result : !async.value<f32>

return %0 : f32
Expand All @@ -247,8 +246,7 @@ func.func @execute_and_return_f32() -> f32 {
// Emplace result value.
// CHECK: %[[CST:.*]] = arith.constant 1.230000e+02 : f32
// CHECK: %[[STORAGE:.*]] = call @mlirAsyncRuntimeGetValueStorage(%[[VALUE]])
// CHECK: %[[ST_F32:.*]] = llvm.bitcast %[[STORAGE]]
// CHECK: llvm.store %[[CST]], %[[ST_F32]] : !llvm.ptr<f32>
// CHECK: llvm.store %[[CST]], %[[STORAGE]] : f32, !llvm.ptr
// CHECK: call @mlirAsyncRuntimeEmplaceValue(%[[VALUE]])

// Emplace result token.
Expand Down Expand Up @@ -280,7 +278,7 @@ func.func @async_value_operands() {
// CHECK-LABEL: func private @async_execute_fn()

// Function outlined from the second async.execute operation.
// CHECK-LABEL: func private @async_execute_fn_0(%arg0: !llvm.ptr<i8>)
// CHECK-LABEL: func private @async_execute_fn_0(%arg0: !llvm.ptr)
// CHECK: %[[TOKEN:.*]] = call @mlirAsyncRuntimeCreateToken()
// CHECK: %[[HDL:.*]] = llvm.intr.coro.begin

Expand All @@ -295,8 +293,7 @@ func.func @async_value_operands() {

// Get the operand value storage, cast to f32 and add the value.
// CHECK: %[[STORAGE:.*]] = call @mlirAsyncRuntimeGetValueStorage(%arg0)
// CHECK: %[[ST_F32:.*]] = llvm.bitcast %[[STORAGE]]
// CHECK: %[[LOADED:.*]] = llvm.load %[[ST_F32]] : !llvm.ptr<f32>
// CHECK: %[[LOADED:.*]] = llvm.load %[[STORAGE]] : !llvm.ptr -> f32
// CHECK: arith.addf %[[LOADED]], %[[LOADED]] : f32

// Emplace result token.
Expand Down
138 changes: 138 additions & 0 deletions mlir/test/Conversion/AsyncToLLVM/typed-pointers.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// RUN: mlir-opt %s -split-input-file -async-to-async-runtime -convert-async-to-llvm='use-opaque-pointers=0' | FileCheck %s



// CHECK-LABEL: @store
func.func @store() {
// CHECK: %[[CST:.*]] = arith.constant 1.0
%0 = arith.constant 1.0 : f32
// CHECK: %[[VALUE:.*]] = call @mlirAsyncRuntimeCreateValue
%1 = async.runtime.create : !async.value<f32>
// CHECK: %[[P0:.*]] = call @mlirAsyncRuntimeGetValueStorage(%[[VALUE]])
// CHECK: %[[P1:.*]] = llvm.bitcast %[[P0]] : !llvm.ptr<i8> to !llvm.ptr<f32>
// CHECK: llvm.store %[[CST]], %[[P1]]
async.runtime.store %0, %1 : !async.value<f32>
return
}

// CHECK-LABEL: @load
func.func @load() -> f32 {
// CHECK: %[[VALUE:.*]] = call @mlirAsyncRuntimeCreateValue
%0 = async.runtime.create : !async.value<f32>
// CHECK: %[[P0:.*]] = call @mlirAsyncRuntimeGetValueStorage(%[[VALUE]])
// CHECK: %[[P1:.*]] = llvm.bitcast %[[P0]] : !llvm.ptr<i8> to !llvm.ptr<f32>
// CHECK: %[[VALUE:.*]] = llvm.load %[[P1]]
%1 = async.runtime.load %0 : !async.value<f32>
// CHECK: return %[[VALUE]] : f32
return %1 : f32
}

// -----

// CHECK-LABEL: execute_no_async_args
func.func @execute_no_async_args(%arg0: f32, %arg1: memref<1xf32>) {
// CHECK: %[[TOKEN:.*]] = call @async_execute_fn(%arg0, %arg1)
%token = async.execute {
%c0 = arith.constant 0 : index
memref.store %arg0, %arg1[%c0] : memref<1xf32>
async.yield
}
// CHECK: call @mlirAsyncRuntimeAwaitToken(%[[TOKEN]])
// CHECK: %[[IS_ERROR:.*]] = call @mlirAsyncRuntimeIsTokenError(%[[TOKEN]])
// CHECK: %[[TRUE:.*]] = arith.constant true
// CHECK: %[[NOT_ERROR:.*]] = arith.xori %[[IS_ERROR]], %[[TRUE]] : i1
// CHECK: cf.assert %[[NOT_ERROR]]
// CHECK-NEXT: return
async.await %token : !async.token
return
}

// Function outlined from the async.execute operation.
// CHECK-LABEL: func private @async_execute_fn(%arg0: f32, %arg1: memref<1xf32>)
// CHECK-SAME: -> !llvm.ptr<i8>

// Create token for return op, and mark a function as a coroutine.
// CHECK: %[[RET:.*]] = call @mlirAsyncRuntimeCreateToken()
// CHECK: %[[HDL:.*]] = llvm.intr.coro.begin

// Pass a suspended coroutine to the async runtime.
// CHECK: %[[STATE:.*]] = llvm.intr.coro.save
// CHECK: %[[RESUME:.*]] = llvm.mlir.addressof @__resume
// CHECK: call @mlirAsyncRuntimeExecute(%[[HDL]], %[[RESUME]])
// CHECK: %[[SUSPENDED:.*]] = llvm.intr.coro.suspend %[[STATE]]

// Decide the next block based on the code returned from suspend.
// CHECK: %[[SEXT:.*]] = llvm.sext %[[SUSPENDED]] : i8 to i32
// CHECK: llvm.switch %[[SEXT]] : i32, ^[[SUSPEND:[b0-9]+]]
// CHECK-NEXT: 0: ^[[RESUME:[b0-9]+]]
// CHECK-NEXT: 1: ^[[CLEANUP:[b0-9]+]]

// Resume coroutine after suspension.
// CHECK: ^[[RESUME]]:
// CHECK: memref.store %arg0, %arg1[%c0] : memref<1xf32>
// CHECK: call @mlirAsyncRuntimeEmplaceToken(%[[RET]])

// Delete coroutine.
// CHECK: ^[[CLEANUP]]:
// CHECK: %[[MEM:.*]] = llvm.intr.coro.free
// CHECK: llvm.call @free(%[[MEM]])

// Suspend coroutine, and also a return statement for ramp function.
// CHECK: ^[[SUSPEND]]:
// CHECK: llvm.intr.coro.end
// CHECK: return %[[RET]]

// -----

// CHECK-LABEL: execute_and_return_f32
func.func @execute_and_return_f32() -> f32 {
// CHECK: %[[RET:.*]]:2 = call @async_execute_fn
%token, %result = async.execute -> !async.value<f32> {
%c0 = arith.constant 123.0 : f32
async.yield %c0 : f32
}

// CHECK: %[[STORAGE:.*]] = call @mlirAsyncRuntimeGetValueStorage(%[[RET]]#1)
// CHECK: %[[ST_F32:.*]] = llvm.bitcast %[[STORAGE]]
// CHECK: %[[LOADED:.*]] = llvm.load %[[ST_F32]] : !llvm.ptr<f32>
%0 = async.await %result : !async.value<f32>

return %0 : f32
}

// Function outlined from the async.execute operation.
// CHECK-LABEL: func private @async_execute_fn()
// CHECK: %[[TOKEN:.*]] = call @mlirAsyncRuntimeCreateToken()
// CHECK: %[[VALUE:.*]] = call @mlirAsyncRuntimeCreateValue
// CHECK: %[[HDL:.*]] = llvm.intr.coro.begin

// Suspend coroutine in the beginning.
// CHECK: call @mlirAsyncRuntimeExecute(%[[HDL]],
// CHECK: llvm.intr.coro.suspend

// Emplace result value.
// CHECK: %[[CST:.*]] = arith.constant 1.230000e+02 : f32
// CHECK: %[[STORAGE:.*]] = call @mlirAsyncRuntimeGetValueStorage(%[[VALUE]])
// CHECK: %[[ST_F32:.*]] = llvm.bitcast %[[STORAGE]]
// CHECK: llvm.store %[[CST]], %[[ST_F32]] : !llvm.ptr<f32>
// CHECK: call @mlirAsyncRuntimeEmplaceValue(%[[VALUE]])

// Emplace result token.
// CHECK: call @mlirAsyncRuntimeEmplaceToken(%[[TOKEN]])

// -----

// CHECK-LABEL: @await_and_resume_group
func.func @await_and_resume_group() {
%c = arith.constant 1 : index
%0 = async.coro.id
// CHECK: %[[HDL:.*]] = llvm.intr.coro.begin
%1 = async.coro.begin %0
// CHECK: %[[TOKEN:.*]] = call @mlirAsyncRuntimeCreateGroup
%2 = async.runtime.create_group %c : !async.group
// CHECK: %[[RESUME:.*]] = llvm.mlir.addressof @__resume
// CHECK: call @mlirAsyncRuntimeAwaitAllInGroupAndExecute
// CHECK-SAME: (%[[TOKEN]], %[[HDL]], %[[RESUME]])
async.runtime.await_and_resume %2, %1 : !async.group
return
}
2 changes: 1 addition & 1 deletion mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func.func @atomic_write(%a: !llvm.ptr<i32>) -> () {
// CHECK: (%[[ARG0:.*]]: !llvm.ptr<i32>, %[[ARG1:.*]]: !llvm.ptr<i32>)
// CHECK: omp.atomic.read %[[ARG1]] = %[[ARG0]] memory_order(acquire) hint(contended) : !llvm.ptr<i32>
func.func @atomic_read(%a: !llvm.ptr<i32>, %b: !llvm.ptr<i32>) -> () {
omp.atomic.read %b = %a memory_order(acquire) hint(contended) : !llvm.ptr<i32>
omp.atomic.read %b = %a memory_order(acquire) hint(contended) : !llvm.ptr<i32>, i32
return
}

Expand Down
48 changes: 24 additions & 24 deletions mlir/test/Dialect/OpenMP/invalid.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ func.func @foo(%lb : index, %ub : index, %step : index) {
for (%iv) : index = (%lb) to (%ub) step (%step) {
%2 = arith.constant 2.0 : f32
// expected-error @below {{accumulator is not used by the parent}}
omp.reduction %2, %1 : !llvm.ptr<f32>
omp.reduction %2, %1 : f32, !llvm.ptr<f32>
omp.yield
}
return
Expand All @@ -474,7 +474,7 @@ func.func @foo(%lb : index, %ub : index, %step : index) {
omp.wsloop reduction(@foo -> %0 : !llvm.ptr<f32>)
for (%iv) : index = (%lb) to (%ub) step (%step) {
%2 = arith.constant 2.0 : f32
omp.reduction %2, %1 : !llvm.ptr<f32>
omp.reduction %2, %1 : f32, !llvm.ptr<f32>
omp.yield
}
return
Expand Down Expand Up @@ -502,7 +502,7 @@ func.func @foo(%lb : index, %ub : index, %step : index) {
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @add_f32 -> %0 : !llvm.ptr<f32>)
for (%iv) : index = (%lb) to (%ub) step (%step) {
%2 = arith.constant 2.0 : f32
omp.reduction %2, %0 : !llvm.ptr<f32>
omp.reduction %2, %0 : f32, !llvm.ptr<f32>
omp.yield
}
return
Expand Down Expand Up @@ -535,7 +535,7 @@ func.func @foo(%lb : index, %ub : index, %step : index, %mem : memref<1xf32>) {
omp.wsloop reduction(@add_f32 -> %mem : memref<1xf32>)
for (%iv) : index = (%lb) to (%ub) step (%step) {
%2 = arith.constant 2.0 : f32
omp.reduction %2, %mem : memref<1xf32>
omp.reduction %2, %mem : f32, memref<1xf32>
omp.yield
}
return
Expand Down Expand Up @@ -630,55 +630,55 @@ func.func @omp_ordered5(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64, %vec

func.func @omp_atomic_read1(%x: memref<i32>, %v: memref<i32>) {
// expected-error @below {{the hints omp_sync_hint_nonspeculative and omp_sync_hint_speculative cannot be combined.}}
omp.atomic.read %v = %x hint(speculative, nonspeculative) : memref<i32>
omp.atomic.read %v = %x hint(speculative, nonspeculative) : memref<i32>, i32
return
}

// -----

func.func @omp_atomic_read2(%x: memref<i32>, %v: memref<i32>) {
// expected-error @below {{invalid clause value: 'xyz'}}
omp.atomic.read %v = %x memory_order(xyz) : memref<i32>
omp.atomic.read %v = %x memory_order(xyz) : memref<i32>, i32
return
}

// -----

func.func @omp_atomic_read3(%x: memref<i32>, %v: memref<i32>) {
// expected-error @below {{memory-order must not be acq_rel or release for atomic reads}}
omp.atomic.read %v = %x memory_order(acq_rel) : memref<i32>
omp.atomic.read %v = %x memory_order(acq_rel) : memref<i32>, i32
return
}

// -----

func.func @omp_atomic_read4(%x: memref<i32>, %v: memref<i32>) {
// expected-error @below {{memory-order must not be acq_rel or release for atomic reads}}
omp.atomic.read %v = %x memory_order(release) : memref<i32>
omp.atomic.read %v = %x memory_order(release) : memref<i32>, i32
return
}

// -----

func.func @omp_atomic_read5(%x: memref<i32>, %v: memref<i32>) {
// expected-error @below {{`memory_order` clause can appear at most once in the expansion of the oilist directive}}
omp.atomic.read %v = %x memory_order(acquire) memory_order(relaxed) : memref<i32>
omp.atomic.read %v = %x memory_order(acquire) memory_order(relaxed) : memref<i32>, i32
return
}

// -----

func.func @omp_atomic_read6(%x: memref<i32>, %v: memref<i32>) {
// expected-error @below {{`hint` clause can appear at most once in the expansion of the oilist directive}}
omp.atomic.read %v = %x hint(speculative) hint(contended) : memref<i32>
omp.atomic.read %v = %x hint(speculative) hint(contended) : memref<i32>, i32
return
}

// -----

func.func @omp_atomic_read6(%x: memref<i32>, %v: memref<i32>) {
// expected-error @below {{read and write must not be to the same location for atomic reads}}
omp.atomic.read %x = %x hint(speculative) : memref<i32>
omp.atomic.read %x = %x hint(speculative) : memref<i32>, i32
return
}

Expand Down Expand Up @@ -876,7 +876,7 @@ func.func @omp_atomic_update(%x: memref<i32>, %expr: i32) {
func.func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
// expected-error @below {{expected three operations in omp.atomic.capture region}}
omp.atomic.capture {
omp.atomic.read %v = %x : memref<i32>
omp.atomic.read %v = %x : memref<i32>, i32
omp.terminator
}
return
Expand All @@ -887,8 +887,8 @@ func.func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
func.func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
omp.atomic.capture {
// expected-error @below {{invalid sequence of operations in the capture region}}
omp.atomic.read %v = %x : memref<i32>
omp.atomic.read %v = %x : memref<i32>
omp.atomic.read %v = %x : memref<i32>, i32
omp.atomic.read %v = %x : memref<i32>, i32
omp.terminator
}
return
Expand Down Expand Up @@ -964,7 +964,7 @@ func.func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
omp.atomic.capture {
// expected-error @below {{invalid sequence of operations in the capture region}}
omp.atomic.write %x = %expr : memref<i32>, i32
omp.atomic.read %v = %x : memref<i32>
omp.atomic.read %v = %x : memref<i32>, i32
omp.terminator
}
return
Expand All @@ -980,7 +980,7 @@ func.func @omp_atomic_capture(%x: memref<i32>, %y: memref<i32>, %v: memref<i32>,
%newval = llvm.add %xval, %expr : i32
omp.yield (%newval : i32)
}
omp.atomic.read %v = %y : memref<i32>
omp.atomic.read %v = %y : memref<i32>, i32
omp.terminator
}
}
Expand All @@ -990,7 +990,7 @@ func.func @omp_atomic_capture(%x: memref<i32>, %y: memref<i32>, %v: memref<i32>,
func.func @omp_atomic_capture(%x: memref<i32>, %y: memref<i32>, %v: memref<i32>, %expr: i32) {
omp.atomic.capture {
// expected-error @below {{captured variable in omp.atomic.read must be updated in second operation}}
omp.atomic.read %v = %y : memref<i32>
omp.atomic.read %v = %y : memref<i32>, i32
omp.atomic.update %x : memref<i32> {
^bb0(%xval: i32):
%newval = llvm.add %xval, %expr : i32
Expand All @@ -1005,7 +1005,7 @@ func.func @omp_atomic_capture(%x: memref<i32>, %y: memref<i32>, %v: memref<i32>,
func.func @omp_atomic_capture(%x: memref<i32>, %y: memref<i32>, %v: memref<i32>, %expr: i32) {
omp.atomic.capture {
// expected-error @below {{captured variable in omp.atomic.read must be updated in second operation}}
omp.atomic.read %v = %x : memref<i32>
omp.atomic.read %v = %x : memref<i32>, i32
omp.atomic.write %y = %expr : memref<i32>, i32
omp.terminator
}
Expand All @@ -1021,7 +1021,7 @@ func.func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
%newval = llvm.add %xval, %expr : i32
omp.yield(%newval : i32)
}
omp.atomic.read %v = %x : memref<i32>
omp.atomic.read %v = %x : memref<i32>, i32
}
return
}
Expand All @@ -1036,7 +1036,7 @@ func.func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
%newval = llvm.add %xval, %expr : i32
omp.yield(%newval : i32)
}
omp.atomic.read %v = %x : memref<i32>
omp.atomic.read %v = %x : memref<i32>, i32
}
return
}
Expand All @@ -1051,7 +1051,7 @@ func.func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
%newval = llvm.add %xval, %expr : i32
omp.yield(%newval : i32)
}
omp.atomic.read %v = %x : memref<i32>
omp.atomic.read %v = %x : memref<i32>, i32
}
return
}
Expand All @@ -1066,7 +1066,7 @@ func.func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
%newval = llvm.add %xval, %expr : i32
omp.yield(%newval : i32)
}
omp.atomic.read %v = %x : memref<i32>
omp.atomic.read %v = %x : memref<i32>, i32
}
return
}
Expand All @@ -1081,7 +1081,7 @@ func.func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
%newval = llvm.add %xval, %expr : i32
omp.yield(%newval : i32)
}
omp.atomic.read %v = %x : memref<i32>
omp.atomic.read %v = %x : memref<i32>, i32
}
return
}
Expand All @@ -1096,7 +1096,7 @@ func.func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
%newval = llvm.add %xval, %expr : i32
omp.yield(%newval : i32)
}
omp.atomic.read %v = %x memory_order(seq_cst) : memref<i32>
omp.atomic.read %v = %x memory_order(seq_cst) : memref<i32>, i32
}
return
}
Expand Down
161 changes: 102 additions & 59 deletions mlir/test/Dialect/OpenMP/ops.mlir

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %expr: i32
llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %v: !llvm.ptr<i32>, %expr: i32) {
// expected-error @+1 {{LLVM Translation failed for operation: omp.atomic.capture}}
omp.atomic.capture memory_order(seq_cst) {
omp.atomic.read %v = %x : !llvm.ptr<i32>
omp.atomic.read %v = %x : !llvm.ptr<i32>, i32
// expected-error @+1 {{the update operation inside the region must be a binary operation and that update operation must have the region argument as an operand}}
omp.atomic.update %x : !llvm.ptr<i32> {
^bb0(%xval: i32):
Expand All @@ -55,7 +55,7 @@ llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %v: !llvm.
llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %v: !llvm.ptr<i32>, %expr: i32) {
// expected-error @+1 {{LLVM Translation failed for operation: omp.atomic.capture}}
omp.atomic.capture memory_order(seq_cst) {
omp.atomic.read %v = %x : !llvm.ptr<i32>
omp.atomic.read %v = %x : !llvm.ptr<i32>, i32
// expected-error @+1 {{exactly two operations are allowed inside an atomic update region while lowering to LLVM IR}}
omp.atomic.update %x : !llvm.ptr<i32> {
^bb0(%xval: i32):
Expand Down
112 changes: 67 additions & 45 deletions mlir/test/Target/LLVMIR/openmp-llvm.mlir

Large diffs are not rendered by default.

75 changes: 75 additions & 0 deletions mlir/test/Target/LLVMIR/openmp-reduction-typed-pointers.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s

// Only check the overall shape of the code and the presence of relevant
// runtime calls. Actual IR checking is done at the OpenMPIRBuilder level.

omp.reduction.declare @add_f32 : f32
init {
^bb0(%arg: f32):
%0 = llvm.mlir.constant(0.0 : f32) : f32
omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
%1 = llvm.fadd %arg0, %arg1 : f32
omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
%2 = llvm.load %arg3 : !llvm.ptr<f32>
llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr<f32>, f32
omp.yield
}

// CHECK-LABEL: @simple_reduction
// Worksharing loop with a single f32 sum reduction through a typed pointer.
// Each iteration contributes the constant 2.0 via omp.reduction; the CHECK
// lines below verify the runtime calls and the privatized accumulator.
llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
%c1 = llvm.mlir.constant(1 : i32) : i32
// Allocate one f32 slot for the reduction accumulator. The allocated
// element type now matches the !llvm.ptr<f32> result (the original
// allocated `x i32`, reinterpreting the slot as f32 storage).
%0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr<f32>
omp.parallel {
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
%1 = llvm.mlir.constant(2.0 : f32) : f32
// Typed-pointer form of omp.reduction: value type, then accumulator type.
omp.reduction %1, %0 : f32, !llvm.ptr<f32>
omp.yield
}
omp.terminator
}
llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE:.+]] = alloca float
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
// CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float
72 changes: 36 additions & 36 deletions mlir/test/Target/LLVMIR/openmp-reduction.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,21 @@ combiner {
omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
%2 = llvm.load %arg3 : !llvm.ptr<f32>
llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr<f32>, f32
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
%2 = llvm.load %arg3 : !llvm.ptr -> f32
llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
omp.yield
}

// CHECK-LABEL: @simple_reduction
llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
%c1 = llvm.mlir.constant(1 : i32) : i32
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
omp.parallel {
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>)
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
%1 = llvm.mlir.constant(2.0 : f32) : f32
omp.reduction %1, %0 : !llvm.ptr<f32>
omp.reduction %1, %0 : f32, !llvm.ptr
omp.yield
}
omp.terminator
Expand Down Expand Up @@ -88,9 +88,9 @@ combiner {
omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
%2 = llvm.load %arg3 : !llvm.ptr<f32>
llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr<f32>, f32
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
%2 = llvm.load %arg3 : !llvm.ptr -> f32
llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
omp.yield
}

Expand All @@ -100,14 +100,14 @@ atomic {
// CHECK-LABEL: @reuse_declaration
llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {
%c1 = llvm.mlir.constant(1 : i32) : i32
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
%2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
%2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
omp.parallel {
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @add_f32 -> %2 : !llvm.ptr<f32>)
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr, @add_f32 -> %2 : !llvm.ptr)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
%1 = llvm.mlir.constant(2.0 : f32) : f32
omp.reduction %1, %0 : !llvm.ptr<f32>
omp.reduction %1, %2 : !llvm.ptr<f32>
omp.reduction %1, %0 : f32, !llvm.ptr
omp.reduction %1, %2 : f32, !llvm.ptr
omp.yield
}
omp.terminator
Expand Down Expand Up @@ -176,23 +176,23 @@ combiner {
omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
%2 = llvm.load %arg3 : !llvm.ptr<f32>
llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr<f32>, f32
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
%2 = llvm.load %arg3 : !llvm.ptr -> f32
llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
omp.yield
}

// It's okay not to reference the reduction variable in the body.
// CHECK-LABEL: @missing_omp_reduction
llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {
%c1 = llvm.mlir.constant(1 : i32) : i32
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
%2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
%2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
omp.parallel {
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @add_f32 -> %2 : !llvm.ptr<f32>)
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr, @add_f32 -> %2 : !llvm.ptr)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
%1 = llvm.mlir.constant(2.0 : f32) : f32
omp.reduction %1, %0 : !llvm.ptr<f32>
omp.reduction %1, %0 : f32, !llvm.ptr
omp.yield
}
omp.terminator
Expand Down Expand Up @@ -259,9 +259,9 @@ combiner {
omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
%2 = llvm.load %arg3 : !llvm.ptr<f32>
llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr<f32>, f32
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
%2 = llvm.load %arg3 : !llvm.ptr -> f32
llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
omp.yield
}

Expand All @@ -270,13 +270,13 @@ atomic {
// CHECK-LABEL: @double_reference
llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {
%c1 = llvm.mlir.constant(1 : i32) : i32
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
omp.parallel {
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>)
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
%1 = llvm.mlir.constant(2.0 : f32) : f32
omp.reduction %1, %0 : !llvm.ptr<f32>
omp.reduction %1, %0 : !llvm.ptr<f32>
omp.reduction %1, %0 : f32, !llvm.ptr
omp.reduction %1, %0 : f32, !llvm.ptr
omp.yield
}
omp.terminator
Expand Down Expand Up @@ -338,9 +338,9 @@ combiner {
omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
%2 = llvm.load %arg3 : !llvm.ptr<f32>
llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr<f32>, f32
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
%2 = llvm.load %arg3 : !llvm.ptr -> f32
llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
omp.yield
}

Expand All @@ -359,14 +359,14 @@ combiner {
// CHECK-LABEL: @no_atomic
llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
%c1 = llvm.mlir.constant(1 : i32) : i32
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
%2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
%2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
omp.parallel {
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @mul_f32 -> %2 : !llvm.ptr<f32>)
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr, @mul_f32 -> %2 : !llvm.ptr)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
%1 = llvm.mlir.constant(2.0 : f32) : f32
omp.reduction %1, %0 : !llvm.ptr<f32>
omp.reduction %1, %2 : !llvm.ptr<f32>
omp.reduction %1, %0 : f32, !llvm.ptr
omp.reduction %1, %2 : f32, !llvm.ptr
omp.yield
}
omp.terminator
Expand Down