diff --git a/clang/lib/AST/StmtOpenACC.cpp b/clang/lib/AST/StmtOpenACC.cpp index 462a10d45fbf0..39dfa19002da8 100644 --- a/clang/lib/AST/StmtOpenACC.cpp +++ b/clang/lib/AST/StmtOpenACC.cpp @@ -326,16 +326,30 @@ OpenACCAtomicConstruct *OpenACCAtomicConstruct::Create( static std::pair getBinaryOpArgs(const Expr *Op) { if (const auto *BO = dyn_cast(Op)) { - assert(BO->getOpcode() == BO_Assign); + assert(BO->isAssignmentOp()); return {BO->getLHS(), BO->getRHS()}; } const auto *OO = cast(Op); - assert(OO->getOperator() == OO_Equal); - + assert(OO->isAssignmentOp()); return {OO->getArg(0), OO->getArg(1)}; } +static std::pair getUnaryOpArgs(const Expr *Op) { + if (const auto *UO = dyn_cast(Op)) + return {true, UO->getSubExpr()}; + + if (const auto *OpCall = dyn_cast(Op)) { + // Post-inc/dec have a second unused argument to differentiate it, so we + // accept -- or ++ as unary, or any operator call with only 1 arg. + if (OpCall->getNumArgs() == 1 || OpCall->getOperator() != OO_PlusPlus || + OpCall->getOperator() != OO_MinusMinus) + return {true, OpCall->getArg(0)}; + } + + return {false, nullptr}; +} + const OpenACCAtomicConstruct::StmtInfo OpenACCAtomicConstruct::getAssociatedStmtInfo() const { // This ends up being a vastly simplified version of SemaOpenACCAtomic, since @@ -343,18 +357,17 @@ OpenACCAtomicConstruct::getAssociatedStmtInfo() const { // asserts to ensure we don't get off into the weeds. assert(getAssociatedStmt() && "invalid associated stmt?"); + const Expr *AssocStmt = cast(getAssociatedStmt()); switch (AtomicKind) { - case OpenACCAtomicKind::None: - case OpenACCAtomicKind::Update: case OpenACCAtomicKind::Capture: - assert(false && "Only 'read'/'write' have been implemented here"); + assert(false && "Only 'read'/'write'/'update' have been implemented here"); return {}; case OpenACCAtomicKind::Read: { // Read only supports the format 'v = x'; where both sides are a scalar // expression. This can come in 2 forms; BinaryOperator or // CXXOperatorCallExpr (rarely). std::pair BinaryArgs = - getBinaryOpArgs(cast(getAssociatedStmt())); + getBinaryOpArgs(AssocStmt); // We want the L-value for each side, so we ignore implicit casts. return {BinaryArgs.first->IgnoreImpCasts(), BinaryArgs.second->IgnoreImpCasts(), /*expr=*/nullptr}; @@ -364,13 +377,28 @@ OpenACCAtomicConstruct::getAssociatedStmtInfo() const { // type, and 'x' is a scalar l value. As above, this can come in 2 forms; // Binary Operator or CXXOperatorCallExpr. std::pair BinaryArgs = - getBinaryOpArgs(cast(getAssociatedStmt())); + getBinaryOpArgs(AssocStmt); // We want the L-value for ONLY the X side, so we ignore implicit casts. For // the right side (the expr), we emit it as an r-value so we need to // maintain implicit casts. return {/*v=*/nullptr, BinaryArgs.first->IgnoreImpCasts(), BinaryArgs.second}; } + case OpenACCAtomicKind::None: + case OpenACCAtomicKind::Update: { + std::pair UnaryArgs = getUnaryOpArgs(AssocStmt); + if (UnaryArgs.first) + return {/*v=*/nullptr, UnaryArgs.second->IgnoreImpCasts(), + /*expr=*/nullptr}; + + std::pair BinaryArgs = + getBinaryOpArgs(AssocStmt); + // For binary args, we just store the RHS as an expression (in the + // expression slot), since the codegen just wants the whole thing for a + // recipe. + return {/*v=*/nullptr, BinaryArgs.first->IgnoreImpCasts(), + BinaryArgs.second}; + } } llvm_unreachable("unknown OpenACC atomic kind"); diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 5f9dbdc64b9e5..a8ffab79e8398 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -665,6 +665,12 @@ class CIRGenFunction : public CIRGenTypeCache { symbolTable.insert(vd, addr.getPointer()); } + // Replaces the address of the local variable, if it exists. Else does the + // same thing as setAddrOfLocalVar. + void replaceAddrOfLocalVar(const clang::VarDecl *vd, Address addr) { + localDeclMap.insert_or_assign(vd, addr); + } + // A class to allow reverting changes to a var-decl's registration to the // localDeclMap. This is used in cases where things are being inserted into // the variable list but don't follow normal lookup/search rules, like in diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp index 349b111c0d8fd..9e55bd5b7ae71 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp @@ -304,12 +304,21 @@ CIRGenFunction::emitOpenACCCacheConstruct(const OpenACCCacheConstruct &s) { return mlir::success(); } +const VarDecl *getLValueDecl(const Expr *e) { + // We are going to assume that after stripping implicit casts, that the LValue + // is just a DRE around the var-decl. + + e = e->IgnoreImpCasts(); + + const auto *dre = cast(e); + return cast(dre->getDecl()); +} + mlir::LogicalResult CIRGenFunction::emitOpenACCAtomicConstruct(const OpenACCAtomicConstruct &s) { - // For now, we are only support 'read'/'write', so diagnose. We can switch on - // the kind later once we start implementing the other 2 forms. While we - if (s.getAtomicKind() != OpenACCAtomicKind::Read && - s.getAtomicKind() != OpenACCAtomicKind::Write) { + // For now, we are only support 'read'/'write'/'update', so diagnose. We can + // switch on the kind later once we implement the 'capture' form. + if (s.getAtomicKind() == OpenACCAtomicKind::Capture) { cgm.errorNYI(s.getSourceRange(), "OpenACC Atomic Construct"); return mlir::failure(); } @@ -318,11 +327,10 @@ CIRGenFunction::emitOpenACCAtomicConstruct(const OpenACCAtomicConstruct &s) { // expression it is associated with rather than emitting it inside of it. So // it has custom emit logic. mlir::Location start = getLoc(s.getSourceRange().getBegin()); + mlir::Location end = getLoc(s.getSourceRange().getEnd()); OpenACCAtomicConstruct::StmtInfo inf = s.getAssociatedStmtInfo(); switch (s.getAtomicKind()) { - case OpenACCAtomicKind::None: - case OpenACCAtomicKind::Update: case OpenACCAtomicKind::Capture: llvm_unreachable("Unimplemented atomic construct type, should have " "diagnosed/returned above"); @@ -353,6 +361,50 @@ CIRGenFunction::emitOpenACCAtomicConstruct(const OpenACCAtomicConstruct &s) { s.clauses()); return mlir::success(); } + case OpenACCAtomicKind::None: + case OpenACCAtomicKind::Update: { + mlir::Value x = emitLValue(inf.X).getPointer(); + auto op = + mlir::acc::AtomicUpdateOp::create(builder, start, x, /*ifCond=*/{}); + emitOpenACCClauses(op, s.getDirectiveKind(), s.getDirectiveLoc(), + s.clauses()); + mlir::LogicalResult res = mlir::success(); + { + mlir::OpBuilder::InsertionGuard guardCase(builder); + mlir::Type argTy = cast(x.getType()).getPointee(); + std::array recipeType{argTy}; + std::array recipeLoc{start}; + mlir::Block *recipeBlock = builder.createBlock( + &op.getRegion(), op.getRegion().end(), recipeType, recipeLoc); + builder.setInsertionPointToEnd(recipeBlock); + + // Since we have an initial value that we know is a scalar type, we can + // just emit the entire statement here after sneaking-in our 'alloca' in + // the right place, then loading out of it. Flang does a lot less work + // (probably does its own emitting!), but we have more complicated AST + // nodes to worry about, so we can just count on opt to remove the extra + // alloca/load/store set. + auto alloca = cir::AllocaOp::create( + builder, start, x.getType(), argTy, "x_var", + cgm.getSize(getContext().getTypeAlignInChars(inf.X->getType()))); + + alloca.setInitAttr(mlir::UnitAttr::get(&getMLIRContext())); + builder.CIRBaseBuilderTy::createStore(start, recipeBlock->getArgument(0), + alloca); + + const VarDecl *xval = getLValueDecl(inf.X); + CIRGenFunction::DeclMapRevertingRAII declMapRAII{*this, xval}; + replaceAddrOfLocalVar( + xval, Address{alloca, argTy, getContext().getDeclAlign(xval)}); + + res = emitStmt(s.getAssociatedStmt(), /*useCurrentScope=*/true); + + auto load = cir::LoadOp::create(builder, start, {alloca}); + mlir::acc::YieldOp::create(builder, end, {load}); + } + + return res; + } } llvm_unreachable("unknown OpenACC atomic kind"); diff --git a/clang/test/CIR/CodeGenOpenACC/atomic-update.cpp b/clang/test/CIR/CodeGenOpenACC/atomic-update.cpp new file mode 100644 index 0000000000000..7ab6b62c4b41e --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/atomic-update.cpp @@ -0,0 +1,151 @@ +// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s + +struct HasOps { + operator float(); + int thing(); +}; + +void use(int x, unsigned int y, float f, HasOps ops) { + // CHECK: cir.func{{.*}}(%[[X_ARG:.*]]: !s32i{{.*}}, %[[Y_ARG:.*]]: !u32i{{.*}}, %[[F_ARG:.*]]: !cir.float{{.*}}){{.*}}, %[[OPS_ARG:.*]]: !rec_HasOps{{.*}}) { + // CHECK-NEXT: %[[X_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr, ["x", init] + // CHECK-NEXT: %[[Y_ALLOCA:.*]] = cir.alloca !u32i, !cir.ptr, ["y", init] + // CHECK-NEXT: %[[F_ALLOCA:.*]] = cir.alloca !cir.float, !cir.ptr, ["f", init] + // CHECK-NEXT: %[[OPS_ALLOCA:.*]] = cir.alloca !rec_HasOps, !cir.ptr, ["ops", init] + // CHECK-NEXT: cir.store %[[X_ARG]], %[[X_ALLOCA]] : !s32i, !cir.ptr + // CHECK-NEXT: cir.store %[[Y_ARG]], %[[Y_ALLOCA]] : !u32i, !cir.ptr + // CHECK-NEXT: cir.store %[[F_ARG]], %[[F_ALLOCA]] : !cir.float, !cir.ptr + // CHECK-NEXT: cir.store %[[OPS_ARG]], %[[OPS_ALLOCA]] : !rec_HasOps, !cir.ptr + + // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr { + // CHECK-NEXT: ^bb0(%[[RECIPE_ARG:.*]]: !s32i{{.*}}): + // CHECK-NEXT: %[[TEMP_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr, ["x_var", init] + // CHECK-NEXT: cir.store %[[RECIPE_ARG]], %[[TEMP_ALLOCA]] : !s32i, !cir.ptr + // + // CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load{{.*}} %[[TEMP_ALLOCA]] : !cir.ptr, !s32i + // CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[TEMP_LOAD]]) nsw : !s32i, !s32i + // CHECK-NEXT: cir.store {{.*}}%[[INC]], %[[TEMP_ALLOCA]] : !s32i, !cir.ptr + // + // CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load{{.*}} %[[TEMP_ALLOCA]] : !cir.ptr, !s32i + // CHECK-NEXT: acc.yield %[[TEMP_LOAD]] : !s32i + // CHECK-NEXT: } +#pragma acc atomic update + ++x; + + // CHECK-NEXT: acc.atomic.update %[[Y_ALLOCA]] : !cir.ptr { + // CHECK-NEXT: ^bb0(%[[RECIPE_ARG:.*]]: !u32i{{.*}}): + // CHECK-NEXT: %[[TEMP_ALLOCA:.*]] = cir.alloca !u32i, !cir.ptr, ["x_var", init] + // CHECK-NEXT: cir.store %[[RECIPE_ARG]], %[[TEMP_ALLOCA]] : !u32i, !cir.ptr + // + // CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load{{.*}} %[[TEMP_ALLOCA]] : !cir.ptr, !u32i + // CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[TEMP_LOAD]]) : !u32i, !u32i + // CHECK-NEXT: cir.store {{.*}}%[[INC]], %[[TEMP_ALLOCA]] : !u32i, !cir.ptr + // + // CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load{{.*}} %[[TEMP_ALLOCA]] : !cir.ptr, !u32i + // CHECK-NEXT: acc.yield %[[TEMP_LOAD]] : !u32i + // CHECK-NEXT: } +#pragma acc atomic update + y++; + + // CHECK-NEXT: acc.atomic.update %[[F_ALLOCA]] : !cir.ptr { + // CHECK-NEXT: ^bb0(%[[RECIPE_ARG:.*]]: !cir.float{{.*}}): + // CHECK-NEXT: %[[TEMP_ALLOCA:.*]] = cir.alloca !cir.float, !cir.ptr, ["x_var", init] + // CHECK-NEXT: cir.store %[[RECIPE_ARG]], %[[TEMP_ALLOCA]] : !cir.float, !cir.ptr + // + // CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load{{.*}} %[[TEMP_ALLOCA]] : !cir.ptr, !cir.float + // CHECK-NEXT: %[[INC:.*]] = cir.unary(dec, %[[TEMP_LOAD]]) : !cir.float, !cir.float + // CHECK-NEXT: cir.store {{.*}}%[[INC]], %[[TEMP_ALLOCA]] : !cir.float, !cir.ptr + // + // CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load{{.*}} %[[TEMP_ALLOCA]] : !cir.ptr, !cir.float + // CHECK-NEXT: acc.yield %[[TEMP_LOAD]] : !cir.float + // CHECK-NEXT: } +#pragma acc atomic update + f--; + + // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr { + // CHECK-NEXT: ^bb0(%[[RECIPE_ARG:.*]]: !s32i{{.*}}): + // CHECK-NEXT: %[[TEMP_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr, ["x_var", init] + // CHECK-NEXT: cir.store %[[RECIPE_ARG]], %[[TEMP_ALLOCA]] : !s32i, !cir.ptr + // + // CHECK-NEXT: %[[F_LOAD:.*]] = cir.load{{.*}} %[[F_ALLOCA]] : !cir.ptr, !cir.float + // CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load{{.*}} %[[TEMP_ALLOCA]] : !cir.ptr, !s32i + // CHECK-NEXT: %[[INT_TO_F:.*]] = cir.cast int_to_float %[[TEMP_LOAD]] : !s32i -> !cir.float + // CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[INT_TO_F]], %[[F_LOAD]]) : !cir.float + // CHECK-NEXT: %[[F_TO_INT:.*]] = cir.cast float_to_int %[[ADD]] : !cir.float -> !s32i + // CHECK-NEXT: cir.store{{.*}} %[[F_TO_INT]], %[[TEMP_ALLOCA]] : !s32i, !cir.ptr + // + // CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load{{.*}} %[[TEMP_ALLOCA]] : !cir.ptr, !s32i + // CHECK-NEXT: acc.yield %[[TEMP_LOAD]] : !s32i + // CHECK-NEXT: } +#pragma acc atomic update + x += f; + + // CHECK-NEXT: acc.atomic.update %[[F_ALLOCA]] : !cir.ptr { + // CHECK-NEXT: ^bb0(%[[RECIPE_ARG:.*]]: !cir.float{{.*}}): + // CHECK-NEXT: %[[TEMP_ALLOCA:.*]] = cir.alloca !cir.float, !cir.ptr, ["x_var", init] + // CHECK-NEXT: cir.store %[[RECIPE_ARG]], %[[TEMP_ALLOCA]] : !cir.float, !cir.ptr + // + // CHECK-NEXT: %[[Y_LOAD:.*]] = cir.load{{.*}} %[[Y_ALLOCA]] : !cir.ptr, !u32i + // CHECK-NEXT: %[[INT_TO_F:.*]] = cir.cast int_to_float %[[Y_LOAD]] : !u32i -> !cir.float + // CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load{{.*}} %[[TEMP_ALLOCA]] : !cir.ptr, !cir.float + // CHECK-NEXT: %[[DIV:.*]] = cir.binop(div, %[[TEMP_LOAD]], %[[INT_TO_F]]) : !cir.float + // CHECK-NEXT: cir.store{{.*}} %[[DIV]], %[[TEMP_ALLOCA]] : !cir.float, !cir.ptr + // + // CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load{{.*}} %[[TEMP_ALLOCA]] : !cir.ptr, !cir.float + // CHECK-NEXT: acc.yield %[[TEMP_LOAD]] : !cir.float + // CHECK-NEXT: } +#pragma acc atomic update + f /= y; + + // CHECK-NEXT: acc.atomic.update %[[Y_ALLOCA]] : !cir.ptr { + // CHECK-NEXT: ^bb0(%[[RECIPE_ARG:.*]]: !u32i{{.*}}): + // CHECK-NEXT: %[[TEMP_ALLOCA:.*]] = cir.alloca !u32i, !cir.ptr, ["x_var", init] + // CHECK-NEXT: cir.store %[[RECIPE_ARG]], %[[TEMP_ALLOCA]] : !u32i, !cir.ptr + // + // CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load{{.*}} %[[TEMP_ALLOCA]] : !cir.ptr, !u32i + // CHECK-NEXT: %[[CALL:.*]] = cir.call {{.*}}(%[[OPS_ALLOCA]]) : (!cir.ptr) -> !s32i + // CHECK-NEXT: %[[CALL_CAST:.*]] = cir.cast integral %[[CALL]] : !s32i -> !u32i + // CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[TEMP_LOAD]], %[[CALL_CAST]]) : !u32i + // CHECK-NEXT: cir.store{{.*}} %[[MUL]], %[[TEMP_ALLOCA]] : !u32i, !cir.ptr + // + // CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load{{.*}} %[[TEMP_ALLOCA]] : !cir.ptr, !u32i + // CHECK-NEXT: acc.yield %[[TEMP_LOAD]] : !u32i + // CHECK-NEXT: } + +#pragma acc atomic update + y = y * ops.thing(); + + // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr { + // CHECK-NEXT: ^bb0(%[[RECIPE_ARG:.*]]: !s32i{{.*}}): + // CHECK-NEXT: %[[TEMP_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr, ["x_var", init] + // CHECK-NEXT: cir.store %[[RECIPE_ARG]], %[[TEMP_ALLOCA]] : !s32i, !cir.ptr + // + // CHECK-NEXT: %[[CALL:.*]] = cir.call {{.*}}(%[[OPS_ALLOCA]]) : (!cir.ptr) -> !s32i + // CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load{{.*}} %[[TEMP_ALLOCA]] : !cir.ptr, !s32i + // CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[CALL]], %[[INT_TO_F]]) : !s32i + // CHECK-NEXT: cir.store{{.*}} %[[OR]], %[[TEMP_ALLOCA]] : !s32i, !cir.ptr + // + // CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load{{.*}} %[[TEMP_ALLOCA]] : !cir.ptr, !s32i + // CHECK-NEXT: acc.yield %[[TEMP_LOAD]] : !s32i + // CHECK-NEXT: } +#pragma acc atomic update + x = ops.thing() | x; + + // CHECK-NEXT: %[[X_LOAD:.*]] = cir.load{{.*}} %[[X_ALLOCA]] : !cir.ptr, !s32i + // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[X_LOAD]] : !s32i -> !cir.bool + // CHECK-NEXT: %[[X_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1 + // CHECK-NEXT: acc.atomic.update if(%[[X_CAST]]) %[[F_ALLOCA]] : !cir.ptr { + // CHECK-NEXT: ^bb0(%[[RECIPE_ARG:.*]]: !cir.float{{.*}}): + // CHECK-NEXT: %[[TEMP_ALLOCA:.*]] = cir.alloca !cir.float, !cir.ptr, ["x_var", init] + // CHECK-NEXT: cir.store %[[RECIPE_ARG]], %[[TEMP_ALLOCA]] : !cir.float, !cir.ptr + // + // CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load{{.*}} %[[TEMP_ALLOCA]] : !cir.ptr, !cir.float + // CHECK-NEXT: %[[CALL:.*]] = cir.call {{.*}}(%[[OPS_ALLOCA]]) : (!cir.ptr) -> !cir.float + // CHECK-NEXT: %[[SUB:.*]] = cir.binop(sub, %[[TEMP_LOAD]], %[[CALL]]) : !cir.float + // CHECK-NEXT: cir.store{{.*}} %[[SUB]], %[[TEMP_ALLOCA]] : !cir.float, !cir.ptr + // + // CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load{{.*}} %[[TEMP_ALLOCA]] : !cir.ptr, !cir.float + // CHECK-NEXT: acc.yield %[[TEMP_LOAD]] : !cir.float + // CHECK-NEXT: } +#pragma acc atomic update if (x) + f = f - ops; +} diff --git a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp b/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp index 33e12fe1cd833..b4d76e18bf345 100644 --- a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp +++ b/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp @@ -3,8 +3,8 @@ void HelloWorld(int *A, int *B, int *C, int N) { // expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Atomic Construct}} -#pragma acc atomic - N = N + 1; +#pragma acc atomic capture + B = A += ++N; // expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Declare Construct}} #pragma acc declare create(A)