diff --git a/flang/include/flang/Optimizer/CMakeLists.txt b/flang/include/flang/Optimizer/CMakeLists.txt
index 3336ac935e101..68af52f1b8dc7 100644
--- a/flang/include/flang/Optimizer/CMakeLists.txt
+++ b/flang/include/flang/Optimizer/CMakeLists.txt
@@ -2,4 +2,5 @@ add_subdirectory(CodeGen)
 add_subdirectory(Dialect)
 add_subdirectory(HLFIR)
 add_subdirectory(Transforms)
+add_subdirectory(OpenACC)
 add_subdirectory(OpenMP)
diff --git a/flang/include/flang/Optimizer/OpenACC/CMakeLists.txt b/flang/include/flang/Optimizer/OpenACC/CMakeLists.txt
new file mode 100644
index 0000000000000..a032488569b19
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenACC/CMakeLists.txt
@@ -0,0 +1,4 @@
+set(LLVM_TARGET_DEFINITIONS Passes.td)
+mlir_tablegen(Passes.h.inc -gen-pass-decls -name FIROpenACC)
+
+add_public_tablegen_target(FIROpenACCPassesIncGen)
diff --git a/flang/include/flang/Optimizer/OpenACC/Passes.h b/flang/include/flang/Optimizer/OpenACC/Passes.h
new file mode 100644
index 0000000000000..0627cc8ce4a6d
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenACC/Passes.h
@@ -0,0 +1,35 @@
+//===- Passes.h - OpenACC pass entry points -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header declares the OpenACC passes specific to Fortran and FIR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_OPENACC_PASSES_H
+#define FORTRAN_OPTIMIZER_OPENACC_PASSES_H
+
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassRegistry.h"
+
+#include <memory>
+
+namespace fir {
+namespace acc {
+#define GEN_PASS_DECL
+#define GEN_PASS_REGISTRATION
+#include "flang/Optimizer/OpenACC/Passes.h.inc"
+
+/// Creates the pass that rewrites boxed OpenACC recipes, and the operations
+/// using them, to work on references to boxes.
+std::unique_ptr<mlir::Pass> createACCRecipeBufferizationPass();
+
+} // namespace acc
+} // namespace fir
+
+#endif // FORTRAN_OPTIMIZER_OPENACC_PASSES_H
diff --git a/flang/include/flang/Optimizer/OpenACC/Passes.td b/flang/include/flang/Optimizer/OpenACC/Passes.td
new file mode 100644
index 0000000000000..3c127b30aa9b8
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenACC/Passes.td
@@ -0,0 +1,36 @@
+//===-- Passes.td - flang OpenACC pass definitions -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_OPENACC_PASSES
+#define FORTRAN_OPTIMIZER_OPENACC_PASSES
+
+include "mlir/Pass/PassBase.td"
+
+def ACCRecipeBufferization
+    : Pass<"fir-acc-recipe-bufferization", "mlir::ModuleOp"> {
+  let summary = "Rewrite acc.*.recipe box values to ref and update uses";
+  let description = [{
+    Bufferizes OpenACC recipes that operate on fir.box<T> so their type and
+    region block arguments become fir.ref<fir.box<T>> instead. This applies to
+    acc.private.recipe, acc.firstprivate.recipe (including copy region), and
+    acc.reduction.recipe (including combiner region).
+
+    For affected regions, the pass inserts required loads at the beginning of
+    the region to preserve original uses after argument type changes.
For yields
+    of box values, the pass allocates a local fir.ref<fir.box<T>> and stores the
+    yielded fir.box<T> into it so the region yields a reference to a box.
+
+    For acc.private, acc.firstprivate, and acc.reduction operations that use a
+    bufferized recipe, the pass allocates a host-side fir.ref<fir.box<T>> before
+    the data op and rewires the data op to use the new memory. Other users of
+    the original data operation result (outside the paired compute op) are
+    updated to load through the reference.
+  }];
+}
+
+#endif // FORTRAN_OPTIMIZER_OPENACC_PASSES
diff --git a/flang/lib/Optimizer/OpenACC/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/CMakeLists.txt
index fc23e64eeb7a4..790b9fdb1589a 100644
--- a/flang/lib/Optimizer/OpenACC/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenACC/CMakeLists.txt
@@ -1 +1,2 @@
 add_subdirectory(Support)
+add_subdirectory(Transforms)
diff --git a/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp b/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp
new file mode 100644
index 0000000000000..4840a999ecd27
--- /dev/null
+++ b/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp
@@ -0,0 +1,230 @@
+//===- ACCRecipeBufferization.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Bufferize OpenACC recipes that yield fir.box<T> to operate on
+// fir.ref<fir.box<T>> and update uses accordingly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/OpenACC/Passes.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/Block.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/SymbolTable.h"
+#include "mlir/IR/Value.h"
+#include "mlir/IR/Visitors.h"
+#include "llvm/ADT/TypeSwitch.h"
+
+namespace fir::acc {
+#define GEN_PASS_DEF_ACCRECIPEBUFFERIZATION
+#include "flang/Optimizer/OpenACC/Passes.h.inc"
+} // namespace fir::acc
+
+namespace {
+
+/// Helpers that decide which recipe types are bufferized and build the IR.
+class BufferizeInterface {
+public:
+  /// Returns the reference type that replaces `recipeType`, or std::nullopt
+  /// when `recipeType` is not a box type and the recipe is left unchanged.
+  /// NOTE(review): the cast target was reconstructed as fir::BaseBoxType;
+  /// confirm against the intended set of box types.
+  static std::optional<mlir::Type> mustBufferize(mlir::Type recipeType) {
+    if (auto boxTy = llvm::dyn_cast<fir::BaseBoxType>(recipeType))
+      return fir::ReferenceType::get(boxTy);
+    return std::nullopt;
+  }
+
+  /// Emits a load of `value` at the builder's insertion point.
+  static mlir::Operation *load(mlir::OpBuilder &builder, mlir::Location loc,
+                               mlir::Value value) {
+    return builder.create<fir::LoadOp>(loc, value);
+  }
+
+  /// Allocates a slot of `value`'s type, stores `value` into it, and returns
+  /// the slot.
+  static mlir::Value placeInMemory(mlir::OpBuilder &builder, mlir::Location loc,
+                                   mlir::Value value) {
+    auto alloca = builder.create<fir::AllocaOp>(loc, value.getType());
+    builder.create<fir::StoreOp>(loc, value, alloca);
+    return alloca;
+  }
+};
+
+/// Rewrites `region` so that values of `oldType` are passed as `newType`.
+///
+/// Entry block arguments of `oldType` are retyped to `newType`; a load
+/// inserted at the start of the region feeds their existing uses. Every
+/// acc.yield operand of `oldType` is stored to a fresh slot that is yielded
+/// in its place. Empty regions are left untouched.
+static void bufferizeRegionArgsAndYields(mlir::Region &region,
+                                         mlir::Location loc, mlir::Type oldType,
+                                         mlir::Type newType) {
+  if (region.empty())
+    return;
+
+  mlir::OpBuilder builder(&region);
+  for (mlir::BlockArgument arg : region.getArguments()) {
+    if (arg.getType() != oldType)
+      continue;
+    arg.setType(newType);
+    if (arg.use_empty())
+      continue;
+    mlir::Operation *loadOp = BufferizeInterface::load(builder, loc, arg);
+    arg.replaceAllUsesExcept(loadOp->getResult(0), loadOp);
+  }
+
+  // Visit every block rather than only the last one: a multi-block region
+  // may yield from several of them.
+  for (mlir::Block &block : region) {
+    auto yield = llvm::dyn_cast<mlir::acc::YieldOp>(block.getTerminator());
+    if (!yield)
+      continue;
+    llvm::SmallVector<mlir::Value> newOperands;
+    newOperands.reserve(yield.getNumOperands());
+    bool changed = false;
+    builder.setInsertionPoint(yield);
+    for (mlir::Value oldYieldArg : yield.getOperands()) {
+      if (oldYieldArg.getType() == oldType) {
+        newOperands.push_back(
+            BufferizeInterface::placeInMemory(builder, loc, oldYieldArg));
+        changed = true;
+      } else {
+        newOperands.push_back(oldYieldArg);
+      }
+    }
+    if (changed)
+      yield->setOperands(newOperands);
+  }
+}
+
+/// Rewires the data operations paired with bufferized recipe `recipeSymName`.
+///
+/// `recipes` and `operands` are the parallel recipe-symbol and data-operand
+/// lists of `computeOp`. For each operand whose recipe is `recipeSymName`,
+/// the variable of the defining data operation is placed in memory, the data
+/// operation is retargeted to that memory, and every user of its result other
+/// than `computeOp` is fed a load of the new reference.
+static void updateRecipeUse(mlir::ArrayAttr recipes, mlir::ValueRange operands,
+                            llvm::StringRef recipeSymName,
+                            mlir::Operation *computeOp) {
+  if (!recipes)
+    return;
+  for (auto [recipeSym, oldRes] : llvm::zip(recipes, operands)) {
+    if (llvm::cast<mlir::SymbolRefAttr>(recipeSym).getLeafReference() !=
+        recipeSymName)
+      continue;
+    // Skip results that are no longer box-typed: if this pair is reached a
+    // second time the result is already a reference, and wrapping it again
+    // would yield a reference to a reference.
+    if (!BufferizeInterface::mustBufferize(oldRes.getType()))
+      continue;
+
+    mlir::Operation *dataOp = oldRes.getDefiningOp();
+    assert(dataOp && "dataOp must be paired with computeOp");
+    mlir::Location loc = dataOp->getLoc();
+    mlir::OpBuilder builder(dataOp);
+    llvm::TypeSwitch<mlir::Operation *, void>(dataOp)
+        .Case<mlir::acc::PrivateOp, mlir::acc::FirstprivateOp,
+              mlir::acc::ReductionOp>([&](auto privateOp) {
+          builder.setInsertionPointAfterValue(privateOp.getVar());
+          mlir::Value alloca = BufferizeInterface::placeInMemory(
+              builder, loc, privateOp.getVar());
+          privateOp.getVarMutable().assign(alloca);
+          privateOp.getAccVar().setType(alloca.getType());
+        });
+
+    // Snapshot the users: the loop below rewrites the use list.
+    llvm::SmallVector<mlir::Operation *> users(oldRes.getUsers().begin(),
+                                               oldRes.getUsers().end());
+    for (mlir::Operation *useOp : users) {
+      if (useOp == computeOp)
+        continue;
+      builder.setInsertionPoint(useOp);
+      mlir::Operation *load = BufferizeInterface::load(builder, loc, oldRes);
+      useOp->replaceUsesOfWith(oldRes, load->getResult(0));
+    }
+  }
+}
+
+/// Module pass that switches boxed OpenACC recipes, and the operations that
+/// use them, over to references to boxes.
+class ACCRecipeBufferization
+    : public fir::acc::impl::ACCRecipeBufferizationBase<
+          ACCRecipeBufferization> {
+public:
+  void runOnOperation() override {
+    mlir::ModuleOp module = getOperation();
+
+    // Step 1: retype every recipe that must be bufferized, rewrite its
+    // regions, and remember its symbol name.
+    llvm::SmallVector<llvm::StringRef> recipeNames;
+    module.walk([&](mlir::Operation *recipe) {
+      llvm::TypeSwitch<mlir::Operation *, void>(recipe)
+          .Case<mlir::acc::PrivateRecipeOp, mlir::acc::FirstprivateRecipeOp,
+                mlir::acc::ReductionRecipeOp>([&](auto recipe) {
+            mlir::Type oldType = recipe.getType();
+            auto bufferizedType =
+                BufferizeInterface::mustBufferize(recipe.getType());
+            if (!bufferizedType)
+              return;
+            recipe.setTypeAttr(mlir::TypeAttr::get(*bufferizedType));
+            mlir::Location loc = recipe.getLoc();
+            using RecipeOp = decltype(recipe);
+            bufferizeRegionArgsAndYields(recipe.getInitRegion(), loc, oldType,
+                                         *bufferizedType);
+            if constexpr (std::is_same_v<RecipeOp,
+                                         mlir::acc::FirstprivateRecipeOp>)
+              bufferizeRegionArgsAndYields(recipe.getCopyRegion(), loc, oldType,
+                                           *bufferizedType);
+            if constexpr (std::is_same_v<RecipeOp,
+                                         mlir::acc::ReductionRecipeOp>)
+              bufferizeRegionArgsAndYields(recipe.getCombinerRegion(), loc,
+                                           oldType, *bufferizedType);
+            bufferizeRegionArgsAndYields(recipe.getDestroyRegion(), loc,
+                                         oldType, *bufferizedType);
+            recipeNames.push_back(recipe.getSymName());
+          });
+    });
+    if (recipeNames.empty())
+      return;
+
+    // Step 2: update the data operations attached to every compute operation
+    // that references one of the rewritten recipes.
+    // NOTE(review): the operation list in the Case below was reconstructed;
+    // confirm it covers every operation carrying these recipe attributes.
+    module.walk([&](mlir::Operation *op) {
+      llvm::TypeSwitch<mlir::Operation *, void>(op)
+          .Case<mlir::acc::ParallelOp, mlir::acc::SerialOp, mlir::acc::LoopOp>(
+              [&](auto computeOp) {
+                for (llvm::StringRef recipeName : recipeNames) {
+                  if (computeOp.getPrivatizationRecipes())
+                    updateRecipeUse(computeOp.getPrivatizationRecipesAttr(),
+                                    computeOp.getPrivateOperands(), recipeName,
+                                    op);
+                  if (computeOp.getFirstprivatizationRecipes())
+                    updateRecipeUse(
+                        computeOp.getFirstprivatizationRecipesAttr(),
+                        computeOp.getFirstprivateOperands(), recipeName, op);
+                  if (computeOp.getReductionRecipes())
+                    updateRecipeUse(computeOp.getReductionRecipesAttr(),
+                                    computeOp.getReductionOperands(),
+                                    recipeName, op);
+                }
+              });
+    });
+  }
+};
+
+} // namespace
+
+std::unique_ptr<mlir::Pass> fir::acc::createACCRecipeBufferizationPass() {
+  return std::make_unique<ACCRecipeBufferization>();
+}
diff --git a/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
new file mode 100644
index 0000000000000..2427da03e1a3c
--- /dev/null
+++ b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_flang_library(FIROpenACCTransforms
+  ACCRecipeBufferization.cpp
+
+  DEPENDS
+  FIROpenACCPassesIncGen
+
+  LINK_LIBS
+  MLIRIR
+  MLIRPass
+  FIRDialect
+  MLIROpenACCDialect
+)
diff --git a/flang/test/Fir/OpenACC/recipe-bufferization.mlir b/flang/test/Fir/OpenACC/recipe-bufferization.mlir
new file mode 100644
index 0000000000000..c4f96f63d5076
--- /dev/null
+++ b/flang/test/Fir/OpenACC/recipe-bufferization.mlir
@@ -0,0 
+1,316 @@ +// RUN: fir-opt %s --fir-acc-recipe-bufferization -split-input-file | FileCheck %s + +// ----- + +acc.private.recipe @priv_ref_box : !fir.box init { +^bb0(%arg0: !fir.box): + %1 = fir.allocmem i32 + %2 = fir.embox %1 : (!fir.heap) -> !fir.box + acc.yield %2 : !fir.box +} destroy { +^bb0(%arg0: !fir.box, %arg1: !fir.box): + %0 = fir.box_addr %arg1 : (!fir.box) -> !fir.ref + %1 = fir.convert %0 : (!fir.ref) -> !fir.heap + fir.freemem %1 : !fir.heap + acc.yield +} + +// CHECK-LABEL: acc.private.recipe @priv_ref_box : !fir.ref> init +// CHECK: ^bb0(%[[ARG:.*]]: !fir.ref>) +// CHECK: %[[EMBOX:.*]] = fir.embox +// CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.box +// CHECK: fir.store %[[EMBOX]] to %[[ALLOCA]] : !fir.ref> +// CHECK: acc.yield %[[ALLOCA]] : !fir.ref> +// CHECK: } destroy { +// CHECK: ^bb0(%[[DARG0:.*]]: !fir.ref>, %[[DARG1:.*]]: !fir.ref>) +// CHECK: %[[LD1:.*]] = fir.load %[[DARG1]] : !fir.ref> +// CHECK: %[[ADDR:.*]] = fir.box_addr %[[LD1]] : (!fir.box) -> !fir.ref +// CHECK: %[[CVT:.*]] = fir.convert %[[ADDR]] : (!fir.ref) -> !fir.heap + +// ----- + +// Test private recipe without destroy region. + +acc.private.recipe @priv_ref_box_no_destroy : !fir.box init { +^bb0(%arg0: !fir.box): + %1 = fir.alloca i32 + %2 = fir.embox %1 : (!fir.ref) -> !fir.box + acc.yield %2 : !fir.box +} + +// CHECK-LABEL: acc.private.recipe @priv_ref_box_no_destroy : !fir.ref> init +// CHECK: ^bb0(%[[ARG:.*]]: !fir.ref>) +// CHECK: %[[EMBOX:.*]] = fir.embox +// CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.box +// CHECK: fir.store %[[EMBOX]] to %[[ALLOCA]] : !fir.ref> +// CHECK: acc.yield %[[ALLOCA]] : !fir.ref> +// CHECK: } + +// ----- + +// Firstprivate recipe with destroy region. 
+acc.firstprivate.recipe @fp_ref_box : !fir.box init { +^bb0(%arg0: !fir.box): + %0 = fir.allocmem i32 + %1 = fir.embox %0 : (!fir.heap) -> !fir.box + acc.yield %1 : !fir.box +} copy { +^bb0(%src: !fir.box, %dst: !fir.box): + %s_addr = fir.box_addr %src : (!fir.box) -> !fir.ref + %val = fir.load %s_addr : !fir.ref + %d_addr = fir.box_addr %dst : (!fir.box) -> !fir.ref + fir.store %val to %d_addr : !fir.ref + acc.yield +} destroy { +^bb0(%arg0: !fir.box, %arg1: !fir.box): + acc.yield +} + +// CHECK-LABEL: acc.firstprivate.recipe @fp_ref_box : !fir.ref> init +// CHECK: ^bb0(%[[IARG:.*]]: !fir.ref>) +// CHECK: %[[EMBOX_FP:.*]] = fir.embox +// CHECK: %[[ALLOCA_FP:.*]] = fir.alloca !fir.box +// CHECK: fir.store %[[EMBOX_FP]] to %[[ALLOCA_FP]] : !fir.ref> +// CHECK: acc.yield %[[ALLOCA_FP]] : !fir.ref> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref>, %[[DST:.*]]: !fir.ref>) +// CHECK: %[[LSRC:.*]] = fir.load %[[SRC]] : !fir.ref> +// CHECK: %[[LDST:.*]] = fir.load %[[DST]] : !fir.ref> +// CHECK: %[[SADDR:.*]] = fir.box_addr %[[LSRC]] : (!fir.box) -> !fir.ref +// CHECK: %[[VAL:.*]] = fir.load %[[SADDR]] : !fir.ref +// CHECK: %[[DADDR:.*]] = fir.box_addr %[[LDST]] : (!fir.box) -> !fir.ref +// CHECK: fir.store %[[VAL]] to %[[DADDR]] : !fir.ref +// CHECK: } destroy { +// CHECK: ^bb0(%[[FDARG0:.*]]: !fir.ref>, %[[FDARG1:.*]]: !fir.ref>) + +// ----- + +// Firstprivate recipe without destroy region. 
+acc.firstprivate.recipe @fp_ref_box_no_destroy : !fir.box init { +^bb0(%arg0: !fir.box): + %0 = fir.alloca i32 + %1 = fir.embox %0 : (!fir.ref) -> !fir.box + acc.yield %1 : !fir.box +} copy { +^bb0(%src: !fir.box, %dst: !fir.box): + %s_addr = fir.box_addr %src : (!fir.box) -> !fir.ref + %val = fir.load %s_addr : !fir.ref + %d_addr = fir.box_addr %dst : (!fir.box) -> !fir.ref + fir.store %val to %d_addr : !fir.ref + acc.yield +} + +// CHECK-LABEL: acc.firstprivate.recipe @fp_ref_box_no_destroy : !fir.ref> init +// CHECK: ^bb0(%[[IARG2:.*]]: !fir.ref>) +// CHECK: %[[EMBOX_FP2:.*]] = fir.embox +// CHECK: %[[ALLOCA_FP2:.*]] = fir.alloca !fir.box +// CHECK: fir.store %[[EMBOX_FP2]] to %[[ALLOCA_FP2]] : !fir.ref> +// CHECK: acc.yield %[[ALLOCA_FP2]] : !fir.ref> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC2:.*]]: !fir.ref>, %[[DST2:.*]]: !fir.ref>) +// CHECK: %[[LSRC2:.*]] = fir.load %[[SRC2]] : !fir.ref> +// CHECK: %[[LDST2:.*]] = fir.load %[[DST2]] : !fir.ref> +// CHECK: %[[SADDR2:.*]] = fir.box_addr %[[LSRC2]] : (!fir.box) -> !fir.ref +// CHECK: %[[VAL2:.*]] = fir.load %[[SADDR2]] : !fir.ref +// CHECK: %[[DADDR2:.*]] = fir.box_addr %[[LDST2]] : (!fir.box) -> !fir.ref +// CHECK: fir.store %[[VAL2]] to %[[DADDR2]] : !fir.ref + +// ----- + +// Reduction recipe with destroy region. 
+acc.reduction.recipe @red_ref_box : !fir.box reduction_operator init { +^bb0(%arg0: !fir.box): + %0 = fir.allocmem i32 + %1 = fir.embox %0 : (!fir.heap) -> !fir.box + acc.yield %1 : !fir.box +} combiner { +^bb0(%lhs: !fir.box, %rhs: !fir.box): + %l_addr = fir.box_addr %lhs : (!fir.box) -> !fir.ref + %l_val = fir.load %l_addr : !fir.ref + %r_addr = fir.box_addr %rhs : (!fir.box) -> !fir.ref + %r_val = fir.load %r_addr : !fir.ref + %sum = arith.addi %l_val, %r_val : i32 + %tmp = fir.alloca i32 + fir.store %sum to %tmp : !fir.ref + %new = fir.embox %tmp : (!fir.ref) -> !fir.box + acc.yield %new : !fir.box +} destroy { +^bb0(%arg0: !fir.box, %arg1: !fir.box): + acc.yield +} + +// CHECK-LABEL: acc.reduction.recipe @red_ref_box : !fir.ref> reduction_operator init +// CHECK: ^bb0(%[[IARGR:.*]]: !fir.ref>) +// CHECK: %[[EMBOXR:.*]] = fir.embox +// CHECK: %[[ALLOCAR:.*]] = fir.alloca !fir.box +// CHECK: fir.store %[[EMBOXR]] to %[[ALLOCAR]] : !fir.ref> +// CHECK: acc.yield %[[ALLOCAR]] : !fir.ref> +// CHECK: } combiner { +// CHECK: ^bb0(%[[LHS:.*]]: !fir.ref>, %[[RHS:.*]]: !fir.ref>) +// CHECK: %[[LLHS:.*]] = fir.load %[[LHS]] : !fir.ref> +// CHECK: %[[LRHS:.*]] = fir.load %[[RHS]] : !fir.ref> +// CHECK: %[[LADDR:.*]] = fir.box_addr %[[LLHS]] : (!fir.box) -> !fir.ref +// CHECK: %[[LVAL:.*]] = fir.load %[[LADDR]] : !fir.ref +// CHECK: %[[RADDR:.*]] = fir.box_addr %[[LRHS]] : (!fir.box) -> !fir.ref +// CHECK: %[[RVAL:.*]] = fir.load %[[RADDR]] : !fir.ref +// CHECK: %[[SUM:.*]] = arith.addi %[[LVAL]], %[[RVAL]] : i32 +// CHECK: %[[I32ALLOCA:.*]] = fir.alloca i32 +// CHECK: fir.store %[[SUM]] to %[[I32ALLOCA]] : !fir.ref +// CHECK: %[[NEWBOX:.*]] = fir.embox %[[I32ALLOCA]] : (!fir.ref) -> !fir.box +// CHECK: %[[BOXALLOCA:.*]] = fir.alloca !fir.box +// CHECK: fir.store %[[NEWBOX]] to %[[BOXALLOCA]] : !fir.ref> +// CHECK: acc.yield %[[BOXALLOCA]] : !fir.ref> +// CHECK: } destroy { +// CHECK: ^bb0(%[[RD0:.*]]: !fir.ref>, %[[RD1:.*]]: !fir.ref>) + +// ----- + +// Reduction recipe 
without destroy region. +acc.reduction.recipe @red_ref_box_no_destroy : !fir.box reduction_operator init { +^bb0(%arg0: !fir.box): + %0 = fir.alloca i32 + %1 = fir.embox %0 : (!fir.ref) -> !fir.box + acc.yield %1 : !fir.box +} combiner { +^bb0(%lhs: !fir.box, %rhs: !fir.box): + %l_addr = fir.box_addr %lhs : (!fir.box) -> !fir.ref + %l_val = fir.load %l_addr : !fir.ref + %r_addr = fir.box_addr %rhs : (!fir.box) -> !fir.ref + %r_val = fir.load %r_addr : !fir.ref + %sum = arith.addi %l_val, %r_val : i32 + %tmp = fir.alloca i32 + fir.store %sum to %tmp : !fir.ref + %new = fir.embox %tmp : (!fir.ref) -> !fir.box + acc.yield %new : !fir.box +} + +// CHECK-LABEL: acc.reduction.recipe @red_ref_box_no_destroy : !fir.ref> reduction_operator init +// CHECK: ^bb0(%[[IARGR2:.*]]: !fir.ref>) +// CHECK: %[[EMBOXR2:.*]] = fir.embox +// CHECK: %[[ALLOCAR2:.*]] = fir.alloca !fir.box +// CHECK: fir.store %[[EMBOXR2]] to %[[ALLOCAR2]] : !fir.ref> +// CHECK: acc.yield %[[ALLOCAR2]] : !fir.ref> +// CHECK: } combiner { +// CHECK: ^bb0(%[[LHS2:.*]]: !fir.ref>, %[[RHS2:.*]]: !fir.ref>) +// CHECK: %[[LLHS2:.*]] = fir.load %[[LHS2]] : !fir.ref> +// CHECK: %[[LRHS2:.*]] = fir.load %[[RHS2]] : !fir.ref> +// CHECK: %[[LADDR2:.*]] = fir.box_addr %[[LLHS2]] : (!fir.box) -> !fir.ref +// CHECK: %[[LVAL2:.*]] = fir.load %[[LADDR2]] : !fir.ref +// CHECK: %[[RADDR2:.*]] = fir.box_addr %[[LRHS2]] : (!fir.box) -> !fir.ref +// CHECK: %[[RVAL2:.*]] = fir.load %[[RADDR2]] : !fir.ref +// CHECK: %[[SUM2:.*]] = arith.addi %[[LVAL2]], %[[RVAL2]] : i32 +// CHECK: %[[I32ALLOCA2:.*]] = fir.alloca i32 +// CHECK: fir.store %[[SUM2]] to %[[I32ALLOCA2]] : !fir.ref +// CHECK: %[[NEWBOX2:.*]] = fir.embox %[[I32ALLOCA2]] : (!fir.ref) -> !fir.box +// CHECK: %[[BOXALLOCA2:.*]] = fir.alloca !fir.box +// CHECK: fir.store %[[NEWBOX2]] to %[[BOXALLOCA2]] : !fir.ref> +// CHECK: acc.yield %[[BOXALLOCA2]] : !fir.ref> + +// ----- + +// Comprehensive tests that also test recipe usages updates. 
+ +acc.private.recipe @privatization_ref_i32 : !fir.ref init { +^bb0(%arg0: !fir.ref): + %0 = fir.alloca i32 + %1 = fir.declare %0 {uniq_name = "acc.private.init"} : (!fir.ref) -> !fir.ref + acc.yield %1 : !fir.ref +} +acc.private.recipe @privatization_box_Uxf32 : !fir.box> init { +^bb0(%arg0: !fir.box>): + %c0 = arith.constant 0 : index + %0:3 = fir.box_dims %arg0, %c0 : (!fir.box>, index) -> (index, index, index) + %1 = fir.shape %0#1 : (index) -> !fir.shape<1> + %2 = fir.allocmem !fir.array, %0#1 {bindc_name = ".tmp", uniq_name = ""} + %3 = fir.declare %2(%1) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> !fir.heap> + %4 = fir.embox %3(%1) : (!fir.heap>, !fir.shape<1>) -> !fir.box> + acc.yield %4 : !fir.box> +} destroy { +^bb0(%arg0: !fir.box>, %arg1: !fir.box>): + %0 = fir.box_addr %arg1 : (!fir.box>) -> !fir.ref> + %1 = fir.convert %0 : (!fir.ref>) -> !fir.heap> + fir.freemem %1 : !fir.heap> + acc.terminator +} +func.func @_QPfoo(%arg0: !fir.box> {fir.bindc_name = "x"}) { + %c200_i32 = arith.constant 200 : i32 + %c1_i32 = arith.constant 1 : i32 + %0 = fir.dummy_scope : !fir.dscope + %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFfooEi"} + %2 = fir.declare %1 {uniq_name = "_QFfooEi"} : (!fir.ref) -> !fir.ref + %3 = fir.declare %arg0 dummy_scope %0 {uniq_name = "_QFfooEx"} : (!fir.box>, !fir.dscope) -> !fir.box> + acc.parallel combined(loop) { + %4 = acc.private var(%3 : !fir.box>) -> !fir.box> {name = "x"} + %5 = acc.private varPtr(%2 : !fir.ref) -> !fir.ref {implicit = true, name = "i"} + acc.loop combined(parallel) private(@privatization_box_Uxf32 -> %4 : !fir.box>, @privatization_ref_i32 -> %5 : !fir.ref) control(%arg1 : i32) = (%c1_i32 : i32) to (%c200_i32 : i32) step (%c1_i32 : i32) { + %6 = fir.dummy_scope : !fir.dscope + %7 = fir.declare %4 dummy_scope %6 {uniq_name = "_QFfooEx"} : (!fir.box>, !fir.dscope) -> !fir.box> + %8 = fir.declare %5 {uniq_name = "_QFfooEi"} : (!fir.ref) -> !fir.ref + %9 = fir.convert %arg1 : (i32) -> f32 + %10 = 
fir.convert %arg1 : (i32) -> i64 + %11 = fir.array_coor %7 %10 : (!fir.box>, i64) -> !fir.ref + fir.store %9 to %11 : !fir.ref + acc.yield + } attributes {inclusiveUpperbound = array, independent = [#acc.device_type]} + acc.yield + } + return +} + +// CHECK-LABEL: acc.private.recipe @privatization_ref_i32 : !fir.ref init { +// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref): +// CHECK: %[[VAL_1:.*]] = fir.alloca i32 +// CHECK: %[[VAL_2:.*]] = fir.declare %[[VAL_1]] {uniq_name = "acc.private.init"} : (!fir.ref) -> !fir.ref +// CHECK: acc.yield %[[VAL_2]] : !fir.ref +// CHECK: } + +// CHECK-LABEL: acc.private.recipe @privatization_box_Uxf32 : !fir.ref>> init { +// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>): +// CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref>> +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_2]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]]#1 : (index) -> !fir.shape<1> +// CHECK: %[[VAL_5:.*]] = fir.allocmem !fir.array, %[[VAL_3]]#1 {bindc_name = ".tmp", uniq_name = ""} +// CHECK: %[[VAL_6:.*]] = fir.declare %[[VAL_5]](%[[VAL_4]]) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> !fir.heap> +// CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]](%[[VAL_4]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box> +// CHECK: fir.store %[[VAL_7]] to %[[VAL_8]] : !fir.ref>> +// CHECK: acc.yield %[[VAL_8]] : !fir.ref>> + +// CHECK-LABEL: } destroy { +// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>, %[[VAL_1:.*]]: !fir.ref>>): +// CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]] : !fir.ref>> +// CHECK: %[[VAL_3:.*]] = fir.box_addr %[[VAL_2]] : (!fir.box>) -> !fir.ref> +// CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (!fir.ref>) -> !fir.heap> +// CHECK: fir.freemem %[[VAL_4]] : !fir.heap> +// CHECK: acc.terminator +// CHECK: } + +// CHECK-LABEL: func.func @_QPfoo( +// CHECK-SAME: %[[ARG0:.*]]: !fir.box> {fir.bindc_name = "x"}) { +// 
CHECK: %[[VAL_0:.*]] = arith.constant 200 : i32 +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFfooEi"} +// CHECK: %[[VAL_4:.*]] = fir.declare %[[VAL_3]] {uniq_name = "_QFfooEi"} : (!fir.ref) -> !fir.ref +// CHECK: %[[VAL_5:.*]] = fir.declare %[[ARG0]] dummy_scope %[[VAL_2]] {uniq_name = "_QFfooEx"} : (!fir.box>, !fir.dscope) -> !fir.box> +// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box> +// CHECK: fir.store %[[VAL_5]] to %[[VAL_6]] : !fir.ref>> +// CHECK: acc.parallel combined(loop) { +// CHECK: %[[VAL_7:.*]] = acc.private varPtr(%[[VAL_6]] : !fir.ref>>) -> !fir.ref>> {name = "x"} +// CHECK: %[[VAL_8:.*]] = acc.private varPtr(%[[VAL_4]] : !fir.ref) -> !fir.ref {implicit = true, name = "i"} +// CHECK: acc.loop combined(parallel) private(@privatization_box_Uxf32 -> %[[VAL_7]] : !fir.ref>>, @privatization_ref_i32 -> %[[VAL_8]] : !fir.ref) control(%[[VAL_9:.*]] : i32) = (%[[VAL_1]] : i32) to (%[[VAL_0]] : i32) step (%[[VAL_1]] : i32) { +// CHECK: %[[VAL_10:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_7]] : !fir.ref>> +// CHECK: %[[VAL_12:.*]] = fir.declare %[[VAL_11]] dummy_scope %[[VAL_10]] {uniq_name = "_QFfooEx"} : (!fir.box>, !fir.dscope) -> !fir.box> +// CHECK: %[[VAL_13:.*]] = fir.declare %[[VAL_8]] {uniq_name = "_QFfooEi"} : (!fir.ref) -> !fir.ref +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_9]] : (i32) -> f32 +// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_9]] : (i32) -> i64 +// CHECK: %[[VAL_16:.*]] = fir.array_coor %[[VAL_12]] %[[VAL_15]] : (!fir.box>, i64) -> !fir.ref +// CHECK: fir.store %[[VAL_14]] to %[[VAL_16]] : !fir.ref +// CHECK: acc.yield +// CHECK: } attributes {inclusiveUpperbound = array, independent = [#acc.device_type]} +// CHECK: acc.yield +// CHECK: } +// CHECK: return +// CHECK: } diff --git a/flang/tools/fir-opt/CMakeLists.txt b/flang/tools/fir-opt/CMakeLists.txt index 
4ee9752727b87..c5bd4390a4b78 100644 --- a/flang/tools/fir-opt/CMakeLists.txt +++ b/flang/tools/fir-opt/CMakeLists.txt @@ -22,6 +22,7 @@ target_link_libraries(fir-opt PRIVATE HLFIRDialect HLFIRTransforms FIROpenACCSupport + FIROpenACCTransforms FIROpenMPSupport FlangOpenMPTransforms FIRAnalysis diff --git a/flang/tools/fir-opt/fir-opt.cpp b/flang/tools/fir-opt/fir-opt.cpp index d66fc3f08bdf8..b0b277b88dbe2 100644 --- a/flang/tools/fir-opt/fir-opt.cpp +++ b/flang/tools/fir-opt/fir-opt.cpp @@ -14,6 +14,7 @@ #include "mlir/Tools/mlir-opt/MlirOptMain.h" #include "flang/Optimizer/CodeGen/CodeGen.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenACC/Passes.h" #include "flang/Optimizer/OpenMP/Passes.h" #include "flang/Optimizer/Support/InitFIR.h" #include "flang/Optimizer/Transforms/Passes.h" @@ -37,6 +38,7 @@ int main(int argc, char **argv) { fir::registerOptTransformPasses(); hlfir::registerHLFIRPasses(); flangomp::registerFlangOpenMPPasses(); + fir::acc::registerFIROpenACCPasses(); #ifdef FLANG_INCLUDE_TESTS fir::test::registerTestFIRAliasAnalysisPass(); fir::test::registerTestFIROpenACCInterfacesPass();