diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h index 92bc7246eca70..c418874141002 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.h +++ b/flang/include/flang/Optimizer/Transforms/Passes.h @@ -60,6 +60,7 @@ createExternalNameConversionPass(bool appendUnderscore); std::unique_ptr createMemDataFlowOptPass(); std::unique_ptr createPromoteToAffinePass(); std::unique_ptr createMemoryAllocationPass(); +std::unique_ptr createConstantArgumentGlobalisationPass(); std::unique_ptr createStackArraysPass(); std::unique_ptr createAliasTagsPass(); std::unique_ptr createSimplifyIntrinsicsPass(); diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td index c3768fd2d689c..732b9e53cf030 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.td +++ b/flang/include/flang/Optimizer/Transforms/Passes.td @@ -242,6 +242,16 @@ def MemoryAllocationOpt : Pass<"memory-allocation-opt", "mlir::func::FuncOp"> { let constructor = "::fir::createMemoryAllocationPass()"; } +// This needs to be a "mlir::ModuleOp" pass, because it inserts global constants +def ConstantArgumentGlobalisationOpt : Pass<"constant-argument-globalisation-opt", "mlir::ModuleOp"> { + let summary = "Convert constant function arguments to global constants."; + let description = [{ + Convert scalar literals of function arguments to global constants. 
+ }]; + let dependentDialects = [ "fir::FIROpsDialect" ]; + let constructor = "::fir::createConstantArgumentGlobalisationPass()"; +} + def StackArrays : Pass<"stack-arrays", "mlir::ModuleOp"> { let summary = "Move local array allocations from heap memory into stack memory"; let description = [{ diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index d3e4dc6cd4a24..22fa510b57363 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -84,6 +84,8 @@ DisableOption(BoxedProcedureRewrite, "boxed-procedure-rewrite", DisableOption(ExternalNameConversion, "external-name-interop", "convert names with external convention"); +DisableOption(ConstantArgumentGlobalisation, "constant-argument-globalisation", + "the local constants to global constant conversion"); /// Generic for adding a pass to the pass manager if it is not disabled. template @@ -204,6 +206,8 @@ inline void createDefaultFIROptimizerPassPipeline( // These passes may increase code size. 
pm.addPass(fir::createSimplifyIntrinsicsPass()); pm.addPass(fir::createAlgebraicSimplificationPass(config)); + if (!disableConstantArgumentGlobalisation) + pm.addPass(fir::createConstantArgumentGlobalisationPass()); } if (pc.LoopVersioning) diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt index 03b67104a93b5..709b2d4e23d5b 100644 --- a/flang/lib/Optimizer/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt @@ -5,6 +5,7 @@ add_flang_library(FIRTransforms AffineDemotion.cpp AnnotateConstant.cpp CharacterConversion.cpp + ConstantArgumentGlobalisation.cpp ControlFlowConverter.cpp ArrayValueCopy.cpp ExternalNameConversion.cpp diff --git a/flang/lib/Optimizer/Transforms/ConstantArgumentGlobalisation.cpp b/flang/lib/Optimizer/Transforms/ConstantArgumentGlobalisation.cpp new file mode 100644 index 0000000000000..2859a57226f16 --- /dev/null +++ b/flang/lib/Optimizer/Transforms/ConstantArgumentGlobalisation.cpp @@ -0,0 +1,190 @@ +//===- ConstantArgumentGlobalisation.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Dialect/FIRDialect.h" +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/Transforms/Passes.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/Dominance.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +namespace fir { +#define GEN_PASS_DEF_CONSTANTARGUMENTGLOBALISATIONOPT +#include "flang/Optimizer/Transforms/Passes.h.inc" +} // namespace fir + +#define DEBUG_TYPE "flang-const-extruder-opt" + +namespace { +unsigned uniqueLitId = 1; +/// Rewrites a fir.call so that constant by-reference operands use globals. +class CallOpRewriter : public mlir::OpRewritePattern { +protected: + const mlir::DominanceInfo &di; + +public: + using OpRewritePattern::OpRewritePattern; + + CallOpRewriter(mlir::MLIRContext *ctx, const mlir::DominanceInfo &_di) + : OpRewritePattern(ctx), di(_di) {} + + mlir::LogicalResult + matchAndRewrite(fir::CallOp callOp, + mlir::PatternRewriter &rewriter) const override { + LLVM_DEBUG(llvm::dbgs() << "Processing call op: " << callOp << "\n"); + auto module = callOp->getParentOfType(); + bool needUpdate = false; + fir::FirOpBuilder builder(rewriter, module); + llvm::SmallVector newOperands; + llvm::SmallVector toErase; + for (const mlir::Value &a : callOp.getArgs()) { + auto alloca = mlir::dyn_cast_or_null(a.getDefiningOp()); + // Only arguments defined by an alloca that carries the + // value-by-reference attribute are candidates. Everything else + // is passed through to the new argument list unchanged. 
+ if (!alloca || !alloca->hasAttr(fir::getAdaptToByRefAttrName())) { + newOperands.push_back(a); + continue; + } + + mlir::Type varTy = alloca.getInType(); + assert(!fir::hasDynamicSize(varTy) && + "only expect statically sized scalars to be by value"); + + // Look for the one store to the alloca that dominates the call. + mlir::Operation *store = nullptr; + for (mlir::Operation *s : alloca->getUsers()) { + if (mlir::isa(s) && di.dominates(s, callOp)) { + // Only ONE dominating store is supported - a second one + // resets the match and ends the search. + if (store) { + store = nullptr; + break; + } + store = s; + } + } + + // Without exactly one such store, keep the argument as is and move on. + if (!store) { + newOperands.push_back(a); + continue; + } + + LLVM_DEBUG(llvm::dbgs() << " found store " << *store << "\n"); + + mlir::Operation *constant_def = store->getOperand(0).getDefiningOp(); + // The stored value must come from a constant op; a block argument has + // no defining op (null) and is likewise left untouched. + if (!constant_def || !mlir::isa(constant_def)) { + // Unable to remove alloca arg + newOperands.push_back(a); + continue; + } + + LLVM_DEBUG(llvm::dbgs() << " found define " << *constant_def << "\n"); + + std::string globalName = "_extruded_." 
+ std::to_string(uniqueLitId++); + assert(!builder.getNamedGlobal(globalName) && + "We should have a unique name here"); + + unsigned count = 0; + for (mlir::Operation *s : alloca->getUsers()) + if (di.dominates(store, s)) + ++count; + + // Erase the store only if the users it dominates are just itself + // and one more operation (which should be callOp). + if (count == 2) + toErase.push_back(store); + + auto loc = callOp.getLoc(); + fir::GlobalOp global = builder.createGlobalConstant( + loc, varTy, globalName, + [&](fir::FirOpBuilder &builder) { + mlir::Operation *cln = constant_def->clone(); + builder.insert(cln); + mlir::Value val = + builder.createConvert(loc, varTy, cln->getResult(0)); + builder.create(loc, val); + }, + builder.createInternalLinkage()); + mlir::Value addr = {builder.create( + loc, global.resultType(), global.getSymbol())}; + newOperands.push_back(addr); + needUpdate = true; + } + + if (needUpdate) { + auto loc = callOp.getLoc(); + llvm::SmallVector newResultTypes; + newResultTypes.append(callOp.getResultTypes().begin(), + callOp.getResultTypes().end()); + fir::CallOp newOp = builder.create( + loc, newResultTypes, + callOp.getCallee().has_value() ? callOp.getCallee().value() + : mlir::SymbolRefAttr{}, + newOperands, callOp.getFastmathAttr()); + // Print before replaceOp(): callOp must not be touched once replaced. + LLVM_DEBUG(llvm::dbgs() << "extruded constant for " << callOp << " as " + << newOp << '\n'); + rewriter.replaceOp(callOp, newOp); + for (auto e : toErase) + rewriter.eraseOp(e); + return mlir::success(); + } + + // Failure here just means "we couldn't do the conversion", which is + // perfectly acceptable to the upper layers of this function. 
+ return mlir::failure(); + } +}; + +// This pass attempts to convert immediate scalar literals in function calls +// to global constants, enabling e.g. Dead Argument Elimination afterwards. +class ConstantArgumentGlobalisationOpt + : public fir::impl::ConstantArgumentGlobalisationOptBase< + ConstantArgumentGlobalisationOpt> { +public: + ConstantArgumentGlobalisationOpt() = default; + + void runOnOperation() override { + mlir::ModuleOp mod = getOperation(); + mlir::DominanceInfo *di = &getAnalysis(); + mod.walk([di, this](mlir::func::FuncOp func) { runOnFunc(func, di); }); + } + + void runOnFunc(mlir::func::FuncOp &func, const mlir::DominanceInfo *di) { + // If func is a declaration, skip it. + if (func.empty()) + return; + + auto *context = &getContext(); + mlir::RewritePatternSet patterns(context); + mlir::GreedyRewriteConfig config; + config.enableRegionSimplification = false; + config.strictMode = mlir::GreedyRewriteStrictness::ExistingOps; + + patterns.insert(context, *di); + if (mlir::failed(mlir::applyPatternsAndFoldGreedily( + func, std::move(patterns), config))) { + mlir::emitError(func.getLoc(), + "error in constant extrusion optimization\n"); + signalPassFailure(); + } + } +}; +} // namespace + +std::unique_ptr fir::createConstantArgumentGlobalisationPass() { + return std::make_unique(); +} diff --git a/flang/test/Driver/bbc-mlir-pass-pipeline.f90 b/flang/test/Driver/bbc-mlir-pass-pipeline.f90 index 243a620a9fd00..ee2ee0254269e 100644 --- a/flang/test/Driver/bbc-mlir-pass-pipeline.f90 +++ b/flang/test/Driver/bbc-mlir-pass-pipeline.f90 @@ -25,6 +25,7 @@ ! CHECK-NEXT: SimplifyRegionLite ! CHECK-NEXT: SimplifyIntrinsics ! CHECK-NEXT: AlgebraicSimplification +! CHECK-NEXT: ConstantArgumentGlobalisationOpt ! CHECK-NEXT: CSE ! CHECK-NEXT: (S) 0 num-cse'd - Number of operations CSE'd ! 
CHECK-NEXT: (S) 0 num-dce'd - Number of operations DCE'd diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90 index 3d8c42f123e2e..5d5712deed2a4 100644 --- a/flang/test/Driver/mlir-pass-pipeline.f90 +++ b/flang/test/Driver/mlir-pass-pipeline.f90 @@ -36,6 +36,7 @@ ! ALL-NEXT: SimplifyRegionLite ! O2-NEXT: SimplifyIntrinsics ! O2-NEXT: AlgebraicSimplification +! O2-NEXT: ConstantArgumentGlobalisationOpt ! ALL-NEXT: CSE ! ALL-NEXT: (S) 0 num-cse'd - Number of operations CSE'd ! ALL-NEXT: (S) 0 num-dce'd - Number of operations DCE'd diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir index d8a9e74c318ce..70f6d298857dc 100644 --- a/flang/test/Fir/basic-program.fir +++ b/flang/test/Fir/basic-program.fir @@ -42,6 +42,7 @@ func.func @_QQmain() { // PASSES-NEXT: SimplifyRegionLite // PASSES-NEXT: SimplifyIntrinsics // PASSES-NEXT: AlgebraicSimplification +// PASSES-NEXT: ConstantArgumentGlobalisationOpt // PASSES-NEXT: CSE // PASSES-NEXT: (S) 0 num-cse'd - Number of operations CSE'd // PASSES-NEXT: (S) 0 num-dce'd - Number of operations DCE'd diff --git a/flang/test/Fir/boxproc.fir b/flang/test/Fir/boxproc.fir index 1fed16a808af0..2ddc0ef525ac4 100644 --- a/flang/test/Fir/boxproc.fir +++ b/flang/test/Fir/boxproc.fir @@ -16,9 +16,7 @@ // CHECK-LABEL: define void @_QPtest_proc_dummy_other(ptr // CHECK-SAME: %[[VAL_0:.*]]) -// CHECK: %[[VAL_1:.*]] = alloca i32, i64 1, align 4 -// CHECK: store i32 4, ptr %[[VAL_1]], align 4 -// CHECK: call void %[[VAL_0]](ptr %[[VAL_1]]) +// CHECK: call void %[[VAL_0]](ptr @{{.*}}) func.func @_QPtest_proc_dummy() { %c0_i32 = arith.constant 0 : i32 diff --git a/flang/test/Lower/character-local-variables.f90 b/flang/test/Lower/character-local-variables.f90 index 0cf61a2623c4e..b1cfc540f4389 100644 --- a/flang/test/Lower/character-local-variables.f90 +++ b/flang/test/Lower/character-local-variables.f90 @@ -116,8 +116,7 @@ subroutine dyn_array_dyn_len_lb(l, n) subroutine 
assumed_length_param(n) character(*), parameter :: c(1)=(/"abcd"/) integer :: n - ! CHECK: %[[c4:.*]] = arith.constant 4 : i64 - ! CHECK: fir.store %[[c4]] to %[[tmp:.*]] : !fir.ref + ! CHECK: %[[tmp:.*]] = fir.address_of(@_extruded_.{{.*}}) : !fir.ref ! CHECK: fir.call @_QPtake_int(%[[tmp]]) {{.*}}: (!fir.ref) -> () call take_int(len(c(n), kind=8)) end diff --git a/flang/test/Lower/dummy-arguments.f90 b/flang/test/Lower/dummy-arguments.f90 index 43d8e3c1e5d44..46e4323e88620 100644 --- a/flang/test/Lower/dummy-arguments.f90 +++ b/flang/test/Lower/dummy-arguments.f90 @@ -2,9 +2,7 @@ ! CHECK-LABEL: _QQmain program test1 - ! CHECK-DAG: %[[TMP:.*]] = fir.alloca - ! CHECK-DAG: %[[TEN:.*]] = arith.constant - ! CHECK: fir.store %[[TEN]] to %[[TMP]] + ! CHECK-DAG: %[[TEN:.*]] = fir.address_of(@_extruded_.{{.*}}) : !fir.ref ! CHECK-NEXT: fir.call @_QFPfoo call foo(10) contains diff --git a/flang/test/Lower/host-associated.f90 b/flang/test/Lower/host-associated.f90 index 073493d7fe28a..c9cf5dd10123b 100644 --- a/flang/test/Lower/host-associated.f90 +++ b/flang/test/Lower/host-associated.f90 @@ -448,11 +448,10 @@ subroutine bar() ! CHECK-LABEL: func @_QPtest_proc_dummy_other( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.boxproc<() -> ()>) { -! CHECK: %[[VAL_1:.*]] = arith.constant 4 : i32 -! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {adapt.valuebyref} -! CHECK: fir.store %[[VAL_1]] to %[[VAL_2]] : !fir.ref ! CHECK: %[[VAL_3:.*]] = fir.box_addr %[[VAL_0]] : (!fir.boxproc<() -> ()>) -> ((!fir.ref) -> ()) -! CHECK: fir.call %[[VAL_3]](%[[VAL_2]]) {{.*}}: (!fir.ref) -> () +! CHECK: %[[VAL_1:.*]] = fir.address_of(@_extruded_.{{.*}}) : !fir.ref +! CHECK: fir.call %[[VAL_3]](%[[VAL_1]]) {{.*}}: (!fir.ref) -> () + ! CHECK: return ! 
CHECK: } diff --git a/flang/test/Transforms/constant-argument-globalisation-2.fir b/flang/test/Transforms/constant-argument-globalisation-2.fir new file mode 100644 index 0000000000000..03855b5bfb762 --- /dev/null +++ b/flang/test/Transforms/constant-argument-globalisation-2.fir @@ -0,0 +1,80 @@ +// RUN: fir-opt --split-input-file --constant-argument-globalisation-opt < %s | FileCheck %s + +module { +// Test for "two conditional writes to the same alloca doesn't get replaced." + func.func @func(%arg0: i32, %arg1: i1) { + %c2_i32 = arith.constant 2 : i32 + %addr = fir.alloca i32 {adapt.valuebyref} + fir.if %arg1 { + fir.store %c2_i32 to %addr : !fir.ref + } else { + fir.store %arg0 to %addr : !fir.ref + } + fir.call @sub2(%addr) : (!fir.ref) -> () + return + } + func.func private @sub2(!fir.ref) + +// CHECK-LABEL: func.func @func +// CHECK-SAME: [[ARG0:%.*]]: i32 +// CHECK-SAME: [[ARG1:%.*]]: i1) +// CHECK: [[CONST:%.*]] = arith.constant +// CHECK: [[ADDR:%.*]] = fir.alloca i32 +// CHECK: fir.if [[ARG1]] +// CHECK: fir.store [[CONST]] to [[ADDR]] +// CHECK: } else { +// CHECK: fir.store [[ARG0]] to [[ADDR]] +// CHECK: fir.call @sub2([[ADDR]]) +// CHECK: return + +} + +// ----- + +module { +// Test for "two writes to the same alloca doesn't get replaced." + func.func @func() { + %c1_i32 = arith.constant 1 : i32 + %c2_i32 = arith.constant 2 : i32 + %addr = fir.alloca i32 {adapt.valuebyref} + fir.store %c1_i32 to %addr : !fir.ref + fir.store %c2_i32 to %addr : !fir.ref + fir.call @sub2(%addr) : (!fir.ref) -> () + return + } + func.func private @sub2(!fir.ref) + +// CHECK-LABEL: func.func @func +// CHECK: [[CONST1:%.*]] = arith.constant +// CHECK: [[CONST2:%.*]] = arith.constant +// CHECK: [[ADDR:%.*]] = fir.alloca i32 +// CHECK: fir.store [[CONST1]] to [[ADDR]] +// CHECK: fir.store [[CONST2]] to [[ADDR]] +// CHECK: fir.call @sub2([[ADDR]]) +// CHECK: return + +} + +// ----- + +module { +// Test for "one write to the alloca gets replaced." 
+ func.func @func() { + %c1_i32 = arith.constant 1 : i32 + %addr = fir.alloca i32 {adapt.valuebyref} + fir.store %c1_i32 to %addr : !fir.ref + fir.call @sub2(%addr) : (!fir.ref) -> () + return + } + func.func private @sub2(!fir.ref) + +// CHECK-LABEL: func.func @func +// CHECK: [[ADDR:%.*]] = fir.address_of([[EXTR:@.*]]) : !fir.ref +// CHECK: fir.call @sub2([[ADDR]]) +// CHECK: return +// CHECK: fir.global internal [[EXTR]] constant : i32 { +// CHECK: %{{.*}} = arith.constant 1 : i32 +// CHECK: fir.has_value %{{.*}} : i32 +// CHECK: } + +} diff --git a/flang/test/Transforms/constant-argument-globalisation.fir b/flang/test/Transforms/constant-argument-globalisation.fir new file mode 100644 index 0000000000000..1598f303755cb --- /dev/null +++ b/flang/test/Transforms/constant-argument-globalisation.fir @@ -0,0 +1,66 @@ +// RUN: fir-opt --constant-argument-globalisation-opt < %s | FileCheck %s +// RUN: %flang_fc1 -emit-llvm -flang-deprecated-no-hlfir -O2 -mllvm --disable-constant-argument-globalisation -o - %s | FileCheck --check-prefix=DISABLE %s +module { + func.func @sub1(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "y"}) { + %0 = fir.alloca i32 {adapt.valuebyref} + %1 = fir.alloca f64 {adapt.valuebyref} + %2 = fir.alloca f64 {adapt.valuebyref} + %c1_i32 = arith.constant 1 : i32 + %cst = arith.constant 1.000000e+00 : f64 + %cst_0 = arith.constant 0.000000e+00 : f64 + %3 = fir.declare %arg0 {uniq_name = "_QFsub1Ex"} : (!fir.ref) -> !fir.ref + %4 = fir.declare %arg1 {uniq_name = "_QFsub1Ey"} : (!fir.ref) -> !fir.ref + fir.store %cst_0 to %2 : !fir.ref + %false = arith.constant false + fir.store %cst to %1 : !fir.ref + %false_1 = arith.constant false + fir.store %c1_i32 to %0 : !fir.ref + %false_2 = arith.constant false + fir.call @sub2(%2, %1, %3, %4, %0) fastmath : (!fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref) -> () + return + } + func.func private @sub2(!fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref) + +// CHECK-LABEL: func.func 
@sub1( +// CHECK-SAME: [[ARG0:%.*]]: !fir.ref {{{.*}}}, +// CHECK-SAME: [[ARG1:%.*]]: !fir.ref {{{.*}}}) { +// CHECK: [[X:%.*]] = fir.declare [[ARG0]] {{.*}} +// CHECK: [[Y:%.*]] = fir.declare [[ARG1]] {{.*}} +// CHECK: [[CONST_R0:%.*]] = fir.address_of([[EXTR_0:@.*]]) : !fir.ref +// CHECK: [[CONST_R1:%.*]] = fir.address_of([[EXTR_1:@.*]]) : !fir.ref +// CHECK: [[CONST_I:%.*]] = fir.address_of([[EXTR_2:@.*]]) : !fir.ref +// CHECK: fir.call @sub2([[CONST_R0]], [[CONST_R1]], [[X]], [[Y]], [[CONST_I]]) +// CHECK: return + +// CHECK: fir.global internal [[EXTR_0]] constant : f64 { +// CHECK: %{{.*}} = arith.constant 0.000000e+00 : f64 +// CHECK: fir.has_value %{{.*}} : f64 +// CHECK: } +// CHECK: fir.global internal [[EXTR_1]] constant : f64 { +// CHECK: %{{.*}} = arith.constant 1.000000e+00 : f64 +// CHECK: fir.has_value %{{.*}} : f64 +// CHECK: } +// CHECK: fir.global internal [[EXTR_2]] constant : i32 { +// CHECK: %{{.*}} = arith.constant 1 : i32 +// CHECK: fir.has_value %{{.*}} : i32 +// CHECK: } + +// DISABLE-LABEL: ; ModuleID = +// DISABLE-NOT: @_extruded +// DISABLE: define void @sub1( +// DISABLE-SAME: ptr [[ARG0:%.*]], +// DISABLE-SAME: ptr [[ARG1:%.*]]) +// DISABLE-SAME: { +// DISABLE: [[CONST_I:%.*]] = alloca i32 +// DISABLE: [[CONST_R1:%.*]] = alloca double +// DISABLE: [[CONST_R0:%.*]] = alloca double +// DISABLE: store double 0.0{{.*}}+00, ptr [[CONST_R0]] +// DISABLE: store double 1.0{{.*}}+00, ptr [[CONST_R1]] +// DISABLE: store i32 1, ptr [[CONST_I]] +// DISABLE: call void @sub2(ptr nonnull [[CONST_R0]], +// DISABLE-SAME: ptr nonnull [[CONST_R1]], +// DISABLE-SAME: ptr [[ARG0]], ptr [[ARG1]], +// DISABLE-SAME: ptr nonnull [[CONST_I]]) +// DISABLE: ret void +// DISABLE: } +}