Skip to content

Commit

Permalink
[flang] add -fstack-arrays flag
Browse files Browse the repository at this point in the history
The implementation of -fstack-arrays was added in
https://reviews.llvm.org/D140415

The new macro BoolOptionWithoutMarshalling in Options.td avoids
generating code to store the flags in clang data structures. For
example, writing something like

  defm stack_arrays : BoolOption<"f", "stack-arrays",
                                 CodeGenOpts<"StackArrays">, [...]

would generate code referring to `clang::CodeGenOpts::StackArrays`, which
does not exist.

Differential Revision: https://reviews.llvm.org/D140972
  • Loading branch information
tblah committed Feb 7, 2023
1 parent cc14bf2 commit bf81ba3
Show file tree
Hide file tree
Showing 11 changed files with 102 additions and 18 deletions.
39 changes: 38 additions & 1 deletion clang/include/clang/Driver/Options.td
Expand Up @@ -482,6 +482,40 @@ multiclass BoolGOption<string flag_base, KeyPathAndMacro kpm,
Group<g_Group>;
}

// Works like BoolOption, except that no marshalling information is attached to
// the generated records: this multiclass takes no KeyPathAndMacro argument, so
// the resulting flags are not tied to a key path in an options structure.
// Tools that accept these flags are expected to query the parsed arguments
// themselves.
//
// Parameters:
//   prefix        - forwarded to FlagDefExpanded together with spelling_base
//                   (e.g. "f" with "stack-arrays").
//   spelling_base - base spelling shared by both flags.
//   flag1_base,
//   flag2_base    - the two FlagDefs to expand (e.g. a PosFlag and a NegFlag).
//   suffix        - BothFlags combined with each FlagDef through ApplySuffix;
//                   defaults to BothFlags<[], "">.
//
// Produces two Flag records, named flag1.RecordName and flag2.RecordName.
multiclass BoolOptionWithoutMarshalling<string prefix = "", string spelling_base,
FlagDef flag1_base, FlagDef flag2_base,
BothFlags suffix = BothFlags<[], "">> {
// Expand each FlagDef (with the common suffix applied) into its final form.
defvar flag1 = FlagDefExpanded<ApplySuffix<flag1_base, suffix>.Result, prefix,
NAME, spelling_base>;

defvar flag2 = FlagDefExpanded<ApplySuffix<flag2_base, suffix>.Result, prefix,
NAME, spelling_base>;

// The flags must have different polarity, different values, and only
// one can be implied.
assert !xor(flag1.Polarity, flag2.Polarity),
"the flags must have different polarity: flag1: " #
flag1.Polarity # ", flag2: " # flag2.Polarity;
assert !ne(flag1.Value, flag2.Value),
"the flags must have different values: flag1: " #
flag1.Value # ", flag2: " # flag2.Value;
assert !not(!and(flag1.CanBeImplied, flag2.CanBeImplied)),
"only one of the flags can be implied: flag1: " #
flag1.CanBeImplied # ", flag2: " # flag2.CanBeImplied;

// Pick the flag that can be implied; the assert above rules out both being
// implicable, and flag2 is selected when flag1 cannot be implied.
defvar implied = !if(flag1.CanBeImplied, flag1, flag2);

// Both records are plain "-"-prefixed Flags carrying their own spelling,
// option flags and help text. Note that both receive the same ImpliedByAnyOf,
// taken from `implied`, rather than one per flag.
def flag1.RecordName : Flag<["-"], flag1.Spelling>, Flags<flag1.OptionFlags>,
HelpText<flag1.Help>,
ImpliedByAnyOf<implied.ImpliedBy, implied.ValueAsCode>
{}
def flag2.RecordName : Flag<["-"], flag2.Spelling>, Flags<flag2.OptionFlags>,
HelpText<flag2.Help>,
ImpliedByAnyOf<implied.ImpliedBy, implied.ValueAsCode>
{}
}

// FIXME: Diagnose if target does not support protected visibility.
class MarshallingInfoVisibility<KeyPathAndMacro kpm, code default>
: MarshallingInfoEnum<kpm, default>,
Expand Down Expand Up @@ -4986,7 +5020,6 @@ defm recursive : BooleanFFlag<"recursive">, Group<gfortran_Group>;
defm repack_arrays : BooleanFFlag<"repack-arrays">, Group<gfortran_Group>;
defm second_underscore : BooleanFFlag<"second-underscore">, Group<gfortran_Group>;
defm sign_zero : BooleanFFlag<"sign-zero">, Group<gfortran_Group>;
defm stack_arrays : BooleanFFlag<"stack-arrays">, Group<gfortran_Group>;
defm underscoring : BooleanFFlag<"underscoring">, Group<gfortran_Group>;
defm whole_file : BooleanFFlag<"whole-file">, Group<gfortran_Group>;

Expand Down Expand Up @@ -5066,6 +5099,10 @@ defm implicit_none : OptInFC1FFlag<"implicit-none", "No implicit typing allowed
def fno_automatic : Flag<["-"], "fno-automatic">, Group<f_Group>,
HelpText<"Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE">;

// -fstack-arrays / -fno-stack-arrays. Declared with
// BoolOptionWithoutMarshalling rather than BoolOption because the StackArrays
// code generation option exists only in Flang's CodeGenOptions.def; a
// marshalled BoolOption would generate code referring to a Clang CodeGenOpts
// member that does not exist.
defm stack_arrays : BoolOptionWithoutMarshalling<"f", "stack-arrays",
PosFlag<SetTrue, [], "Attempt to allocate array temporaries on the stack, no matter their size">,
NegFlag<SetFalse, [], "Allocate array temporaries on the heap (default)">>;

} // let Flags = [FC1Option, FlangOption, FlangOnlyOption]

def J : JoinedOrSeparate<["-"], "J">,
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/Driver/ToolChains/Flang.cpp
Expand Up @@ -57,6 +57,13 @@ void Flang::addOtherOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
options::OPT_fintrinsic_modules_path, options::OPT_pedantic,
options::OPT_std_EQ, options::OPT_W_Joined,
options::OPT_fconvert_EQ, options::OPT_fpass_plugin_EQ});

Arg *stackArrays =
Args.getLastArg(options::OPT_Ofast, options::OPT_fstack_arrays,
options::OPT_fno_stack_arrays);
if (stackArrays &&
!stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
CmdArgs.push_back("-fstack-arrays");
}

void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
Expand Down
8 changes: 3 additions & 5 deletions flang/docs/FlangDriver.md
Expand Up @@ -546,8 +546,7 @@ See the
documentation for more details.

## Ofast and Fast Math
`-Ofast` in Flang means `-O3 -ffast-math`. `-fstack-arrays` will be added to
`-Ofast` in the future (https://github.com/llvm/llvm-project/issues/59231).
`-Ofast` in Flang means `-O3 -ffast-math -fstack-arrays`.

`-ffast-math` means the following:
- `-fno-honor-infinities`
Expand All @@ -570,9 +569,8 @@ to zero.

### Comparison with GCC/GFortran
GCC/GFortran translate `-Ofast` to
`-O3 -ffast-math -fstack-arrays -fno-semantic-interposition`. `-fstack-arrays`
is TODO for Flang.
`-fno-semantic-interposition` is not used because clang does not enable this as
`-O3 -ffast-math -fstack-arrays -fno-semantic-interposition`.
`-fno-semantic-interposition` is not used because Clang does not enable this as
part of `-Ofast` as the default behaviour is similar.

GCC/GFortran has a wider definition of `-ffast-math`: also including
Expand Down
1 change: 1 addition & 0 deletions flang/include/flang/Frontend/CodeGenOptions.def
Expand Up @@ -26,6 +26,7 @@ CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new

CODEGENOPT(PICLevel, 2, 0) ///< PIC level of the LLVM module.
CODEGENOPT(IsPIE, 1, 0) ///< PIE level is the same as PIC Level.
CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)

ENUM_CODEGENOPT(RelocationModel, llvm::Reloc::Model, 3, llvm::Reloc::PIC_) ///< Name of the relocation model to use.

Expand Down
18 changes: 12 additions & 6 deletions flang/include/flang/Tools/CLOptions.inc
Expand Up @@ -166,8 +166,9 @@ inline void addExternalNameConversionPass(mlir::PassManager &pm) {
/// incremental conversion of FIR.
///
/// \param pm - MLIR pass manager that will hold the pipeline definition
inline void createDefaultFIROptimizerPassPipeline(
mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) {
inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
llvm::OptimizationLevel optLevel = defaultOptLevel,
bool stackArrays = false) {
// simplify the IR
mlir::GreedyRewriteConfig config;
config.enableRegionSimplification = false;
Expand All @@ -182,7 +183,11 @@ inline void createDefaultFIROptimizerPassPipeline(
pm.addPass(fir::createAlgebraicSimplificationPass(config));
}
pm.addPass(mlir::createCSEPass());
fir::addMemoryAllocationOpt(pm);

if (stackArrays)
pm.addPass(fir::createStackArraysPass());
else
fir::addMemoryAllocationOpt(pm);

// The default inliner pass adds the canonicalizer pass with the default
// configuration. Create the inliner pass with tco config.
Expand Down Expand Up @@ -220,10 +225,11 @@ inline void createDefaultFIRCodeGenPassPipeline(
/// \param pm - MLIR pass manager that will hold the pipeline definition
/// \param optLevel - optimization level used for creating FIR optimization
/// passes pipeline
inline void createMLIRToLLVMPassPipeline(
mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) {
inline void createMLIRToLLVMPassPipeline(mlir::PassManager &pm,
llvm::OptimizationLevel optLevel = defaultOptLevel,
bool stackArrays = false) {
// Add default optimizer pass pipeline.
fir::createDefaultFIROptimizerPassPipeline(pm, optLevel);
fir::createDefaultFIROptimizerPassPipeline(pm, optLevel, stackArrays);

// Add codegen pass pipeline.
fir::createDefaultFIRCodeGenPassPipeline(pm, optLevel);
Expand Down
5 changes: 5 additions & 0 deletions flang/lib/Frontend/CompilerInvocation.cpp
Expand Up @@ -126,6 +126,11 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
clang::driver::options::OPT_fno_debug_pass_manager, false))
opts.DebugPassManager = 1;

if (args.hasFlag(clang::driver::options::OPT_fstack_arrays,
clang::driver::options::OPT_fno_stack_arrays, false)) {
opts.StackArrays = 1;
}

for (auto *a : args.filtered(clang::driver::options::OPT_fpass_plugin_EQ))
opts.LLVMPassPlugins.push_back(a->getValue());

Expand Down
2 changes: 1 addition & 1 deletion flang/lib/Frontend/FrontendActions.cpp
Expand Up @@ -545,7 +545,7 @@ void CodeGenAction::generateLLVMIR() {
pm.enableVerifier(/*verifyPasses=*/true);

// Create the pass pipeline
fir::createMLIRToLLVMPassPipeline(pm, level);
fir::createMLIRToLLVMPassPipeline(pm, level, opts.StackArrays);
mlir::applyPassManagerCLOptions(pm);

// run the pass manager
Expand Down
2 changes: 2 additions & 0 deletions flang/test/Driver/driver-help-hidden.f90
Expand Up @@ -47,10 +47,12 @@
! CHECK-NEXT: -fno-color-diagnostics Disable colors in diagnostics
! CHECK-NEXT: -fno-integrated-as Disable the integrated assembler
! CHECK-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
! CHECK-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
! CHECK-NEXT: -fopenacc Enable OpenACC
! CHECK-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code.
! CHECK-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
! CHECK-NEXT: -freciprocal-math Allow division operations to be reassociated
! CHECK-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size
! CHECK-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages
! CHECK-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV.
! CHECK-NEXT: -help Display available options
Expand Down
4 changes: 4 additions & 0 deletions flang/test/Driver/driver-help.f90
Expand Up @@ -45,10 +45,12 @@
! HELP-NEXT: -fno-color-diagnostics Disable colors in diagnostics
! HELP-NEXT: -fno-integrated-as Disable the integrated assembler
! HELP-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
! HELP-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
! HELP-NEXT: -fopenacc Enable OpenACC
! HELP-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code.
! HELP-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
! HELP-NEXT: -freciprocal-math Allow division operations to be reassociated
! HELP-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size
! HELP-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages
! HELP-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV.
! HELP-NEXT: -help Display available options
Expand Down Expand Up @@ -133,10 +135,12 @@
! HELP-FC1-NEXT: -fno-debug-pass-manager Disables debug printing for the new pass manager
! HELP-FC1-NEXT: -fno-reformat Dump the cooked character stream in -E mode
! HELP-FC1-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
! HELP-FC1-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
! HELP-FC1-NEXT: -fopenacc Enable OpenACC
! HELP-FC1-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code.
! HELP-FC1-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
! HELP-FC1-NEXT: -freciprocal-math Allow division operations to be reassociated
! HELP-FC1-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size
! HELP-FC1-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages
! HELP-FC1-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV.
! HELP-FC1-NEXT: -help Display available options
Expand Down
20 changes: 15 additions & 5 deletions flang/test/Driver/fast_math.f90
@@ -1,25 +1,35 @@
! Test for correct forwarding of fast-math flags from the compiler driver to the
! frontend driver

! -Ofast => -ffast-math -O3
! -Ofast => -ffast-math -O3 -fstack-arrays
! RUN: %flang -Ofast -fsyntax-only -### %s -o %t 2>&1 \
! RUN: | FileCheck --check-prefix=CHECK-OFAST %s
! CHECK-OFAST: -fc1
! CHECK-OFAST-SAME: -ffast-math
! CHECK-OFAST-SAME: -fstack-arrays
! CHECK-OFAST-SAME: -O3

! TODO: update once -fstack-arrays is added
! RUN: %flang -fstack-arrays -fsyntax-only %s -o %t 2>&1 \
! RUN: %flang -fstack-arrays -fsyntax-only -### %s -o %t 2>&1 \
! RUN: | FileCheck --check-prefix=CHECK-STACK-ARRAYS %s
! CHECK-STACK-ARRAYS: warning: argument unused during compilation: '-fstack-arrays'
! CHECK-STACK-ARRAYS: -fc1
! CHECK-STACK-ARRAYS-SAME: -fstack-arrays

! -Ofast -fno-fast-math => -O3
! -Ofast -fno-fast-math => -O3 -fstack-arrays
! RUN: %flang -Ofast -fno-fast-math -fsyntax-only -### %s -o %t 2>&1 \
! RUN: | FileCheck --check-prefix=CHECK-OFAST-NO-FAST %s
! CHECK-OFAST-NO-FAST: -fc1
! CHECK-OFAST-NO-FAST-NOT: -ffast-math
! CHECK-OFAST-NO-FAST-SAME: -fstack-arrays
! CHECK-OFAST-NO-FAST-SAME: -O3

! -Ofast -fno-stack-arrays => -O3 -ffast-math
! RUN: %flang -Ofast -fno-stack-arrays -fsyntax-only -### %s -o %t 2>&1 \
! RUN: | FileCheck --check-prefix=CHECK-OFAST-NO-SA %s
! CHECK-OFAST-NO-SA: -fc1
! CHECK-OFAST-NO-SA-SAME: -ffast-math
! CHECK-OFAST-NO-SA-NOT: -fstack-arrays
! CHECK-OFAST-NO-SA-SAME: -O3

! -ffast-math => -ffast-math
! RUN: %flang -ffast-math -fsyntax-only -### %s -o %t 2>&1 \
! RUN: | FileCheck --check-prefix=CHECK-FFAST %s
Expand Down
14 changes: 14 additions & 0 deletions flang/test/Transforms/stack-arrays.f90
@@ -1,5 +1,10 @@
! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --array-value-copy | fir-opt --stack-arrays | FileCheck %s

! In order to verify the whole MLIR pipeline, make the driver generate LLVM IR.
! This run only checks that -fstack-arrays enables the stack-arrays pass, so
! only the first example is checked.
! RUN: %flang_fc1 -emit-llvm -o - -fstack-arrays %s | FileCheck --check-prefix=LLVM-IR %s

! check simple array value copy case
subroutine array_value_copy_simple(arr)
integer, intent(inout) :: arr(4)
Expand All @@ -14,6 +19,15 @@ subroutine array_value_copy_simple(arr)
! CHECK: return
! CHECK-NEXT: }

! LLVM-IR: array_value_copy_simple
! LLVM-IR-NOT: malloc
! LLVM-IR-NOT: free
! LLVM-IR: alloca [4 x i32]
! LLVM-IR-NOT: malloc
! LLVM-IR-NOT: free
! LLVM-IR: ret void
! LLVM-IR-NEXT: }

! check complex array value copy case
module stuff
type DerivedWithAllocatable
Expand Down

0 comments on commit bf81ba3

Please sign in to comment.