diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 096df56d0f183..2ef609831637e 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2750,5 +2750,8 @@ def fno_unsafe_math_optimizations : Flag<["-"], "fno-unsafe-math-optimizations">
   Group<f_Group>;
 def fassociative_math : Flag<["-"], "fassociative-math">, Visibility<[ClangOption, FlangOption]>, Group<f_Group>;
 def fno_associative_math : Flag<["-"], "fno-associative-math">, Visibility<[ClangOption, FlangOption]>, Group<f_Group>;
+def fno_fast_real_mod : Flag<["-"], "fno-fast-real-mod">,
+  Group<f_Group>, Visibility<[FlangOption, FC1Option]>,
+  HelpText<"Disable optimization of MOD for REAL types in presence of -ffast-math">;
 defm reciprocal_math : BoolFOption<"reciprocal-math",
   LangOpts<"AllowRecip">, DefaultFalse,
# NOTE: extraction gap. The available text is cut off after "PosFlag" (the
# start of the next context line of the hunk above) and resumes inside an
# added block that sets the "fir.no_fast_real_mod" module attribute.
# Everything in between is missing and is not reconstructed here. The
# surviving tail of that hunk follows; its file header and hunk header are
# part of the lost text.
+    [...]setAttr(
+        mlir::StringAttr::get(mod.getContext(),
+                              llvm::Twine{"fir.no_fast_real_mod"}),
+        mlir::BoolAttr::get(mod.getContext(), true));
+  }
+
   // Create a parse tree and lower it to FIR
   parseAndLowerTree(ci, lb);
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 71d35e37bbe94..de7694ffd468c 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -6989,8 +6989,40 @@ mlir::Value IntrinsicLibrary::genMergeBits(mlir::Type resultType,
 }
 
 // MOD
+/// Generate inline code for MOD(a, p) on REAL operands, computed as
+/// a - convert(convert(a / p)) * p, where the quotient makes a round trip
+/// through a signed integer type with the same bit width as the type of
+/// \p a. Every arithmetic operation created here carries only the
+/// `contract` fast-math flag.
+static mlir::Value genFastMod(fir::FirOpBuilder &builder, mlir::Location loc,
+                              mlir::Value a, mlir::Value p) {
+  auto fastmathFlags = mlir::arith::FastMathFlags::contract;
+  auto fastmathAttr =
+      mlir::arith::FastMathFlagsAttr::get(builder.getContext(), fastmathFlags);
+  mlir::Value divResult =
+      mlir::arith::DivFOp::create(builder, loc, a, p, fastmathAttr);
+  mlir::Type intType = builder.getIntegerType(
+      a.getType().getIntOrFloatBitWidth(), /*signed=*/true);
+  mlir::Value intResult = builder.createConvert(loc, intType, divResult);
+  mlir::Value cnvResult = builder.createConvert(loc, a.getType(), intResult);
+  mlir::Value mulResult =
+      mlir::arith::MulFOp::create(builder, loc, cnvResult, p, fastmathAttr);
+  mlir::Value subResult =
+      mlir::arith::SubFOp::create(builder, loc, a, mulResult, fastmathAttr);
+  return subResult;
+}
+
 mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
                                      llvm::ArrayRef<mlir::Value> args) {
+  auto mod = builder.getModule();
+  bool dontUseFastRealMod = false;
+  bool canUseApprox = mlir::arith::bitEnumContainsAny(
+      builder.getFastMathFlags(), mlir::arith::FastMathFlags::afn);
+  // The module attribute fir.no_fast_real_mod, when present and true, forces
+  // the runtime implementation of real MOD even if `afn` is enabled.
+  if (auto attr = mod->getAttrOfType<mlir::BoolAttr>("fir.no_fast_real_mod"))
+    dontUseFastRealMod = attr.getValue();
+
   assert(args.size() == 2);
   if (resultType.isUnsignedInteger()) {
     mlir::Type signlessType = mlir::IntegerType::get(
@@ -7002,9 +7034,16 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
   if (mlir::isa<mlir::IntegerType>(resultType))
     return mlir::arith::RemSIOp::create(builder, loc, args[0], args[1]);
 
+  if (resultType.isFloat() && canUseApprox && !dontUseFastRealMod) {
+    // Treat MOD as an approximate function and code-gen inline code
+    // instead of calling into the Fortran runtime library.
+    return builder.createConvert(loc, resultType,
+                                 genFastMod(builder, loc, args[0], args[1]));
+  }
+
   // Use runtime.
   return builder.createConvert(
       loc, resultType, fir::runtime::genMod(builder, loc, args[0], args[1]));
 }
 
 // MODULO
diff --git a/flang/test/Driver/fast-real-mod.f90 b/flang/test/Driver/fast-real-mod.f90
new file mode 100644
index 0000000000000..4ea9b26e64753
--- /dev/null
+++ b/flang/test/Driver/fast-real-mod.f90
@@ -0,0 +1,7 @@
+! RUN: %flang -fno-fast-real-mod -### -c %s 2>&1 | FileCheck %s -check-prefix CHECK-NO-FAST-REAL-MOD
+
+! CHECK-NO-FAST-REAL-MOD: "-fno-fast-real-mod"
+
+program test
+  ! nothing to be done in here
+end program test
diff --git a/flang/test/Lower/Intrinsics/fast-real-mod.f90 b/flang/test/Lower/Intrinsics/fast-real-mod.f90
new file mode 100644
index 0000000000000..f80f7203ad1a2
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/fast-real-mod.f90
@@ -0,0 +1,83 @@
+! RUN: %flang_fc1 -ffast-math -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%}
+! RUN: %flang_fc1 -ffast-math -fno-fast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-NFRM%if target=x86_64{{.*}} %{,CHECK-NFRM-KIND10%}%if flang-supports-f128-math %{,CHECK-NFRM-KIND16%}
+
+! TODO: also verify that fir.no_fast_real_mod is absent when -fno-fast-real-mod is not given
+! CHECK-NFRM: module attributes {{{.*}}fir.no_fast_real_mod = true{{.*}}}
+
+! CHECK-LABEL: @_QPmod_real4
+subroutine mod_real4(r, a, p)
+  implicit none
+  real(kind=4) :: r, a, p
+! CHECK: %[[A:.*]] = fir.declare{{.*}}a"
+! CHECK: %[[P:.*]] = fir.declare{{.*}}p"
+! CHECK: %[[R:.*]] = fir.declare{{.*}}r"
+! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]]
+! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]]
+! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f32
+! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f32) -> si32
+! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si32) -> f32
+! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f32
+! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f32
+! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f32>
+! CHECK-NFRM: fir.call @_FortranAModReal4(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f32, f32, !fir.ref<i8>, i32) -> f32
+  r = mod(a, p)
+end subroutine mod_real4
+
+! CHECK-LABEL: @_QPmod_real8
+subroutine mod_real8(r, a, p)
+  implicit none
+  real(kind=8) :: r, a, p
+! CHECK: %[[A:.*]] = fir.declare{{.*}}a"
+! CHECK: %[[P:.*]] = fir.declare{{.*}}p"
+! CHECK: %[[R:.*]] = fir.declare{{.*}}r"
+! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]]
+! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]]
+! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f64
+! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f64) -> si64
+! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si64) -> f64
+! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f64
+! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f64
+! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f64>
+! CHECK-NFRM: fir.call @_FortranAModReal8(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f64, f64, !fir.ref<i8>, i32) -> f64
+  r = mod(a, p)
+end subroutine mod_real8
+
+! CHECK-LABEL: @_QPmod_real10
+subroutine mod_real10(r, a, p)
+  implicit none
+  integer, parameter :: kind10 = merge(10, 4, selected_real_kind(p=18).eq.10)
+  real(kind=kind10) :: r, a, p
+! CHECK-KIND10: %[[A:.*]] = fir.declare{{.*}}a"
+! CHECK-KIND10: %[[P:.*]] = fir.declare{{.*}}p"
+! CHECK-KIND10: %[[R:.*]] = fir.declare{{.*}}r"
+! CHECK-KIND10: %[[A_LOAD:.*]] = fir.load %[[A]]
+! CHECK-KIND10: %[[P_LOAD:.*]] = fir.load %[[P]]
+! CHECK-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f80
+! CHECK-KIND10: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80
+! CHECK-KIND10: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80
+! CHECK-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f80
+! CHECK-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f80
+! CHECK-KIND10: fir.store %[[SUB]] to %[[R]] : !fir.ref<f80>
+! CHECK-NFRM-KIND10: fir.call @_FortranAModReal10(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f80, f80, !fir.ref<i8>, i32) -> f80
+  r = mod(a, p)
+end subroutine mod_real10
+
+! CHECK-LABEL: @_QPmod_real16
+subroutine mod_real16(r, a, p)
+  implicit none
+  integer, parameter :: kind16 = merge(16, 4, selected_real_kind(p=33).eq.16)
+  real(kind=kind16) :: r, a, p
+! CHECK-KIND16: %[[A:.*]] = fir.declare{{.*}}a"
+! CHECK-KIND16: %[[P:.*]] = fir.declare{{.*}}p"
+! CHECK-KIND16: %[[R:.*]] = fir.declare{{.*}}r"
+! CHECK-KIND16: %[[A_LOAD:.*]] = fir.load %[[A]]
+! CHECK-KIND16: %[[P_LOAD:.*]] = fir.load %[[P]]
+! CHECK-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f128
+! CHECK-KIND16: %[[CV1:.*]] = fir.convert %[[DIV]] : (f128) -> si128
+! CHECK-KIND16: %[[CV2:.*]] = fir.convert %[[CV1]] : (si128) -> f128
+! CHECK-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f128
+! CHECK-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f128
+! CHECK-KIND16: fir.store %[[SUB]] to %[[R]] : !fir.ref<f128>
+! CHECK-NFRM-KIND16: fir.call @_FortranAModReal16(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f128, f128, !fir.ref<i8>, i32) -> f128
+  r = mod(a, p)
+end subroutine mod_real16