18 changes: 13 additions & 5 deletions flang/lib/Lower/OpenMP/ReductionProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#define FORTRAN_LOWER_REDUCTIONPROCESSOR_H

#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Parser/parse-tree.h"
#include "flang/Semantics/symbol.h"
#include "flang/Semantics/type.h"
Expand Down Expand Up @@ -71,11 +72,15 @@ class ReductionProcessor {
static const Fortran::semantics::SourceName
getRealName(const Fortran::parser::ProcedureDesignator &pd);

static std::string getReductionName(llvm::StringRef name, mlir::Type ty);
static bool
doReductionByRef(const llvm::SmallVectorImpl<mlir::Value> &reductionVars);

static std::string getReductionName(llvm::StringRef name, mlir::Type ty,
bool isByRef);

static std::string getReductionName(
Fortran::parser::DefinedOperator::IntrinsicOperator intrinsicOp,
mlir::Type ty);
mlir::Type ty, bool isByRef);

/// This function returns the identity value of the operator \p
/// reductionOpName. For example:
Expand Down Expand Up @@ -103,9 +108,11 @@ class ReductionProcessor {
/// symbol table. The declaration has a constant initializer with the neutral
/// value `initValue`, and the reduction combiner carried over from `reduce`.
/// TODO: Generalize this for non-integer types, add atomic region.
static mlir::omp::ReductionDeclareOp createReductionDecl(
fir::FirOpBuilder &builder, llvm::StringRef reductionOpName,
const ReductionIdentifier redId, mlir::Type type, mlir::Location loc);
static mlir::omp::ReductionDeclareOp
createReductionDecl(fir::FirOpBuilder &builder,
llvm::StringRef reductionOpName,
const ReductionIdentifier redId, mlir::Type type,
mlir::Location loc, bool isByRef);

/// Creates a reduction declaration and associates it with an OpenMP block
/// directive.
Expand All @@ -124,6 +131,7 @@ mlir::Value
ReductionProcessor::getReductionOperation(fir::FirOpBuilder &builder,
mlir::Type type, mlir::Location loc,
mlir::Value op1, mlir::Value op2) {
type = fir::unwrapRefType(type);
assert(type.isIntOrIndexOrFloat() &&
"only integer and float types are currently supported");
if (type.isIntOrIndex())
Expand Down
9 changes: 6 additions & 3 deletions flang/lib/Optimizer/Builder/IntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -922,6 +922,8 @@ mlir::Value genComplexMathOp(fir::FirOpBuilder &builder, mlir::Location loc,
constexpr auto FuncTypeReal16Real16 = genFuncType<Ty::Real<16>, Ty::Real<16>>;
constexpr auto FuncTypeReal16Real16Real16 =
genFuncType<Ty::Real<16>, Ty::Real<16>, Ty::Real<16>>;
constexpr auto FuncTypeReal16Real16Real16Real16 =
genFuncType<Ty::Real<16>, Ty::Real<16>, Ty::Real<16>, Ty::Real<16>>;
constexpr auto FuncTypeReal16Integer4Real16 =
genFuncType<Ty::Real<16>, Ty::Integer<4>, Ty::Real<16>>;
constexpr auto FuncTypeInteger4Real16 =
Expand Down Expand Up @@ -1143,6 +1145,8 @@ static constexpr MathOperation mathOperations[] = {
{"fma", "llvm.fma.f64",
genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
genMathOp<mlir::math::FmaOp>},
{"fma", RTNAME_STRING(FmaF128), FuncTypeReal16Real16Real16Real16,
genLibF128Call},
{"gamma", "tgammaf", genFuncType<Ty::Real<4>, Ty::Real<4>>, genLibCall},
{"gamma", "tgamma", genFuncType<Ty::Real<8>, Ty::Real<8>>, genLibCall},
{"gamma", RTNAME_STRING(TgammaF128), FuncTypeReal16Real16, genLibF128Call},
Expand Down Expand Up @@ -5208,6 +5212,8 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
// MODULO
mlir::Value IntrinsicLibrary::genModulo(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {
// TODO: we'd better generate a runtime call here, when runtime error
// checking is needed (to detect 0 divisor) or when precise math is requested.
assert(args.size() == 2);
// No floored modulo op in LLVM/MLIR yet. TODO: add one to MLIR.
// In the meantime, use a simple inlined implementation based on truncated
Expand All @@ -5233,10 +5239,7 @@ mlir::Value IntrinsicLibrary::genModulo(mlir::Type resultType,
return builder.create<mlir::arith::SelectOp>(loc, mustAddP, remPlusP,
remainder);
}
// Real case
if (resultType == mlir::FloatType::getF128(builder.getContext()))

TODO(loc, "REAL(KIND=16): in MODULO intrinsic");
auto remainder = builder.create<mlir::arith::RemFOp>(loc, args[0], args[1]);
mlir::Value zero = builder.createRealZeroConstant(loc, remainder.getType());
auto remainderIsNotZero = builder.create<mlir::arith::CmpFOp>(
Expand Down
29 changes: 27 additions & 2 deletions flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,24 @@

using namespace Fortran::runtime;

namespace {
/// Hand-written runtime-function key for the REAL(16) variant of the
/// RANDOM_NUMBER runtime entry point. genRandomNumber selects it when the
/// harvest element type is f128.
struct ForcedRandomNumberReal16 {
  /// Quoted symbol name of the runtime entry point.
  static constexpr const char *name = ExpandAndQuoteKey(RTNAME(RandomNumber16));

  /// Returns a builder for the MLIR type of the entry point:
  /// (descriptor, const char *, int) -> none.
  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
    return [](mlir::MLIRContext *context) {
      // Argument types in call order: harvest descriptor, source file name,
      // source line number.
      auto harvestTy =
          fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(
              context);
      auto sourceFileTy = fir::runtime::getModel<const char *>()(context);
      auto sourceLineTy = fir::runtime::getModel<int>()(context);
      auto resultTy = mlir::NoneType::get(context);
      return mlir::FunctionType::get(
          context, {harvestTy, sourceFileTy, sourceLineTy}, resultTy);
    };
  }
};
} // namespace

mlir::Value fir::runtime::genAssociated(fir::FirOpBuilder &builder,
mlir::Location loc, mlir::Value pointer,
mlir::Value target) {
Expand Down Expand Up @@ -100,8 +118,15 @@ void fir::runtime::genRandomInit(fir::FirOpBuilder &builder, mlir::Location loc,

void fir::runtime::genRandomNumber(fir::FirOpBuilder &builder,
mlir::Location loc, mlir::Value harvest) {
mlir::func::FuncOp func =
fir::runtime::getRuntimeFunc<mkRTKey(RandomNumber)>(loc, builder);
mlir::func::FuncOp func;
auto boxEleTy = fir::dyn_cast_ptrOrBoxEleTy(harvest.getType());
auto eleTy = fir::unwrapSequenceType(boxEleTy);
if (eleTy.isF128()) {
func = fir::runtime::getRuntimeFunc<ForcedRandomNumberReal16>(loc, builder);
} else {
func = fir::runtime::getRuntimeFunc<mkRTKey(RandomNumber)>(loc, builder);
}

mlir::FunctionType funcTy = func.getFunctionType();
mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
mlir::Value sourceLine =
Expand Down
2 changes: 1 addition & 1 deletion flang/lib/Semantics/check-omp-structure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2948,7 +2948,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause::UseDevicePtr &x) {
if (name->symbol) {
if (!(IsBuiltinCPtr(*(name->symbol)))) {
context_.Say(itr->second->source,
"'%s' in USE_DEVICE_PTR clause must be of type C_PTR"_err_en_US,
"Use of non-C_PTR type '%s' in USE_DEVICE_PTR is deprecated, use USE_DEVICE_ADDR instead"_warn_en_US,
name->ToString());
} else {
useDevicePtrNameList.push_back(*name);
Expand Down
2 changes: 2 additions & 0 deletions flang/runtime/Float128Math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ set(sources
exp.cpp
exponent.cpp
floor.cpp
fma.cpp
fraction.cpp
hypot.cpp
j0.cpp
Expand All @@ -48,6 +49,7 @@ set(sources
nearest.cpp
norm2.cpp
pow.cpp
random.cpp
round.cpp
rrspacing.cpp
scale.cpp
Expand Down
23 changes: 23 additions & 0 deletions flang/runtime/Float128Math/fma.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//===-- runtime/Float128Math/fma.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "math-entries.h"

namespace Fortran::runtime {
extern "C" {

#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
// REAL(16) fused multiply-add entry point. Only compiled when long double has
// a 113-bit mantissa or HAS_FLOAT128 is set. The arguments are forwarded
// unchanged to the Fma<true> wrapper declared in math-entries.h.
CppTypeFor<TypeCategory::Real, 16> RTDEF(FmaF128)(
    CppTypeFor<TypeCategory::Real, 16> x, CppTypeFor<TypeCategory::Real, 16> y,
    CppTypeFor<TypeCategory::Real, 16> z) {
  auto fused = Fma<true>::invoke(x, y, z);
  return fused;
}
#endif

} // extern "C"
} // namespace Fortran::runtime
3 changes: 3 additions & 0 deletions flang/runtime/Float128Math/math-entries.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ DEFINE_FALLBACK_F128(Erf)
DEFINE_FALLBACK_F128(Erfc)
DEFINE_FALLBACK_F128(Exp)
DEFINE_FALLBACK_F128(Floor)
DEFINE_FALLBACK_F128(Fma)
DEFINE_FALLBACK_F128(Frexp)
DEFINE_FALLBACK_F128(Hypot)
DEFINE_FALLBACK_I32(Ilogb)
Expand Down Expand Up @@ -124,6 +125,7 @@ DEFINE_SIMPLE_ALIAS(Erf, erfq)
DEFINE_SIMPLE_ALIAS(Erfc, erfcq)
DEFINE_SIMPLE_ALIAS(Exp, expq)
DEFINE_SIMPLE_ALIAS(Floor, floorq)
DEFINE_SIMPLE_ALIAS(Fma, fmaq)
DEFINE_SIMPLE_ALIAS(Frexp, frexpq)
DEFINE_SIMPLE_ALIAS(Hypot, hypotq)
DEFINE_SIMPLE_ALIAS(Ilogb, ilogbq)
Expand Down Expand Up @@ -177,6 +179,7 @@ DEFINE_SIMPLE_ALIAS(Erf, std::erf)
DEFINE_SIMPLE_ALIAS(Erfc, std::erfc)
DEFINE_SIMPLE_ALIAS(Exp, std::exp)
DEFINE_SIMPLE_ALIAS(Floor, std::floor)
DEFINE_SIMPLE_ALIAS(Fma, std::fma)
DEFINE_SIMPLE_ALIAS(Frexp, std::frexp)
DEFINE_SIMPLE_ALIAS(Hypot, std::hypot)
DEFINE_SIMPLE_ALIAS(Ilogb, std::ilogb)
Expand Down
23 changes: 23 additions & 0 deletions flang/runtime/Float128Math/random.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//===-- runtime/Float128Math/random.cpp -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "math-entries.h"
#include "numeric-template-specs.h"
#include "random-templates.h"

using namespace Fortran::runtime::random;
extern "C" {

#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
// REAL(16) entry point for RANDOM_NUMBER: fills `harvest` through the shared
// Generate template with 113 bits of precision. `source` and `line` are part
// of the entry point's signature but are not used in this body.
void RTDEF(RandomNumber16)(
    const Descriptor &harvest, const char *source, int line) {
  using Real16 = CppTypeFor<TypeCategory::Real, 16>;
  Generate<Real16, 113>(harvest);
}
#endif

} // extern "C"
87 changes: 87 additions & 0 deletions flang/runtime/random-templates.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
//===-- runtime/random-templates.h ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_RUNTIME_RANDOM_TEMPLATES_H_
#define FORTRAN_RUNTIME_RANDOM_TEMPLATES_H_

#include "lock.h"
#include "numeric-templates.h"
#include "flang/Runtime/descriptor.h"
#include <algorithm>
#include <random>

namespace Fortran::runtime::random {

// Newer "Minimum standard", recommended by Park, Miller, and Stockmeyer in
// 1993. Same as C++17 std::minstd_rand, but explicitly instantiated for
// permanence.
using Generator =
std::linear_congruential_engine<std::uint_fast32_t, 48271, 0, 2147483647>;

using GeneratedWord = typename Generator::result_type;
static constexpr std::uint64_t range{
static_cast<std::uint64_t>(Generator::max() - Generator::min() + 1)};
static constexpr bool rangeIsPowerOfTwo{(range & (range - 1)) == 0};
static constexpr int rangeBits{
64 - common::LeadingZeroBitCount(range) - !rangeIsPowerOfTwo};

extern Lock lock;
extern Generator generator;
extern std::optional<GeneratedWord> nextValue;

// Returns the next generated word. A value stashed in `nextValue` takes
// priority and is consumed (the optional is cleared); otherwise a fresh word
// is drawn from `generator`.
// Call only with lock held
static GeneratedWord GetNextValue() {
  if (!nextValue.has_value()) {
    return generator();
  }
  const GeneratedWord pending{*nextValue};
  nextValue.reset();
  return pending;
}

// Fills every element of `harvest` with a REAL value that satisfies
// 0.0 <= value < 1.0, assembled from generator output.
//   REAL - element type stored into the descriptor.
//   PREC - number of random bits kept per element.
// The shared lock is taken once and held for the whole fill.
template <typename REAL, int PREC>
inline void Generate(const Descriptor &harvest) {
// Accumulator width in bits: at least PREC, and at least the size of one
// generated word (8 bits per byte).
static constexpr std::size_t minBits{
std::max<std::size_t>(PREC, 8 * sizeof(GeneratedWord))};
using Int = common::HostUnsignedIntType<minBits>;
// Generator draws per element: ceil(PREC / rangeBits).
static constexpr std::size_t words{
static_cast<std::size_t>(PREC + rangeBits - 1) / rangeBits};
std::size_t elements{harvest.Elements()};
// Subscript cursor, started at the descriptor's lower bounds.
SubscriptValue at[maxRank];
harvest.GetLowerBounds(at);
{
// GetNextValue() must only be called with the lock held.
CriticalSection critical{lock};
for (std::size_t j{0}; j < elements; ++j) {
// Rejection loop: keep drawing until the candidate passes the range check.
while (true) {
// The first draw is taken as-is; later draws are rebased by
// generator.min(), masked to rangeBits bits and appended below it.
Int fraction{GetNextValue()};
if constexpr (words > 1) {
for (std::size_t k{1}; k < words; ++k) {
static constexpr auto rangeMask{
(GeneratedWord{1} << rangeBits) - 1};
GeneratedWord word{(GetNextValue() - generator.min()) & rangeMask};
fraction = (fraction << rangeBits) | word;
}
}
// Discard the low-order bits drawn in excess of PREC.
fraction >>= words * rangeBits - PREC;
// NOTE(review): LDEXPTy<REAL>::compute is presumably an ldexp-style scaling
// by 2^-(PREC + 1) -- confirm in numeric-templates.h.
REAL next{
LDEXPTy<REAL>::compute(static_cast<REAL>(fraction), -(PREC + 1))};
if (next >= 0.0 && next < 1.0) {
*harvest.Element<REAL>(at) = next;
break;
}
}
// Advance the cursor for the next iteration.
harvest.IncrementSubscripts(at);
}
}
}

} // namespace Fortran::runtime::random

#endif // FORTRAN_RUNTIME_RANDOM_TEMPLATES_H_
81 changes: 6 additions & 75 deletions flang/runtime/random.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,85 +11,24 @@

#include "flang/Runtime/random.h"
#include "lock.h"
#include "random-templates.h"
#include "terminator.h"
#include "flang/Common/float128.h"
#include "flang/Common/leading-zero-bit-count.h"
#include "flang/Common/uint128.h"
#include "flang/Runtime/cpp-type.h"
#include "flang/Runtime/descriptor.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <memory>
#include <random>
#include <time.h>

namespace Fortran::runtime {
namespace Fortran::runtime::random {

// Newer "Minimum standard", recommended by Park, Miller, and Stockmeyer in
// 1993. Same as C++17 std::minstd_rand, but explicitly instantiated for
// permanence.
using Generator =
std::linear_congruential_engine<std::uint_fast32_t, 48271, 0, 2147483647>;

using GeneratedWord = typename Generator::result_type;
static constexpr std::uint64_t range{
static_cast<std::uint64_t>(Generator::max() - Generator::min() + 1)};
static constexpr bool rangeIsPowerOfTwo{(range & (range - 1)) == 0};
static constexpr int rangeBits{
64 - common::LeadingZeroBitCount(range) - !rangeIsPowerOfTwo};

static Lock lock;
static Generator generator;
static std::optional<GeneratedWord> nextValue;

// Returns the next generated word. A value stashed in `nextValue` takes
// priority and is consumed (the optional is cleared); otherwise a fresh word
// is drawn from `generator`.
// Call only with lock held
static GeneratedWord GetNextValue() {
  if (!nextValue.has_value()) {
    return generator();
  }
  const GeneratedWord pending{*nextValue};
  nextValue.reset();
  return pending;
}

// Fills every element of `harvest` with a REAL value that satisfies
// 0.0 <= value < 1.0, assembled from generator output.
//   REAL - element type stored into the descriptor.
//   PREC - number of random bits kept per element.
// The shared lock is taken once and held for the whole fill.
template <typename REAL, int PREC>
inline void Generate(const Descriptor &harvest) {
// Accumulator width in bits: at least PREC, and at least the size of one
// generated word (8 bits per byte).
static constexpr std::size_t minBits{
std::max<std::size_t>(PREC, 8 * sizeof(GeneratedWord))};
using Int = common::HostUnsignedIntType<minBits>;
// Generator draws per element: ceil(PREC / rangeBits).
static constexpr std::size_t words{
static_cast<std::size_t>(PREC + rangeBits - 1) / rangeBits};
std::size_t elements{harvest.Elements()};
// Subscript cursor, started at the descriptor's lower bounds.
SubscriptValue at[maxRank];
harvest.GetLowerBounds(at);
{
// GetNextValue() must only be called with the lock held.
CriticalSection critical{lock};
for (std::size_t j{0}; j < elements; ++j) {
// Rejection loop: keep drawing until the candidate passes the range check.
while (true) {
// The first draw is taken as-is; later draws are rebased by
// generator.min(), masked to rangeBits bits and appended below it.
Int fraction{GetNextValue()};
if constexpr (words > 1) {
for (std::size_t k{1}; k < words; ++k) {
static constexpr auto rangeMask{
(GeneratedWord{1} << rangeBits) - 1};
GeneratedWord word{(GetNextValue() - generator.min()) & rangeMask};
fraction = (fraction << rangeBits) | word;
}
}
// Discard the low-order bits drawn in excess of PREC.
fraction >>= words * rangeBits - PREC;
// Scale the integer by 2^-(PREC + 1).
REAL next{std::ldexp(static_cast<REAL>(fraction), -(PREC + 1))};
if (next >= 0.0 && next < 1.0) {
*harvest.Element<REAL>(at) = next;
break;
}
}
// Advance the cursor for the next iteration.
harvest.IncrementSubscripts(at);
}
}
}
Lock lock;
Generator generator;
std::optional<GeneratedWord> nextValue;

extern "C" {

Expand Down Expand Up @@ -130,14 +69,6 @@ void RTNAME(RandomNumber)(
#if LDBL_MANT_DIG == 64
Generate<CppTypeFor<TypeCategory::Real, 10>, 64>(harvest);
return;
#endif
}
break;
case 16:
if constexpr (HasCppTypeFor<TypeCategory::Real, 16>) {
#if LDBL_MANT_DIG == 113
Generate<CppTypeFor<TypeCategory::Real, 16>, 113>(harvest);
return;
#endif
}
break;
Expand Down Expand Up @@ -263,4 +194,4 @@ void RTNAME(RandomSeed)(const Descriptor *size, const Descriptor *put,
}

} // extern "C"
} // namespace Fortran::runtime
} // namespace Fortran::runtime::random
15 changes: 15 additions & 0 deletions flang/test/Driver/cuda-option.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
! Test that -x cuda enables CUDA Fortran: _CUDA is defined and the DEVICE attribute is accepted
! RUN: %flang -fc1 -cpp -x cuda -fdebug-unparse %s -o - | FileCheck %s
! RUN: not %flang -fc1 -cpp %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR
program main
#if _CUDA
integer :: var = _CUDA
#endif
integer, device :: dvar
! The DEVICE declaration above is the statement the non-CUDA invocation is
! expected to reject; its line and column are pinned by the diagnostic
! directive at the end of this file, so do not insert lines before it.
end program

! CHECK-LABEL: PROGRAM main
! CHECK: INTEGER :: var = 1
! CHECK: INTEGER, DEVICE :: dvar

! ERROR: cuda-option.f90:8:19: error: expected end of statement
9 changes: 9 additions & 0 deletions flang/test/Lower/Intrinsics/fma_real16.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
! RUN: bbc -emit-fir %s -o - | FileCheck %s
! RUN: bbc --math-runtime=precise -emit-fir %s -o - | FileCheck %s
! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s

! CHECK: fir.call @_FortranAFmaF128({{.*}}){{.*}}: (f128, f128, f128) -> f128
! Main program: a single IEEE_FMA call on REAL(16) operands, which the
! directive above expects to be lowered to a call to _FortranAFmaF128.
use ieee_arithmetic, only: ieee_fma
real(16) :: x, y, z
x = ieee_fma(x, y, z)
end
82 changes: 50 additions & 32 deletions flang/test/Lower/Intrinsics/modulo.f90
Original file line number Diff line number Diff line change
Expand Up @@ -3,36 +3,54 @@
! CHECK-LABEL: func @_QPmodulo_testr(
! CHECK-SAME: %[[arg0:.*]]: !fir.ref<f64>{{.*}}, %[[arg1:.*]]: !fir.ref<f64>{{.*}}, %[[arg2:.*]]: !fir.ref<f64>{{.*}}) {
subroutine modulo_testr(r, a, p)
real(8) :: r, a, p
! CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref<f64>
! CHECK-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref<f64>
! CHECK-DAG: %[[rem:.*]] = arith.remf %[[a]], %[[p]] {{.*}}: f64
! CHECK-DAG: %[[zero:.*]] = arith.constant 0.000000e+00 : f64
! CHECK-DAG: %[[remNotZero:.*]] = arith.cmpf une, %[[rem]], %[[zero]] {{.*}} : f64
! CHECK-DAG: %[[aNeg:.*]] = arith.cmpf olt, %[[a]], %[[zero]] {{.*}} : f64
! CHECK-DAG: %[[pNeg:.*]] = arith.cmpf olt, %[[p]], %[[zero]] {{.*}} : f64
! CHECK-DAG: %[[signDifferent:.*]] = arith.xori %[[aNeg]], %[[pNeg]] : i1
! CHECK-DAG: %[[mustAddP:.*]] = arith.andi %[[remNotZero]], %[[signDifferent]] : i1
! CHECK-DAG: %[[remPlusP:.*]] = arith.addf %[[rem]], %[[p]] {{.*}}: f64
! CHECK: %[[res:.*]] = arith.select %[[mustAddP]], %[[remPlusP]], %[[rem]] : f64
! CHECK: fir.store %[[res]] to %[[arg0]] : !fir.ref<f64>
r = modulo(a, p)
end subroutine

! CHECK-LABEL: func @_QPmodulo_testi(
! CHECK-SAME: %[[arg0:.*]]: !fir.ref<i64>{{.*}}, %[[arg1:.*]]: !fir.ref<i64>{{.*}}, %[[arg2:.*]]: !fir.ref<i64>{{.*}}) {
subroutine modulo_testi(r, a, p)
integer(8) :: r, a, p
! CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref<i64>
! CHECK-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref<i64>
! CHECK-DAG: %[[rem:.*]] = arith.remsi %[[a]], %[[p]] : i64
! CHECK-DAG: %[[argXor:.*]] = arith.xori %[[a]], %[[p]] : i64
! CHECK-DAG: %[[signDifferent:.*]] = arith.cmpi slt, %[[argXor]], %c0{{.*}} : i64
! CHECK-DAG: %[[remNotZero:.*]] = arith.cmpi ne, %[[rem]], %c0{{.*}} : i64
! CHECK-DAG: %[[mustAddP:.*]] = arith.andi %[[remNotZero]], %[[signDifferent]] : i1
! CHECK-DAG: %[[remPlusP:.*]] = arith.addi %[[rem]], %[[p]] : i64
! CHECK: %[[res:.*]] = arith.select %[[mustAddP]], %[[remPlusP]], %[[rem]] : i64
! CHECK: fir.store %[[res]] to %[[arg0]] : !fir.ref<i64>
r = modulo(a, p)
end subroutine
real(8) :: r, a, p
! CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref<f64>
! CHECK-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref<f64>
! CHECK-DAG: %[[rem:.*]] = arith.remf %[[a]], %[[p]] {{.*}}: f64
! CHECK-DAG: %[[zero:.*]] = arith.constant 0.000000e+00 : f64
! CHECK-DAG: %[[remNotZero:.*]] = arith.cmpf une, %[[rem]], %[[zero]] {{.*}} : f64
! CHECK-DAG: %[[aNeg:.*]] = arith.cmpf olt, %[[a]], %[[zero]] {{.*}} : f64
! CHECK-DAG: %[[pNeg:.*]] = arith.cmpf olt, %[[p]], %[[zero]] {{.*}} : f64
! CHECK-DAG: %[[signDifferent:.*]] = arith.xori %[[aNeg]], %[[pNeg]] : i1
! CHECK-DAG: %[[mustAddP:.*]] = arith.andi %[[remNotZero]], %[[signDifferent]] : i1
! CHECK-DAG: %[[remPlusP:.*]] = arith.addf %[[rem]], %[[p]] {{.*}}: f64
! CHECK: %[[res:.*]] = arith.select %[[mustAddP]], %[[remPlusP]], %[[rem]] : f64
! CHECK: fir.store %[[res]] to %[[arg0]] : !fir.ref<f64>
r = modulo(a, p)
end subroutine

! CHECK-LABEL: func @_QPmodulo_testi(
! CHECK-SAME: %[[arg0:.*]]: !fir.ref<i64>{{.*}}, %[[arg1:.*]]: !fir.ref<i64>{{.*}}, %[[arg2:.*]]: !fir.ref<i64>{{.*}}) {
subroutine modulo_testi(r, a, p)
integer(8) :: r, a, p
! CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref<i64>
! CHECK-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref<i64>
! CHECK-DAG: %[[rem:.*]] = arith.remsi %[[a]], %[[p]] : i64
! CHECK-DAG: %[[argXor:.*]] = arith.xori %[[a]], %[[p]] : i64
! CHECK-DAG: %[[signDifferent:.*]] = arith.cmpi slt, %[[argXor]], %c0{{.*}} : i64
! CHECK-DAG: %[[remNotZero:.*]] = arith.cmpi ne, %[[rem]], %c0{{.*}} : i64
! CHECK-DAG: %[[mustAddP:.*]] = arith.andi %[[remNotZero]], %[[signDifferent]] : i1
! CHECK-DAG: %[[remPlusP:.*]] = arith.addi %[[rem]], %[[p]] : i64
! CHECK: %[[res:.*]] = arith.select %[[mustAddP]], %[[remPlusP]], %[[rem]] : i64
! CHECK: fir.store %[[res]] to %[[arg0]] : !fir.ref<i64>
r = modulo(a, p)
end subroutine

! CHECK-LABEL: func @_QPmodulo_testr16(
! CHECK-SAME: %[[arg0:.*]]: !fir.ref<f128>{{.*}}, %[[arg1:.*]]: !fir.ref<f128>{{.*}}, %[[arg2:.*]]: !fir.ref<f128>{{.*}}) {
subroutine modulo_testr16(r, a, p)
! MODULO on REAL(16) operands. The directives below expect an inlined
! sequence on f128: remainder, sign comparison, then a conditional add of p.
real(16) :: r, a, p
! CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref<f128>
! CHECK-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref<f128>
! CHECK-DAG: %[[rem:.*]] = arith.remf %[[a]], %[[p]] {{.*}}: f128
! CHECK-DAG: %[[zero:.*]] = arith.constant 0.000000e+00 : f128
! CHECK-DAG: %[[remNotZero:.*]] = arith.cmpf une, %[[rem]], %[[zero]] {{.*}} : f128
! CHECK-DAG: %[[aNeg:.*]] = arith.cmpf olt, %[[a]], %[[zero]] {{.*}} : f128
! CHECK-DAG: %[[pNeg:.*]] = arith.cmpf olt, %[[p]], %[[zero]] {{.*}} : f128
! CHECK-DAG: %[[signDifferent:.*]] = arith.xori %[[aNeg]], %[[pNeg]] : i1
! CHECK-DAG: %[[mustAddP:.*]] = arith.andi %[[remNotZero]], %[[signDifferent]] : i1
! CHECK-DAG: %[[remPlusP:.*]] = arith.addf %[[rem]], %[[p]] {{.*}}: f128
! CHECK: %[[res:.*]] = arith.select %[[mustAddP]], %[[remPlusP]], %[[rem]] : f128
! CHECK: fir.store %[[res]] to %[[arg0]] : !fir.ref<f128>
r = modulo(a, p)
end subroutine
16 changes: 16 additions & 0 deletions flang/test/Lower/Intrinsics/random_number_real16.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
! RUN: bbc -emit-fir %s -o - | FileCheck %s
! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s

! CHECK-LABEL: func @_QPtest_scalar
! CHECK: fir.call @_FortranARandomNumber16({{.*}}){{.*}}: (!fir.box<none>, !fir.ref<i8>, i32) -> none
subroutine test_scalar
! Scalar REAL(16) harvest: the call is expected to be lowered to the
! _FortranARandomNumber16 runtime entry point.
real(16) :: r
call random_number(r)
end

! CHECK-LABEL: func @_QPtest_array
! CHECK: fir.call @_FortranARandomNumber16({{.*}}){{.*}}: (!fir.box<none>, !fir.ref<i8>, i32) -> none
subroutine test_array(r)
! Assumed-shape REAL(16) harvest: expected to use the same
! _FortranARandomNumber16 entry point as the scalar case.
real(16) :: r(:)
call random_number(r)
end
117 changes: 117 additions & 0 deletions flang/test/Lower/OpenMP/FIR/parallel-reduction-add-byref.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s

!CHECK-LABEL: omp.reduction.declare
!CHECK-SAME: @[[RED_F32_NAME:.*]] : !fir.ref<f32>
!CHECK-SAME: init {
!CHECK: ^bb0(%{{.*}}: !fir.ref<f32>):
!CHECK: %[[C0_1:.*]] = arith.constant 0.000000e+00 : f32
!CHECK: %[[REF:.*]] = fir.alloca f32
!CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<f32>
!CHECK: omp.yield(%[[REF]] : !fir.ref<f32>)
!CHECK: } combiner {
!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
!CHECK: %[[RES:.*]] = arith.addf %[[LD0]], %[[LD1]] {{.*}}: f32
!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
!CHECK: omp.yield(%[[ARG0]] : !fir.ref<f32>)
!CHECK: }

!CHECK-LABEL: omp.reduction.declare
!CHECK-SAME: @[[RED_I32_NAME:.*]] : !fir.ref<i32>
!CHECK-SAME: init {
!CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
!CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
!CHECK: %[[REF:.*]] = fir.alloca i32
!CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
!CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
!CHECK: } combiner {
!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
!CHECK: %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i32
!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
!CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
!CHECK: }

!CHECK-LABEL: func.func @_QPsimple_int_add
!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_addEi"}
!CHECK: %[[I_START:.*]] = arith.constant 0 : i32
!CHECK: fir.store %[[I_START]] to %[[IREF]] : !fir.ref<i32>
!CHECK: omp.parallel byref reduction(@[[RED_I32_NAME]] %[[IREF]] -> %[[PRV:.+]] : !fir.ref<i32>) {
!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
!CHECK: %[[I_INCR:.+]] = arith.constant 1 : i32
!CHECK: %[[RES:.+]] = arith.addi %[[LPRV]], %[[I_INCR]]
!CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
!CHECK: omp.terminator
!CHECK: }
!CHECK: return
subroutine simple_int_add
! Integer '+' reduction on a PARALLEL construct. With the by-reference
! reduction option forced, the region is expected to receive i as a
! !fir.ref<i32> and to update it through load/add/store.
integer :: i
i = 0

!$omp parallel reduction(+:i)
i = i + 1
!$omp end parallel

print *, i
end subroutine

!CHECK-LABEL: func.func @_QPsimple_real_add
!CHECK: %[[RREF:.*]] = fir.alloca f32 {bindc_name = "r", uniq_name = "_QFsimple_real_addEr"}
!CHECK: %[[R_START:.*]] = arith.constant 0.000000e+00 : f32
!CHECK: fir.store %[[R_START]] to %[[RREF]] : !fir.ref<f32>
!CHECK: omp.parallel byref reduction(@[[RED_F32_NAME]] %[[RREF]] -> %[[PRV:.+]] : !fir.ref<f32>) {
!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
!CHECK: %[[R_INCR:.+]] = arith.constant 1.500000e+00 : f32
!CHECK: %[[RES:.+]] = arith.addf %[[LPRV]], %[[R_INCR]] {{.*}} : f32
!CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<f32>
!CHECK: omp.terminator
!CHECK: }
!CHECK: return
subroutine simple_real_add
! Default-kind REAL '+' reduction on a PARALLEL construct. With the
! by-reference reduction option forced, the region is expected to receive r
! as a !fir.ref<f32>.
real :: r
r = 0.0

!$omp parallel reduction(+:r)
r = r + 1.5
!$omp end parallel

print *, r
end subroutine

!CHECK-LABEL: func.func @_QPint_real_add
!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFint_real_addEi"}
!CHECK: %[[RREF:.*]] = fir.alloca f32 {bindc_name = "r", uniq_name = "_QFint_real_addEr"}
!CHECK: %[[R_START:.*]] = arith.constant 0.000000e+00 : f32
!CHECK: fir.store %[[R_START]] to %[[RREF]] : !fir.ref<f32>
!CHECK: %[[I_START:.*]] = arith.constant 0 : i32
!CHECK: fir.store %[[I_START]] to %[[IREF]] : !fir.ref<i32>
!CHECK: omp.parallel byref reduction(@[[RED_I32_NAME]] %[[IREF]] -> %[[PRV0:.+]] : !fir.ref<i32>, @[[RED_F32_NAME]] %[[RREF]] -> %[[PRV1:.+]] : !fir.ref<f32>) {
!CHECK: %[[R_INCR:.*]] = arith.constant 1.500000e+00 : f32
!CHECK: %[[LPRV1:.+]] = fir.load %[[PRV1]] : !fir.ref<f32>
!CHECK: %[[RES1:.+]] = arith.addf %[[R_INCR]], %[[LPRV1]] {{.*}} : f32
!CHECK: fir.store %[[RES1]] to %[[PRV1]]
!CHECK: %[[LPRV0:.+]] = fir.load %[[PRV0]] : !fir.ref<i32>
!CHECK: %[[I_INCR:.*]] = arith.constant 3 : i32
!CHECK: %[[RES0:.+]] = arith.addi %[[LPRV0]], %[[I_INCR]]
!CHECK: fir.store %[[RES0]] to %[[PRV0]]
!CHECK: omp.terminator
!CHECK: }
!CHECK: return
subroutine int_real_add
! Two variables of different types in a single '+' reduction clause. The
! clause lists i before r, and the expected reduction operands appear in
! that same order (i32 declaration first, then f32).
real :: r
integer :: i

r = 0.0
i = 0

!$omp parallel reduction(+:i,r)
r = 1.5 + r
i = i + 3
!$omp end parallel

print *, r
print *, i
end subroutine
392 changes: 392 additions & 0 deletions flang/test/Lower/OpenMP/FIR/wsloop-reduction-add-byref.f90

Large diffs are not rendered by default.

46 changes: 46 additions & 0 deletions flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand-byref.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s

!CHECK-LABEL: omp.reduction.declare @iand_i_32_byref : !fir.ref<i32>
!CHECK-SAME: init {
!CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
!CHECK: %[[C0_1:.*]] = arith.constant -1 : i32
!CHECK: %[[REF:.*]] = fir.alloca i32
!CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
!CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)

!CHECK-LABEL: } combiner {
!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
!CHECK: %[[RES:.*]] = arith.andi %[[LD0]], %[[LD1]] : i32
!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
!CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
!CHECK: }


!CHECK-LABEL: @_QPreduction_iand
!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iandEx"}
!CHECK: omp.parallel
!CHECK: omp.wsloop byref reduction(@iand_i_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
!CHECK: %[[RES:.+]] = arith.andi %[[LPRV]], %[[Y_I]] : i32
!CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
!CHECK: omp.yield
!CHECK: omp.terminator

subroutine reduction_iand(y)
! IAND reduction on a worksharing loop nested in a PARALLEL region. The loop
! index i is not declared here and is therefore implicitly typed. y is an
! assumed-shape array whose elements are read through the box in the
! expected output.
integer :: x, y(:)
x = 0
!$omp parallel
!$omp do reduction(iand:x)
do i=1, 100
x = iand(x, y(i))
end do
!$omp end do
!$omp end parallel
print *, x
end subroutine
45 changes: 45 additions & 0 deletions flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor-byref.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -mmlir --force-byref-reduction -fopenmp %s -o - | FileCheck %s

! CHECK-LABEL: omp.reduction.declare @ieor_i_32_byref : !fir.ref<i32>
! CHECK-SAME: init {
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
! CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
! CHECK: %[[REF:.*]] = fir.alloca i32
! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
! CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)

! CHECK-LABEL: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
! CHECK: %[[RES:.*]] = arith.xori %[[LD0]], %[[LD1]] : i32
! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
! CHECK: }

!CHECK-LABEL: @_QPreduction_ieor
!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_ieorEx"}
!CHECK: omp.parallel
!CHECK: omp.wsloop byref reduction(@ieor_i_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
!CHECK: %[[RES:.+]] = arith.xori %[[LPRV]], %[[Y_I]] : i32
!CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
!CHECK: omp.yield
!CHECK: omp.terminator

subroutine reduction_ieor(y)
! IEOR reduction on a worksharing loop nested in a PARALLEL region. The loop
! index i is not declared here and is therefore implicitly typed. y is an
! assumed-shape array whose elements are read through the box in the
! expected output.
integer :: x, y(:)
x = 0
!$omp parallel
!$omp do reduction(ieor:x)
do i=1, 100
x = ieor(x, y(i))
end do
!$omp end do
!$omp end parallel
print *, x
end subroutine
45 changes: 45 additions & 0 deletions flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior-byref.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s

! CHECK-LABEL: omp.reduction.declare @ior_i_32_byref : !fir.ref<i32>
! CHECK-SAME: init {
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
! CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
! CHECK: %[[REF:.*]] = fir.alloca i32
! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
! CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)

! CHECK-LABEL: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
! CHECK: %[[RES:.*]] = arith.ori %[[LD0]], %[[LD1]] : i32
! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
! CHECK: }

!CHECK-LABEL: @_QPreduction_ior
!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iorEx"}
!CHECK: omp.parallel
!CHECK: omp.wsloop byref reduction(@ior_i_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
!CHECK: %[[RES:.+]] = arith.ori %[[LPRV]], %[[Y_I]] : i32
!CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
!CHECK: omp.yield
!CHECK: omp.terminator

! Lit input: integer IOR reduction on a worksharing loop nested in a parallel
! region. The FileCheck directives earlier in this file expect the loop to be
! lowered by reference, using the @ior_i_32_byref declaration bound to x.
subroutine reduction_ior(y)
! x is the reduction variable; y is assumed-shape, which the directives match
! as a !fir.box argument.
integer :: x, y(:)
x = 0
!$omp parallel
!$omp do reduction(ior:x)
do i=1, 100
! Same intrinsic as the clause operator, applied to the reduction variable.
x = ior(x, y(i))
end do
!$omp end do
!$omp end parallel
! x is read again once the parallel region has ended.
print *, x
end subroutine
187 changes: 187 additions & 0 deletions flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv-byref.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s

! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py

! CHECK-LABEL: omp.reduction.declare @eqv_reduction : !fir.ref<!fir.logical<4>>
! CHECK-SAME: init {
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.logical<4>>):
! CHECK: %[[VAL_1:.*]] = arith.constant true
! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4>
! CHECK: %[[REF:.*]] = fir.alloca !fir.logical<4>
! CHECK: fir.store %[[VAL_2]] to %[[REF]] : !fir.ref<!fir.logical<4>>
! CHECK: omp.yield(%[[REF]] : !fir.ref<!fir.logical<4>>)

! CHECK-LABEL: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_2:.*]] = fir.convert %[[LD0]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_3:.*]] = fir.convert %[[LD1]] : (!fir.logical<4>) -> i1
! CHECK: %[[RES:.*]] = arith.cmpi eq, %[[VAL_2]], %[[VAL_3]] : i1
! CHECK: %[[VAL_5:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_5]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
! CHECK: omp.yield(%[[ARG0]] : !fir.ref<!fir.logical<4>>)
! CHECK: }

! CHECK-LABEL: func.func @_QPsimple_reduction(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
! CHECK: %[[VAL_3:.*]] = arith.constant true
! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
! CHECK: omp.parallel {
! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32
! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop byref reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64
! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i64
! CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64
! CHECK: %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_15]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_19]] : i1
! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
! CHECK: omp.yield
! CHECK: omp.terminator
! CHECK: return

! Lit input: a single .eqv. reduction on a logical scalar. The directives
! above expect the worksharing loop to reference @eqv_reduction by reference.
subroutine simple_reduction(y)
! x is the reduction variable; y is an explicit-shape array of 100 logicals.
logical :: x, y(100)
x = .true.
!$omp parallel
!$omp do reduction(.eqv.:x)
do i=1, 100
! Reduction variable appears as the left operand in this variant.
x = x .eqv. y(i)
end do
!$omp end do
!$omp end parallel
end subroutine

! CHECK-LABEL: func.func @_QPsimple_reduction_switch_order(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
! CHECK: %[[VAL_3:.*]] = arith.constant true
! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
! CHECK: omp.parallel {
! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32
! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop byref reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i64
! CHECK: %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64
! CHECK: %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_19]] : i1
! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
! CHECK: omp.yield
! CHECK: omp.terminator
! CHECK: return

! Lit input: same .eqv. reduction as simple_reduction, but with the operands
! swapped. The directives above expect the array element to be loaded before
! the private copy of x.
subroutine simple_reduction_switch_order(y)
logical :: x, y(100)
x = .true.
!$omp parallel
!$omp do reduction(.eqv.:x)
do i=1, 100
! Reduction variable appears as the right operand in this variant.
x = y(i) .eqv. x
end do
!$omp end do
!$omp end parallel
end subroutine

! CHECK-LABEL: func.func @_QPmultiple_reductions(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "w"}) {
! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"}
! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"}
! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
! CHECK: %[[VAL_5:.*]] = arith.constant true
! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_5]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_6]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_7:.*]] = arith.constant true
! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_9:.*]] = arith.constant true
! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_10]] to %[[VAL_4]] : !fir.ref<!fir.logical<4>>
! CHECK: omp.parallel {
! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32
! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop byref reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
! CHECK: fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<i32>
! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i64
! CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64
! CHECK: %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_28:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_27]] : i1
! CHECK: %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_29]] to %[[VAL_15]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
! CHECK: %[[VAL_33:.*]] = arith.constant 1 : i64
! CHECK: %[[VAL_34:.*]] = arith.subi %[[VAL_32]], %[[VAL_33]] : i64
! CHECK: %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_34]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_39:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_38]] : i1
! CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_40]] to %[[VAL_16]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_42:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
! CHECK: %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i32) -> i64
! CHECK: %[[VAL_44:.*]] = arith.constant 1 : i64
! CHECK: %[[VAL_45:.*]] = arith.subi %[[VAL_43]], %[[VAL_44]] : i64
! CHECK: %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_45]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_48:.*]] = fir.convert %[[VAL_41]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_50:.*]] = arith.cmpi eq, %[[VAL_48]], %[[VAL_49]] : i1
! CHECK: %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_51]] to %[[VAL_17]] : !fir.ref<!fir.logical<4>>
! CHECK: omp.yield
! CHECK: omp.terminator
! CHECK: return

! Lit input: three logical scalars listed in one .eqv. reduction clause. The
! directives above expect a single worksharing loop carrying three
! @eqv_reduction entries, one each for x, y and z.
subroutine multiple_reductions(w)
! x, y and z are the reduction variables; w is the shared input array.
logical :: x,y,z,w(100)
x = .true.
y = .true.
z = .true.
!$omp parallel
!$omp do reduction(.eqv.:x,y,z)
do i=1, 100
! Each reduction variable is combined with the same element of w.
x = x .eqv. w(i)
y = y .eqv. w(i)
z = z .eqv. w(i)
end do
!$omp end do
!$omp end parallel
end subroutine
189 changes: 189 additions & 0 deletions flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv-byref.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s

! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py


! CHECK-LABEL: omp.reduction.declare @neqv_reduction : !fir.ref<!fir.logical<4>>
! CHECK-SAME: init {
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.logical<4>>):
! CHECK: %[[VAL_1:.*]] = arith.constant false
! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4>
! CHECK: %[[REF:.*]] = fir.alloca !fir.logical<4>
! CHECK: fir.store %[[VAL_2]] to %[[REF]] : !fir.ref<!fir.logical<4>>
! CHECK: omp.yield(%[[REF]] : !fir.ref<!fir.logical<4>>)

! CHECK-LABEL: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_2:.*]] = fir.convert %[[LD0]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_3:.*]] = fir.convert %[[LD1]] : (!fir.logical<4>) -> i1
! CHECK: %[[RES:.*]] = arith.cmpi ne, %[[VAL_2]], %[[VAL_3]] : i1
! CHECK: %[[VAL_5:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_5]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
! CHECK: omp.yield(%[[ARG0]] : !fir.ref<!fir.logical<4>>)
! CHECK: }

! CHECK-LABEL: func.func @_QPsimple_reduction(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
! CHECK: %[[VAL_3:.*]] = arith.constant true
! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
! CHECK: omp.parallel {
! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32
! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop byref reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64
! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i64
! CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64
! CHECK: %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_15]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_19]] : i1
! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
! CHECK: omp.yield
! CHECK: omp.terminator
! CHECK: return

! Lit input: a single .neqv. reduction on a logical scalar. The directives
! above expect the worksharing loop to reference @neqv_reduction by reference.
subroutine simple_reduction(y)
! x is the reduction variable; y is an explicit-shape array of 100 logicals.
logical :: x, y(100)
x = .true.
!$omp parallel
!$omp do reduction(.neqv.:x)
do i=1, 100
! Reduction variable appears as the left operand in this variant.
x = x .neqv. y(i)
end do
!$omp end do
!$omp end parallel
end subroutine

! CHECK-LABEL: func.func @_QPsimple_reduction_switch_order(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
! CHECK: %[[VAL_3:.*]] = arith.constant true
! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
! CHECK: omp.parallel {
! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32
! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop byref reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i64
! CHECK: %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64
! CHECK: %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_19]] : i1
! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
! CHECK: omp.yield
! CHECK: omp.terminator
! CHECK: return

! Lit input: same .neqv. reduction as simple_reduction, but with the operands
! swapped. The directives above expect the array element to be loaded before
! the private copy of x.
subroutine simple_reduction_switch_order(y)
logical :: x, y(100)
x = .true.
!$omp parallel
!$omp do reduction(.neqv.:x)
do i=1, 100
! Reduction variable appears as the right operand in this variant.
x = y(i) .neqv. x
end do
!$omp end do
!$omp end parallel
end subroutine

! CHECK-LABEL: func.func @_QPmultiple_reductions(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "w"}) {
! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"}
! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"}
! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
! CHECK: %[[VAL_5:.*]] = arith.constant true
! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_5]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_6]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_7:.*]] = arith.constant true
! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_9:.*]] = arith.constant true
! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_10]] to %[[VAL_4]] : !fir.ref<!fir.logical<4>>
! CHECK: omp.parallel {
! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32
! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop byref reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
! CHECK: fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<i32>
! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i64
! CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64
! CHECK: %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_28:.*]] = arith.cmpi ne, %[[VAL_26]], %[[VAL_27]] : i1
! CHECK: %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_29]] to %[[VAL_15]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
! CHECK: %[[VAL_33:.*]] = arith.constant 1 : i64
! CHECK: %[[VAL_34:.*]] = arith.subi %[[VAL_32]], %[[VAL_33]] : i64
! CHECK: %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_34]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_39:.*]] = arith.cmpi ne, %[[VAL_37]], %[[VAL_38]] : i1
! CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_40]] to %[[VAL_16]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_42:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
! CHECK: %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i32) -> i64
! CHECK: %[[VAL_44:.*]] = arith.constant 1 : i64
! CHECK: %[[VAL_45:.*]] = arith.subi %[[VAL_43]], %[[VAL_44]] : i64
! CHECK: %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_45]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_48:.*]] = fir.convert %[[VAL_41]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_50:.*]] = arith.cmpi ne, %[[VAL_48]], %[[VAL_49]] : i1
! CHECK: %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[VAL_51]] to %[[VAL_17]] : !fir.ref<!fir.logical<4>>
! CHECK: omp.yield
! CHECK: omp.terminator
! CHECK: return


! Lit input: three logical scalars listed in one .neqv. reduction clause. The
! directives above expect a single worksharing loop carrying three
! @neqv_reduction entries, one each for x, y and z.
subroutine multiple_reductions(w)
! x, y and z are the reduction variables; w is the shared input array.
logical :: x,y,z,w(100)
x = .true.
y = .true.
z = .true.
!$omp parallel
!$omp do reduction(.neqv.:x,y,z)
do i=1, 100
! Each reduction variable is combined with the same element of w.
x = x .neqv. w(i)
y = y .neqv. w(i)
z = z .neqv. w(i)
end do
!$omp end do
!$omp end parallel
end subroutine
90 changes: 90 additions & 0 deletions flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s

!CHECK: omp.reduction.declare @max_f_32_byref : !fir.ref<f32>
!CHECK-SAME: init {
!CHECK: %[[MINIMUM_VAL:.*]] = arith.constant -3.40282347E+38 : f32
!CHECK: %[[REF:.*]] = fir.alloca f32
!CHECK: fir.store %[[MINIMUM_VAL]] to %[[REF]] : !fir.ref<f32>
!CHECK: omp.yield(%[[REF]] : !fir.ref<f32>)
!CHECK: combiner
!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
!CHECK: %[[RES:.*]] = arith.maximumf %[[LD0]], %[[LD1]] {{.*}}: f32
!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
!CHECK: omp.yield(%[[ARG0]] : !fir.ref<f32>)

!CHECK-LABEL: omp.reduction.declare @max_i_32_byref : !fir.ref<i32>
!CHECK-SAME: init {
!CHECK: %[[MINIMUM_VAL:.*]] = arith.constant -2147483648 : i32
!CHECK: %[[REF:.*]] = fir.alloca i32
!CHECK: fir.store %[[MINIMUM_VAL]] to %[[REF]] : !fir.ref<i32>
!CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
!CHECK: combiner
!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
!CHECK: %[[RES:.*]] = arith.maxsi %[[LD0]], %[[LD1]] : i32
!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
!CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)

!CHECK-LABEL: @_QPreduction_max_int
!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"}
!CHECK: omp.parallel
!CHECK: omp.wsloop byref reduction(@max_i_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
!CHECK: %[[RES:.+]] = arith.cmpi sgt, %[[LPRV]], %[[Y_I]] : i32
!CHECK: %[[SEL:.+]] = arith.select %[[RES]], %[[LPRV]], %[[Y_I]]
!CHECK: fir.store %[[SEL]] to %[[PRV]] : !fir.ref<i32>
!CHECK: omp.terminator

!CHECK-LABEL: @_QPreduction_max_real
!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xf32>>
!CHECK: %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_max_realEx"}
!CHECK: omp.parallel
!CHECK: omp.wsloop byref reduction(@max_f_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<f32>) for
!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<f32>
!CHECK: %[[RES:.+]] = arith.cmpf ogt, %[[Y_I]], %[[LPRV]] {{.*}} : f32
!CHECK: omp.yield
!CHECK: omp.terminator

! Lit input: integer MAX reduction on a worksharing loop nested in a parallel
! region. The directives above expect the by-reference declaration
! @max_i_32_byref and a signed compare followed by a select in the loop body.
subroutine reduction_max_int(y)
! x is the reduction variable; y is assumed-shape, which the directives match
! as a !fir.box argument.
integer :: x, y(:)
x = 0
!$omp parallel
!$omp do reduction(max:x)
do i=1, 100
! Reduction variable is the first argument of the intrinsic here.
x = max(x, y(i))
end do
!$omp end do
!$omp end parallel
! x is read again once the parallel region has ended.
print *, x
end subroutine

! Lit input: real MAX reduction, exercised by two loops over the same
! reduction variable. The directives above pin the first loop to the
! by-reference declaration @max_f_32_byref.
subroutine reduction_max_real(y)
real :: x, y(:)
x = 0.0
!$omp parallel
!$omp do reduction(max:x)
do i=1, 100
! Reduction variable is the second argument of the intrinsic here.
x = max(y(i), x)
end do
!$omp end do
!$omp end parallel
print *, x

! Second loop: the maximum is written as an IF statement instead of a call to
! the max intrinsic. The FileCheck negative directive inside the loop guards
! against the text omp.reduction showing up in the output after the earlier
! matches.
!$omp parallel
!$omp do reduction(max:x)
do i=1, 100
!CHECK-NOT: omp.reduction
if (y(i) .gt. x) x = y(i)
end do
!$omp end do
!$omp end parallel
print *, x
end subroutine
91 changes: 91 additions & 0 deletions flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -mmlir --force-byref-reduction -fopenmp -o - %s 2>&1 | FileCheck %s

!CHECK: omp.reduction.declare @min_f_32_byref : !fir.ref<f32>
!CHECK-SAME: init {
!CHECK: %[[MAXIMUM_VAL:.*]] = arith.constant 3.40282347E+38 : f32
!CHECK: %[[REF:.*]] = fir.alloca f32
!CHECK: fir.store %[[MAXIMUM_VAL]] to %[[REF]] : !fir.ref<f32>
!CHECK: omp.yield(%[[REF]] : !fir.ref<f32>)
!CHECK: combiner
!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
!CHECK: %[[RES:.*]] = arith.minimumf %[[LD0]], %[[LD1]] {{.*}}: f32
!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
!CHECK: omp.yield(%[[ARG0]] : !fir.ref<f32>)

!CHECK-LABEL: omp.reduction.declare @min_i_32_byref : !fir.ref<i32>
!CHECK-SAME: init {
!CHECK: %[[MAXIMUM_VAL:.*]] = arith.constant 2147483647 : i32
!CHECK: %[[REF:.*]] = fir.alloca i32
!CHECK: fir.store %[[MAXIMUM_VAL]] to %[[REF]] : !fir.ref<i32>
!CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
!CHECK: combiner
!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
!CHECK: %[[RES:.*]] = arith.minsi %[[LD0]], %[[LD1]] : i32
!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
!CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)

!CHECK-LABEL: @_QPreduction_min_int
!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_min_intEx"}
!CHECK: omp.parallel
!CHECK: omp.wsloop byref reduction(@min_i_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
!CHECK: %[[RES:.+]] = arith.cmpi slt, %[[LPRV]], %[[Y_I]] : i32
!CHECK: %[[SEL:.+]] = arith.select %[[RES]], %[[LPRV]], %[[Y_I]]
!CHECK: fir.store %[[SEL]] to %[[PRV]] : !fir.ref<i32>
!CHECK: omp.yield
!CHECK: omp.terminator

!CHECK-LABEL: @_QPreduction_min_real
!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xf32>>
!CHECK: %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_min_realEx"}
!CHECK: omp.parallel
!CHECK: omp.wsloop byref reduction(@min_f_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<f32>) for
!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<f32>
!CHECK: %[[RES:.+]] = arith.cmpf ogt, %[[Y_I]], %[[LPRV]] {{.*}} : f32
!CHECK: omp.yield
!CHECK: omp.terminator

! Lit input: integer MIN reduction on a worksharing loop nested in a parallel
! region. The directives above expect the by-reference declaration
! @min_i_32_byref and a signed compare followed by a select in the loop body.
subroutine reduction_min_int(y)
! x is the reduction variable; y is assumed-shape, which the directives match
! as a !fir.box argument.
integer :: x, y(:)
x = 0
!$omp parallel
!$omp do reduction(min:x)
do i=1, 100
! Reduction variable is the first argument of the intrinsic here.
x = min(x, y(i))
end do
!$omp end do
!$omp end parallel
! x is read again once the parallel region has ended.
print *, x
end subroutine

! Lit input: real MIN reduction, exercised by two loops over the same
! reduction variable. The directives above pin the first loop to the
! by-reference declaration @min_f_32_byref.
subroutine reduction_min_real(y)
real :: x, y(:)
x = 0.0
!$omp parallel
!$omp do reduction(min:x)
do i=1, 100
! Reduction variable is the second argument of the intrinsic here.
x = min(y(i), x)
end do
!$omp end do
!$omp end parallel
print *, x

! Second loop: the update is written as an IF statement instead of a call to
! the min intrinsic. The FileCheck negative directive inside the loop guards
! against the text omp.reduction showing up in the output after the earlier
! matches.
! NOTE(review): the comparison below is .gt. although the clause operator is
! min; this looks carried over from the max test. Confirm whether .lt. was
! intended before relying on this loop as a min example.
!$omp parallel
!$omp do reduction(min:x)
do i=1, 100
!CHECK-NOT: omp.reduction
if (y(i) .gt. x) x = y(i)
end do
!$omp end do
!$omp end parallel
print *, x
end subroutine
385 changes: 385 additions & 0 deletions flang/test/Lower/OpenMP/default-clause-byref.f90

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
! Test that reductions and delayed privatization work properly together. Since
! both types of clauses add block arguments to the OpenMP region, we make sure
! that the block arguments are added in the proper order (reductions first and
! then delayed privatization).

! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s

! Lit input: one parallel construct carrying both a reduction clause and a
! private clause. The directives further down expect the reduction block
! argument (%arg0) to come before the privatizer block argument (%arg1).
subroutine red_and_delayed_private
! red is the reduction variable, prv the privatized variable.
integer :: red
integer :: prv

red = 0
prv = 10

!$omp parallel reduction(+:red) private(prv)
! Both variables are referenced inside the region.
red = red + 1
prv = 20
!$omp end parallel
end subroutine

! CHECK-LABEL: omp.private {type = private}
! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {

! CHECK-LABEL: omp.reduction.declare
! CHECK-SAME: @[[REDUCTION_SYM:.*]] : !fir.ref<i32> init

! CHECK-LABEL: _QPred_and_delayed_private
! CHECK: omp.parallel
! CHECK-SAME: reduction(@[[REDUCTION_SYM]] %{{.*}} -> %arg0 : !fir.ref<i32>)
! CHECK-SAME: private(@[[PRIVATIZER_SYM]] %{{.*}} -> %arg1 : !fir.ref<i32>) {
125 changes: 125 additions & 0 deletions flang/test/Lower/OpenMP/parallel-reduction-add-byref.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
! RUN: bbc -emit-hlfir --force-byref-reduction -fopenmp -o - %s 2>&1 | FileCheck %s
! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s

!CHECK-LABEL: omp.reduction.declare
!CHECK-SAME: @[[RED_F32_NAME:.*]] : !fir.ref<f32>
!CHECK-SAME: init {
!CHECK: ^bb0(%{{.*}}: !fir.ref<f32>):
!CHECK: %[[C0_1:.*]] = arith.constant 0.000000e+00 : f32
!CHECK: %[[REF:.*]] = fir.alloca f32
!CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<f32>
!CHECK: omp.yield(%[[REF]] : !fir.ref<f32>)
!CHECK: } combiner {
!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
!CHECK: %[[RES:.*]] = arith.addf %[[LD0]], %[[LD1]] {{.*}}: f32
!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
!CHECK: omp.yield(%[[ARG0]] : !fir.ref<f32>)
!CHECK: }

!CHECK-LABEL: omp.reduction.declare
!CHECK-SAME: @[[RED_I32_NAME:.*]] : !fir.ref<i32>
!CHECK-SAME: init {
!CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
!CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
!CHECK: %[[REF:.*]] = fir.alloca i32
!CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
!CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
!CHECK: } combiner {
!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
!CHECK: %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i32
!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
!CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
!CHECK: }

!CHECK-LABEL: func.func @_QPsimple_int_add
!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_addEi"}
!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[IREF]] {uniq_name = "_QFsimple_int_addEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[I_START:.*]] = arith.constant 0 : i32
!CHECK: hlfir.assign %[[I_START]] to %[[I_DECL]]#0 : i32, !fir.ref<i32>
!CHECK: omp.parallel byref reduction(@[[RED_I32_NAME]] %[[I_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) {
!CHECK: %[[P_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[LPRV:.+]] = fir.load %[[P_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[I_INCR:.*]] = arith.constant 1 : i32
!CHECK: %[[RES:.+]] = arith.addi %[[LPRV]], %[[I_INCR]] : i32
!CHECK: hlfir.assign %[[RES]] to %[[P_DECL]]#0 : i32, !fir.ref<i32>
!CHECK: omp.terminator
!CHECK: }
!CHECK: return
! Test input: + reduction on a default integer scalar, attached directly to a
! parallel construct (there is no loop in this subroutine).
subroutine simple_int_add
integer :: i
i = 0

! The region body adds the constant 1 to the reduction variable.
!$omp parallel reduction(+:i)
i = i + 1
!$omp end parallel

print *, i
end subroutine

!CHECK-LABEL: func.func @_QPsimple_real_add
!CHECK: %[[RREF:.*]] = fir.alloca f32 {bindc_name = "r", uniq_name = "_QFsimple_real_addEr"}
!CHECK: %[[R_DECL:.*]]:2 = hlfir.declare %[[RREF]] {uniq_name = "_QFsimple_real_addEr"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: %[[R_START:.*]] = arith.constant 0.000000e+00 : f32
!CHECK: hlfir.assign %[[R_START]] to %[[R_DECL]]#0 : f32, !fir.ref<f32>
!CHECK: omp.parallel byref reduction(@[[RED_F32_NAME]] %[[R_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<f32>) {
!CHECK: %[[P_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: %[[LPRV:.+]] = fir.load %[[P_DECL]]#0 : !fir.ref<f32>
!CHECK: %[[R_INCR:.*]] = arith.constant 1.500000e+00 : f32
!CHECK: %[[RES:.+]] = arith.addf %[[LPRV]], %[[R_INCR]] {{.*}} : f32
!CHECK: hlfir.assign %[[RES]] to %[[P_DECL]]#0 : f32, !fir.ref<f32>
!CHECK: omp.terminator
!CHECK: }
!CHECK: return
! Test input: + reduction on a default real scalar, attached directly to a
! parallel construct (there is no loop in this subroutine).
subroutine simple_real_add
real :: r
r = 0.0

! The region body adds the constant 1.5 to the reduction variable.
!$omp parallel reduction(+:r)
r = r + 1.5
!$omp end parallel

print *, r
end subroutine

!CHECK-LABEL: func.func @_QPint_real_add
!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFint_real_addEi"}
!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[IREF]] {uniq_name = "_QFint_real_addEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[RREF:.*]] = fir.alloca f32 {bindc_name = "r", uniq_name = "_QFint_real_addEr"}
!CHECK: %[[R_DECL:.*]]:2 = hlfir.declare %[[RREF]] {uniq_name = "_QFint_real_addEr"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: %[[R_START:.*]] = arith.constant 0.000000e+00 : f32
!CHECK: hlfir.assign %[[R_START]] to %[[R_DECL]]#0 : f32, !fir.ref<f32>
!CHECK: %[[I_START:.*]] = arith.constant 0 : i32
!CHECK: hlfir.assign %[[I_START]] to %[[I_DECL]]#0 : i32, !fir.ref<i32>
!CHECK: omp.parallel byref reduction(@[[RED_I32_NAME]] %[[I_DECL]]#0 -> %[[IPRV:.+]] : !fir.ref<i32>, @[[RED_F32_NAME]] %[[R_DECL]]#0 -> %[[RPRV:.+]] : !fir.ref<f32>) {
!CHECK: %[[IP_DECL:.+]]:2 = hlfir.declare %[[IPRV]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[RP_DECL:.+]]:2 = hlfir.declare %[[RPRV]] {{.*}} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: %[[R_INCR:.*]] = arith.constant 1.500000e+00 : f32
!CHECK: %[[R_LPRV:.+]] = fir.load %[[RP_DECL]]#0 : !fir.ref<f32>
!CHECK: %[[RES1:.+]] = arith.addf %[[R_INCR]], %[[R_LPRV]] {{.*}} : f32
!CHECK: hlfir.assign %[[RES1]] to %[[RP_DECL]]#0 : f32, !fir.ref<f32>
!CHECK: %[[I_LPRV:.+]] = fir.load %[[IP_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[I_INCR:.*]] = arith.constant 3 : i32
!CHECK: %[[RES0:.+]] = arith.addi %[[I_LPRV]], %[[I_INCR]] : i32
!CHECK: hlfir.assign %[[RES0]] to %[[IP_DECL]]#0 : i32, !fir.ref<i32>
!CHECK: omp.terminator
!CHECK: }
!CHECK: return
! Test input: one reduction clause listing two variables of different types
! (integer i and real r) on a single parallel construct.
subroutine int_real_add
real :: r
integer :: i

! r is assigned before i here, while the clause below lists i before r.
r = 0.0
i = 0

! In the r update the constant is the left operand (1.5 + r); in the i update
! the reduction variable is the left operand (i + 3).
!$omp parallel reduction(+:i,r)
r = 1.5 + r
i = i + 3
!$omp end parallel

print *, r
print *, i
end subroutine
44 changes: 44 additions & 0 deletions flang/test/Lower/OpenMP/parallel-reduction-byref.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s

!CHECK: omp.reduction.declare @[[REDUCTION_DECLARE:[_a-z0-9]+]] : !fir.ref<i32>
!CHECK-SAME: init {
!CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
!CHECK: %[[I0:[_a-z0-9]+]] = arith.constant 0 : i32
!CHECK: %[[REF:.*]] = fir.alloca i32
!CHECK: fir.store %[[I0]] to %[[REF]] : !fir.ref<i32>
!CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
!CHECK: } combiner {
!CHECK: ^bb0(%[[C0:[_a-z0-9]+]]: !fir.ref<i32>, %[[C1:[_a-z0-9]+]]: !fir.ref<i32>):
!CHECK: %[[LD0:.*]] = fir.load %[[C0]] : !fir.ref<i32>
!CHECK: %[[LD1:.*]] = fir.load %[[C1]] : !fir.ref<i32>
!CHECK: %[[CR:[_a-z0-9]+]] = arith.addi %[[LD0]], %[[LD1]] : i32
!CHECK: fir.store %[[CR]] to %[[C0]] : !fir.ref<i32>
!CHECK: omp.yield(%[[C0]] : !fir.ref<i32>)
!CHECK: }
!CHECK: func.func @_QQmain() attributes {fir.bindc_name = "mn"} {
!CHECK: %[[RED_ACCUM_REF:[_a-z0-9]+]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"}
!CHECK: %[[RED_ACCUM_DECL:[_a-z0-9]+]]:2 = hlfir.declare %[[RED_ACCUM_REF]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[C0:[_a-z0-9]+]] = arith.constant 0 : i32
!CHECK: hlfir.assign %[[C0]] to %[[RED_ACCUM_DECL]]#0 : i32, !fir.ref<i32>
!CHECK: omp.parallel byref reduction(@[[REDUCTION_DECLARE]] %[[RED_ACCUM_DECL]]#0 -> %[[PRIVATE_RED:[a-z0-9]+]] : !fir.ref<i32>) {
!CHECK: %[[PRIVATE_DECL:[_a-z0-9]+]]:2 = hlfir.declare %[[PRIVATE_RED]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[C1:[_a-z0-9]+]] = arith.constant 1 : i32
!CHECK: hlfir.assign %[[C1]] to %[[PRIVATE_DECL]]#0 : i32, !fir.ref<i32>
!CHECK: omp.terminator
!CHECK: }
!CHECK: %[[RED_ACCUM_VAL:[_a-z0-9]+]] = fir.load %[[RED_ACCUM_DECL]]#0 : !fir.ref<i32>
!CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[RED_ACCUM_VAL]]) fastmath<contract> : (!fir.ref<i8>, i32) -> i1
!CHECK: return
!CHECK: }

! Test input: main program with a + reduction on integer i attached to a
! parallel construct.
program mn
integer :: i
i = 0

! The region body only assigns the constant 1 to the reduction variable; it
! does not read i.
!$omp parallel reduction(+:i)
i = 1
!$omp end parallel

! i is printed after the region.
print *, i
end program
16 changes: 16 additions & 0 deletions flang/test/Lower/OpenMP/parallel-wsloop-reduction-byref.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
! Check that for parallel do, reduction is only processed for the loop

! RUN: bbc -fopenmp --force-byref-reduction -emit-hlfir %s -o - | FileCheck %s
! RUN: flang-new -fc1 -fopenmp -mmlir --force-byref-reduction -emit-hlfir %s -o - | FileCheck %s

! CHECK: omp.parallel {
! CHECK: omp.wsloop byref reduction(@add_reduction_i_32
! Test input: + reduction on integer x specified on a combined
! "parallel do" construct. The loop index i has no explicit declaration in
! this subroutine.
subroutine sb
integer :: x
x = 0
! Each of the 100 iterations adds 1 to the reduction variable.
!$omp parallel do reduction(+:x)
do i=1,100
x = x + 1
end do
!$omp end parallel do
end subroutine
72 changes: 72 additions & 0 deletions flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
!RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s

! The primary goal of this test is to check that the promotion
! of non-CPTR arguments from use_device_ptr to
! use_device_addr works without breaking any
! functionality.

!CHECK: func.func @{{.*}}only_use_device_ptr()
!CHECK: omp.target_data use_device_ptr(%{{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) use_device_addr(%{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) {
!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
! Test input: a target data construct with only a use_device_ptr clause.
! The clause lists two Fortran pointer arrays (pa, array) together with one
! type(c_ptr) variable (cptr). The construct has an empty body.
subroutine only_use_device_ptr
use iso_c_binding
integer, pointer, dimension(:) :: array
real, pointer :: pa(:)
type(c_ptr) :: cptr

! Clause order is pa, cptr, array.
!$omp target data use_device_ptr(pa, cptr, array)
!$omp end target data
end subroutine

!CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr()
!CHECK: omp.target_data use_device_ptr({{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) use_device_addr(%{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) {
!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>):
! Test input: a target data construct with both use_device_ptr and
! use_device_addr clauses. use_device_ptr lists a Fortran pointer array (pa)
! and a type(c_ptr) variable (cptr); use_device_addr lists a Fortran pointer
! array (array). The construct has an empty body.
subroutine mix_use_device_ptr_and_addr
use iso_c_binding
integer, pointer, dimension(:) :: array
real, pointer :: pa(:)
type(c_ptr) :: cptr

!$omp target data use_device_ptr(pa, cptr) use_device_addr(array)
!$omp end target data
end subroutine

!CHECK: func.func @{{.*}}only_use_device_addr()
!CHECK: omp.target_data use_device_addr(%{{.*}}, %{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) {
!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
! Test input: a target data construct with only a use_device_addr clause.
! The clause lists two Fortran pointer arrays (pa, array) and one type(c_ptr)
! variable (cptr). The construct has an empty body.
subroutine only_use_device_addr
use iso_c_binding
integer, pointer, dimension(:) :: array
real, pointer :: pa(:)
type(c_ptr) :: cptr

! Clause order is pa, cptr, array.
!$omp target data use_device_addr(pa, cptr, array)
!$omp end target data
end subroutine

!CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr_and_map()
!CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>) use_device_ptr(%{{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) use_device_addr(%{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) {
!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>):
! Test input: a target data construct combining use_device_ptr,
! use_device_addr and map clauses. The pointer-related clauses match those in
! mix_use_device_ptr_and_addr; the map clause additionally lists two integer
! scalars (i, j) with the tofrom map type. The construct has an empty body.
subroutine mix_use_device_ptr_and_addr_and_map
use iso_c_binding
integer :: i, j
integer, pointer, dimension(:) :: array
real, pointer :: pa(:)
type(c_ptr) :: cptr

!$omp target data use_device_ptr(pa, cptr) use_device_addr(array) map(tofrom: i, j)
!$omp end target data
end subroutine

!CHECK: func.func @{{.*}}only_use_map()
!CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) {
! Test input: a target data construct with only a map clause (no
! use_device_ptr or use_device_addr). The clause lists two Fortran pointer
! arrays (pa, array) and one type(c_ptr) variable (cptr), with no explicit map
! type. The construct has an empty body.
subroutine only_use_map
use iso_c_binding
integer, pointer, dimension(:) :: array
real, pointer :: pa(:)
type(c_ptr) :: cptr

!$omp target data map(pa, cptr, array)
!$omp end target data
end subroutine
433 changes: 433 additions & 0 deletions flang/test/Lower/OpenMP/wsloop-reduction-add-byref.f90

Large diffs are not rendered by default.

58 changes: 58 additions & 0 deletions flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir-byref.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s

! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py

! CHECK-LABEL: omp.reduction.declare @add_reduction_i_32_byref : !fir.ref<i32>
! CHECK-SAME: init {
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
! CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
! CHECK: %[[REF:.*]] = fir.alloca i32
! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
! CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)

! CHECK-LABEL: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
! CHECK: %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i32
! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
! CHECK: }

! CHECK-LABEL: func.func @_QPsimple_int_reduction()
! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reductionEi"}
! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"}
! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i32
! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
! CHECK: omp.parallel {
! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32
! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]])
! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
! CHECK: omp.yield
! CHECK: omp.terminator
! CHECK: return


! Test input: + reduction on integer x, performed by a worksharing loop
! (omp do) nested inside a parallel region. The loop index i has no explicit
! declaration in this subroutine.
subroutine simple_int_reduction
integer :: x
x = 0
! Each iteration adds the loop index to the reduction variable.
!$omp parallel
!$omp do reduction(+:x)
do i=1, 100
x = x + i
end do
!$omp end do
!$omp end parallel
end subroutine
64 changes: 64 additions & 0 deletions flang/test/Lower/OpenMP/wsloop-reduction-iand-byref.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s

! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py

! CHECK-LABEL: omp.reduction.declare @iand_i_32_byref : !fir.ref<i32>
! CHECK-SAME: init {
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
! CHECK: %[[C0_1:.*]] = arith.constant -1 : i32
! CHECK: %[[REF:.*]] = fir.alloca i32
! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
! CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)

! CHECK-LABEL: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
! CHECK: %[[RES:.*]] = arith.andi %[[LD0]], %[[LD1]] : i32
! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
! CHECK: }

! CHECK-LABEL: func.func @_QPreduction_iand(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "y"}) {
! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_iandEi"}
! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_iandEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iandEx"}
! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_iandEy"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32
! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
! CHECK: omp.parallel {
! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_iandEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32
! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop byref reduction(@iand_i_32_byref %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
! CHECK: %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i32
! CHECK: hlfir.assign %[[VAL_20]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
! CHECK: omp.yield
! CHECK: omp.terminator



! Test input: IAND (bitwise and) reduction on integer x, performed by a
! worksharing loop nested inside a parallel region. y is an assumed-shape
! integer dummy array supplying the loop operands. The loop index i has no
! explicit declaration in this subroutine.
subroutine reduction_iand(y)
integer :: x, y(:)
! x is set to 0 before the parallel region is entered.
x = 0
! The reduction variable is the first argument of iand.
!$omp parallel
!$omp do reduction(iand:x)
do i=1, 100
x = iand(x, y(i))
end do
!$omp end do
!$omp end parallel
print *, x
end subroutine
55 changes: 55 additions & 0 deletions flang/test/Lower/OpenMP/wsloop-reduction-ieor-byref.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s

! CHECK-LABEL: omp.reduction.declare @ieor_i_32_byref : !fir.ref<i32>
! CHECK-SAME: init {
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
! CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
! CHECK: %[[REF:.*]] = fir.alloca i32
! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
! CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)

! CHECK-LABEL: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
! CHECK: %[[RES:.*]] = arith.xori %[[LD0]], %[[LD1]] : i32
! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
! CHECK: }

!CHECK-LABEL: @_QPreduction_ieor
!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_ieorEx"}
!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFreduction_ieorEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_BOX]] {uniq_name = "_QFreduction_ieorEy"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)


!CHECK: omp.parallel
!CHECK: %[[I_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_ieorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: omp.wsloop byref reduction(@ieor_i_32_byref %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) for
!CHECK: fir.store %{{.*}} to %[[I_DECL]]#1 : !fir.ref<i32>
!CHECK: %[[PRV_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[I_64:.*]] = fir.convert %[[I_32]] : (i32) -> i64
!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[I_64]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
!CHECK: %[[LPRV:.+]] = fir.load %[[PRV_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
!CHECK: %[[RES:.+]] = arith.xori %[[LPRV]], %[[Y_I]] : i32
!CHECK: hlfir.assign %[[RES]] to %[[PRV_DECL]]#0 : i32, !fir.ref<i32>
!CHECK: omp.yield
!CHECK: omp.terminator

! Test input: IEOR (bitwise exclusive or) reduction on integer x, performed by
! a worksharing loop nested inside a parallel region. y is an assumed-shape
! integer dummy array supplying the loop operands. The loop index i has no
! explicit declaration in this subroutine.
subroutine reduction_ieor(y)
integer :: x, y(:)
! x is set to 0 before the parallel region is entered.
x = 0
! The reduction variable is the first argument of ieor.
!$omp parallel
!$omp do reduction(ieor:x)
do i=1, 100
x = ieor(x, y(i))
end do
!$omp end do
!$omp end parallel
print *, x
end subroutine
64 changes: 64 additions & 0 deletions flang/test/Lower/OpenMP/wsloop-reduction-ior-byref.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s

! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py

! CHECK-LABEL: omp.reduction.declare @ior_i_32_byref : !fir.ref<i32>
! CHECK-SAME: init {
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
! CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
! CHECK: %[[REF:.*]] = fir.alloca i32
! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
! CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)

! CHECK-LABEL: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
! CHECK: %[[RES:.*]] = arith.ori %[[LD0]], %[[LD1]] : i32
! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
! CHECK: }

! CHECK-LABEL: func.func @_QPreduction_ior(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "y"}) {
! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_iorEi"}
! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_iorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iorEx"}
! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_iorEy"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32
! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
! CHECK: omp.parallel
! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_iorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32
! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop byref reduction(@ior_i_32_byref %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]])
! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
! CHECK: %[[VAL_20:.*]] = arith.ori %[[VAL_18]], %[[VAL_19]] : i32
! CHECK: hlfir.assign %[[VAL_20]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
! CHECK: omp.yield
! CHECK: omp.terminator



! Test input: IOR (bitwise inclusive or) reduction on integer x, performed by
! a worksharing loop nested inside a parallel region. y is an assumed-shape
! integer dummy array supplying the loop operands. The loop index i has no
! explicit declaration in this subroutine.
subroutine reduction_ior(y)
integer :: x, y(:)
! x is set to 0 before the parallel region is entered.
x = 0
! The reduction variable is the first argument of ior.
!$omp parallel
!$omp do reduction(ior:x)
do i=1, 100
x = ior(x, y(i))
end do
!$omp end do
!$omp end parallel
print *, x
end subroutine
Loading