diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index ce87941d5382c..035035601e2f2 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -434,6 +434,11 @@ std::pair<hlfir::Entity, mlir::Value>
 createTempFromMold(mlir::Location loc, fir::FirOpBuilder &builder,
                    hlfir::Entity mold);
 
+// TODO: this does not support polymorphic molds
+hlfir::Entity createStackTempFromMold(mlir::Location loc,
+                                      fir::FirOpBuilder &builder,
+                                      hlfir::Entity mold);
+
 hlfir::EntityWithAttributes convertCharacterKind(mlir::Location loc,
                                                  fir::FirOpBuilder &builder,
                                                  hlfir::Entity scalarChar,
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index 6dc467c4f69bc..54d9fc556973f 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -13,6 +13,7 @@
 #include "ReductionProcessor.h"
 
 #include "flang/Lower/AbstractConverter.h"
+#include "flang/Optimizer/Builder/HLFIRTools.h"
 #include "flang/Optimizer/Builder/Todo.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
@@ -90,10 +91,42 @@ std::string ReductionProcessor::getReductionName(llvm::StringRef name,
   if (isByRef)
     byrefAddition = "_byref";
 
-  return (llvm::Twine(name) +
-          (ty.isIntOrIndex() ? llvm::Twine("_i_") : llvm::Twine("_f_")) +
-          llvm::Twine(ty.getIntOrFloatBitWidth()) + byrefAddition)
-      .str();
+  if (fir::isa_trivial(ty))
+    return (llvm::Twine(name) +
+            (ty.isIntOrIndex() ? llvm::Twine("_i_") : llvm::Twine("_f_")) +
+            llvm::Twine(ty.getIntOrFloatBitWidth()) + byrefAddition)
+        .str();
+
+  // creates a name like reduction_i_64_box_ux4x3
+  if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) {
+    // TODO: support for allocatable boxes:
+    // !fir.box<!fir.heap<!fir.array<...>>>
+    fir::SequenceType seqTy = fir::unwrapRefType(boxTy.getEleTy())
+                                  .dyn_cast_or_null<fir::SequenceType>();
+    if (!seqTy)
+      return {};
+
+    std::string prefix = getReductionName(
+        name, fir::unwrapSeqOrBoxedSeqType(ty), /*isByRef=*/false);
+    if (prefix.empty())
+      return {};
+    std::stringstream tyStr;
+    tyStr << prefix << "_box_";
+    bool first = true;
+    for (std::int64_t extent : seqTy.getShape()) {
+      if (first)
+        first = false;
+      else
+        tyStr << "x";
+      if (extent == seqTy.getUnknownExtent())
+        tyStr << 'u'; // I'm not sure that '?' is safe in symbol names
+      else
+        tyStr << extent;
+    }
+    return (tyStr.str() + byrefAddition).str();
+  }
+
+  return {};
 }
 
 std::string ReductionProcessor::getReductionName(
@@ -281,6 +314,148 @@ mlir::Value ReductionProcessor::createScalarCombiner(
   return reductionOp;
 }
 
+/// Create reduction combiner region for reduction variables which are boxed
+/// arrays
+static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
+                           ReductionProcessor::ReductionIdentifier redId,
+                           fir::BaseBoxType boxTy, mlir::Value lhs,
+                           mlir::Value rhs) {
+  fir::SequenceType seqTy =
+      mlir::dyn_cast_or_null<fir::SequenceType>(boxTy.getEleTy());
+  // TODO: support allocatable arrays: !fir.box<!fir.heap<!fir.array<...>>>
+  if (!seqTy || seqTy.hasUnknownShape())
+    TODO(loc, "Unsupported boxed type in OpenMP reduction");
+
+  // load fir.ref<fir.box<...>>
+  mlir::Value lhsAddr = lhs;
+  lhs = builder.create<fir::LoadOp>(loc, lhs);
+  rhs = builder.create<fir::LoadOp>(loc, rhs);
+
+  const unsigned rank = seqTy.getDimension();
+  llvm::SmallVector<mlir::Value> extents;
+  extents.reserve(rank);
+  llvm::SmallVector<mlir::Value> lbAndExtents;
+  lbAndExtents.reserve(rank * 2);
+
+  // Get box lowerbounds and extents:
+  mlir::Type idxTy = builder.getIndexType();
+  for (unsigned i = 0; i < rank; ++i) {
+    // TODO: ideally we want to hoist box reads out of the critical section.
+    // We could do this by having box dimensions in block arguments like
+    // OpenACC does
+    mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
+    auto dimInfo =
+        builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, lhs, dim);
+    extents.push_back(dimInfo.getExtent());
+    lbAndExtents.push_back(dimInfo.getLowerBound());
+    lbAndExtents.push_back(dimInfo.getExtent());
+  }
+
+  auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank);
+  auto shapeShift =
+      builder.create<fir::ShapeShiftOp>(loc, shapeShiftTy, lbAndExtents);
+
+  // Iterate over array elements, applying the equivalent scalar reduction:
+
+  // A hlfir::elemental here gets inlined with a temporary so create the
+  // loop nest directly.
+  // This function already controls all of the code in this region so we
+  // know this won't miss any opportuinties for clever elemental inlining
+  hlfir::LoopNest nest =
+      hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
+  auto lhsEleAddr = builder.create<fir::ArrayCoorOp>(
+      loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
+      nest.oneBasedIndices, /*typeparms=*/mlir::ValueRange{});
+  auto rhsEleAddr = builder.create<fir::ArrayCoorOp>(
+      loc, refTy, rhs, shapeShift, /*slice=*/mlir::Value{},
+      nest.oneBasedIndices, /*typeparms=*/mlir::ValueRange{});
+  auto lhsEle = builder.create<fir::LoadOp>(loc, lhsEleAddr);
+  auto rhsEle = builder.create<fir::LoadOp>(loc, rhsEleAddr);
+  mlir::Value scalarReduction = ReductionProcessor::createScalarCombiner(
+      builder, loc, redId, refTy, lhsEle, rhsEle);
+  builder.create<fir::StoreOp>(loc, scalarReduction, lhsEleAddr);
+
+  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.create<mlir::omp::YieldOp>(loc, lhsAddr);
+}
+
+// generate combiner region for reduction operations
+static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
+                        ReductionProcessor::ReductionIdentifier redId,
+                        mlir::Type ty, mlir::Value lhs, mlir::Value rhs,
+                        bool isByRef) {
+  ty = fir::unwrapRefType(ty);
+
+  if (fir::isa_trivial(ty)) {
+    mlir::Value lhsLoaded = builder.loadIfRef(loc, lhs);
+    mlir::Value rhsLoaded = builder.loadIfRef(loc, rhs);
+
+    mlir::Value result = ReductionProcessor::createScalarCombiner(
+        builder, loc, redId, ty, lhsLoaded, rhsLoaded);
+    if (isByRef) {
+      builder.create<fir::StoreOp>(loc, result, lhs);
+      builder.create<mlir::omp::YieldOp>(loc, lhs);
+    } else {
+      builder.create<mlir::omp::YieldOp>(loc, result);
+    }
+    return;
+  }
+  // all arrays should have been boxed
+  if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(ty)) {
+    genBoxCombiner(builder, loc, redId, boxTy, lhs, rhs);
+    return;
+  }
+
+  TODO(loc, "OpenMP genCombiner for unsupported reduction variable type");
+}
+
+static mlir::Value
+createReductionInitRegion(fir::FirOpBuilder &builder, mlir::Location loc,
+                          const ReductionProcessor::ReductionIdentifier redId,
+                          mlir::Type type, bool isByRef) {
+  mlir::Type ty = fir::unwrapRefType(type);
+  mlir::Value initValue = ReductionProcessor::getReductionInitValue(
+      loc, fir::unwrapSeqOrBoxedSeqType(ty), redId, builder);
+
+  if (fir::isa_trivial(ty)) {
+    if (isByRef) {
+      mlir::Value alloca = builder.create<fir::AllocaOp>(loc, ty);
+      builder.createStoreWithConvert(loc, initValue, alloca);
+      return alloca;
+    }
+    // by val
+    return initValue;
+  }
+
+  // all arrays are boxed
+  if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) {
+    assert(isByRef && "passing arrays by value is unsupported");
+    // TODO: support allocatable arrays: !fir.box<!fir.heap<!fir.array<...>>>
+    mlir::Type innerTy = fir::extractSequenceType(boxTy);
+    if (!mlir::isa<fir::SequenceType>(innerTy))
+      TODO(loc, "Unsupported boxed type for reduction");
+    // Create the private copy from the initial fir.box:
+    hlfir::Entity source = hlfir::Entity{builder.getBlock()->getArgument(0)};
+
+    // TODO: if the whole reduction is nested inside of a loop, this alloca
+    // could lead to a stack overflow (the memory is only freed at the end of
+    // the stack frame). The reduction declare operation needs a deallocation
+    // region to undo the init region.
+    hlfir::Entity temp = createStackTempFromMold(loc, builder, source);
+
+    // Put the temporary inside of a box:
+    hlfir::Entity box = hlfir::genVariableBox(loc, builder, temp);
+    builder.create<hlfir::AssignOp>(loc, initValue, box);
+    mlir::Value boxAlloca = builder.create<fir::AllocaOp>(loc, ty);
+    builder.create<fir::StoreOp>(loc, box, boxAlloca);
+    return boxAlloca;
+  }
+
+  TODO(loc, "createReductionInitRegion for unsupported type");
+}
+
 mlir::omp::ReductionDeclareOp ReductionProcessor::createReductionDecl(
     fir::FirOpBuilder &builder, llvm::StringRef reductionOpName,
     const ReductionIdentifier redId, mlir::Type type, mlir::Location loc,
@@ -288,6 +463,9 @@ mlir::omp::ReductionDeclareOp ReductionProcessor::createReductionDecl(
   mlir::OpBuilder::InsertionGuard guard(builder);
   mlir::ModuleOp module = builder.getModule();
 
+  if (reductionOpName.empty())
+    TODO(loc, "Reduction of some types is not supported");
+
   auto decl =
       module.lookupSymbol<mlir::omp::ReductionDeclareOp>(reductionOpName);
   if (decl)
@@ -304,14 +482,9 @@ mlir::omp::ReductionDeclareOp ReductionProcessor::createReductionDecl(
                       decl.getInitializerRegion().end(), {type}, {loc});
   builder.setInsertionPointToEnd(&decl.getInitializerRegion().back());
 
-  mlir::Value init = getReductionInitValue(loc, type, redId, builder);
-  if (isByRef) {
-    mlir::Value alloca = builder.create<fir::AllocaOp>(loc, valTy);
-    builder.createStoreWithConvert(loc, init, alloca);
-    builder.create<mlir::omp::YieldOp>(loc, alloca);
-  } else {
-    builder.create<mlir::omp::YieldOp>(loc, init);
-  }
+  mlir::Value init =
+      createReductionInitRegion(builder, loc, redId, type, isByRef);
+  builder.create<mlir::omp::YieldOp>(loc, init);
 
   builder.createBlock(&decl.getReductionRegion(),
                       decl.getReductionRegion().end(), {type, type},
@@ -320,19 +493,7 @@ mlir::omp::ReductionDeclareOp ReductionProcessor::createReductionDecl(
   builder.setInsertionPointToEnd(&decl.getReductionRegion().back());
   mlir::Value op1 = decl.getReductionRegion().front().getArgument(0);
   mlir::Value op2 = decl.getReductionRegion().front().getArgument(1);
-  mlir::Value outAddr = op1;
-
-  op1 = builder.loadIfRef(loc, op1);
-  op2 = builder.loadIfRef(loc, op2);
-
-  mlir::Value reductionOp =
-      createScalarCombiner(builder, loc, redId, type, op1, op2);
-  if (isByRef) {
-    builder.create<fir::StoreOp>(loc, reductionOp, outAddr);
-    builder.create<mlir::omp::YieldOp>(loc, outAddr);
-  } else {
-    builder.create<mlir::omp::YieldOp>(loc, reductionOp);
-  }
+  genCombiner(builder, loc, redId, type, op1, op2, isByRef);
 
   return decl;
 }
@@ -387,13 +548,33 @@ void ReductionProcessor::addReductionDecl(
 
   // initial pass to collect all reduction vars so we can figure out if this
   // should happen byref
+  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   for (const Object &object : objectList) {
     const Fortran::semantics::Symbol *symbol = object.id();
     if (reductionSymbols)
       reductionSymbols->push_back(symbol);
     mlir::Value symVal = converter.getSymbolAddress(*symbol);
-    if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
+    auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());
+
+    // all arrays must be boxed so that we have convenient access to all the
+    // information needed to iterate over the array
+    if (mlir::isa<fir::SequenceType>(redType.getEleTy())) {
+      hlfir::Entity entity{symVal};
+      entity = genVariableBox(currentLocation, builder, entity);
+      mlir::Value box = entity.getBase();
+
+      // Always pass the box by reference so that the OpenMP dialect
+      // verifiers don't need to know anything about fir.box
+      auto alloca =
+          builder.create<fir::AllocaOp>(currentLocation, box.getType());
+      builder.create<fir::StoreOp>(currentLocation, box, alloca);
+
+      symVal = alloca;
+      redType = mlir::cast<fir::ReferenceType>(symVal.getType());
+    } else if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>()) {
       symVal = declOp.getBase();
+    }
+
     reductionVars.push_back(symVal);
   }
   const bool isByRef = doReductionByRef(reductionVars);
@@ -418,24 +599,17 @@ void ReductionProcessor::addReductionDecl(
       break;
     }
 
-    for (const Object &object : objectList) {
-      const Fortran::semantics::Symbol *symbol = object.id();
-      mlir::Value symVal = converter.getSymbolAddress(*symbol);
-      if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
-        symVal = declOp.getBase();
-      auto redType = symVal.getType().cast<fir::ReferenceType>();
+    for (mlir::Value symVal : reductionVars) {
+      auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());
       if (redType.getEleTy().isa<fir::LogicalType>())
         decl = createReductionDecl(
             firOpBuilder,
             getReductionName(intrinsicOp, firOpBuilder.getI1Type(), isByRef),
             redId, redType, currentLocation, isByRef);
-      else if (redType.getEleTy().isIntOrIndexOrFloat()) {
+      else
         decl = createReductionDecl(
             firOpBuilder, getReductionName(intrinsicOp, redType, isByRef),
             redId, redType, currentLocation, isByRef);
-      } else {
-        TODO(currentLocation, "Reduction of some types is not supported");
-      }
       reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get(
           firOpBuilder.getContext(), decl.getSymName()));
     }
@@ -452,8 +626,8 @@ void ReductionProcessor::addReductionDecl(
         if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
           symVal = declOp.getBase();
         auto redType = symVal.getType().cast<fir::ReferenceType>();
-        assert(redType.getEleTy().isIntOrIndexOrFloat() &&
-               "Unsupported reduction type");
+        if (!redType.getEleTy().isIntOrIndexOrFloat())
+          TODO(currentLocation, "User Defined Reduction on non-trivial type");
         decl = createReductionDecl(
             firOpBuilder,
             getReductionName(getRealName(*reductionIntrinsic).ToString(),
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.h b/flang/lib/Lower/OpenMP/ReductionProcessor.h
index ef6339407c135..f481d4cbd1c51 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.h
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.h
@@ -108,7 +108,7 @@ class ReductionProcessor {
   /// Creates an OpenMP reduction declaration and inserts it into the provided
   /// symbol table. The declaration has a constant initializer with the neutral
   /// value `initValue`, and the reduction combiner carried over from `reduce`.
-  /// TODO: Generalize this for non-integer types, add atomic region.
+  /// TODO: add atomic region.
   static mlir::omp::ReductionDeclareOp
   createReductionDecl(fir::FirOpBuilder &builder,
                       llvm::StringRef reductionOpName,
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index c7a550814e1d5..db638ceb40700 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -1111,6 +1111,35 @@ hlfir::createTempFromMold(mlir::Location loc, fir::FirOpBuilder &builder,
   return {hlfir::Entity{declareOp.getBase()}, isHeapAlloc};
 }
 
+hlfir::Entity hlfir::createStackTempFromMold(mlir::Location loc,
+                                             fir::FirOpBuilder &builder,
+                                             hlfir::Entity mold) {
+  llvm::SmallVector<mlir::Value> lenParams;
+  hlfir::genLengthParameters(loc, builder, mold, lenParams);
+  llvm::StringRef tmpName{".tmp"};
+  mlir::Value alloc;
+  mlir::Value shape{};
+  fir::FortranVariableFlagsAttr declAttrs;
+
+  if (mold.isPolymorphic()) {
+    // genAllocatableApplyMold does heap allocation
+    TODO(loc, "createStackTempFromMold for polymorphic type");
+  } else if (mold.isArray()) {
+    mlir::Type sequenceType =
+        hlfir::getFortranElementOrSequenceType(mold.getType());
+    shape = hlfir::genShape(loc, builder, mold);
+    auto extents = hlfir::getIndexExtents(loc, builder, shape);
+    alloc =
+        builder.createTemporary(loc, sequenceType, tmpName, extents, lenParams);
+  } else {
+    alloc = builder.createTemporary(loc, mold.getFortranElementType(), tmpName,
+                                    /*shape=*/std::nullopt, lenParams);
+  }
+  auto declareOp = builder.create<hlfir::DeclareOp>(loc, alloc, tmpName, shape,
+                                                    lenParams, declAttrs);
+  return hlfir::Entity{declareOp.getBase()};
+}
+
 hlfir::EntityWithAttributes
 hlfir::convertCharacterKind(mlir::Location loc, fir::FirOpBuilder &builder,
                             hlfir::Entity scalarChar, int toKind) {
diff --git a/flang/test/Lower/OpenMP/Todo/reduction-arrays.f90 b/flang/test/Lower/OpenMP/Todo/reduction-arrays.f90
deleted file mode 100644
index a21611faf248c..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/reduction-arrays.f90
+++ /dev/null
@@ -1,15 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: Reduction of some types is not supported
-subroutine reduction_array(y)
-  integer :: x(100), y(100,100)
-  !$omp parallel
-  !$omp do reduction(+:x)
-  do i=1, 100
-    x = x + y(:,i)
-  end do
-  !$omp end do
-  !$omp end parallel
-  print *, x
-end subroutine
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-array.f90 b/flang/test/Lower/OpenMP/parallel-reduction-array.f90
new file mode 100644
index 0000000000000..19c4586310851
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-reduction-array.f90
@@ -0,0 +1,74 @@
+! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+program reduce
+integer, dimension(3) :: i = 0
+
+!$omp parallel reduction(+:i)
+i(1) = 1
+i(2) = 2
+i(3) = 3
+!$omp end parallel
+
+print *,i
+end program
+
+! CHECK-LABEL:   omp.reduction.declare @add_reduction_i_32_box_3_byref : !fir.ref<!fir.box<!fir.array<3xi32>>> init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
+! CHECK:           %[[VAL_1:.*]] = fir.alloca !fir.array<3xi32> {bindc_name = ".tmp"}
+! CHECK:           %[[VAL_2:.*]] = arith.constant 0 : i32
+! CHECK:           %[[VAL_3:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:           %[[VAL_4:.*]] = arith.constant 3 : index
+! CHECK:           %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
+! CHECK:           %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<3xi32>>
+! CHECK:           hlfir.assign %[[VAL_2]] to %[[VAL_7]] : i32, !fir.box<!fir.array<3xi32>>
+! CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<3xi32>>
+! CHECK:           fir.store %[[VAL_7]] to %[[VAL_8]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:           omp.yield(%[[VAL_8]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
+! CHECK:         } combiner {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
+! CHECK:           %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:           %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box<!fir.array<3xi32>>, index) -> (index, index, index)
+! CHECK:           %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]]#0, %[[VAL_5]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+! CHECK:           fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_5]]#1 step %[[VAL_7]] unordered {
+! CHECK:             %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.array<3xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.array<3xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:             %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+! CHECK:             %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i32
+! CHECK:             fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
+! CHECK:           }
+! CHECK:           omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QQmain()
+! CHECK:           %[[VAL_0:.*]] = fir.address_of(@_QFEi) : !fir.ref<!fir.array<3xi32>>
+! CHECK:           %[[VAL_1:.*]] = arith.constant 3 : index
+! CHECK:           %[[VAL_2:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_2]]) {uniq_name = "_QFEi"} : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
+! CHECK:           %[[VAL_4:.*]] = fir.embox %[[VAL_3]]#1(%[[VAL_2]]) : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<3xi32>>
+! CHECK:           %[[VAL_5:.*]] = fir.alloca !fir.box<!fir.array<3xi32>>
+! CHECK:           fir.store %[[VAL_4]] to %[[VAL_5]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:           omp.parallel byref reduction(@add_reduction_i_32_box_3_byref %[[VAL_5]] -> %[[VAL_6:.*]] : !fir.ref<!fir.box<!fir.array<3xi32>>>) {
+! CHECK:             %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref<!fir.box<!fir.array<3xi32>>>) -> (!fir.ref<!fir.box<!fir.array<3xi32>>>, !fir.ref<!fir.box<!fir.array<3xi32>>>)
+! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_9:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : index
+! CHECK:             %[[VAL_11:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_10]])  : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+! CHECK:             hlfir.assign %[[VAL_8]] to %[[VAL_11]] : i32, !fir.ref<i32>
+! CHECK:             %[[VAL_12:.*]] = arith.constant 2 : i32
+! CHECK:             %[[VAL_13:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:             %[[VAL_14:.*]] = arith.constant 2 : index
+! CHECK:             %[[VAL_15:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_14]])  : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+! CHECK:             hlfir.assign %[[VAL_12]] to %[[VAL_15]] : i32, !fir.ref<i32>
+! CHECK:             %[[VAL_16:.*]] = arith.constant 3 : i32
+! CHECK:             %[[VAL_17:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:             %[[VAL_18:.*]] = arith.constant 3 : index
+! CHECK:             %[[VAL_19:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_18]])  : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+! CHECK:             hlfir.assign %[[VAL_16]] to %[[VAL_19]] : i32, !fir.ref<i32>
+! CHECK:             omp.terminator
+! CHECK:           }
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-array2.f90 b/flang/test/Lower/OpenMP/parallel-reduction-array2.f90
new file mode 100644
index 0000000000000..04c4045a6729e
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-reduction-array2.f90
@@ -0,0 +1,89 @@
+! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+program reduce
+integer, dimension(3) :: i = 0
+
+!$omp parallel reduction(+:i)
+i(1) = i(1) + 1
+i(2) = i(2) + 2
+i(3) = i(3) + 3
+!$omp end parallel
+
+print *,i
+end program
+
+! CHECK-LABEL:   omp.reduction.declare @add_reduction_i_32_box_3_byref : !fir.ref<!fir.box<!fir.array<3xi32>>> init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
+! CHECK:           %[[VAL_1:.*]] = fir.alloca !fir.array<3xi32> {bindc_name = ".tmp"}
+! CHECK:           %[[VAL_2:.*]] = arith.constant 0 : i32
+! CHECK:           %[[VAL_3:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:           %[[VAL_4:.*]] = arith.constant 3 : index
+! CHECK:           %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
+! CHECK:           %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<3xi32>>
+! CHECK:           hlfir.assign %[[VAL_2]] to %[[VAL_7]] : i32, !fir.box<!fir.array<3xi32>>
+! CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<3xi32>>
+! CHECK:           fir.store %[[VAL_7]] to %[[VAL_8]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:           omp.yield(%[[VAL_8]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
+! CHECK:         } combiner {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
+! CHECK:           %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:           %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box<!fir.array<3xi32>>, index) -> (index, index, index)
+! CHECK:           %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]]#0, %[[VAL_5]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+! CHECK:           fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_5]]#1 step %[[VAL_7]] unordered {
+! CHECK:             %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.array<3xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.array<3xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:             %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+! CHECK:             %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i32
+! CHECK:             fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
+! CHECK:           }
+! CHECK:           omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QQmain() attributes {fir.bindc_name = "reduce"} {
+! CHECK:           %[[VAL_0:.*]] = fir.address_of(@_QFEi) : !fir.ref<!fir.array<3xi32>>
+! CHECK:           %[[VAL_1:.*]] = arith.constant 3 : index
+! CHECK:           %[[VAL_2:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_2]]) {uniq_name = "_QFEi"} : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
+! CHECK:           %[[VAL_4:.*]] = fir.embox %[[VAL_3]]#1(%[[VAL_2]]) : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<3xi32>>
+! CHECK:           %[[VAL_5:.*]] = fir.alloca !fir.box<!fir.array<3xi32>>
+! CHECK:           fir.store %[[VAL_4]] to %[[VAL_5]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:           omp.parallel byref reduction(@add_reduction_i_32_box_3_byref %[[VAL_5]] -> %[[VAL_6:.*]] : !fir.ref<!fir.box<!fir.array<3xi32>>>) {
+! CHECK:             %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref<!fir.box<!fir.array<3xi32>>>) -> (!fir.ref<!fir.box<!fir.array<3xi32>>>, !fir.ref<!fir.box<!fir.array<3xi32>>>)
+! CHECK:             %[[VAL_8:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : index
+! CHECK:             %[[VAL_10:.*]] = hlfir.designate %[[VAL_8]] (%[[VAL_9]])  : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i32
+! CHECK:             %[[VAL_14:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : index
+! CHECK:             %[[VAL_16:.*]] = hlfir.designate %[[VAL_14]] (%[[VAL_15]])  : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+! CHECK:             hlfir.assign %[[VAL_13]] to %[[VAL_16]] : i32, !fir.ref<i32>
+! CHECK:             %[[VAL_17:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:             %[[VAL_18:.*]] = arith.constant 2 : index
+! CHECK:             %[[VAL_19:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_18]])  : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<i32>
+! CHECK:             %[[VAL_21:.*]] = arith.constant 2 : i32
+! CHECK:             %[[VAL_22:.*]] = arith.addi %[[VAL_20]], %[[VAL_21]] : i32
+! CHECK:             %[[VAL_23:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:             %[[VAL_24:.*]] = arith.constant 2 : index
+! CHECK:             %[[VAL_25:.*]] = hlfir.designate %[[VAL_23]] (%[[VAL_24]])  : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+! CHECK:             hlfir.assign %[[VAL_22]] to %[[VAL_25]] : i32, !fir.ref<i32>
+! CHECK:             %[[VAL_26:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:             %[[VAL_27:.*]] = arith.constant 3 : index
+! CHECK:             %[[VAL_28:.*]] = hlfir.designate %[[VAL_26]] (%[[VAL_27]])  : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_29:.*]] = fir.load %[[VAL_28]] : !fir.ref<i32>
+! CHECK:             %[[VAL_30:.*]] = arith.constant 3 : i32
+! CHECK:             %[[VAL_31:.*]] = arith.addi %[[VAL_29]], %[[VAL_30]] : i32
+! CHECK:             %[[VAL_32:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
+! CHECK:             %[[VAL_33:.*]] = arith.constant 3 : index
+! CHECK:             %[[VAL_34:.*]] = hlfir.designate %[[VAL_32]] (%[[VAL_33]])  : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+! CHECK:             hlfir.assign %[[VAL_31]] to %[[VAL_34]] : i32, !fir.ref<i32>
+! CHECK:             omp.terminator
+! CHECK:           }
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90
new file mode 100644
index 0000000000000..4350ace820c26
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90
@@ -0,0 +1,84 @@
+! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+program reduce
+integer :: i = 0
+integer, dimension(2) :: r = 0
+
+!$omp parallel do reduction(+:r)
+do i=0,10
+  r(1) = i
+  r(2) = -i
+enddo
+!$omp end parallel do
+
+print *,r
+end program
+
+! CHECK-LABEL   omp.reduction.declare @add_reduction_i_32_box_2_byref : !fir.ref<!fir.box<!fir.array<2xi32>>> init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<2xi32>>>):
+! CHECK:           %[[VAL_1:.*]] = fir.alloca !fir.array<2xi32> {bindc_name = ".tmp"}
+! CHECK:           %[[VAL_2:.*]] = arith.constant 0 : i32
+! CHECK:           %[[VAL_3:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:           %[[VAL_4:.*]] = arith.constant 2 : index
+! CHECK:           %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>)
+! CHECK:           %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
+! CHECK:           hlfir.assign %[[VAL_2]] to %[[VAL_7]] : i32, !fir.box<!fir.array<2xi32>>
+! CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
+! CHECK:           fir.store %[[VAL_7]] to %[[VAL_8]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:           omp.yield(%[[VAL_8]] : !fir.ref<!fir.box<!fir.array<2xi32>>>)
+
+! CHECK-LABEL   } combiner {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<2xi32>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.array<2xi32>>>):
+! CHECK:           %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:           %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box<!fir.array<2xi32>>, index) -> (index, index, index)
+! CHECK:           %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]]#0, %[[VAL_5]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+! CHECK:           fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_5]]#1 step %[[VAL_7]] unordered {
+! CHECK:             %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.array<2xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.array<2xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:             %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+! CHECK:             %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i32
+! CHECK:             fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
+! CHECK:           }
+! CHECK:           omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<2xi32>>>)
+! CHECK:         }
+
+! CHECK-LABEL   func.func @_QQmain() attributes {fir.bindc_name = "reduce"} {
+! CHECK:           %[[VAL_0:.*]] = fir.address_of(@_QFEi) : !fir.ref<i32>
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.address_of(@_QFEr) : !fir.ref<!fir.array<2xi32>>
+! CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+! CHECK:           %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_4]]) {uniq_name = "_QFEr"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>)
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_6:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_8:.*]] = arith.constant 0 : i32
+! CHECK:             %[[VAL_9:.*]] = arith.constant 10 : i32
+! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_11:.*]] = fir.embox %[[VAL_5]]#1(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
+! CHECK:             %[[VAL_12:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
+! CHECK:             fir.store %[[VAL_11]] to %[[VAL_12]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:             omp.wsloop byref reduction(@add_reduction_i_32_box_2_byref %[[VAL_12]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>)  for  (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK:               fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.array<2xi32>>>) -> (!fir.ref<!fir.box<!fir.array<2xi32>>>, !fir.ref<!fir.box<!fir.array<2xi32>>>)
+! CHECK:               %[[VAL_16:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_17:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:               %[[VAL_18:.*]] = arith.constant 1 : index
+! CHECK:               %[[VAL_19:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_18]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
+! CHECK:               hlfir.assign %[[VAL_16]] to %[[VAL_19]] : i32, !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_21:.*]] = arith.constant 0 : i32
+! CHECK:               %[[VAL_22:.*]] = arith.subi %[[VAL_21]], %[[VAL_20]] : i32
+! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:               %[[VAL_24:.*]] = arith.constant 2 : index
+! CHECK:               %[[VAL_25:.*]] = hlfir.designate %[[VAL_23]] (%[[VAL_24]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
+! CHECK:               hlfir.assign %[[VAL_22]] to %[[VAL_25]] : i32, !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90
new file mode 100644
index 0000000000000..8543c58dec4d5
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90
@@ -0,0 +1,92 @@
+! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+program reduce
+integer :: i = 0
+integer, dimension(2) :: r = 0
+
+!$omp parallel do reduction(+:r)
+do i=0,10
+  r(1) = r(1) + i
+  r(2) = r(2) - i
+enddo
+!$omp end parallel do
+
+print *,r
+end program
+
+! CHECK-LABEL   omp.reduction.declare @add_reduction_i_32_box_2_byref : !fir.ref<!fir.box<!fir.array<2xi32>>> init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<2xi32>>>):
+! CHECK:           %[[VAL_1:.*]] = fir.alloca !fir.array<2xi32> {bindc_name = ".tmp"}
+! CHECK:           %[[VAL_2:.*]] = arith.constant 0 : i32
+! CHECK:           %[[VAL_3:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:           %[[VAL_4:.*]] = arith.constant 2 : index
+! CHECK:           %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>)
+! CHECK:           %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
+! CHECK:           hlfir.assign %[[VAL_2]] to %[[VAL_7]] : i32, !fir.box<!fir.array<2xi32>>
+! CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
+! CHECK:           fir.store %[[VAL_7]] to %[[VAL_8]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:           omp.yield(%[[VAL_8]] : !fir.ref<!fir.box<!fir.array<2xi32>>>)
+
+! CHECK-LABEL   } combiner {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<2xi32>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.array<2xi32>>>):
+! CHECK:           %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:           %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box<!fir.array<2xi32>>, index) -> (index, index, index)
+! CHECK:           %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]]#0, %[[VAL_5]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+! CHECK:           fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_5]]#1 step %[[VAL_7]] unordered {
+! CHECK:             %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.array<2xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.array<2xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:             %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+! CHECK:             %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i32
+! CHECK:             fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
+! CHECK:           }
+! CHECK:           omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<2xi32>>>)
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QQmain() attributes {fir.bindc_name = "reduce"} {
+! CHECK:           %[[VAL_0:.*]] = fir.address_of(@_QFEi) : !fir.ref<i32>
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.address_of(@_QFEr) : !fir.ref<!fir.array<2xi32>>
+! CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+! CHECK:           %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_4]]) {uniq_name = "_QFEr"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>)
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_6:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_8:.*]] = arith.constant 0 : i32
+! CHECK:             %[[VAL_9:.*]] = arith.constant 10 : i32
+! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_11:.*]] = fir.embox %[[VAL_5]]#1(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
+! CHECK:             %[[VAL_12:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
+! CHECK:             fir.store %[[VAL_11]] to %[[VAL_12]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:             omp.wsloop byref reduction(@add_reduction_i_32_box_2_byref %[[VAL_12]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>)  for  (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK:               fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.array<2xi32>>>) -> (!fir.ref<!fir.box<!fir.array<2xi32>>>, !fir.ref<!fir.box<!fir.array<2xi32>>>)
+! CHECK:               %[[VAL_16:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:               %[[VAL_17:.*]] = arith.constant 1 : index
+! CHECK:               %[[VAL_18:.*]] = hlfir.designate %[[VAL_16]] (%[[VAL_17]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_21:.*]] = arith.addi %[[VAL_19]], %[[VAL_20]] : i32
+! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:               %[[VAL_23:.*]] = arith.constant 1 : index
+! CHECK:               %[[VAL_24:.*]] = hlfir.designate %[[VAL_22]] (%[[VAL_23]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
+! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_24]] : i32, !fir.ref<i32>
+! CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:               %[[VAL_26:.*]] = arith.constant 2 : index
+! CHECK:               %[[VAL_27:.*]] = hlfir.designate %[[VAL_25]] (%[[VAL_26]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
+! CHECK:               %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref<i32>
+! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_30:.*]] = arith.subi %[[VAL_28]], %[[VAL_29]] : i32
+! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:               %[[VAL_32:.*]] = arith.constant 2 : index
+! CHECK:               %[[VAL_33:.*]] = hlfir.designate %[[VAL_31]] (%[[VAL_32]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
+! CHECK:               hlfir.assign %[[VAL_30]] to %[[VAL_33]] : i32, !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }