diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 0e07e6fcaac9d..a6a19d41e99ce 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -258,7 +258,7 @@ def fir_FreeMemOp : fir_Op<"freemem", [MemoryEffects<[MemFree]>]> {
   let assemblyFormat = "$heapref attr-dict `:` qualified(type($heapref))";
 }
 
-def fir_LoadOp : fir_OneResultOp<"load", [MemoryEffects<[MemRead]>]> {
+def fir_LoadOp : fir_OneResultOp<"load", []> {
   let summary = "load a value from a memory reference";
   let description = [{
     Load a value from a memory reference into an ssa-value (virtual register).
@@ -285,7 +285,7 @@ def fir_LoadOp : fir_OneResultOp<"load", [MemoryEffects<[MemRead]>]> {
   }];
 }
 
-def fir_StoreOp : fir_Op<"store", [MemoryEffects<[MemWrite]>]> {
+def fir_StoreOp : fir_Op<"store", []> {
   let summary = "store an SSA-value to a memory location";
 
   let description = [{
@@ -693,7 +693,7 @@ def fir_UnreachableOp : fir_Op<"unreachable", [Terminator]> {
 
 }
 
-def fir_FirEndOp : fir_Op<"end", [Terminator]> {
+def fir_FirEndOp : fir_Op<"end", [Terminator, NoMemoryEffect]> {
   let summary = "the end instruction";
 
   let description = [{
diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
index 924e868d32afd..0aed2778698d3 100644
--- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
+++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
@@ -170,7 +170,7 @@ def fir_AssignOp : hlfir_Op<"assign", [MemoryEffects<[MemWrite]>]> {
 }
 
 def hlfir_DesignateOp : hlfir_Op<"designate", [AttrSizedOperandSegments,
-    DeclareOpInterfaceMethods<fir_FortranVariableOpInterface>]> {
+    DeclareOpInterfaceMethods<fir_FortranVariableOpInterface>, NoMemoryEffect]> {
   let summary = "Designate a Fortran variable";
 
   let description = [{
@@ -593,7 +593,7 @@ def hlfir_NoReassocOp : hlfir_Op<"no_reassoc", [NoMemoryEffect, SameOperandsAndR
   let assemblyFormat = "$val attr-dict `:` type($val)";
 }
 
-def hlfir_ElementalOp : hlfir_Op<"elemental", []> {
+def hlfir_ElementalOp : hlfir_Op<"elemental", [RecursiveMemoryEffects]> {
   let summary = "elemental expression";
   let description = [{
     Represent an elemental expression as a function of the indices.
@@ -652,7 +652,7 @@ def hlfir_ElementalOp : hlfir_Op<"elemental", []> {
 
 }
 
-def hlfir_YieldElementOp : hlfir_Op<"yield_element", [Terminator, HasParent<"ElementalOp">]> {
+def hlfir_YieldElementOp : hlfir_Op<"yield_element", [Terminator, HasParent<"ElementalOp">, Pure]> {
   let summary = "Yield the elemental value in an ElementalOp";
   let description = [{
     Yield the element value of the current elemental expression iteration
@@ -717,7 +717,7 @@ def hlfir_NullOp : hlfir_Op<"null", [NoMemoryEffect, fir_FortranVariableOpInterf
   }];
 }
 
-def hlfir_DestroyOp : hlfir_Op<"destroy", []> {
+def hlfir_DestroyOp : hlfir_Op<"destroy", [MemoryEffects<[MemFree]>]> {
   let summary = "Mark the last use of an hlfir.expr";
   let description = [{
     Mark the last use of an hlfir.expr. This will be the point at which the
@@ -913,6 +913,11 @@ def hlfir_OrderedAssignmentTreeOpInterface : OpInterface<"OrderedAssignmentTreeO
   let extraClassDeclaration = [{
     /// Interface verifier imlementation.
     mlir::LogicalResult verifyImpl();
+
+    mlir::Block* getSubTreeBlock() {
+      mlir::Region* region = getSubTreeRegion();
+      return region && !region->empty()? &region->front() : nullptr;
+    }
   }];
 
   let verify = [{
@@ -987,7 +992,8 @@ def hlfir_RegionAssignOp : hlfir_Op<"region_assign", [hlfir_OrderedAssignmentTre
 
 def hlfir_YieldOp : hlfir_Op<"yield", [Terminator, ParentOneOf<["RegionAssignOp",
     "ElementalAddrOp", "ForallOp", "ForallMaskOp", "WhereOp", "ElseWhereOp"]>,
-    SingleBlockImplicitTerminator<"fir::FirEndOp">]> {
+    SingleBlockImplicitTerminator<"fir::FirEndOp">, RecursivelySpeculatable,
+        RecursiveMemoryEffects]> {
 
   let summary = "Yield a value or variable inside a forall, where or region assignment";
 
@@ -1116,6 +1122,13 @@ def hlfir_ForallOp : hlfir_Op<"forall", [hlfir_OrderedAssignmentTreeOpInterface]
     A Fortran forall with several indices is represented as a nest
     of hlfir.forall.
 
+    All the regions contained in the hlfir.forall must only contain
+    code that is pure from a Fortran point of view, except for the
+    assignment effect of the hlfir.region_assign.
+    This matches Fortran constraint C1037, but requires the outer
+    controls to be evaluated outside of the hlfir.forall (these
+    controls may have side effects as per Fortran 2018 10.1.4 section).
+
     Example: FORALL(I=1:10) X(I) = FOO(I)
     ```
       hlfir.forall lb {
diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.td b/flang/include/flang/Optimizer/HLFIR/Passes.td
index e2cf8447dff47..493240928869e 100644
--- a/flang/include/flang/Optimizer/HLFIR/Passes.td
+++ b/flang/include/flang/Optimizer/HLFIR/Passes.td
@@ -28,6 +28,14 @@ def LowerHLFIRIntrinsics : Pass<"lower-hlfir-intrinsics", "::mlir::ModuleOp"> {
 def LowerHLFIROrderedAssignments : Pass<"lower-hlfir-ordered-assignments", "::mlir::ModuleOp"> {
   let summary = "Lower HLFIR ordered assignments like forall and where operations";
   let constructor = "hlfir::createLowerHLFIROrderedAssignmentsPass()";
+  let options = [
+    Option<"tryFusingAssignments", "fuse-assignments",
+           "bool", /*default=*/"false",
+           "Fuse Forall and Where assignments in the same loop nest when legal."
+           "It is not clear yet if this is always beneficial. It may be best to"
+           "leave this to later loop optimizations."
+           "Hence this is off by default.">
+  ];
 }
 
 def SimplifyHLFIRIntrinsics : Pass<"simplify-hlfir-intrinsics", "::mlir::func::FuncOp"> {
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
index df2e2bd68f4ed..f7e51dce11e97 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
@@ -5,6 +5,7 @@ add_flang_library(HLFIRTransforms
   ConvertToFIR.cpp
   LowerHLFIRIntrinsics.cpp
   LowerHLFIROrderedAssignments.cpp
+  ScheduleOrderedAssignments.cpp
   SimplifyHLFIRIntrinsics.cpp
 
   DEPENDS
@@ -13,6 +14,7 @@ add_flang_library(HLFIRTransforms
   ${dialect_libs}
 
   LINK_LIBS
+  FIRAnalysis
   FIRDialect
   FIRBuilder
   FIRDialectSupport
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
index 85e32ffa0ec2d..a0dbd46975cdc 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
@@ -12,37 +12,76 @@
 // where.
 // The pass lowers these operations to regular hlfir.assign, loops and, if
 // needed, introduces temporary storage to fulfill Fortran semantics.
+//
+// For each rewrite, an analysis builds an evaluation schedule, and then the
+// new code is generated by following the evaluation schedule.
 //===----------------------------------------------------------------------===//
 
+#include "ScheduleOrderedAssignments.h"
 #include "flang/Optimizer/Builder/Todo.h"
-#include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
 #include "mlir/Transforms/DialectConversion.h"
+#include "llvm/Support/Debug.h"
 
 namespace hlfir {
 #define GEN_PASS_DEF_LOWERHLFIRORDEREDASSIGNMENTS
 #include "flang/Optimizer/HLFIR/Passes.h.inc"
 } // namespace hlfir
 
-using namespace mlir;
+#define DEBUG_TYPE "flang-ordered-assignment"
+
+// Test option only to test the scheduling part only (operations are erased
+// without codegen). The only goal is to allow printing and testing the debug
+// info.
+static llvm::cl::opt<bool> dbgScheduleOnly(
+    "flang-dbg-order-assignment-schedule-only",
+    llvm::cl::desc("Only run ordered assignment scheduling with no codegen"),
+    llvm::cl::init(false));
+
+/// Shared rewrite entry point for all the ordered assignment tree root
+/// operations. It calls the scheduler and then apply the schedule.
+static mlir::LogicalResult
+rewrite(hlfir::OrderedAssignmentTreeOpInterface &root,
+        bool tryFusingAssignments, mlir::PatternRewriter &rewriter) {
+  (void)hlfir::buildEvaluationSchedule(root, tryFusingAssignments);
+
+  LLVM_DEBUG(
+      /// Debug option to print the scheduling debug info without doing
+      /// any code generation. The operations are simply erased to avoid
+      /// failing and calling the rewrite patterns on nested operations.
+      /// The only purpose of this is to help testing scheduling without
+      /// having to test generated code.
+      if (dbgScheduleOnly) {
+        rewriter.eraseOp(root);
+        return mlir::success();
+      });
+  // TODO: lower to loops according to schedule.
+  return mlir::failure();
+}
 
 namespace {
 
 class ForallOpConversion : public mlir::OpRewritePattern<hlfir::ForallOp> {
 public:
-  explicit ForallOpConversion(mlir::MLIRContext *ctx) : OpRewritePattern{ctx} {}
+  explicit ForallOpConversion(mlir::MLIRContext *ctx, bool tryFusingAssignments)
+      : OpRewritePattern{ctx}, tryFusingAssignments{tryFusingAssignments} {}
 
   mlir::LogicalResult
   matchAndRewrite(hlfir::ForallOp forallOp,
                   mlir::PatternRewriter &rewriter) const override {
-    TODO(forallOp.getLoc(), "FORALL construct or statement in HLFIR");
-    return mlir::failure();
+    auto root = mlir::cast<hlfir::OrderedAssignmentTreeOpInterface>(
+        forallOp.getOperation());
+    if (mlir::failed(::rewrite(root, tryFusingAssignments, rewriter)))
+      TODO(forallOp.getLoc(), "FORALL construct or statement in HLFIR");
+    return mlir::success();
   }
+  const bool tryFusingAssignments;
 };
 
 class WhereOpConversion : public mlir::OpRewritePattern<hlfir::WhereOp> {
 public:
-  explicit WhereOpConversion(mlir::MLIRContext *ctx) : OpRewritePattern{ctx} {}
+  explicit WhereOpConversion(mlir::MLIRContext *ctx, bool tryFusingAssignments)
+      : OpRewritePattern{ctx}, tryFusingAssignments{tryFusingAssignments} {}
 
   mlir::LogicalResult
   matchAndRewrite(hlfir::WhereOp whereOp,
@@ -50,6 +89,7 @@ class WhereOpConversion : public mlir::OpRewritePattern<hlfir::WhereOp> {
     TODO(whereOp.getLoc(), "WHERE construct or statement in HLFIR");
     return mlir::failure();
   }
+  const bool tryFusingAssignments;
 };
 
 class RegionAssignConversion
@@ -84,9 +124,9 @@ class LowerHLFIROrderedAssignments
     // operations that can be the root of ordered assignments. The other
     // operations will be taken care of while rewriting these trees (they
     // cannot exist outside of these operations given their verifiers/traits).
-    patterns
-        .insert<ForallOpConversion, WhereOpConversion, RegionAssignConversion>(
-            context);
+    patterns.insert<ForallOpConversion, WhereOpConversion>(
+        context, this->tryFusingAssignments.getValue());
+    patterns.insert<RegionAssignConversion>(context);
     mlir::ConversionTarget target(*context);
     target.markUnknownOpDynamicallyLegal([](mlir::Operation *op) {
       return !mlir::isa<hlfir::OrderedAssignmentTreeOpInterface>(op);
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp
new file mode 100644
index 0000000000000..d8c909d8387b0
--- /dev/null
+++ b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp
@@ -0,0 +1,664 @@
+//===- ScheduleOrderedAssignments.cpp -- Ordered Assignment Scheduling ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleOrderedAssignments.h"
+#include "flang/Optimizer/Analysis/AliasAnalysis.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/Support/FIRContext.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "flang-ordered-assignment"
+
+//===----------------------------------------------------------------------===//
+// Scheduling logging utilities for debug and test
+//===----------------------------------------------------------------------===//
+
+/// Log RAW or WAW conflict.
+static void LLVM_ATTRIBUTE_UNUSED logConflict(llvm::raw_ostream &os,
+                                              mlir::Value writtenOrReadVarA,
+                                              mlir::Value writtenVarB);
+/// Log when an expression evaluation must be saved.
+static void LLVM_ATTRIBUTE_UNUSED logSaveEvaluation(llvm::raw_ostream &os,
+                                                    unsigned runid,
+                                                    mlir::Region &yieldRegion,
+                                                    bool anyWrite);
+/// Log when an assignment is scheduled.
+static void LLVM_ATTRIBUTE_UNUSED logAssignmentEvaluation(
+    llvm::raw_ostream &os, unsigned runid, hlfir::RegionAssignOp assign);
+/// Log when starting to schedule an order assignment tree.
+static void LLVM_ATTRIBUTE_UNUSED logStartScheduling(
+    llvm::raw_ostream &os, hlfir::OrderedAssignmentTreeOpInterface root);
+/// Log op if effect value is not known.
+static void LLVM_ATTRIBUTE_UNUSED logIfUnkownEffectValue(
+    llvm::raw_ostream &os, mlir::MemoryEffects::EffectInstance effect,
+    mlir::Operation &op);
+
+//===----------------------------------------------------------------------===//
+// Scheduling Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// Structure that is in charge of building the schedule. For each
+/// hlfir.region_assign inside an ordered assignment tree, it is walked through
+/// the parent operations and their "leaf" regions (that contain expression
+/// evaluations). The Scheduler analyze the memory effects of these regions
+/// against the effect of the current assignment, and if any conflict is found,
+/// it will create an action to save the value computed by the region before the
+/// assignment evaluation.
+class Scheduler {
+public:
+  Scheduler(bool tryFusingAssignments)
+      : tryFusingAssignments{tryFusingAssignments} {}
+
+  /// Start scheduling an assignment. Gather the write side effect from the
+  /// assignment.
+  void startSchedulingAssignment(hlfir::RegionAssignOp assign,
+                                 bool leafRegionsMayOnlyRead);
+
+  /// Start analysing a set of evaluation regions that can be evaluated in
+  /// any order between themselves according to Fortran rules (like the controls
+  /// of forall). The point of this is to avoid adding the side effects of
+  /// independent evaluations to a run that would save only one of the control.
+  void startIndependentEvaluationGroup() {
+    assert(independentEvaluationEffects.empty() &&
+           "previous group was not finished");
+  };
+
+  /// Analyze the memory effects of a region containing an expression
+  /// evaluation. If any conflict is found with the current assignment, or if
+  /// the expression has write effects (which is possible outside of forall),
+  /// create an action in the schedule to save the value in the schedule before
+  /// evaluating the current assignment. For expression with write effect,
+  /// saving them ensures they are evaluated only once. A region whose value
+  /// was saved in a previous run is considered to have no side effects with the
+  /// current assignment: the saved value will be used.
+  void saveEvaluationIfConflict(mlir::Region &yieldRegion,
+                                bool leafRegionsMayOnlyRead,
+                                bool yieldIsImplicitRead = true);
+
+  /// Finish evaluating a group of independent regions. The current independent
+  /// regions effects are added to the "parent" effect list since evaluating the
+  /// next analyzed region would require evaluating the current independent
+  /// regions.
+  void finishIndependentEvaluationGroup() {
+    parentEvaluationEffects.append(independentEvaluationEffects.begin(),
+                                   independentEvaluationEffects.end());
+    independentEvaluationEffects.clear();
+  }
+
+  /// After all the dependent evaluation regions have been analyzed, create the
+  /// action to evaluate the assignment that was being analyzed.
+  void finishSchedulingAssignment(hlfir::RegionAssignOp assign);
+
+  /// Once all the assignments have been analyzed and scheduled, return the
+  /// schedule. The scheduler object should not be used after this call.
+  hlfir::Schedule moveSchedule() { return std::move(schedule); }
+
+private:
+  /// Save a conflicting region that is evaluating an expression that is
+  /// controlling or masking the current assignment, or is evaluating the
+  /// RHS/LHS.
+  void
+  saveEvaluation(mlir::Region &yieldRegion,
+                 llvm::ArrayRef<mlir::MemoryEffects::EffectInstance> effects,
+                 bool anyWrite);
+
+  /// Can the current assignment be schedule with the previous run. This is
+  /// only possible if the assignment and all of its dependencies have no side
+  /// effects conflicting with the previous run.
+  bool canFuseAssignmentWithPreviousRun();
+
+  /// Memory effects of the assignments being lowered.
+  llvm::SmallVector<mlir::MemoryEffects::EffectInstance> assignEffects;
+  /// Memory effects of the unsaved evaluation region that are controlling or
+  /// masking the current assignments.
+  llvm::SmallVector<mlir::MemoryEffects::EffectInstance>
+      parentEvaluationEffects;
+  /// Same as parentEvaluationEffects, but for the current "leaf group" being
+  /// analyzed scheduled.
+  llvm::SmallVector<mlir::MemoryEffects::EffectInstance>
+      independentEvaluationEffects;
+
+  /// Were any region saved for the current assignment?
+  bool savedAnyRegionForCurrentAssignment = false;
+
+  // Schedule being built.
+  hlfir::Schedule schedule;
+  /// Leaf regions that have been saved so far.
+  llvm::SmallSet<mlir::Region *, 16> savedRegions;
+  /// Is schedule.back() a schedule that is only saving region with read
+  /// effects?
+  bool currentRunIsReadOnly = false;
+
+  /// Option to tell if the scheduler should try fusing to assignments in the
+  /// same loops.
+  const bool tryFusingAssignments;
+};
+} // namespace
+
+//===----------------------------------------------------------------------===//
+// Scheduling Implementation : gathering memory effects of nodes.
+//===----------------------------------------------------------------------===//
+
+/// Is \p var the result of a ForallIndexOp?
+/// Read effects to forall index can be ignored since forall
+/// indices cannot be assigned to.
+static bool isForallIndex(mlir::Value var) {
+  return var &&
+         mlir::isa_and_nonnull<hlfir::ForallIndexOp>(var.getDefiningOp());
+}
+
+/// Gather the memory effects of the operations contained in a region.
+/// \p mayOnlyRead can be given to exclude some potential write effects that
+/// cannot affect the current scheduling problem because it is known that the
+/// regions are evaluating pure expressions from a Fortran point of view. It is
+/// useful because low level IR in the region may contain operation that lacks
+/// side effect interface, or that are writing temporary variables that may be
+/// hard to identify as such (one would have to prove the write is "local" to
+/// the region even when the alloca may be outside of the region).
+static void gatherMemoryEffects(
+    mlir::Region &region, bool mayOnlyRead,
+    llvm::SmallVectorImpl<mlir::MemoryEffects::EffectInstance> &effects) {
+  /// This analysis is a simple walk of all the operations of the region that is
+  /// evaluating and yielding a value. This is a lot simpler and safer than
+  /// trying to walk back the SSA DAG from the yielded value. But if desired,
+  /// this could be changed.
+  for (mlir::Operation &op : region.getOps()) {
+    if (op.hasTrait<mlir::OpTrait::HasRecursiveMemoryEffects>()) {
+      for (mlir::Region &subRegion : op.getRegions())
+        gatherMemoryEffects(subRegion, mayOnlyRead, effects);
+      // In MLIR, RecursiveMemoryEffects can be combined with
+      // MemoryEffectOpInterface to describe extra effects on top of the
+      // effects of the nested operations.  However, the presence of
+      // RecursiveMemoryEffects and the absence of MemoryEffectOpInterface
+      // implies the operation has no other memory effects than the one of its
+      // nested operations.
+      if (!mlir::isa<mlir::MemoryEffectOpInterface>(op))
+        continue;
+    }
+    mlir::MemoryEffectOpInterface interface =
+        mlir::dyn_cast<mlir::MemoryEffectOpInterface>(op);
+    if (!interface) {
+      LLVM_DEBUG(llvm::dbgs() << "unknown effect: " << op << "\n";);
+      // There is no generic way to know what this operation is reading/writing
+      // to. Assume the worst. No need to continue analyzing the code any
+      // further.
+      effects.emplace_back(mlir::MemoryEffects::Read::get());
+      if (!mayOnlyRead)
+        effects.emplace_back(mlir::MemoryEffects::Write::get());
+      return;
+    }
+    // Collect read/write effects. Alloc/Free effects do not matter, they
+    // are either local to the evaluation region and can be repeated, or, if
+    // they are allocatable/pointer allocation/deallocation, they are conveyed
+    // via the write that is updating the descriptor/allocatable (and there
+    // cannot be any indirect allocatable/pointer allocation/deallocation if
+    // mayOnlyRead is set). When mayOnlyRead is set, local write effects are
+    // also ignored.
+    llvm::SmallVector<mlir::MemoryEffects::EffectInstance> opEffects;
+    interface.getEffects(opEffects);
+    for (auto &effect : opEffects)
+      if (!isForallIndex(effect.getValue())) {
+        if (mlir::isa<mlir::MemoryEffects::Read>(effect.getEffect())) {
+          LLVM_DEBUG(logIfUnkownEffectValue(llvm::dbgs(), effect, op););
+          effects.push_back(effect);
+        } else if (!mayOnlyRead &&
+                   mlir::isa<mlir::MemoryEffects::Write>(effect.getEffect())) {
+          LLVM_DEBUG(logIfUnkownEffectValue(llvm::dbgs(), effect, op););
+          effects.push_back(effect);
+        }
+      }
+  }
+}
+
+/// Return the entity yielded by a region, or a null value if the region
+/// is not terminated by a yield.
+static mlir::Value getYieldedEntity(mlir::Region &region) {
+  if (region.empty() || region.back().empty())
+    return nullptr;
+  if (auto yield = mlir::dyn_cast<hlfir::YieldOp>(region.back().back()))
+    return yield.getEntity();
+  return nullptr;
+}
+
+/// Gather the effect of an assignment. This is the implicit write to the LHS
+/// of an assignment. This also includes the effects of the user defined
+/// assignment, if any, but this does not include the effects of evaluating the
+/// RHS and LHS, which occur before the assignment effects in Fortran.
+static void gatherAssignEffects(
+    hlfir::RegionAssignOp regionAssign,
+    bool userDefAssignmentMayOnlyWriteToAssignedVariable,
+    llvm::SmallVectorImpl<mlir::MemoryEffects::EffectInstance> &assignEffects) {
+  mlir::Value assignedVar = getYieldedEntity(regionAssign.getLhsRegion());
+  if (!assignedVar)
+    TODO(regionAssign.getLoc(),
+         "assignment to vector subscripted entity in HLFIR");
+  assignEffects.emplace_back(mlir::MemoryEffects::Write::get(), assignedVar);
+
+  // TODO: gather the read/write effects of user defined assignments.
+  if (!regionAssign.getUserDefinedAssignment().empty())
+    TODO(regionAssign.getLoc(), "user defined assignments");
+}
+
+//===----------------------------------------------------------------------===//
+// Scheduling Implementation : finding conflicting memory effects.
+//===----------------------------------------------------------------------===//
+
+/// Follow addressing and declare like operation to the storage source.
+/// This allows using FIR alias analysis that otherwise does not know
+/// about those operations. This is correct, but ignoring the designate
+/// and declare info may yield false positive regarding aliasing (e.g,
+/// if it could be proved that the variable are different sub-part of
+/// an array).
+static mlir::Value getStorageSource(mlir::Value var) {
+  // TODO: define some kind of View interface for Fortran in FIR,
+  // and use it in the FIR alias analysis.
+  mlir::Value source = var;
+  while (auto *op = source.getDefiningOp()) {
+    if (auto designate = mlir::dyn_cast<hlfir::DesignateOp>(op)) {
+      source = designate.getMemref();
+    } else if (auto declare = mlir::dyn_cast<hlfir::DeclareOp>(op)) {
+      source = declare.getMemref();
+    } else {
+      break;
+    }
+  }
+  return source;
+}
+
+/// Could there be any read or write in effectsA on a variable written to in
+/// effectsB?
+static bool
+anyRAWorWAW(llvm::ArrayRef<mlir::MemoryEffects::EffectInstance> effectsA,
+            llvm::ArrayRef<mlir::MemoryEffects::EffectInstance> effectsB,
+            fir::AliasAnalysis &aliasAnalysis) {
+  for (const auto &effectB : effectsB)
+    if (mlir::isa<mlir::MemoryEffects::Write>(effectB.getEffect())) {
+      mlir::Value writtenVarB = effectB.getValue();
+      if (writtenVarB)
+        writtenVarB = getStorageSource(writtenVarB);
+      for (const auto &effectA : effectsA)
+        if (mlir::isa<mlir::MemoryEffects::Write, mlir::MemoryEffects::Read>(
+                effectA.getEffect())) {
+          mlir::Value writtenOrReadVarA = effectA.getValue();
+          if (!writtenVarB || !writtenOrReadVarA) {
+            LLVM_DEBUG(
+                logConflict(llvm::dbgs(), writtenOrReadVarA, writtenVarB););
+            return true; // unknown conflict.
+          }
+          writtenOrReadVarA = getStorageSource(writtenOrReadVarA);
+          if (!aliasAnalysis.alias(writtenOrReadVarA, writtenVarB).isNo()) {
+            LLVM_DEBUG(
+                logConflict(llvm::dbgs(), writtenOrReadVarA, writtenVarB););
+            return true;
+          }
+        }
+    }
+  return false;
+}
+
+/// Could there be any read or write in effectsA on a variable written to in
+/// effectsB, or any read in effectsB on a variable written to in effectsA?
+static bool
+conflict(llvm::ArrayRef<mlir::MemoryEffects::EffectInstance> effectsA,
+         llvm::ArrayRef<mlir::MemoryEffects::EffectInstance> effectsB) {
+  fir::AliasAnalysis aliasAnalysis;
+  // (RAW || WAW) || (WAR || WAW).
+  return anyRAWorWAW(effectsA, effectsB, aliasAnalysis) ||
+         anyRAWorWAW(effectsB, effectsA, aliasAnalysis);
+}
+
+/// Could there be any write effects in "effects"?
+static bool
+anyWrite(llvm::ArrayRef<mlir::MemoryEffects::EffectInstance> effects) {
+  return llvm::any_of(
+      effects, [](const mlir::MemoryEffects::EffectInstance &effect) {
+        return mlir::isa<mlir::MemoryEffects::Write>(effect.getEffect());
+      });
+}
+
+//===----------------------------------------------------------------------===//
+// Scheduling Implementation : Scheduler class implementation
+//===----------------------------------------------------------------------===//
+
+void Scheduler::startSchedulingAssignment(hlfir::RegionAssignOp assign,
+                                          bool leafRegionsMayOnlyRead) {
+  gatherAssignEffects(assign, leafRegionsMayOnlyRead, assignEffects);
+}
+
+void Scheduler::saveEvaluationIfConflict(mlir::Region &yieldRegion,
+                                         bool leafRegionsMayOnlyRead,
+                                         bool yieldIsImplicitRead) {
+  // If the region evaluation was previously executed and saved, the saved
+  // value will be used when evaluating the current assignment and this has
+  // no effects in the current assignment evaluation.
+  if (savedRegions.contains(&yieldRegion))
+    return;
+  llvm::SmallVector<mlir::MemoryEffects::EffectInstance> effects;
+  gatherMemoryEffects(yieldRegion, leafRegionsMayOnlyRead, effects);
+  // Yield has no effect as such, but in the context of order assignments.
+  // The order assignments will usually read the yielded entity (except for
+  // the yielded assignments LHS that is only read if this is an assignment
+  // with a finalizer, or a user defined assignment where the LHS is
+  // intent(inout)).
+  if (yieldIsImplicitRead) {
+    mlir::Value entity = getYieldedEntity(yieldRegion);
+    if (entity && hlfir::isFortranVariableType(entity.getType()))
+      effects.emplace_back(mlir::MemoryEffects::Read::get(), entity);
+  }
+  if (!leafRegionsMayOnlyRead && anyWrite(effects)) {
+    // Region with write effect must be executed only once: save it the first
+    // time it is encountered.
+    saveEvaluation(yieldRegion, effects, /*anyWrite=*/true);
+  } else if (conflict(effects, assignEffects)) {
+    // Region that conflicts with the current assignments must be fully
+    // evaluated and saved before doing the assignment (Note that it may
+    // have already have been evaluated without saving it before, but this
+    // implies that it never conflicted with a prior assignment, so its value
+    // should be the same.)
+    saveEvaluation(yieldRegion, effects, /*anyWrite=*/false);
+  } else {
+    // Can be executed while doing the assignment.
+    independentEvaluationEffects.append(effects.begin(), effects.end());
+  }
+}
+
+void Scheduler::saveEvaluation(
+    mlir::Region &yieldRegion,
+    llvm::ArrayRef<mlir::MemoryEffects::EffectInstance> effects,
+    bool anyWrite) {
+  savedAnyRegionForCurrentAssignment = true;
+  if (anyWrite) {
+    // Create a new run just for regions with side effect. Further analysis
+    // could try to prove the effects do not conflict with the previous
+    // schedule.
+    schedule.emplace_back(hlfir::Run{});
+    currentRunIsReadOnly = false;
+  } else if (!currentRunIsReadOnly) {
+    // For now, do not try to fuse an evaluation with a previous
+    // run that contains any write effects. One could try to prove
+    // that "effects" do not conflict with the current run assignments.
+    schedule.emplace_back(hlfir::Run{});
+    currentRunIsReadOnly = true;
+  }
+  // Otherwise, save the yielded entity in the current run, that already
+  // saving other read only entities.
+  schedule.back().actions.emplace_back(hlfir::SaveEntity{&yieldRegion});
+  // The run to save the yielded entity will need to evaluate all the unsaved
+  // parent control or masks. Note that these effects may already be in the
+  // current run memoryEffects, but it is just easier always add them, even if
+  // this may add them again.
+  schedule.back().memoryEffects.append(parentEvaluationEffects.begin(),
+                                       parentEvaluationEffects.end());
+  schedule.back().memoryEffects.append(effects.begin(), effects.end());
+  savedRegions.insert(&yieldRegion);
+  LLVM_DEBUG(
+      logSaveEvaluation(llvm::dbgs(), schedule.size(), yieldRegion, anyWrite););
+}
+
+bool Scheduler::canFuseAssignmentWithPreviousRun() {
+  // If a region was saved for the current assignment, the previous
+  // run is already known to conflict. Skip the analysis.
+  if (savedAnyRegionForCurrentAssignment || schedule.empty())
+    return false;
+  auto &previousRunEffects = schedule.back().memoryEffects;
+  return !conflict(previousRunEffects, assignEffects) &&
+         !conflict(previousRunEffects, parentEvaluationEffects) &&
+         !conflict(previousRunEffects, independentEvaluationEffects);
+}
+
+void Scheduler::finishSchedulingAssignment(hlfir::RegionAssignOp assign) {
+  // For now, always schedule each assignment in its own run. They could
+  // be done as part of previous assignment runs if it is proven they have
+  // no conflicting effects.
+  currentRunIsReadOnly = false;
+  if (!tryFusingAssignments || !canFuseAssignmentWithPreviousRun())
+    schedule.emplace_back(hlfir::Run{});
+  schedule.back().actions.emplace_back(assign);
+  // TODO: when fusing, it would probably be best to filter the
+  // parentEvaluationEffects that already in the previous run effects (since
+  // assignments may share the same parents), otherwise, this can make the
+  // conflict() calls more and more expensive.
+  schedule.back().memoryEffects.append(parentEvaluationEffects.begin(),
+                                       parentEvaluationEffects.end());
+  schedule.back().memoryEffects.append(assignEffects.begin(),
+                                       assignEffects.end());
+  assignEffects.clear();
+  parentEvaluationEffects.clear();
+  independentEvaluationEffects.clear();
+  savedAnyRegionForCurrentAssignment = false;
+  LLVM_DEBUG(logAssignmentEvaluation(llvm::dbgs(), schedule.size(), assign));
+}
+
+//===----------------------------------------------------------------------===//
+// Scheduling Implementation : driving the Scheduler in the assignment tree.
+//===----------------------------------------------------------------------===//
+
+/// Gather the hlfir.region_assign nested directly and indirectly inside root in
+/// execution order.
+static void
+gatherAssignments(hlfir::OrderedAssignmentTreeOpInterface root,
+                  llvm::SmallVector<hlfir::RegionAssignOp> &assignments) {
+  llvm::SmallVector<mlir::Operation *> nodeStack{root.getOperation()};
+  while (!nodeStack.empty()) {
+    mlir::Operation *node = nodeStack.pop_back_val();
+    if (auto regionAssign = mlir::dyn_cast<hlfir::RegionAssignOp>(node)) {
+      assignments.push_back(regionAssign);
+      continue;
+    }
+    auto nodeIface =
+        mlir::dyn_cast<hlfir::OrderedAssignmentTreeOpInterface>(node);
+    if (nodeIface)
+      if (mlir::Block *block = nodeIface.getSubTreeBlock())
+        for (mlir::Operation &op : llvm::reverse(block->getOperations()))
+          nodeStack.push_back(&op);
+  }
+}
+
+/// Gather the parents of (not included) \p node in reverse execution order.
+static void gatherParents(
+    hlfir::OrderedAssignmentTreeOpInterface node,
+    llvm::SmallVectorImpl<hlfir::OrderedAssignmentTreeOpInterface> &parents) {
+  while (node) {
+    auto parent =
+        mlir::dyn_cast_or_null<hlfir::OrderedAssignmentTreeOpInterface>(
+            node->getParentOp());
+    if (parent && parent.getSubTreeRegion() == node->getParentRegion()) {
+      parents.push_back(parent);
+      node = parent;
+    } else {
+      break;
+    }
+  }
+}
+
+// Build the list of the parent nodes for this assignment. The list is built
+// from the closest parent until the ordered assignment tree root (this is the
+// revere of their execution order).
+static void gatherAssignmentParents(
+    hlfir::RegionAssignOp assign,
+    llvm::SmallVectorImpl<hlfir::OrderedAssignmentTreeOpInterface> &parents) {
+  gatherParents(mlir::cast<hlfir::OrderedAssignmentTreeOpInterface>(
+                    assign.getOperation()),
+                parents);
+}
+
+hlfir::Schedule
+hlfir::buildEvaluationSchedule(hlfir::OrderedAssignmentTreeOpInterface root,
+                               bool tryFusingAssignments) {
+  LLVM_DEBUG(logStartScheduling(llvm::dbgs(), root););
+  // The expressions inside an hlfir.forall must be pure (with the Fortran
+  // definition of pure). This is not a commitment that there are no operation
+  // with write effect in the regions: entities local to the region may still
+  // be written to (e.g., a temporary accumulator implementing SUM). This is
+  // a commitment that no write effect will affect the scheduling problem, and
+  // that all write effect caught by MLIR analysis can be ignored for the
+  // current problem.
+  const bool leafRegionsMayOnlyRead =
+      mlir::isa<hlfir::ForallOp>(root.getOperation());
+
+  // Loop through the assignments and schedule them.
+  Scheduler scheduler(tryFusingAssignments);
+  llvm::SmallVector<hlfir::RegionAssignOp> assignments;
+  gatherAssignments(root, assignments);
+  for (hlfir::RegionAssignOp assign : assignments) {
+    scheduler.startSchedulingAssignment(assign, leafRegionsMayOnlyRead);
+    // Go through the list of parents (not including the current
+    // hlfir.region_assign) in Fortran execution order so that any parent leaf
+    // region that must be saved is saved in order.
+    llvm::SmallVector<hlfir::OrderedAssignmentTreeOpInterface> parents;
+    gatherAssignmentParents(assign, parents);
+    for (hlfir::OrderedAssignmentTreeOpInterface parent :
+         llvm::reverse(parents)) {
+      scheduler.startIndependentEvaluationGroup();
+      llvm::SmallVector<mlir::Region *, 4> yieldRegions;
+      parent.getLeafRegions(yieldRegions);
+      for (mlir::Region *yieldRegion : yieldRegions)
+        scheduler.saveEvaluationIfConflict(*yieldRegion,
+                                           leafRegionsMayOnlyRead);
+      scheduler.finishIndependentEvaluationGroup();
+    }
+    // Look for conflicts between the RHS/LHS evaluation and the assignments.
+    // The LHS yield has no implicit read effect on the produced variable (the
+    // variable is not read before the assignment).
+    scheduler.startIndependentEvaluationGroup();
+    scheduler.saveEvaluationIfConflict(assign.getRhsRegion(),
+                                       leafRegionsMayOnlyRead);
+    scheduler.saveEvaluationIfConflict(assign.getLhsRegion(),
+                                       leafRegionsMayOnlyRead,
+                                       /*yieldIsImplicitRead=*/false);
+    scheduler.finishIndependentEvaluationGroup();
+    scheduler.finishSchedulingAssignment(assign);
+  }
+  return scheduler.moveSchedule();
+}
+
+mlir::Value hlfir::SaveEntity::getSavedValue() {
+  mlir::Value saved = getYieldedEntity(*yieldRegion);
+  assert(saved && "SaveEntity must contain region terminated by YieldOp");
+  return saved;
+}
+
+//===----------------------------------------------------------------------===//
+// Debug and test logging implementation
+//===----------------------------------------------------------------------===//
+
+static llvm::raw_ostream &printRegionId(llvm::raw_ostream &os,
+                                        mlir::Region &yieldRegion) {
+  mlir::Operation *parent = yieldRegion.getParentOp();
+  if (auto forall = mlir::dyn_cast<hlfir::ForallOp>(parent)) {
+    if (&forall.getLbRegion() == &yieldRegion)
+      os << "lb";
+    else if (&forall.getUbRegion() == &yieldRegion)
+      os << "ub";
+    else if (&forall.getStepRegion() == &yieldRegion)
+      os << "step";
+  } else if (auto assign = mlir::dyn_cast<hlfir::ForallMaskOp>(parent)) {
+    if (&assign.getMaskRegion() == &yieldRegion)
+      os << "mask";
+  } else if (auto assign = mlir::dyn_cast<hlfir::RegionAssignOp>(parent)) {
+    if (&assign.getRhsRegion() == &yieldRegion)
+      os << "rhs";
+    else if (&assign.getLhsRegion() == &yieldRegion)
+      os << "lhs";
+  } else {
+    os << "unknown";
+  }
+  return os;
+}
+
+static llvm::raw_ostream &
+printNodeIndexInBody(llvm::raw_ostream &os,
+                     hlfir::OrderedAssignmentTreeOpInterface node,
+                     hlfir::OrderedAssignmentTreeOpInterface parent) {
+  if (!parent || !parent.getSubTreeRegion())
+    return os;
+  mlir::Operation *nodeOp = node.getOperation();
+  unsigned index = 1;
+  for (mlir::Operation &op : parent.getSubTreeRegion()->getOps())
+    if (nodeOp == &op) {
+      return os << index;
+    } else if (nodeOp->getName() == op.getName()) {
+      ++index;
+    }
+  return os;
+}
+
+static llvm::raw_ostream &printNodePath(llvm::raw_ostream &os,
+                                        mlir::Operation *op) {
+  auto node =
+      mlir::dyn_cast_or_null<hlfir::OrderedAssignmentTreeOpInterface>(op);
+  if (!node) {
+    os << "unknown node";
+    return os;
+  }
+  llvm::SmallVector<hlfir::OrderedAssignmentTreeOpInterface> parents;
+  gatherParents(node, parents);
+  hlfir::OrderedAssignmentTreeOpInterface previousParent;
+  for (auto parent : llvm::reverse(parents)) {
+    os << parent->getName().stripDialect();
+    printNodeIndexInBody(os, parent, previousParent) << "/";
+    previousParent = parent;
+  }
+  os << node->getName().stripDialect();
+  return printNodeIndexInBody(os, node, previousParent);
+}
+
+static llvm::raw_ostream &printRegionPath(llvm::raw_ostream &os,
+                                          mlir::Region &yieldRegion) {
+  printNodePath(os, yieldRegion.getParentOp()) << "/";
+  return printRegionId(os, yieldRegion);
+}
+
+static void LLVM_ATTRIBUTE_UNUSED logSaveEvaluation(llvm::raw_ostream &os,
+                                                    unsigned runid,
+                                                    mlir::Region &yieldRegion,
+                                                    bool anyWrite) {
+  os << "run " << runid << " save  " << (anyWrite ? "(w)" : "  ") << ": ";
+  printRegionPath(os, yieldRegion) << "\n";
+}
+
+static void LLVM_ATTRIBUTE_UNUSED logAssignmentEvaluation(
+    llvm::raw_ostream &os, unsigned runid, hlfir::RegionAssignOp assign) {
+  os << "run " << runid << " evaluate: ";
+  printNodePath(os, assign.getOperation()) << "\n";
+}
+
+static void LLVM_ATTRIBUTE_UNUSED logConflict(llvm::raw_ostream &os,
+                                              mlir::Value writtenOrReadVarA,
+                                              mlir::Value writtenVarB) {
+  auto printIfValue = [&](mlir::Value var) -> llvm::raw_ostream & {
+    if (!var)
+      return os << "<unknown>";
+    return os << var;
+  };
+  os << "conflict: R/W: ";
+  printIfValue(writtenOrReadVarA) << " W:";
+  printIfValue(writtenVarB) << "\n";
+}
+
+static void LLVM_ATTRIBUTE_UNUSED logStartScheduling(
+    llvm::raw_ostream &os, hlfir::OrderedAssignmentTreeOpInterface root) {
+  os << "------------ scheduling ";
+  printNodePath(os, root.getOperation());
+  if (auto funcOp = root->getParentOfType<mlir::func::FuncOp>())
+    os << " in " << funcOp.getSymName() << " ";
+  os << "------------\n";
+}
+
+static void LLVM_ATTRIBUTE_UNUSED logIfUnkownEffectValue(
+    llvm::raw_ostream &os, mlir::MemoryEffects::EffectInstance effect,
+    mlir::Operation &op) {
+  if (effect.getValue() != nullptr)
+    return;
+  os << "unknown effected value (";
+  os << (mlir::isa<mlir::MemoryEffects::Read>(effect.getEffect()) ? "R" : "W");
+  os << "): " << op << "\n";
+}
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.h b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.h
new file mode 100644
index 0000000000000..2ed242edc973a
--- /dev/null
+++ b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.h
@@ -0,0 +1,96 @@
+//===- ScheduleOrderedAssignments.h --- Assignment scheduling ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file defines a utility to analyze and schedule the evaluation of
+// of hlfir::OrderedAssignmentTreeOpInterface trees that represent Fortran
+// Forall, Where, user defined assignments and assignments to vector
+// subscripted entities.
+//===----------------------------------------------------------------------===//
+
+#ifndef OPTIMIZER_HLFIR_TRANSFORM_SCHEDULEORDEREDASSIGNMENTS_H
+#define OPTIMIZER_HLFIR_TRANSFORM_SCHEDULEORDEREDASSIGNMENTS_H
+
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+
+namespace hlfir {
+
+/// Structure to represent that the value yielded by some region
+/// must be fully evaluated and saved for all index values at
+/// a given point of the ordered assignment tree evaluation.
+/// All subsequent evaluation depending on the value yielded
+/// by this region will use the value that was saved.
+struct SaveEntity {
+  mlir::Region *yieldRegion;
+  /// Returns the hlfir.yield op argument.
+  mlir::Value getSavedValue();
+};
+
+/// A run is a list of actions required to evaluate an ordered assignment tree
+/// that can be done in the same loop nest.
+/// The actions can evaluate and saves element values into temporary or evaluate
+/// assignments.
+/// The evaluation of an action in a run will cause the evaluation of all the
+/// regions that yield entities required to implement the action, except if the
+/// region was saved in a previous run, in which case it will use the previously
+/// saved value.
+struct Run {
+  /// An action is either saving the values yielded by a region, or evaluating
+  /// the assignment part of an hlfir::RegionAssignOp.
+  using Action = std::variant<hlfir::RegionAssignOp, SaveEntity>;
+  llvm::SmallVector<Action> actions;
+  llvm::SmallVector<mlir::MemoryEffects::EffectInstance> memoryEffects;
+};
+
+/// List of runs to be executed in order to evaluate an order assignment tree.
+using Schedule = llvm::SmallVector<Run>;
+
+/// Example of schedules and run, and what they mean:
+///  Fortran: forall (i=i:10) x(i) = y(i)
+///
+///  hlfir.forall lb { hlfir.yield %c1} ub { hlfir.yield %c10} do {
+///   ^bb1(%i: index)
+///     hlfir.region_assign {
+///        %yi_addr = hlfir.designate %y(%i)
+///        %yi = fir.load %yi_addr
+///        hlfir.yield %yi
+///     } to {
+///        %xi = hlfir.designate %x(%i)
+///        hlfir.yield %xi
+///     }
+///  }
+///
+///  If the scheduling analysis cannot prove that %x and %y do not overlap, it
+///  will generate 2 runs for the schdule. The first containing
+///  SaveEntity{rhs_region}, and the second one containing the
+///  hlfir.region_assign.
+///
+///  The lowering of that schedule will have to:
+///  For the first run:
+///   1. create a temporary to contain all the %yi for all %i
+///   2. create a loop nest for the forall, evaluate the %yi and save them
+///   inside the loop, but do not evaluate the LHS or assignment.
+///   For the second run:
+///   3. create a loop nest again for the forall, evaluate the LHS, get the
+///   saved %yi, and evaluate %yi to %xi. After all runs:
+///   4. clean the temporary for the %yi.
+///
+/// If the scheduling analysis can prove %x and %y do not overlap, it will
+/// generate only one run with the hlfir.region_assign, which will be
+/// implemented as a single loop that evaluate %xi, %yi and does %xi = %yi in
+/// the loop body.
+
+/// Core function that analyzes an ordered assignment tree and builds a
+/// schedule for its evaluation.
+/// The main goal of the scheduler is to avoid creating temporary storage
+/// (required for SaveEntity). But it can optionally be asked to fuse Forall
+/// and Where assignments in the same loop nests when possible since it has the
+/// memory effects analysis at hand.
+Schedule buildEvaluationSchedule(hlfir::OrderedAssignmentTreeOpInterface root,
+                                 bool tryFusingAssignments);
+
+} // namespace hlfir
+#endif // OPTIMIZER_HLFIR_TRANSFORM_SCHEDULEORDERASSIGNMENTS_H
diff --git a/flang/test/HLFIR/order_assignments/forall-fusing-scheduling.f90 b/flang/test/HLFIR/order_assignments/forall-fusing-scheduling.f90
new file mode 100644
index 0000000000000..2ebbebd235521
--- /dev/null
+++ b/flang/test/HLFIR/order_assignments/forall-fusing-scheduling.f90
@@ -0,0 +1,103 @@
+! Test optional fusing of forall assignments in the scheduling analysis
+! from lower-hlfir-ordered-assignments pass. Assignments are fused in the
+! same loop nest if they are given the same run id.
+
+! RUN: bbc -hlfir -o - -pass-pipeline="builtin.module(lower-hlfir-ordered-assignments{fuse-assignments=false})" --debug-only=flang-ordered-assignment -flang-dbg-order-assignment-schedule-only %s 2>&1 | FileCheck %s --check-prefix NOFUSE
+
+! RUN: bbc -hlfir -o - -pass-pipeline="builtin.module(lower-hlfir-ordered-assignments{fuse-assignments=true})" --debug-only=flang-ordered-assignment -flang-dbg-order-assignment-schedule-only %s 2>&1 | FileCheck %s --check-prefix FUSE
+
+! REQUIRES: asserts
+
+subroutine fusable_assign_easy(x, y, z)
+  integer :: x(:), y(:), z(:)
+  forall(i=1:10)
+    x(i) = 42
+    z(i) = 42
+  end forall
+end subroutine
+!NOFUSE-LABEL: ------------ scheduling forall in _QPfusable_assign_easy ------------
+!NOFUSE-NEXT: run 1 evaluate: forall/region_assign1
+!NOFUSE-NEXT: run 2 evaluate: forall/region_assign2
+
+!FUSE-LABEL: ------------ scheduling forall in _QPfusable_assign_easy ------------
+!FUSE-NEXT: run 1 evaluate: forall/region_assign1
+!FUSE-NEXT: run 1 evaluate: forall/region_assign2
+
+subroutine fusable_assign(x, y, z)
+  integer :: x(:), y(:), z(:)
+  forall(i=1:10)
+    x(i) = y(i)
+    z(i) = y(11-i)
+  end forall
+end subroutine
+!NOFUSE-LABEL: ------------ scheduling forall in _QPfusable_assign ------------
+!NOFUSE-NEXT: run 1 evaluate: forall/region_assign1
+!NOFUSE-NEXT: run 2 evaluate: forall/region_assign2
+
+!FUSE-LABEL: ------------ scheduling forall in _QPfusable_assign ------------
+!FUSE-NEXT: run 1 evaluate: forall/region_assign1
+!FUSE-NEXT: run 1 evaluate: forall/region_assign2
+
+subroutine unfusable_assign_1(x, y, z)
+  integer :: x(:), y(:), z(:)
+  forall(i=1:10)
+    x(i) = y(i)
+    z(i) = x(11-i)
+  end forall
+end subroutine
+!NOFUSE-LABEL: ------------ scheduling forall in _QPunfusable_assign_1 ------------
+!NOFUSE-NEXT: run 1 evaluate: forall/region_assign1
+!NOFUSE-NEXT: run 2 evaluate: forall/region_assign2
+
+!FUSE-LABEL: ------------ scheduling forall in _QPunfusable_assign_1 ------------
+!FUSE-NEXT: run 1 evaluate: forall/region_assign1
+!FUSE-NEXT: conflict: R/W: <block argument> of type '!fir.box<!fir.array<?xi32>>' at index: 0 W:<block argument> of type '!fir.box<!fir.array<?xi32>>' at index: 0
+!FUSE-NEXT: run 2 evaluate: forall/region_assign2
+
+subroutine unfusable_assign_2(x, y)
+  integer :: x(:), y(:)
+  forall(i=1:10)
+    x(i) = y(i)
+    x(i+1) = y(i+1)
+  end forall
+end subroutine
+!NOFUSE-LABEL: ------------ scheduling forall in _QPunfusable_assign_2 ------------
+!NOFUSE-NEXT: run 1 evaluate: forall/region_assign1
+!NOFUSE-NEXT: run 2 evaluate: forall/region_assign2
+
+!FUSE-LABEL: ------------ scheduling forall in _QPunfusable_assign_2 ------------
+!FUSE-NEXT: run 1 evaluate: forall/region_assign1
+!FUSE-NEXT: conflict: R/W: <block argument> of type '!fir.box<!fir.array<?xi32>>' at index: 0 W:<block argument> of type '!fir.box<!fir.array<?xi32>>' at index: 0
+!FUSE-NEXT: run 2 evaluate: forall/region_assign2
+
+subroutine unfusable_assign_3(x, y, z)
+  integer :: x(:, :), y(:, :), z(:, :)
+  forall(i=1:10)
+    forall(j=1:z(i, i)) x(i, j) = y(i, j)
+    z(i, :) = y(i, :)
+  end forall
+end subroutine
+!NOFUSE-LABEL: ------------ scheduling forall in _QPunfusable_assign_3 ------------
+!NOFUSE-NEXT: run 1 evaluate: forall/forall1/region_assign1
+!NOFUSE-NEXT: run 2 evaluate: forall/region_assign1
+
+!FUSE-LABEL: ------------ scheduling forall in _QPunfusable_assign_3 ------------
+!FUSE-NEXT: run 1 evaluate: forall/forall1/region_assign1
+!FUSE-NEXT: conflict: R/W: <block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 2 W:<block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 2
+!FUSE-NEXT: run 2 evaluate: forall/region_assign1
+
+subroutine unfusable_assign_4(x, y, z)
+  integer :: x(:, :), y(:, :), z(:, :)
+  forall(i=1:10)
+    x(i, :) = y(i, :)
+    forall(j=1:x(i, i)) z(i, j) = y(i, j)
+  end forall
+end subroutine
+!NOFUSE-LABEL: ------------ scheduling forall in _QPunfusable_assign_4 ------------
+!NOFUSE-NEXT: run 1 evaluate: forall/region_assign1
+!NOFUSE-NEXT: run 2 evaluate: forall/forall1/region_assign1
+
+!FUSE-LABEL: ------------ scheduling forall in _QPunfusable_assign_4 ------------
+!FUSE-NEXT: run 1 evaluate: forall/region_assign1
+!FUSE-NEXT: conflict: R/W: <block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 0 W:<block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 0
+!FUSE-NEXT: run 2 evaluate: forall/forall1/region_assign1
diff --git a/flang/test/HLFIR/order_assignments/forall-scheduling.f90 b/flang/test/HLFIR/order_assignments/forall-scheduling.f90
new file mode 100644
index 0000000000000..f5cb53bb8284f
--- /dev/null
+++ b/flang/test/HLFIR/order_assignments/forall-scheduling.f90
@@ -0,0 +1,168 @@
+! Test forall scheduling analysis from lower-hlfir-ordered-assignments pass.
+! The printed output is done via LLVM_DEBUG, hence the "asserts" requirement.
+! This test test that conflicting actions are not scheduled to be evaluated
+! in the same loops (same run id).
+
+! RUN: bbc -hlfir -o - -pass-pipeline="builtin.module(lower-hlfir-ordered-assignments)" --debug-only=flang-ordered-assignment -flang-dbg-order-assignment-schedule-only %s 2>&1 | FileCheck %s
+! REQUIRES: asserts
+
+subroutine no_conflict(x)
+  real :: x(:)
+  forall(i=1:10) x(i) = i
+end subroutine
+!CHECK-LABEL: ------------ scheduling forall in _QPno_conflict ------------
+!CHECK-NEXT: run 1 evaluate: forall/region_assign1
+
+subroutine rhs_lhs_overlap(x)
+  real :: x(:)
+  forall(i=1:10) x(i) = x(11-i)
+end subroutine
+!CHECK-LABEL: ------------ scheduling forall in _QPrhs_lhs_overlap ------------
+!CHECK-NEXT: conflict: R/W: <block argument> of type '!fir.box<!fir.array<?xf32>>' at index: 0 W:<block argument> of type '!fir.box<!fir.array<?xf32>>' at index: 0
+!CHECK-NEXT: run 1 save    : forall/region_assign1/rhs
+!CHECK-NEXT: run 2 evaluate: forall/region_assign1
+
+subroutine no_rhs_lhs_overlap(x, y)
+  real :: x(:), y(:)
+  forall(i=1:10) x(i) = y(i)
+end subroutine
+!CHECK-LABEL: ------------ scheduling forall in _QPno_rhs_lhs_overlap ------------
+!CHECK-NEXT: run 1 evaluate: forall/region_assign1
+
+subroutine no_rhs_lhs_overlap_2(x)
+  real :: x(:), y(10)
+  forall(i=1:10) x(i) = y(i)
+end subroutine
+!CHECK-LABEL: ------------ scheduling forall in _QPno_rhs_lhs_overlap_2 ------------
+!CHECK-NEXT: run 1 evaluate: forall/region_assign1
+
+subroutine no_rhs_lhs_overlap_3()
+  real :: x(10), y(10)
+  forall(i=1:10) x(i) = y(i)
+end subroutine
+!CHECK-LABEL: ------------ scheduling forall in _QPno_rhs_lhs_overlap_3 ------------
+!CHECK-NEXT: run 1 evaluate: forall/region_assign1
+
+subroutine array_expr_rhs_lhs_overlap(x)
+  real :: x(:, :)
+  forall(i=1:10) x(i, :) = x(:, i)*2
+end subroutine
+!CHECK-LABEL: ------------ scheduling forall in _QParray_expr_rhs_lhs_overlap ------------
+!CHECK-NEXT: conflict: R/W: <block argument> of type '!fir.box<!fir.array<?x?xf32>>' at index: 0 W:<block argument> of type '!fir.box<!fir.array<?x?xf32>>' at index: 0
+!CHECK-NEXT: run 1 save    : forall/region_assign1/rhs
+!CHECK-NEXT: run 2 evaluate: forall/region_assign1
+
+subroutine array_expr_no_rhs_lhs_overlap(x, y, z)
+  real :: x(:, :), y(:, :), z(:, :)
+  forall(i=1:10) x(i, :) = y(:, i) + z(i, :)
+end subroutine
+!CHECK-LABEL: ------------ scheduling forall in _QParray_expr_no_rhs_lhs_overlap ------------
+!CHECK-NEXT: run 1 evaluate: forall/region_assign1
+
+subroutine rhs_lhs_overlap_2(x, y)
+  real, target :: x(:), y(:)
+  forall(i=1:10) x(i) = y(i)
+end subroutine
+!CHECK-LABEL: ------------ scheduling forall in _QPrhs_lhs_overlap_2 ------------
+!CHECK-NEXT: conflict: R/W: <block argument> of type '!fir.box<!fir.array<?xf32>>' at index: 1 W:<block argument> of type '!fir.box<!fir.array<?xf32>>' at index: 0
+!CHECK-NEXT: run 1 save    : forall/region_assign1/rhs
+!CHECK-NEXT: run 2 evaluate: forall/region_assign1
+
+subroutine lhs_lhs_overlap(x)
+  integer :: x(10)
+  forall(i=1:10) x(x(i)) = i
+end subroutine
+!CHECK-LABEL: ------------ scheduling forall in _QPlhs_lhs_overlap ------------
+!CHECK-NEXT: conflict: R/W: <block argument> of type '!fir.ref<!fir.array<10xi32>>' at index: 0 W:<block argument> of type '!fir.ref<!fir.array<10xi32>>' at index: 0
+!CHECK-NEXT: run 1 save    : forall/region_assign1/lhs
+!CHECK-NEXT: run 2 evaluate: forall/region_assign1
+
+subroutine unknown_function_call(x)
+  interface
+    pure real function foo(x, i)
+      integer, intent(in) :: i
+      real, intent(in) :: x(10)
+    end function
+  end interface
+  real :: x(10)
+  forall(i=1:10) x(i) = foo(x, i)
+end subroutine
+!CHECK-LABEL: ------------ scheduling forall in _QPunknown_function_call ------------
+!CHECK-NEXT: unknown effect: {{.*}} fir.call @_QPfoo
+!CHECK-NEXT: conflict: R/W: <unknown> W:<block argument> of type '!fir.ref<!fir.array<10xf32>>' at index: 0
+!CHECK-NEXT: run 1 save    : forall/region_assign1/rhs
+!CHECK-NEXT: run 2 evaluate: forall/region_assign1
+
+subroutine unknown_function_call2(x)
+  interface
+    pure real function foo2(i)
+      integer, value :: i
+    end function
+  end interface
+  ! foo2 may read x since it is a target, even if it is pure,
+  ! if the actual argument of x is a module variable accessible
+  ! to foo via host association.
+  real, target :: x(:)
+  forall(i=1:10) x(i) = foo2(i)
+end subroutine
+!CHECK-LABEL: ------------ scheduling forall in _QPunknown_function_call2 ------------
+!CHECK-NEXT: unknown effect: {{.*}} fir.call @_QPfoo2(
+!CHECK-NEXT: conflict: R/W: <unknown> W:<block argument> of type '!fir.box<!fir.array<?xf32>>' at index: 0
+!CHECK-NEXT: run 1 save    : forall/region_assign1/rhs
+!CHECK-NEXT: run 2 evaluate: forall/region_assign1
+
+subroutine forall_mask_conflict(x)
+  integer :: x(:)
+  forall(i=1:10, x(11-i)>0) x(i) = 42
+end subroutine
+!CHECK-LABEL: ------------ scheduling forall in _QPforall_mask_conflict ------------
+!CHECK-NEXT: conflict: R/W: <block argument> of type '!fir.box<!fir.array<?xi32>>' at index: 0 W:<block argument> of type '!fir.box<!fir.array<?xi32>>' at index: 0
+!CHECK-NEXT: run 1 save    : forall/forall_mask1/mask
+!CHECK-NEXT: run 2 evaluate: forall/forall_mask1/region_assign1
+
+subroutine forall_ub_conflict(x, y)
+  integer :: x(:, :)
+  forall(i=1:10)
+    forall(j=1:x(i,i))
+      x(i, j) = 42
+    end forall
+  end forall
+end subroutine
+!CHECK-LABEL: ------------ scheduling forall in _QPforall_ub_conflict ------------
+!CHECK-NEXT: conflict: R/W: <block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 0 W:<block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 0
+!CHECK-NEXT: run 1 save    : forall/forall1/ub
+!CHECK-NEXT: run 2 evaluate: forall/forall1/region_assign1
+
+subroutine sequential_assign(x, y)
+  integer :: x(:), y(:)
+  forall(i=1:10)
+    x(i) = y(i)
+    y(2*i) = x(i)
+  end forall
+end subroutine
+!CHECK-LABEL: ------------ scheduling forall in _QPsequential_assign ------------
+!CHECK-NEXT: run 1 evaluate: forall/region_assign1
+!CHECK-NEXT: run 2 evaluate: forall/region_assign2
+
+subroutine loads_of_conlficts(x, y)
+  integer, target :: x(:, :), y(:, :)
+  forall(i=1:10)
+    forall (j=1:y(i,i)) x(x(i, j), j) = y(i, j)
+    forall (j=1:x(i,i), y(i,i)>0) y(x(i, j), j) = 0
+  end forall
+end subroutine
+!CHECK-LABEL: ------------ scheduling forall in _QPloads_of_conlficts ------------
+!CHECK-NEXT: conflict: R/W: <block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 1 W:<block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 0
+!CHECK-NEXT: run 1 save    : forall/forall1/ub
+!CHECK-NEXT: conflict: R/W: <block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 1 W:<block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 0
+!CHECK-NEXT: run 1 save    : forall/forall1/region_assign1/rhs
+!CHECK-NEXT: conflict: R/W: <block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 0 W:<block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 0
+!CHECK-NEXT: run 1 save    : forall/forall1/region_assign1/lhs
+!CHECK-NEXT: run 2 evaluate: forall/forall1/region_assign1
+!CHECK-NEXT: conflict: R/W: <block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 0 W:<block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 1
+!CHECK-NEXT: run 3 save    : forall/forall2/ub
+!CHECK-NEXT: conflict: R/W: <block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 1 W:<block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 1
+!CHECK-NEXT: run 3 save    : forall/forall2/forall_mask1/mask
+!CHECK-NEXT: conflict: R/W: <block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 0 W:<block argument> of type '!fir.box<!fir.array<?x?xi32>>' at index: 1
+!CHECK-NEXT: run 3 save    : forall/forall2/forall_mask1/region_assign1/lhs
+!CHECK-NEXT: run 4 evaluate: forall/forall2/forall_mask1/region_assign1
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index dcb37043e6eb7..fcba73da2efe7 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -300,6 +300,7 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
   (void)mlir::applyPassManagerCLOptions(pm);
   if (passPipeline.hasAnyOccurrences()) {
     // run the command-line specified pipeline
+    hlfir::registerHLFIRPasses();
     (void)passPipeline.addToPipeline(pm, [&](const llvm::Twine &msg) {
       mlir::emitError(mlir::UnknownLoc::get(&ctx)) << msg;
       return mlir::failure();