diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.h b/flang/include/flang/Optimizer/Dialect/FIROps.h index 4651f2bb8038e..f11abd96ffcc5 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.h +++ b/flang/include/flang/Optimizer/Dialect/FIROps.h @@ -51,12 +51,26 @@ static constexpr llvm::StringRef getNormalizedLowerBoundAttrName() { struct DebuggingResource : public mlir::SideEffects::Resource::Base { mlir::StringRef getName() final { return "DebuggingResource"; } + /// DebuggingResource is a unit resource allowing to keep order + /// of operations that affect debug information generation. + bool isUnitResource() const final { return true; } + /// This is a synthetic resource that is parallel-safe, + /// i.e. a presence of an operation that reads/writes this resource + /// does not prevent parallelization. + mlir::SideEffects::Resource::UnitProperties getUnitProperties() const final { + return mlir::SideEffects::Resource::UnitProperties::ParallelSafe; + } }; /// Model operations which read from/write to volatile memory struct VolatileMemoryResource : public mlir::SideEffects::Resource::Base { mlir::StringRef getName() final { return "VolatileMemoryResource"; } + /// VolatileMemoryResource is a unit resource allowing to keep order + /// of volatile memory accesses relative to each other. + /// Note that it is not parallel-safe, i.e. it is not allowed + /// to parallelize code with volatile memory accesses. + bool isUnitResource() const final { return true; } }; class CoordinateIndicesAdaptor; diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 30f5dcc37b0f3..ec2393fe37c28 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -3160,7 +3160,8 @@ def fir_GlobalLenOp : fir_Op<"global_len", []> { } def fir_UseStmtOp - : fir_Op<"use_stmt", [MemoryEffects<[MemWrite]>]> { + : fir_Op<"use_stmt", [MemoryEffects<[MemRead, + MemWrite]>]> { let summary = "Represents a Fortran USE statement"; let description = [{ This operation records a Fortran USE statement with its associated only/rename @@ -3393,7 +3394,8 @@ def fir_IsPresentOp : fir_SimpleOp<"is_present", [NoMemoryEffect]> { // is not used. def fir_DeclareOp : fir_Op<"declare", [AttrSizedOperandSegments, - MemoryEffects<[MemAlloc]>, + MemoryEffects<[MemAlloc, + MemRead]>, DeclareOpInterfaceMethods< fir_FortranVariableStorageOpInterface>, fir_FortranObjectViewOpInterface]> { @@ -3424,6 +3426,14 @@ def fir_DeclareOp If these operands are absent, then the storage of the declared variable is only known to start where the memref operand points to. + A read from a synthetic DebuggingResource allows reordering + fir.declare operations relative to each other, but prevents + their reordering relative to the fir.dummy_scope operations + which define the procedure scope the variables are declared in. + The allocation effect on the DebuggingResource is used to prevent + classifying the operation as trivially dead when its result + is unused (we still need fir.declare to generate debug information). + Example: CHARACTER(n), OPTIONAL, TARGET :: c(10:, 20:) @@ -3515,8 +3525,9 @@ def fir_BoxOffsetOp : fir_Op<"box_offset", [NoMemoryEffect]> { ]; } -def fir_DummyScopeOp : fir_Op<"dummy_scope", - [MemoryEffects<[MemWrite]>]> { +def fir_DummyScopeOp + : fir_Op<"dummy_scope", [MemoryEffects<[MemRead, + MemWrite]>]> { let summary = "Define a scope for dummy arguments"; let description = [{ @@ -3588,7 +3599,7 @@ def fir_DummyScopeOp : fir_Op<"dummy_scope", `test`, the two inlined instances of `inner` must use two different fir.dummy_scope operations for their fir.declare ops. This two distinct fir.dummy_scope must remain distinct during the optimizations. - This is guaranteed by the write memory effect on the DebuggingResource. + This is guaranteed by the read/write memory effect on the DebuggingResource. }]; let results = (outs fir_DummyScopeType); diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td index e0d309ff73b30..8f6eeec7d9b54 100644 --- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td +++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td @@ -37,7 +37,8 @@ class hlfir_Op traits> // don't want to remove it as dead code def hlfir_DeclareOp : hlfir_Op<"declare", [AttrSizedOperandSegments, - MemoryEffects<[MemAlloc]>, + MemoryEffects<[MemAlloc, + MemRead]>, DeclareOpInterfaceMethods< fir_FortranVariableStorageOpInterface>, fir_FortranObjectViewOpInterface]> { @@ -78,6 +79,14 @@ def hlfir_DeclareOp result are known to be the same descriptors (the input descriptor is known to already have the correct attributes and lower bounds). + A read from a synthetic DebuggingResource allows reordering + fir.declare operations relative to each other, but prevents + their reordering relative to the fir.dummy_scope operations + which define the procedure scope the variables are declared in. + The allocation effect on the DebuggingResource is used to prevent + classifying the operation as trivially dead when its result + is unused (we still need hlfir.declare to generate debug information). + Example: CHARACTER(n) :: c(10:n, 20:n) diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h index d77d82feddd84..831bea265fc6c 100644 --- a/flang/include/flang/Optimizer/Support/InitFIR.h +++ b/flang/include/flang/Optimizer/Support/InitFIR.h @@ -128,6 +128,7 @@ inline void registerMLIRPassesForFortranTools() { mlir::affine::registerAffineLoopInvariantCodeMotionPass(); mlir::affine::registerAffineLoopTilingPass(); mlir::affine::registerAffineDataCopyGenerationPass(); + mlir::affine::registerAffineParallelizePass(); mlir::registerMem2RegPass(); mlir::registerLowerAffinePass(); diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp index 4234a72192f31..dbc7668571e5e 100644 --- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp +++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp @@ -568,6 +568,11 @@ ModRefResult AliasAnalysis::getModRef(Operation *op, Value location) { if (isa(effect.getEffect())) continue; + // A unit resource cannot be addressed through any location, + // so we can ignore the unit resources. + if (effect.getResource() && effect.getResource()->isUnitResource()) + continue; + // Check for an alias between the effect and our memory location. AliasResult aliasResult = AliasResult::MayAlias; if (Value effectValue = effect.getValue()) diff --git a/flang/test/Transforms/affine-parallelize.fir b/flang/test/Transforms/affine-parallelize.fir new file mode 100644 index 0000000000000..6635b7beced3d --- /dev/null +++ b/flang/test/Transforms/affine-parallelize.fir @@ -0,0 +1,14 @@ +// RUN: fir-opt %s -affine-parallelize | FileCheck %s + +// Check that a read/write to a unit DebuggingResource +// by fir.dummy_scope does not prevent parallelization: +func.func @write_to_debugging_resource() { + %cst = arith.constant 0.0 : f32 + affine.for %i = 0 to 100 { + %m = memref.alloc() : memref<1xf32> + affine.store %cst, %m[0] : memref<1xf32> + %scope = fir.dummy_scope : !fir.dscope + } + // CHECK: affine.parallel + return +} diff --git a/flang/test/Transforms/cse.fir b/flang/test/Transforms/cse.fir new file mode 100644 index 0000000000000..d4bd638ae4811 --- /dev/null +++ b/flang/test/Transforms/cse.fir @@ -0,0 +1,23 @@ +// RUN: fir-opt -cse %s | FileCheck %s + +// Verify that fir.dummy_scope writing to the DebuggingResource +// does not prevent CSE for the two fir.load's: +// CHECK-LABEL: func.func @dummy_scope( +// CHECK-SAME: %[[ARG0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.ref, +// CHECK-SAME: %[[ARG1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.ref) -> (f32, f32) { +// CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[DECLARE_0:.*]] = fir.declare %[[ARG0]] dummy_scope %[[DUMMY_SCOPE_0]] {uniq_name = "x"} : (!fir.ref, !fir.dscope) -> !fir.ref +// CHECK: %[[LOAD_0:.*]] = fir.load %[[DECLARE_0]] : !fir.ref +// CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[DECLARE_1:.*]] = fir.declare %[[ARG1]] dummy_scope %[[DUMMY_SCOPE_1]] {uniq_name = "y"} : (!fir.ref, !fir.dscope) -> !fir.ref +// CHECK: return %[[LOAD_0]], %[[LOAD_0]] : f32, f32 +// CHECK: } +func.func @dummy_scope(%arg0: !fir.ref, %arg1: !fir.ref) -> (f32, f32) { + %scope1 = fir.dummy_scope : !fir.dscope + %0 = fir.declare %arg0 dummy_scope %scope1 {uniq_name = "x"} : (!fir.ref, !fir.dscope) -> !fir.ref + %1 = fir.load %0 : !fir.ref + %scope2 = fir.dummy_scope : !fir.dscope + %2 = fir.declare %arg1 dummy_scope %scope2 {uniq_name = "y"} : (!fir.ref, !fir.dscope) -> !fir.ref + %3 = fir.load %0 : !fir.ref + return %1, %3 : f32, f32 +} diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h index 84fbf2c3d936c..d9013235e1258 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h @@ -211,16 +211,38 @@ static constexpr StringLiteral getCombinedConstructsAttrName() { struct RuntimeCounters : public mlir::SideEffects::Resource::Base { mlir::StringRef getName() final { return "AccRuntimeCounters"; } + /// RuntimeCounters is a unit resource implemented by OpenACC + /// runtime library. It allows keeping order of OpenACC data operations + /// relative to each other (because they cannot be reordered + /// without specific analysis taking OpenACC semantics into account). + bool isUnitResource() const final { return true; } + /// The runtime implementation of the runtime counters is parallel-safe. + mlir::SideEffects::Resource::UnitProperties getUnitProperties() const final { + return mlir::SideEffects::Resource::UnitProperties::ParallelSafe; + } }; struct ConstructResource : public mlir::SideEffects::Resource::Base { mlir::StringRef getName() final { return "AccConstructResource"; } + /// ConstructResource is a synthetic unit resource that allows + /// keeping order of OpenACC constructs relative to each other. + /// The OpenACC dialect's construct operations usually read + /// and write this resource so that a construct is not deleted + /// if it is postdominated by another construct (and has no other + /// side effects otherwise). + bool isUnitResource() const final { return true; } + // TBD: define if this resource is parallel-safe. }; struct CurrentDeviceIdResource : public mlir::SideEffects::Resource::Base { mlir::StringRef getName() final { return "AccCurrentDeviceIdResource"; } + /// CurrentDeviceIdResource is a unit resource implemented by OpenACC + /// runtime library. It allows keeping order of OpenACC operations + /// that may read and write the current device id. + bool isUnitResource() const final { return true; } + // TBD: define if this resource is parallel-safe. }; } // namespace acc diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td index ee249bb7c3c99..cd4c06f16ab3f 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td @@ -30,7 +30,8 @@ def OpenACC_KernelEnvironmentOp NoTerminator, DeclareOpInterfaceMethods, - MemoryEffects<[MemWrite, + MemoryEffects<[MemRead, + MemWrite, MemRead]>]> { let summary = "Decomposition of compute constructs to capture data mapping " "and asynchronous behavior information"; diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 0a146db767760..b36b278c88edb 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -1669,10 +1669,11 @@ def OpenACC_ParallelOp [AttrSizedOperandSegments, AutomaticAllocationScope, RecursiveMemoryEffects, DeclareOpInterfaceMethods, - DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods< + RegionBranchOpInterface, ["getSuccessorInputs"]>, OffloadRegionOpInterface, - MemoryEffects<[MemWrite, + MemoryEffects<[MemRead, + MemWrite, MemRead]>]> { let summary = "parallel construct"; let description = [{ @@ -1871,10 +1872,11 @@ def OpenACC_SerialOp [AttrSizedOperandSegments, AutomaticAllocationScope, RecursiveMemoryEffects, DeclareOpInterfaceMethods, - DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods< + RegionBranchOpInterface, ["getSuccessorInputs"]>, OffloadRegionOpInterface, - MemoryEffects<[MemWrite, + MemoryEffects<[MemRead, + MemWrite, MemRead]>]> { let summary = "serial construct"; let description = [{ @@ -2013,10 +2015,11 @@ def OpenACC_KernelsOp [AttrSizedOperandSegments, AutomaticAllocationScope, RecursiveMemoryEffects, DeclareOpInterfaceMethods, - DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods< + RegionBranchOpInterface, ["getSuccessorInputs"]>, OffloadRegionOpInterface, - MemoryEffects<[MemWrite, + MemoryEffects<[MemRead, + MemWrite, MemRead]>]> { let summary = "kernels construct"; let description = [{ @@ -2195,9 +2198,10 @@ def OpenACC_KernelsOp def OpenACC_DataOp : OpenACC_Op< "data", [AttrSizedOperandSegments, RecursiveMemoryEffects, - DeclareOpInterfaceMethods, - MemoryEffects<[MemWrite, + DeclareOpInterfaceMethods< + RegionBranchOpInterface, ["getSuccessorInputs"]>, + MemoryEffects<[MemRead, + MemWrite, MemRead]>]> { let summary = "data construct"; @@ -2323,10 +2327,12 @@ def OpenACC_TerminatorOp // 2.6.6 Enter Data Directive //===----------------------------------------------------------------------===// -def OpenACC_EnterDataOp : OpenACC_Op<"enter_data", - [AttrSizedOperandSegments, - MemoryEffects<[MemWrite, - MemRead]>]> { +def OpenACC_EnterDataOp + : OpenACC_Op<"enter_data", + [AttrSizedOperandSegments, + MemoryEffects<[MemRead, + MemWrite, + MemRead]>]> { let summary = "enter data operation"; let description = [{ @@ -2395,10 +2401,12 @@ def OpenACC_EnterDataOp : OpenACC_Op<"enter_data", // 2.6.6 Exit Data Directive //===----------------------------------------------------------------------===// -def OpenACC_ExitDataOp : OpenACC_Op<"exit_data", - [AttrSizedOperandSegments, - MemoryEffects<[MemWrite, - MemRead]>]> { +def OpenACC_ExitDataOp + : OpenACC_Op<"exit_data", + [AttrSizedOperandSegments, + MemoryEffects<[MemRead, + MemWrite, + MemRead]>]> { let summary = "exit data operation"; let description = [{ @@ -2472,9 +2480,10 @@ def OpenACC_ExitDataOp : OpenACC_Op<"exit_data", def OpenACC_HostDataOp : OpenACC_Op<"host_data", [AttrSizedOperandSegments, - DeclareOpInterfaceMethods, - MemoryEffects<[MemWrite, + DeclareOpInterfaceMethods< + RegionBranchOpInterface, ["getSuccessorInputs"]>, + MemoryEffects<[MemRead, + MemWrite, MemRead]>]> { let summary = "host_data construct"; @@ -2519,9 +2528,10 @@ def OpenACC_LoopOp RecursiveMemoryEffects, DeclareOpInterfaceMethods, DeclareOpInterfaceMethods, - DeclareOpInterfaceMethods, - MemoryEffects<[MemWrite]>]> { + DeclareOpInterfaceMethods< + RegionBranchOpInterface, ["getSuccessorInputs"]>, + MemoryEffects<[MemRead, + MemWrite]>]> { let summary = "loop construct"; let description = [{ @@ -3034,9 +3044,11 @@ def AtomicCaptureOp : OpenACC_Op<"atomic.capture", // 2.13 Declare Directive //===----------------------------------------------------------------------===// -def OpenACC_DeclareEnterOp : OpenACC_Op<"declare_enter", - [MemoryEffects<[MemWrite, - MemRead]>]> { +def OpenACC_DeclareEnterOp + : OpenACC_Op<"declare_enter", + [MemoryEffects<[MemRead, + MemWrite, + MemRead]>]> { let summary = "declare directive - entry to implicit data region"; let description = [{ @@ -3065,10 +3077,12 @@ def OpenACC_DeclareEnterOp : OpenACC_Op<"declare_enter", let hasVerifier = 1; } -def OpenACC_DeclareExitOp : OpenACC_Op<"declare_exit", - [AttrSizedOperandSegments, - MemoryEffects<[MemWrite, - MemRead]>]> { +def OpenACC_DeclareExitOp + : OpenACC_Op<"declare_exit", + [AttrSizedOperandSegments, + MemoryEffects<[MemRead, + MemWrite, + MemRead]>]> { let summary = "declare directive - exit from implicit data region"; let description = [{ @@ -3169,9 +3183,11 @@ def OpenACC_GlobalDestructorOp : OpenACC_Op<"global_dtor", let hasVerifier = 0; } -def OpenACC_DeclareOp : OpenACC_Op<"declare", - [RecursiveMemoryEffects, - MemoryEffects<[MemWrite]>]> { +def OpenACC_DeclareOp + : OpenACC_Op< + "declare", [RecursiveMemoryEffects, + MemoryEffects<[MemRead, + MemWrite]>]> { let summary = "declare implicit region"; let description = [{ @@ -3501,10 +3517,12 @@ def OpenACC_SetOp : OpenACC_Op<"set", [AttrSizedOperandSegments, // 2.14.4. Update Directive //===----------------------------------------------------------------------===// -def OpenACC_UpdateOp : OpenACC_Op<"update", - [AttrSizedOperandSegments, - MemoryEffects<[MemWrite, - MemRead]>]> { +def OpenACC_UpdateOp + : OpenACC_Op<"update", + [AttrSizedOperandSegments, + MemoryEffects<[MemRead, + MemWrite, + MemRead]>]> { let summary = "update operation"; let description = [{ diff --git a/mlir/include/mlir/Interfaces/SideEffectInterfaces.h b/mlir/include/mlir/Interfaces/SideEffectInterfaces.h index 9de20f0c69f1a..fa79bc2bb2908 100644 --- a/mlir/include/mlir/Interfaces/SideEffectInterfaces.h +++ b/mlir/include/mlir/Interfaces/SideEffectInterfaces.h @@ -69,7 +69,46 @@ class Effect { /// The id of the derived effect class. TypeID id; }; +} // namespace SideEffects + +//===----------------------------------------------------------------------===// +// Operation Memory-Effect Modeling +//===----------------------------------------------------------------------===// + +namespace MemoryEffects { +/// This class represents the base class used for memory effects. +struct Effect : public SideEffects::Effect { + using SideEffects::Effect::Effect; + + /// A base class for memory effects that provides helper utilities. + template + using Base = SideEffects::Effect::Base; + + static bool classof(const SideEffects::Effect *effect); +}; + +/// The following effect indicates that the operation allocates from some +/// resource. An 'allocate' effect implies only allocation of the resource, and +/// not any visible mutation or dereference. +struct Allocate : public Effect::Base {}; + +/// The following effect indicates that the operation frees some resource that +/// has been allocated. An 'allocate' effect implies only de-allocation of the +/// resource, and not any visible allocation, mutation or dereference. +struct Free : public Effect::Base {}; +/// The following effect indicates that the operation reads from some resource. +/// A 'read' effect implies only dereferencing of the resource, and not any +/// visible mutation. +struct Read : public Effect::Base {}; + +/// The following effect indicates that the operation writes to some resource. A +/// 'write' effect implies only mutating a resource, and not any visible +/// dereference or read. +struct Write : public Effect::Base {}; +} // namespace MemoryEffects + +namespace SideEffects { //===----------------------------------------------------------------------===// // Resources //===----------------------------------------------------------------------===// @@ -110,10 +149,68 @@ class Resource { /// Return a string name of the resource. virtual StringRef getName() = 0; + /// Return true if the resource is a "unit resource", + /// which means it cannot be partitioned and can only + /// be read/written as a whole. As such, none of the values + /// can legally point to this resource, i.e. all the side + /// effects affecting such a resource are operation-wide. + /// + /// Unit resources may be used for establishing ordering + /// constraints on specific operations, which otherwise + /// may be hard to preserve during MLIR transformations. + /// + /// Two operations may be reordered relative to each other + /// if they both only read a unit resource. + /// An operation may be erased if it writes a unit resource + /// and it is postdominated by another operation that writes + /// a unit resource (without first reading it). + /// + /// A unit resource should be always allocated, so Allocate/Free + /// effects should be illegal for it. For the time being, + /// Allocate effect is allowed to represent a case like this: + /// a set of operations that may be reordered relative to each + /// other may use Read effect, but with solely Read effect + /// the might be considered trivially dead - the presence + /// of Allocate effect currently prevents their deletion. + /// TODO: it is questionable whether relying on Allocate + /// effect is enough (e.g. a pass may decide to still + /// delete such operations if it does not find an operation + /// with the corresponding Free effect). How do we represent + /// a set of reorderable operations that cannot be deleted + /// but do not block any optimizations due to their side effects. + virtual bool isUnitResource() const { return false; } + + /// A set of properties of a unit resource. + enum class UnitProperties { + /// No special properties. + None = 0, + /// A parallel-safe unit resource can be read/written in parallel by the + /// same static instance of an operation without invalidating the program. + /// It may be useful to make a "unit resource" also parallel, + /// e.g. if it is an artificial resource to prevent static + /// operations reordering without blocking dynamic reordering. + /// For example, Flang uses DebuggingResource to preserve + /// ordering between certain operations providing debug information, + /// but these operations do not affect execution and can be assumed + /// parallel safe. + + ParallelSafe = 1, + }; + + bool hasUnitProperties(uint64_t props) { + assert(isUnitResource() && "the resource must be unit"); + return props == (static_cast(getUnitProperties()) & props); + } + protected: Resource(TypeID id) : id(id) {} private: + /// Overrideable method that returns properties of the unit resource. + virtual UnitProperties getUnitProperties() const { + return UnitProperties::None; + } + /// The id of the derived resource class. TypeID id; }; @@ -140,11 +237,15 @@ class EffectInstance { public: EffectInstance(EffectT *effect, Resource *resource = DefaultResource::get()) : effect(effect), resource(resource), stage(0), - effectOnFullRegion(false) {} + effectOnFullRegion(false) { + verify(); + } EffectInstance(EffectT *effect, int stage, bool effectOnFullRegion, Resource *resource = DefaultResource::get()) : effect(effect), resource(resource), stage(stage), - effectOnFullRegion(effectOnFullRegion) {} + effectOnFullRegion(effectOnFullRegion) { + verify(); + } template ::value, @@ -152,7 +253,9 @@ class EffectInstance { EffectInstance(EffectT *effect, T value, Resource *resource = DefaultResource::get()) : effect(effect), resource(resource), value(value), stage(0), - effectOnFullRegion(false) {} + effectOnFullRegion(false) { + verify(); + } template ::value, @@ -160,25 +263,35 @@ class EffectInstance { EffectInstance(EffectT *effect, T value, int stage, bool effectOnFullRegion, Resource *resource = DefaultResource::get()) : effect(effect), resource(resource), value(value), stage(stage), - effectOnFullRegion(effectOnFullRegion) {} + effectOnFullRegion(effectOnFullRegion) { + verify(); + } EffectInstance(EffectT *effect, SymbolRefAttr symbol, Resource *resource = DefaultResource::get()) : effect(effect), resource(resource), value(symbol), stage(0), - effectOnFullRegion(false) {} + effectOnFullRegion(false) { + verify(); + } EffectInstance(EffectT *effect, SymbolRefAttr symbol, int stage, bool effectOnFullRegion, Resource *resource = DefaultResource::get()) : effect(effect), resource(resource), value(symbol), stage(stage), - effectOnFullRegion(effectOnFullRegion) {} + effectOnFullRegion(effectOnFullRegion) { + verify(); + } EffectInstance(EffectT *effect, Attribute parameters, Resource *resource = DefaultResource::get()) : effect(effect), resource(resource), parameters(parameters), stage(0), - effectOnFullRegion(false) {} + effectOnFullRegion(false) { + verify(); + } EffectInstance(EffectT *effect, Attribute parameters, int stage, bool effectOnFullRegion, Resource *resource = DefaultResource::get()) : effect(effect), resource(resource), parameters(parameters), - stage(stage), effectOnFullRegion(effectOnFullRegion) {} + stage(stage), effectOnFullRegion(effectOnFullRegion) { + verify(); + } template ::value, @@ -186,7 +299,9 @@ class EffectInstance { EffectInstance(EffectT *effect, T value, Attribute parameters, Resource *resource = DefaultResource::get()) : effect(effect), resource(resource), value(value), - parameters(parameters), stage(0), effectOnFullRegion(false) {} + parameters(parameters), stage(0), effectOnFullRegion(false) { + verify(); + } template ::value, @@ -196,17 +311,23 @@ class EffectInstance { Resource *resource = DefaultResource::get()) : effect(effect), resource(resource), value(value), parameters(parameters), stage(stage), - effectOnFullRegion(effectOnFullRegion) {} + effectOnFullRegion(effectOnFullRegion) { + verify(); + } EffectInstance(EffectT *effect, SymbolRefAttr symbol, Attribute parameters, Resource *resource = DefaultResource::get()) : effect(effect), resource(resource), value(symbol), - parameters(parameters), stage(0), effectOnFullRegion(false) {} + parameters(parameters), stage(0), effectOnFullRegion(false) { + verify(); + } EffectInstance(EffectT *effect, SymbolRefAttr symbol, Attribute parameters, int stage, bool effectOnFullRegion, Resource *resource = DefaultResource::get()) : effect(effect), resource(resource), value(symbol), parameters(parameters), stage(stage), - effectOnFullRegion(effectOnFullRegion) {} + effectOnFullRegion(effectOnFullRegion) { + verify(); + } /// Return the effect being applied. EffectT *getEffect() const { return effect; } @@ -256,6 +377,22 @@ class EffectInstance { bool getEffectOnFullRegion() const { return effectOnFullRegion; } private: + /// Verify the limitations of the side effect (e.g. a unit resource + /// effect cannot be applied to any value). + void verify() const { + std::string message; + llvm::raw_string_ostream os(message); + if (resource && resource->isUnitResource()) { + if (effect && isa(effect)) + os << "Free effect is not allowed for unit resources"; + else if (value) + os << "Unit resources cannot be addressed via any location"; + } + + if (!message.empty()) + llvm::report_fatal_error(Twine(message)); + } + /// The specific effect being applied. EffectT *effect; @@ -280,6 +417,10 @@ class EffectInstance { }; } // namespace SideEffects +namespace MemoryEffects { +using EffectInstance = SideEffects::EffectInstance; +} // namespace MemoryEffects + namespace Speculation { /// This enum is returned from the `getSpeculatability` method in the /// `ConditionallySpeculatable` op interface. @@ -341,44 +482,6 @@ struct AlwaysSpeculatableImplTrait }; } // namespace OpTrait -//===----------------------------------------------------------------------===// -// Operation Memory-Effect Modeling -//===----------------------------------------------------------------------===// - -namespace MemoryEffects { -/// This class represents the base class used for memory effects. -struct Effect : public SideEffects::Effect { - using SideEffects::Effect::Effect; - - /// A base class for memory effects that provides helper utilities. - template - using Base = SideEffects::Effect::Base; - - static bool classof(const SideEffects::Effect *effect); -}; -using EffectInstance = SideEffects::EffectInstance; - -/// The following effect indicates that the operation allocates from some -/// resource. An 'allocate' effect implies only allocation of the resource, and -/// not any visible mutation or dereference. -struct Allocate : public Effect::Base {}; - -/// The following effect indicates that the operation frees some resource that -/// has been allocated. An 'allocate' effect implies only de-allocation of the -/// resource, and not any visible allocation, mutation or dereference. -struct Free : public Effect::Base {}; - -/// The following effect indicates that the operation reads from some resource. -/// A 'read' effect implies only dereferencing of the resource, and not any -/// visible mutation. -struct Read : public Effect::Base {}; - -/// The following effect indicates that the operation writes to some resource. A -/// 'write' effect implies only mutating a resource, and not any visible -/// dereference or read. -struct Write : public Effect::Base {}; -} // namespace MemoryEffects - //===----------------------------------------------------------------------===// // SideEffect Utilities //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Analysis/AliasAnalysis/LocalAliasAnalysis.cpp b/mlir/lib/Analysis/AliasAnalysis/LocalAliasAnalysis.cpp index 5a4679ef31422..e7c99821861e8 100644 --- a/mlir/lib/Analysis/AliasAnalysis/LocalAliasAnalysis.cpp +++ b/mlir/lib/Analysis/AliasAnalysis/LocalAliasAnalysis.cpp @@ -513,6 +513,13 @@ ModRefResult LocalAliasAnalysis::getModRef(Operation *op, Value location) { continue; } + // A unit resource cannot be addressed through any location, + // so we can ignore the unit resources. + if (effect.getResource() && effect.getResource()->isUnitResource()) { + LDBG() << " Skipping a unit resource effect"; + continue; + } + // Check for an alias between the effect and our memory location. // TODO: Add support for checking an alias with a symbol reference. AliasResult aliasResult = AliasResult::MayAlias; diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp index 3d1a73417d1ea..e465175142447 100644 --- a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp @@ -134,6 +134,26 @@ static bool isLocallyDefined(Value v, Operation *enclosingOp) { return viewOp && isLocallyDefined(viewOp.getViewSource(), enclosingOp); } +static bool allEffectsAreParallelSafe(Operation *op) { + std::optional> effects = + getEffectsRecursively(op); + if (!effects) + return false; + for (const MemoryEffects::EffectInstance &effect : *effects) { + // Allocate is a parallel-safe effect. + if (isa(effect.getEffect())) + continue; + SideEffects::Resource *resource = effect.getResource(); + // Reads/write to parallel-safe unit resources are parallelizable. + if (resource && resource->isUnitResource() && + resource->hasUnitProperties(static_cast( + SideEffects::Resource::UnitProperties::ParallelSafe))) + continue; + return false; + } + return true; +} + bool mlir::affine::isLoopMemoryParallel(AffineForOp forOp) { // Any memref-typed iteration arguments are treated as serializing. if (llvm::any_of(forOp.getResultTypes(), llvm::IsaPred)) @@ -151,8 +171,7 @@ bool mlir::affine::isLoopMemoryParallel(AffineForOp forOp) { if (!isLocallyDefined(writeOp.getMemRef(), forOp)) loadAndStoreOps.push_back(op); } else if (!isa(op) && - !hasSingleEffect(op) && - !isMemoryEffectFree(op)) { + !allEffectsAreParallelSafe(op)) { // Alloc-like ops inside `forOp` are fine (they don't impact parallelism) // as long as they don't escape the loop (which has been checked above). return WalkResult::interrupt(); diff --git a/mlir/lib/Transforms/CSE.cpp b/mlir/lib/Transforms/CSE.cpp index 8eaac308755fd..3da0389f49a7e 100644 --- a/mlir/lib/Transforms/CSE.cpp +++ b/mlir/lib/Transforms/CSE.cpp @@ -174,6 +174,20 @@ void CSEDriver::replaceUsesAndDelete(ScopedMapTy &knownValues, Operation *op, ++numCSE; } +static llvm::SmallVector +getAllUnitResources(Operation *op) { + llvm::SmallVector result; + std::optional> effects = + getEffectsRecursively(op); + if (!effects) + return {}; + for (const MemoryEffects::EffectInstance &effect : *effects) + if (effect.getResource() && effect.getResource()->isUnitResource()) + result.push_back(effect.getResource()); + + return result; +} + bool CSEDriver::hasOtherSideEffectingOpInBetween(Operation *fromOp, Operation *toOp) { assert(fromOp->getBlock() == toOp->getBlock()); @@ -196,6 +210,12 @@ bool CSEDriver::hasOtherSideEffectingOpInBetween(Operation *fromOp, return true; } } + + llvm::SmallVector fromResources = + getAllUnitResources(fromOp); + llvm::SmallVector toResources = + getAllUnitResources(toOp); + while (nextOp && nextOp != toOp) { std::optional> effects = getEffectsRecursively(nextOp); @@ -210,6 +230,27 @@ bool CSEDriver::hasOtherSideEffectingOpInBetween(Operation *fromOp, for (const MemoryEffects::EffectInstance &effect : *effects) { if (isa(effect.getEffect())) { + // If fromOp/toOp are reading from a resource that the current + // effect does not affect, it is not a clobbering write + // and we may ignore it. At the same time, for operations + // that have read/write effects we can usually deduce the location + // and not the resource. So in general, we should assume that + // the read/write may happen to/from the same resource. + // Unit resources are special because they cannot be addressed + // via any location and we know exactly if an operation accesses + // a unit resource, so we can always reason whether operations + // read/write to/from the same unit resource. + SideEffects::Resource *opResource = effect.getResource(); + if (opResource && opResource->isUnitResource() && + llvm::all_of(fromResources, + [&](SideEffects::Resource *fromResource) { + return fromResource != opResource; + }) && + llvm::all_of(toResources, [&](SideEffects::Resource *toResource) { + return toResource != opResource; + })) + continue; + result.first->second = {nextOp, MemoryEffects::Write::get()}; return true; } diff --git a/mlir/test/lib/Dialect/Test/CMakeLists.txt b/mlir/test/lib/Dialect/Test/CMakeLists.txt index 9354a85d984c9..5448606cfd42e 100644 --- a/mlir/test/lib/Dialect/Test/CMakeLists.txt +++ b/mlir/test/lib/Dialect/Test/CMakeLists.txt @@ -95,6 +95,7 @@ mlir_target_link_libraries(MLIRTestDialect PUBLIC MLIRTransforms MLIRValueBoundsOpInterface MLIRBufferizationDialect + MLIRSideEffectInterfaces ) add_mlir_translation_library(MLIRTestFromLLVMIRTranslation