diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h index 7bebfc9a30064..4ddacd67f99c2 100644 --- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h +++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h @@ -13,6 +13,8 @@ #include "mlir/IR/OpDefinition.h" #include "mlir/IR/PatternMatch.h" +#include "llvm/ADT/DenseMap.h" + namespace mlir { /// Represents a slot in memory. This is generated by an allocating operation @@ -44,4 +46,69 @@ enum class DeletionKind { #include "mlir/Interfaces/MemorySlotOpInterfaces.h.inc" #include "mlir/Interfaces/MemorySlotTypeInterfaces.h.inc" +namespace mlir { + +/// An entry in a `PromotableAliasMap`: the memory slot defined by an aliaser +/// operation and its source operand. +struct PromotableSlotAliasInfo { + /// The slot defined by the aliaser (its `ptr` is the map key). + MemorySlot slot; + /// The aliaser operand whose value is the parent slot's pointer. + OpOperand *aliasedSlotPointerOperand; +}; + +/// Maps an alias slot pointer (a result of a `PromotableAliaserInterface` op) +/// reachable from a root slot to its `PromotableSlotAliasInfo`. +using PromotableAliasMap = + llvm::SmallDenseMap; + +/// Populates `aliasMap` with alias entries produced by `aliaser` for operands +/// that already alias `rootSlot`. This should be called during a forward slice +/// traversal from `rootSlot.ptr` to ensure topological ordering. +void populatePromotableAliasMap(PromotableAliaserInterface aliaser, + const MemorySlot &rootSlot, + PromotableAliasMap &aliasMap); + +/// Returns a `MemorySlot` for the operand of `op` that aliases `rootSlot.ptr` +/// (either the root itself or a known entry in `aliasMap`), providing the +/// alias's element type. Returns `nullopt` if no operand of `op` reaches +/// `rootSlot`. 
If `op` reaches `rootSlot` through multiple distinct aliases +/// (e.g., a memcpy between two aliases of the same root), the result is one +/// of them; use `referencesAtMostOneAliasOfSlot` to rule this out. +std::optional getOpAliasSlot(Operation *op, + const MemorySlot &rootSlot, + const PromotableAliasMap &aliasMap); + +/// Returns true if `op`'s operands reach `rootSlot` through at most one +/// distinct alias pointer (the root itself or a single `aliasMap` entry). +/// Multiple operands referencing the same alias are allowed. This is used to +/// guard `PromotableMemOpInterface` calls, which assume a single slot per +/// operation. +bool referencesAtMostOneAliasOfSlot(Operation *op, const MemorySlot &rootSlot, + const PromotableAliasMap &aliasMap); + +/// Walks the alias chain from `rootSlot` down to `aliasSlot`. Calls +/// `projectSlotValueToAliasValue` at each step to convert `slotValue` +/// (initially the root slot's value) to `aliasSlot`'s value. Returns a null +/// value if any projection fails. +Value convertSlotValueToAliasValue(Value slotValue, const MemorySlot &aliasSlot, + const MemorySlot &rootSlot, + const PromotableAliasMap &aliasMap, + OpBuilder &builder); + +/// Walks the alias chain from `aliasSlot` back up to `rootSlot`. Calls +/// `projectAliasValueToSlotValue` at each step to convert `aliasValue` +/// (initially `aliasSlot`'s value) to the root slot's value. +/// `rootReachingDef` is the current value of the root slot; it is projected +/// down to each intermediate slot to provide the reaching definition required +/// by partial sub-aliases. 
+Value convertAliasValueToSlotValue(Value aliasValue, + const MemorySlot &aliasSlot, + Value rootReachingDef, + const MemorySlot &rootSlot, + const PromotableAliasMap &aliasMap, + OpBuilder &builder); + +} // namespace mlir + #endif // MLIR_INTERFACES_MEMORYSLOTINTERFACES_H diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td index 801555fba4947..cfb914c798729 100644 --- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td +++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td @@ -266,6 +266,89 @@ def PromotableOpInterface : OpInterface<"PromotableOpInterface"> { ]; } +def PromotableAliaserInterface : OpInterface<"PromotableAliaserInterface"> { + let description = [{ + Describes an operation that creates a transparent alias of a memory slot + accessed through one of its operands. Mem2Reg traverses chains of these + aliases to project slot values across them. This allows load and store + operations on the alias to be promoted as if they were directly accessing + the underlying slot. + + Since an alias remains a blocking use of the underlying slot pointer, the + operation must also implement either `PromotableOpInterface` or + `PromotableMemOpInterface`. This ensures that mem2reg can remove the alias + after the slot has been promoted. + }]; + let cppNamespace = "::mlir"; + + let verify = [{ + if (!::mlir::isa<::mlir::PromotableOpInterface, + ::mlir::PromotableMemOpInterface>($_op)) + return $_op->emitOpError( + "implements `PromotableAliaserInterface` but must also implement " + "`PromotableOpInterface` or `PromotableMemOpInterface`."); + return ::mlir::success(); + }]; + + let methods = [ + InterfaceMethod<[{ + Populates `newMemorySlots` with the memory slots this operation + exposes by aliasing `parentSlot` (accessed via + `aliasedSlotPointerOperand`). Each new slot's pointer must be a + result of this operation, and its element type may differ from the + parent's. 
Leave the vector empty if no alias is exposed. An operation + can expose multiple aliases for the same parent slot. + + `parentSlot` is provided so that aliasers using opaque pointers can + derive the new slot's element type from `parentSlot.elemType`. + + Exposing an alias requires implementing the two projection methods + below to bridge values between the parent and new slot element types. + If these projections cannot be performed, leave `newMemorySlots` empty. + + No IR mutation is allowed in this method. + }], + "void", + "getPromotableSlotAliases", + (ins "::mlir::OpOperand &":$aliasedSlotPointerOperand, + "const ::mlir::MemorySlot &":$parentSlot, + "::llvm::SmallVectorImpl<::mlir::MemorySlot> &":$newMemorySlots) + >, + InterfaceMethod<[{ + Extracts the value of `aliasSlot` from `slotValue` (the value of + `parentSlot`). Mem2Reg invokes this method when a load on the new slot + requires the parent's value to be materialized with the new slot's + element type. + }], + "::mlir::Value", + "projectSlotValueToAliasValue", + (ins "::mlir::OpOperand &":$aliasedSlotPointerOperand, + "const ::mlir::MemorySlot &":$parentSlot, + "const ::mlir::MemorySlot &":$aliasSlot, + "::mlir::Value":$slotValue, + "::mlir::OpBuilder &":$builder), [{}], + [{ return slotValue; }] + >, + InterfaceMethod<[{ + Reconstructs the value of `parentSlot` from `aliasValue` (a store to + `aliasSlot`) and `reachingDef` (the parent slot's value immediately + preceding the store). For full aliases, `reachingDef` can be ignored. + For partial sub-aliases, it allows the result to be constructed by + inserting `aliasValue` into `reachingDef`. 
+ }], + "::mlir::Value", + "projectAliasValueToSlotValue", + (ins "::mlir::OpOperand &":$aliasedSlotPointerOperand, + "const ::mlir::MemorySlot &":$parentSlot, + "const ::mlir::MemorySlot &":$aliasSlot, + "::mlir::Value":$aliasValue, + "::mlir::Value":$reachingDef, + "::mlir::OpBuilder &":$builder), [{}], + [{ return aliasValue; }] + >, + ]; +} + def PromotableRegionOpInterface : OpInterface<"PromotableRegionOpInterface"> { let description = [{ diff --git a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp index 2c9e23250e9ee..57c00aa031d37 100644 --- a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp +++ b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp @@ -8,5 +8,157 @@ #include "mlir/Interfaces/MemorySlotInterfaces.h" +#include "llvm/ADT/SmallVector.h" + #include "mlir/Interfaces/MemorySlotOpInterfaces.cpp.inc" #include "mlir/Interfaces/MemorySlotTypeInterfaces.cpp.inc" + +using namespace mlir; + +/// Returns the slot describing `aliasPtr`: `rootSlot` if it is the root, +/// the entry in `aliasMap` if it's a known alias, or `nullopt` otherwise. 
+static std::optional +getParentSlot(Value aliasPtr, const MemorySlot &rootSlot, + const PromotableAliasMap &aliasMap) { + if (aliasPtr == rootSlot.ptr) + return rootSlot; + auto it = aliasMap.find(aliasPtr); + if (it == aliasMap.end()) + return std::nullopt; + return it->second.slot; +} + +void mlir::populatePromotableAliasMap(PromotableAliaserInterface aliaser, + const MemorySlot &rootSlot, + PromotableAliasMap &aliasMap) { + for (OpOperand &operand : aliaser->getOpOperands()) { + std::optional parentSlot = + getParentSlot(operand.get(), rootSlot, aliasMap); + if (!parentSlot) + continue; + SmallVector newSlots; + aliaser.getPromotableSlotAliases(operand, *parentSlot, newSlots); + for (const MemorySlot &alias : newSlots) + aliasMap.try_emplace(alias.ptr, PromotableSlotAliasInfo{alias, &operand}); + } +} + +std::optional +mlir::getOpAliasSlot(Operation *op, const MemorySlot &rootSlot, + const PromotableAliasMap &aliasMap) { + for (Value operand : op->getOperands()) + if (std::optional slot = + getParentSlot(operand, rootSlot, aliasMap)) + return slot; + return std::nullopt; +} + +bool mlir::referencesAtMostOneAliasOfSlot(Operation *op, + const MemorySlot &rootSlot, + const PromotableAliasMap &aliasMap) { + Value uniqueAliasPtr; + for (Value operand : op->getOperands()) { + std::optional slot = getParentSlot(operand, rootSlot, aliasMap); + if (!slot) + continue; + if (uniqueAliasPtr && uniqueAliasPtr != slot->ptr) + return false; + uniqueAliasPtr = slot->ptr; + } + return true; +} + +namespace { +/// A step in an alias chain, from leaf to root. `parentSlot` is one step +/// closer to the root; `aliasSlot` is the slot exposed at this step. +struct ChainStep { + PromotableAliaserInterface aliaser; + OpOperand *aliasedSlotPointerOperand; + MemorySlot parentSlot; + MemorySlot aliasSlot; +}; +} // namespace + +/// Walks from `aliasSlot` back to `rootSlot` via `aliasMap`. Returns the +/// leaf-to-root chain, or `nullopt` if `aliasSlot` is not a known alias. 
+static std::optional> +buildAliasChain(const MemorySlot &aliasSlot, const MemorySlot &rootSlot, + const PromotableAliasMap &aliasMap) { + SmallVector chain; + Value current = aliasSlot.ptr; + while (current != rootSlot.ptr) { + auto it = aliasMap.find(current); + if (it == aliasMap.end()) + return std::nullopt; + OpOperand *operand = it->second.aliasedSlotPointerOperand; + auto aliaser = cast(operand->getOwner()); + std::optional parent = + getParentSlot(operand->get(), rootSlot, aliasMap); + if (!parent) + return std::nullopt; + chain.push_back(ChainStep{aliaser, operand, *parent, it->second.slot}); + current = operand->get(); + } + return chain; +} + +Value mlir::convertSlotValueToAliasValue(Value slotValue, + const MemorySlot &aliasSlot, + const MemorySlot &rootSlot, + const PromotableAliasMap &aliasMap, + OpBuilder &builder) { + std::optional> chain = + buildAliasChain(aliasSlot, rootSlot, aliasMap); + if (!chain) + return {}; + Value current = slotValue; + // Root-to-leaf walk: reverse the leaf-first chain. + for (ChainStep &step : llvm::reverse(*chain)) { + current = step.aliaser.projectSlotValueToAliasValue( + *step.aliasedSlotPointerOperand, step.parentSlot, step.aliasSlot, + current, builder); + if (!current) + return {}; + } + return current; +} + +Value mlir::convertAliasValueToSlotValue(Value aliasValue, + const MemorySlot &aliasSlot, + Value rootReachingDef, + const MemorySlot &rootSlot, + const PromotableAliasMap &aliasMap, + OpBuilder &builder) { + std::optional> chainOpt = + buildAliasChain(aliasSlot, rootSlot, aliasMap); + if (!chainOpt) + return {}; + SmallVector &chain = *chainOpt; + + // Project `rootReachingDef` down to each step's parent level so the + // per-step projector can use it (needed for partial sub-aliases; full + // aliases ignore it). The chain is leaf-first, so `chain.back()` is the + // root-most step (parent = rootSlot) and `chain.front()` is the leaf. 
+ SmallVector perStepReachingDef(chain.size()); + Value current = rootReachingDef; + for (int i = static_cast(chain.size()) - 1; i >= 0; --i) { + perStepReachingDef[i] = current; + current = chain[i].aliaser.projectSlotValueToAliasValue( + *chain[i].aliasedSlotPointerOperand, chain[i].parentSlot, + chain[i].aliasSlot, current, builder); + if (!current) + return {}; + } + + // Walk leaf-to-root, combining `aliasValue` with the projected reaching + // definition at each step. + current = aliasValue; + for (size_t i = 0; i < chain.size(); ++i) { + current = chain[i].aliaser.projectAliasValueToSlotValue( + *chain[i].aliasedSlotPointerOperand, chain[i].parentSlot, + chain[i].aliasSlot, current, perStepReachingDef[i], builder); + if (!current) + return {}; + } + return current; +} diff --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp index 516002906bcf6..277457f574f55 100644 --- a/mlir/lib/Transforms/Mem2Reg.cpp +++ b/mlir/lib/Transforms/Mem2Reg.cpp @@ -158,6 +158,9 @@ struct MemorySlotPromotionInfo { /// are guaranteed to be held by a PromotableRegionOpInterface, and to be /// nested within the parent region of the slot pointer. DenseMap regionsToPromote; + /// Transitive aliases of `slot.ptr` via `PromotableAliaserInterface`, + /// mapping alias values to their exposed slot and aliased operand. + PromotableAliasMap aliasMap; }; /// Computes information for basic slot promotion. This will check that direct @@ -184,9 +187,10 @@ class MemorySlotPromotionAnalyzer { /// Resulting blocking uses are grouped by region. /// This also ensures all the uses are within promotable regions, adding /// information about regions to be promoted to the `regionsToPromote` map. 
- LogicalResult computeBlockingUses( - RegionBlockingUsesMap &userToBlockingUses, - DenseMap &regionsToPromote); + LogicalResult + computeBlockingUses(RegionBlockingUsesMap &userToBlockingUses, + DenseMap &regionsToPromote, + PromotableAliasMap &aliasMap); /// Computes the points in the provided region where multiple re-definitions /// of the slot's value (stores) may conflict. @@ -344,7 +348,8 @@ Value MemorySlotPromoter::getOrCreateDefaultValue() { LogicalResult MemorySlotPromotionAnalyzer::computeBlockingUses( RegionBlockingUsesMap &userToBlockingUses, - DenseMap &regionsToPromote) { + DenseMap &regionsToPromote, + PromotableAliasMap &aliasMap) { // The promotion of an operation may require the promotion of further // operations (typically, removing operations that use an operation that must // delete itself). We thus need to start from the use of the slot pointer and @@ -389,6 +394,10 @@ LogicalResult MemorySlotPromotionAnalyzer::computeBlockingUses( if (it == blockingUsesMap.end()) continue; + // Populate the alias map for alias-exposing ops. + if (auto aliaser = dyn_cast(user)) + populatePromotableAliasMap(aliaser, slot, aliasMap); + SmallPtrSet &blockingUses = it->second; SmallVector newBlockingUses; @@ -400,14 +409,22 @@ LogicalResult MemorySlotPromotionAnalyzer::computeBlockingUses( return failure(); regionsWithDirectUse.insert(user->getParentRegion()); } else if (auto promotable = dyn_cast(user)) { - if (!promotable.canUsesBeRemoved(slot, blockingUses, newBlockingUses, + // If the memop reaches the root slot through multiple distinct alias + // operands, promotion fails. `PromotableMemOpInterface` expects a + // single slot per call. Supporting multiple aliases would require + // extending the interface. 
+ if (!referencesAtMostOneAliasOfSlot(user, slot, aliasMap)) + return failure(); + MemorySlot aliasSlot = + getOpAliasSlot(user, slot, aliasMap).value_or(slot); + if (!promotable.canUsesBeRemoved(aliasSlot, blockingUses, newBlockingUses, dataLayout)) return failure(); // Operations that interact with the slot's memory will be promoted using // a reaching definition. Therefore, the operation must be within a region // where the reaching definition can be computed. - if (promotable.storesTo(slot)) + if (promotable.storesTo(aliasSlot)) regionsWithDirectStore.insert(user->getParentRegion()); else regionsWithDirectUse.insert(user->getParentRegion()); @@ -509,18 +526,30 @@ MemorySlotPromotionAnalyzer::computeInfo() { // cannot find a way to resolve their blocking uses, we abort the promotion. // We also compute at this stage the regions that will be analyzed for // reaching definition information. - if (failed( - computeBlockingUses(info.userToBlockingUses, info.regionsToPromote))) + if (failed(computeBlockingUses(info.userToBlockingUses, info.regionsToPromote, + info.aliasMap))) return {}; // Compute the blocks containing a store for each region, either directly or // inherited from a nested region. As a side effect, `definingBlocks` contains // all regions with at least one store. + // + // Iterate over direct users of the slot pointer and all alias pointers in + // `info.aliasMap`. This assumes `PromotableMemOpInterface` operations storing + // to the slot use the slot pointer or its aliases directly. Dialects must + // implement `PromotableAliaserInterface` for views/aliasing. 
DenseMap> definingBlocks; - for (Operation *user : slot.ptr.getUsers()) - if (auto storeOp = dyn_cast(user)) - if (storeOp.storesTo(slot)) - definingBlocks[user->getParentRegion()].insert(user->getBlock()); + auto collectStoringBlocks = [&](Value ptr, const MemorySlot &ptrSlot) { + for (OpOperand &use : ptr.getUses()) { + Operation *user = use.getOwner(); + if (auto storeOp = dyn_cast(user)) + if (storeOp.storesTo(ptrSlot)) + definingBlocks[user->getParentRegion()].insert(user->getBlock()); + } + }; + collectStoringBlocks(slot.ptr, slot); + for (auto &[aliasPtr, aliasInfo] : info.aliasMap) + collectStoringBlocks(aliasPtr, aliasInfo.slot); for (auto &[region, regionInfo] : info.regionsToPromote) if (regionInfo.hasValueStores) definingBlocks[region->getParentRegion()].insert( @@ -551,18 +580,39 @@ Value MemorySlotPromoter::promoteInBlock(Block *block, Value reachingDef) { if (info.userToBlockingUses[memOp->getParentRegion()].contains(memOp)) reachingDefs.insert({memOp, reachingDef}); - if (memOp.storesTo(slot)) { + MemorySlot aliasSlot = + getOpAliasSlot(memOp, slot, info.aliasMap).value_or(slot); + if (memOp.storesTo(aliasSlot)) { builder.setInsertionPointAfter(memOp); // To not expose default value creation to the interfaces, if we have // no reaching definition by now, we set it to the default value. // This is slightly too eager as `getStored` may not need it. if (!reachingDef) reachingDef = getOrCreateDefaultValue(); - Value stored = memOp.getStored(slot, builder, reachingDef, dataLayout); + Value reachingDefAtStore = reachingDef; + if (slot.ptr != aliasSlot.ptr) { + // The store sees the slot at `aliasSlot.elemType`; project the + // reaching definition (at root elem type) before handing it to + // `getStored`. 
+ reachingDefAtStore = convertSlotValueToAliasValue( + reachingDef, aliasSlot, slot, info.aliasMap, builder); + assert(reachingDefAtStore && + "projectSlotValueToAliasValue contract violation"); + } + Value stored = + memOp.getStored(aliasSlot, builder, reachingDefAtStore, dataLayout); assert(stored && "a memory operation storing to a slot must provide a " "new definition of the slot"); - reachingDef = stored; + // `replacedValuesMap` keeps `stored` at `aliasSlot.elemType` for + // `visitReplacedValues`; the new reaching definition is tracked at + // the root slot's elem type, so project `stored` back. replacedValuesMap[memOp] = stored; + if (aliasSlot.ptr != slot.ptr) { + stored = convertAliasValueToSlotValue(stored, aliasSlot, reachingDef, + slot, info.aliasMap, builder); + assert(stored && "projectAliasValueToSlotValue contract violation"); + } + reachingDef = stored; } } @@ -764,11 +814,22 @@ void MemorySlotPromoter::removeBlockingUses(Region *region) { reachingDef = getOrCreateDefaultValue(); builder.setInsertionPointAfter(toPromote); - if (toPromoteMemOp.removeBlockingUses(slot, blockingUsesMap[toPromote], - builder, reachingDef, - dataLayout) == DeletionKind::Delete) + MemorySlot aliasSlot = + getOpAliasSlot(toPromote, slot, info.aliasMap).value_or(slot); + Value reachingDefAtBlockingUse = reachingDef; + if (aliasSlot.ptr != slot.ptr) { + // Project the reaching definition to `aliasSlot.elemType` to match + // what `toPromoteMemOp` sees. 
+ reachingDefAtBlockingUse = convertSlotValueToAliasValue( + reachingDef, aliasSlot, slot, info.aliasMap, builder); + assert(reachingDefAtBlockingUse && + "projectSlotValueToAliasValue contract violation"); + } + if (toPromoteMemOp.removeBlockingUses( + aliasSlot, blockingUsesMap[toPromote], builder, + reachingDefAtBlockingUse, dataLayout) == DeletionKind::Delete) toErase.insert(toPromote); - if (toPromoteMemOp.storesTo(slot)) + if (toPromoteMemOp.storesTo(aliasSlot)) if (Value replacedValue = replacedValuesMap[toPromoteMemOp]) replacedValues.push_back({toPromoteMemOp, replacedValue}); continue; diff --git a/mlir/test/Transforms/mem2reg.mlir b/mlir/test/Transforms/mem2reg.mlir index 94b721cf28dcf..551f913b49313 100644 --- a/mlir/test/Transforms/mem2reg.mlir +++ b/mlir/test/Transforms/mem2reg.mlir @@ -181,3 +181,223 @@ func.func @poison_insertion_point(%val: f64) { ^bb3: return } + +// ----- + +// Verifies that mem2reg promotes a memory slot accessed through a transparent +// alias operation exposing itself via `getPromotableSlotAliases`. The +// conditional store on the alias in ^bb1 must be discovered as a defining +// block; otherwise, the merge point at ^bb2 would lack a block argument, +// silently dropping the conditional update. 
+ +// CHECK-LABEL: func.func @promotable_through_alias +// CHECK-SAME: (%[[A:.*]]: i32, %[[COND:.*]]: i1) -> i32 +// CHECK-NOT: test.multi_slot_alloca +// CHECK-NOT: test.transparent_alias +// CHECK: %[[C42:.*]] = arith.constant 42 : i32 +// CHECK: cf.cond_br %[[COND]], ^[[BB1:.*]], ^[[BB2:.*]](%[[C42]] : i32) +// CHECK: ^[[BB1]]: +// CHECK: cf.br ^[[BB2]](%[[A]] : i32) +// CHECK: ^[[BB2]](%[[MERGE:.*]]: i32): +// CHECK: return %[[MERGE]] : i32 +func.func @promotable_through_alias(%a: i32, %cond: i1) -> i32 { + %c42 = arith.constant 42 : i32 + %slot = test.multi_slot_alloca : () -> memref + %alias = test.transparent_alias %slot : (memref) -> memref + memref.store %c42, %alias[] : memref + cf.cond_br %cond, ^bb1, ^bb2 +^bb1: + memref.store %a, %alias[] : memref + cf.br ^bb2 +^bb2: + %v = memref.load %alias[] : memref + return %v : i32 +} + +// ----- + +// Type-changing transparent alias: the store and load access the slot as f32 +// while the underlying allocation is i32. mem2reg materializes an +// `unrealized_conversion_cast` at the store (f32 → i32 via `projectAliasValueToSlotValue`) +// and at the load (i32 → f32 via `projectSlotValueToAliasValue`). + +// CHECK-LABEL: func.func @promotable_through_cast_alias +// CHECK-SAME: (%[[A:.*]]: f32) -> f32 +// CHECK-NOT: test.multi_slot_alloca +// CHECK-NOT: test.transparent_cast_alias +// CHECK: %[[I32:.*]] = builtin.unrealized_conversion_cast %[[A]] : f32 to i32 +// CHECK: %{{.*}} = builtin.unrealized_conversion_cast %[[I32]] : i32 to f32 +// CHECK: return %{{.*}} : f32 +func.func @promotable_through_cast_alias(%a: f32) -> f32 { + %slot = test.multi_slot_alloca : () -> memref + %alias = test.transparent_cast_alias %slot : (memref) -> memref + memref.store %a, %alias[] : memref + %v = memref.load %alias[] : memref + return %v : f32 +} + +// ----- + +// Same as above with a conditional store across blocks. The merge-point +// block argument uses the root slot's element type (i32). 
Casts are inserted +// at the store sites (f32 → i32 via `projectAliasValueToSlotValue`) and the +// load site (i32 → f32 via `projectSlotValueToAliasValue`). + +// CHECK-LABEL: func.func @promotable_through_cast_alias_blocks +// CHECK-SAME: (%[[A:.*]]: f32, %[[COND:.*]]: i1) -> f32 +// CHECK-NOT: test.multi_slot_alloca +// CHECK-NOT: test.transparent_cast_alias +// CHECK: %[[CST:.*]] = arith.constant 1.000000e+00 : f32 +// CHECK: %[[CST_I32:.*]] = builtin.unrealized_conversion_cast %[[CST]] : f32 to i32 +// CHECK: cf.cond_br %[[COND]], ^[[BB1:.*]], ^[[BB2:.*]](%[[CST_I32]] : i32) +// CHECK: ^[[BB1]]: +// CHECK: %[[A_I32:.*]] = builtin.unrealized_conversion_cast %[[A]] : f32 to i32 +// CHECK: cf.br ^[[BB2]](%[[A_I32]] : i32) +// CHECK: ^[[BB2]](%[[MERGE:.*]]: i32): +// CHECK: %[[MERGE_F32:.*]] = builtin.unrealized_conversion_cast %[[MERGE]] : i32 to f32 +// CHECK: return %[[MERGE_F32]] : f32 +func.func @promotable_through_cast_alias_blocks(%a: f32, %cond: i1) -> f32 { + %cst = arith.constant 1.0 : f32 + %slot = test.multi_slot_alloca : () -> memref + %alias = test.transparent_cast_alias %slot : (memref) -> memref + memref.store %cst, %alias[] : memref + cf.cond_br %cond, ^bb1, ^bb2 +^bb1: + memref.store %a, %alias[] : memref + cf.br ^bb2 +^bb2: + %v = memref.load %alias[] : memref + return %v : f32 +} + +// ----- + +// Regression test: the alias is defined in the parent region, but the store +// is in a nested region (`scf.if`). The new blocking use must be registered +// under the store's region; otherwise, `removeBlockingUses` fails the region +// invariant after `scf.if` rebuilds itself in `finalizePromotion`. 
+ +// CHECK-LABEL: func.func @promotable_through_alias_across_regions +// CHECK-SAME: (%[[COND:.*]]: i1, %[[A:.*]]: i32) +// CHECK-NOT: test.multi_slot_alloca +// CHECK-NOT: test.transparent_alias +// CHECK-NOT: memref.store +// CHECK: scf.if %[[COND]] +func.func @promotable_through_alias_across_regions(%cond: i1, %a: i32) { + %slot = test.multi_slot_alloca : () -> memref + %alias = test.transparent_alias %slot : (memref) -> memref + scf.if %cond { + memref.store %a, %alias[] : memref + } + return +} + +// ----- + +// Mirror case: the alias is created *inside* `scf.if`, used to store an +// `f32` value through a type-changing alias, while the parent `i32` slot +// is read outside. The alias-to-slot projection (`f32` -> `i32`) must run +// *inside* the region (where the alias is alive) and the resulting `i32` +// value must be threaded out of `scf.if` via its `setupPromotion`/ +// `finalizePromotion` hooks to feed the parent load. + +// CHECK-LABEL: func.func @alias_inside_region_parent_read_outside +// CHECK-SAME: (%[[COND:.*]]: i1, %[[A:.*]]: f32, %[[INIT:.*]]: i32) -> i32 +// CHECK-NOT: test.multi_slot_alloca +// CHECK-NOT: test.transparent_cast_alias +// CHECK-NOT: memref.store +// CHECK-NOT: memref.load +// CHECK: %[[RES:.*]] = scf.if %[[COND]] -> (i32) +// CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %[[A]] : f32 to i32 +// CHECK: scf.yield %[[CAST]] : i32 +// CHECK: } else { +// CHECK: scf.yield %[[INIT]] : i32 +// CHECK: } +// CHECK: return %[[RES]] : i32 +func.func @alias_inside_region_parent_read_outside(%cond: i1, %a: f32, + %init: i32) -> i32 { + %slot = test.multi_slot_alloca : () -> memref + memref.store %init, %slot[] : memref + scf.if %cond { + %alias = test.transparent_cast_alias %slot : (memref) -> memref + memref.store %a, %alias[] : memref + } + %v = memref.load %slot[] : memref + return %v : i32 +} + +// ----- + +// Chained aliasers: an identity alias is aliased by a type-changing alias. 
+// The alias-map walk must follow both hops and project through each step. + +// CHECK-LABEL: func.func @promotable_through_chained_aliases +// CHECK-SAME: (%[[A:.*]]: f32) -> f32 +// CHECK-NOT: test.multi_slot_alloca +// CHECK-NOT: test.transparent_alias +// CHECK-NOT: test.transparent_cast_alias +// CHECK: %[[I32:.*]] = builtin.unrealized_conversion_cast %[[A]] : f32 to i32 +// CHECK: %{{.*}} = builtin.unrealized_conversion_cast %[[I32]] : i32 to f32 +// CHECK: return %{{.*}} : f32 +func.func @promotable_through_chained_aliases(%a: f32) -> f32 { + %slot = test.multi_slot_alloca : () -> memref + %alias1 = test.transparent_alias %slot : (memref) -> memref + %alias2 = test.transparent_cast_alias %alias1 : (memref) -> memref + memref.store %a, %alias2[] : memref + %v = memref.load %alias2[] : memref + return %v : f32 +} + +// ----- + +// Dual-alias case: a single aliaser op exposes two simultaneously usable +// aliases of the same parent slot (signless i32) at different signednesses +// (signed and unsigned i32). `getPromotableSlotAliases` populates two +// entries for that operand, both of which end up in the alias map. The +// store reaches the slot through the signed alias and the load reaches it +// through the unsigned alias. 
+ +// CHECK-LABEL: func.func @promotable_through_dual_alias +// CHECK-SAME: (%[[A:.*]]: si32) -> ui32 +// CHECK-NOT: memref.alloca +// CHECK-NOT: test.transparent_dual_alias +// CHECK-NOT: memref.store +// CHECK-NOT: memref.load +// CHECK: %[[A_I32:.*]] = builtin.unrealized_conversion_cast %[[A]] : si32 to i32 +// CHECK: %[[A_UI32:.*]] = builtin.unrealized_conversion_cast %[[A_I32]] : i32 to ui32 +// CHECK: return %[[A_UI32]] : ui32 +func.func @promotable_through_dual_alias(%a: si32) -> ui32 { + %slot = memref.alloca() : memref + %signed, %unsigned = test.transparent_dual_alias %slot + : (memref) -> (memref, memref) + memref.store %a, %signed[] : memref + %v = memref.load %unsigned[] : memref + return %v : ui32 +} + +// ----- + +// Partial aliasing: the parent slot stores a `complex` (a 2-tuple of +// `f32`), and the alias exposes one component as a `memref`. +// The alias-to-slot projection reconstructs the parent value by consuming the +// current reaching definition (modelled as a 2-input `unrealized_conversion_cast`: +// new sub-value + parent reaching def). The slot-to-alias projection extracts +// a component (1-input cast). 
+
+// CHECK-LABEL: func.func @promotable_through_partial_alias
+// CHECK-SAME: (%[[X:.*]]: f32) -> f32
+// CHECK-NOT: memref.alloca
+// CHECK-NOT: test.partial_alias
+// CHECK-NOT: memref.store
+// CHECK-NOT: memref.load
+// CHECK: %[[POISON:.*]] = ub.poison : complex<f32>
+// CHECK: %[[NEW:.*]] = builtin.unrealized_conversion_cast %[[X]], %[[POISON]] : f32, complex<f32> to complex<f32>
+// CHECK: %[[R:.*]] = builtin.unrealized_conversion_cast %[[NEW]] : complex<f32> to f32
+// CHECK: return %[[R]] : f32
+func.func @promotable_through_partial_alias(%x: f32) -> f32 {
+  %slot = memref.alloca() : memref<complex<f32>>
+  %alias = test.partial_alias %slot : (memref<complex<f32>>) -> memref<f32>
+  memref.store %x, %alias[] : memref<f32>
+  %v = memref.load %alias[] : memref<f32>
+  return %v : f32
+}
diff --git a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
index a3ff397ac26db..8315bd7cef783 100644
--- a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
+++ b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
@@ -1769,6 +1769,217 @@ TestMultiSlotAlloca::handleDestructuringComplete(
   return createNewMultiAllocaWithoutSlot(slot, builder, *this);
 }
 
+//===----------------------------------------------------------------------===//
+// TestTransparentAlias
+//===----------------------------------------------------------------------===//
+
+/// Appends a slot for the op's result, typed with the result memref's element
+/// type, when the queried operand is the op's `source`; otherwise a no-op.
+void TestTransparentAlias::getPromotableSlotAliases(
+    OpOperand &aliasedSlotPointerOperand, const MemorySlot & /*parentSlot*/,
+    SmallVectorImpl<MemorySlot> &newMemorySlots) {
+  if (aliasedSlotPointerOperand.get() != getSource())
+    return;
+  Type elemType = cast<MemRefType>(getResult().getType()).getElementType();
+  newMemorySlots.push_back(MemorySlot{getResult(), elemType});
+}
+
+/// Always succeeds, adding every use of the result as a new blocking use.
+bool TestTransparentAlias::canUsesBeRemoved(
+    const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses,
+    const DataLayout &dataLayout) {
+  for (OpOperand &use : getResult().getUses())
+    newBlockingUses.push_back(&use);
+  return true;
+}
+
+/// Unconditionally requests deletion of the op.
+DeletionKind TestTransparentAlias::removeBlockingUses(
+    const SmallPtrSetImpl<OpOperand *> &blockingUses, OpBuilder &builder) {
+  return DeletionKind::Delete;
+}
+
+//===----------------------------------------------------------------------===//
+// TestTransparentCastAlias
+//===----------------------------------------------------------------------===//
+
+/// Appends a slot for the op's result, typed with the result memref's element
+/// type, when the queried operand is the op's `source`; otherwise a no-op.
+void TestTransparentCastAlias::getPromotableSlotAliases(
+    OpOperand &aliasedSlotPointerOperand, const MemorySlot & /*parentSlot*/,
+    SmallVectorImpl<MemorySlot> &newMemorySlots) {
+  if (aliasedSlotPointerOperand.get() != getSource())
+    return;
+  Type elemType = cast<MemRefType>(getResult().getType()).getElementType();
+  newMemorySlots.push_back(MemorySlot{getResult(), elemType});
+}
+
+/// Always succeeds, adding every use of the result as a new blocking use.
+bool TestTransparentCastAlias::canUsesBeRemoved(
+    const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses,
+    const DataLayout &dataLayout) {
+  for (OpOperand &use : getResult().getUses())
+    newBlockingUses.push_back(&use);
+  return true;
+}
+
+/// Unconditionally requests deletion of the op.
+DeletionKind TestTransparentCastAlias::removeBlockingUses(
+    const SmallPtrSetImpl<OpOperand *> &blockingUses, OpBuilder &builder) {
+  return DeletionKind::Delete;
+}
+
+/// Returns `slotValue` unchanged when it already has the alias element type;
+/// otherwise casts it to that type with an unrealized conversion cast.
+Value TestTransparentCastAlias::projectSlotValueToAliasValue(
+    OpOperand & /*aliasedSlotPointerOperand*/,
+    const MemorySlot & /*parentSlot*/, const MemorySlot &aliasSlot,
+    Value slotValue, OpBuilder &builder) {
+  if (slotValue.getType() == aliasSlot.elemType)
+    return slotValue;
+  return UnrealizedConversionCastOp::create(builder, getLoc(),
+                                            aliasSlot.elemType, slotValue)
+      .getResult(0);
+}
+
+/// Returns `aliasValue` unchanged when it already has the parent element type;
+/// otherwise casts it to that type with an unrealized conversion cast. The
+/// reaching definition is ignored.
+Value TestTransparentCastAlias::projectAliasValueToSlotValue(
+    OpOperand & /*aliasedSlotPointerOperand*/, const MemorySlot &parentSlot,
+    const MemorySlot & /*aliasSlot*/, Value aliasValue, Value /*reachingDef*/,
+    OpBuilder &builder) {
+  if (aliasValue.getType() == parentSlot.elemType)
+    return aliasValue;
+  return UnrealizedConversionCastOp::create(builder, getLoc(),
+                                            parentSlot.elemType, aliasValue)
+      .getResult(0);
+}
+
+//===----------------------------------------------------------------------===//
+// TestTransparentDualAlias
+//===----------------------------------------------------------------------===//
+
+/// Appends one slot per result (signed first, then unsigned), each typed with
+/// its result memref's element type, when the queried operand is `source`.
+void TestTransparentDualAlias::getPromotableSlotAliases(
+    OpOperand &aliasedSlotPointerOperand, const MemorySlot & /*parentSlot*/,
+    SmallVectorImpl<MemorySlot> &newMemorySlots) {
+  if (aliasedSlotPointerOperand.get() != getSource())
+    return;
+  // Expose both results as aliases of the same parent at their own
+  // signedness (same bit width as the parent's signless i32).
+  newMemorySlots.push_back(MemorySlot{
+      getResultSigned(),
+      cast<MemRefType>(getResultSigned().getType()).getElementType()});
+  newMemorySlots.push_back(MemorySlot{
+      getResultUnsigned(),
+      cast<MemRefType>(getResultUnsigned().getType()).getElementType()});
+}
+
+/// Always succeeds, adding every use of both results as a new blocking use.
+bool TestTransparentDualAlias::canUsesBeRemoved(
+    const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses,
+    const DataLayout &dataLayout) {
+  for (Value result : getResults())
+    for (OpOperand &use : result.getUses())
+      newBlockingUses.push_back(&use);
+  return true;
+}
+
+/// Unconditionally requests deletion of the op.
+DeletionKind TestTransparentDualAlias::removeBlockingUses(
+    const SmallPtrSetImpl<OpOperand *> &blockingUses, OpBuilder &builder) {
+  return DeletionKind::Delete;
+}
+
+/// Returns `slotValue` unchanged when it already has the alias element type;
+/// otherwise casts it to that type with an unrealized conversion cast.
+Value TestTransparentDualAlias::projectSlotValueToAliasValue(
+    OpOperand & /*aliasedSlotPointerOperand*/,
+    const MemorySlot & /*parentSlot*/, const MemorySlot &aliasSlot,
+    Value slotValue, OpBuilder &builder) {
+  if (slotValue.getType() == aliasSlot.elemType)
+    return slotValue;
+  return UnrealizedConversionCastOp::create(builder, getLoc(),
+                                            aliasSlot.elemType, slotValue)
+      .getResult(0);
+}
+
+/// Returns `aliasValue` unchanged when it already has the parent element type;
+/// otherwise casts it to that type with an unrealized conversion cast. The
+/// reaching definition is ignored.
+Value TestTransparentDualAlias::projectAliasValueToSlotValue(
+    OpOperand & /*aliasedSlotPointerOperand*/, const MemorySlot &parentSlot,
+    const MemorySlot & /*aliasSlot*/, Value aliasValue, Value /*reachingDef*/,
+    OpBuilder &builder) {
+  if (aliasValue.getType() == parentSlot.elemType)
+    return aliasValue;
+  return UnrealizedConversionCastOp::create(builder, getLoc(),
+                                            parentSlot.elemType, aliasValue)
+      .getResult(0);
+}
+
+//===----------------------------------------------------------------------===//
+// TestPartialAlias
+//===----------------------------------------------------------------------===//
+
+/// Appends a slot for the op's result, typed with the result memref's element
+/// type, when the queried operand is the op's `source`; otherwise a no-op.
+void TestPartialAlias::getPromotableSlotAliases(
+    OpOperand &aliasedSlotPointerOperand, const MemorySlot & /*parentSlot*/,
+    SmallVectorImpl<MemorySlot> &newMemorySlots) {
+  if (aliasedSlotPointerOperand.get() != getSource())
+    return;
+  newMemorySlots.push_back(MemorySlot{
+      getResult(), cast<MemRefType>(getResult().getType()).getElementType()});
+}
+
+/// Always succeeds, adding every use of the result as a new blocking use.
+bool TestPartialAlias::canUsesBeRemoved(
+    const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses,
+    const DataLayout &dataLayout) {
+  for (OpOperand &use : getResult().getUses())
+    newBlockingUses.push_back(&use);
+  return true;
+}
+
+/// Unconditionally requests deletion of the op.
+DeletionKind TestPartialAlias::removeBlockingUses(
+    const SmallPtrSetImpl<OpOperand *> &blockingUses, OpBuilder &builder) {
+  return DeletionKind::Delete;
+}
+
+/// Always casts `slotValue` to the alias element type with a single-input
+/// unrealized conversion cast (no same-type shortcut).
+Value TestPartialAlias::projectSlotValueToAliasValue(
+    OpOperand & /*aliasedSlotPointerOperand*/,
+    const MemorySlot & /*parentSlot*/, const MemorySlot &aliasSlot,
+    Value slotValue, OpBuilder &builder) {
+  // Sub-value extraction: 1-input cast.
+  return UnrealizedConversionCastOp::create(builder, getLoc(),
+                                            aliasSlot.elemType, slotValue)
+      .getResult(0);
+}
+
+/// Builds the new parent value with a two-input unrealized conversion cast of
+/// `aliasValue` and `reachingDef`, yielding the parent element type.
+Value TestPartialAlias::projectAliasValueToSlotValue(
+    OpOperand & /*aliasedSlotPointerOperand*/, const MemorySlot &parentSlot,
+    const MemorySlot & /*aliasSlot*/, Value aliasValue, Value reachingDef,
+    OpBuilder &builder) {
+  // Sub-value insertion into the current reaching definition: emit a 2-input
+  // cast taking both the new alias value and the existing parent value.
+  return UnrealizedConversionCastOp::create(builder, getLoc(),
+                                            parentSlot.elemType,
+                                            ValueRange{aliasValue, reachingDef})
+      .getResult(0);
+}
+
 namespace {
 /// Returns test dialect's memref layout for test dialect's tensor encoding when
 /// applicable.
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index 56db6837b870c..a1529e3020c82 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -3941,6 +3941,65 @@ def TestMultiSlotAlloca : TEST_Op<"multi_slot_alloca",
   let assemblyFormat = "attr-dict `:` functional-type(operands, results)";
 }
 
+// NOTE(review): the `DeclareOpInterfaceMethods` arguments below were
+// reconstructed from the methods each op defines in TestOpDefs.cpp; confirm
+// the interface names and method lists against the interface definitions.
+
+// Transparent alias of a memref slot with the same element type. Exercises
+// alias-chain handling in mem2reg with identity projections.
+def TestTransparentAlias : TEST_Op<"transparent_alias",
+    [DeclareOpInterfaceMethods<PromotableAliaserInterface>,
+     DeclareOpInterfaceMethods<PromotableOpInterface>]> {
+  let arguments = (ins MemRefOf<[I32]>:$source);
+  let results = (outs MemRefOf<[I32]>:$result);
+  let assemblyFormat = "$source attr-dict `:` functional-type($source, $result)";
+}
+
+// Type-changing transparent alias of a memref slot. The projection methods
+// bridge the element types using `builtin.unrealized_conversion_cast`.
+def TestTransparentCastAlias : TEST_Op<"transparent_cast_alias",
+    [DeclareOpInterfaceMethods<PromotableAliaserInterface,
+                               ["projectSlotValueToAliasValue",
+                                "projectAliasValueToSlotValue"]>,
+     DeclareOpInterfaceMethods<PromotableOpInterface>]> {
+  let arguments = (ins MemRefOf<[I32, F32]>:$source);
+  let results = (outs MemRefOf<[I32, F32]>:$result);
+  let assemblyFormat = "$source attr-dict `:` functional-type($source, $result)";
+}
+
+// Transparent alias of a memref slot exposing two simultaneously-usable
+// aliases of the same bit-width at different signednesses (signed and
+// unsigned 32-bit integers, both aliasing the signless i32 storage).
+// Exercises mem2reg's handling of an aliaser whose
+// `getPromotableSlotAliases` returns more than one entry for a single
+// aliased operand.
+def TestTransparentDualAlias : TEST_Op<"transparent_dual_alias",
+    [DeclareOpInterfaceMethods<PromotableAliaserInterface,
+                               ["projectSlotValueToAliasValue",
+                                "projectAliasValueToSlotValue"]>,
+     DeclareOpInterfaceMethods<PromotableOpInterface>]> {
+  let arguments = (ins MemRefOf<[I32]>:$source);
+  let results = (outs MemRefOf<[SI32]>:$result_signed,
+                      MemRefOf<[UI32]>:$result_unsigned);
+  let assemblyFormat = "$source attr-dict `:` functional-type($source, results)";
+}
+
+// Partial alias of a memref slot: exposes a sub-value of the parent slot.
+// The slot-to-alias projection is a 1-input `unrealized_conversion_cast`
+// (sub-value extraction). The alias-to-slot projection is a 2-input cast
+// taking both the new alias value and the parent slot's reaching definition
+// (sub-value insertion). This exercises mem2reg's `reachingDef` plumbing
+// for partial sub-aliases.
+def TestPartialAlias : TEST_Op<"partial_alias",
+    [DeclareOpInterfaceMethods<PromotableAliaserInterface,
+                               ["projectSlotValueToAliasValue",
+                                "projectAliasValueToSlotValue"]>,
+     DeclareOpInterfaceMethods<PromotableOpInterface>]> {
+  let arguments = (ins AnyMemRef:$source);
+  let results = (outs AnyMemRef:$result);
+  let assemblyFormat = "$source attr-dict `:` functional-type($source, $result)";
+}
+
 //===----------------------------------------------------------------------===//
 // Test allocation Ops
 //===----------------------------------------------------------------------===//