From 5a35479975b569fc726227b387b3c6934027cfdf Mon Sep 17 00:00:00 2001 From: Meghana Gupta Date: Wed, 5 Apr 2023 15:30:34 -0700 Subject: [PATCH 1/2] Move around utilities, move isWriteAllocation outside the class --- lib/SILOptimizer/Transforms/SILMem2Reg.cpp | 369 ++++++++++----------- 1 file changed, 183 insertions(+), 186 deletions(-) diff --git a/lib/SILOptimizer/Transforms/SILMem2Reg.cpp b/lib/SILOptimizer/Transforms/SILMem2Reg.cpp index 8c137bfcbc829..b0672eb7a75d9 100644 --- a/lib/SILOptimizer/Transforms/SILMem2Reg.cpp +++ b/lib/SILOptimizer/Transforms/SILMem2Reg.cpp @@ -374,6 +374,189 @@ static void collectLoads(SILInstruction *i, } } +/// Returns true if \p I is an address of a LoadInst, skipping struct and +/// tuple address projections. Sets \p singleBlock to null if the load (or +/// it's address is not in \p singleBlock. +/// This function looks for these patterns: +/// 1. (load %ASI) +/// 2. (load (struct_element_addr/tuple_element_addr/unchecked_addr_cast %ASI)) +static bool isAddressForLoad(SILInstruction *load, SILBasicBlock *&singleBlock, + bool &involvesUntakableProjection) { + if (auto *li = dyn_cast(load)) { + // SILMem2Reg is disabled when we find a load [take] of an untakable + // projection. See below for further discussion. + if (involvesUntakableProjection && + li->getOwnershipQualifier() == LoadOwnershipQualifier::Take) { + return false; + } + return true; + } + + if (isa(load)) { + if (involvesUntakableProjection) { + return false; + } + return true; + } + + if (!isa(load) && !isa(load) && + !isa(load)) + return false; + + // None of the projections are lowered to owned values: + // + // struct_element_addr and tuple_element_addr instructions are lowered to + // struct_extract and tuple_extract instructions respectively. These both + // have guaranteed ownership (since they forward ownership and can only be + // used on a guaranteed value). + // + // unchecked_addr_cast instructions are lowered to unchecked_bitwise_cast + // instructions. These have unowned ownership. + // + // So in no case can a load [take] be lowered into the new projected value + // (some sequence of struct_extract, tuple_extract, and + // unchecked_bitwise_cast instructions) taking over ownership of the original + // value. Without additional changes. + // + // For example, for a sequence of element_addr projections could be + // transformed into a sequence of destructure instructions, followed by a + // sequence of structure instructions where all the original values are + // kept in place but the taken value is "knocked out" and replaced with + // undef. The running value would then be set to the newly structed + // "knockout" value. + // + // Alternatively, a new copy of the running value could be created and a new + // set of destroys placed after its last uses. + involvesUntakableProjection = true; + + // Recursively search for other (non-)loads in the instruction's uses. + auto *svi = cast(load); + for (auto *use : svi->getUses()) { + SILInstruction *user = use->getUser(); + if (user->getParent() != singleBlock) + singleBlock = nullptr; + + if (!isAddressForLoad(user, singleBlock, involvesUntakableProjection)) + return false; + } + return true; +} + +/// Returns true if \p I is a dead struct_element_addr or tuple_element_addr. +static bool isDeadAddrProjection(SILInstruction *inst) { + if (!isa(inst) && !isa(inst) && + !isa(inst)) + return false; + + // Recursively search for uses which are dead themselves. + for (auto UI : cast(inst)->getUses()) { + SILInstruction *II = UI->getUser(); + if (!isDeadAddrProjection(II)) + return false; + } + return true; +} + +/// Returns true if this \p def is captured. +/// Sets \p inSingleBlock to true if all uses of \p def are in a single block. +static bool isCaptured(SILValue def, bool *inSingleBlock) { + SILBasicBlock *singleBlock = def->getParentBlock(); + + // For all users of the def + for (auto *use : def->getUses()) { + SILInstruction *user = use->getUser(); + + if (user->getParent() != singleBlock) + singleBlock = nullptr; + + // Loads are okay. + bool involvesUntakableProjection = false; + if (isAddressForLoad(user, singleBlock, involvesUntakableProjection)) + continue; + + // We can store into an AllocStack (but not the pointer). + if (auto *si = dyn_cast(user)) + if (si->getDest() == def) + continue; + + if (auto *sbi = dyn_cast(user)) { + if (sbi->getDest() == def) { + if (isCaptured(sbi, inSingleBlock)) { + return true; + } + continue; + } + } + + // Deallocation is also okay, as are DebugValue w/ address value. We will + // promote the latter into normal DebugValue. + if (isa(user) || DebugValueInst::hasAddrVal(user)) + continue; + + if (isa(user)) + continue; + + // Destroys of loadable types can be rewritten as releases, so + // they are fine. + if (auto *dai = dyn_cast(user)) + if (dai->getOperand()->getType().isLoadable(*dai->getFunction())) + continue; + + // Other instructions are assumed to capture the AllocStack. + LLVM_DEBUG(llvm::dbgs() << "*** AllocStack is captured by: " << *user); + return true; + } + + // None of the users capture the AllocStack. + *inSingleBlock = (singleBlock != nullptr); + return false; +} + +/// Returns true if the \p def is only stored into. +static bool isWriteOnlyAllocation(SILValue def) { + assert(isa(def) || isa(def)); + + // For all users of the def: + for (auto *use : def->getUses()) { + SILInstruction *user = use->getUser(); + + // It is okay to store into the AllocStack. + if (auto *si = dyn_cast(user)) + if (!isa(si->getSrc())) + continue; + + if (auto *sbi = dyn_cast(user)) { + // Since all uses of the alloc_stack will be via store_borrow, check if + // there are any non-writes from the store_borrow location. + if (!isWriteOnlyAllocation(sbi)) { + return false; + } + continue; + } + + // Deallocation is also okay. + if (isa(user)) + continue; + + if (isa(user)) + continue; + + // If we haven't already promoted the AllocStack, we may see + // DebugValue uses. + if (DebugValueInst::hasAddrVal(user)) + continue; + + if (isDeadAddrProjection(user)) + continue; + + // Can't do anything else with it. + LLVM_DEBUG(llvm::dbgs() << "*** AllocStack has non-write use: " << *user); + return false; + } + + return true; +} + static void replaceLoad(SILInstruction *inst, SILValue newValue, AllocStackInst *asi, SILBuilderContext &ctx, InstructionDeleter &deleter, @@ -1664,9 +1847,6 @@ class MemoryToRegisters { return *domTreeLevels; } - /// Check if \p def is a write-only allocation. - bool isWriteOnlyAllocation(SILValue def); - /// Promote all of the AllocStacks in a single basic block in one /// linear scan. Note: This function deletes all of the users of the /// AllocStackInst, including the DeallocStackInst but it does not remove the @@ -1689,189 +1869,6 @@ class MemoryToRegisters { } // end anonymous namespace -/// Returns true if \p I is an address of a LoadInst, skipping struct and -/// tuple address projections. Sets \p singleBlock to null if the load (or -/// it's address is not in \p singleBlock. -/// This function looks for these patterns: -/// 1. (load %ASI) -/// 2. (load (struct_element_addr/tuple_element_addr/unchecked_addr_cast %ASI)) -static bool isAddressForLoad(SILInstruction *load, SILBasicBlock *&singleBlock, - bool &involvesUntakableProjection) { - if (auto *li = dyn_cast(load)) { - // SILMem2Reg is disabled when we find a load [take] of an untakable - // projection. See below for further discussion. - if (involvesUntakableProjection && - li->getOwnershipQualifier() == LoadOwnershipQualifier::Take) { - return false; - } - return true; - } - - if (isa(load)) { - if (involvesUntakableProjection) { - return false; - } - return true; - } - - if (!isa(load) && !isa(load) && - !isa(load)) - return false; - - // None of the projections are lowered to owned values: - // - // struct_element_addr and tuple_element_addr instructions are lowered to - // struct_extract and tuple_extract instructions respectively. These both - // have guaranteed ownership (since they forward ownership and can only be - // used on a guaranteed value). - // - // unchecked_addr_cast instructions are lowered to unchecked_bitwise_cast - // instructions. These have unowned ownership. - // - // So in no case can a load [take] be lowered into the new projected value - // (some sequence of struct_extract, tuple_extract, and - // unchecked_bitwise_cast instructions) taking over ownership of the original - // value. Without additional changes. - // - // For example, for a sequence of element_addr projections could be - // transformed into a sequence of destructure instructions, followed by a - // sequence of structure instructions where all the original values are - // kept in place but the taken value is "knocked out" and replaced with - // undef. The running value would then be set to the newly structed - // "knockout" value. - // - // Alternatively, a new copy of the running value could be created and a new - // set of destroys placed after its last uses. - involvesUntakableProjection = true; - - // Recursively search for other (non-)loads in the instruction's uses. - auto *svi = cast(load); - for (auto *use : svi->getUses()) { - SILInstruction *user = use->getUser(); - if (user->getParent() != singleBlock) - singleBlock = nullptr; - - if (!isAddressForLoad(user, singleBlock, involvesUntakableProjection)) - return false; - } - return true; -} - -/// Returns true if \p I is a dead struct_element_addr or tuple_element_addr. -static bool isDeadAddrProjection(SILInstruction *inst) { - if (!isa(inst) && !isa(inst) && - !isa(inst)) - return false; - - // Recursively search for uses which are dead themselves. - for (auto UI : cast(inst)->getUses()) { - SILInstruction *II = UI->getUser(); - if (!isDeadAddrProjection(II)) - return false; - } - return true; -} - -/// Returns true if this \p def is captured. -/// Sets \p inSingleBlock to true if all uses of \p def are in a single block. -static bool isCaptured(SILValue def, bool *inSingleBlock) { - SILBasicBlock *singleBlock = def->getParentBlock(); - - // For all users of the def - for (auto *use : def->getUses()) { - SILInstruction *user = use->getUser(); - - if (user->getParent() != singleBlock) - singleBlock = nullptr; - - // Loads are okay. - bool involvesUntakableProjection = false; - if (isAddressForLoad(user, singleBlock, involvesUntakableProjection)) - continue; - - // We can store into an AllocStack (but not the pointer). - if (auto *si = dyn_cast(user)) - if (si->getDest() == def) - continue; - - if (auto *sbi = dyn_cast(user)) { - if (sbi->getDest() == def) { - if (isCaptured(sbi, inSingleBlock)) { - return true; - } - continue; - } - } - - // Deallocation is also okay, as are DebugValue w/ address value. We will - // promote the latter into normal DebugValue. - if (isa(user) || DebugValueInst::hasAddrVal(user)) - continue; - - if (isa(user)) - continue; - - // Destroys of loadable types can be rewritten as releases, so - // they are fine. - if (auto *dai = dyn_cast(user)) - if (dai->getOperand()->getType().isLoadable(*dai->getFunction())) - continue; - - // Other instructions are assumed to capture the AllocStack. - LLVM_DEBUG(llvm::dbgs() << "*** AllocStack is captured by: " << *user); - return true; - } - - // None of the users capture the AllocStack. - *inSingleBlock = (singleBlock != nullptr); - return false; -} - -/// Returns true if the \p def is only stored into. -bool MemoryToRegisters::isWriteOnlyAllocation(SILValue def) { - assert(isa(def) || isa(def)); - - // For all users of the def: - for (auto *use : def->getUses()) { - SILInstruction *user = use->getUser(); - - // It is okay to store into the AllocStack. - if (auto *si = dyn_cast(user)) - if (!isa(si->getSrc())) - continue; - - if (auto *sbi = dyn_cast(user)) { - // Since all uses of the alloc_stack will be via store_borrow, check if - // there are any non-writes from the store_borrow location. - if (!isWriteOnlyAllocation(sbi)) { - return false; - } - continue; - } - - // Deallocation is also okay. - if (isa(user)) - continue; - - if (isa(user)) - continue; - - // If we haven't already promoted the AllocStack, we may see - // DebugValue uses. - if (DebugValueInst::hasAddrVal(user)) - continue; - - if (isDeadAddrProjection(user)) - continue; - - // Can't do anything else with it. - LLVM_DEBUG(llvm::dbgs() << "*** AllocStack has non-write use: " << *user); - return false; - } - - return true; -} - void MemoryToRegisters::removeSingleBlockAllocation(AllocStackInst *asi) { LLVM_DEBUG(llvm::dbgs() << "*** Promoting in-block: " << *asi); From 96a6d44cf95ee7e8436e6cfbf4a4076ceb407a1a Mon Sep 17 00:00:00 2001 From: Meghana Gupta Date: Tue, 11 Apr 2023 15:47:12 -0700 Subject: [PATCH 2/2] Enable mem2reg of enum types Previously it was disabled because we have incomplete address lifetimes for such types, and transforming to value form in mem2reg would expose verification errors due to incompleteness. Enable this case here and fixup the lifetimes using the new lifetime completion utility. --- lib/SILOptimizer/Transforms/SILMem2Reg.cpp | 66 +++++++++++++++---- .../mem2reg_lifetime_nontrivial.sil | 1 - test/SILOptimizer/mem2reg_ossa.sil | 64 ++++++++++++++++-- test/SILOptimizer/mem2reg_ossa_nontrivial.sil | 13 ++-- 4 files changed, 120 insertions(+), 24 deletions(-) diff --git a/lib/SILOptimizer/Transforms/SILMem2Reg.cpp b/lib/SILOptimizer/Transforms/SILMem2Reg.cpp index b0672eb7a75d9..f608fc0c02e77 100644 --- a/lib/SILOptimizer/Transforms/SILMem2Reg.cpp +++ b/lib/SILOptimizer/Transforms/SILMem2Reg.cpp @@ -25,6 +25,7 @@ #include "swift/Basic/TaggedUnion.h" #include "swift/SIL/BasicBlockDatastructures.h" #include "swift/SIL/Dominance.h" +#include "swift/SIL/OSSALifetimeCompletion.h" #include "swift/SIL/Projection.h" #include "swift/SIL/SILBuilder.h" #include "swift/SIL/SILFunction.h" @@ -1784,6 +1785,8 @@ void StackAllocationPromoter::promoteAllocationToPhi() { } void StackAllocationPromoter::run() { + auto *function = asi->getFunction(); + // Reduce the number of load/stores in the function to minimum. // After this phase we are left with up to one load and store // per block and the last store is recorded. @@ -1791,6 +1794,33 @@ void StackAllocationPromoter::run() { // Replace AllocStacks with Phi-nodes. promoteAllocationToPhi(); + + // Make sure that all of the allocations were promoted into registers. + assert(isWriteOnlyAllocation(asi) && "Non-write uses left behind"); + + SmallVector valuesToComplete; + + // Enum types may have incomplete lifetimes in address form, when promoted to + // value form after mem2reg, they will end up with incomplete ossa lifetimes. + // Use the lifetime completion utility to complete such lifetimes. + // First, collect the stored values to complete. + if (asi->getType().isOrHasEnum()) { + for (auto it : initializationPoints) { + auto *si = it.second; + auto src = si->getOperand(0); + valuesToComplete.push_back(src); + } + } + + // ... and erase the allocation. + deleter.forceDeleteWithUsers(asi); + + // Now, complete lifetimes! + OSSALifetimeCompletion completion(function, domInfo); + + for (auto it : valuesToComplete) { + completion.completeOSSALifetime(it); + } } //===----------------------------------------------------------------------===// @@ -2091,12 +2121,31 @@ bool MemoryToRegisters::promoteSingleAllocation(AllocStackInst *alloc) { deleter.forceDeleteWithUsers(alloc); return true; } else { - // For enums we require that all uses are in the same block. - // Otherwise there could be a switch_enum of an optional where the none-case - // does not have a destroy of the enum value. - // After transforming such an alloc_stack, the value would leak in the none- - // case block. - if (f.hasOwnership() && alloc->getType().isOrHasEnum()) + auto enableOptimizationForEnum = [](AllocStackInst *asi) { + if (asi->isLexical()) { + return false; + } + for (auto *use : asi->getUses()) { + auto *user = use->getUser(); + if (!isa(user) && !isa(user)) { + continue; + } + auto stored = user->getOperand(CopyLikeInstruction::Src); + if (stored->isLexical()) { + return false; + } + } + return true; + }; + // For stack locs of enum type that are lexical or with lexical stored + // values, we require that all uses are in the same block. This is because + // we can have incomplete lifetime of enum typed addresses, and on + // converting to value form this causes verification error. For all other + // stack locs of enum type, we use the lifetime completion utility to fix + // the lifetime. But when we have a lexical value, the utility can complete + // lifetimes on dead end blocks only. + if (f.hasOwnership() && alloc->getType().isOrHasEnum() && + !enableOptimizationForEnum(alloc)) return false; } @@ -2108,11 +2157,6 @@ bool MemoryToRegisters::promoteSingleAllocation(AllocStackInst *alloc) { StackAllocationPromoter(alloc, domInfo, domTreeLevels, ctx, deleter, instructionsToDelete) .run(); - - // Make sure that all of the allocations were promoted into registers. - assert(isWriteOnlyAllocation(alloc) && "Non-write uses left behind"); - // ... and erase the allocation. - deleter.forceDeleteWithUsers(alloc); return true; } diff --git a/test/SILOptimizer/mem2reg_lifetime_nontrivial.sil b/test/SILOptimizer/mem2reg_lifetime_nontrivial.sil index 4994d3b483293..4965e71565e73 100644 --- a/test/SILOptimizer/mem2reg_lifetime_nontrivial.sil +++ b/test/SILOptimizer/mem2reg_lifetime_nontrivial.sil @@ -1061,7 +1061,6 @@ enum KlassOptional { sil @return_optional_or_error : $@convention(thin) () -> (@owned KlassOptional, @error Error) // CHECK-LABEL: sil [ossa] @test_deadphi4 : -// mem2reg cannot optimize an enum location which spans over multiple blocks. // CHECK: alloc_stack // CHECK-LABEL: } // end sil function 'test_deadphi4' sil [ossa] @test_deadphi4 : $@convention(thin) (@owned KlassOptional) -> () { diff --git a/test/SILOptimizer/mem2reg_ossa.sil b/test/SILOptimizer/mem2reg_ossa.sil index 164972e5fa087..167263c98b673 100644 --- a/test/SILOptimizer/mem2reg_ossa.sil +++ b/test/SILOptimizer/mem2reg_ossa.sil @@ -489,10 +489,10 @@ bb0: return %16 : $((), ()) } -// CHECK-LABEL: sil [ossa] @dont_optimize_optional_in_multiple_blocks : -// CHECK: alloc_stack -// CHECK: } // end sil function 'dont_optimize_optional_in_multiple_blocks' -sil [ossa] @dont_optimize_optional_in_multiple_blocks : $@convention(method) (@guaranteed Optional) -> () { +// CHECK-LABEL: sil [ossa] @test_optional_in_multiple_blocks : +// CHECK-NOT: alloc_stack +// CHECK: } // end sil function 'test_optional_in_multiple_blocks' +sil [ossa] @test_optional_in_multiple_blocks : $@convention(method) (@guaranteed Optional) -> () { bb0(%0 : @guaranteed $Optional): %1 = copy_value %0 : $Optional %32 = alloc_stack $Optional @@ -504,8 +504,62 @@ bb5: br bb7 bb6(%50 : @guaranteed $Klass): - %53 = load [take] %32 : $*Optional + %53 = load [copy] %32 : $*Optional + destroy_value %53 : $Optional + destroy_addr %32 : $*Optional + dealloc_stack %32 : $*Optional + br bb7 + +bb7: + %r = tuple () + return %r : $() +} + +// CHECK-LABEL: sil [ossa] @test_optional_in_multiple_blocks_lexical : +// CHECK: alloc_stack +// CHECK: } // end sil function 'test_optional_in_multiple_blocks_lexical' +sil [ossa] @test_optional_in_multiple_blocks_lexical : $@convention(method) (@guaranteed Optional) -> () { +bb0(%0 : @guaranteed $Optional): + %1 = copy_value %0 : $Optional + %32 = alloc_stack [lexical] $Optional + store %1 to [init] %32 : $*Optional + switch_enum %0 : $Optional, case #Optional.some!enumelt: bb6, case #Optional.none!enumelt: bb5 + +bb5: + dealloc_stack %32 : $*Optional + br bb7 + +bb6(%50 : @guaranteed $Klass): + %53 = load [copy] %32 : $*Optional + destroy_value %53 : $Optional + destroy_addr %32 : $*Optional + dealloc_stack %32 : $*Optional + br bb7 + +bb7: + %r = tuple () + return %r : $() +} + +// CHECK-LABEL: sil [ossa] @test_optional_in_multiple_blocks_lexical_storedvalue : +// CHECK: alloc_stack +// CHECK: } // end sil function 'test_optional_in_multiple_blocks_lexical_storedvalue' +sil [ossa] @test_optional_in_multiple_blocks_lexical_storedvalue : $@convention(method) (@owned Optional) -> () { +bb0(%0 : @owned $Optional): + %1 = copy_value %0 : $Optional + %32 = alloc_stack $Optional + store %0 to [init] %32 : $*Optional + switch_enum %1 : $Optional, case #Optional.some!enumelt: bb6, case #Optional.none!enumelt: bb5 + +bb5: + dealloc_stack %32 : $*Optional + br bb7 + +bb6(%50 : @owned $Klass): + %53 = load [copy] %32 : $*Optional destroy_value %53 : $Optional + destroy_value %50 : $Klass + destroy_addr %32 : $*Optional dealloc_stack %32 : $*Optional br bb7 diff --git a/test/SILOptimizer/mem2reg_ossa_nontrivial.sil b/test/SILOptimizer/mem2reg_ossa_nontrivial.sil index 45fc52cdfb9ac..9c8cabd366801 100644 --- a/test/SILOptimizer/mem2reg_ossa_nontrivial.sil +++ b/test/SILOptimizer/mem2reg_ossa_nontrivial.sil @@ -53,6 +53,11 @@ struct UInt8 { var _value : Builtin.Int8 } +enum KlassOptional { + case some(Klass) + case none +} + sil [ossa] @get_nontrivialstruct : $@convention(thin) () -> @owned NonTrivialStruct sil [ossa] @get_nontrivialenum : $@convention(thin) () -> @owned NonTrivialEnum sil [ossa] @get_optionalnontrivialstruct : $@convention(thin) () -> @owned FakeOptional @@ -1026,16 +1031,10 @@ bb12: return %39 : $() } -enum KlassOptional { - case some(Klass) - case none -} - sil @return_optional_or_error : $@convention(thin) () -> (@owned KlassOptional, @error Error) // CHECK-LABEL: sil [ossa] @test_deadphi4 : -// mem2reg cannot optimize an enum location which spans over multiple blocks. -// CHECK: alloc_stack +// CHECK-NOT: alloc_stack // CHECK-LABEL: } // end sil function 'test_deadphi4' sil [ossa] @test_deadphi4 : $@convention(thin) (@owned KlassOptional) -> () { bb0(%0 : @owned $KlassOptional):