From 6219e5a00b8d1c3fa7ec03ee5d05f3381f14e53e Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Thu, 5 Jul 2018 02:37:50 -0700
Subject: [PATCH 01/12] Add a utility for finding the local apply sites of a
 function_ref.

---
 include/swift/SIL/InstructionUtils.h | 49 +++++++++++++++++++++
 lib/SIL/InstructionUtils.cpp         | 64 ++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+)

diff --git a/include/swift/SIL/InstructionUtils.h b/include/swift/SIL/InstructionUtils.h
index ec8c46631ef94..5da5d00963812 100644
--- a/include/swift/SIL/InstructionUtils.h
+++ b/include/swift/SIL/InstructionUtils.h
@@ -135,6 +135,55 @@ struct LLVM_LIBRARY_VISIBILITY FindClosureResult {
 /// by a reabstraction thunk.
 FindClosureResult findClosureForAppliedArg(SILValue V);
 
+struct LLVM_LIBRARY_VISIBILITY FindLocalApplySitesResult {
+  /// Set to true if the function_ref escapes into a use that our analysis
+  /// does not understand (a potential escape). Set to false if we understood
+  /// every use and none of them escaped. Set to None if we did not analyze
+  /// any uses at all.
+  ///
+  /// The None case lets us distinguish between knowing that a value did or
+  /// did not escape and simply not having found any information.
+  Optional<bool> escapes;
+
+  /// Contains the list of local non-fully-applied partial apply sites that we
+  /// found.
+  SmallVector<ApplySite, 1> partialApplySites;
+
+  /// Contains the list of full apply sites that we found.
+  SmallVector<FullApplySite, 1> fullApplySites;
+
+  /// The copy constructor is deleted. This is a move only type.
+  FindLocalApplySitesResult()
+      : escapes(), partialApplySites(), fullApplySites() {}
+  FindLocalApplySitesResult(const FindLocalApplySitesResult &) = delete;
+  FindLocalApplySitesResult(FindLocalApplySitesResult &&) = default;
+  ~FindLocalApplySitesResult() = default;
+
+  /// Treat this function_ref as escaping only if we found an actual user
+  /// that we did not understand.
+  ///
+  /// When determining if we have an "interesting" result, we want to return
+  /// true whether escapes is true or false, since either way we learned
+  /// something about the uses. In contrast, when asking whether we actually
+  /// did escape, we do not want to treat "no information" as an escape.
+  bool isEscaping() const { return escapes.getValueOr(false); }
+
+  /// We convert to true if we have any "non"-conservative information about
+  /// the FunctionRefInst that was processed.
+  ///
+  /// NOTE: We want to return true if escapes has any value. Otherwise, we may
+  /// ignore escape information that we did compute.
+  operator bool() const {
+    return escapes.hasValue() || partialApplySites.size() ||
+           fullApplySites.size();
+  }
+};
+
+/// Returns the local apply sites, if any, that we found for the given
+/// function_ref, together with the escape information we computed.
+FindLocalApplySitesResult findLocalApplySites(FunctionRefInst *FRI);
+
 /// A utility class for evaluating whether a newly parsed or deserialized
 /// function has qualified or unqualified ownership.
 ///
diff --git a/lib/SIL/InstructionUtils.cpp b/lib/SIL/InstructionUtils.cpp
index 04252abaf3317..627c8bf9e7e85 100644
--- a/lib/SIL/InstructionUtils.cpp
+++ b/lib/SIL/InstructionUtils.cpp
@@ -455,6 +455,70 @@ FindClosureResult swift::findClosureForAppliedArg(SILValue V) {
   return FindClosureResult(PAI, false);
 }
 
+FindLocalApplySitesResult swift::findLocalApplySites(FunctionRefInst *FRI) {
+  SmallVector<Operand *, 8> worklist(FRI->use_begin(), FRI->use_end());
+
+  FindLocalApplySitesResult f;
+
+  // Optimistically state that we have no escapes before our def-use dataflow.
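+  // If the walk below reaches a use that we cannot classify, this flips to
+  // true and stays true. A default-constructed result instead leaves escapes
+  // as None, meaning that no uses were analyzed at all.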
+  f.escapes = false;
+  while (!worklist.empty()) {
+    auto *op = worklist.pop_back_val();
+    auto *user = op->getUser();
+
+    // If we have a full apply site as our user.
+    if (auto apply = FullApplySite::isa(user)) {
+      if (apply.getCallee() == op->get()) {
+        f.fullApplySites.push_back(apply);
+        continue;
+      }
+    }
+
+    // If we have a partial apply as a user, start tracking it, but also look
+    // at its users.
+    if (auto *pai = dyn_cast<PartialApplyInst>(user)) {
+      if (pai->getCallee() == op->get()) {
+        // Track the partial apply that we saw so we can potentially eliminate
+        // dead closure arguments.
+        f.partialApplySites.push_back(pai);
+        // Look to see if we can find a full application of this partial apply
+        // as well.
+        copy(pai->getUses(), std::back_inserter(worklist));
+        continue;
+      }
+    }
+
+    // Otherwise, see if we have any function casts to look through...
+    switch (user->getKind()) {
+    case SILInstructionKind::ThinToThickFunctionInst:
+    case SILInstructionKind::ConvertFunctionInst:
+    case SILInstructionKind::ConvertEscapeToNoEscapeInst:
+      copy(cast<SingleValueInstruction>(user)->getUses(),
+           std::back_inserter(worklist));
+      continue;
+
+    // Look through any reference count instructions since these are not
+    // escapes:
+    case SILInstructionKind::CopyValueInst:
+      copy(cast<CopyValueInst>(user)->getUses(), std::back_inserter(worklist));
+      continue;
+    case SILInstructionKind::StrongRetainInst:
+    case SILInstructionKind::StrongReleaseInst:
+    case SILInstructionKind::RetainValueInst:
+    case SILInstructionKind::ReleaseValueInst:
+    case SILInstructionKind::DestroyValueInst:
+      continue;
+    default:
+      break;
+    }
+
+    // But everything else is considered an escape.
+    f.escapes = true;
+  }
+
+  return f;
+}
+
 namespace {
 
 enum class OwnershipQualifiedKind {

From 8d743d8da6bb2bcc97b092830e5f127c6ec1af79 Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Thu, 5 Jul 2018 02:39:00 -0700
Subject: [PATCH 02/12] [caller-analysis] Reimplement CallerAnalysis on top of
 findLocalApplySites.

Now the caller analysis can tell callers whether it was able to find /all/
callers of a callee.

NOTE: This does not change FSO itself yet.

rdar://41146023
---
 .../SILOptimizer/Analysis/CallerAnalysis.h    | 235 +++---
 lib/SILOptimizer/Analysis/CallerAnalysis.cpp  | 305 ++++++++--
 .../FunctionSignatureOpts.cpp                 |   2 +-
 .../UtilityPasses/CallerAnalysisPrinter.cpp   |  15 +-
 test/SILOptimizer/caller_analysis.sil         | 519 ++++++++++++++++++
 test/SILOptimizer/caller_analysis_printer.sil | 149 -----
 6 files changed, 960 insertions(+), 265 deletions(-)
 create mode 100644 test/SILOptimizer/caller_analysis.sil
 delete mode 100644 test/SILOptimizer/caller_analysis_printer.sil

diff --git a/include/swift/SILOptimizer/Analysis/CallerAnalysis.h b/include/swift/SILOptimizer/Analysis/CallerAnalysis.h
index 375c06eb18f06..54fa3b64a2a3a 100644
--- a/include/swift/SILOptimizer/Analysis/CallerAnalysis.h
+++ b/include/swift/SILOptimizer/Analysis/CallerAnalysis.h
@@ -44,125 +44,226 @@ class CallerAnalysis : public SILAnalysis {
   class FunctionInfo;
 
 private:
+  struct CallerInfo;
+
   /// Current module we are analyzing.
-  SILModule &Mod;
+  SILModule &mod;
 
   /// A map between all the functions and their callsites in the module.
-  llvm::DenseMap<SILFunction *, FunctionInfo> FuncInfos;
+  ///
+  /// NOTE: When we dump the state of the caller analysis we iterate over the
+  /// module rather than over this map, so that the output is emitted in a
+  /// deterministic order.
+  llvm::DenseMap<SILFunction *, FunctionInfo> funcInfos;
 
   /// A list of functions that needs to be recomputed.
-  llvm::SetVector<SILFunction *> RecomputeFunctionList;
-
-  /// Iterate over all the call sites in the function and update
-  /// CallInfo.
-  void processFunctionCallSites(SILFunction *F);
-
-  /// This function is about to become "unknown" to us. Invalidate any
-  /// callsite information related to it.
-  void invalidateExistingCalleeRelation(SILFunction *F);
-
-  void processRecomputeFunctionList() {
-    for (auto &F : RecomputeFunctionList) {
-      processFunctionCallSites(F);
-    }
-    RecomputeFunctionList.clear();
-  }
+  llvm::SetVector<SILFunction *> recomputeFunctionList;
 
 public:
-  CallerAnalysis(SILModule *M) : SILAnalysis(AnalysisKind::Caller), Mod(*M) {
-    // Make sure we compute everything first time called.
-    for (auto &F : Mod) {
-      FuncInfos.FindAndConstruct(&F);
-      RecomputeFunctionList.insert(&F);
-    }
-  }
+  CallerAnalysis(SILModule *m);
 
-  static bool classof(const SILAnalysis *S) {
-    return S->getKind() == AnalysisKind::Caller;
+  static bool classof(const SILAnalysis *s) {
+    return s->getKind() == AnalysisKind::Caller;
   }
 
   /// Invalidate all information in this analysis.
   virtual void invalidate() override {
-    FuncInfos.clear();
-    RecomputeFunctionList.clear();
-    for (auto &F : Mod) {
-      RecomputeFunctionList.insert(&F);
+    funcInfos.clear();
+    recomputeFunctionList.clear();
+    for (auto &f : mod) {
+      recomputeFunctionList.insert(&f);
     }
   }
 
-  /// Invalidate all of the information for a specific function.
-  virtual void invalidate(SILFunction *F, InvalidationKind K) override {
+  /// Invalidate all of the information for a specific caller function.
+  virtual void invalidate(SILFunction *caller, InvalidationKind k) override {
     // Should we invalidate based on the invalidation kind.
-    bool shouldInvalidate = K & InvalidationKind::CallsAndInstructions;
+    bool shouldInvalidate = k & InvalidationKind::CallsAndInstructions;
     if (!shouldInvalidate)
       return;
 
     // This function has become "unknown" to us. Invalidate any callsite
     // information related to this function.
-    invalidateExistingCalleeRelation(F);
+    invalidateExistingCalleeRelation(caller);
+
     // Make sure this function is recomputed next time.
-    RecomputeFunctionList.insert(F);
+    recomputeFunctionList.insert(caller);
   }
 
   /// Notify the analysis about a newly created function.
-  virtual void notifyAddFunction(SILFunction *F) override {
-    RecomputeFunctionList.insert(F);
+  virtual void notifyAddFunction(SILFunction *f) override {
+    recomputeFunctionList.insert(f);
   }
 
   /// Notify the analysis about a function which will be deleted from the
   /// module.
-  virtual void notifyDeleteFunction(SILFunction *F) override {
-    invalidateExistingCalleeRelation(F);
-    RecomputeFunctionList.remove(F);
+  virtual void notifyDeleteFunction(SILFunction *f) override {
+    invalidateExistingCalleeRelation(f);
+    recomputeFunctionList.remove(f);
   }
 
   /// Notify the analysis about changed witness or vtables.
-  virtual void invalidateFunctionTables() override { }
+  virtual void invalidateFunctionTables() override {}
 
-  const FunctionInfo &getCallerInfo(SILFunction *F) {
-    // Recompute every function in the invalidated function list and empty the
-    // list.
-    processRecomputeFunctionList();
-    return FuncInfos[F];
-  }
+  /// Look up the function info that we have stored for f, first recomputing
+  /// any invalidated parts of the call graph.
+  const FunctionInfo &getCallerInfo(SILFunction *f) const;
+
+#ifndef NDEBUG
+  LLVM_ATTRIBUTE_DEPRECATED(void dump() const LLVM_ATTRIBUTE_USED,
+                            "Only for use in the debugger");
+#endif
+
+  /// Print the state of the caller analysis as a sequence of yaml documents
+  /// for each callee we are tracking.
+  void print(llvm::raw_ostream &os) const;
+
+  /// Print the state of the caller analysis as a sequence of yaml documents
+  /// for each callee we are tracking to the passed in file path.
+  LLVM_ATTRIBUTE_DEPRECATED(void print(const char *filePath)
+                                const LLVM_ATTRIBUTE_USED,
+                            "Only for use in the debugger");
+
+private:
+  /// Iterate over all the call sites in the function and update
+  /// CallInfo.
+  void processFunctionCallSites(SILFunction *f);
+
+  /// This function is about to become "unknown" to us. Invalidate any
+  /// callsite information related to it.
+  void invalidateExistingCalleeRelation(SILFunction *f);
+
+  void processRecomputeFunctionList() {
+    for (auto &f : recomputeFunctionList) {
+      processFunctionCallSites(f);
+    }
+    recomputeFunctionList.clear();
+  }
+
+  /// Internal only way of getting a caller info. Will insert f if needed and
+  /// _WILL NOT_ perform any recomputation of the callgraph.
+  FunctionInfo &getOrInsertCallerInfo(SILFunction *f);
+};
+
+/// Auxiliary information that we store about a specific caller.
+struct CallerAnalysis::CallerInfo {
+  /// Given a SILFunction F that contains at least one partial apply of the
+  /// given function, map F to the minimum number of partial applied
+  /// arguments of any partial application in F.
+  ///
+  /// By storing the minimum number of partial applied arguments, we are able
+  /// to decide quickly if we are able to eliminate dead captured arguments.
+  Optional<unsigned> numPartialAppliedArguments;
+
+  /// True if this caller performs at least one full application of the
+  /// callee.
+  bool hasFullApply : 1;
+
+  /// True if this caller can guarantee that all of the direct call sites of
+  /// this function inside of it have been found.
+  ///
+  /// NOTE: This does not imply that the function cannot be called
+  /// indirectly. That is a separate query that is type system specific.
+  bool isDirectCallerSetComplete : 1;
+
+  CallerInfo()
+      : numPartialAppliedArguments(), hasFullApply(false),
+        isDirectCallerSetComplete(false) {}
+};
+
+/// This is a representation of the caller information that we have associated
+/// with a specific function.
+///
+/// NOTE: this can be extended to contain the callsites of the function. For
+/// now there is no need for the exact call sites since we only use the
+/// caller information. By not implementing this we save memory and get rid of
+/// dead code.
 class CallerAnalysis::FunctionInfo {
   friend class CallerAnalysis;
 
+  using CallerInfo = CallerAnalysis::CallerInfo;
+  struct YAMLRepresentation;
+
+public:
+  /// FIXME: Upstream in LLVM this is a public using declaration on
+  /// MapVector (MapVector::value_type). In the version of LLVM that
+  /// Swift compiles against currently this is not true, so we provide
+  /// this for ease of use now.
+  ///
+  /// This is meant to be an internal implementation detail.
+  using CallerStatesValueType = std::pair<SILFunction *, CallerInfo>;
 
-  /// A list of all the functions this function calls or partially applies.
-  llvm::SetVector<SILFunction *> Callees;
-  /// A list of all the callers this function has.
-  llvm::SmallSet<SILFunction *, 4> Callers;
+private:
+  /// A map from a function containing uses of a function_ref of the callee to
+  /// the state that we store about the caller's body.
+  llvm::SmallMapVector<SILFunction *, CallerInfo, 4> callerStates;
 
-  /// The number of partial applied arguments of this function.
+  /// True if this function is something that could be called via a vtable or
+  /// a witness table. This does not include escaping uses.
   ///
-  /// Specifically, it stores the minimum number of partial applied arguments
-  /// of each function which contain one or multiple partial_applys of this
-  /// function.
-  /// This is a little bit off-topic because a partial_apply is not really
-  /// a "call" of this function.
-  llvm::DenseMap<SILFunction *, int> PartialAppliers;
+  /// For now this is very conservative: it is only set to false if the
+  /// function has a representation that can never escape. In future cases, we
+  /// should consider refining this to take into account the compilation
+  /// visibility of a protocol conformance or class (and thus whether we can
+  /// actually see every possible caller).
+  bool mayHaveIndirectCallers : 1;
+
+  /// This set vector is an abuse of the data structure, used purely as a
+  /// performance optimization. Here we treat the function info data structure
+  /// as a source of info about callers so that we can update a caller's
+  /// callees when we invalidate that caller. (See
+  /// invalidateExistingCalleeRelation.)
+  llvm::SmallSetVector<SILFunction *, 4> calleeStates;
 
 public:
+  FunctionInfo(SILFunction *f);
+
+  bool hasAllCallers() const {
+    return hasOnlyCompleteDirectCallerSets() && !mayHaveIndirectCallers;
+  }
+
   /// Returns true if this function has at least one caller.
-  bool hasCaller() const { return !Callers.empty(); }
+  bool hasCaller() const {
+    return callerStates.size() &&
+           llvm::any_of(callerStates, [](const CallerStatesValueType &v) {
+             return v.second.hasFullApply;
+           });
+  }
 
   /// Returns non zero if this function is partially applied anywhere.
   ///
   /// The return value is the minimum number of partially applied arguments.
   /// Usually all partial applies of a function partially apply the same
   /// number of arguments anyway.
-  int getMinPartialAppliedArgs() const {
-    int minArgs = 0;
-    for (auto Iter : PartialAppliers) {
-      int numArgs = Iter.second;
-      if (minArgs == 0 || numArgs < minArgs)
-        minArgs = numArgs;
+  unsigned getMinPartialAppliedArgs() const {
+    if (callerStates.empty())
+      return 0;
+
+    bool foundArg = false;
+    unsigned minArgs = UINT_MAX;
+    for (const auto &iter : callerStates) {
+      if (auto numArgs = iter.second.numPartialAppliedArguments) {
+        foundArg = true;
+        minArgs = std::min(minArgs, numArgs.getValue());
+      }
     }
-    return minArgs;
+
+    return foundArg ? minArgs : 0;
   }
+
+  bool hasOnlyCompleteDirectCallerSets() const {
+    return llvm::all_of(callerStates, [](const CallerStatesValueType &v) {
+      return v.second.isDirectCallerSetComplete;
+    });
+  }
+
+  auto getAllReferencingCallers() const
+      -> decltype(llvm::make_range(callerStates.begin(), callerStates.end())) {
+    return llvm::make_range(callerStates.begin(), callerStates.end());
+  }
+
+  LLVM_ATTRIBUTE_DEPRECATED(void dump() const LLVM_ATTRIBUTE_USED,
+                            "Only for use in the debugger");
+
+  void print(llvm::raw_ostream &os) const;
 };
 
 } // end namespace swift
diff --git a/lib/SILOptimizer/Analysis/CallerAnalysis.cpp b/lib/SILOptimizer/Analysis/CallerAnalysis.cpp
index 7121b5fda2829..9b2d51d8e10fa 100644
--- a/lib/SILOptimizer/Analysis/CallerAnalysis.cpp
+++ b/lib/SILOptimizer/Analysis/CallerAnalysis.cpp
@@ -11,64 +11,291 @@
 //===----------------------------------------------------------------------===//
 
 #include "swift/SILOptimizer/Analysis/CallerAnalysis.h"
-
+#include "swift/SIL/InstructionUtils.h"
 #include "swift/SIL/SILModule.h"
 #include "swift/SILOptimizer/Utils/Local.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/YAMLTraits.h"
 
 using namespace swift;
 
+namespace {
+using FunctionInfo = CallerAnalysis::FunctionInfo;
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// CallerAnalysis::FunctionInfo
+//===----------------------------------------------------------------------===//
+
+CallerAnalysis::FunctionInfo::FunctionInfo(SILFunction *f)
+    : callerStates(),
+      mayHaveIndirectCallers(canBeCalledIndirectly(f->getRepresentation())) {}
+
+//===----------------------------------------------------------------------===//
+// CallerAnalysis
+//===----------------------------------------------------------------------===//
+
+// NOTE: This is only meant to be used by external users of CallerAnalysis,
+// since it recomputes any invalidated parts of the call graph. For internal
+// uses, please instead use getOrInsertCallerInfo.
+const FunctionInfo &CallerAnalysis::getCallerInfo(SILFunction *f) const {
+  // Recompute every function in the invalidated function list and empty the
+  // list.
+  auto &self = const_cast<CallerAnalysis &>(*this);
+  self.processRecomputeFunctionList();
+  return self.getOrInsertCallerInfo(f);
+}
+
+// Private, mutable version of getCallerInfo that inserts a new entry for f
+// if needed.
+FunctionInfo &CallerAnalysis::getOrInsertCallerInfo(SILFunction *f) {
+  return funcInfos.try_emplace(f, f).first->second;
+}
+
+CallerAnalysis::CallerAnalysis(SILModule *m)
+    : SILAnalysis(AnalysisKind::Caller), mod(*m) {
+  // Make sure we add everything to the recompute function list when we start.
+  for (auto &f : mod) {
+    getOrInsertCallerInfo(&f);
+    recomputeFunctionList.insert(&f);
+  }
+}
+
+void CallerAnalysis::processFunctionCallSites(SILFunction *callerFn) {
+  // First grab our caller info so that we can store back references from our
+  // callerFn to each calleeFn, letting us invalidate all of the callee info
+  // about our caller efficiently.
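+  // (The back references are kept in FunctionInfo::calleeStates; they are
+  // walked by invalidateExistingCalleeRelation when this caller changes.)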
+  FunctionInfo &callerInfo = getOrInsertCallerInfo(callerFn);
+
+#ifndef NDEBUG
+  llvm::SmallPtrSet<SILInstruction *, 8> visitedCallSites;
+  llvm::SmallSetVector<SILInstruction *, 8> callSitesThatMustBeVisited;
+#endif
+
+  // Scan the caller function, searching for full or partial apply sites.
+  for (auto &block : *callerFn) {
+    for (auto &i : block) {
+#ifndef NDEBUG
+      // If this is a call site that we visited as part of seeing a different
+      // function_ref, skip it. We know that it has been processed correctly.
+      if (visitedCallSites.count(&i))
+        continue;
+#endif
+
+      // Try to find the apply sites.
+      if (auto *fri = dyn_cast<FunctionRefInst>(&i)) {
+        if (auto result = findLocalApplySites(fri)) {
+          auto *calleeFn = fri->getReferencedFunction();
+          FunctionInfo &calleeInfo = getOrInsertCallerInfo(calleeFn);
+
+          // TODO: Make this more aggressive by considering
+          // final/visibility/etc.
+          calleeInfo.mayHaveIndirectCallers =
+              canBeCalledIndirectly(calleeFn->getRepresentation());
+
+          // Next create our caller state.
+          auto iter = calleeInfo.callerStates.insert({callerFn, {}});
+          // If we succeeded in inserting a new value, put in an optimistic
+          // value for escaping.
+          if (iter.second) {
+            iter.first->second.isDirectCallerSetComplete = true;
+          }
+          iter.first->second.isDirectCallerSetComplete &= !result.isEscaping();
+
+          if (result.fullApplySites.size()) {
+            callerInfo.calleeStates.insert(calleeFn);
+            iter.first->second.hasFullApply = true;
+#ifndef NDEBUG
+            for (auto applySite : result.fullApplySites) {
+              visitedCallSites.insert(applySite.getInstruction());
+              callSitesThatMustBeVisited.remove(applySite.getInstruction());
+            }
+#endif
+          }
+
+          if (result.partialApplySites.size()) {
+            callerInfo.calleeStates.insert(calleeFn);
+            auto &optMin = iter.first->second.numPartialAppliedArguments;
+            unsigned min = optMin.getValueOr(UINT_MAX);
+            for (ApplySite partialSite : result.partialApplySites) {
+              min = std::min(min, partialSite.getNumArguments());
+            }
+            optMin = min;
+#ifndef NDEBUG
+            for (auto applySite : result.partialApplySites) {
+              visitedCallSites.insert(applySite.getInstruction());
+              callSitesThatMustBeVisited.remove(applySite.getInstruction());
+            }
+#endif
+          }
+          continue;
+        }
+      }
+
+#ifndef NDEBUG
+      // Make sure that we are in sync with FullApplySite.
+      if (auto apply = FullApplySite::isa(&i)) {
+        if (apply.getCalleeFunction() && !visitedCallSites.count(&i)) {
+          callSitesThatMustBeVisited.insert(&i);
+        }
+        continue;
+      }
+
+      // Make sure that we are in sync with looking for partial apply callees.
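+      // Any partial_apply with a statically known callee must already have
+      // been reached through that callee's function_ref above, so finding
+      // one here indicates a bug in findLocalApplySites.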
+      if (auto *pai = dyn_cast<PartialApplyInst>(&i)) {
+        if (pai->getCalleeFunction() && !visitedCallSites.count(&i)) {
+          callSitesThatMustBeVisited.insert(pai);
+        }
+        continue;
+      }
+#endif
+    }
+  }
+
+#ifndef NDEBUG
+  if (callSitesThatMustBeVisited.empty())
+    return;
+  llvm::errs() << "Found unhandled call sites!\n";
+  while (callSitesThatMustBeVisited.size()) {
+    auto *i = callSitesThatMustBeVisited.pop_back_val();
+    llvm::errs() << "Inst: " << *i;
+  }
+  assert(false && "Unhandled call site?!");
+#endif
+}
+
+void CallerAnalysis::invalidateExistingCalleeRelation(SILFunction *caller) {
+  // Look up the callees that our caller refers to and invalidate any
+  // values that point back at the caller.
+  FunctionInfo &callerInfo = getOrInsertCallerInfo(caller);
+
+  while (callerInfo.calleeStates.size()) {
+    auto *callee = callerInfo.calleeStates.pop_back_val();
+    FunctionInfo &calleeInfo = getOrInsertCallerInfo(callee);
+    assert(calleeInfo.callerStates.count(caller) &&
+           "Referenced callee is not fully/partially applied in the caller?!");
+
+    // Then remove the caller from this specific callee's info struct.
+    calleeInfo.callerStates.erase(caller);
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// CallerAnalysis YAML Dump
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+using llvm::yaml::IO;
+using llvm::yaml::MappingTraits;
+using llvm::yaml::Output;
+using llvm::yaml::ScalarEnumerationTraits;
+using llvm::yaml::SequenceTraits;
+
+/// A special struct that marshals call graph state into a form that
+/// is easy for llvm's yaml i/o to dump. Its structure is meant to
+/// correspond to how the data should be shown by the printer, so
+/// naturally it is slightly redundant.
+struct YAMLCallGraphNode {
+  StringRef calleeName;
+  bool hasCaller;
+  unsigned minPartialAppliedArgs;
+  bool hasOnlyCompleteDirectCallerSets;
+  bool hasAllCallers;
+  std::vector<StringRef> partialAppliers;
+  std::vector<StringRef> fullAppliers;
+
+  YAMLCallGraphNode() = delete;
+  ~YAMLCallGraphNode() = default;
+
+  /// Deleted copy constructor. This is a move only data structure.
+  YAMLCallGraphNode(const YAMLCallGraphNode &) = delete;
+  YAMLCallGraphNode(YAMLCallGraphNode &&) = default;
+  YAMLCallGraphNode &operator=(const YAMLCallGraphNode &) = delete;
+  YAMLCallGraphNode &operator=(YAMLCallGraphNode &&) = default;
+
+  YAMLCallGraphNode(StringRef calleeName, bool hasCaller,
+                    unsigned minPartialAppliedArgs,
+                    bool hasOnlyCompleteDirectCallerSets, bool hasAllCallers,
+                    std::vector<StringRef> &&partialAppliers,
+                    std::vector<StringRef> &&fullAppliers)
+      : calleeName(calleeName), hasCaller(hasCaller),
+        minPartialAppliedArgs(minPartialAppliedArgs),
+        hasOnlyCompleteDirectCallerSets(hasOnlyCompleteDirectCallerSets),
+        hasAllCallers(hasAllCallers),
+        partialAppliers(std::move(partialAppliers)),
+        fullAppliers(std::move(fullAppliers)) {}
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<YAMLCallGraphNode> {
+  static void mapping(IO &io, YAMLCallGraphNode &func) {
+    io.mapRequired("calleeName", func.calleeName);
+    io.mapRequired("hasCaller", func.hasCaller);
+    io.mapRequired("minPartialAppliedArgs", func.minPartialAppliedArgs);
+    io.mapRequired("hasOnlyCompleteDirectCallerSets",
+                   func.hasOnlyCompleteDirectCallerSets);
+    io.mapRequired("hasAllCallers", func.hasAllCallers);
+    io.mapRequired("partialAppliers", func.partialAppliers);
+    io.mapRequired("fullAppliers", func.fullAppliers);
+  }
+};
+
+} // namespace yaml
+} // namespace llvm
+
+void CallerAnalysis::dump() const { print(llvm::errs()); }
+
+void CallerAnalysis::print(const char *filePath) const {
+  using namespace llvm::sys;
+  std::error_code error;
+  llvm::raw_fd_ostream fileOutputStream(filePath, error, fs::F_Text);
+  if (error) {
+    llvm::errs() << "Failed to open path \"" << filePath << "\" for writing.";
+    llvm_unreachable("default error handler");
+  }
+  print(fileOutputStream);
+}
+
+void CallerAnalysis::print(llvm::raw_ostream &os) const {
+  llvm::yaml::Output yout(os);
+
+  // NOTE: We purposely do not iterate over our internal state here. This
+  // ensures that we dump state for every function in the module and that the
+  // output appears in module order.
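+  // Module order is also stable from run to run, unlike the iteration order
+  // of the underlying DenseMap, so the emitted YAML is diffable.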
+  for (auto &f : mod) {
+    const auto &fi = getCallerInfo(&f);
+
+    std::vector<StringRef> partialAppliers;
+    std::vector<StringRef> fullAppliers;
+    for (auto &apply : fi.getAllReferencingCallers()) {
+      if (apply.second.hasFullApply) {
+        fullAppliers.push_back(apply.first->getName());
+      }
+      if (apply.second.numPartialAppliedArguments.hasValue()) {
+        partialAppliers.push_back(apply.first->getName());
+      }
+    }
+
+    YAMLCallGraphNode node(
+        f.getName(), fi.hasCaller(), fi.getMinPartialAppliedArgs(),
+        fi.hasOnlyCompleteDirectCallerSets(), fi.hasAllCallers(),
+        std::move(partialAppliers), std::move(fullAppliers));
+    yout << node;
+  }
+}
 
 //===----------------------------------------------------------------------===//
 // Main Entry Point
 //===----------------------------------------------------------------------===//
-SILAnalysis *swift::createCallerAnalysis(SILModule *M) {
-  return new CallerAnalysis(M);
+
+SILAnalysis *swift::createCallerAnalysis(SILModule *mod) {
+  return new CallerAnalysis(mod);
 }
diff --git a/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.cpp b/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.cpp
index 69bb6dd3561aa..c57706ccfd385 100644
--- a/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.cpp
+++ b/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.cpp
@@ -755,7 +755,7 @@ class FunctionSignatureOpts : public SILFunctionTransform {
       return;
     }
 
-    CallerAnalysis *CA = PM->getAnalysis<CallerAnalysis>();
+    const CallerAnalysis *CA = PM->getAnalysis<CallerAnalysis>();
     const CallerAnalysis::FunctionInfo &FuncInfo = CA->getCallerInfo(F);
 
     // Check the signature of F to make sure that it is a function that we
diff --git a/lib/SILOptimizer/UtilityPasses/CallerAnalysisPrinter.cpp b/lib/SILOptimizer/UtilityPasses/CallerAnalysisPrinter.cpp
index e150b4f3f2d2e..dc9ae67397035 100644
--- a/lib/SILOptimizer/UtilityPasses/CallerAnalysisPrinter.cpp
+++ b/lib/SILOptimizer/UtilityPasses/CallerAnalysisPrinter.cpp
@@ -14,10 +14,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "swift/SILOptimizer/Analysis/CallerAnalysis.h"
 #include "swift/SIL/SILFunction.h"
 #include "swift/SIL/SILModule.h"
+#include "swift/SILOptimizer/Analysis/CallerAnalysis.h"
 #include "swift/SILOptimizer/PassManager/Transforms.h"
+#include "llvm/Support/YAMLTraits.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace swift;
@@ -26,19 +27,15 @@ using namespace swift;
 
 namespace {
 
+/// A pass that dumps the caller analysis state in yaml form. Intended to
+/// allow visualizing the caller analysis via external data visualization and
+/// analysis programs.
 class CallerAnalysisPrinterPass : public SILModuleTransform {
   /// The entry point to the transformation.
   void run() override {
    auto *CA = getAnalysis<CallerAnalysis>();
-    for (auto &F : *getModule()) {
-      const CallerAnalysis::FunctionInfo &FI = CA->getCallerInfo(&F);
-      const char *hasCaller = FI.hasCaller() ?
"true" : "false"; - llvm::outs() << "Function " << F.getName() << " has caller: " - << hasCaller << ", partial applied args = " - << FI.getMinPartialAppliedArgs() << "\n"; - } + CA->print(llvm::outs()); } - }; } // end anonymous namespace diff --git a/test/SILOptimizer/caller_analysis.sil b/test/SILOptimizer/caller_analysis.sil new file mode 100644 index 0000000000000..d5fc85122370a --- /dev/null +++ b/test/SILOptimizer/caller_analysis.sil @@ -0,0 +1,519 @@ +// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil -enable-sil-verify-all %s -caller-analysis-printer -o /dev/null | %FileCheck --check-prefix=CHECK %s + +sil_stage canonical + +import Builtin + +// CHECK-LABEL: calleeName: dead_func +// CHECK-NEXT: hasCaller: false +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: ... +sil hidden @dead_func : $@convention(thin) () -> () { + %2 = tuple () + return %2 : $() +} + +// CHECK-LABEL: calleeName: call_top +// CHECK-NEXT: hasCaller: false +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: ... +sil hidden @call_top : $@convention(thin) () -> () { +bb0: + %0 = function_ref @call_middle : $@convention(thin) () -> () + %1 = apply %0() : $@convention(thin) () -> () + %2 = tuple () + return %2 : $() +} + +// CHECK-LABEL: calleeName: call_middle +// CHECK-NEXT: hasCaller: true +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: - call_top +// CHECK-NEXT: ... +sil hidden @call_middle : $@convention(thin) () -> () { +bb0: + %0 = function_ref @call_bottom : $@convention(thin) () -> () + %1 = apply %0() : $@convention(thin) () -> () + %2 = tuple () + return %2 : $() +} + +// CHECK-LABEL: calleeName: call_bottom +// CHECK-NEXT: hasCaller: true +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: - call_middle +// CHECK-NEXT: ... +sil hidden @call_bottom : $@convention(thin) () -> () { +bb0: + %0 = tuple () + return %0 : $() +} + +// CHECK-LABEL: calleeName: self_recursive_func +// CHECK-NEXT: hasCaller: true +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: - self_recursive_func +// CHECK-NEXT: ... +sil hidden @self_recursive_func : $@convention(thin) () -> () { +bb0: + %0 = function_ref @self_recursive_func : $@convention(thin) () -> () + %1 = apply %0() : $@convention(thin) () -> () + %2 = tuple () + return %2 : $() +} + +// CHECK-LABEL: calleeName: mutual_recursive_func1 +// CHECK-NEXT: hasCaller: true +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: - mutual_recursive_func2 +// CHECK-NEXT: ... 
+sil hidden @mutual_recursive_func1 : $@convention(thin) () -> () { +bb0: + %0 = function_ref @mutual_recursive_func2 : $@convention(thin) () -> () + %1 = apply %0() : $@convention(thin) () -> () + %2 = tuple () + return %2 : $() +} + +// CHECK-LABEL: calleeName: mutual_recursive_func2 +// CHECK-NEXT: hasCaller: true +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: - mutual_recursive_func1 +// CHECK-NEXT: ... +sil hidden @mutual_recursive_func2 : $@convention(thin) () -> () { +bb0: + %0 = function_ref @mutual_recursive_func1 : $@convention(thin) () -> () + %1 = apply %0() : $@convention(thin) () -> () + %2 = tuple () + return %2 : $() +} + +// CHECK-LABEL: calleeName: multi_called +// CHECK-NEXT: hasCaller: true +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: - multi_calles +// CHECK-NEXT: ... +sil hidden @multi_called : $@convention(thin) () -> () { +bb0: + %2 = tuple () + return %2 : $() +} + +// CHECK-LABEL: calleeName: multi_calles +// CHECK-NEXT: hasCaller: false +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: ... +sil hidden @multi_calles : $@convention(thin) () -> () { +bb0: + %0 = function_ref @multi_called : $@convention(thin) () -> () + %1 = apply %0() : $@convention(thin) () -> () + cond_br undef, bb1, bb2 +bb1: + %2 = apply %0() : $@convention(thin) () -> () + br bb3 +bb2: + %3 = apply %0() : $@convention(thin) () -> () + br bb3 +bb3: + %4 = tuple () + return %4 : $() +} + +// CHECK-LABEL: calleeName: multi_callers +// CHECK-NEXT: hasCaller: true +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: - multi_caller1 +// CHECK-NEXT: - multi_caller2 +// CHECK-NEXT: ... +sil hidden @multi_callers : $@convention(thin) () -> () { +bb0: + %2 = tuple () + return %2 : $() +} + +// CHECK-LABEL: calleeName: multi_caller1 +// CHECK-NEXT: hasCaller: false +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: ... +sil hidden @multi_caller1 : $@convention(thin) () -> () { +bb0: + %0 = function_ref @multi_callers : $@convention(thin) () -> () + %1 = apply %0() : $@convention(thin) () -> () + %2 = tuple () + return %2 : $() +} + +// CHECK-LABEL: calleeName: multi_caller2 +// CHECK-NEXT: hasCaller: false +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: ... +sil hidden @multi_caller2 : $@convention(thin) () -> () { +bb0: + %0 = function_ref @multi_callers : $@convention(thin) () -> () + %1 = apply %0() : $@convention(thin) () -> () + %2 = tuple () + return %2 : $() +} + +// This doesn't have all the direct caller sets since we return the +// partial_apply. 
+// +// CHECK-LABEL: calleeName: closure1 +// CHECK-NEXT: hasCaller: false +// CHECK-NEXT: minPartialAppliedArgs: 1 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: false +// CHECK-NEXT: hasAllCallers: false +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: - partial_apply_one_arg +// CHECK-NEXT: - partial_apply_two_args1 +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: ... +sil @closure1 : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 { +bb0(%0 : $Builtin.Int32, %1 : $Builtin.Int32): + return %0 : $Builtin.Int32 +} + +// CHECK-LABEL: calleeName: closure2 +// CHECK-NEXT: hasCaller: false +// CHECK-NEXT: minPartialAppliedArgs: 2 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: false +// CHECK-NEXT: hasAllCallers: false +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: - partial_apply_two_args2 +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: ... +sil @closure2 : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 { +bb0(%0 : $Builtin.Int32, %1 : $Builtin.Int32): + return %0 : $Builtin.Int32 +} + +// CHECK-LABEL: calleeName: partial_apply_one_arg +// CHECK-NEXT: hasCaller: false +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: ... +sil @partial_apply_one_arg : $@convention(thin) (Builtin.Int32) -> @owned @callee_owned (Builtin.Int32) -> Builtin.Int32 { +bb0(%0 : $Builtin.Int32): + %1 = function_ref @closure1 : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 + %2 = partial_apply %1(%0) : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 + return %2 : $@callee_owned (Builtin.Int32) -> Builtin.Int32 +} + +// CHECK-LABEL: calleeName: partial_apply_two_args1 +// CHECK-NEXT: hasCaller: false +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: ... +sil @partial_apply_two_args1 : $@convention(thin) (Builtin.Int32) -> @owned @callee_owned () -> Builtin.Int32 { +bb0(%0 : $Builtin.Int32): + %1 = function_ref @closure1 : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 + %2 = partial_apply %1(%0, %0) : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 + return %2 : $@callee_owned () -> Builtin.Int32 +} + +// CHECK-LABEL: calleeName: partial_apply_two_args2 +// CHECK-NEXT: hasCaller: false +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: ... +sil @partial_apply_two_args2 : $@convention(thin) (Builtin.Int32) -> @owned @callee_owned () -> Builtin.Int32 { +bb0(%0 : $Builtin.Int32): + %1 = function_ref @closure2 : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 + %2 = partial_apply %1(%0, %0) : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 + return %2 : $@callee_owned () -> Builtin.Int32 +} + +// CHECK-LABEL: calleeName: called_closure +// CHECK-NEXT: hasCaller: true +// CHECK-NEXT: minPartialAppliedArgs: 2 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: - partial_apply_that_is_applied +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: - partial_apply_that_is_applied +// CHECK-NEXT: ... 
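+// For called_closure, the function_ref is partially applied and the resulting
+// closure is then fully applied inside the same caller, so the same function
+// appears in both the partialAppliers and the fullAppliers lists.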
+sil @called_closure : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 {
+bb0(%0 : $Builtin.Int32, %1 : $Builtin.Int32):
+  return %0 : $Builtin.Int32
+}
+
+sil @partial_apply_that_is_applied : $@convention(thin) (Builtin.Int32) -> () {
+bb0(%0 : $Builtin.Int32):
+  %1 = function_ref @called_closure : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32
+  %2 = partial_apply %1(%0, %0) : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32
+  %3 = apply %2() : $@callee_owned () -> Builtin.Int32
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// Destroys (strong_release) of the closure are ignored by the analysis, so
+// we still find the complete caller set here.
+//
+// CHECK-LABEL: calleeName: called_closure_then_destroy
+// CHECK-NEXT: hasCaller: true
+// CHECK-NEXT: minPartialAppliedArgs: 2
+// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true
+// CHECK-NEXT: hasAllCallers: true
+// CHECK-NEXT: partialAppliers:
+// CHECK-NEXT: - partial_apply_that_is_applied
+// CHECK-NEXT: fullAppliers:
+// CHECK-NEXT: - partial_apply_that_is_applied
+// CHECK-NEXT: ...
+sil @called_closure_then_destroy : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 {
+bb0(%0 : $Builtin.Int32, %1 : $Builtin.Int32):
+  return %0 : $Builtin.Int32
+}
+
+sil @partial_apply_that_is_applied_and_destroyed : $@convention(thin) (Builtin.Int32) -> () {
+bb0(%0 : $Builtin.Int32):
+  %1 = function_ref @called_closure_then_destroy : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32
+  %2 = partial_apply %1(%0, %0) : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32
+  %3 = apply %2() : $@callee_owned () -> Builtin.Int32
+  strong_release %2 : $@callee_owned () -> Builtin.Int32
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// CHECK-LABEL: calleeName: called_escaping_closure
+// CHECK-NEXT: hasCaller: true
+// CHECK-NEXT: minPartialAppliedArgs: 2
+// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: false
+// CHECK-NEXT: hasAllCallers: false
+// CHECK-NEXT: partialAppliers:
+// CHECK-NEXT: - partial_apply_that_is_applied
+// CHECK-NEXT: fullAppliers:
+// CHECK-NEXT: - partial_apply_that_is_applied
+// CHECK-NEXT: ...
+sil @called_escaping_closure : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 {
+bb0(%0 : $Builtin.Int32, %1 : $Builtin.Int32):
+  return %0 : $Builtin.Int32
+}
+
+sil @partial_apply_that_is_applied_and_escapes : $@convention(thin) (Builtin.Int32) -> @owned @callee_guaranteed () -> Builtin.Int32 {
+bb0(%0 : $Builtin.Int32):
+  %1 = function_ref @called_escaping_closure : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32
+  %2 = partial_apply [callee_guaranteed] %1(%0, %0) : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32
+  %3 = apply %2() : $@callee_guaranteed () -> Builtin.Int32
+  return %2 : $@callee_guaranteed () -> Builtin.Int32
+}
+
+// Make sure that we ignore strong_retain.
+//
+// CHECK-LABEL: calleeName: called_closure_then_copy_destroy
+// CHECK-NEXT: hasCaller: true
+// CHECK-NEXT: minPartialAppliedArgs: 2
+// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true
+// CHECK-NEXT: hasAllCallers: true
+// CHECK-NEXT: partialAppliers:
+// CHECK-NEXT: - partial_apply_that_is_applied
+// CHECK-NEXT: fullAppliers:
+// CHECK-NEXT: - partial_apply_that_is_applied
+// CHECK-NEXT: ...
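+// strong_retain/retain_value and the matching releases are pure reference
+// counting traffic, so the analysis does not treat them as escapes of the
+// closure.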
+sil @called_closure_then_copy_destroy : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 { +bb0(%0 : $Builtin.Int32, %1 : $Builtin.Int32): + return %0 : $Builtin.Int32 +} + +sil @partial_apply_that_is_applied_and_copy_destroy : $@convention(thin) (Builtin.Int32) -> () { +bb0(%0 : $Builtin.Int32): + %1 = function_ref @called_closure_then_copy_destroy : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 + %2 = partial_apply %1(%0, %0) : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 + strong_retain %2 : $@callee_owned () -> Builtin.Int32 + retain_value %2 : $@callee_owned () -> Builtin.Int32 + %3 = apply %2() : $@callee_owned () -> Builtin.Int32 + release_value %2 : $@callee_owned () -> Builtin.Int32 + strong_release %2 : $@callee_owned () -> Builtin.Int32 + %9999 = tuple() + return %9999 : $() +} + +// We should ignore escapes of non-escaping partial applies in the fullness of +// time. Once we handle that correctly, we should have the complete caller set +// here as well as an application. This would require us to have a flow +// sensitive callgraph analysis. +// +// CHECK-LABEL: calleeName: noescape_callee +// CHECK-NEXT: hasCaller: false +// CHECK-NEXT: minPartialAppliedArgs: 2 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: false +// CHECK-NEXT: hasAllCallers: false +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: - partial_apply_that_is_applied_and_passed_noescape +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: ... +sil @noescape_callee : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 { +bb0(%0 : $Builtin.Int32, %1 : $Builtin.Int32): + return %0 : $Builtin.Int32 +} + +// CHECK-LABEL: calleeName: noescape_caller +// CHECK-NEXT: hasCaller: true +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: true +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: - partial_apply_that_is_applied_and_passed_noescape +// CHECK-NEXT: - thin_to_thick_is_applied_and_passed_noescape +// CHECK-NEXT: ... +sil @noescape_caller : $@convention(thin) (@noescape @callee_owned () -> Builtin.Int32) -> () { +bb0(%0 : $@noescape @callee_owned () -> Builtin.Int32): + %1 = apply %0() : $@noescape @callee_owned () -> Builtin.Int32 + %9999 = tuple() + return %9999 : $() +} + +sil @partial_apply_that_is_applied_and_passed_noescape : $@convention(thin) (Builtin.Int32) -> () { +bb0(%0 : $Builtin.Int32): + %1 = function_ref @noescape_callee : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 + %2 = partial_apply %1(%0, %0) : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 + %3 = convert_escape_to_noescape %2 : $@callee_owned () -> Builtin.Int32 to $@noescape @callee_owned () -> Builtin.Int32 + %4 = function_ref @noescape_caller : $@convention(thin) (@noescape @callee_owned () -> Builtin.Int32) -> () + apply %4(%3) : $@convention(thin) (@noescape @callee_owned () -> Builtin.Int32) -> () + %9999 = tuple() + return %9999 : $() +} + +// CHECK-LABEL: calleeName: noescape_callee2 +// CHECK-NEXT: hasCaller: false +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: false +// CHECK-NEXT: hasAllCallers: false +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: ... 
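+// The thin_to_thick_function result is converted with
+// convert_escape_to_noescape and then passed as a call argument, which the
+// analysis conservatively treats as an escape, so the direct caller set of
+// noescape_callee2 is marked incomplete and no appliers are listed.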
+sil @noescape_callee2 : $@convention(thin) () -> Builtin.Int32 { +bb0: + return undef : $Builtin.Int32 +} + +sil @thin_to_thick_is_applied_and_passed_noescape : $@convention(thin) (Builtin.Int32) -> () { +bb0(%0 : $Builtin.Int32): + %1 = function_ref @noescape_callee2 : $@convention(thin) () -> Builtin.Int32 + %2 = thin_to_thick_function %1 : $@convention(thin) () -> Builtin.Int32 to $@callee_owned () -> Builtin.Int32 + %3 = convert_escape_to_noescape %2 : $@callee_owned () -> Builtin.Int32 to $@noescape @callee_owned () -> Builtin.Int32 + %4 = function_ref @noescape_caller : $@convention(thin) (@noescape @callee_owned () -> Builtin.Int32) -> () + apply %4(%3) : $@convention(thin) (@noescape @callee_owned () -> Builtin.Int32) -> () + %9999 = tuple() + return %9999 : $() +} + +class Klass { + @_silgen_name("called_method") + func doSomething() {} + + @_silgen_name("final_called_method") + final func finalDoSomething() {} + +} + +// Check that we know that we have a complete direct caller set, but that we do +// not have all callers since based on our trivial heuristic today, we always +// assume methods could be called indirectly. +// +// CHECK-LABEL: calleeName: called_method +// CHECK-NEXT: hasCaller: true +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: false +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: - apply_called_method +// CHECK-NEXT: ... +sil @called_method : $@convention(method) (@guaranteed Klass) -> () { +bb0(%0 : $Klass): + %9999 = tuple() + return %9999 : $() +} + +sil @apply_called_method : $@convention(thin) (@guaranteed Klass) -> () { +bb0(%0 : $Klass): + %1 = function_ref @called_method : $@convention(method) (@guaranteed Klass) -> () + apply %1(%0) : $@convention(method) (@guaranteed Klass) -> () + %9999 = tuple() + return %9999 : $() +} + +// Once we understand final, in the fullness of time we should find all callers +// in this example. Today we do not though. +// +// CHECK-LABEL: calleeName: final_called_method +// CHECK-NEXT: hasCaller: true +// CHECK-NEXT: minPartialAppliedArgs: 0 +// CHECK-NEXT: hasOnlyCompleteDirectCallerSets: true +// CHECK-NEXT: hasAllCallers: false +// CHECK-NEXT: partialAppliers: +// CHECK-NEXT: fullAppliers: +// CHECK-NEXT: - final_apply_called_method +// CHECK-NEXT: ... 
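+// A final method cannot be overridden, so a representation check that
+// understood final could prove that no indirect callers exist here.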
+sil @final_called_method : $@convention(method) (@guaranteed Klass) -> () { +bb0(%0 : $Klass): + %9999 = tuple() + return %9999 : $() +} + +sil @final_apply_called_method : $@convention(thin) (@guaranteed Klass) -> () { +bb0(%0 : $Klass): + %1 = function_ref @final_called_method : $@convention(method) (@guaranteed Klass) -> () + apply %1(%0) : $@convention(method) (@guaranteed Klass) -> () + %9999 = tuple() + return %9999 : $() +} diff --git a/test/SILOptimizer/caller_analysis_printer.sil b/test/SILOptimizer/caller_analysis_printer.sil deleted file mode 100644 index 3d43a0ce1ea66..0000000000000 --- a/test/SILOptimizer/caller_analysis_printer.sil +++ /dev/null @@ -1,149 +0,0 @@ -// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil -enable-sil-verify-all %s -caller-analysis-printer -o /dev/null | %FileCheck --check-prefix=CHECK %s - -// CHECK: Function dead_func has caller: false, partial applied args = 0 -// CHECK: Function call_top has caller: false, partial applied args = 0 -// CHECK: Function call_middle has caller: true, partial applied args = 0 -// CHECK: Function call_bottom has caller: true, partial applied args = 0 -// CHECK: Function self_recursive_func has caller: true, partial applied args = 0 -// CHECK: Function mutual_recursive_func1 has caller: true, partial applied args = 0 -// CHECK: Function mutual_recursive_func2 has caller: true, partial applied args = 0 -// CHECK: Function multi_called has caller: true, partial applied args = 0 -// CHECK: Function multi_calles has caller: false, partial applied args = 0 -// CHECK: Function multi_callers has caller: true, partial applied args = 0 -// CHECK: Function multi_caller1 has caller: false, partial applied args = 0 -// CHECK: Function multi_caller2 has caller: false, partial applied args = 0 -// CHECK: Function closure1 has caller: false, partial applied args = 1 -// CHECK: Function closure2 has caller: false, partial applied args = 2 -// CHECK: Function partial_apply_one_arg has caller: false, partial applied args = 0 -// CHECK: Function partial_apply_two_args1 has caller: false, partial applied args = 0 -// CHECK: Function partial_apply_two_args2 has caller: false, partial applied args = 0 - -sil_stage canonical - -import Builtin - -sil hidden @dead_func : $@convention(thin) () -> () { - %2 = tuple () - return %2 : $() -} - -sil hidden @call_top : $@convention(thin) () -> () { -bb0: - %0 = function_ref @call_middle : $@convention(thin) () -> () - %1 = apply %0() : $@convention(thin) () -> () - %2 = tuple () - return %2 : $() -} - -sil hidden @call_middle : $@convention(thin) () -> () { -bb0: - %0 = function_ref @call_bottom : $@convention(thin) () -> () - %1 = apply %0() : $@convention(thin) () -> () - %2 = tuple () - return %2 : $() -} - -sil hidden @call_bottom : $@convention(thin) () -> () { -bb0: - %0 = tuple () - return %0 : $() -} - -sil hidden @self_recursive_func : $@convention(thin) () -> () { -bb0: - %0 = function_ref @self_recursive_func : $@convention(thin) () -> () - %1 = apply %0() : $@convention(thin) () -> () - %2 = tuple () - return %2 : $() -} - -sil hidden @mutual_recursive_func1 : $@convention(thin) () -> () { -bb0: - %0 = function_ref @mutual_recursive_func2 : $@convention(thin) () -> () - %1 = apply %0() : $@convention(thin) () -> () - %2 = tuple () - return %2 : $() -} - -sil hidden @mutual_recursive_func2 : $@convention(thin) () -> () { -bb0: - %0 = function_ref @mutual_recursive_func1 : $@convention(thin) () -> () - %1 = apply %0() : $@convention(thin) () -> () - %2 = tuple () - return %2 : 
$() -} - -sil hidden @multi_called : $@convention(thin) () -> () { -bb0: - %2 = tuple () - return %2 : $() -} - -sil hidden @multi_calles : $@convention(thin) () -> () { -bb0: - %0 = function_ref @multi_called : $@convention(thin) () -> () - %1 = apply %0() : $@convention(thin) () -> () - cond_br undef, bb1, bb2 -bb1: - %2 = apply %0() : $@convention(thin) () -> () - br bb3 -bb2: - %3 = apply %0() : $@convention(thin) () -> () - br bb3 -bb3: - %4 = tuple () - return %4 : $() -} - -sil hidden @multi_callers : $@convention(thin) () -> () { -bb0: - %2 = tuple () - return %2 : $() -} - -sil hidden @multi_caller1 : $@convention(thin) () -> () { -bb0: - %0 = function_ref @multi_callers : $@convention(thin) () -> () - %1 = apply %0() : $@convention(thin) () -> () - %2 = tuple () - return %2 : $() -} - -sil hidden @multi_caller2 : $@convention(thin) () -> () { -bb0: - %0 = function_ref @multi_callers : $@convention(thin) () -> () - %1 = apply %0() : $@convention(thin) () -> () - %2 = tuple () - return %2 : $() -} - -sil @closure1 : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 { -bb0(%0 : $Builtin.Int32, %1 : $Builtin.Int32): - return %0 : $Builtin.Int32 -} - -sil @closure2 : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 { -bb0(%0 : $Builtin.Int32, %1 : $Builtin.Int32): - return %0 : $Builtin.Int32 -} - -sil @partial_apply_one_arg : $@convention(thin) (Builtin.Int32) -> @owned @callee_owned (Builtin.Int32) -> Builtin.Int32 { -bb0(%0 : $Builtin.Int32): - %1 = function_ref @closure1 : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 - %2 = partial_apply %1(%0) : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 - return %2 : $@callee_owned (Builtin.Int32) -> Builtin.Int32 -} - -sil @partial_apply_two_args1 : $@convention(thin) (Builtin.Int32) -> @owned @callee_owned () -> Builtin.Int32 { -bb0(%0 : $Builtin.Int32): - %1 = function_ref @closure1 : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 - %2 = partial_apply %1(%0, %0) : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 - return %2 : $@callee_owned () -> Builtin.Int32 -} - -sil @partial_apply_two_args2 : $@convention(thin) (Builtin.Int32) -> @owned @callee_owned () -> Builtin.Int32 { -bb0(%0 : $Builtin.Int32): - %1 = function_ref @closure2 : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 - %2 = partial_apply %1(%0, %0) : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 - return %2 : $@callee_owned () -> Builtin.Int32 -} From 176682a88cc8def1cc02b279d1caaaf1bec451d0 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Wed, 20 Jun 2018 14:23:16 -0700 Subject: [PATCH 03/12] [cmpcodesize] Require pyyaml so we can process yaml output from swift-demangle-yamldump. --- utils/cmpcodesize/setup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/utils/cmpcodesize/setup.py b/utils/cmpcodesize/setup.py index 5a92c8b39ce06..98303850a43cb 100644 --- a/utils/cmpcodesize/setup.py +++ b/utils/cmpcodesize/setup.py @@ -50,5 +50,8 @@ 'console_scripts': [ 'cmpcodesize = cmpcodesize:main', ], - } + }, + install_requires=[ + 'pyyaml==3.12' + ], ) From 195bbeb7af399aa21b9b46aba2a4aa710edc09b2 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Wed, 6 Jun 2018 12:45:41 -0700 Subject: [PATCH 04/12] [func-sig-opts] Capitalize nominal type names in this test file. 
--- test/SILOptimizer/functionsigopts.sil | 349 +++++++++++++------------- 1 file changed, 175 insertions(+), 174 deletions(-) diff --git a/test/SILOptimizer/functionsigopts.sil b/test/SILOptimizer/functionsigopts.sil index 4ef3bcb9a561d..e6c5027e85311 100644 --- a/test/SILOptimizer/functionsigopts.sil +++ b/test/SILOptimizer/functionsigopts.sil @@ -8,33 +8,33 @@ import Swift // Data Structures // ///////////////////// -class foo { +class Foo { var a: Int - deinit + deinit init() } -class bar { - var start: foo - var end: foo - deinit +class Bar { + var start: Foo + var end: Foo + deinit init() } -struct baz { - var start: foo - var end: foo +struct Baz { + var start: Foo + var end: Foo init() } -struct boo { - var tbaz = baz() +struct Boo { + var tBaz = Baz() var a = 0 init() } -struct lotsoffield { - var tbaz = baz() +struct LotsOfFields { + var tBaz = Baz() var a = 0 var b = 0 var c = 0 @@ -42,15 +42,15 @@ struct lotsoffield { init() } -struct goo { - var left : foo - var right : foo - var top : foo - var bottom : foo +struct Goo { + var left : Foo + var right : Foo + var top : Foo + var bottom : Foo } public protocol P { - func foo() -> Int64 + func Foo() -> Int64 } public protocol KlassFoo : class { @@ -80,11 +80,11 @@ bb0: } // CHECK-LABEL: sil [signature_optimized_thunk] [always_inline] @argument_with_incomplete_epilogue_release -// CHECK: [[IN2:%.*]] = struct_extract [[IN1:%.*]] : $goo, #goo.top -// CHECK: function_ref @$S41argument_with_incomplete_epilogue_releaseTf4x_nTf4gn_n : $@convention(thin) (@guaranteed foo, @owned foo) -> () +// CHECK: [[IN2:%.*]] = struct_extract [[IN1:%.*]] : $Goo, #Goo.top +// CHECK: function_ref @$S41argument_with_incomplete_epilogue_releaseTf4x_nTf4gn_n : $@convention(thin) (@guaranteed Foo, @owned Foo) -> () // CHECK: release_value [[IN2]] -sil @argument_with_incomplete_epilogue_release : $@convention(thin) (@owned goo) -> () { -bb0(%0 : $goo): +sil @argument_with_incomplete_epilogue_release : $@convention(thin) (@owned Goo) -> () { +bb0(%0 : $Goo): // make inline costs = 2 %c1 = builtin "assert_configuration"() : $Builtin.Int32 %c2 = builtin "assert_configuration"() : $Builtin.Int32 @@ -111,18 +111,18 @@ bb0(%0 : $goo): - %1 = struct_extract %0 : $goo, #goo.top - %2 = ref_element_addr %1 : $foo, #foo.a + %1 = struct_extract %0 : $Goo, #Goo.top + %2 = ref_element_addr %1 : $Foo, #Foo.a %3 = load %2 : $*Int %4 = function_ref @use_Int : $@convention(thin) (Int) -> () apply %4(%3) : $@convention(thin) (Int) -> () - %5 = struct_extract %0 : $goo, #goo.bottom - %6 = ref_element_addr %5 : $foo, #foo.a + %5 = struct_extract %0 : $Goo, #Goo.bottom + %6 = ref_element_addr %5 : $Foo, #Foo.a %7 = load %6 : $*Int apply %4(%7) : $@convention(thin) (Int) -> () - release_value %1 : $foo + release_value %1 : $Foo %8 = tuple () return %8 : $() } @@ -136,15 +136,15 @@ bb0(%0 : $goo): sil [serialized] @user : $@convention(thin) (Builtin.NativeObject) -> () sil [serialized] @create_object : $@convention(thin) () -> Builtin.NativeObject -// Make sure argument is exploded and the baz part is not passed in as argument, as its only use +// Make sure argument is exploded and the Baz part is not passed in as argument, as its only use // is a release. 
// // CHECK-LABEL: sil [signature_optimized_thunk] [always_inline] @dead_argument_due_to_only_release_user // CHECK: [[IN:%.*]] = function_ref @$S38dead_argument_due_to_only_release_userTf4gX_n -// CHECK: [[IN2:%.*]] = struct_extract [[IN1:%.*]] : $boo, #boo.a +// CHECK: [[IN2:%.*]] = struct_extract [[IN1:%.*]] : $Boo, #Boo.a // CHECK: apply [[IN]]([[IN2]]) -sil @dead_argument_due_to_only_release_user : $@convention(thin) (@owned boo) -> (Int, Int) { -bb0(%0 : $boo): +sil @dead_argument_due_to_only_release_user : $@convention(thin) (@owned Boo) -> (Int, Int) { +bb0(%0 : $Boo): // make it a non-trivial function %c1 = builtin "assert_configuration"() : $Builtin.Int32 %c2 = builtin "assert_configuration"() : $Builtin.Int32 @@ -169,23 +169,23 @@ bb0(%0 : $boo): %c21 = builtin "assert_configuration"() : $Builtin.Int32 %c22 = builtin "assert_configuration"() : $Builtin.Int32 - %1 = struct_extract %0 : $boo, #boo.tbaz - %2 = struct_extract %0 : $boo, #boo.a - release_value %1 : $baz + %1 = struct_extract %0 : $Boo, #Boo.tBaz + %2 = struct_extract %0 : $Boo, #Boo.a + release_value %1 : $Baz %4 = tuple (%2 : $Int, %2 : $Int) return %4 : $(Int, Int) } -// Make sure argument is exploded and the baz part is not passed in as argument, as its only use +// Make sure argument is exploded and the Baz part is not passed in as argument, as its only use // is a release. // CHECK-LABEL: sil [signature_optimized_thunk] [always_inline] @dead_argument_due_to_only_release_user_but__exploded // CHECK: [[FN1:%.*]] = function_ref @$S52dead_argument_due_to_only_release_user_but__explodedTf4gX_n -// CHECK: [[IN1:%.*]] = struct_extract %0 : $lotsoffield, #lotsoffield.c -// CHECK: [[IN2:%.*]] = struct_extract %0 : $lotsoffield, #lotsoffield.b -// CHECK: [[IN3:%.*]] = struct_extract %0 : $lotsoffield, #lotsoffield.a +// CHECK: [[IN1:%.*]] = struct_extract %0 : $LotsOfFields, #LotsOfFields.c +// CHECK: [[IN2:%.*]] = struct_extract %0 : $LotsOfFields, #LotsOfFields.b +// CHECK: [[IN3:%.*]] = struct_extract %0 : $LotsOfFields, #LotsOfFields.a // CHECK: apply [[FN1]]([[IN3]], [[IN2]], [[IN1]]) -sil @dead_argument_due_to_only_release_user_but__exploded : $@convention(thin) (@owned lotsoffield) -> (Int, Int, Int) { -bb0(%0 : $lotsoffield): +sil @dead_argument_due_to_only_release_user_but__exploded : $@convention(thin) (@owned LotsOfFields) -> (Int, Int, Int) { +bb0(%0 : $LotsOfFields): // make it a non-trivial function %c1 = builtin "assert_configuration"() : $Builtin.Int32 %c2 = builtin "assert_configuration"() : $Builtin.Int32 @@ -210,21 +210,22 @@ bb0(%0 : $lotsoffield): %c21 = builtin "assert_configuration"() : $Builtin.Int32 %c22 = builtin "assert_configuration"() : $Builtin.Int32 - %1 = struct_extract %0 : $lotsoffield, #lotsoffield.tbaz - %2 = struct_extract %0 : $lotsoffield, #lotsoffield.a - %3 = struct_extract %0 : $lotsoffield, #lotsoffield.b - %4 = struct_extract %0 : $lotsoffield, #lotsoffield.c - release_value %1 : $baz + %1 = struct_extract %0 : $LotsOfFields, #LotsOfFields.tBaz + %2 = struct_extract %0 : $LotsOfFields, #LotsOfFields.a + %3 = struct_extract %0 : $LotsOfFields, #LotsOfFields.b + %4 = struct_extract %0 : $LotsOfFields, #LotsOfFields.c + release_value %1 : $Baz %5 = tuple (%2 : $Int, %3 : $Int, %4 : $Int) return %5 : $(Int, Int, Int) } -// Make sure argument is exploded and the baz part is not passed in as argument, as its only use -// is a release. +// Since this is a value that contains only a singular owned type, there is no +// point from an ARC perspective in splitting it up. 
We still want to perform owned to guaranteed though. +// // CHECK-LABEL: sil [signature_optimized_thunk] [always_inline] @dead_argument_due_to_more_than_release_user -// CHECK: [[FN1:%.*]] = function_ref @$S43dead_argument_due_to_more_than_release_userTf4gX_n : $@convention(thin) (@guaranteed baz, Int) -> (Int, Int) -sil @dead_argument_due_to_more_than_release_user : $@convention(thin) (@owned boo) -> (Int, Int) { -bb0(%0 : $boo): +// CHECK: [[FN1:%.*]] = function_ref @$S43dead_argument_due_to_more_than_release_userTf4g_n : $@convention(thin) (@guaranteed Boo) -> (Int, Int) +sil @dead_argument_due_to_more_than_release_user : $@convention(thin) (@owned Boo) -> (Int, Int) { +bb0(%0 : $Boo): // make it a non-trivial function %c1 = builtin "assert_configuration"() : $Builtin.Int32 %c2 = builtin "assert_configuration"() : $Builtin.Int32 @@ -249,10 +250,10 @@ bb0(%0 : $boo): %c21 = builtin "assert_configuration"() : $Builtin.Int32 %c22 = builtin "assert_configuration"() : $Builtin.Int32 - %1 = struct_extract %0 : $boo, #boo.tbaz - %2 = struct_extract %0 : $boo, #boo.a - retain_value %1 : $baz - release_value %1 : $baz + %1 = struct_extract %0 : $Boo, #Boo.tBaz + %2 = struct_extract %0 : $Boo, #Boo.a + retain_value %1 : $Baz + release_value %1 : $Baz %4 = tuple (%2 : $Int, %2 : $Int) return %4 : $(Int, Int) } @@ -304,12 +305,12 @@ bb3: // Make sure %0 is a dead argument. // // CHECK-LABEL: sil [signature_optimized_thunk] [always_inline] @exploded_release_to_dead_argument -// CHECK: bb0([[INPUT_ARG0:%[0-9]+]] : $boo): +// CHECK: bb0([[INPUT_ARG0:%[0-9]+]] : $Boo): // CHECK: [[IN1:%.*]] = function_ref @$S33exploded_release_to_dead_argumentTf4d_n // CHECK: apply [[IN1]]() // CHECK: release_value [[INPUT_ARG0]] -sil @exploded_release_to_dead_argument : $@convention(thin) (@owned boo) -> () { -bb0(%0 : $boo): +sil @exploded_release_to_dead_argument : $@convention(thin) (@owned Boo) -> () { +bb0(%0 : $Boo): // make it a non-trivial function %c1 = builtin "assert_configuration"() : $Builtin.Int32 %c2 = builtin "assert_configuration"() : $Builtin.Int32 @@ -334,11 +335,11 @@ bb0(%0 : $boo): %c21 = builtin "assert_configuration"() : $Builtin.Int32 %c22 = builtin "assert_configuration"() : $Builtin.Int32 - %1 = struct_extract %0 : $boo, #boo.tbaz - %2 = struct_extract %1 : $baz, #baz.start - %3 = struct_extract %1 : $baz, #baz.end - release_value %2 : $foo - release_value %3 : $foo + %1 = struct_extract %0 : $Boo, #Boo.tBaz + %2 = struct_extract %1 : $Baz, #Baz.start + %3 = struct_extract %1 : $Baz, #Baz.end + release_value %2 : $Foo + release_value %3 : $Foo %f = function_ref @update_global: $@convention(thin) () -> () apply %f() : $@convention(thin) () -> () %4 = tuple () @@ -349,11 +350,11 @@ bb0(%0 : $boo): // Make sure %0 is not a dead argument, but gets converted to a guaranteed arg. 
// // CHECK-LABEL: sil [signature_optimized_thunk] [always_inline] @exploded_release_to_guaranteed_param -// CHECK: bb0([[INPUT_ARG0:%[0-9]+]] : $boo): +// CHECK: bb0([[INPUT_ARG0:%[0-9]+]] : $Boo): // CHECK: [[IN1:%.*]] = function_ref @$S36exploded_release_to_guaranteed_paramTf4gX_n // CHECK: release_value [[INPUT_ARG0]] -sil @exploded_release_to_guaranteed_param : $@convention(thin) (@owned boo) -> () { -bb0(%0 : $boo): +sil @exploded_release_to_guaranteed_param : $@convention(thin) (@owned Boo) -> () { +bb0(%0 : $Boo): // make it a non-trivial function %c1 = builtin "assert_configuration"() : $Builtin.Int32 %c2 = builtin "assert_configuration"() : $Builtin.Int32 @@ -378,25 +379,25 @@ bb0(%0 : $boo): %c21 = builtin "assert_configuration"() : $Builtin.Int32 %c22 = builtin "assert_configuration"() : $Builtin.Int32 - %1 = struct_extract %0 : $boo, #boo.tbaz - %2 = struct_extract %1 : $baz, #baz.start - %3 = struct_extract %1 : $baz, #baz.end - %4 = struct_extract %0 : $boo, #boo.a + %1 = struct_extract %0 : $Boo, #Boo.tBaz + %2 = struct_extract %1 : $Baz, #Baz.start + %3 = struct_extract %1 : $Baz, #Baz.end + %4 = struct_extract %0 : $Boo, #Boo.a %5 = function_ref @use_Int : $@convention(thin) (Int) -> () apply %5(%4) : $@convention(thin) (Int) -> () - release_value %2 : $foo - release_value %3 : $foo + release_value %2 : $Foo + release_value %3 : $Foo %6 = tuple () return %6 : $() } // CHECK-LABEL: sil [signature_optimized_thunk] [always_inline] @single_owned_return_value -// CHECK: bb0([[INPUT_ARG0:%[0-9]+]] : $boo): +// CHECK: bb0([[INPUT_ARG0:%[0-9]+]] : $Boo): // CHECK: [[IN1:%.*]] = function_ref @$S25single_owned_return_valueTf4n_g // CHECK: [[IN2:%.*]] = apply [[IN1]]([[INPUT_ARG0]] // CHECK: retain_value [[IN2]] -sil @single_owned_return_value : $@convention(thin) (@owned boo) -> @owned boo { -bb0(%0 : $boo): +sil @single_owned_return_value : $@convention(thin) (@owned Boo) -> @owned Boo { +bb0(%0 : $Boo): // make it a non-trivial function %c1 = builtin "assert_configuration"() : $Builtin.Int32 %c2 = builtin "assert_configuration"() : $Builtin.Int32 @@ -421,8 +422,8 @@ bb0(%0 : $boo): %c21 = builtin "assert_configuration"() : $Builtin.Int32 %c22 = builtin "assert_configuration"() : $Builtin.Int32 - retain_value %0 : $boo - return %0 : $boo + retain_value %0 : $Boo + return %0 : $Boo } @@ -430,8 +431,8 @@ bb0(%0 : $boo): // CHECK: function_ref @$S45single_owned_return_value_with_self_recursionTf4n_g // CHECK: [[RET:%.*]] = apply // CHECK: retain_value [[RET]] -sil @single_owned_return_value_with_self_recursion : $@convention(thin) (@owned boo) -> @owned boo { -bb0(%0 : $boo): +sil @single_owned_return_value_with_self_recursion : $@convention(thin) (@owned Boo) -> @owned Boo { +bb0(%0 : $Boo): // make it a non-trivial function %c1 = builtin "assert_configuration"() : $Builtin.Int32 %c2 = builtin "assert_configuration"() : $Builtin.Int32 @@ -458,23 +459,23 @@ bb0(%0 : $boo): cond_br undef, bb1, bb2 bb1: - retain_value %0 : $boo - br bb3(%0 : $boo) + retain_value %0 : $Boo + br bb3(%0 : $Boo) bb2: - %2 = function_ref @single_owned_return_value_with_self_recursion : $@convention(thin) (@owned boo) -> @owned boo - %3 = apply %2(%0) : $@convention(thin) (@owned boo) -> @owned boo - br bb3 (%3 : $boo) -bb3(%4 : $boo): - return %4 : $boo + %2 = function_ref @single_owned_return_value_with_self_recursion : $@convention(thin) (@owned Boo) -> @owned Boo + %3 = apply %2(%0) : $@convention(thin) (@owned Boo) -> @owned Boo + br bb3 (%3 : $Boo) +bb3(%4 : $Boo): + return %4 : $Boo } // CHECK-LABEL: 
sil [signature_optimized_thunk] [always_inline] @single_owned_return_value_with_interfering_release -// CHECK: bb0([[INPUT_ARG0:%[0-9]+]] : $boo): +// CHECK: bb0([[INPUT_ARG0:%[0-9]+]] : $Boo): // CHECK: [[IN1:%.*]] = function_ref @$S50single_owned_return_value_with_interfering_releaseTf4x_nTf4gnn_n // CHECK-NOT: retain_value // CHECK: return -sil @single_owned_return_value_with_interfering_release : $@convention(thin) (@owned boo) -> boo { -bb0(%0 : $boo): +sil @single_owned_return_value_with_interfering_release : $@convention(thin) (@owned Boo) -> Boo { +bb0(%0 : $Boo): // make it a non-trivial function %c1 = builtin "assert_configuration"() : $Builtin.Int32 %c2 = builtin "assert_configuration"() : $Builtin.Int32 @@ -499,22 +500,22 @@ bb0(%0 : $boo): %c21 = builtin "assert_configuration"() : $Builtin.Int32 %c22 = builtin "assert_configuration"() : $Builtin.Int32 - retain_value %0 : $boo - %1 = struct_extract %0 : $boo, #boo.tbaz - %2 = struct_extract %1 : $baz, #baz.start - release_value %2: $foo - return %0 : $boo + retain_value %0 : $Boo + %1 = struct_extract %0 : $Boo, #Boo.tBaz + %2 = struct_extract %1 : $Baz, #Baz.start + release_value %2: $Foo + return %0 : $Boo } // Make sure we do not move the retain_value in the throw block. // -// CHECK-LABEL: sil [serialized] [signature_optimized_thunk] [always_inline] @owned_to_unowned_retval_with_error_result : $@convention(thin) (@owned boo) -> (@owned boo, @error Error) { -// CHECK: function_ref @$S41owned_to_unowned_retval_with_error_resultTfq4n_g : $@convention(thin) (@owned boo) -> (boo, @error Error) +// CHECK-LABEL: sil [serialized] [signature_optimized_thunk] [always_inline] @owned_to_unowned_retval_with_error_result : $@convention(thin) (@owned Boo) -> (@owned Boo, @error Error) { +// CHECK: function_ref @$S41owned_to_unowned_retval_with_error_resultTfq4n_g : $@convention(thin) (@owned Boo) -> (Boo, @error Error) // CHECK: bb1 // CHECK-NOT: retain_value // CHECK: bb2 -sil [serialized] @owned_to_unowned_retval_with_error_result : $@convention(thin) (@owned boo) -> (@owned boo, @error Error) { -bb0(%0 : $boo): +sil [serialized] @owned_to_unowned_retval_with_error_result : $@convention(thin) (@owned Boo) -> (@owned Boo, @error Error) { +bb0(%0 : $Boo): // make it a non-trivial function %c1 = builtin "assert_configuration"() : $Builtin.Int32 %c2 = builtin "assert_configuration"() : $Builtin.Int32 @@ -542,11 +543,11 @@ bb0(%0 : $boo): cond_br undef, bb1, bb2 bb1: - retain_value %0 : $boo - return %0 : $boo + retain_value %0 : $Boo + return %0 : $Boo bb2: - retain_value %0 : $boo + retain_value %0 : $Boo throw undef : $Error } @@ -690,11 +691,11 @@ bb0(%0 : $Builtin.NativeObject, %1 : $Builtin.NativeObject): return %5 : $() } -sil [serialized] @exploded_release_to_guaranteed_param_callsite : $@convention(thin) (@owned boo) -> () { -bb0(%0 : $boo): - %2 = function_ref @exploded_release_to_guaranteed_param : $@convention(thin) (@owned boo) -> () - retain_value %0 : $boo - %4 = apply %2(%0) : $@convention(thin) (@owned boo) -> () +sil [serialized] @exploded_release_to_guaranteed_param_callsite : $@convention(thin) (@owned Boo) -> () { +bb0(%0 : $Boo): + %2 = function_ref @exploded_release_to_guaranteed_param : $@convention(thin) (@owned Boo) -> () + retain_value %0 : $Boo + %4 = apply %2(%0) : $@convention(thin) (@owned Boo) -> () %5 = tuple() return %5 : $() } @@ -707,27 +708,27 @@ bb0(%0 : $boo): // CHECK: [[RET:%.*]] = apply // CHECK: retain_value [[RET]] // CHECK: release_value [[RET]] -sil 
@single_owned_return_value_with_self_recursion_callsite : $@convention(thin) (@owned boo) -> () { -bb0(%0 : $boo): - %2 = function_ref @single_owned_return_value_with_self_recursion : $@convention(thin) (@owned boo) -> @owned boo - %4 = apply %2(%0) : $@convention(thin) (@owned boo) -> @owned boo - release_value %4 : $boo +sil @single_owned_return_value_with_self_recursion_callsite : $@convention(thin) (@owned Boo) -> () { +bb0(%0 : $Boo): + %2 = function_ref @single_owned_return_value_with_self_recursion : $@convention(thin) (@owned Boo) -> @owned Boo + %4 = apply %2(%0) : $@convention(thin) (@owned Boo) -> @owned Boo + release_value %4 : $Boo %5 = tuple() return %5 : $() } -sil [serialized] @exploded_release_to_dead_param_callsite : $@convention(thin) (@owned boo) -> () { -bb0(%0 : $boo): - %2 = function_ref @exploded_release_to_dead_argument : $@convention(thin) (@owned boo) -> () - retain_value %0 : $boo - %4 = apply %2(%0) : $@convention(thin) (@owned boo) -> () +sil [serialized] @exploded_release_to_dead_param_callsite : $@convention(thin) (@owned Boo) -> () { +bb0(%0 : $Boo): + %2 = function_ref @exploded_release_to_dead_argument : $@convention(thin) (@owned Boo) -> () + retain_value %0 : $Boo + %4 = apply %2(%0) : $@convention(thin) (@owned Boo) -> () %5 = tuple() return %5 : $() } -sil [serialized] @single_owned_return_value_callsite : $@convention(thin) (@owned boo) -> () { -bb0(%0 : $boo): +sil [serialized] @single_owned_return_value_callsite : $@convention(thin) (@owned Boo) -> () { +bb0(%0 : $Boo): cond_br undef, bb1, bb2 bb1: @@ -737,14 +738,14 @@ bb2: br bb3 bb3: - %2 = function_ref @single_owned_return_value : $@convention(thin) (@owned boo) -> @owned boo - %3 = apply %2(%0) : $@convention(thin) (@owned boo) -> @owned boo + %2 = function_ref @single_owned_return_value : $@convention(thin) (@owned Boo) -> @owned Boo + %3 = apply %2(%0) : $@convention(thin) (@owned Boo) -> @owned Boo %4 = tuple() return %4 : $() } -sil [serialized] @single_owned_return_value_with_interfering_release_callsite : $@convention(thin) (@owned boo) -> () { -bb0(%0 : $boo): +sil [serialized] @single_owned_return_value_with_interfering_release_callsite : $@convention(thin) (@owned Boo) -> () { +bb0(%0 : $Boo): cond_br undef, bb1, bb2 bb1: @@ -754,14 +755,14 @@ bb2: br bb3 bb3: - %2 = function_ref @single_owned_return_value_with_interfering_release : $@convention(thin) (@owned boo) -> boo - %3 = apply %2(%0) : $@convention(thin) (@owned boo) -> boo + %2 = function_ref @single_owned_return_value_with_interfering_release : $@convention(thin) (@owned Boo) -> Boo + %3 = apply %2(%0) : $@convention(thin) (@owned Boo) -> Boo %4 = tuple() return %4 : $() } -sil [serialized] @owned_to_unowned_retval_with_error_result_callsite : $@convention(thin) (@owned boo) -> () { -bb0(%0 : $boo): +sil [serialized] @owned_to_unowned_retval_with_error_result_callsite : $@convention(thin) (@owned Boo) -> () { +bb0(%0 : $Boo): cond_br undef, bb1, bb2 bb1: @@ -771,10 +772,10 @@ bb2: br bb3 bb3: - %2 = function_ref @owned_to_unowned_retval_with_error_result : $@convention(thin) (@owned boo) -> (@owned boo, @error Error) - try_apply %2(%0) : $@convention(thin) (@owned boo) -> (@owned boo, @error Error), normal bb4, error bb5 + %2 = function_ref @owned_to_unowned_retval_with_error_result : $@convention(thin) (@owned Boo) -> (@owned Boo, @error Error) + try_apply %2(%0) : $@convention(thin) (@owned Boo) -> (@owned Boo, @error Error), normal bb4, error bb5 -bb4(%99 : $boo): +bb4(%99 : $Boo): %4 = tuple() return %4 : $() @@ -782,34 
+783,34 @@ bb5(%100 : $Error): unreachable } -sil [serialized] @dead_argument_due_to_only_release_user_callsite : $@convention(thin) (@owned boo) -> () { -bb0(%0 : $boo): - %2 = function_ref @dead_argument_due_to_only_release_user : $@convention(thin) (@owned boo) -> (Int, Int) - %4 = apply %2(%0) : $@convention(thin) (@owned boo) -> (Int, Int) +sil [serialized] @dead_argument_due_to_only_release_user_callsite : $@convention(thin) (@owned Boo) -> () { +bb0(%0 : $Boo): + %2 = function_ref @dead_argument_due_to_only_release_user : $@convention(thin) (@owned Boo) -> (Int, Int) + %4 = apply %2(%0) : $@convention(thin) (@owned Boo) -> (Int, Int) %5 = tuple() return %5 : $() } -sil [serialized] @dead_argument_due_to_only_release_user_but__exploded_callsite : $@convention(thin) (@owned lotsoffield) -> () { -bb0(%0 : $lotsoffield): - %2 = function_ref @dead_argument_due_to_only_release_user_but__exploded : $@convention(thin) (@owned lotsoffield) -> (Int, Int, Int) - %4 = apply %2(%0) : $@convention(thin) (@owned lotsoffield) -> (Int, Int, Int) +sil [serialized] @dead_argument_due_to_only_release_user_but__exploded_callsite : $@convention(thin) (@owned LotsOfFields) -> () { +bb0(%0 : $LotsOfFields): + %2 = function_ref @dead_argument_due_to_only_release_user_but__exploded : $@convention(thin) (@owned LotsOfFields) -> (Int, Int, Int) + %4 = apply %2(%0) : $@convention(thin) (@owned LotsOfFields) -> (Int, Int, Int) %5 = tuple() return %5 : $() } -sil [serialized] @dead_argument_due_to_more_than_release_user_callsite : $@convention(thin) (@owned boo) -> () { -bb0(%0 : $boo): - %2 = function_ref @dead_argument_due_to_more_than_release_user : $@convention(thin) (@owned boo) -> (Int, Int) - %4 = apply %2(%0) : $@convention(thin) (@owned boo) -> (Int, Int) +sil [serialized] @dead_argument_due_to_more_than_release_user_callsite : $@convention(thin) (@owned Boo) -> () { +bb0(%0 : $Boo): + %2 = function_ref @dead_argument_due_to_more_than_release_user : $@convention(thin) (@owned Boo) -> (Int, Int) + %4 = apply %2(%0) : $@convention(thin) (@owned Boo) -> (Int, Int) %5 = tuple() return %5 : $() } -sil [serialized] @argument_with_incomplete_epilogue_release_callsite : $@convention(thin) (@owned goo) -> () { -bb0(%0 : $goo): - %2 = function_ref @argument_with_incomplete_epilogue_release : $@convention(thin) (@owned goo) -> () - %4 = apply %2(%0) : $@convention(thin) (@owned goo) -> () +sil [serialized] @argument_with_incomplete_epilogue_release_callsite : $@convention(thin) (@owned Goo) -> () { +bb0(%0 : $Goo): + %2 = function_ref @argument_with_incomplete_epilogue_release : $@convention(thin) (@owned Goo) -> () + %4 = apply %2(%0) : $@convention(thin) (@owned Goo) -> () %5 = tuple() return %5 : $() } @@ -1332,33 +1333,33 @@ bb0(%0 : $Builtin.NativeObject, %1 : $Builtin.NativeObject): // CHECK-NEGATIVE-NOT: sil {{.*}}_dont_explode_single_enum -sil [noinline] @dont_explode_single_enum : $@convention(thin) (@owned Optional<(foo, foo)>) -> @owned foo { -bb0(%0 : $Optional<(foo, foo)>): - %281 = unchecked_enum_data %0 : $Optional<(foo, foo)>, #Optional.some!enumelt.1 - %282 = tuple_extract %281 : $(foo, foo), 0 - %283 = tuple_extract %281 : $(foo, foo), 1 - strong_release %283 : $foo - return %282 : $foo +sil [noinline] @dont_explode_single_enum : $@convention(thin) (@owned Optional<(Foo, Foo)>) -> @owned Foo { +bb0(%0 : $Optional<(Foo, Foo)>): + %281 = unchecked_enum_data %0 : $Optional<(Foo, Foo)>, #Optional.some!enumelt.1 + %282 = tuple_extract %281 : $(Foo, Foo), 0 + %283 = tuple_extract %281 : $(Foo, Foo), 1 + 
strong_release %283 : $Foo + return %282 : $Foo } // CHECK-LABEL: sil @call_with_single_enum -// CHECK: [[F:%[0-9]+]] = function_ref @dont_explode_single_enum : $@convention(thin) (@owned Optional<(foo, foo)>) -> @owned foo +// CHECK: [[F:%[0-9]+]] = function_ref @dont_explode_single_enum : $@convention(thin) (@owned Optional<(Foo, Foo)>) -> @owned Foo // CHECK: apply [[F]](%0) // CHECK: return -sil @call_with_single_enum : $@convention(thin) (@owned Optional<(foo, foo)>) -> @owned foo { -bb0(%0 : $Optional<(foo, foo)>): - %f = function_ref @dont_explode_single_enum : $@convention(thin) (@owned Optional<(foo, foo)>) -> @owned foo - %a = apply %f(%0) : $@convention(thin) (@owned Optional<(foo, foo)>) -> @owned foo - return %a : $foo +sil @call_with_single_enum : $@convention(thin) (@owned Optional<(Foo, Foo)>) -> @owned Foo { +bb0(%0 : $Optional<(Foo, Foo)>): + %f = function_ref @dont_explode_single_enum : $@convention(thin) (@owned Optional<(Foo, Foo)>) -> @owned Foo + %a = apply %f(%0) : $@convention(thin) (@owned Optional<(Foo, Foo)>) -> @owned Foo + return %a : $Foo } // Check if externally available functions are optimized. -sil public_external [noinline] @externally_available_with_dead_arg : $@convention(thin) (@guaranteed foo) -> () { -bb0(%0 : $foo): +sil public_external [noinline] @externally_available_with_dead_arg : $@convention(thin) (@guaranteed Foo) -> () { +bb0(%0 : $Foo): %r = tuple() return %r : $() } @@ -1367,10 +1368,10 @@ bb0(%0 : $foo): // CHECK: [[F:%[0-9]+]] = function_ref @$S34externally_available_with_dead_argTf4d_n : $@convention(thin) () -> () // CHECK: apply [[F]]() // CHECK: return -sil @call_externally_available : $@convention(thin) (@guaranteed foo) -> () { -bb0(%0 : $foo): - %f = function_ref @externally_available_with_dead_arg : $@convention(thin) (@guaranteed foo) -> () - %a = apply %f(%0) : $@convention(thin) (@guaranteed foo) -> () +sil @call_externally_available : $@convention(thin) (@guaranteed Foo) -> () { +bb0(%0 : $Foo): + %f = function_ref @externally_available_with_dead_arg : $@convention(thin) (@guaranteed Foo) -> () + %a = apply %f(%0) : $@convention(thin) (@guaranteed Foo) -> () %r = tuple() return %r : $() } @@ -1378,11 +1379,11 @@ bb0(%0 : $foo): // We should remove the array semantic from specialized calls. 
-// CHECK-LABEL: sil [serialized] [signature_optimized_thunk] [always_inline] [_semantics "array.foobar"] @array_semantic : $@convention(method) (@owned Builtin.NativeObject) -> () { +// CHECK-LABEL: sil [serialized] [signature_optimized_thunk] [always_inline] [_semantics "array.Foobar"] @array_semantic : $@convention(method) (@owned Builtin.NativeObject) -> () { // CHECK: [[FUNC_REF:%[0-9]+]] = function_ref @$S14array_semanticTfq4g_n : $@convention(thin) (@guaranteed Builtin.NativeObject) -> () // CHECK: apply [[FUNC_REF]] // CHECK: release_value -sil [serialized] [_semantics "array.foobar"] @array_semantic : $@convention(method) (@owned Builtin.NativeObject) -> () { +sil [serialized] [_semantics "array.Foobar"] @array_semantic : $@convention(method) (@owned Builtin.NativeObject) -> () { bb0(%0 : $Builtin.NativeObject): // make it a non-trivial function %c1 = builtin "assert_configuration"() : $Builtin.Int32 @@ -1566,7 +1567,7 @@ bb0(%0 : $T): sil hidden [noinline] @generic_in_to_guaranteed : $@convention(thin) (@in T) -> Int64 { bb0(%0 : $*T): - %2 = witness_method $T, #P.foo!1 : (Self) -> () -> Int64 : $@convention(witness_method: P) <τ_0_0 where τ_0_0 : P> (@in_guaranteed τ_0_0) -> Int64 + %2 = witness_method $T, #P.Foo!1 : (Self) -> () -> Int64 : $@convention(witness_method: P) <τ_0_0 where τ_0_0 : P> (@in_guaranteed τ_0_0) -> Int64 %3 = apply %2(%0) : $@convention(witness_method: P) <τ_0_0 where τ_0_0 : P> (@in_guaranteed τ_0_0) -> Int64 destroy_addr %0 : $*T return %3 : $Int64 @@ -1591,14 +1592,14 @@ bb0(%0 : $*T): return %15 : $Int64 } -// CHECK-LABEL: sil [signature_optimized_thunk] [always_inline] @generic_func_with_dead_non_generic_arg : $@convention(thin) (@owned foo, @in T) -> () +// CHECK-LABEL: sil [signature_optimized_thunk] [always_inline] @generic_func_with_dead_non_generic_arg : $@convention(thin) (@owned Foo, @in T) -> () // CHECK: function_ref @$S027generic_func_with_dead_non_A4_argTf4dd_n : $@convention(thin) () -> () // Call the specialization which is not polymorphic. // CHECK: apply // CHECK: destroy_addr // CHECK: end sil function 'generic_func_with_dead_non_generic_arg' -sil [noinline] @generic_func_with_dead_non_generic_arg : $@convention(thin) (@owned foo, @in T) -> () { -bb0(%0 : $foo, %1 : $*T): +sil [noinline] @generic_func_with_dead_non_generic_arg : $@convention(thin) (@owned Foo, @in T) -> () { +bb0(%0 : $Foo, %1 : $*T): destroy_addr %1 : $*T %r = tuple() return %r : $() @@ -1723,19 +1724,19 @@ bb0(%0 : $*T): // CHECK-NOT: strong_release // CHECK: return -// CHECK-LABEL: sil shared @$S25single_owned_return_valueTf4n_g : $@convention(thin) (@owned boo) -> boo -// CHECK: bb0([[INPUT_ARG0:%[0-9]+]] : $boo): +// CHECK-LABEL: sil shared @$S25single_owned_return_valueTf4n_g : $@convention(thin) (@owned Boo) -> Boo +// CHECK: bb0([[INPUT_ARG0:%[0-9]+]] : $Boo): // CHECK-NOT: retain_value // CHECK: return // There should not be a single retain in this function. 
 //
-// CHECK-LABEL: sil shared @$S45single_owned_return_value_with_self_recursionTf4n_g : $@convention(thin) (@owned boo) -> boo
+// CHECK-LABEL: sil shared @$S45single_owned_return_value_with_self_recursionTf4n_g : $@convention(thin) (@owned Boo) -> Boo
 // CHECK: bb0
 // CHECK-NOT: retain_value
 // CHECK: return

-// CHECK-LABEL: @$S41owned_to_unowned_retval_with_error_resultTfq4n_g : $@convention(thin) (@owned boo) -> (boo, @error Error) {
+// CHECK-LABEL: @$S41owned_to_unowned_retval_with_error_resultTfq4n_g : $@convention(thin) (@owned Boo) -> (Boo, @error Error) {
 // CHECK: bb2
 // CHECK: retain_value
 // CHECK: throw

From 84a6e0cff5540dd32cc15286472bfa690f7106ba Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Wed, 20 Jun 2018 16:45:35 -0700
Subject: [PATCH 05/12] [func-sig-opts] Change the argument explosion heuristic
 to improve code-size.

The old heuristic was pretty simple:

1. If we had a type that was just a single type wrapped in several levels of
single-field structs, do not optimize.

2. If we had an owned parameter for which we found /some/ releases, we always
tried to split up the parameter, regardless of the number of leaf nodes that
were live.

3. Otherwise, if we had > 0 and <= 3 live leaf types, explode the parameter.

This was far too aggressive, and in case 1 it was working around a bug in
ProjectionTree.

The new heuristic is guided by the following ideas:

1. We want to explode an argument if by exploding it we reduce the number of
non-trivial leaf types passed into the function.

2. We want to explode an argument if by exploding it we can guarantee that o2g
(owned-to-guaranteed) will occur, so that when we run argument explosion a
second time, we delete the exploded arguments that became dead.

3. We want to be very conservative about creating too many function arguments,
since that can potentially increase code-size. Thus we only want to explode if
we can guarantee that we will not create more than 3 live arguments.

An illustrative sketch of this decision procedure follows.
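For review purposes, here is a minimal, self-contained C++ sketch of the
decision procedure that these three ideas describe. It is illustrative only:
LeafNode, Argument, and the fields below are hypothetical stand-ins for the
patch's ProjectionTree leaves, ArgumentDescriptor, and epilogue-release
matching; the actual implementation is in the diff that follows.

  #include <cstdio>
  #include <vector>

  struct LeafNode {
    bool isTrivial;            // leaf type needs no retain/release
    bool isLive;               // has a non-projection user in the callee
    bool onlyEpilogueReleased; // every user is an epilogue release (o2g-able)
  };

  struct Argument {
    bool isOwned; // passed with the @owned convention
    std::vector<LeafNode> leaves;
  };

  static const unsigned MaxExplosionSize = 3;

  // Idea 3: never create more than MaxExplosionSize live arguments.
  // Idea 1: explode when doing so reduces the non-trivial leaves passed in.
  // Idea 2: for @owned arguments, leaves that die in the epilogue count as
  // removable, since o2g plus a second argument-explosion run deletes them.
  static bool shouldExplode(const Argument &arg) {
    unsigned liveLeaves = 0, nonTrivialLeaves = 0, liveNonTrivial = 0,
             removableAfterO2G = 0;
    for (const LeafNode &leaf : arg.leaves) {
      nonTrivialLeaves += !leaf.isTrivial;
      if (!leaf.isLive)
        continue;
      ++liveLeaves;
      liveNonTrivial += !leaf.isTrivial;
      removableAfterO2G += leaf.onlyEpilogueReleased;
    }

    // Completely dead arguments are left to dead argument elimination.
    if (liveLeaves == 0)
      return false;

    // Idea 1, bounded by idea 3's size cap.
    if (liveLeaves <= MaxExplosionSize && liveNonTrivial < nonTrivialLeaves)
      return true;

    // Idea 2: exploding exposes o2g; the epilogue-release-only leaves then
    // become dead and are removed when FSO runs a second time.
    if (arg.isOwned && liveNonTrivial > 1)
      return liveLeaves - removableAfterO2G <= MaxExplosionSize;

    return false;
  }

  int main() {
    // Two live non-trivial leaves; one is kept alive only by an epilogue
    // release, so exploding pays off once o2g runs.
    Argument arg{/*isOwned=*/true,
                 {{false, true, false}, {false, true, true}}};
    std::printf("explode: %s\n", shouldExplode(arg) ? "yes" : "no");
  }

This example mirrors the big_arg_with_one_nontrivial_use_o2g test added below
and prints "explode: yes".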
The new heuristic results in better code-size/same performance:

rdar://39957093
---
 include/swift/SIL/Projection.h                |  37 +++-
 .../swift/SILOptimizer/Analysis/ARCAnalysis.h |  13 +-
 lib/SIL/Projection.cpp                        |   8 +
 .../ArgumentExplosionTransform.cpp            | 166 ++++++++++++--
 .../FunctionSignatureOpts.cpp                 |  15 +-
 .../FunctionSignatureOpts.h                   |  13 +-
 .../funcsig_explode_heuristic.sil             | 207 ++++++++++++++++++
 .../funcsig_explode_heuristic_inline.sil      |  93 ++++++++
 test/SILOptimizer/functionsigopts.sil         |   6 +-
 9 files changed, 525 insertions(+), 33 deletions(-)
 create mode 100644 test/SILOptimizer/funcsig_explode_heuristic.sil
 create mode 100644 test/SILOptimizer/funcsig_explode_heuristic_inline.sil

diff --git a/include/swift/SIL/Projection.h b/include/swift/SIL/Projection.h
index eace9a2d001c8..980bc066c2a84 100644
--- a/include/swift/SIL/Projection.h
+++ b/include/swift/SIL/Projection.h
@@ -22,11 +22,12 @@
 #ifndef SWIFT_SIL_PROJECTION_H
 #define SWIFT_SIL_PROJECTION_H
 
+#include "swift/AST/TypeAlignments.h"
 #include "swift/Basic/NullablePtr.h"
 #include "swift/Basic/PointerIntEnum.h"
-#include "swift/AST/TypeAlignments.h"
-#include "swift/SIL/SILValue.h"
+#include "swift/Basic/STLExtras.h"
 #include "swift/SIL/SILInstruction.h"
+#include "swift/SIL/SILValue.h"
 #include "swift/SILOptimizer/Analysis/ARCAnalysis.h"
 #include "swift/SILOptimizer/Analysis/RCIdentityAnalysis.h"
 #include "llvm/ADT/Hashing.h"
@@ -766,13 +767,15 @@ class ProjectionTreeNode {
   ~ProjectionTreeNode() = default;
   ProjectionTreeNode(const ProjectionTreeNode &) = default;
 
-  llvm::ArrayRef<unsigned> getChildProjections() {
-    return llvm::makeArrayRef(ChildProjections);
+  bool isLeaf() const { return ChildProjections.empty(); }
+
+  ArrayRef<unsigned> getChildProjections() const {
+    return llvm::makeArrayRef(ChildProjections);
   }
 
-  llvm::Optional<Projection> &getProjection() { return Proj; }
+  Optional<Projection> &getProjection() { return Proj; }
 
-  llvm::SmallVector<Operand *, 8> getNonProjUsers() const {
+  const SmallVectorImpl<Operand *> &getNonProjUsers() const {
     return NonProjUsers;
   };
 
@@ -929,6 +932,24 @@ class ProjectionTree {
     return false;
   }
 
+  void getAllLeafTypes(llvm::SmallVectorImpl<SILType> &outArray) const {
+    llvm::SmallVector<const ProjectionTreeNode *, 32> worklist;
+    worklist.push_back(getRoot());
+
+    while (!worklist.empty()) {
+      auto *node = worklist.pop_back_val();
+      // If we have a leaf node, add its type.
+      if (node->isLeaf()) {
+        outArray.push_back(node->getType());
+        continue;
+      }
+
+      // Otherwise, add the node's children to the worklist.
+      transform(node->getChildProjections(), std::back_inserter(worklist),
+                [&](unsigned idx) { return getNode(idx); });
+    }
+  }
+
   void getLiveLeafTypes(llvm::SmallVectorImpl<SILType> &OutArray) const {
     for (unsigned LeafIndex : LiveLeafIndices) {
       const ProjectionTreeNode *Node = getNode(LeafIndex);
@@ -955,7 +976,9 @@ class ProjectionTree {
 
   void replaceValueUsesWithLeafUses(SILBuilder &B, SILLocation Loc,
                                     llvm::SmallVectorImpl<SILValue> &Leafs);
-
+
+  void getUsers(SmallPtrSetImpl<SILInstruction *> &users) const;
+
 private:
   void createRoot(SILType BaseTy) {
     assert(ProjectionTreeNodes.empty() &&
diff --git a/include/swift/SILOptimizer/Analysis/ARCAnalysis.h b/include/swift/SILOptimizer/Analysis/ARCAnalysis.h
index 6ccfb72bc3e6a..d61266a51bec7 100644
--- a/include/swift/SILOptimizer/Analysis/ARCAnalysis.h
+++ b/include/swift/SILOptimizer/Analysis/ARCAnalysis.h
@@ -289,7 +289,7 @@ class ConsumedArgToEpilogueReleaseMatcher {
     auto iter = ArgInstMap.find(arg);
     if (iter == ArgInstMap.end())
       return false;
-    return iter->second.foundSomeButNotAllReleases();
+    return iter->second.getPartiallyPostDomReleases().hasValue();
   }
 
   bool isSingleRelease(SILArgument *arg) const {
@@ -334,6 +334,17 @@ class ConsumedArgToEpilogueReleaseMatcher {
     return completeList.getValue();
   }
 
+  Optional<ArrayRef<SILInstruction *>>
+  getPartiallyPostDomReleaseSet(SILArgument *arg) const {
+    auto iter = ArgInstMap.find(arg);
+    if (iter == ArgInstMap.end())
+      return None;
+    auto partialList = iter->second.getPartiallyPostDomReleases();
+    if (!partialList)
+      return None;
+    return partialList;
+  }
+
   ArrayRef<SILInstruction *> getReleasesForArgument(SILValue value) const {
     auto *arg = dyn_cast<SILArgument>(value);
     if (!arg)
diff --git a/lib/SIL/Projection.cpp b/lib/SIL/Projection.cpp
index 29cc60b72a431..934678578aaa2 100644
--- a/lib/SIL/Projection.cpp
+++ b/lib/SIL/Projection.cpp
@@ -1476,3 +1476,11 @@ replaceValueUsesWithLeafUses(SILBuilder &Builder, SILLocation Loc,
     NewNodes.clear();
   }
 }
+
+void ProjectionTree::getUsers(SmallPtrSetImpl<SILInstruction *> &users) const {
+  for (auto *node : ProjectionTreeNodes) {
+    for (auto *op : node->getNonProjUsers()) {
+      users.insert(op->getUser());
+    }
+  }
+}
diff --git a/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp b/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
index 21a3ca467e497..33edf8fcd64c8 100644
--- a/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
+++ b/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
@@ -9,6 +9,15 @@
 // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
 //
 //===----------------------------------------------------------------------===//
+///
+/// \file
+///
+/// This file contains an implementation of the partial dead argument
+/// elimination optimization, which attempts to remove non-trivial arguments
+/// of callees in order to eliminate the lifetime constraints that a large
+/// argument places on values in the caller.
+///
+//===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "fso-argument-explosion-transform"
 #include "FunctionSignatureOpts.h"
@@ -25,12 +34,68 @@ static llvm::cl::opt<bool> FSODisableArgExplosion(
 // Utility
 //===----------------------------------------------------------------------===//
 
+static bool
+shouldExplodeTrivial(FunctionSignatureTransformDescriptor &transformDesc,
+                     ArgumentDescriptor &argDesc, SILType ty) {
+  // Just blow up parameters if we will reduce the size of the arguments.
+  //
+  // FIXME: In the future we should attempt to only do this if we can generate a
+  // thunk.
+  // This was tried with the current heuristic and it resulted in a 1%
+  // increase in code-size in the standard library.
+  unsigned explosionSize = argDesc.ProjTree.getLiveLeafCount();
+  return explosionSize <= 3;
+}
+
 /// Return true if it's both legal and a good idea to explode this argument.
-static bool shouldExplode(ArgumentDescriptor &argDesc,
-                          ConsumedArgToEpilogueReleaseMatcher &ERM) {
-  // We cannot optimize the argument.
-  if (!argDesc.canOptimizeLiveArg())
+///
+/// Our main interest here is to expose more opportunities for ARC. This means
+/// that we are not interested in exploding (and partially DCEing) structs in
+/// the following cases:
+///
+/// 1. Completely dead arguments. This is handled by dead argument elimination.
+///
+/// 2. Values that are completely trivial. By splitting these up we create
+/// more register pressure during argument marshalling and do not really add
+/// any advantage. We only explode them in the limited cases handled by
+/// shouldExplodeTrivial above.
+///
+/// 3. Structs with many live leaf nodes. Our heuristic is 1-3 live leaf
+/// nodes. Otherwise we again run into register pressure/spilling issues.
+///
+/// One important thing to note here is that the last two cases could be dealt
+/// with more effectively by having FSO consider the total number of arguments
+/// created, instead of not reasoning about this and hoping the heuristic
+/// works.
+///
+/// With that in mind, we want to perform argument explosion in the following
+/// cases (assuming our live leaf restriction):
+///
+/// 1. Non-trivial structs that only have live trivial parts. At the SIL
+/// level, this eliminates ARC restrictions imposed on the caller by the
+/// callee.
+///
+/// 2. Splitting non-trivial structs that have multiple non-trivial live leaf
+/// nodes. This is useful because it enables the low-level ARC optimizer to
+/// consider the arguments as having different RC identities and thus pair
+/// retains/releases in an easier way.
+///
+/// What is important to notice here is that we do not want to explode an
+/// argument if doing so does not reduce the number of live non-trivial leaf
+/// values.
+static bool
+shouldExplode(FunctionSignatureTransformDescriptor &transformDesc,
+              ArgumentDescriptor &argDesc,
+              ConsumedArgToEpilogueReleaseMatcher &epilogueReleaseMatcher) {
+  // No passes can optimize this argument, so just bail.
+  if (!argDesc.canOptimizeLiveArg()) {
     return false;
+  }
+
+  // We do not explode parameters that are completely dead. This is so we can
+  // rely on normal dead argument elimination to eliminate such parameters.
+  //
+  // We check this early since the live leaf count has already been computed
+  // at this point.
+  unsigned naiveExplosionSize = argDesc.ProjTree.getLiveLeafCount();
+  if (naiveExplosionSize == 0) {
+    return false;
+  }
 
   // See if the projection tree consists of potentially multiple levels of
   // structs containing one field. In such a case, there is no point in
@@ -38,30 +103,95 @@ static bool shouldExplode(ArgumentDescriptor &argDesc,
   //
   // Also, in case of a type can not be exploded, e.g an enum, we treat it
   // as a singleton.
-  if (argDesc.ProjTree.isSingleton())
+  if (argDesc.ProjTree.isSingleton()) {
     return false;
+  }
 
+  // Ok, we have a case that we may be able to handle. First make sure that the
+  // current global size expansion heuristic does not ban us from expanding this
+  // type.
   auto *arg = argDesc.Arg;
-  if (!shouldExpand(arg->getModule(), arg->getType().getObjectType())) {
+  auto &module = arg->getModule();
+  auto ty = arg->getType().getObjectType();
+  if (!shouldExpand(module, ty)) {
     return false;
   }
 
-  // If this argument is @owned and we can not find all the releases for it
-  // try to explode it, maybe we can find some of the releases and O2G some
-  // of its components.
-  //
-  // This is a potentially a very profitable optimization. Ignore other
-  // heuristics.
-  if (arg->hasConvention(SILArgumentConvention::Direct_Owned) &&
-      ERM.hasSomeReleasesForArgument(arg))
+  // Ok, this is something that globally we are not forbidden from expanding.
+  // First check if our type is completely trivial; in that case, defer to the
+  // trivial-argument heuristic in shouldExplodeTrivial above.
+  if (ty.isTrivial(module)) {
+    return shouldExplodeTrivial(transformDesc, argDesc, ty);
+  }
+
+  // Ok, we think that this /may/ be profitable to optimize. Grab our leaf node
+  // types. We already know that we have a strictly non-trivial type. If
+  // partial DCE would eliminate a non-trivial leaf, we want to perform it in
+  // order to remove an ARC lifetime restriction in our caller's scope.
+  llvm::SmallVector<SILType, 32> allTypes;
+  argDesc.ProjTree.getAllLeafTypes(allTypes);
+  llvm::SmallVector<const ProjectionTreeNode *, 32> liveNodes;
+  argDesc.ProjTree.getLiveLeafNodes(liveNodes);
+
+  unsigned numInputNonTrivialLeafNodes =
+      llvm::count_if(allTypes, [&](SILType t) { return !t.isTrivial(module); });
+  unsigned numNonTrivialLiveLeafNodes =
+      llvm::count_if(liveNodes, [&](const ProjectionTreeNode *n) {
+        return !n->getType().isTrivial(module);
+      });
+
+  // TODO: Special case if we have one argument or if all other arguments are
+  // trivial.
+  unsigned maxExplosionSize = 3;
+
+  // If we reduced the number of non-trivial leaf types, we want to split this
+  // given that we already know that we are not going to drastically change the
+  // number of arguments.
+  if (naiveExplosionSize <= maxExplosionSize &&
+      numNonTrivialLiveLeafNodes < numInputNonTrivialLeafNodes) {
     return true;
+  }
 
-  unsigned explosionSize = argDesc.ProjTree.getLiveLeafCount();
-  return explosionSize >= 1 && explosionSize <= 3;
+  // Ok, this is an argument with more than 3 live leaf nodes. See if after
+  // performing o2g we will be able to reduce our number of non-trivial nodes.
+  //
+  // *NOTE* This does not create a phase ordering issue since we re-run the
+  // pipeline after we run FSO a first time.
+  if (numNonTrivialLiveLeafNodes > 1 &&
+      argDesc.hasConvention(SILArgumentConvention::Direct_Owned)) {
+    if (auto releases =
+            epilogueReleaseMatcher.getPartiallyPostDomReleaseSet(arg)) {
+      llvm::SmallPtrSet<SILInstruction *, 8> users;
+      for (auto *i : *releases)
+        users.insert(i);
+
+      // *NOTE* This will still include trivial parameters. We will only
+      // delete non-trivial parameters.
+      unsigned newExplosionSize = naiveExplosionSize;
+      for (auto *node : liveNodes) {
+        // If all of the node's users are epilogue releases, reduce the
+        // explosion size.
+        if (llvm::all_of(node->getNonProjUsers(), [&](Operand *op) {
+              return users.count(op->getUser());
+            })) {
+          --newExplosionSize;
+        }
+      }
+
+      // See if newExplosionSize is less than our max allowed explosion size. If
+      // we reduced this value then we know we will reduce the number of
+      // non-trivial nodes. We just don't want to expand the number of arguments
+      // too much.
+      return newExplosionSize <= maxExplosionSize;
+    }
+  }
+
+  // Otherwise, we are not reducing the number of live non-trivial values.
+  return false;
 }
 
 //===----------------------------------------------------------------------===//
-// Implementation
+// Top Level Implementation
 //===----------------------------------------------------------------------===//
 
 bool FunctionSignatureTransform::ArgumentExplosionAnalyzeParameters() {
@@ -95,7 +225,7 @@ bool FunctionSignatureTransform::ArgumentExplosionAnalyzeParameters() {
       continue;
 
     A.ProjTree.computeUsesAndLiveness(A.Arg);
-    A.Explode = shouldExplode(A, ArgToReturnReleaseMap);
+    A.Explode = shouldExplode(TransformDescriptor, A, ArgToReturnReleaseMap);
 
     // Modified self argument.
     if (A.Explode && Args[i]->isSelf()) {
diff --git a/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.cpp b/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.cpp
index c57706ccfd385..aa17bdb9c3709 100644
--- a/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.cpp
+++ b/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.cpp
@@ -64,11 +64,17 @@ using ArgumentIndexMap = llvm::SmallDenseMap<int, int>;
 //===----------------------------------------------------------------------===//
 
 /// Set to true to enable the support for partial specialization.
-llvm::cl::opt<bool>
+static llvm::cl::opt<bool>
     FSOEnableGenerics("sil-fso-enable-generics", llvm::cl::init(true),
                       llvm::cl::desc("Support function signature optimization "
                                      "of generic functions"));
 
+static llvm::cl::opt<bool>
+    FSOOptimizeIfNotCalled("sil-fso-optimize-if-not-called",
+                           llvm::cl::init(false),
+                           llvm::cl::desc("Optimize even if a function isn't "
+                                          "called. For testing only!"));
+
 static bool isSpecializableRepresentation(SILFunctionTypeRepresentation Rep,
                                           bool OptForPartialApply) {
   switch (Rep) {
@@ -613,9 +619,14 @@ void FunctionSignatureTransform::createFunctionSignatureOptimizedFunction() {
 
 // Run the optimization.
 bool FunctionSignatureTransform::run(bool hasCaller) {
-  bool Changed = false;
+  // We use a reference here on purpose so our transformations can know if we
+  // are going to make a thunk and thus should just optimize.
+  bool &Changed = TransformDescriptor.Changed;
   SILFunction *F = TransformDescriptor.OriginalFunction;
 
+  // If we are asked to assume a caller for testing purposes, set the flag.
+  hasCaller |= FSOOptimizeIfNotCalled;
+
   if (!hasCaller && canBeCalledIndirectly(F->getRepresentation())) {
     DEBUG(llvm::dbgs() << "  function has no caller -> abort\n");
     return false;
diff --git a/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.h b/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.h
index fb618a0f1d528..dc6b907019370 100644
--- a/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.h
+++ b/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.h
@@ -110,9 +110,13 @@ struct ArgumentDescriptor {
     return Arg->hasConvention(P);
   }
 
+  /// Returns true if all function signature opt passes are able to process
+  /// this argument.
   bool canOptimizeLiveArg() const {
-    if (Arg->getType().isObject())
+    if (Arg->getType().isObject()) {
       return true;
+    }
+
     // @in arguments of generic types can be processed.
     if (Arg->getType().hasArchetype() && Arg->getType().isAddress() &&
@@ -193,6 +197,9 @@ struct FunctionSignatureTransformDescriptor {
   /// will use during our optimization.
   MutableArrayRef<ResultDescriptor> ResultDescList;
 
+  /// Are we going to make a change to this function?
+  bool Changed;
+
   /// Return a function name based on the current state of ArgumentDescList and
   /// ResultDescList.
   ///
@@ -289,8 +296,8 @@ class FunctionSignatureTransform {
                              llvm::SmallDenseMap<int, int> &AIM,
                              llvm::SmallVector<ArgumentDescriptor, 4> &ADL,
                              llvm::SmallVector<ResultDescriptor, 4> &RDL)
-      : TransformDescriptor{F, nullptr, AIM, false, ADL, RDL}, RCIA(RCIA),
-        EA(EA) {}
+      : TransformDescriptor{F, nullptr, AIM, false, ADL, RDL, false},
+        RCIA(RCIA), EA(EA) {}
 
   /// Return the optimized function.
   SILFunction *getOptimizedFunction() {
diff --git a/test/SILOptimizer/funcsig_explode_heuristic.sil b/test/SILOptimizer/funcsig_explode_heuristic.sil
new file mode 100644
index 0000000000000..09d91c6f8523a
--- /dev/null
+++ b/test/SILOptimizer/funcsig_explode_heuristic.sil
@@ -0,0 +1,207 @@
+// RUN: %target-sil-opt -enable-objc-interop -assume-parsing-unqualified-ownership-sil -enable-sil-verify-all -function-signature-opts -sil-fso-disable-dead-argument -sil-fso-disable-owned-to-guaranteed -enable-expand-all -sil-fso-optimize-if-not-called %s | %FileCheck %s
+
+// *NOTE* We turn off all other fso optimizations including dead arg so we can
+// make sure that we are not exploding those.
+
+sil_stage canonical
+
+import Builtin
+
+//////////////////
+// Declarations //
+//////////////////
+
+struct BigTrivial {
+  var x1: Builtin.Int32
+  var x2: Builtin.Int32
+  var x3: Builtin.Int32
+  var x4: Builtin.Int32
+  var x5: Builtin.Int32
+  var x6: Builtin.Int32
+}
+
+class Klass {}
+
+struct LargeNonTrivialStructOneNonTrivialField {
+  var k1: Klass
+  var k2: Klass
+  var x1: Builtin.Int32
+  var x2: Builtin.Int32
+  var x3: Builtin.Int32
+  var x4: Builtin.Int32
+}
+
+sil @int_user : $@convention(thin) (Builtin.Int32) -> ()
+sil @consuming_user : $@convention(thin) (@owned Klass) -> ()
+sil @guaranteed_user : $@convention(thin) (@guaranteed Klass) -> ()
+
+///////////
+// Tests //
+///////////
+
+// We should never optimize this. If we did, this would become a thunk, so we
+// know that just by checking NFC we have proven no optimization has occurred.
+//
+// CHECK-LABEL: sil @never_explode_trivial : $@convention(thin) (BigTrivial) -> () {
+// CHECK: } // end sil function 'never_explode_trivial'
+sil @never_explode_trivial : $@convention(thin) (BigTrivial) -> () {
+bb0(%0 : $BigTrivial):
+  %1 = struct_extract %0 : $BigTrivial, #BigTrivial.x1
+  %intfunc = function_ref @int_user : $@convention(thin) (Builtin.Int32) -> ()
+  apply %intfunc(%1) : $@convention(thin) (Builtin.Int32) -> ()
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// If a value is never used, do not touch it. We leave it for dead argument
+// elimination. We have deliberately turned this off to test that behavior.
+//
+// CHECK-LABEL: sil @big_arg_with_no_uses : $@convention(thin) (@guaranteed LargeNonTrivialStructOneNonTrivialField) -> () {
+// CHECK-NOT: apply
+// CHECK: } // end sil function 'big_arg_with_no_uses'
+sil @big_arg_with_no_uses : $@convention(thin) (@guaranteed LargeNonTrivialStructOneNonTrivialField) -> () {
+bb0(%0 : $LargeNonTrivialStructOneNonTrivialField):
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// We are using a single non-trivial field of the struct. We should explode this
+// so we eliminate the second non-trivial leaf.
+// +// CHECK-LABEL: sil [thunk] [always_inline] @big_arg_with_one_nontrivial_use : $@convention(thin) (@guaranteed LargeNonTrivialStructOneNonTrivialField) -> () { +// CHECK: bb0([[ARG:%.*]] : $LargeNonTrivialStructOneNonTrivialField): +// CHECK: [[FUNC:%.*]] = function_ref @$S31big_arg_with_one_nontrivial_useTf4x_n +// CHECK: [[FIELD:%.*]] = struct_extract [[ARG]] : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k1 +// CHECK: apply [[FUNC]]([[FIELD]]) +// CHECK: } // end sil function 'big_arg_with_one_nontrivial_use' +sil @big_arg_with_one_nontrivial_use : $@convention(thin) (@guaranteed LargeNonTrivialStructOneNonTrivialField) -> () { +bb0(%0 : $LargeNonTrivialStructOneNonTrivialField): + %1 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k1 + %2 = function_ref @guaranteed_user : $@convention(thin) (@guaranteed Klass) -> () + apply %2(%1) : $@convention(thin) (@guaranteed Klass) -> () + %9999 = tuple() + return %9999 : $() +} + +// We are using a single non-trivial field and a single trivial field. We are +// willing to blow this up. +// +// CHECK-LABEL: sil [thunk] [always_inline] @big_arg_with_one_nontrivial_use_one_trivial_use : $@convention(thin) (@guaranteed LargeNonTrivialStructOneNonTrivialField) -> () { +// CHECK: bb0([[ARG:%.*]] : $LargeNonTrivialStructOneNonTrivialField): +// CHECK: [[FUNC:%.*]] = function_ref @$S032big_arg_with_one_nontrivial_use_d9_trivial_F0Tf4x_n : $@convention(thin) (@guaranteed Klass, Builtin.Int32) -> () +// CHECK: [[TRIVIAL_FIELD:%.*]] = struct_extract [[ARG]] : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.x1 +// CHECK: [[NON_TRIVIAL_FIELD:%.*]] = struct_extract [[ARG]] : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k1 +// CHECK: apply [[FUNC]]([[NON_TRIVIAL_FIELD]], [[TRIVIAL_FIELD]]) +// CHECK: } // end sil function 'big_arg_with_one_nontrivial_use_one_trivial_use' +sil @big_arg_with_one_nontrivial_use_one_trivial_use : $@convention(thin) (@guaranteed LargeNonTrivialStructOneNonTrivialField) -> () { +bb0(%0 : $LargeNonTrivialStructOneNonTrivialField): + %1 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k1 + %2 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.x1 + %3 = function_ref @guaranteed_user : $@convention(thin) (@guaranteed Klass) -> () + apply %3(%1) : $@convention(thin) (@guaranteed Klass) -> () + %intfunc = function_ref @int_user : $@convention(thin) (Builtin.Int32) -> () + apply %intfunc(%2) : $@convention(thin) (Builtin.Int32) -> () + %9999 = tuple() + return %9999 : $() +} + +// We can still explode this, since our limit is 3 values. 
+// +// CHECK-LABEL: sil [thunk] [always_inline] @big_arg_with_one_nontrivial_use_two_trivial_uses : $@convention(thin) (@guaranteed LargeNonTrivialStructOneNonTrivialField) -> () { +// CHECK: bb0([[ARG:%.*]] : $LargeNonTrivialStructOneNonTrivialField): +// CHECK: [[FUNC:%.*]] = function_ref @$S48big_arg_with_one_nontrivial_use_two_trivial_usesTf4x_n : $@convention(thin) +// CHECK: [[TRIVIAL_FIELD1:%.*]] = struct_extract [[ARG]] : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.x2 +// CHECK: [[TRIVIAL_FIELD2:%.*]] = struct_extract [[ARG]] : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.x1 +// CHECK: [[NON_TRIVIAL_FIELD:%.*]] = struct_extract [[ARG]] : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k1 +// CHECK: apply [[FUNC]]([[NON_TRIVIAL_FIELD]], [[TRIVIAL_FIELD2]], [[TRIVIAL_FIELD1]]) +sil @big_arg_with_one_nontrivial_use_two_trivial_uses : $@convention(thin) (@guaranteed LargeNonTrivialStructOneNonTrivialField) -> () { +bb0(%0 : $LargeNonTrivialStructOneNonTrivialField): + %1 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k1 + %2 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.x1 + %3 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.x2 + %4 = function_ref @guaranteed_user : $@convention(thin) (@guaranteed Klass) -> () + apply %4(%1) : $@convention(thin) (@guaranteed Klass) -> () + %intfunc = function_ref @int_user : $@convention(thin) (Builtin.Int32) -> () + apply %intfunc(%2) : $@convention(thin) (Builtin.Int32) -> () + apply %intfunc(%3) : $@convention(thin) (Builtin.Int32) -> () + %9999 = tuple() + return %9999 : $() +} + +// We do not blow up the struct here since we have 4 uses, not 3. +// +// CHECK-LABEL: sil @big_arg_with_one_nontrivial_use_three_trivial_uses : $@convention(thin) (@guaranteed LargeNonTrivialStructOneNonTrivialField) -> () { +sil @big_arg_with_one_nontrivial_use_three_trivial_uses : $@convention(thin) (@guaranteed LargeNonTrivialStructOneNonTrivialField) -> () { +bb0(%0 : $LargeNonTrivialStructOneNonTrivialField): + %1 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k1 + %2 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.x1 + %3 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.x2 + %3a = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.x3 + %4 = function_ref @guaranteed_user : $@convention(thin) (@guaranteed Klass) -> () + apply %4(%1) : $@convention(thin) (@guaranteed Klass) -> () + %intfunc = function_ref @int_user : $@convention(thin) (Builtin.Int32) -> () + apply %intfunc(%2) : $@convention(thin) (Builtin.Int32) -> () + apply %intfunc(%3) : $@convention(thin) (Builtin.Int32) -> () + apply %intfunc(%3a) : $@convention(thin) (Builtin.Int32) -> () + %9999 = tuple() + return %9999 : $() +} + +// In this case, we shouldn't blow up the struct since we have not reduced the +// number of non-trivial leaf nodes used. 
+//
+// CHECK-LABEL: sil @big_arg_with_two_nontrivial_use : $@convention(thin) (@guaranteed LargeNonTrivialStructOneNonTrivialField) -> () {
+sil @big_arg_with_two_nontrivial_use : $@convention(thin) (@guaranteed LargeNonTrivialStructOneNonTrivialField) -> () {
+bb0(%0 : $LargeNonTrivialStructOneNonTrivialField):
+  %1 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k1
+  %2 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k2
+  %3 = function_ref @guaranteed_user : $@convention(thin) (@guaranteed Klass) -> ()
+  apply %3(%1) : $@convention(thin) (@guaranteed Klass) -> ()
+  apply %3(%2) : $@convention(thin) (@guaranteed Klass) -> ()
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// If we have one non-trivial value that is live and only live because of a
+// destroy, we can delete the argument after performing o2g.
+//
+// We are using a single non-trivial field of the struct. We should explode this
+// so we eliminate the second non-trivial leaf.
+//
+// CHECK-LABEL: sil [thunk] [always_inline] @big_arg_with_one_nontrivial_use_o2g_other_dead : $@convention(thin) (@owned LargeNonTrivialStructOneNonTrivialField) -> () {
+// CHECK-NOT: release_value
+// CHECK: apply
+// CHECK-NOT: release_value
+// CHECK: } // end sil function 'big_arg_with_one_nontrivial_use_o2g_other_dead'
+sil @big_arg_with_one_nontrivial_use_o2g_other_dead : $@convention(thin) (@owned LargeNonTrivialStructOneNonTrivialField) -> () {
+bb0(%0 : $LargeNonTrivialStructOneNonTrivialField):
+  %1 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k1
+  release_value %1 : $Klass
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// If we have two non-trivial values that are live and one is always dead and
+// the other is kept alive due to a release, we can get rid of both since FSO
+// reruns with o2g. Test here that we explode it appropriately even though we
+// aren't reducing the number of non-trivial uses. The
+// funcsig_explode_heuristic_inline.sil test makes sure that, in combination,
+// we produce the appropriate SIL.
+//
+// We check that we can inline this correctly in the inline test.
+// +// CHECK-LABEL: sil [thunk] [always_inline] @big_arg_with_one_nontrivial_use_o2g : $@convention(thin) (@owned LargeNonTrivialStructOneNonTrivialField) -> () { +// CHECK: bb0([[ARG:%.*]] : $LargeNonTrivialStructOneNonTrivialField): +// CHECK: [[FUNC:%.*]] = function_ref @$S35big_arg_with_one_nontrivial_use_o2gTf4x_n : $@convention(thin) (@owned Klass, @owned Klass) -> () +// CHECK: apply [[FUNC]]( +// CHECK: } // end sil function 'big_arg_with_one_nontrivial_use_o2g' +sil @big_arg_with_one_nontrivial_use_o2g : $@convention(thin) (@owned LargeNonTrivialStructOneNonTrivialField) -> () { +bb0(%0 : $LargeNonTrivialStructOneNonTrivialField): + %1 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k1 + %2 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k2 + %3 = function_ref @consuming_user : $@convention(thin) (@owned Klass) -> () + apply %3(%2) : $@convention(thin) (@owned Klass) -> () + release_value %1 : $Klass + %9999 = tuple() + return %9999 : $() +} diff --git a/test/SILOptimizer/funcsig_explode_heuristic_inline.sil b/test/SILOptimizer/funcsig_explode_heuristic_inline.sil new file mode 100644 index 0000000000000..dfa09681aaac1 --- /dev/null +++ b/test/SILOptimizer/funcsig_explode_heuristic_inline.sil @@ -0,0 +1,93 @@ +// RUN: %target-sil-opt -enable-objc-interop -assume-parsing-unqualified-ownership-sil -enable-sil-verify-all -sil-inline-generics -inline -function-signature-opts -enable-expand-all %s | %FileCheck %s + +sil_stage canonical + +import Builtin + +////////////////// +// Declarations // +////////////////// + +class Klass {} + +struct LargeNonTrivialStructOneNonTrivialField { + var k1: Klass + var k2: Klass + var x1: Builtin.Int32 + var x2: Builtin.Int32 + var x3: Builtin.Int32 + var x4: Builtin.Int32 +} + +sil @consuming_user : $@convention(thin) (@owned Klass) -> () +sil @guaranteed_user : $@convention(thin) (@guaranteed Klass) -> () + +// This test makes sure that if we have two non-trivial values that are live and +// one is always dead and the other is a value that we have a release for, we +// can get rid of the first argument and FSO the other. Test here that we +// explode it appropriately and do a partial o2g even though we aren't reducing +// the number of non-trivial uses. 
+
+// CHECK-LABEL: sil @caller1 : $@convention(thin) (@owned LargeNonTrivialStructOneNonTrivialField) -> () {
+// CHECK: bb0([[ARG:%.*]] : $LargeNonTrivialStructOneNonTrivialField):
+// CHECK: [[FIELD1:%.*]] = struct_extract [[ARG]] : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k2
+// CHECK: [[FIELD2:%.*]] = struct_extract [[ARG]] : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k1
+// CHECK: [[FUNC:%.*]] = function_ref @$S11partial_o2gTf4x_nTf4gn_n : $@convention(thin) (@guaranteed Klass, @owned Klass) -> ()
+// CHECK: apply [[FUNC]]([[FIELD2]], [[FIELD1]]) : $@convention(thin) (@guaranteed Klass, @owned Klass) -> ()
+// CHECK: release_value [[FIELD2]]
+// CHECK: } // end sil function 'caller1'
+sil @caller1 : $@convention(thin) (@owned LargeNonTrivialStructOneNonTrivialField) -> () {
+bb0(%0 : $LargeNonTrivialStructOneNonTrivialField):
+  %1 = function_ref @partial_o2g : $@convention(thin) (@owned LargeNonTrivialStructOneNonTrivialField) -> ()
+  apply %1(%0) : $@convention(thin) (@owned LargeNonTrivialStructOneNonTrivialField) -> ()
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// If we have two non-trivial values that are live and one is always dead and
+// the other is kept alive due to a release, we can get rid of both since FSO
+// reruns with o2g. Test here that we explode it appropriately and do a partial
+// o2g even though we aren't reducing the number of non-trivial uses.
+sil [noinline] @partial_o2g : $@convention(thin) (@owned LargeNonTrivialStructOneNonTrivialField) -> () {
+bb0(%0 : $LargeNonTrivialStructOneNonTrivialField):
+  %1 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k1
+  %2 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k2
+  %3 = function_ref @consuming_user : $@convention(thin) (@owned Klass) -> ()
+  apply %3(%2) : $@convention(thin) (@owned Klass) -> ()
+  %4 = function_ref @guaranteed_user : $@convention(thin) (@guaranteed Klass) -> ()
+  apply %4(%1) : $@convention(thin) (@guaranteed Klass) -> ()
+  release_value %1 : $Klass
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// CHECK-LABEL: sil @caller2 : $@convention(thin) (@owned LargeNonTrivialStructOneNonTrivialField) -> () {
+// CHECK: bb0([[ARG:%.*]] : $LargeNonTrivialStructOneNonTrivialField):
+// CHECK: [[FIELD1:%.*]] = struct_extract [[ARG]] : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k2
+// CHECK: [[FIELD2:%.*]] = struct_extract [[ARG]] : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k1
+// CHECK: [[FUNC:%.*]] = function_ref @$S23partiallydead_after_o2gTf4x_nTf4dn_n : $@convention(thin) (@owned Klass) -> ()
+// CHECK: apply [[FUNC]]([[FIELD1]]) : $@convention(thin) (@owned Klass) -> ()
+// CHECK: release_value [[FIELD2]]
+// CHECK: } // end sil function 'caller2'
+sil @caller2 : $@convention(thin) (@owned LargeNonTrivialStructOneNonTrivialField) -> () {
+bb0(%0 : $LargeNonTrivialStructOneNonTrivialField):
+  %1 = function_ref @partiallydead_after_o2g : $@convention(thin) (@owned LargeNonTrivialStructOneNonTrivialField) -> ()
+  apply %1(%0) : $@convention(thin) (@owned LargeNonTrivialStructOneNonTrivialField) -> ()
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// If we have two non-trivial values that are live and one is always dead and
+// the other is kept alive due to a release, we can get rid of both since FSO
+// reruns with o2g.
Test here that we explode it appropriately and do a partial
+// o2g even though we aren't reducing the number of non-trivial uses.
+sil [noinline] @partiallydead_after_o2g : $@convention(thin) (@owned LargeNonTrivialStructOneNonTrivialField) -> () {
+bb0(%0 : $LargeNonTrivialStructOneNonTrivialField):
+  %1 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k1
+  %2 = struct_extract %0 : $LargeNonTrivialStructOneNonTrivialField, #LargeNonTrivialStructOneNonTrivialField.k2
+  %3 = function_ref @consuming_user : $@convention(thin) (@owned Klass) -> ()
+  apply %3(%2) : $@convention(thin) (@owned Klass) -> ()
+  release_value %1 : $Klass
+  %9999 = tuple()
+  return %9999 : $()
+}
diff --git a/test/SILOptimizer/functionsigopts.sil b/test/SILOptimizer/functionsigopts.sil
index e6c5027e85311..a5e7b0a0b11f6 100644
--- a/test/SILOptimizer/functionsigopts.sil
+++ b/test/SILOptimizer/functionsigopts.sil
@@ -220,10 +220,12 @@ bb0(%0 : $LotsOfFields):
 }
 
 // Since this is a value that contains only a singular owned type, there is no
-// point from an ARC perspective in splitting it up. We still want to perform owned to guaranteed though.
+// point from an ARC perspective in splitting it up. We still want to perform
+// owned to guaranteed though, since we know we are already going to create a
+// thunk. Thus we are not adding code size.
 //
 // CHECK-LABEL: sil [signature_optimized_thunk] [always_inline] @dead_argument_due_to_more_than_release_user
-// CHECK: [[FN1:%.*]] = function_ref @$S43dead_argument_due_to_more_than_release_userTf4g_n : $@convention(thin) (@guaranteed Boo) -> (Int, Int)
+// CHECK: [[FN1:%.*]] = function_ref @$S43dead_argument_due_to_more_than_release_userTf4gX_n : $@convention(thin) (@guaranteed Baz, Int) -> (Int, Int)
 sil @dead_argument_due_to_more_than_release_user : $@convention(thin) (@owned Boo) -> (Int, Int) {
 bb0(%0 : $Boo):
   // make it a non-trivial function
From a70c3f03c9c4feb8a7885656fb0c0f5f4df5657d Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Thu, 21 Jun 2018 16:25:47 -0700
Subject: [PATCH 06/12] Increase the max size.

---
 .../ArgumentExplosionTransform.cpp | 21 ++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp b/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
index 33edf8fcd64c8..2b1d4d039d7db 100644
--- a/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
+++ b/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
@@ -36,14 +36,15 @@ static llvm::cl::opt<bool> FSODisableArgExplosion(
 
 static bool
 shouldExplodeTrivial(FunctionSignatureTransformDescriptor &transformDesc,
-                     ArgumentDescriptor &argDesc, SILType ty) {
+                     ArgumentDescriptor &argDesc, SILType ty,
+                     unsigned maxExplosionSize) {
   // Just blow up parameters if we will reduce the size of arguments.
   //
   // FIXME: In the future we should attempt to only do this if we can generate a
   // thunk. This was tried with the current heuristic and it resulted in a 1%
   // increase in code-size in the standard library.
   unsigned explosionSize = argDesc.ProjTree.getLiveLeafCount();
-  return explosionSize <= 3;
+  return explosionSize <= maxExplosionSize;
 }
 
 /// Return true if it's both legal and a good idea to explode this argument.
@@ -113,15 +114,25 @@ shouldExplode(FunctionSignatureTransformDescriptor &transformDesc,
   auto *arg = argDesc.Arg;
   auto &module = arg->getModule();
   auto ty = arg->getType().getObjectType();
+#if false
   if (!shouldExpand(module, ty)) {
     return false;
   }
+#endif
+
+  // If we have a singular argument, be more aggressive about our max explosion
+  // size. If we were unable to expand the value we know that it will be
+  // exploded so use UINT_MAX.
+  unsigned maxExplosionSize = 3;
+  if (transformDesc.ArgumentDescList.size() == 1) {
+    maxExplosionSize = UINT_MAX;
+  }
 
   // Ok, this is something that globally we are not forbidden from
   // expanding. First check if our type is completely trivial. We never want to
   // explode arguments that are trivial so return false. See comment above.
   if (ty.isTrivial(module)) {
-    return shouldExplodeTrivial(transformDesc, argDesc, ty);
+    return shouldExplodeTrivial(transformDesc, argDesc, ty, maxExplosionSize);
   }
 
   // Ok, we think that this /may/ be profitable to optimize. Grab our leaf node
@@ -141,10 +152,6 @@ shouldExplode(FunctionSignatureTransformDescriptor &transformDesc,
     return n->getType().isTrivial(module);
   });
 
-  // TODO: Special case if we have one argument or if all other arguments are
-  // trivial.
-  unsigned maxExplosionSize = 3;
-
   // If we reduced the number of non-trivial leaf types, we want to split this
   // given that we already know that we are not going to drastically change the
   // number of arguments.
From a70c3f03c9c4feb8a7885656fb0c0f5f4df5657d Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Tue, 3 Jul 2018 20:43:48 -0700
Subject: [PATCH 07/12] Test

---
 .../ArgumentExplosionTransform.cpp | 12 +++++-------
 .../FunctionSignatureOpts.cpp      |  8 +++++---
 .../FunctionSignatureOpts.h        | 10 ++++++++--
 3 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp b/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
index 2b1d4d039d7db..ea0a28aaf74c8 100644
--- a/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
+++ b/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
@@ -38,11 +38,11 @@ static bool
 shouldExplodeTrivial(FunctionSignatureTransformDescriptor &transformDesc,
                      ArgumentDescriptor &argDesc, SILType ty,
                      unsigned maxExplosionSize) {
-  // Just blow up parameters if we will reduce the size of arguments.
-  //
-  // FIXME: In the future we should attempt to only do this if we can generate a
-  // thunk. This was tried with the current heuristic and it resulted in a 1%
-  // increase in code-size in the standard library.
+  // Only blow up trivial parameters if we will not form a thunk...
+  if (!transformDesc.hasOnlyDirectCallers)
+    return false;
+
+  // ... and reduce the size of the argument by a reasonable amount.
   unsigned explosionSize = argDesc.ProjTree.getLiveLeafCount();
   return explosionSize <= maxExplosionSize;
 }
@@ -114,11 +114,9 @@ shouldExplode(FunctionSignatureTransformDescriptor &transformDesc,
   auto *arg = argDesc.Arg;
   auto &module = arg->getModule();
   auto ty = arg->getType().getObjectType();
-#if false
   if (!shouldExpand(module, ty)) {
     return false;
   }
-#endif
 
   // If we have a singular argument, be more aggressive about our max explosion
   // size.
If we were unable to expand the value we know that it will be
diff --git a/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.cpp b/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.cpp
index aa17bdb9c3709..bdd9d28cfbca5 100644
--- a/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.cpp
+++ b/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.cpp
@@ -622,6 +622,7 @@ bool FunctionSignatureTransform::run(bool hasCaller) {
   // We use a reference here on purpose so our transformations can know if we
   // are going to make a thunk and thus should just optimize.
   bool &Changed = TransformDescriptor.Changed;
+  bool hasOnlyDirectCallers = TransformDescriptor.hasOnlyDirectCallers;
   SILFunction *F = TransformDescriptor.OriginalFunction;
 
   // If we are asked to assume a caller for testing purposes, set the flag.
@@ -642,7 +643,7 @@ bool FunctionSignatureTransform::run(bool hasCaller) {
   // Run DeadArgument elimination transformation. We only specialize
   // if this function has a caller inside the current module or we have
   // already created a thunk.
-  if ((hasCaller || Changed) && DeadArgumentAnalyzeParameters()) {
+  if ((hasCaller || Changed || hasOnlyDirectCallers) && DeadArgumentAnalyzeParameters()) {
     Changed = true;
     DEBUG(llvm::dbgs() << "  remove dead arguments\n");
     DeadArgumentTransformFunction();
@@ -660,7 +661,7 @@ bool FunctionSignatureTransform::run(bool hasCaller) {
   // In order to not miss any opportunity, we send the optimized function
   // to the passmanager to optimize any opportunities exposed by argument
   // explosion.
-  if ((hasCaller || Changed) && ArgumentExplosionAnalyzeParameters()) {
+  if ((hasCaller || Changed || hasOnlyDirectCallers) && ArgumentExplosionAnalyzeParameters()) {
     Changed = true;
   }
 
@@ -810,7 +811,8 @@ class FunctionSignatureOpts : public SILFunctionTransform {
 
     // Owned to guaranteed optimization.
     FunctionSignatureTransform FST(F, RCIA, EA, Mangler, AIM,
-                                   ArgumentDescList, ResultDescList);
+                                   ArgumentDescList, ResultDescList,
+                                   FuncInfo.hasAllCallers());
 
     bool Changed = false;
     if (OptForPartialApply) {
diff --git a/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.h b/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.h
index dc6b907019370..3401fa4289c35 100644
--- a/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.h
+++ b/lib/SILOptimizer/FunctionSignatureTransforms/FunctionSignatureOpts.h
@@ -200,6 +200,10 @@ struct FunctionSignatureTransformDescriptor {
   /// Are we going to make a change to this function?
   bool Changed;
 
+  /// Does this function only have direct callers? If so, we know that all
+  /// thunks we create will be eliminated, so we can be more aggressive.
+  bool hasOnlyDirectCallers;
+
   /// Return a function name based on the current state of ArgumentDescList and
   /// ResultDescList.
   ///
@@ -295,8 +299,10 @@ class FunctionSignatureTransform {
       Mangle::FunctionSignatureSpecializationMangler &Mangler,
       llvm::SmallDenseMap &AIM,
       llvm::SmallVector &ADL,
-      llvm::SmallVector &RDL)
-      : TransformDescriptor{F, nullptr, AIM, false, ADL, RDL, false},
+      llvm::SmallVector &RDL,
+      bool hasOnlyDirectCallers)
+      : TransformDescriptor{F, nullptr, AIM, false, ADL, RDL, false,
+                            hasOnlyDirectCallers},
         RCIA(RCIA), EA(EA) {}
 
   /// Return the optimized function.
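
For orientation, here is roughly how the new flag is meant to be wired, sketched
under the assumption that CallerAnalysis::FunctionInfo::hasAllCallers() (used in
the hunk above) answers "did we find every call site of F, with no escaping
uses?". This is an illustrative sketch, not the verbatim pass code:

  // Illustrative sketch: how caller completeness reaches the FSO heuristics.
  // `CA` is a hypothetical CallerAnalysis pointer obtained from the pass
  // manager; the other names come from the hunks above.
  const CallerAnalysis::FunctionInfo &funcInfo = CA->getCallerInfo(F);
  bool hasOnlyDirectCallers = funcInfo.hasAllCallers();

  // The flag rides in the transform descriptor, so every sub-transform
  // (dead argument elimination, argument explosion, o2g) can consult it.
  FunctionSignatureTransform FST(F, RCIA, EA, Mangler, AIM, ArgumentDescList,
                                 ResultDescList, hasOnlyDirectCallers);
  FST.run(hasCaller);

When hasOnlyDirectCallers is true, every thunk FSO creates can later be inlined
into its fully known callers, so the heuristics may be more aggressive without a
net code-size cost.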
From d8a76da116253876c82c488f88da3b42dc44a0cf Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Thu, 5 Jul 2018 17:04:27 -0700
Subject: [PATCH 08/12] Add stats

---
 .../ArgumentExplosionTransform.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp b/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
index ea0a28aaf74c8..417d2147df57a 100644
--- a/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
+++ b/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
@@ -30,6 +30,11 @@ static llvm::cl::opt<bool> FSODisableArgExplosion(
     llvm::cl::desc("Do not perform argument explosion during FSO. Intended "
                    "only for testing purposes"));
 
+STATISTIC(NumTrivialArgumentsWithUnknownCallersNotExploded,
+          "Number of trivial arguments not exploded due to unknown callers");
+STATISTIC(NumTrivialArgumentsWithOnlyDirectCallersExploded,
+          "Number of trivial arguments exploded because all callers were known");
+
 //===----------------------------------------------------------------------===//
 //                                  Utility
 //===----------------------------------------------------------------------===//
@@ -39,8 +44,11 @@ shouldExplodeTrivial(FunctionSignatureTransformDescriptor &transformDesc,
                      ArgumentDescriptor &argDesc, SILType ty,
                      unsigned maxExplosionSize) {
   // Only blow up trivial parameters if we will not form a thunk...
-  if (!transformDesc.hasOnlyDirectCallers)
+  if (!transformDesc.hasOnlyDirectCallers) {
+    ++NumTrivialArgumentsWithUnknownCallersNotExploded;
     return false;
+  }
+  ++NumTrivialArgumentsWithOnlyDirectCallersExploded;
 
   // ... and reduce the size of the argument by a reasonable amount.
   unsigned explosionSize = argDesc.ProjTree.getLiveLeafCount();
From e98bdcd76ce0632c76e6cabb26847fe076b7e231 Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Thu, 5 Jul 2018 17:44:53 -0700
Subject: [PATCH 09/12] [caller-analysis] Shared functions today cannot be called indirectly.

In the future, when we support specializing vtables and witness tables this
will become a different question. But for today, this is correct.

---
 lib/SILOptimizer/Analysis/CallerAnalysis.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/lib/SILOptimizer/Analysis/CallerAnalysis.cpp b/lib/SILOptimizer/Analysis/CallerAnalysis.cpp
index 9b2d51d8e10fa..a4d62171014cd 100644
--- a/lib/SILOptimizer/Analysis/CallerAnalysis.cpp
+++ b/lib/SILOptimizer/Analysis/CallerAnalysis.cpp
@@ -61,6 +61,14 @@ CallerAnalysis::CallerAnalysis(SILModule *m)
   }
 }
 
+static bool mayHaveIndirectCallers(SILFunction *calleeFn) {
+  // We do not support specialized vtables, so specialized methods should never
+  // be callable indirectly.
+  if (calleeFn->getLinkage() == SILLinkage::Shared)
+    return false;
+  return canBeCalledIndirectly(calleeFn->getRepresentation());
+}
+
 void CallerAnalysis::processFunctionCallSites(SILFunction *callerFn) {
   // First grab our caller info so that we can store back references
   // from our callerFn to the calleeFn so that we can invalidate all
@@ -89,10 +97,8 @@ void CallerAnalysis::processFunctionCallSites(SILFunction *callerFn) {
       auto *calleeFn = fri->getReferencedFunction();
       FunctionInfo &calleeInfo = getOrInsertCallerInfo(calleeFn);
 
-      // TODO: Make this more aggressive by considering
-      // final/visibility/etc.
      calleeInfo.mayHaveIndirectCallers =
-          canBeCalledIndirectly(calleeFn->getRepresentation());
+          mayHaveIndirectCallers(calleeFn);
 
       // Next create our caller state.
       auto iter = calleeInfo.callerStates.insert({callerFn, {}});
From 1a0d762571ee73ccac91be1fa32aa8e937786a1c Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Thu, 5 Jul 2018 17:45:57 -0700
Subject: [PATCH 10/12] eliminate bad stats

---
 .../ArgumentExplosionTransform.cpp | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp b/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
index 417d2147df57a..fff85317fd2c2 100644
--- a/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
+++ b/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
@@ -30,11 +30,6 @@ static llvm::cl::opt<bool> FSODisableArgExplosion(
     llvm::cl::desc("Do not perform argument explosion during FSO. Intended "
                    "only for testing purposes"));
 
-STATISTIC(NumTrivialArgumentsWithUnknownCallersNotExploded,
-          "Number of trivial arguments not exploded due to unknown callers");
-STATISTIC(NumTrivialArgumentsWithOnlyDirectCallersExploded,
-          "Number of trivial arguments exploded because all callers were known");
-
 //===----------------------------------------------------------------------===//
 //                                  Utility
 //===----------------------------------------------------------------------===//
@@ -45,10 +40,8 @@ shouldExplodeTrivial(FunctionSignatureTransformDescriptor &transformDesc,
                      unsigned maxExplosionSize) {
   // Only blow up trivial parameters if we will not form a thunk...
   if (!transformDesc.hasOnlyDirectCallers) {
-    ++NumTrivialArgumentsWithUnknownCallersNotExploded;
     return false;
   }
-  ++NumTrivialArgumentsWithOnlyDirectCallersExploded;
 
   // ... and reduce the size of the argument by a reasonable amount.
From 442986187dd6cdde56d3e6f78b5628744b8d8e50 Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Thu, 5 Jul 2018 18:07:20 -0700
Subject: [PATCH 11/12] Ignore leaf node thing if we have only direct callers.

---
 .../ArgumentExplosionTransform.cpp | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp b/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
index fff85317fd2c2..572e9c695caa3 100644
--- a/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
+++ b/lib/SILOptimizer/FunctionSignatureTransforms/ArgumentExplosionTransform.cpp
@@ -151,12 +151,18 @@ shouldExplode(FunctionSignatureTransformDescriptor &transformDesc,
     return n->getType().isTrivial(module);
   });
 
-  // If we reduced the number of non-trivial leaf types, we want to split this
-  // given that we already know that we are not going to drastically change the
-  // number of arguments.
-  if (naiveExplosionSize <= maxExplosionSize &&
-      numNonTrivialLiveLeafNodes < numInputNonTrivialLeafNodes) {
-    return true;
+  // If we will reduce the number of values without adding too many arguments...
+  if (naiveExplosionSize <= maxExplosionSize) {
+    // ... and we know that we will not introduce a thunk, be aggressive.
+    if (transformDesc.hasOnlyDirectCallers) {
+      return true;
+    }
+
+    // Otherwise, only explode if we will reduce the number of non-trivial leaf
+    // types.
+ if (numNonTrivialLiveLeafNodes < numInputNonTrivialLeafNodes) { + return true; + } } // Ok, this is an argument with more than 3 live leaf nodes. See if after From 970b94294c92513d760735d45bf55c0e898f1bcf Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Fri, 6 Jul 2018 22:38:33 -0700 Subject: [PATCH 12/12] compile with asserts --- include/swift/SILOptimizer/Analysis/CallerAnalysis.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/swift/SILOptimizer/Analysis/CallerAnalysis.h b/include/swift/SILOptimizer/Analysis/CallerAnalysis.h index 54fa3b64a2a3a..cd741fb0be076 100644 --- a/include/swift/SILOptimizer/Analysis/CallerAnalysis.h +++ b/include/swift/SILOptimizer/Analysis/CallerAnalysis.h @@ -108,10 +108,8 @@ class CallerAnalysis : public SILAnalysis { /// invalidating parts of the call graph. const FunctionInfo &getCallerInfo(SILFunction *f) const; -#ifndef NDEBUG LLVM_ATTRIBUTE_DEPRECATED(void dump() const LLVM_ATTRIBUTE_USED, "Only for use in the debugger"); -#endif /// Print the state of the caller analysis as a sequence of yaml documents for /// each callee we are tracking.
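
Taken together, patches 06-11 leave the argument explosion heuristic in roughly
the following shape. This is a condensed paraphrase of the hunks above using the
names from ArgumentExplosionTransform.cpp, not the verbatim source; the trailing
analysis for arguments with many live leaf nodes is elided:

  // Condensed sketch of shouldExplode() after this series (paraphrased).
  unsigned maxExplosionSize = 3;
  if (transformDesc.ArgumentDescList.size() == 1)
    maxExplosionSize = UINT_MAX; // a singular argument can always be split

  if (ty.isTrivial(module)) {
    // Trivial arguments are only exploded when no thunk will survive and the
    // resulting argument count stays reasonable.
    return transformDesc.hasOnlyDirectCallers &&
           argDesc.ProjTree.getLiveLeafCount() <= maxExplosionSize;
  }

  if (naiveExplosionSize <= maxExplosionSize) {
    // With all callers known, thunks are free, so be aggressive.
    if (transformDesc.hasOnlyDirectCallers)
      return true;
    // Otherwise require an ARC win: fewer non-trivial leaves than the input.
    if (numNonTrivialLiveLeafNodes < numInputNonTrivialLeafNodes)
      return true;
  }
  // ... further cases for arguments with many live leaf nodes elided ...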