Revert "[IRSim] Check largest sections first when analyzing similarity"

llvm-sim test still misbehaving on other platforms. This reverts commit 082ec26.
llvm · Mar 21, 2023 · 9baaf4f · 9baaf4f
1 parent 4277d93
commit 9baaf4f
Show file tree

Hide file tree

Showing 7 changed files with 62 additions and 320 deletions.
diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -850,49 +850,6 @@ class IRSimilarityCandidate {
       IRSimilarityCandidate &SourceCand,
       DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
       DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);
-
-  /// Create a mapping for the value numbering of the calling
-  /// IRSimilarityCandidate, to a different separate set of numbers, based on
-  /// the canonical ordering in \p SourceCand. These are defined based on the
-  /// found mappings in \p ToSourceMapping and \p FromSourceMapping.  Both of
-  /// these relationships should have the same information, just in opposite
-  /// directions.  Uses the \p OneToOne mapping from target candidate to \p
-  /// SourceCand GVNs to determine the mapping first for values with multiple
-  /// mappings.  This mapping is created by the ordering of operands in the
-  /// instruction they are first seen in the candidates.
-  ///
-  /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
-  /// canonical numbering from.
-  /// \param [in,out] OneToOne - A mapping of value numbers from candidate
-  /// \p A to candidate \p B using the structure of the original instructions.
-  /// \param ToSourceMapping - The mapping of value numbers from this candidate
-  /// to \p SourceCand.
-  /// \param FromSourceMapping - The mapping of value numbers from \p SourceCand
-  /// to this candidate.
-  void createCanonicalRelationFrom(
-      IRSimilarityCandidate &SourceCand,
-      DenseMap<unsigned, unsigned> &OneToOne,
-      DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
-      DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);
-
-  /// Create a mapping for the value numbering of the calling
-  /// IRSimilarityCandidate, to a different separate set of numbers, based on
-  /// the canonical ordering in \p SourceCand. These are defined based on the
-  /// canonical mapping defined between \p SourceCandLarge and
-  /// \p TargetCandLarge.  These IRSimilarityCandidates are already structurally
-  /// similar, and fully encapsulate the IRSimilarityCandidates in question.
-  /// These are used as a "bridge" from the \p SourceCand to the target.
-  ///
-  /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
-  /// canonical numbering from.
-  /// \param SourceCandLarge - The IRSimilarityCandidate fully containing
-  /// \p SourceCand.
-  /// \param TargetCandLarge -  The IRSimilarityCandidate fully containing
-  /// this Candidate.
-  void createCanonicalRelationFrom(
-      IRSimilarityCandidate &SourceCand,
-      IRSimilarityCandidate &SourceCandLarge,
-      IRSimilarityCandidate &TargetCandLarge);
 
   /// \param [in,out] BBSet - The set to track the basic blocks.
   void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const {

diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -1101,76 +1101,6 @@ void IRSimilarityCandidate::createCanonicalRelationFrom(
   }
 }
 
-void IRSimilarityCandidate::createCanonicalRelationFrom(
-    IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
-    IRSimilarityCandidate &TargetCandLarge) {
-  assert(!SourceCand.CanonNumToNumber.empty() &&
-         "Canonical Relationship is non-empty");
-  assert(!SourceCand.NumberToCanonNum.empty() &&
-         "Canonical Relationship is non-empty");
-
-  assert(!SourceCandLarge.CanonNumToNumber.empty() &&
-         "Canonical Relationship is non-empty");
-  assert(!SourceCandLarge.NumberToCanonNum.empty() &&
-         "Canonical Relationship is non-empty");
-
-  assert(!TargetCandLarge.CanonNumToNumber.empty() &&
-         "Canonical Relationship is non-empty");
-  assert(!TargetCandLarge.NumberToCanonNum.empty() &&
-         "Canonical Relationship is non-empty");
-
-  assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
-  assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");
-
-  // We're going to use the larger candidates as a "bridge" to create the
-  // canonical number for the target candidate since we have identified two
-  // candidates as subsequences of larger sequences, and therefore must be
-  // structurally similar.
-  for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
-    Value *CurrVal = ValueNumPair.first;
-    unsigned TargetCandGVN = ValueNumPair.second;
-
-    // Find the numbering in the large candidate that surrounds the 
-    // current candidate.
-    std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
-    assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
-
-    // Get the canonical numbering in the large target candidate.
-    std::optional<unsigned> OTargetCandCanon =
-        TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
-    assert(OTargetCandCanon.has_value() &&
-           "Canononical Number not found for GVN");
-
-    // Get the GVN in the large source candidate from the canonical numbering.
-    std::optional<unsigned> OLargeSourceGVN =
-        SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
-    assert(OLargeSourceGVN.has_value() &&
-           "GVN Number not found for Canonical Number");
-
-    // Get the Value from the GVN in the large source candidate.
-    std::optional<Value *> OLargeSourceV =
-        SourceCandLarge.fromGVN(OLargeSourceGVN.value());
-    assert(OLargeSourceV.has_value() && "Value not found for GVN");
-
-    // Get the GVN number for the Value in the source candidate.
-    std::optional<unsigned> OSourceGVN =
-        SourceCand.getGVN(OLargeSourceV.value());
-    assert(OSourceGVN.has_value() && "GVN Number not found for Value");
-
-    // Get the canonical numbering from the GVN.
-    std::optional<unsigned> OSourceCanon =
-        SourceCand.getCanonicalNum(OSourceGVN.value());
-    assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
-
-    // Insert the canonical numbering and GVN pair into their respective
-    // mappings.
-    CanonNumToNumber.insert(
-        std::make_pair(OSourceCanon.value(), TargetCandGVN));
-    NumberToCanonNum.insert(
-        std::make_pair(TargetCandGVN, OSourceCanon.value()));
-  }
-}
-
 void IRSimilarityCandidate::createCanonicalMappingFor(
     IRSimilarityCandidate &CurrCand) {
   assert(CurrCand.CanonNumToNumber.size() == 0 &&
@@ -1188,81 +1118,6 @@ void IRSimilarityCandidate::createCanonicalMappingFor(
   }
 }
 
-/// Look for larger IRSimilarityCandidates from the previously matched
-/// IRSimilarityCandidates that fully contain \p CandA or \p CandB.  If there is
-/// an overlap, return a pair of structurally similar, larger
-/// IRSimilarityCandidates.
-///
-/// \param [in] CandA - The first candidate we are trying to determine the
-/// structure of.
-/// \param [in] CandB - The second candidate we are trying to determine the
-/// structure of.
-/// \param [in] IndexToIncludedCand - Mapping of index of an instruction in
-/// a circuit to the IRSimilarityCandidates that include this instruction.
-/// \param [in] CandToGroup - Mapping of IRSimilarityCandidate to a
-/// number representing the structural group assigned to it.
-static std::optional<
-    std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
-CheckLargerCands(
-    IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB,
-    DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
-    DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) {
-  DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA;
-  DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB;
-  DenseSet<unsigned> IncludedGroupsA;
-  DenseSet<unsigned> IncludedGroupsB;
-
-  // Find the overall similarity group numbers that fully contain the candidate,
-  // and record the larger candidate for each group.
-  auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());
-  std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
-      Result;
-
-  unsigned CandAStart = CandA.getStartIdx();
-  unsigned CandAEnd = CandA.getEndIdx();
-  unsigned CandBStart = CandB.getStartIdx();
-  unsigned CandBEnd = CandB.getEndIdx();
-  if (IdxToCandidateIt == IndexToIncludedCand.end())
-    return Result;
-  for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
-    if (MatchedCand->getStartIdx() > CandAStart ||
-        (MatchedCand->getEndIdx() < CandAEnd))
-      continue;
-    unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
-    IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
-    IncludedGroupsA.insert(GroupNum);
-  }
-
-  // Find the overall similarity group numbers that fully contain the next
-  // candidate, and record the larger candidate for each group.
-  IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
-  if (IdxToCandidateIt == IndexToIncludedCand.end())
-    return Result;
-  for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
-    if (MatchedCand->getStartIdx() > CandBStart ||
-        MatchedCand->getEndIdx() < CandBEnd)
-      continue;
-    unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
-    IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
-    IncludedGroupsB.insert(GroupNum);
-  }
-
-  // Find the intersection between the two groups, these are the groups where
-  // the larger candidates exist.
-  set_intersect(IncludedGroupsA, IncludedGroupsB);
-
-  // If there is no intersection between the sets, then we cannot determine
-  // whether or not there is a match.
-  if (IncludedGroupsA.empty())
-    return Result;
-
-  // Create a pair that contains the larger candidates.
-  auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
-  auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
-  Result = std::make_pair(ItA->second, ItB->second);
-  return Result;
-}
-
 /// From the list of IRSimilarityCandidates, perform a comparison between each
 /// IRSimilarityCandidate to determine if there are overlapping
 /// IRInstructionData, or if they do not have the same structure.
@@ -1272,16 +1127,9 @@ CheckLargerCands(
 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
 /// vector are structurally similar to one another.
-/// \param [in] IndexToIncludedCand - Mapping of index of an instruction in
-/// a circuit to the IRSimilarityCandidates that include this instruction.
-/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
-/// number representing the structural group assigned to it.
 static void findCandidateStructures(
     std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
-    DenseMap<unsigned, SimilarityGroup> &StructuralGroups,
-    DenseMap<unsigned,  DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
-    DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup
-    ) {
+    DenseMap<unsigned, SimilarityGroup> &StructuralGroups) {
   std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
       InnerCandEndIt;
 
@@ -1344,24 +1192,6 @@ static void findCandidateStructures(
       if (CandToGroupItInner != CandToGroup.end())
         continue;
 
-      // Check if we have found structural similarity between two candidates
-      // that fully contains the first and second candidates.
-      std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
-          LargerPair = CheckLargerCands(
-              *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
-
-      // If a pair was found, it means that we can assume that these smaller
-      // substrings are also structurally similar.  Use the larger candidates to
-      // determine the canonical mapping between the two sections.
-      if (LargerPair.has_value()) {
-        SameStructure = true;
-        InnerCandIt->createCanonicalRelationFrom(
-            *CandIt, *LargerPair.value().first, *LargerPair.value().second);
-        CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
-        CurrentGroupPair->second.push_back(*InnerCandIt);
-        continue;
-      }
-
       // Otherwise we determine if they have the same structure and add it to
       // vector if they match.
       ValueNumberMappingA.clear();
@@ -1388,58 +1218,24 @@ void IRSimilarityIdentifier::findCandidates(
   std::vector<SimilarityGroup> NewCandidateGroups;
 
   DenseMap<unsigned, SimilarityGroup> StructuralGroups;
-  DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand;
-  DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; 
 
   // Iterate over the subsequences found by the Suffix Tree to create
   // IRSimilarityCandidates for each repeated subsequence and determine which
   // instances are structurally similar to one another.
-
-  // Sort the suffix tree from longest substring to shortest.
-  std::vector<SuffixTree::RepeatedSubstring> RSes;
-  for (SuffixTree::RepeatedSubstring &RS : ST)
-    RSes.push_back(RS);
-
-  llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS,
-                             const SuffixTree::RepeatedSubstring &RHS) {
-    return LHS.Length > RHS.Length;
-  });
-  for (SuffixTree::RepeatedSubstring &RS : RSes) {
+  for (SuffixTree::RepeatedSubstring &RS : ST) {
     createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
                                    CandsForRepSubstring);
 
     if (CandsForRepSubstring.size() < 2)
       continue;
 
-    findCandidateStructures(CandsForRepSubstring, StructuralGroups,
-                            IndexToIncludedCand, CandToGroup);
-    for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
+    findCandidateStructures(CandsForRepSubstring, StructuralGroups);
+    for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups)
       // We only add the group if it contains more than one
       // IRSimilarityCandidate.  If there is only one, that means there is no
       // other repeated subsequence with the same structure.
-      if (Group.second.size() > 1) {
+      if (Group.second.size() > 1)
         SimilarityCandidates->push_back(Group.second);
-        // Iterate over each candidate in the group, and add an entry for each
-        // instruction included with a mapping to a set of
-        // IRSimilarityCandidates that include that instruction.
-        for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {
-          for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
-               Idx <= Edx; ++Idx) {
-            DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>>::iterator
-                IdIt;
-            IdIt = IndexToIncludedCand.find(Idx);
-            bool Inserted = false;
-            if (IdIt == IndexToIncludedCand.end())
-              std::tie(IdIt, Inserted) = IndexToIncludedCand.insert(
-                  std::make_pair(Idx, DenseSet<IRSimilarityCandidate *>()));
-            IdIt->second.insert(&IRCand);
-          }
-          // Add mapping of candidate to the overall similarity group number.
-          CandToGroup.insert(
-              std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
-        }
-      }
-    }
 
     CandsForRepSubstring.clear();
     StructuralGroups.clear();