Skip to content

Commit

Permalink
Revert "[IRSim] Check largest sections first when analyzing similarity"
Browse files Browse the repository at this point in the history
llvm-sim test still misbehaving on other platforms.

This reverts commit 082ec26.
  • Loading branch information
AndrewLitteken committed Mar 21, 2023
1 parent 4277d93 commit 9baaf4f
Show file tree
Hide file tree
Showing 7 changed files with 62 additions and 320 deletions.
43 changes: 0 additions & 43 deletions llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -850,49 +850,6 @@ class IRSimilarityCandidate {
IRSimilarityCandidate &SourceCand,
DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);

/// Create a mapping for the value numbering of the calling
/// IRSimilarityCandidate, to a different separate set of numbers, based on
/// the canonical ordering in \p SourceCand. These are defined based on the
/// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of
/// these relationships should have the same information, just in opposite
/// directions. Uses the \p OneToOne mapping from the target candidate to
/// \p SourceCand GVNs to determine the mapping first for values with multiple
/// mappings. This mapping is created by the ordering of operands in the
/// instruction they are first seen in the candidates.
///
/// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
/// canonical numbering from.
/// \param [in,out] OneToOne - A mapping of value numbers from the target
/// candidate to \p SourceCand using the structure of the original
/// instructions.
/// \param ToSourceMapping - The mapping of value numbers from this candidate
/// to \p SourceCand.
/// \param FromSourceMapping - The mapping of value numbers from
/// \p SourceCand to this candidate.
void createCanonicalRelationFrom(
IRSimilarityCandidate &SourceCand,
DenseMap<unsigned, unsigned> &OneToOne,
DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);

/// Create a mapping for the value numbering of the calling
/// IRSimilarityCandidate, to a different separate set of numbers, based on
/// the canonical ordering in \p SourceCand. These are defined based on the
/// canonical mapping defined between \p SourceCandLarge and
/// \p TargetCandLarge. These IRSimilarityCandidates are already structurally
/// similar, and fully encapsulate the IRSimilarityCandidates in question.
/// These are used as a "bridge" from the \p SourceCand to the target.
///
/// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
/// canonical numbering from.
/// \param SourceCandLarge - The IRSimilarityCandidate fully containing
/// \p SourceCand.
/// \param TargetCandLarge - The IRSimilarityCandidate fully containing
/// this Candidate.
void createCanonicalRelationFrom(
IRSimilarityCandidate &SourceCand,
IRSimilarityCandidate &SourceCandLarge,
IRSimilarityCandidate &TargetCandLarge);

/// \param [in,out] BBSet - The set to track the basic blocks.
void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const {
Expand Down
214 changes: 5 additions & 209 deletions llvm/lib/Analysis/IRSimilarityIdentifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1101,76 +1101,6 @@ void IRSimilarityCandidate::createCanonicalRelationFrom(
}
}

// Build this candidate's canonical numbering from the canonical numbering of
// \p SourceCand, using \p SourceCandLarge and \p TargetCandLarge as a bridge.
//
// Preconditions (checked by assertion): \p SourceCand, \p SourceCandLarge and
// \p TargetCandLarge already have canonical numberings, and this candidate
// does not have one yet. Every value numbered in this candidate must resolve
// through each step of the bridge; a failed lookup is treated as a programmer
// error and asserted on.
void IRSimilarityCandidate::createCanonicalRelationFrom(
    IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
    IRSimilarityCandidate &TargetCandLarge) {
  // The three input candidates must already carry a canonical numbering. The
  // message names the violated condition so that a failing assertion points
  // at the candidate that was not prepared.
  assert(!SourceCand.CanonNumToNumber.empty() &&
         "Source canonical relationship is empty");
  assert(!SourceCand.NumberToCanonNum.empty() &&
         "Source canonical relationship is empty");

  assert(!SourceCandLarge.CanonNumToNumber.empty() &&
         "Large source canonical relationship is empty");
  assert(!SourceCandLarge.NumberToCanonNum.empty() &&
         "Large source canonical relationship is empty");

  assert(!TargetCandLarge.CanonNumToNumber.empty() &&
         "Large target canonical relationship is empty");
  assert(!TargetCandLarge.NumberToCanonNum.empty() &&
         "Large target canonical relationship is empty");

  // This candidate, on the other hand, must not have been numbered yet.
  assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
  assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");

  // We're going to use the larger candidates as a "bridge" to create the
  // canonical number for the target candidate since we have identified two
  // candidates as subsequences of larger sequences, and therefore must be
  // structurally similar.
  for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
    Value *CurrVal = ValueNumPair.first;
    unsigned TargetCandGVN = ValueNumPair.second;

    // Find the numbering in the large candidate that surrounds the
    // current candidate.
    std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
    assert(OLargeTargetGVN.has_value() && "GVN not found for Value");

    // Get the canonical numbering in the large target candidate.
    std::optional<unsigned> OTargetCandCanon =
        TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
    assert(OTargetCandCanon.has_value() &&
           "Canonical Number not found for GVN");

    // Get the GVN in the large source candidate from the canonical numbering.
    std::optional<unsigned> OLargeSourceGVN =
        SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
    assert(OLargeSourceGVN.has_value() &&
           "GVN Number not found for Canonical Number");

    // Get the Value from the GVN in the large source candidate.
    std::optional<Value *> OLargeSourceV =
        SourceCandLarge.fromGVN(OLargeSourceGVN.value());
    assert(OLargeSourceV.has_value() && "Value not found for GVN");

    // Get the GVN number for the Value in the source candidate.
    std::optional<unsigned> OSourceGVN =
        SourceCand.getGVN(OLargeSourceV.value());
    assert(OSourceGVN.has_value() && "GVN Number not found for Value");

    // Get the canonical numbering from the GVN.
    std::optional<unsigned> OSourceCanon =
        SourceCand.getCanonicalNum(OSourceGVN.value());
    assert(OSourceCanon.has_value() && "Canon Number not found for GVN");

    // Insert the canonical numbering and GVN pair into their respective
    // mappings.
    CanonNumToNumber.insert(
        std::make_pair(OSourceCanon.value(), TargetCandGVN));
    NumberToCanonNum.insert(
        std::make_pair(TargetCandGVN, OSourceCanon.value()));
  }
}

void IRSimilarityCandidate::createCanonicalMappingFor(
IRSimilarityCandidate &CurrCand) {
assert(CurrCand.CanonNumToNumber.size() == 0 &&
Expand All @@ -1188,81 +1118,6 @@ void IRSimilarityCandidate::createCanonicalMappingFor(
}
}

/// Search the previously matched IRSimilarityCandidates for a pair of larger
/// candidates, one fully containing \p CandA and one fully containing
/// \p CandB, that were both assigned to the same group.
///
/// \param [in] CandA - The first candidate we are trying to determine the
/// structure of.
/// \param [in] CandB - The second candidate we are trying to determine the
/// structure of.
/// \param [in] IndexToIncludedCand - Mapping from the index of an instruction
/// to the IRSimilarityCandidates that include this instruction.
/// \param [in] CandToGroup - Mapping of IRSimilarityCandidate to the number of
/// the group assigned to it.
/// \returns The larger candidate containing \p CandA paired with the larger
/// candidate containing \p CandB for one shared group, or an empty optional
/// when either start index has no entry in \p IndexToIncludedCand or the two
/// candidates share no containing group.
static std::optional<
    std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
CheckLargerCands(
    IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB,
    DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
    DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) {
  using GroupToCandMap = DenseMap<unsigned, IRSimilarityCandidate *>;

  // Record, for every already-matched candidate that spans all of Cand, the
  // group it belongs to and (the first time a group is seen) the spanning
  // candidate itself. Returns false when nothing is registered at Cand's
  // start index.
  auto CollectEnclosing = [&](IRSimilarityCandidate &Cand,
                              GroupToCandMap &GroupToCand,
                              DenseSet<unsigned> &Groups) -> bool {
    const unsigned StartIdx = Cand.getStartIdx();
    const unsigned EndIdx = Cand.getEndIdx();

    auto Found = IndexToIncludedCand.find(StartIdx);
    if (Found == IndexToIncludedCand.end())
      return false;

    for (IRSimilarityCandidate *Enclosing : Found->second) {
      const bool Spans = Enclosing->getStartIdx() <= StartIdx &&
                         Enclosing->getEndIdx() >= EndIdx;
      if (!Spans)
        continue;
      unsigned GroupNum = CandToGroup.find(Enclosing)->second;
      GroupToCand.insert(std::make_pair(GroupNum, Enclosing));
      Groups.insert(GroupNum);
    }
    return true;
  };

  GroupToCandMap GroupToLargerA;
  GroupToCandMap GroupToLargerB;
  DenseSet<unsigned> SharedGroups;
  DenseSet<unsigned> GroupsB;

  if (!CollectEnclosing(CandA, GroupToLargerA, SharedGroups))
    return std::nullopt;
  if (!CollectEnclosing(CandB, GroupToLargerB, GroupsB))
    return std::nullopt;

  // Keep only the groups that contain both candidates; these are the groups
  // where the larger candidates exist.
  set_intersect(SharedGroups, GroupsB);

  // With no group in common we cannot determine whether there is a match.
  if (SharedGroups.empty())
    return std::nullopt;

  // Pair up the larger candidates of the first shared group.
  const unsigned ChosenGroup = *SharedGroups.begin();
  return std::make_pair(GroupToLargerA.find(ChosenGroup)->second,
                        GroupToLargerB.find(ChosenGroup)->second);
}

/// From the list of IRSimilarityCandidates, perform a comparison between each
/// IRSimilarityCandidate to determine if there are overlapping
/// IRInstructionData, or if they do not have the same structure.
Expand All @@ -1272,16 +1127,9 @@ CheckLargerCands(
/// \param [out] StructuralGroups - the mapping of unsigned integers to vector
/// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
/// vector are structurally similar to one another.
/// \param [in] IndexToIncludedCand - Mapping of the index of an instruction in
/// a circuit to the IRSimilarityCandidates that include this instruction.
/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
/// number representing the structural group assigned to it.
static void findCandidateStructures(
std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
DenseMap<unsigned, SimilarityGroup> &StructuralGroups,
DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup
) {
DenseMap<unsigned, SimilarityGroup> &StructuralGroups) {
std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
InnerCandEndIt;

Expand Down Expand Up @@ -1344,24 +1192,6 @@ static void findCandidateStructures(
if (CandToGroupItInner != CandToGroup.end())
continue;

// Check if we have found structural similarity between two candidates
// that fully contains the first and second candidates.
std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
LargerPair = CheckLargerCands(
*CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);

// If a pair was found, it means that we can assume that these smaller
// substrings are also structurally similar. Use the larger candidates to
// determine the canonical mapping between the two sections.
if (LargerPair.has_value()) {
SameStructure = true;
InnerCandIt->createCanonicalRelationFrom(
*CandIt, *LargerPair.value().first, *LargerPair.value().second);
CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
CurrentGroupPair->second.push_back(*InnerCandIt);
continue;
}

// Otherwise we determine if they have the same structure and add it to
// vector if they match.
ValueNumberMappingA.clear();
Expand All @@ -1388,58 +1218,24 @@ void IRSimilarityIdentifier::findCandidates(
std::vector<SimilarityGroup> NewCandidateGroups;

DenseMap<unsigned, SimilarityGroup> StructuralGroups;
DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand;
DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;

// Iterate over the subsequences found by the Suffix Tree to create
// IRSimilarityCandidates for each repeated subsequence and determine which
// instances are structurally similar to one another.

// Sort the suffix tree from longest substring to shortest.
std::vector<SuffixTree::RepeatedSubstring> RSes;
for (SuffixTree::RepeatedSubstring &RS : ST)
RSes.push_back(RS);

llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS,
const SuffixTree::RepeatedSubstring &RHS) {
return LHS.Length > RHS.Length;
});
for (SuffixTree::RepeatedSubstring &RS : RSes) {
for (SuffixTree::RepeatedSubstring &RS : ST) {
createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
CandsForRepSubstring);

if (CandsForRepSubstring.size() < 2)
continue;

findCandidateStructures(CandsForRepSubstring, StructuralGroups,
IndexToIncludedCand, CandToGroup);
for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
findCandidateStructures(CandsForRepSubstring, StructuralGroups);
for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups)
// We only add the group if it contains more than one
// IRSimilarityCandidate. If there is only one, that means there is no
// other repeated subsequence with the same structure.
if (Group.second.size() > 1) {
if (Group.second.size() > 1)
SimilarityCandidates->push_back(Group.second);
// Iterate over each candidate in the group, and add an entry for each
// instruction included with a mapping to a set of
// IRSimilarityCandidates that include that instruction.
for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {
for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
Idx <= Edx; ++Idx) {
DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>>::iterator
IdIt;
IdIt = IndexToIncludedCand.find(Idx);
bool Inserted = false;
if (IdIt == IndexToIncludedCand.end())
std::tie(IdIt, Inserted) = IndexToIncludedCand.insert(
std::make_pair(Idx, DenseSet<IRSimilarityCandidate *>()));
IdIt->second.insert(&IRCand);
}
// Add mapping of candidate to the overall similarity group number.
CandToGroup.insert(
std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
}
}
}

CandsForRepSubstring.clear();
StructuralGroups.clear();
Expand Down

0 comments on commit 9baaf4f

Please sign in to comment.