Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug 2267 quorum intersection pick any scc with a quorum #2279

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 49 additions & 51 deletions src/herder/QuorumIntersectionCheckerImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,15 +181,16 @@ MinQuorumEnumerator::pickSplitNode() const
size_t
MinQuorumEnumerator::maxCommit() const
{
return (mQic.mMaxSCC.count() / 2) + 1;
return (mScanSCC.count() / 2) + 1;
}

MinQuorumEnumerator::MinQuorumEnumerator(
BitSet const& committed, BitSet const& remaining,
BitSet const& committed, BitSet const& remaining, BitSet const& scanSCC,
QuorumIntersectionCheckerImpl const& qic)
: mCommitted(committed)
, mRemaining(remaining)
, mPerimeter(committed | remaining)
, mScanSCC(scanSCC)
, mQic(qic)
{
}
Expand Down Expand Up @@ -245,7 +246,7 @@ MinQuorumEnumerator::anyMinQuorumHasDisjointQuorum()
<< "early exit 3.1: minimal quorum=" << committedQuorum;
}
mQic.mStats.mEarlyExit31s++;
return mQic.hasDisjointQuorum(committedQuorum);
return hasDisjointQuorum(committedQuorum);
}
if (mQic.mLogTrace)
{
Expand Down Expand Up @@ -308,7 +309,8 @@ MinQuorumEnumerator::anyMinQuorumHasDisjointQuorum()
CLOG(TRACE, "SCP") << "recursing into subproblems, split=" << split;
}
mRemaining.unset(split);
MinQuorumEnumerator childExcludingSplit(mCommitted, mRemaining, mQic);
MinQuorumEnumerator childExcludingSplit(mCommitted, mRemaining, mScanSCC,
mQic);
mQic.mStats.mFirstRecursionsTaken++;
if (childExcludingSplit.anyMinQuorumHasDisjointQuorum())
{
Expand All @@ -320,7 +322,8 @@ MinQuorumEnumerator::anyMinQuorumHasDisjointQuorum()
return true;
}
mCommitted.set(split);
MinQuorumEnumerator childIncludingSplit(mCommitted, mRemaining, mQic);
MinQuorumEnumerator childIncludingSplit(mCommitted, mRemaining, mScanSCC,
mQic);
mQic.mStats.mSecondRecursionsTaken++;
return childIncludingSplit.anyMinQuorumHasDisjointQuorum();
}
Expand Down Expand Up @@ -359,7 +362,7 @@ QuorumIntersectionCheckerImpl::Stats::log() const
size_t exits = (mEarlyExit1s + mEarlyExit21s + mEarlyExit22s +
mEarlyExit31s + mEarlyExit32s);
CLOG(DEBUG, "SCP") << "[Nodes: " << mTotalNodes << ", SCCs: " << mNumSCCs
<< ", MaxSCC: " << mMaxSCC
<< ", ScanSCC: " << mScanSCCSize
<< ", MaxQs:" << mMaxQuorumsSeen
<< ", MinQs:" << mMinQuorumsSeen
<< ", Calls:" << mCallsStarted
Expand Down Expand Up @@ -563,19 +566,19 @@ QuorumIntersectionCheckerImpl::noteFoundDisjointQuorums(
}

bool
QuorumIntersectionCheckerImpl::hasDisjointQuorum(BitSet const& nodes) const
MinQuorumEnumerator::hasDisjointQuorum(BitSet const& nodes) const
{
BitSet disj = contractToMaximalQuorum(mMaxSCC - nodes);
BitSet disj = mQic.contractToMaximalQuorum(mScanSCC - nodes);
if (disj)
{
noteFoundDisjointQuorums(nodes, disj);
mQic.noteFoundDisjointQuorums(nodes, disj);
}
else
{
if (mLogTrace)
if (mQic.mLogTrace)
{
CLOG(TRACE, "SCP")
<< "no quorum in complement = " << (mMaxSCC - nodes);
<< "no quorum in complement = " << (mScanSCC - nodes);
}
}
return disj;
Expand Down Expand Up @@ -674,18 +677,7 @@ void
QuorumIntersectionCheckerImpl::buildSCCs()
{
mTSC.calculateSCCs();
mMaxSCC.clear();
for (auto const& scc : mTSC.mSCCs)
{
if (scc.count() > mMaxSCC.count())
{
mMaxSCC = scc;
}
CLOG(DEBUG, "SCP") << "Found " << scc.count() << "-node SCC " << scc;
}
CLOG(DEBUG, "SCP") << "Maximal SCC is " << mMaxSCC;
mStats.mNumSCCs = mTSC.mSCCs.size();
mStats.mMaxSCC = mMaxSCC.count();
}

std::string
Expand All @@ -697,68 +689,74 @@ QuorumIntersectionCheckerImpl::nodeName(size_t node) const
bool
QuorumIntersectionCheckerImpl::networkEnjoysQuorumIntersection() const
{
// First stage: check the graph-level SCCs for disjoint quorums,
// and filter out nodes that aren't in the main SCC.
bool foundDisjoint = false;
size_t nNodes = mPubKeyBitNums.size();
if (!mQuiet)
{
CLOG(INFO, "SCP") << "Calculating " << nNodes
<< "-node network quorum intersection";
}

// First stage: do a single pass over the SCCs searching for one with a
// quorum (on which to focus second stage enumeration); also note and bypass
// second stage exhaustive scan if there are _two_ such SCCs with quorums,
// as they necessarily contain disjoint min-quorums.
bool foundDisjoint = false;
BitSet scanSCC;
for (auto const& scc : mTSC.mSCCs)
{
if (scc == mMaxSCC)
if (auto q = contractToMaximalQuorum(scc))
{
continue;
}
if (auto other = contractToMaximalQuorum(scc))
{
CLOG(DEBUG, "SCP") << "found SCC-disjoint quorum = " << other;
CLOG(DEBUG, "SCP") << "disjoint from quorum = "
<< contractToMaximalQuorum(mMaxSCC);
noteFoundDisjointQuorums(contractToMaximalQuorum(mMaxSCC), other);
foundDisjoint = true;
break;
if (scanSCC.empty())
{
// This is the first SCC with a quorum, we'll make it the
// scan SCC.
scanSCC = scc;
mStats.mScanSCCSize = scanSCC.count();
CLOG(DEBUG, "SCP") << "Found scan SCC: " << scc;
CLOG(DEBUG, "SCP") << "Containing quorum: " << q;
for (size_t i = 0; scanSCC.nextSet(i); ++i)
{
CLOG(DEBUG, "SCP") << "SCC node to scan: " << nodeName(i);
}
}
else
{
CLOG(DEBUG, "SCP") << "Found extra SCC: " << scc;
CLOG(DEBUG, "SCP") << "Containing quorum: " << q;
noteFoundDisjointQuorums(contractToMaximalQuorum(scanSCC), q);
foundDisjoint = true;
break;
}
}
else
{
CLOG(DEBUG, "SCP") << "SCC contains no quorums = " << scc;
for (size_t i = 0; scc.nextSet(i); ++i)
{
CLOG(DEBUG, "SCP") << "Node outside main SCC: " << nodeName(i);
CLOG(DEBUG, "SCP") << "Node outside scan-SCC: " << nodeName(i);
}
}
}
for (size_t i = 0; mMaxSCC.nextSet(i); ++i)
{
CLOG(DEBUG, "SCP") << "Main SCC node: " << nodeName(i);
}

auto q = contractToMaximalQuorum(mMaxSCC);
if (q)
{
CLOG(DEBUG, "SCP") << "Maximal main SCC quorum: " << q;
}
else
if (scanSCC.empty())
{
// We vacuously "enjoy quorum intersection" if there are no quorums,
// though this is probably enough of a potential problem itself that
// it's worth warning about.
if (!mQuiet)
{
CLOG(WARNING, "SCP") << "No quorum found in transitive closure "
CLOG(WARNING, "SCP") << "No quorums found in any SCC "
"(possible network halt)";
}
return true;
}

// Second stage: scan the main SCC powerset, potentially expensive.
// Second stage: scan the scan-SCC powerset, potentially expensive.
if (!foundDisjoint)
{
BitSet committed;
BitSet remaining = mMaxSCC;
MinQuorumEnumerator mqe(committed, remaining, *this);
BitSet remaining = scanSCC;
MinQuorumEnumerator mqe(committed, remaining, scanSCC, *this);
foundDisjoint = mqe.anyMinQuorumHasDisjointQuorum();
mStats.log();
}
Expand Down
15 changes: 10 additions & 5 deletions src/herder/QuorumIntersectionCheckerImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,10 @@ class MinQuorumEnumerator
// any set enumerated by this enumerator and its children.
BitSet mPerimeter;

// The initial value of mRemaining at the root of the search, representing
// the overall SCC we're considering subsets of.
BitSet const& mScanSCC;

// Checker that owns us, contains state of stats, graph, etc.
QuorumIntersectionCheckerImpl const& mQic;

Expand All @@ -447,8 +451,10 @@ class MinQuorumEnumerator

public:
MinQuorumEnumerator(BitSet const& committed, BitSet const& remaining,
BitSet const& scanSCC,
QuorumIntersectionCheckerImpl const& qic);

bool hasDisjointQuorum(BitSet const& nodes) const;
bool anyMinQuorumHasDisjointQuorum();
};

Expand All @@ -465,7 +471,7 @@ class QuorumIntersectionCheckerImpl : public stellar::QuorumIntersectionChecker
{
size_t mTotalNodes = {0};
size_t mNumSCCs = {0};
size_t mMaxSCC = {0};
size_t mScanSCCSize = {0};
size_t mCallsStarted = {0};
size_t mFirstRecursionsTaken = {0};
size_t mSecondRecursionsTaken = {0};
Expand Down Expand Up @@ -502,10 +508,10 @@ class QuorumIntersectionCheckerImpl : public stellar::QuorumIntersectionChecker
std::unordered_map<stellar::PublicKey, size_t> mPubKeyBitNums;
QGraph mGraph;

// This just calculates SCCs and stores the maximal one, which we use for
// the remainder of the search.
// This just calculates SCCs, from which we extract the first one found with
// a quorum, which (assuming no other SCCs have quorums) we'll use for the
// remainder of the search.
TarjanSCCCalculator mTSC;
BitSet mMaxSCC;

QBitSet convertSCPQuorumSet(stellar::SCPQuorumSet const& sqs);
void buildGraph(stellar::QuorumTracker::QuorumMap const& qmap);
Expand All @@ -516,7 +522,6 @@ class QuorumIntersectionCheckerImpl : public stellar::QuorumIntersectionChecker
BitSet contractToMaximalQuorum(BitSet nodes) const;
bool isAQuorum(BitSet const& nodes) const;
bool isMinimalQuorum(BitSet const& nodes) const;
bool hasDisjointQuorum(BitSet const& nodes) const;
void noteFoundDisjointQuorums(BitSet const& nodes,
BitSet const& disj) const;
std::string nodeName(size_t node) const;
Expand Down
64 changes: 61 additions & 3 deletions src/herder/test/QuorumIntersectionTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ interconnectOrgs(xdr::xvector<xdr::xvector<PublicKey>> const& orgs,
}
if (shouldDepend(i, j))
{
CLOG(DEBUG, "SCP") << "dep: org#" << i << " => org#" << j;
CLOG(DEBUG, "Herder") << "dep: org#" << i << " => org#" << j;
auto& otherOrg = orgs.at(j);
auto thresh = roundUpPct(otherOrg.size(), innerThreshPct);
depOrgs.emplace_back(thresh, otherOrg, emptySet);
Expand Down Expand Up @@ -764,12 +764,12 @@ debugQmap(Config const& cfg, QuorumTracker::QuorumMap const& qm)
LocalNode::toJson(*pair.second, [&cfg](PublicKey const& k) {
return cfg.toShortString(k);
});
CLOG(DEBUG, "SCP")
CLOG(DEBUG, "Herder")
<< "qmap[" << cfg.toShortString(pair.first) << "] = " << str;
}
else
{
CLOG(DEBUG, "SCP")
CLOG(DEBUG, "Herder")
<< "qmap[" << cfg.toShortString(pair.first) << "] = nullptr";
}
}
Expand Down Expand Up @@ -840,3 +840,61 @@ TEST_CASE("quorum intersection criticality",
REQUIRE(groups.size() == 1);
REQUIRE(groups == std::set<std::set<PublicKey>>{{orgs[3][0]}});
}

TEST_CASE("quorum intersection finds smaller SCC with quorums",
"[herder][quorumintersectionsize]")
{
// This test checks that the SCC examined by the quorum intersection
// checker's enumeration phase is "the SCC that actually has quorums", even
// if it's not the largest one.
//
// We test this by manufacturing a large SCC A that contains no quorums and
// a smaller SCC B that contains quorums, and checking that we actually
// found some quorums (meaning: we scanned SCC B). Previously we picked the
// larger SCC no matter what, and this would cause the checker to focus on
// A and see "no quorums, vacuously enjoys intersection".
//
//
// SCC A
//
// org0 <--+
// | |
// v |
// org1 |
// | | SCC B
// v |
// org2 | org7 <-+
// | | ^ |
// | | | |
// v | v |
// org3 | org6 |
// | | ^ |
// | | | |
// v | v |
// org4 ---+---> org5 <-+
//

auto orgs = generateOrgs(8, {1});
auto qm = interconnectOrgsUnidir(orgs,
{
{0, 1},
{1, 2},
{2, 3},
{3, 4},
{4, 0},
{4, 5},
{5, 6},
{6, 5},
{6, 7},
{7, 6},
{7, 5},
{5, 7},
},
/* ownThreshPct=*/100);
Config cfg(getTestConfig());
cfg = configureShortNames(cfg, orgs);
debugQmap(cfg, qm);
auto qic = QuorumIntersectionChecker::create(qm, cfg);
REQUIRE(qic->networkEnjoysQuorumIntersection());
REQUIRE(qic->getMaxQuorumsFound() != 0);
}
6 changes: 6 additions & 0 deletions src/util/BitSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ class BitSet
BitSet(BitSet&& other) = default;
BitSet& operator=(BitSet&& other) = default;

bool
operator!=(BitSet const& other) const
{
return !((*this) == other);
}

bool
operator==(BitSet const& other) const
{
Expand Down