Skip to content

Commit

Permalink
Merge pull request #1179 from PetholzA/feature/20240131_generators_rmat
Browse files Browse the repository at this point in the history
improves rmat generator + adds tests
  • Loading branch information
fabratu committed May 23, 2024
2 parents 39467ba + 035cc17 commit 3e7e940
Show file tree
Hide file tree
Showing 3 changed files with 215 additions and 40 deletions.
18 changes: 16 additions & 2 deletions include/networkit/generators/RmatGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#ifndef NETWORKIT_GENERATORS_RMAT_GENERATOR_HPP_
#define NETWORKIT_GENERATORS_RMAT_GENERATOR_HPP_

#include <stdint.h>
#include <networkit/generators/StaticGraphGenerator.hpp>
#include <networkit/graph/Graph.hpp>

Expand All @@ -24,10 +25,19 @@ namespace NetworKit {
class RmatGenerator final : public StaticGraphGenerator {
count scale; ///< n = 2^scale
count edgeFactor;
double a, b, c; ///< probabilities
double defaultEdgeWeight;
bool weighted;
count reduceNodes;
bool discardSelfLoops;

// Data for the alias table:
std::vector<std::pair<uint32_t, uint32_t>> bits;
std::vector<uint8_t> numberOfBits;
std::vector<uint32_t> coinFlipProbability;
std::vector<uint32_t> coinFlipReplacement;
uint32_t mask;
std::pair<uint32_t, uint32_t> curBits{0, 0};
uint32_t remainingBits = 0;

public:
/**
Expand All @@ -39,14 +49,18 @@ class RmatGenerator final : public StaticGraphGenerator {
* @param[in] d Probability for quadrant lower right
* @param[in] weighted result graph weighted?
* @param[in] reduceNodes number of random nodes to delete to achieve a given node count
* @param[in] discardSelfLoops ignore self loops
*/
RmatGenerator(count scale, count edgeFactor, double a, double b, double c, double d,
bool weighted = false, count reduceNodes = 0);
bool weighted = false, count reduceNodes = 0, bool discardSelfLoops = true);

/**
* @return Graph to be generated according to parameters specified in constructor.
*/
Graph generate() override;

private:
std::pair<node, node> sampleEdge(uint8_t bits);
};

} /* namespace NetworKit */
Expand Down
203 changes: 165 additions & 38 deletions networkit/cpp/generators/RmatGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,169 @@
*
* Created on: 18.03.2014
* Author: Henning, cls
*
* Uses the algorithm described by Hübschle-Schneider and Sanders in
* "Linear Work Generation of R-MAT Graphs" https://arxiv.org/abs/1905.03525
*/

#include <networkit/auxiliary/Log.hpp>
#include <networkit/auxiliary/NumericTools.hpp>
#include <networkit/auxiliary/PrioQueue.hpp>
#include <networkit/auxiliary/Random.hpp>
#include <networkit/generators/RmatGenerator.hpp>

namespace NetworKit {

// One candidate row of the alias table: a bit prefix for both endpoints of an edge
// together with the probability of drawing exactly that prefix.
// Field order matters: entries are brace-initialised positionally.
struct Entry {
// Bit prefix for the first endpoint (built by shifting in one bit per refinement).
uint32_t i;
// Bit prefix for the second endpoint.
uint32_t j;
// Number of valid bits stored in each of i and j.
uint32_t numberOfBits;
// Product of the quadrant probabilities chosen along this prefix.
double probability;
};

// Heap ordering for Entry objects (used with std::push_heap / std::pop_heap).
// Entries are ranked by probability, except that an entry already holding 32 bits
// ranks below every entry with fewer bits, so it is popped only when nothing else
// is left. Two 32-bit entries compare as equivalent.
bool priorityQueueCompare(const Entry &a, const Entry &b) {
    const bool eitherIsFull = (a.numberOfBits == 32) || (b.numberOfBits == 32);
    return eitherIsFull ? (a.numberOfBits > b.numberOfBits) : (a.probability < b.probability);
}

// std::priority_queue doesn't support iterating over its elements.
struct PriorityQueue {
std::vector<Entry> &internal;

PriorityQueue(std::vector<Entry> &vec) : internal(vec) {}

void push(Entry &&entry) {
internal.push_back(entry);
std::push_heap(internal.begin(), internal.end(), priorityQueueCompare);
}

Entry pop() {
std::pop_heap(internal.begin(), internal.end(), priorityQueueCompare);
Entry result = internal.back();
internal.pop_back();
return result;
}
};

void generateEntries(std::vector<Entry> &entryList, count size, double a, double b, double c,
double d) {
entryList.reserve(size);
PriorityQueue priorityQueue(entryList);
priorityQueue.push(Entry{0, 0, 1, a});
priorityQueue.push(Entry{0, 1, 1, b});
priorityQueue.push(Entry{1, 0, 1, c});
priorityQueue.push(Entry{1, 1, 1, d});
while (entryList.size() <= size - 3) {
// Take the entry with the highest probability and split it up.
// This increases the average number of bits compared to a uniform distribution.
Entry old = priorityQueue.pop();
priorityQueue.push(
Entry{old.i << 1 | 0, old.j << 1 | 0, old.numberOfBits + 1, a * old.probability});
priorityQueue.push(
Entry{old.i << 1 | 0, old.j << 1 | 1, old.numberOfBits + 1, b * old.probability});
priorityQueue.push(
Entry{old.i << 1 | 1, old.j << 1 | 0, old.numberOfBits + 1, c * old.probability});
priorityQueue.push(
Entry{old.i << 1 | 1, old.j << 1 | 1, old.numberOfBits + 1, d * old.probability});
}
}

/**
 * Stores the generator parameters, validates them and precomputes the alias table
 * (bits, numberOfBits, coinFlipProbability, coinFlipReplacement, mask) that
 * sampleEdge() reads.
 *
 * Throws std::runtime_error if scale > 63, or if
 * Aux::NumericTools::equal(a + b + c + d, 1.0, 0.0001) is false.
 */
RmatGenerator::RmatGenerator(count scale, count edgeFactor, double a, double b, double c, double d,
bool weighted, count reduceNodes)
: scale(scale), edgeFactor(edgeFactor), a(a), b(b), c(c), weighted(weighted),
reduceNodes(reduceNodes) {
bool weighted, count reduceNodes, bool discardSelfLoops)
: scale(scale), edgeFactor(edgeFactor), weighted(weighted), reduceNodes(reduceNodes),
discardSelfLoops(discardSelfLoops) {
if (scale > 63)
throw std::runtime_error("Cannot generate more than 2^63 nodes");
double sum = a + b + c + d;
INFO("sum of probabilities: ", sum);
if (!Aux::NumericTools::equal(sum, 1.0, 0.0001))
throw std::runtime_error("Probabilities in Rmat have to sum to 1.");
defaultEdgeWeight = 1.0;

// Number of table rows: 2^min(12, round(scale * 5/8) + 1), i.e. at most 4096.
count size = 1 << std::min(static_cast<count>(12),
static_cast<count>(std::round(scale * 5.0 / 8.0) + 1));

assert((size & (size - 1)) == 0); // The size must be a power of two.
std::vector<Entry> entryList;
generateEntries(entryList, size, a, b, c, d);
while (entryList.size() < size) { // Fill the remaining entries with 0-probability entries.
entryList.push_back(Entry{0, 0, 0, 0});
}

// Construct the alias table:
// size is a power of two, so (random & mask) selects a row index in [0, size).
mask = size - 1;
bits.resize(size);
numberOfBits.resize(size);
coinFlipProbability.resize(size);
coinFlipReplacement.resize(size);
for (count i = 0; i < size; i++) {
bits[i] = std::make_pair(entryList[i].i, entryList[i].j);
numberOfBits[i] = entryList[i].numberOfBits;
coinFlipProbability[i] = 0;
coinFlipReplacement[i] = 0;
}
// Every row is selected with probability 1/size. A row whose entry has less
// probability than that ("underfull") hands the difference to an entry that has
// more ("overfull"): coinFlipReplacement stores the donor's index and
// coinFlipProbability the handed-over share, scaled to the uint32_t range.
double baseProbability = 1.0 / static_cast<double>(size);
count lastOverfullIndex = 0;
count lastUnderfullIndex = 0;
while (lastUnderfullIndex != size && lastOverfullIndex != size) {
// Advance to the next underfull row; the table is complete if there is none.
while (entryList[lastUnderfullIndex].probability >= baseProbability) {
lastUnderfullIndex++;
if (lastUnderfullIndex == size)
return;
}
int curUnderfullIndex = lastUnderfullIndex;
while (lastOverfullIndex != size) {
// Advance to the next overfull row; the table is complete if there is none.
while (entryList[lastOverfullIndex].probability <= baseProbability) {
lastOverfullIndex++;
if (lastOverfullIndex == size)
return;
}
// Top the underfull row up to exactly baseProbability using the donor's mass.
double delta = baseProbability - entryList[curUnderfullIndex].probability;
entryList[curUnderfullIndex].probability = baseProbability;
entryList[lastOverfullIndex].probability -= delta;
coinFlipReplacement[curUnderfullIndex] = lastOverfullIndex;
coinFlipProbability[curUnderfullIndex] = static_cast<uint32_t>(
delta / baseProbability * std::numeric_limits<uint32_t>::max());
// If the donor itself dropped below baseProbability and lies before the
// underfull scan position, that scan will not reach it again, so it is
// handled immediately as the next underfull row.
if (entryList[lastOverfullIndex].probability < baseProbability
&& lastOverfullIndex < lastUnderfullIndex) {
curUnderfullIndex = lastOverfullIndex;
} else
break;
}
}
}

/**
 * Draws one edge by concatenating bit chunks taken from the alias table until
 * @a input_bits bits have been collected for each endpoint.
 *
 * Bits left over from the last sampled table entry are kept in the members
 * curBits / remainingBits and are consumed first by the next call.
 *
 * @param[in] input_bits Number of bits per endpoint (generate() passes scale).
 * @return Pair (first endpoint, second endpoint).
 */
std::pair<node, node> RmatGenerator::sampleEdge(uint8_t input_bits) {
    std::pair<node, node> result{0, 0};

    // Draws a fresh table entry into curBits / remainingBits. One 64-bit random
    // number provides the row index (low bits) and the coin flip (high 32 bits)
    // that decides whether the row's replacement entry is used instead.
    auto sample = [this]() {
        uint64_t randomNumber = Aux::Random::integer();

        uint32_t index = randomNumber & mask;
        uint32_t coinFlip = randomNumber >> 32;
        if (coinFlip <= coinFlipProbability[index]) {
            index = coinFlipReplacement[index];
        }
        curBits = bits[index];
        remainingBits = numberOfBits[index];
    };

    while (true) {
        if (remainingBits >= input_bits) {
            // Enough buffered bits: take the top input_bits of them and keep the rest.
            remainingBits -= input_bits;
            result.first = result.first << input_bits | curBits.first >> remainingBits;
            result.second = result.second << input_bits | curBits.second >> remainingBits;
            // Build the mask in unsigned 64-bit arithmetic: with remainingBits == 31
            // the int expression (1 << 31) - 1 overflows, which is undefined behaviour.
            const uint32_t keepMask =
                static_cast<uint32_t>((uint64_t{1} << remainingBits) - 1);
            curBits.first &= keepMask;
            curBits.second &= keepMask;
            return result;
        }
        // Not enough buffered bits: consume all of them and refill from the table.
        result.first = result.first << remainingBits | curBits.first;
        result.second = result.second << remainingBits | curBits.second;
        input_bits -= remainingBits;
        sample();
    }
}

Graph RmatGenerator::generate() {
Expand All @@ -34,36 +177,11 @@ Graph RmatGenerator::generate() {
count numEdges = n * edgeFactor * n * 1.0 / static_cast<double>(n - reduceNodes);
count wantedEdges = (n - reduceNodes) * edgeFactor;
Graph G(n - reduceNodes, weighted);
double ab = a + b;
double abc = ab + c;

auto quadrant([&]() {
double r = Aux::Random::probability();
TRACE("r: ", r);

if (r <= a) {
return 0;
} else if (r <= ab) {
return 1;
} else if (r <= abc) {
return 2;
} else
return 3;
});

auto drawEdge([&]() {
node u = 0;
node v = 0;
for (index i = 0; i < scale; ++i) {
count q = quadrant();
u = u << 1;
v = v << 1;
u = u | (q >> 1);
v = v | (q & 1);
}
// Reset the internal state of the alias table:
curBits = {0, 0};
remainingBits = 0;

return std::make_pair(u, v);
});
Aux::Random::setSeed(Aux::Random::integer(), false);

if (reduceNodes > 0) {
std::vector<node> nodemap(n, 0);
Expand All @@ -86,33 +204,42 @@ Graph RmatGenerator::generate() {
node u, v;
if (weighted) {
for (index e = 0; e < numEdges; ++e) {
std::tie(u, v) = drawEdge();
std::tie(u, v) = sampleEdge(static_cast<uint8_t>(scale));
u = nodemap[u];
v = nodemap[v];
if (discardSelfLoops && u == v)
continue;
if (u != none && v != none) {
G.increaseWeight(u, v, defaultEdgeWeight);
}
}
} else {
while (G.numberOfEdges() < wantedEdges) {
std::tie(u, v) = drawEdge();
std::tie(u, v) = sampleEdge(static_cast<uint8_t>(scale));
u = nodemap[u];
v = nodemap[v];
if (u != none && v != none && u != v && !G.hasEdge(u, v)) {
if (discardSelfLoops && u == v)
continue;
if (u != none && v != none && !G.hasEdge(u, v)) {
G.addEdge(u, v);
}
}
}
} else {
if (weighted) {
for (index e = 0; e < numEdges; ++e) {
std::pair<node, node> drawnEdge = drawEdge();
std::pair<node, node> drawnEdge = sampleEdge(static_cast<uint8_t>(scale));
if (discardSelfLoops && drawnEdge.first == drawnEdge.second)
continue;
G.increaseWeight(drawnEdge.first, drawnEdge.second, defaultEdgeWeight);
}
} else {
while (G.numberOfEdges() < wantedEdges) {
std::pair<node, node> drawnEdge = drawEdge();
if (!G.hasEdge(drawnEdge.first, drawnEdge.second)) {
std::pair<node, node> drawnEdge = sampleEdge(static_cast<uint8_t>(scale));
if (discardSelfLoops && drawnEdge.first == drawnEdge.second)
continue;
if (!G.hasEdge(drawnEdge.first, drawnEdge.second) && G.hasNode(drawnEdge.first)
&& G.hasNode(drawnEdge.second)) {
G.addEdge(drawnEdge.first, drawnEdge.second);
}
}
Expand Down
34 changes: 34 additions & 0 deletions networkit/cpp/generators/test/GeneratorsGTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,40 @@ TEST_F(GeneratorsGTest, testRmatGenerator) {
EXPECT_TRUE(G.checkConsistency());
}

// Statistical check of the R-MAT edge distribution: generates many small graphs
// (scale 2, self loops discarded) and compares the observed relative frequency of
// every (u, v) pair against precomputed reference values.
TEST_F(GeneratorsGTest, testRmatGeneratorDistribution) {
    constexpr count scale = 2;
    constexpr count n = (1 << scale);
    const count edgeFactor = 1;
    const double a = 0.51;
    const double b = 0.12;
    const double c = 0.12;
    const double d = 0.25;
    // Reference frequencies; only entries with row index > column index are non-zero.
    const double edgeExpectedProbability[n][n] = {
        {0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00},
        {2.242834007823646e-01, 0.0e+00, 0.0e+00, 0.0e+00},
        {2.242834007823646e-01, 1.0219127659785153e-01, 0.0e+00, 0.0e+00},
        {1.0219127659785153e-01, 1.7352532261978387e-01, 1.7352532261978387e-01, 0.0e+00},
    };

    RmatGenerator rmat(scale, edgeFactor, a, b, c, d, false, 0, true);
    count observed[n][n] = {};
    count totalEdges = 0;
    // Accumulate edge counts over 1000 independently generated graphs.
    for (index run = 0; run < 1000; ++run) {
        Graph G = rmat.generate();
        G.forEdges([&](node u, node v) {
            ++observed[u][v];
            ++totalEdges;
        });
    }

    const double total = static_cast<double>(totalEdges);
    for (index row = 0; row < n; ++row) {
        for (index col = 0; col < n; ++col) {
            const double frequency = static_cast<double>(observed[row][col]) / total;
            EXPECT_NEAR(frequency, edgeExpectedProbability[row][col], 0.01);
        }
    }
}

TEST_F(GeneratorsGTest, testRmatGeneratorReduceNodes) {
count scale = 9;
count n = (1 << scale);
Expand Down

0 comments on commit 3e7e940

Please sign in to comment.