Skip to content

Commit 78c2165

Browse files
committed
Add CPartialMerkleTree
This adds a compact representation for a subset of a merkle tree's nodes.
1 parent 86406da commit 78c2165

File tree

2 files changed

+290
-0
lines changed

2 files changed

+290
-0
lines changed

src/main.h

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,11 +1023,203 @@ class CMerkleTx : public CTransaction
10231023

10241024

10251025

1026+
/** Data structure that represents a partial merkle tree.
1027+
*
1028+
* It respresents a subset of the txid's of a known block, in a way that
1029+
* allows recovery of the list of txid's and the merkle root, in an
1030+
* authenticated way.
1031+
*
1032+
* The encoding works as follows: we traverse the tree in depth-first order,
1033+
* storing a bit for each traversed node, signifying whether the node is the
1034+
* parent of at least one matched leaf txid (or a matched txid itself). In
1035+
* case we are at the leaf level, or this bit is 0, its merkle node hash is
1036+
* stored, and its children are not explorer further. Otherwise, no hash is
1037+
* stored, but we recurse into both (or the only) child branch. During
1038+
* decoding, the same depth-first traversal is performed, consuming bits and
1039+
* hashes as they written during encoding.
1040+
*
1041+
* The serialization is fixed and provides a hard guarantee about the
1042+
* encoded size:
1043+
*
1044+
* SIZE <= 10 + ceil(32.25*N)
1045+
*
1046+
* Where N represents the number of leaf nodes of the partial tree. N itself
1047+
* is bounded by:
1048+
*
1049+
* N <= total_transactions
1050+
* N <= 1 + matched_transactions*tree_height
1051+
*
1052+
* The serialization format:
1053+
* - uint32 total_transactions (4 bytes)
1054+
* - varint number of hashes (1-3 bytes)
1055+
* - uint256[] hashes in depth-first order (<= 32*N bytes)
1056+
* - varint number of bytes of flag bits (1-3 bytes)
1057+
* - byte[] flag bits, packed per 8 in a byte, least significant bit first (<= 2*N-1 bits)
1058+
* The size constraints follow from this.
1059+
*/
1060+
class CPartialMerkleTree
1061+
{
1062+
protected:
1063+
// the total number of transactions in the block
1064+
unsigned int nTransactions;
10261065

1066+
// node-is-parent-of-matched-txid bits
1067+
std::vector<bool> vBits;
10271068

1069+
// txids and internal hashes
1070+
std::vector<uint256> vHash;
10281071

1072+
// flag set when encountering invalid data
1073+
bool fBad;
10291074

1075+
// helper function to efficiently calculate the number of nodes at given height in the merkle tree
1076+
unsigned int CalcTreeWidth(int height) {
1077+
return (nTransactions+(1 << height)-1) >> height;
1078+
}
10301079

1080+
// calculate the hash of a node in the merkle tree (at leaf level: the txid's themself)
1081+
uint256 CalcHash(int height, unsigned int pos, const std::vector<uint256> &vTxid) {
1082+
if (height == 0) {
1083+
// hash at height 0 is the txids themself
1084+
return vTxid[pos];
1085+
} else {
1086+
// calculate left hash
1087+
uint256 left = CalcHash(height-1, pos*2, vTxid), right;
1088+
// calculate right hash if not beyong the end of the array - copy left hash otherwise1
1089+
if (pos*2+1 < CalcTreeWidth(height-1))
1090+
right = CalcHash(height-1, pos*2+1, vTxid);
1091+
else
1092+
right = left;
1093+
// combine subhashes
1094+
return Hash(BEGIN(left), END(left), BEGIN(right), END(right));
1095+
}
1096+
}
1097+
1098+
// recursive function that traverses tree nodes, storing the data as bits and hashes
1099+
void TraverseAndBuild(int height, unsigned int pos, const std::vector<uint256> &vTxid, const std::vector<bool> &vMatch) {
1100+
// determine whether this node is the parent of at least one matched txid
1101+
bool fParentOfMatch = false;
1102+
for (unsigned int p = pos << height; p < (pos+1) << height && p < nTransactions; p++)
1103+
fParentOfMatch |= vMatch[p];
1104+
// store as flag bit
1105+
vBits.push_back(fParentOfMatch);
1106+
if (height==0 || !fParentOfMatch) {
1107+
// if at height 0, or nothing interesting below, store hash and stop
1108+
vHash.push_back(CalcHash(height, pos, vTxid));
1109+
} else {
1110+
// otherwise, don't store any hash, but descend into the subtrees
1111+
TraverseAndBuild(height-1, pos*2, vTxid, vMatch);
1112+
if (pos*2+1 < CalcTreeWidth(height-1))
1113+
TraverseAndBuild(height-1, pos*2+1, vTxid, vMatch);
1114+
}
1115+
}
1116+
1117+
// recursive function that traverses tree nodes, consuming the bits and hashes produced by TraverseAndBuild.
1118+
// it returns the hash of the respective node.
1119+
uint256 TraverseAndExtract(int height, unsigned int pos, unsigned int &nBitsUsed, unsigned int &nHashUsed, std::vector<uint256> &vMatch) {
1120+
if (nBitsUsed >= vBits.size()) {
1121+
// overflowed the bits array - failure
1122+
fBad = true;
1123+
return 0;
1124+
}
1125+
bool fParentOfMatch = vBits[nBitsUsed++];
1126+
if (height==0 || !fParentOfMatch) {
1127+
// if at height 0, or nothing interesting below, use stored hash and do not descend
1128+
if (nHashUsed >= vHash.size()) {
1129+
// overflowed the hash array - failure
1130+
fBad = true;
1131+
return 0;
1132+
}
1133+
const uint256 &hash = vHash[nHashUsed++];
1134+
if (height==0 && fParentOfMatch) // in case of height 0, we have a matched txid
1135+
vMatch.push_back(hash);
1136+
return hash;
1137+
} else {
1138+
// otherise, descend into the subtrees to extract matched txids and hashes
1139+
uint256 left = TraverseAndExtract(height-1, pos*2, nBitsUsed, nHashUsed, vMatch), right;
1140+
if (pos*2+1 < CalcTreeWidth(height-1))
1141+
right = TraverseAndExtract(height-1, pos*2+1, nBitsUsed, nHashUsed, vMatch);
1142+
else
1143+
right = left;
1144+
// and combine them before returning
1145+
return Hash(BEGIN(left), END(left), BEGIN(right), END(right));
1146+
}
1147+
}
1148+
1149+
public:
1150+
1151+
// serialization implementation
1152+
IMPLEMENT_SERIALIZE(
1153+
READWRITE(nTransactions);
1154+
READWRITE(vHash);
1155+
std::vector<unsigned char> vBytes;
1156+
if (fRead) {
1157+
READWRITE(vBytes);
1158+
CPartialMerkleTree &us = *(const_cast<CPartialMerkleTree*>(this));
1159+
us.vBits.resize(vBytes.size() * 8);
1160+
for (unsigned int p = 0; p < us.vBits.size(); p++)
1161+
us.vBits[p] = (vBytes[p / 8] & (1 << (p % 8))) != 0;
1162+
us.fBad = false;
1163+
} else {
1164+
vBytes.resize((vBits.size()+7)/8);
1165+
for (unsigned int p = 0; p < vBits.size(); p++)
1166+
vBytes[p / 8] |= vBits[p] << (p % 8);
1167+
READWRITE(vBytes);
1168+
}
1169+
)
1170+
1171+
// Construct a partial merkle tree from a list of transaction id's, and a mask that selects a subset of them
1172+
CPartialMerkleTree(const std::vector<uint256> &vTxid, const std::vector<bool> &vMatch) : nTransactions(vTxid.size()), fBad(false) {
1173+
// reset state
1174+
vBits.clear();
1175+
vHash.clear();
1176+
1177+
// calculate height of tree
1178+
int nHeight = 0;
1179+
while (CalcTreeWidth(nHeight) > 1)
1180+
nHeight++;
1181+
1182+
// traverse the partial tree
1183+
TraverseAndBuild(nHeight, 0, vTxid, vMatch);
1184+
}
1185+
1186+
CPartialMerkleTree() : nTransactions(0), fBad(true) {}
1187+
1188+
// extract the matching txid's represented by this partial merkle tree.
1189+
// returns the merkle root, or 0 in case of failure
1190+
uint256 ExtractMatches(std::vector<uint256> &vMatch) {
1191+
vMatch.clear();
1192+
// An empty set will not work
1193+
if (nTransactions == 0)
1194+
return 0;
1195+
// check for excessively high numbers of transactions
1196+
if (nTransactions > MAX_BLOCK_SIZE / 60) // 60 is the lower bound for the size of a serialized CTransaction
1197+
return 0;
1198+
// there can never be more hashes provided than one for every txid
1199+
if (vHash.size() > nTransactions)
1200+
return 0;
1201+
// there must be at least one bit per node in the partial tree, and at least one node per hash
1202+
if (vBits.size() < vHash.size())
1203+
return 0;
1204+
// calculate height of tree
1205+
int nHeight = 0;
1206+
while (CalcTreeWidth(nHeight) > 1)
1207+
nHeight++;
1208+
// traverse the partial tree
1209+
unsigned int nBitsUsed = 0, nHashUsed = 0;
1210+
uint256 hashMerkleRoot = TraverseAndExtract(nHeight, 0, nBitsUsed, nHashUsed, vMatch);
1211+
// verify that no problems occured during the tree traversal
1212+
if (fBad)
1213+
return 0;
1214+
// verify that all bits were consumed (except for the padding caused by serializing it as a byte sequence)
1215+
if ((nBitsUsed+7)/8 != (vBits.size()+7)/8)
1216+
return 0;
1217+
// verify that all hashes were consumed
1218+
if (nHashUsed != vHash.size())
1219+
return 0;
1220+
return hashMerkleRoot;
1221+
}
1222+
};
10311223

10321224

10331225
/** Nodes collect new transactions into a block, hash them into a hash tree,

src/test/pmt_tests.cpp

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#include <boost/test/unit_test.hpp>
2+
3+
#include "uint256.h"
4+
#include "main.h"
5+
6+
using namespace std;
7+
8+
class CPartialMerkleTreeTester : public CPartialMerkleTree
9+
{
10+
public:
11+
// flip one bit in one of the hashes - this should break the authentication
12+
void Damage() {
13+
unsigned int n = rand() % vHash.size();
14+
int bit = rand() % 256;
15+
uint256 &hash = vHash[n];
16+
hash ^= ((uint256)1 << bit);
17+
}
18+
};
19+
20+
BOOST_AUTO_TEST_SUITE(pmt_tests)
21+
22+
BOOST_AUTO_TEST_CASE(pmt_test1)
23+
{
24+
static const unsigned int nTxCounts[] = {1, 4, 7, 17, 56, 100, 127, 256, 312, 513, 1000, 4095};
25+
26+
for (int n = 0; n < 12; n++) {
27+
unsigned int nTx = nTxCounts[n];
28+
29+
// build a block with some dummy transactions
30+
CBlock block;
31+
for (unsigned int j=0; j<nTx; j++) {
32+
CTransaction tx;
33+
tx.nLockTime = rand(); // actual transaction data doesn't matter; just make the nLockTime's unique
34+
block.vtx.push_back(tx);
35+
}
36+
37+
// calculate actual merkle root and height
38+
uint256 merkleRoot1 = block.BuildMerkleTree();
39+
std::vector<uint256> vTxid(nTx, 0);
40+
for (unsigned int j=0; j<nTx; j++)
41+
vTxid[j] = block.vtx[j].GetHash();
42+
int nHeight = 1, nTx_ = nTx;
43+
while (nTx_ > 1) {
44+
nTx_ = (nTx_+1)/2;
45+
nHeight++;
46+
}
47+
48+
// check with random subsets with inclusion chances 1, 1/2, 1/4, ..., 1/128
49+
for (int att = 1; att < 15; att++) {
50+
// build random subset of txid's
51+
std::vector<bool> vMatch(nTx, false);
52+
std::vector<uint256> vMatchTxid1;
53+
for (unsigned int j=0; j<nTx; j++) {
54+
bool fInclude = (rand() & ((1 << (att/2)) - 1)) == 0;
55+
vMatch[j] = fInclude;
56+
if (fInclude)
57+
vMatchTxid1.push_back(vTxid[j]);
58+
}
59+
60+
// build the partial merkle tree
61+
CPartialMerkleTree pmt1(vTxid, vMatch);
62+
63+
// serialize
64+
CDataStream ss(SER_NETWORK, PROTOCOL_VERSION);
65+
ss << pmt1;
66+
67+
// verify CPartialMerkleTree's size guarantees
68+
unsigned int n = std::min<unsigned int>(nTx, 1 + vMatchTxid1.size()*nHeight);
69+
BOOST_CHECK(ss.size() <= 10 + (258*n+7)/8);
70+
71+
// deserialize into a tester copy
72+
CPartialMerkleTreeTester pmt2;
73+
ss >> pmt2;
74+
75+
// extract merkle root and matched txids from copy
76+
std::vector<uint256> vMatchTxid2;
77+
uint256 merkleRoot2 = pmt2.ExtractMatches(vMatchTxid2);
78+
79+
// check that it has the same merkle root as the original, and a valid one
80+
BOOST_CHECK(merkleRoot1 == merkleRoot2);
81+
BOOST_CHECK(merkleRoot2 != 0);
82+
83+
// check that it contains the matched transactions (in the same order!)
84+
BOOST_CHECK(vMatchTxid1 == vMatchTxid2);
85+
86+
// check that random bit flips break the authentication
87+
for (int j=0; j<4; j++) {
88+
CPartialMerkleTreeTester pmt3(pmt2);
89+
pmt3.Damage();
90+
std::vector<uint256> vMatchTxid3;
91+
uint256 merkleRoot3 = pmt3.ExtractMatches(vMatchTxid3);
92+
BOOST_CHECK(merkleRoot3 != merkleRoot1);
93+
}
94+
}
95+
}
96+
}
97+
98+
BOOST_AUTO_TEST_SUITE_END()

0 commit comments

Comments
 (0)