forked from dashpay/dash
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
test: Add test_framework/bdb.py module for inspecting bdb files
For upgrade tests and possibly other tests, it is useful to inspect the bdb file for the wallet (i.e. the wallet.dat file). test_framework/bdb.py is an implementation of bdb file deserialization specific for Bitcoin Core's usage.
- Loading branch information
Showing
1 changed file
with
152 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
#!/usr/bin/env python3 | ||
# Copyright (c) 2020 The Bitcoin Core developers | ||
# Distributed under the MIT software license, see the accompanying | ||
# file COPYING or http://www.opensource.org/licenses/mit-license.php. | ||
""" | ||
Utilities for working directly with the wallet's BDB database file | ||
This is specific to the configuration of BDB used in this project: | ||
- pagesize: 4096 bytes | ||
- Outer database contains single subdatabase named 'main' | ||
- btree | ||
- btree leaf pages | ||
Each key-value pair is two entries in a btree leaf. The first is the key, the one that follows | ||
is the value. And so on. Note that the entry data is itself not in the correct order. Instead | ||
entry offsets are stored in the correct order and those offsets are needed to then retrieve | ||
the data itself. | ||
Page format can be found in BDB source code dbinc/db_page.h | ||
This only implements the deserialization of btree metadata pages and normal btree pages. Overflow | ||
pages are not implemented but may be needed in the future if dealing with wallets with large | ||
transactions. | ||
`db_dump -da wallet.dat` is useful to see the data in a wallet.dat BDB file | ||
""" | ||
|
||
import binascii | ||
import struct | ||
|
||
# Layout constants for the BDB configuration described in the module docstring.
# PAGESIZE is the size in bytes of every page; the file is read in chunks of
# this size.
PAGESIZE = 4096
# Indexes (in units of pages) of the two metadata pages that get sanity
# checked: the outer database's and the inner one's (presumably the 'main'
# subdatabase mentioned in the module docstring).
OUTER_META_PAGE = 0
INNER_META_PAGE = 2

# Page type values, compared against the page-type byte of a page header.
BTREE_INTERNAL = 3
BTREE_LEAF = 5
BTREE_META = 9

# Magic numbers for sanity checking a metadata page: the expected btree magic
# and the only database version this module accepts.
BTREE_MAGIC = 0x053162
DB_VERSION = 9
|
||
def dump_leaf_page(data):
    """Deserialize a btree leaf page into a dict.

    The first 26 bytes of ``data`` are the page header: a leading 8-byte
    field that is ignored, followed by pgno, prev_pgno, next_pgno, the entry
    count, hf_offset, level and the page type. The header is followed by one
    16-bit offset per entry; each offset points at a 3-byte entry header
    (length, entry type) immediately followed by ``length`` bytes of data.

    All fields are unpacked with struct's default (native) byte order, so the
    file is assumed to have been written with the same endianness as the
    machine reading it.

    Returns None for btree internal pages, which share the header layout but
    hold nothing of interest here. Otherwise returns a dict with the keys
    'pgno', 'prev_pgno', 'next_pgno', 'entries', 'hf_offset', 'level',
    'pg_type' and 'entry_offsets', where 'entries' is a list of dicts with the
    keys 'offset', 'len', 'pg_type' and 'data', in entry-offset order.

    Raises AssertionError if the page is neither an internal nor a leaf btree
    page, and struct.error if ``data`` is too short for the fields being read.
    """
    header_size = 26
    entry_header_size = 3

    _, pgno, prev_pgno, next_pgno, entries, hf_offset, level, pg_type = struct.unpack_from('QIIIHHBB', data, 0)

    # Decide what kind of page this is before interpreting anything past the
    # header: only on a leaf page is `entries` known to be the length of the
    # offset table that follows.
    if pg_type == BTREE_INTERNAL:
        # Skip internal pages. These are the internal nodes of the btree and don't contain anything relevant to us
        return None

    # Raised explicitly (instead of via `assert`) so the check survives `python -O`.
    if pg_type != BTREE_LEAF:
        raise AssertionError('A non-btree leaf page has been encountered while dumping leaves')

    entry_offsets = struct.unpack_from('{}H'.format(entries), data, header_size)

    page_info = {
        'pgno': pgno,
        'prev_pgno': prev_pgno,
        'next_pgno': next_pgno,
        'entries': [],
        'hf_offset': hf_offset,
        'level': level,
        'pg_type': pg_type,
        'entry_offsets': entry_offsets,
    }

    # The entry data is not stored in logical order; walking the offset table
    # yields the entries in the correct order.
    for offset in entry_offsets:
        e_len, entry_type = struct.unpack_from('HB', data, offset)
        data_start = offset + entry_header_size
        page_info['entries'].append({
            'offset': offset,
            'len': e_len,
            'pg_type': entry_type,
            'data': data[data_start:data_start + e_len],
        })

    return page_info
|
||
def dump_meta_page(page):
    """Deserialize a btree metadata page into a dict and sanity check it.

    Bytes 0-71 of ``page`` hold the general metadata and bytes 72-511 the
    btree-specific metadata. Fields are unpacked with struct's default
    (native) byte order, so the file is assumed to have been written with the
    same endianness as the machine reading it.

    Returns a dict with the keys 'pgno', 'magic', 'version', 'pagesize',
    'encrypt_alg', 'pg_type', 'metaflags', 'free', 'last_pgno', 'nparts',
    'key_count', 'record_count', 'flags', 'uid', 'minkey', 're_len', 're_pad',
    'root', 'crypto_magic', 'iv' and 'chksum'. 'uid', 'iv' and 'chksum' are
    hex-encoded bytes; every other value is an int.

    Raises AssertionError if the magic value, page type or version is not
    the expected one, and struct.error if ``page`` is shorter than the region
    being unpacked.
    """
    general_size = 72

    # General metadata, common header of every metadata page.
    (_, pgno, magic, version, pagesize, encrypt_alg, pg_type, metaflags, _,
     free, last_pgno, nparts, key_count, record_count, flags, uid) = struct.unpack_from('QIIIIBBBBIIIIII20s', page, 0)

    metadata = {
        'pgno': pgno,
        'magic': magic,
        'version': version,
        'pagesize': pagesize,
        'encrypt_alg': encrypt_alg,
        'pg_type': pg_type,
        'metaflags': metaflags,
        'free': free,
        'last_pgno': last_pgno,
        'nparts': nparts,
        'key_count': key_count,
        'record_count': record_count,
        'flags': flags,
        'uid': binascii.hexlify(uid),
    }

    # Sanity checks are raised explicitly (instead of via `assert`) so they
    # still run under `python -O`.
    if magic != BTREE_MAGIC:
        raise AssertionError('bdb magic does not match bdb btree magic')
    if pg_type != BTREE_META:
        raise AssertionError('Metadata page is not a btree metadata page')
    if version != DB_VERSION:
        # Any version other than DB_VERSION is rejected; report the direction
        # of the mismatch accurately.
        raise AssertionError('Database too new' if version > DB_VERSION else 'Database too old')

    # Btree-specific metadata. The skipped fields are an unused leading int, a
    # 368-byte region and a 12-byte region.
    (_, minkey, re_len, re_pad, root, _,
     crypto_magic, _, iv, chksum) = struct.unpack_from('IIIII368sI12s16s20s', page, general_size)

    metadata['minkey'] = minkey
    metadata['re_len'] = re_len
    metadata['re_pad'] = re_pad
    metadata['root'] = root
    metadata['crypto_magic'] = crypto_magic
    metadata['iv'] = binascii.hexlify(iv)
    metadata['chksum'] = binascii.hexlify(chksum)
    return metadata
|
||
def extract_kv_pairs(page_data):
    """Collect the key-value pairs of a deserialized leaf page into a dict.

    ``page_data`` is a dict as returned by dump_leaf_page; only its 'entries'
    item is read. Entries alternate key, value, key, value, ...: each
    even-positioned entry's 'data' is a key and the entry that follows it
    supplies the value.

    Returns a dict mapping key bytes to value bytes. A trailing key with no
    following value entry maps to b''.
    """
    out = {}
    entries = iter(page_data['entries'])
    # Pull the key from the loop and its value from the same iterator, so each
    # pass consumes exactly one pair.
    for key_entry in entries:
        value_entry = next(entries, None)
        out[key_entry['data']] = b'' if value_entry is None else value_entry['data']
    return out
|
||
def dump_bdb_kv(filename):
    """Extract the key-value pairs of the BDB file given in ``filename``.

    The file is read in PAGESIZE-byte pages. The outer and inner metadata
    pages are sanity checked, then every page after the inner metadata page
    is deserialized with dump_leaf_page and its pairs are merged into the
    result; btree internal pages are skipped. When a key occurs on more than
    one page, the value from the later page wins.

    Returns a dict mapping key bytes to value bytes.

    Raises AssertionError if the file has too few pages to hold both
    metadata pages, if a metadata page fails its sanity checks, or if a page
    after the inner metadata page is neither a btree internal nor a btree
    leaf page.
    """
    # Read in the BDB file one page at a time; read() returns b'' at EOF.
    with open(filename, 'rb') as f:
        pages = list(iter(lambda: f.read(PAGESIZE), b''))

    # Fail with a clear message rather than an IndexError on a truncated file.
    if len(pages) <= INNER_META_PAGE:
        raise AssertionError('BDB file is too short to contain the metadata pages')

    # Sanity check the meta pages
    dump_meta_page(pages[OUTER_META_PAGE])
    dump_meta_page(pages[INNER_META_PAGE])

    # Fetch the kv pairs from the leaf pages
    kv = {}
    for page in pages[INNER_META_PAGE + 1:]:
        info = dump_leaf_page(page)
        if info is not None:
            # Merge in place; rebuilding the dict for every page is quadratic.
            kv.update(extract_kv_pairs(info))
    return kv