# Block Data Collection

In [40]:
import time
import urllib.request
import gzip

import numpy as np
import pandas as pd

start_date = pd.Timestamp("2022-09-01")
end_date = pd.Timestamp("2022-12-31")
blocks = []

start_time = time.time()
for date in pd.date_range(start_date, end_date):
    url = f"https://gz.blockchair.com/bitcoin/blocks/blockchair_bitcoin_blocks_{date.strftime('%Y%m%d')}.tsv.gz"
    print(date.strftime('%Y%m%d'))

    with urllib.request.urlopen(url) as response:
        with gzip.GzipFile(fileobj=response) as uncompressed:
            df = pd.read_csv(uncompressed, delimiter='\t')
    blocks.append(df)

blocks_df = pd.concat(blocks)
end_time = time.time()

print(f"Time taken: {end_time - start_time:.2f} seconds")

20220901
20220902
20220903
20220904
20220905
20220906
20220907
20220908
20220909
20220910
20220911
20220912
20220913
20220914
20220915
20220916
20220917
20220918
20220919
20220920
20220921
20220922
20220923
20220924
20220925
20220926
20220927
20220928
20220929
20220930
20221001
20221002
20221003
20221004
20221005
20221006
20221007
20221008
20221009
20221010
20221011
20221012
20221013
20221014
20221015
20221016
20221017
20221018
20221019
20221020
20221021
20221022
20221023
20221024
20221025
20221026
20221027
20221028
20221029
20221030
20221031
20221101
20221102
20221103
20221104
20221105
20221106
20221107
20221108
20221109
20221110
20221111
20221112
20221113
20221114
20221115
20221116
20221117
20221118
20221119
20221120
20221121
20221122
20221123
20221124
20221125
20221126
20221127
20221128
20221129
20221130
20221201
20221202
20221203
20221204
20221205
20221206
20221207
20221208
20221209
20221210
20221211
20221212
20221213
20221214
20221215
20221216
20221217
20221218
20221219
20221220
2

In [41]:
blocks_df.head()

Unnamed: 0,id,hash,time,median_time,size,stripped_size,weight,version,version_hex,version_bits,...,fee_per_kb,fee_per_kb_usd,fee_per_kwu,fee_per_kwu_usd,cdd_total,generation,generation_usd,reward,reward_usd,guessed_miner
0,752076,000000000000000000080c5d32b46eb0f76b3d73f2e226...,2022-09-01 00:00:31,2022-08-31 23:18:40,1553233,813251,3992986,1073676288,3fff0000,111111111111110000000000000000,...,9623.808,1.927,3743.8691,0.7497,10909.170413,625000000,125143.75,639945129,128136.2109,Foundry USA Pool
1,752077,0000000000000000000173411bffe1ac604a132ed1e105...,2022-09-01 00:04:05,2022-08-31 23:33:18,1085256,969318,3993210,545259520,20800000,100000100000000000000000000000,...,13023.431,2.6076,3539.4724,0.7086,308.317894,625000000,125143.75,639128208,127972.6406,Unknown
2,752078,000000000000000000055a7e7b72fc0c36ce8efe8a5b19...,2022-09-01 00:09:10,2022-08-31 23:36:53,1366347,875553,3993006,1073676288,3fff0000,111111111111110000000000000000,...,11504.734,2.3036,3936.9604,0.7883,9286.351203,625000000,125143.75,640716008,128290.5625,Foundry USA Pool
3,752079,000000000000000000077cc94bc7f55ddc7ede0c4dc752...,2022-09-01 00:34:21,2022-08-31 23:43:06,1446047,849009,3993074,1073676288,3fff0000,111111111111110000000000000000,...,20450.81,4.0949,7406.521,1.4831,59183.39432,625000000,125143.75,654566699,131063.8906,Foundry USA Pool
4,752080,00000000000000000008f5ce64c347e18f321a30dc9a52...,2022-09-01 00:36:21,2022-08-31 23:47:49,1720548,757428,3992832,611926016,24794000,100100011110010100000000000000,...,1680.234,0.3364,724.09705,0.145,2679.39664,625000000,125143.75,627890436,125722.5,Unknown


# Block Time Variance

## Block Time Calculation

In [51]:
from datetime import datetime


def get_block_times(_blocks_df):
    block_times = list()
    prev_block_time = 0
    for i, block in _blocks_df["time"].iteritems():
        block_time = datetime.strptime(block, "%Y-%m-%d %H:%M:%S")
        if i == 0:
            prev_block_time = block_time
        else:
            curr_block_time = block_time
            block_times.append((curr_block_time - prev_block_time).total_seconds() / 60)
            prev_block_time = curr_block_time
    return block_times

In [52]:
import numpy as np

all_block_times = get_block_times(blocks_df)

np.var(all_block_times)

98.79263658718499

## Chi-Square Test for Block Time Variance

In [53]:
from scipy.stats import chi2
from cmath import sqrt

n = len(all_block_times)
t = (n - 1) * pow(sqrt(np.var(all_block_times)) / 10, 2)
alpha = 0.05
q1 = chi2.ppf(alpha / 2, n - 1)
q2 = chi2.ppf(1 - (alpha / 2), n - 1)
if t <= q1 or t >= q2:
    print("HO rejected")
else:
    print("HO accepted")

HO accepted


The test is accepted. Our model for block mining is approximately correct.

# Miner Block Time Validation

# Block Time Calculation

In [54]:
foundry_usa_block_times = get_block_times(blocks_df[blocks_df["guessed_miner"] == "Foundry USA Pool"].reset_index())
ant_block_times = get_block_times(blocks_df[blocks_df["guessed_miner"] == "AntPool"].reset_index())

In [None]:
from statsmodels.stats.diagnostic import lilliefors

_, p_value = lilliefors(foundry_usa_block_times, "exp")
if p_value < alpha:
    print("HO rejected")
else:
    print("HO accepted")

In [None]:
_, p_value = lilliefors(ant_block_times, "exp")
if p_value < alpha:
    print("HO rejected")
else:
    print("HO accepted")

The distribution of block time for these pools is indeed exponential.