In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import requests
import time
import tqdm
from datetime import datetime
# Notebook-wide plot defaults: faint grid on every axes and larger titles.
plt.rcParams.update({
    'axes.grid': True,
    'grid.alpha': 0.3,
    'axes.titlesize': 18,
})

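Goal: find the probability that a 20-byte Ethereum EOA address has exactly 5 "matches" when its 42-character hex string (the `0x` prefix followed by 40 hex digits) is written as 3 rows of 14 characters. A match is a hex digit that equals the digit directly below it, ignoring case; the two cells holding the `0x` prefix are not compared.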

E.g., the following address has exactly 5 matches:

```
0x3d84a438Af72
F6396785EEa97B
32F903520e36C8
```
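Row 1 holds only 12 hex digits (its first two cells are the `0x` prefix), so there are 12 comparisons between rows 1 and 2 and 14 between rows 2 and 3, i.e. 26 in total. For a uniformly random address each comparison succeeds independently with probability $1/16$, so the number of matches has the distribution $X \sim \mathrm{Bin}(26, 1/16)$. So $P(X = 5) = \binom{26}{5} \left(\frac{1}{16}\right)^5 \left(\frac{15}{16}\right)^{21}.$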

$$
\begin{aligned}
P(X=5) &= \frac{82025770389239788055419921875}{5070602400912917605986812821504} \\
&\approx 0.0161767
\end{aligned}
$$

In [27]:
def getMatches(inp):
    """Count equal vertically adjacent values in the 3-row address layout.

    The 40 values are treated as the hex digits that follow the ``0x``
    prefix when the 42-character address is written as 3 rows of 14 cells:
    row 1 holds the prefix plus the first 12 values, rows 2 and 3 hold 14
    values each. Row 1 is compared against the last 12 cells of row 2 (the
    two prefix cells are skipped) and row 2 against all of row 3, giving
    12 + 14 = 26 comparisons.

    Parameters
    ----------
    inp : array-like, length 40
        The values to compare (the notebook passes nibble values 0-15).
        Lists and tuples are accepted as well as NumPy arrays.

    Returns
    -------
    numpy integer
        Number of positions whose value equals the one directly below it,
        between 0 and 26.

    Raises
    ------
    ValueError
        If ``inp`` is not one-dimensional with exactly 40 entries.
    """
    # Coerce first: comparing two plain lists with ``==`` yields one bool,
    # which would silently make the count at most 2 instead of elementwise.
    digits = np.asarray(inp)
    if digits.shape != (40,):
        raise ValueError(
            f"expected 40 values (address without the '0x' prefix), "
            f"got shape {digits.shape}"
        )

    row1 = digits[:12]
    row2 = digits[12:26]
    row3 = digits[26:]

    # row2[2:] drops the two cells that sit beneath the '0x' prefix.
    return np.sum(row1 == row2[2:]) + np.sum(row2 == row3)

In [32]:
# Worked example: decode every hex character after the '0x' prefix into its nibble value.
address = '0x3d84a438Af72F6396785EEa97B32F903520e36C8'
inp = np.array([int(digit, 16) for digit in address[2:]])

getMatches(inp)

5

In [60]:
# One random "address": 40 integers drawn uniformly from 0..15.
rnd = np.random.randint(low=0, high=16, size=40)
rnd

array([ 5, 13,  0,  3,  9, 15, 15, 13,  5,  9,  5,  5,  0,  3,  5,  3, 13,
       12, 15,  8, 15,  5,  0,  6,  9,  8,  2,  3, 14,  5,  0,  0,  2, 14,
        7,  5, 13,  8, 10,  3])

In [61]:
# Count the adjacent-row matches in the random 40-value sample `rnd`.
getMatches(rnd)

4

In [70]:
# Monte Carlo estimate of the match-count distribution over n random addresses.
n = int(1e6)
# Seed the global NumPy RNG so this cell yields the same estimate on every run.
np.random.seed(0)
# getMatches makes 12 + 14 = 26 comparisons, so the count ranges over 0..26;
# the histogram needs a key for each of those values to avoid a KeyError.
res = dict.fromkeys(range(27), 0)
for _ in tqdm.tqdm(range(n)):
    # Loop-local name, so the `rnd` sample inspected earlier is not overwritten.
    sample = np.random.randint(16, size=40)
    matches = getMatches(sample)
    res[matches] += 1

100%|██████████| 1000000/1000000 [00:30<00:00, 32997.65it/s]


In [71]:
# Fraction of the n simulated samples that produced exactly 5 matches.
res[5] /  n

0.01644