In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import requests
import time
import tqdm
from datetime import datetime
plt.rcParams['axes.grid'] = True
plt.rcParams['grid.alpha'] = 0.3
plt.rcParams['axes.titlesize'] = 18

Finding the probability that given a 20-byte Ethereum EOA, when you line up the letters into 3 rows of 14, there are exactly 5 matches.

Eg., 

```
0x3d84a438Af72
F6396785EEa97B
32F903520e36C8
```
we have the distribution $X \sim Bin(26, 1/16)$. So $P(X = 5) = \binom{26}{5} \left(\frac{1}{16}\right)^5 \left(\frac{15}{16}\right)^{21}.$

$$
\begin{align}
P(X=5) &= \frac{82025770389239788055419921875}{5070602400912917605986812821504} \\
&\approx 0.0161767
\end{align}
$$

In [2]:
def getMatches(inp):
    row1 = inp[:12]
    row2 = inp[12:26]
    row3 = inp[26:]

    return np.sum(row1 == row2[2:]) + np.sum(row2 == row3)

In [3]:
inp = '0x3d84a438Af72F6396785EEa97B32F903520e36C8'
inp = np.asarray([int(inp[i:i+1],16) for i in range(2, len(inp))])

getMatches(inp)

5

In [4]:
rnd = np.random.randint(16, size=40)
rnd

array([ 9,  2,  6, 10, 12, 10,  7,  8, 12,  8, 15, 10,  7,  0,  2,  2,  6,
        9,  7, 11,  8, 10,  3,  7,  1, 14, 14, 11, 15,  1,  1,  5, 15, 12,
       12,  8,  6,  7,  9,  1])

In [5]:
getMatches(rnd)

3

In [19]:
for i in range(5):
    n = int(1e5)
    res = dict.fromkeys(range(20),0)
    for _ in tqdm.tqdm(range(n)):
        rnd = np.random.randint(16, size=40)
        matches = getMatches(rnd)
        res[matches] += 1
    print(res[5] / n)

100%|██████████| 100000/100000 [00:03<00:00, 32010.84it/s]
  6%|▌         | 5813/100000 [00:00<00:03, 27672.10it/s]

0.01682


100%|██████████| 100000/100000 [00:03<00:00, 33085.18it/s]
  7%|▋         | 6933/100000 [00:00<00:02, 34810.34it/s]

0.01565


100%|██████████| 100000/100000 [00:02<00:00, 34423.92it/s]
  7%|▋         | 6744/100000 [00:00<00:02, 33930.87it/s]

0.0165


100%|██████████| 100000/100000 [00:03<00:00, 33152.01it/s]
  6%|▌         | 6166/100000 [00:00<00:03, 30499.54it/s]

0.01591


100%|██████████| 100000/100000 [00:03<00:00, 33138.01it/s]

0.01546





In [48]:
# distinct matches
def getMatchValues(inp):
    row1 = inp[:12]
    row2 = inp[12:26]
    row3 = inp[26:]

    return np.append(row1[row1 == row2[2:]], row2[2:][row2[2:] == row3[2:]])

In [49]:
inp = '0x3d84a438Af72F6396785EEa97B32F903520e36C8'
inp = np.asarray([int(inp[i:i+1],16) for i in range(2, len(inp))])

getMatchValues(inp)

array([ 3, 10,  7,  9, 14])

In [50]:
v

array([ 3, 10,  7,  9, 14])

In [51]:
len(set(v)) == len(v)

True

In [78]:
2 * 1/16 * 15/16, 2 * 1/16 * 15/16 * 15/16, 2 * 1/16 * 15/16 * 14/16, 2 * 1/16 * 15/16 * 13/16, 2 * 1/16 * 15/16 * 12/16

(0.1171875, 0.11666666666666667, 0.1025390625, 0.09521484375, 0.087890625)

In [77]:
n = int(1e6)
successes = 0
for _ in tqdm.tqdm(range(n)):
    rnd = np.random.randint(16, size=3)
    if (rnd[0] == rnd[1]) and (rnd[1] != rnd[2]) \
      and (rnd[1] != 0) and (rnd[1] != 1) \
      and (rnd[1] != 2) and (rnd[1] != 3):
        successes += 1
    if (rnd[0] != rnd[1]) and (rnd[1] == rnd[2]) \
      and (rnd[1] != 0) and (rnd[1] != 1) \
      and (rnd[1] != 2) and (rnd[1] != 3):
        successes += 1
successes / n

100%|██████████| 1000000/1000000 [00:14<00:00, 68029.81it/s]


0.087514

In [271]:
for i in range(5):
    n = int(1e5)
    res = dict.fromkeys(range(20),0)
    for _ in tqdm.tqdm(range(n)):
        rnd = np.random.randint(16, size=40)
        matches = getMatchValues(rnd)
        matchCount = len(matches)
        if len(set(matches)) == matchCount:
            res[matchCount] += 1
    print(res[5] / n)

100%|██████████| 100000/100000 [00:03<00:00, 31486.21it/s]
  3%|▎         | 2777/100000 [00:00<00:03, 27763.61it/s]

0.00361


100%|██████████| 100000/100000 [00:03<00:00, 31646.13it/s]
  6%|▌         | 6119/100000 [00:00<00:03, 30420.03it/s]

0.00367


100%|██████████| 100000/100000 [00:03<00:00, 32010.96it/s]
  6%|▌         | 6200/100000 [00:00<00:03, 30814.19it/s]

0.00363


100%|██████████| 100000/100000 [00:03<00:00, 32196.54it/s]
  6%|▋         | 6262/100000 [00:00<00:03, 31191.63it/s]

0.00371


100%|██████████| 100000/100000 [00:03<00:00, 32229.63it/s]

0.00355





In [81]:
# prob of 5 unique matching hex characters in a row

top   16/16
match 1/16

top   15/16
match 1/16

top   14/16
match 1/16

top   13/16
match 1/16

top   12/16
match 1/16


middle 16/16
match  1/16
nmatch 15/16

middle 15/16
match  1/16
nmatch 15/16

middle 14/16
match  1/16
nmatch 15/16

SyntaxError: invalid syntax (<ipython-input-81-013e096afc94>, line 3)

In [84]:
1/16, 15/(16**2), 14/(16**2), 13/(16**2), 12/(16**2)

(0.0625, 0.05859375, 0.0546875, 0.05078125, 0.046875)

In [89]:
(1/16)**5 * 15*14*13*12 / (16**4) * 729

0.0003475294215604663

In [88]:
(1/16)**5 *(15/16)**5 * (2**5) * 15*14*13*12 / (16**4) * 729

0.008053746169967013

In [227]:
# prob exact 5 unique out of 2 rows of 12

In [172]:
792 * (1/16)**5 * (15/16)**7 * 15*14*13*12 / 16**4

0.00024031903176442368

In [92]:
def generateRows():
    r1 = np.random.randint(16, size=12)
    r2 = np.random.randint(16, size=12)
    return r1, r2

In [160]:
r1, r2 = generateRows()
r1, r2, r1[r1 == r2], len(r1[r1 == r2])

(array([ 9, 11,  6,  6,  6,  7,  4,  7,  6,  5,  9,  9]),
 array([ 9, 14, 11, 10,  8,  6,  6,  7, 15,  3, 14, 11]),
 array([9, 7]),
 2)

In [251]:
n = int(1e6)
successes = 0
for _ in tqdm.tqdm(range(n)):
    r1, r2 = generateRows()
    matches = r1[r1 == r2]
    if len(matches) == 5 and len(set(matches)) == len(matches):
        successes += 1
successes / n

100%|██████████| 1000000/1000000 [00:29<00:00, 34419.25it/s]


0.000272

In [181]:
# prob exact 5 unique out of 3 rows of 12

In [192]:
def generate3Rows():
    r1 = np.random.randint(16, size=12)
    r2 = np.random.randint(16, size=12)
    r3 = np.random.randint(16, size=12)
    return r1, r2, r3

In [189]:
# single middle match, all 3 match, all differ, outer match
2*1/16*15/16, 1/16*1/16, 15/16*14/16, 1/16 * 15/16

(0.1171875, 0.00390625, 0.8203125, 0.05859375)

In [190]:
0.8203125 + 0.00390625 + 0.1171875 + 0.05859375

1.0

In [246]:
792 * (0.1171875)**5 * (1-0.1171875)**7 * 15*14*13*12 / 16**4

0.0036565785553990668

In [212]:
r1, r2, r3 = generate3Rows()
matches = np.append(r1[r1 == r2], r2[r2 == r3])
r1, r2, r3, matches

(array([ 2,  2, 15,  4, 15, 12,  5, 12, 11,  0,  0, 11]),
 array([ 2,  1, 14,  6,  7,  9, 13, 10,  4,  0, 13, 15]),
 array([ 9, 14, 10,  6,  1,  4,  6,  3,  6,  6, 13, 13]),
 array([ 2,  0,  6, 13]))

In [258]:
n = int(1e6)
successes = 0
for _ in tqdm.tqdm(range(n)):
    r1, r2, r3 = generate3Rows()
    matches = np.append(r1[r1 == r2], r2[r2 == r3])
    if len(matches) == 5 and len(set(matches)) == len(matches):
        successes += 1
successes / n

100%|██████████| 1000000/1000000 [00:51<00:00, 19547.31it/s]


0.003512

In [267]:
n = int(1e6)
successes = 0
for _ in tqdm.tqdm(range(n)):
    matches = []
    for i in range(12):
        triple = np.random.randint(16, size=3)
        if triple[0] == triple[1] and triple[1] != triple[2]:
            matches.append(triple[1])
        if triple[0] != triple[1] and triple[1] == triple[2]:
            matches.append(triple[1])
    if len(matches) == 5 and len(set(matches)) == len(matches):
        successes += 1
successes / n

100%|██████████| 1000000/1000000 [03:07<00:00, 5336.14it/s]


0.003719