In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import requests
import time
import tqdm
from datetime import datetime
plt.rcParams['axes.grid'] = True
plt.rcParams['grid.alpha'] = 0.3
plt.rcParams['axes.titlesize'] = 18

Finding the probability that given a 20-byte Ethereum EOA, when you line up the letters into 3 rows of 14, there are exactly 5 matches.

Eg., 

```
0x3d84a438Af72
F6396785EEa97B
32F903520e36C8
```
we have the distribution $X \sim Bin(26, 1/16)$. So $P(X = 5) = \binom{26}{5} \left(\frac{1}{16}\right)^5 \left(\frac{15}{16}\right)^{21}.$

$$
\begin{align}
P(X=5) &= \frac{82025770389239788055419921875}{5070602400912917605986812821504} \\
&\approx 0.0161767
\end{align}
$$

In [2]:
def getMatches(inp):
    row1 = inp[:12]
    row2 = inp[12:26]
    row3 = inp[26:]

    return np.sum(row1 == row2[2:]) + np.sum(row2 == row3)

In [3]:
inp = '0x3d84a438Af72F6396785EEa97B32F903520e36C8'
inp = np.asarray([int(inp[i:i+1],16) for i in range(2, len(inp))])

getMatches(inp)

5

In [4]:
rnd = np.random.randint(16, size=40)
rnd

array([ 9,  2,  6, 10, 12, 10,  7,  8, 12,  8, 15, 10,  7,  0,  2,  2,  6,
        9,  7, 11,  8, 10,  3,  7,  1, 14, 14, 11, 15,  1,  1,  5, 15, 12,
       12,  8,  6,  7,  9,  1])

In [5]:
getMatches(rnd)

3

In [19]:
for i in range(5):
    n = int(1e5)
    res = dict.fromkeys(range(20),0)
    for _ in tqdm.tqdm(range(n)):
        rnd = np.random.randint(16, size=40)
        matches = getMatches(rnd)
        res[matches] += 1
    print(res[5] / n)

100%|██████████| 100000/100000 [00:03<00:00, 32010.84it/s]
  6%|▌         | 5813/100000 [00:00<00:03, 27672.10it/s]

0.01682


100%|██████████| 100000/100000 [00:03<00:00, 33085.18it/s]
  7%|▋         | 6933/100000 [00:00<00:02, 34810.34it/s]

0.01565


100%|██████████| 100000/100000 [00:02<00:00, 34423.92it/s]
  7%|▋         | 6744/100000 [00:00<00:02, 33930.87it/s]

0.0165


100%|██████████| 100000/100000 [00:03<00:00, 33152.01it/s]
  6%|▌         | 6166/100000 [00:00<00:03, 30499.54it/s]

0.01591


100%|██████████| 100000/100000 [00:03<00:00, 33138.01it/s]

0.01546





In [274]:
# distinct matches
def getMatchValues(inp):
    row1 = inp[:12]
    row2 = inp[12:26]
    row3 = inp[26:]

    return np.append(row1[row1 == row2[2:]], row2[row2 == row3])

In [275]:
inp = '0x3d84a438Af72F6396785EEa97B32F903520e36C8'
inp = np.asarray([int(inp[i:i+1],16) for i in range(2, len(inp))])

getMatchValues(inp)

array([ 3, 10,  7,  9, 14])

In [276]:
v

array([ 3, 10,  7,  9, 14])

In [277]:
len(set(v)) == len(v)

True

In [278]:
2 * 1/16 * 15/16, 2 * 1/16 * 15/16 * 15/16, 2 * 1/16 * 15/16 * 14/16, 2 * 1/16 * 15/16 * 13/16, 2 * 1/16 * 15/16 * 12/16

(0.1171875, 0.10986328125, 0.1025390625, 0.09521484375, 0.087890625)

In [279]:
n = int(1e6)
successes = 0
for _ in tqdm.tqdm(range(n)):
    rnd = np.random.randint(16, size=3)
    if (rnd[0] == rnd[1]) and (rnd[1] != rnd[2]) \
      and (rnd[1] != 0) and (rnd[1] != 1) \
      and (rnd[1] != 2) and (rnd[1] != 3):
        successes += 1
    if (rnd[0] != rnd[1]) and (rnd[1] == rnd[2]) \
      and (rnd[1] != 0) and (rnd[1] != 1) \
      and (rnd[1] != 2) and (rnd[1] != 3):
        successes += 1
successes / n

 24%|██▍       | 241015/1000000 [00:03<00:11, 65484.89it/s]


KeyboardInterrupt: 

In [280]:
for i in range(5):
    n = int(1e5)
    res = dict.fromkeys(range(20),0)
    for _ in tqdm.tqdm(range(n)):
        rnd = np.random.randint(16, size=40)
        matches = getMatchValues(rnd)
        matchCount = len(matches)
        if len(set(matches)) == matchCount:
            res[matchCount] += 1
    print(res[5] / n)

100%|██████████| 100000/100000 [00:03<00:00, 32165.96it/s]
  6%|▋         | 6364/100000 [00:00<00:02, 31498.93it/s]

0.00494


100%|██████████| 100000/100000 [00:03<00:00, 32270.56it/s]
  6%|▋         | 6411/100000 [00:00<00:02, 31881.42it/s]

0.00526


100%|██████████| 100000/100000 [00:03<00:00, 32822.53it/s]
  6%|▌         | 5992/100000 [00:00<00:03, 28938.05it/s]

0.00518


100%|██████████| 100000/100000 [00:03<00:00, 32516.81it/s]
  6%|▋         | 6425/100000 [00:00<00:02, 32332.53it/s]

0.00544


100%|██████████| 100000/100000 [00:03<00:00, 32728.23it/s]

0.00512





In [81]:
# prob of 5 unique matching hex characters in a row

top   16/16
match 1/16

top   15/16
match 1/16

top   14/16
match 1/16

top   13/16
match 1/16

top   12/16
match 1/16


middle 16/16
match  1/16
nmatch 15/16

middle 15/16
match  1/16
nmatch 15/16

middle 14/16
match  1/16
nmatch 15/16

SyntaxError: invalid syntax (<ipython-input-81-013e096afc94>, line 3)

In [84]:
1/16, 15/(16**2), 14/(16**2), 13/(16**2), 12/(16**2)

(0.0625, 0.05859375, 0.0546875, 0.05078125, 0.046875)

In [89]:
(1/16)**5 * 15*14*13*12 / (16**4) * 729

0.0003475294215604663

In [88]:
(1/16)**5 *(15/16)**5 * (2**5) * 15*14*13*12 / (16**4) * 729

0.008053746169967013

In [227]:
# prob exact 5 unique out of 2 rows of 12

In [272]:
792 * (1/16)**5 * (15/16)**7

0.00048075543546133304

In [172]:
792 * (1/16)**5 * (15/16)**7 * 15*14*13*12 / 16**4

0.00024031903176442368

In [92]:
def generateRows():
    r1 = np.random.randint(16, size=12)
    r2 = np.random.randint(16, size=12)
    return r1, r2

In [160]:
r1, r2 = generateRows()
r1, r2, r1[r1 == r2], len(r1[r1 == r2])

(array([ 9, 11,  6,  6,  6,  7,  4,  7,  6,  5,  9,  9]),
 array([ 9, 14, 11, 10,  8,  6,  6,  7, 15,  3, 14, 11]),
 array([9, 7]),
 2)

In [289]:
n = int(1e5)
successes = 0
for _ in tqdm.tqdm(range(n)):
    r1, r2 = generateRows()
    matches = r1[r1 == r2]
    if len(matches) == 5 and len(set(matches)) == len(matches):
        successes += 1
successes / n

100%|██████████| 100000/100000 [00:03<00:00, 32046.61it/s]


0.00024

In [181]:
# prob exact 5 unique out of 3 rows of 12

In [290]:
def generate3Rows():
    r1 = np.random.randint(16, size=12)
    r2 = np.random.randint(16, size=12)
    r3 = np.random.randint(16, size=12)
    return r1, r2, r3

In [189]:
# single middle match, all 3 match, all differ, outer match
2*1/16*15/16, 1/16*1/16, 15/16*14/16, 1/16 * 15/16

(0.1171875, 0.00390625, 0.8203125, 0.05859375)

In [190]:
0.8203125 + 0.00390625 + 0.1171875 + 0.05859375

1.0

In [291]:
792 * (0.1171875)**5 * (1-0.1171875)**7

0.0073149429855504655

In [292]:
792 * (0.1171875)**5 * (1-0.1171875)**7 * 15*14*13*12 / 16**4

0.0036565785553990668

In [293]:
r1, r2, r3 = generate3Rows()
matches = np.append(r1[r1 == r2], r2[r2 == r3])
r1, r2, r3, matches

(array([ 0, 11,  7, 10,  7,  3, 14,  6, 14, 11,  6,  6]),
 array([ 6,  4, 11,  3, 11,  9, 10,  8,  8, 15,  6, 15]),
 array([ 9,  4,  9,  5, 11, 10,  5,  9,  9,  7, 12, 12]),
 array([ 6,  4, 11]))

In [392]:
res = []
for i in range(5):
    n = int(1e4)
    successes = 0
    for _ in tqdm.tqdm(range(n)):
        r1, r2, r3 = generate3Rows()
        matches = np.append(r1[r1 == r2], r2[r2 == r3])
        if len(matches) == 5 and len(set(matches)) == len(matches):
            successes += 1
    print(successes / n)
    res.append(successes / n)
res

100%|██████████| 10000/10000 [00:00<00:00, 17647.21it/s]
 38%|███▊      | 3750/10000 [00:00<00:00, 18856.29it/s]

0.0041


100%|██████████| 10000/10000 [00:00<00:00, 18597.21it/s]
 37%|███▋      | 3677/10000 [00:00<00:00, 18173.65it/s]

0.003


100%|██████████| 10000/10000 [00:00<00:00, 18417.43it/s]
 38%|███▊      | 3764/10000 [00:00<00:00, 18445.64it/s]

0.0045


100%|██████████| 10000/10000 [00:00<00:00, 18172.50it/s]
 38%|███▊      | 3778/10000 [00:00<00:00, 19037.79it/s]

0.0023


100%|██████████| 10000/10000 [00:00<00:00, 18742.11it/s]

0.0035





[0.0041, 0.003, 0.0045, 0.0023, 0.0035]

In [393]:
np.mean(res)

0.0034799999999999996

In [310]:
15/128

0.1171875

In [312]:
n = int(1e5)
successes = 0
for _ in tqdm.tqdm(range(n)):
    double1 = np.random.randint(16, size=2)
    double2 = np.random.randint(16, size=2)
    if double1[0] == double1[1] and double2[0] != double2[1]:
        successes += 1
    if double1[0] != double1[1] and double2[0] == double2[1]:
        successes += 1
#     r1, r2, r3 = generate3Rows()
#     matches = np.append(r1[r1 == r2], r2[r2 == r3])
#     if len(matches) == 5 and len(set(matches)) == len(matches):
#         successes += 1
successes / n

100%|██████████| 100000/100000 [00:02<00:00, 36141.18it/s]


0.11721

In [321]:
495 * (0.1171875)**4 * (1-0.1171875)**8 * 15*14*13*12 / 16**4

0.017216390698337275

In [323]:
n = int(1e5)
successes = 0
for _ in tqdm.tqdm(range(n)):
    r1, r2, r3 = generate3Rows()
    matches = np.append(r1[r1 == r2], r2[r2 == r3])
    if len(matches) == 4 and len(set(matches)) == len(matches) and 0 not in matches:
        successes += 1
successes / n

100%|██████████| 100000/100000 [00:05<00:00, 19208.12it/s]


0.01706

In [326]:
0.00201

0.002

In [403]:
res = []
for i in range(5):
    n = int(1e4)
    successes = 0
    for _ in tqdm.tqdm(range(n)):
        double1 = np.random.randint(16, size=2)
        double2 = np.random.randint(16, size=2)
        matches = []
        # single match in first two cols
        if double1[0] == double1[1] and double2[0] != double2[1]:
            matches.append(double1[0])
        elif double1[0] != double1[1] and double2[0] == double2[1]:
            matches.append(double2[0])
        else:
            continue

        r1, r2, r3 = generate3Rows()

        matches = np.asarray(matches)
        newmatches = np.append(r1[r1 == r2], r2[r2 == r3])
        matches = np.append(matches, newmatches)
        if len(matches) == 5 and len(set(matches)) == len(matches):
            successes += 1
    print(successes / n)
    res.append(successes / n)
res

100%|██████████| 10000/10000 [00:00<00:00, 26564.17it/s]
 57%|█████▊    | 5750/10000 [00:00<00:00, 28972.20it/s]

0.002


100%|██████████| 10000/10000 [00:00<00:00, 28107.02it/s]
 55%|█████▌    | 5508/10000 [00:00<00:00, 27888.47it/s]

0.0017


100%|██████████| 10000/10000 [00:00<00:00, 26927.70it/s]
 51%|█████     | 5116/10000 [00:00<00:00, 25593.36it/s]

0.0023


100%|██████████| 10000/10000 [00:00<00:00, 26042.76it/s]
 55%|█████▌    | 5515/10000 [00:00<00:00, 27674.21it/s]

0.0028


100%|██████████| 10000/10000 [00:00<00:00, 27747.41it/s]

0.0015





[0.002, 0.0017, 0.0023, 0.0028, 0.0015]

In [404]:
np.mean(res)

0.00206

In [334]:
15/4096

0.003662109375

In [374]:
n = int(1e5)
successes = 0
for _ in tqdm.tqdm(range(n)):
    double1 = np.random.randint(16, size=2)
    double2 = np.random.randint(16, size=2)
    if double1[0] == double1[1] and double2[0] == double2[1] and double1[0] != double2[0]:
        successes += 1
successes / n

100%|██████████| 100000/100000 [00:02<00:00, 36851.74it/s]


0.00379

In [336]:
0.000225

0.000225

In [409]:
res = []
for i in range(5):
    n = int(1e4)
    successes = 0
    for _ in tqdm.tqdm(range(n)):
        double1 = np.random.randint(16, size=2)
        double2 = np.random.randint(16, size=2)
        matches = []
        # two matches in first two cols
        if double1[0] == double1[1] and double2[0] == double2[1] and double1[0] != double2[0]:
            matches.append(double1[0])
            matches.append(double2[0])
        else:
            continue

        r1, r2, r3 = generate3Rows()
        matches = np.asarray(matches)
        newmatches = np.append(r1[r1 == r2], r2[r2 == r3])
        matches = np.append(matches, newmatches)
        if len(matches) == 5 and len(set(matches)) == len(matches):
            successes += 1
    print(successes / n)
    res.append(successes / n)
res

100%|██████████| 10000/10000 [00:00<00:00, 34267.13it/s]
 73%|███████▎  | 7276/10000 [00:00<00:00, 36567.87it/s]

0.0003


100%|██████████| 10000/10000 [00:00<00:00, 36186.37it/s]
 62%|██████▏   | 6200/10000 [00:00<00:00, 29752.58it/s]

0.0002


100%|██████████| 10000/10000 [00:00<00:00, 32656.79it/s]
 67%|██████▋   | 6745/10000 [00:00<00:00, 34192.54it/s]

0.0002


100%|██████████| 10000/10000 [00:00<00:00, 33820.70it/s]
 69%|██████▊   | 6867/10000 [00:00<00:00, 33250.07it/s]

0.0003


100%|██████████| 10000/10000 [00:00<00:00, 34581.62it/s]

0.0002





[0.0003, 0.0002, 0.0002, 0.0003, 0.0002]

In [410]:
np.mean(res)

0.00024000000000000003

In [338]:
0.005899

0.005899

In [371]:
n = int(1e5)
successes = 0
for _ in tqdm.tqdm(range(n)):
    double1 = np.random.randint(16, size=2)
    double2 = np.random.randint(16, size=2)
    matches = []
    if double1[0] == double1[1]:
        matches.append(double1[0])
    if double2[0] == double2[1]:
        matches.append(double2[0])
    
    r1, r2, r3 = generate3Rows()
    matches = np.asarray(matches)
    newmatches = np.append(r1[r1 == r2], r2[r2 == r3])
    matches = np.append(matches, newmatches)
    if len(matches) == 5 and len(set(matches)) == len(matches):
        successes += 1
successes / n

100%|██████████| 100000/100000 [00:09<00:00, 10845.48it/s]


0.00533

In [267]:
n = int(1e6)
successes = 0
for _ in tqdm.tqdm(range(n)):
    matches = []
    for i in range(12):
        triple = np.random.randint(16, size=3)
        if triple[0] == triple[1] and triple[1] != triple[2]:
            matches.append(triple[1])
        if triple[0] != triple[1] and triple[1] == triple[2]:
            matches.append(triple[1])
    if len(matches) == 5 and len(set(matches)) == len(matches):
        successes += 1
successes / n

100%|██████████| 1000000/1000000 [03:07<00:00, 5336.14it/s]


0.003719