## Find the sets of 5 words with no char in common

In [1]:
import os
import sys
import requests
import numpy as np
from collections import defaultdict, Counter
from string import ascii_lowercase
from typing import List, Set, Dict
char_map = {c:i for i,c in enumerate(ascii_lowercase)}

In [2]:
def get_words():
    # https://gist.github.com/subhrm/5362767af06597bd1e216c59b760f6cb
    url="https://gist.githubusercontent.com/subhrm/5362767af06597bd1e216c59b760f6cb/raw/6bfa15d263d6d5b63840a8e5b64e04b382fdb079/valid-wordle-words.txt"
    resp = requests.get(url)
    print(f"{resp.status_code=}")
    word_list = [w for w in resp.text.split("\n") if w]
    print(f"{len(word_list)=}")
    for word in word_list:
        assert len(word) == 5, f"{word} is not a 5 letter word"
    return word_list

word_list = get_words()

resp.status_code=200
len(word_list)=14855


In [3]:
def word_to_num(word):
    return sum( (1<<char_map[c]) for c in word)

# test
assert word_to_num("abc") == word_to_num("bac")


In [4]:
word_map = defaultdict(list)
for w in word_list:
    if len(set(w)) == 5:
        # only keep words with 5 unique characters
        num = word_to_num(w)
        word_map[num].append(w)

print(len(word_map))

5650


In [5]:
unique_list = sorted(word_map.keys())
n = len(unique_list)

In [None]:

two_set = []
ex_map = defaultdict(list)
for i,a in enumerate(unique_list):
    for j in range(i+1,n):
        b =  unique_list[j]
        if (a & b) == 0:
            two_set.append((i,j))
            ex_map[i].append(j)

print(f"{len(two_set)=:,}  {len(ex_map)=}")
i,j = two_set[0]
a,b = unique_list[i], unique_list[j]
print(f"{i=} {j=} {a=} {word_map[a]=} {b=} {word_map[b]=} {ex_map[i]=}")

len(two_set)=2,838,433  len(ex_map)=5418
i=0 j=203 a=61 word_map[a]=['decaf', 'faced'] b=10562 word_map[b]=['bling'] ex_map[i]=[203, 214, 299, 337, 345, 359, 361, 393, 398, 401, 404, 409, 411, 429, 431, 445, 491, 513, 515, 535, 555, 569, 571, 574, 575, 576, 720, 731, 738, 744, 768, 808, 812, 813, 825, 830, 836, 852, 856, 857, 866, 905, 907, 918, 924, 925, 928, 929, 930, 1042, 1048, 1057, 1061, 1102, 1107, 1109, 1144, 1151, 1154, 1165, 1170, 1171, 1172, 1181, 1184, 1186, 1194, 1197, 1199, 1200, 1227, 1232, 1236, 1237, 1252, 1255, 1272, 1283, 1291, 1295, 1296, 1311, 1314, 1319, 1323, 1324, 1325, 1339, 1347, 1349, 1354, 1356, 1357, 1358, 1360, 1363, 1364, 1365, 1369, 1370, 1373, 1374, 1375, 1376, 1411, 1415, 1416, 1419, 1428, 1430, 1432, 1440, 1443, 1445, 1464, 1470, 1471, 1472, 1478, 1479, 1480, 1481, 1486, 1526, 1532, 1545, 1550, 1560, 1565, 1568, 1583, 1589, 1592, 1606, 1611, 1614, 1616, 1632, 1640, 1644, 1645, 1646, 1648, 1653, 1654, 1655, 1660, 1661, 1663, 1668, 1669, 1670, 1671, 167

In [None]:
multi_set = [s for s in two_set]
for size in range(3,6):
    print(f"Computing sets of size {size}")
    new_multi_set = []
    for s in multi_set:
        candidates = ex_map[s[-1]]
        for x in candidates:
            all_ok = True
            for y in s[:-1]:
                if (unique_list[x] & unique_list[y]) > 0:
                    all_ok = False
                    break
            if all_ok:
                new_multi_set.append(s + (x,))
    multi_set = new_multi_set
    print(f"Done {size}. Number of sets = {len(multi_set)} ")

print(multi_set)

Computing sets of size 3
Done 3. Number of sets = 78359800 
Computing sets of size 4
Done 4. Number of sets = 18688861 
Computing sets of size 5
Done 5. Number of sets = 23 
[(203, 2010, 3978, 5120, 5604), (268, 2692, 3978, 5404, 5544), (513, 2010, 3978, 5077, 5604), (634, 2516, 3978, 5404, 5544), (638, 1835, 3978, 5120, 5604), (730, 1945, 3978, 5077, 5604), (782, 2443, 4298, 4840, 5615), (782, 2619, 3902, 5404, 5544), (782, 2859, 4298, 4554, 5615), (786, 2334, 3978, 5404, 5546), (815, 2341, 3978, 5404, 5544), (815, 3430, 3978, 5404, 5598), (815, 3434, 3978, 5404, 5598), (888, 1835, 3978, 5077, 5604), (1004, 4107, 4194, 5250, 5560), (1835, 2668, 3978, 4545, 5604), (1948, 2714, 3978, 5395, 5604), (2460, 3978, 4155, 5001, 5604), (2461, 3978, 4154, 5001, 5604), (2668, 3473, 3978, 5404, 5546), (2668, 3496, 3978, 5404, 5544), (2668, 3627, 3978, 5404, 5459), (3045, 3712, 4194, 5250, 5560)]


In [7]:
samples = [(203, 2010, 3978, 5120, 5604), (268, 2692, 3978, 5404, 5544), (513, 2010, 3978, 5077, 5604)]
for s in samples:
    print("|".join(word_map[unique_list[i]][0] for i in s))

bling|treck|waqfs|jumpy|vozhd
joked|crumb|waqfs|phynx|glitz
pling|treck|waqfs|jumby|vozhd
