In [1]:
import numpy as np
from collections import Counter, defaultdict

vocab = 'anpa ante awen esun insa jaki jelo kala kama kasi kili kule kute lape laso lawa lete lili lipu loje luka lupa mama mani meli mije moku moli musi mute nasa nena nimi noka olin open pali pana pini pipi poka poki pona sama seli selo seme sewi sike sina sona suli suno supa suwi taso tawa telo toki tomo unpa walo waso wawa weka wile'.split()
print(len(vocab))
chars = list(set([char for word in vocab for char in word]))
chars.sort()
print(len(chars))

def get_hint(ans, cand):
    show = [2, 2, 2, 2] # 0: green, correct(C), 1: yellow, wrong(W), 2: no (X)
    
    # 2 -> 0
    for i in range(4):
        if ans[i] == cand[i]:
            show[i] = 0 # correct
    
    # 2 -> 1
    # candidate characters for yellow character
    rest = list(set([cand[i] for i in range(4) if show[i] == 2]))
    for r in rest: 
        if any(r == a for i, a in enumerate(ans) if show[i] in {1, 2}):
            for i in range(4): # search index to yellow
                if cand[i] == r and show[i] == 2:
                    show[i] = 1
                    break
    hint = ''.join(['0', '1', '2'][n] for n in show)
    return hint

print(get_hint('ante', 'wawa')) # 正解 ante に対して，候補 wawa を入力した結果 -> XWXX
print(get_hint('anpa', 'sina')) # 正解 anpa に対して，候補 sina を入力した結果 -> XXWC
print(get_hint('sina', 'anpa')) # 正解 sina に対して，候補 anpa を入力した結果 -> XWXC
print(get_hint('nena', 'anpa')) # 正解 nena に対して，候補 anpa を入力した結果 -> XWXC
print(get_hint('nasa', 'anpa')) # 正解 nasa に対して，候補 anpa を入力した結果 -> WWXC

def filter_words(words, hint, cand):
    words = [x for x in words if x != cand]
    
    # Wの定義を，厳密に「ここではなく他のところにある」にしてjを書き換え
    yellow_chars = ''
    for i in range(4):
        if hint[i] in 'CW':
            yellow_chars += cand[i] # 少なからずあたっている
    for i in range(4):
        if (cand[i] in yellow_chars) and (hint[i] == 'X'):
            hint = hint[:i] + 'W' + hint[i+1:]
    
    for i in range(4):
        if hint[i] == 'C':
            words = [word for word in words if word[i] == cand[i]]
        elif hint[i] == 'W':
            words = [word for word in words if cand[i] in word]
            words = [word for word in words if word[i] != cand[i]]
        elif hint[i] == 'X':
            words = [word for word in words if cand[i] not in word]
            
    return words

print(filter_words(vocab, 'XWXX', 'wawa')) # 初手wawaに対する回答がXWXXだったとき，次に候補となる単語のリスト
print(filter_words(['kama', 'kasi'], 'CCXX', 'kama'))

def split_cand(words, cand):
    dct = defaultdict(list)
    for word in words:
        dct[get_hint(word, cand)].append(word)
    return dict(dct)

def count_cand(words, cand):
    dct = split_cand(words, cand)
    dct = {key: len(value) for key, value in dct.items()}
    return dct

def calc_ent(words, cand):
    dct = count_cand(words, cand)
    ent = 0
    denom = sum(dct.values())
    for i in dct.values():
        p = i / len(words)
        ent -= p * np.log2(p)
    return ent

66
14
2122
2210
2120
2120
1120
['ante']
['kasi']


In [2]:
# 1語だけでのエントロピーが大きくなるような語を列挙する
ents = [(word, calc_ent(vocab, word)) for word in vocab]
ents.sort(key = lambda x: -x[1])
ents[:20]

[('laso', 4.441508514179351),
 ('seli', 4.3505994232702605),
 ('selo', 4.345318884700614),
 ('sina', 4.263741945530095),
 ('lape', 4.219632176743091),
 ('pali', 4.21794961263142),
 ('sona', 4.209357788586154),
 ('sike', 4.20140083556324),
 ('meli', 4.164487015692192),
 ('kasi', 4.158572477426081),
 ('walo', 4.148402787392285),
 ('suli', 4.144784136520656),
 ('mani', 4.120924946057055),
 ('moli', 4.099779766428733),
 ('luka', 4.09145695284965),
 ('pona', 4.061153922546621),
 ('wile', 4.053092271400567),
 ('poka', 4.048839620832478),
 ('noka', 4.027827167076222),
 ('waso', 3.9943907110834886)]

In [3]:
def depth_tree(tree):
    if tree == None:
        return 0
    else:
        lst = []
        for key, value in tree.items():
            lst.append(depth_tree(value[1]) + 1)
        return max(lst)
    
def avg_tree(tree):
    if tree == None:
        return 0
    else:
        lst = []
        for key, value in tree.items():
            lst.append(depth_tree(value[1]) + 1)
        return np.mean(lst)

def make_tree(words, word):
    dct = {}
    for key, value in split_cand(words, word).items():
        if key == '0000':
            dct[key] = (word, None)
        elif len(value) == 0:
            assert False
        elif len(value) == 1:
            dct[key] = (value[0], make_tree(value, value[0]))
        elif len(value) == 2:
            dct[key] = (value[0], make_tree(value, value[0]))
        else:
            ents = [(word, calc_ent(value, word)) for word in vocab]
            max_ent = max([ent for _, ent in ents])
            bests = [word for word, ent in ents if ent == max_ent]
            if any(best in value for best in bests):
                best = [best for best in bests if best in value][0]
            else:
                best = bests[0]
            dct[key] = (best, make_tree(value, best))
    return dct

make_tree(vocab, 'walo')

{'2122': ('supa',
  {'2200': ('anpa', {'0000': ('anpa', None)}),
   '2221': ('ante', {'0000': ('ante', None)}),
   '1220': ('insa', {'0000': ('insa', None)}),
   '2220': ('nena', {'0000': ('nena', None)}),
   '0220': ('sina', {'0000': ('sina', None)}),
   '0000': ('supa', None),
   '2100': ('unpa', {'0000': ('unpa', None)})}),
 '1122': ('awen', {'0000': ('awen', None)}),
 '2222': ('nimi',
  {'1222': ('esun', {'0000': ('esun', None)}),
   '2222': ('kute', {'0000': ('kute', None)}),
   '2012': ('mije', {'0000': ('mije', None)}),
   '2210': ('musi', {'0000': ('musi', None)}),
   '2212': ('mute', {'0000': ('mute', None)}),
   '0000': ('nimi', None),
   '1020': ('pini', {'0000': ('pini', None)}),
   '2020': ('pipi', {'0000': ('pipi', None)}),
   '2202': ('seme', {'0000': ('seme', None)}),
   '2022': ('sike', {'0000': ('sike', None)})}),
 '2022': ('musi',
  {'2220': ('jaki', {'0000': ('jaki', None)}),
   '1222': ('kama', {'0000': ('kama', None)}),
   '2200': ('kasi', {'0000': ('kasi', None)}

In [4]:
lst = [(word, avg_tree(make_tree(vocab, word))) for word in vocab]
lst.sort(key = lambda x : x[1])
for w, a in lst[:20]:
    print(w, a)

selo 2.413793103448276
sewi 2.44
mute 2.45
kule 2.4583333333333335
lipu 2.4583333333333335
luka 2.4615384615384617
kute 2.473684210526316
jelo 2.4761904761904763
lupa 2.4782608695652173
walo 2.48
pona 2.4814814814814814
sina 2.4814814814814814
sona 2.4814814814814814
suli 2.4814814814814814
seli 2.4827586206896552
anpa 2.5
suno 2.5
poka 2.52
wile 2.52
pana 2.5217391304347827


In [5]:
lst = [(word, depth_tree(make_tree(vocab, word))) for word in vocab]
lst.sort(key = lambda x : x[1])
for w, a in lst[:20]:
    print(w, a)

walo 3
anpa 4
ante 4
awen 4
esun 4
insa 4
jaki 4
jelo 4
kala 4
kama 4
kasi 4
kili 4
kule 4
kute 4
lape 4
laso 4
lawa 4
lete 4
lili 4
lipu 4


トキポナwordle

最長手数が短い語: walo (3)

平均手数が短い語: selo (2.41)

In [6]:
make_tree(vocab, 'selo')

{'2222': ('mani',
  {'2112': ('anpa',
    {'0000': ('anpa', None), '2000': ('unpa', {'0000': ('unpa', None)})}),
   '2020': ('jaki', {'0000': ('jaki', None)}),
   '1022': ('kama', {'0000': ('kama', None)}),
   '0022': ('mama', {'0000': ('mama', None)}),
   '0000': ('mani', None),
   '1210': ('nimi', {'0000': ('nimi', None)}),
   '2002': ('pana', {'0000': ('pana', None)}),
   '2200': ('pini', {'0000': ('pini', None)}),
   '2220': ('pipi', {'0000': ('pipi', None)}),
   '2022': ('tawa',
    {'0000': ('tawa', None), '2000': ('wawa', {'0000': ('wawa', None)})})}),
 '2122': ('kute',
  {'2200': ('ante', {'0000': ('ante', None)}),
   '2221': ('awen', {'0000': ('awen', None)}),
   '0000': ('kute', None),
   '2220': ('mije', {'0000': ('mije', None)}),
   '2000': ('mute', {'0000': ('mute', None)})}),
 '1122': ('esun', {'0000': ('esun', None)}),
 '1222': ('insa',
  {'0000': ('insa', None),
   '1201': ('kasi', {'0000': ('kasi', None)}),
   '1202': ('musi', {'0000': ('musi', None)}),
   '2100': ('na