In [284]:
import json, pathlib, random
from collections import defaultdict
import numpy as np
import pandas as pd


In [2]:
with open('lists.json') as f:
    j = json.load(f)

target_list = j['target']
guess_list = j["guess"]


In [292]:
def char_freq(lst):
    hist = defaultdict(int)
    for word in lst:
        for char in word:
            hist[char] += 1
    mx = max(hist.values())
    for char in hist:
        hist[char] /= mx
    return hist

def print_char_freq(cf):
    for char in sorted(list(cf.keys())):
        print(f'{char}: {cf[char]}')
        
def freq_score(word, cf):
    return sum(cf[x]/len(word) for x in word) 

def uniq_score(word):
    return (len(word) - len(set(word))) / (len(word) - 2)

In [288]:

cf = char_freq(target_list)
print_char_freq(cf)

a: 0.7939983779399837
b: 0.2278994322789943
c: 0.38686131386861317
d: 0.31873479318734793
e: 1.0
f: 0.18653690186536903
g: 0.2522303325223033
h: 0.31549067315490675
i: 0.5442011354420113
j: 0.021897810218978103
k: 0.170316301703163
l: 0.5831305758313058
m: 0.2562854825628548
n: 0.46634225466342255
o: 0.6115166261151662
p: 0.29764801297648014
q: 0.023519870235198703
r: 0.7291159772911597
s: 0.5425790754257908
t: 0.5912408759124088
u: 0.37875101378751014
v: 0.12408759124087591
w: 0.15815085158150852
x: 0.030008110300081103
y: 0.3446877534468775
z: 0.032441200324412


In [289]:
print(random.choice(guess_list))
print(random.choice(target_list))

swede
gipsy


In [309]:
dfg = pd.DataFrame([w, freq_score(w, cf), uniq_score(w), 1] for w in guess_list)
dft = pd.DataFrame([w, freq_score(w, cf), uniq_score(w), 0] for w in target_list)
df = dfg.append(dft)
df.set_index(0, inplace=True)

In [311]:
df

Unnamed: 0_level_0,1,2,3
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
aahed,0.644444,0.333333,1
aalii,0.651906,0.666667,1
aargh,0.576967,0.333333,1
aarti,0.690511,0.333333,1
abaca,0.599351,0.666667,1
...,...,...,...
judge,0.394323,0.000000,0
rower,0.645580,0.333333,0
artsy,0.600324,0.000000,0
rural,0.642822,0.333333,0


In [276]:
class Env:
    def __init__(self, target_list, target_word=None):
        if target_word:
            self.target = target_word
        else:
            self.target = random.choice(target_list)
        
    def submit_guess(self, guess):
        wrongplace = [0] * len(self.target)
        hints = []
        rightplace = [guess[n] == chrt for n,chrt in enumerate(self.target)]
        
        for n,chrt in enumerate(self.target):
            if rightplace[n] == 1: continue #this character has already been scored, skip it
            for m,chrg in enumerate(guess):
                if n == m: continue # we've already checked rightplace matches above
                if chrt != chrg: continue
                if wrongplace[m] == 1: continue
                if rightplace[m] == 1: continue
                
                wrongplace[m] = 1
                break

        for i in range(len(self.target)):
            hints.append(2 if rightplace[i] == 1 else wrongplace[i])
        
        return hints
    
def hint_to_hinty(hint):
    #hint takes form [0,1,2,1,0]
    hinty = {}
    for n in [0,1,2]:
        hinty[n] = [i for i, x in enumerate(hint) if x == n]
    return hinty
    

def validate_against_hinty(word, guess, hinty):
    #hinty takes form {2:[idx,..], 1:[idx,..], 0:[idx,..]}
    for idx in hinty[2]: # check the fixed letters first
        if word[idx] != guess[idx]:
            return False
    for idx in hinty[0]:
        #get the number of times char appears in target word (minus the times it appears in the correct location)
        indices = [i for i,x in enumerate(word) if x == guess[idx] and i not in hinty[2]]
        #get number of times char appears in guess word in the wrong location
        indices_g = [n for n,x in enumerate(guess) if x == guess[idx] and n in hinty[1]]
        #we already know that there is one not-exist hint for this char, so
        #if there are more fewer wrong location hints for this letter than there are actual occurrences of the letter
        #then the hint does not validate against this word
        if len(indices) > len(indices_g):
            return False
    for idx in hinty[1]:
        if word[idx] == guess[idx]:
            return False
        #get all the indices of the character in the target word
        indices = [i for i,x in enumerate(word) if x == guess[idx] and i not in hinty[2]]
        #remove all the indices where there is already a fixed position hint
        
        #now count all the occurences of the char in guess where the location is wrong
        indices_g = [i for i,x in enumerate(guess) if x == guess[idx] and i in hinty[1]]
        #if there are more wrong loc hints for this char than there are actual occurrences, then it must be bogus
        if len(indices) < len(indices_g):
            return False
    return True            
    

In [278]:
e_simple = Env(target_list, target_word='abcde')
tests_simple = {'abcde': [2,2,2,2,2],
         'acbde': [2,1,1,2,2],
         'azcde': [2,0,2,2,2],
         'aacde': [2,0,2,2,2],
         'zacde': [0,1,2,2,2],
         'zzdzz': [0,0,1,0,0],
         'zzddz': [0,0,0,2,0],
         'zdddz': [0,0,0,2,0],
         'ddddd': [0,0,0,2,0],
         'zzzdd': [0,0,0,2,0],
         'zzdez': [0,0,1,1,0]}

e_repeat = Env(target_list, target_word='abcae')
tests_repeat = {'abcde': [2,2,2,0,2],
         'acbde': [2,1,1,0,2],
         'azcde': [2,0,2,0,2],
         'aacde': [2,1,2,0,2],
         'zacde': [0,1,2,0,2],
         'zzdzz': [0,0,0,0,0],
         'zzddz': [0,0,0,0,0],
         'zdddz': [0,0,0,0,0],
         'ddddd': [0,0,0,0,0],
         'zzzdd': [0,0,0,0,0],
         'zzdez': [0,0,0,1,0],
         'aaaaa': [2,0,0,2,0],
         'aaaza': [2,1,0,0,0],
         'zaazz': [0,1,1,0,0],
         'zaaza': [0,1,1,0,0]}

for e,tests in [(e_simple, tests_simple),(e_repeat, tests_repeat)]:
    for guess,expected in tests.items():
        #guess = random.choice(guess_list + target_list)
        actual = e.submit_guess(guess)
        hinty = hint_to_hinty(expected)
        hinty_valid = validate_against_hinty(e.target, guess, hinty)
        print(e.target, guess, actual, expected, expected == actual, hinty_valid)

abcde abcde [2, 2, 2, 2, 2] [2, 2, 2, 2, 2] True True
abcde acbde [2, 1, 1, 2, 2] [2, 1, 1, 2, 2] True True
abcde azcde [2, 0, 2, 2, 2] [2, 0, 2, 2, 2] True True
abcde aacde [2, 0, 2, 2, 2] [2, 0, 2, 2, 2] True True
abcde zacde [0, 1, 2, 2, 2] [0, 1, 2, 2, 2] True True
abcde zzdzz [0, 0, 1, 0, 0] [0, 0, 1, 0, 0] True True
abcde zzddz [0, 0, 0, 2, 0] [0, 0, 0, 2, 0] True True
abcde zdddz [0, 0, 0, 2, 0] [0, 0, 0, 2, 0] True True
abcde ddddd [0, 0, 0, 2, 0] [0, 0, 0, 2, 0] True True
abcde zzzdd [0, 0, 0, 2, 0] [0, 0, 0, 2, 0] True True
abcde zzdez [0, 0, 1, 1, 0] [0, 0, 1, 1, 0] True True
abcae abcde [2, 2, 2, 0, 2] [2, 2, 2, 0, 2] True True
abcae acbde [2, 1, 1, 0, 2] [2, 1, 1, 0, 2] True True
abcae azcde [2, 0, 2, 0, 2] [2, 0, 2, 0, 2] True True
abcae aacde [2, 1, 2, 0, 2] [2, 1, 2, 0, 2] True True
abcae zacde [0, 1, 2, 0, 2] [0, 1, 2, 0, 2] True True
abcae zzdzz [0, 0, 0, 0, 0] [0, 0, 0, 0, 0] True True
abcae zzddz [0, 0, 0, 0, 0] [0, 0, 0, 0, 0] True True
abcae zdddz [0, 0, 0, 0, 0] 

In [112]:
def random_guess(guess_list, target_list):
    guess_idx = random.randint(0, len(guess_list) + len(target_list))
    is_guess = guess_idx < len(guess_list)
    if is_guess:
        word = guess_list[guess_idx]
    else:
        word = target_list[guess_idx - len(guess_list)]
    return word, is_guess

In [149]:
def history_to_state(history):
    #the state will be a 26 * 5 array of the probability of a certain character being in a certain position
    placing = [[0] * 26] * len(list(history.keys())[0])
        
    for guess,hints in history.items():
        pos = 0
        anywhere_counts = {}
        located_counts = {}
        for c in set(guess):
            
        for chrg,hint in zip(list(guess), hints):
            
            idx_chrg = ord(chrg) - 97
            if hint == 0:
                if present[idx_chrg] == 0:
                    present[idx_chrg] = -1
                for pos2 in range(len(guess)):
                    if placing[idx2][idx_chrg] == 0:
                       placing[idx2][idx_chrg] = -1
            else:
                present[idx_chrg] = 1
                if hint == 2:
                    placing[pos][idx_chrg] = 1
                else:
                    placing[pos][idx_chrg] = -1
        pos += 1
    
    return placing + present

In [150]:
def construct_state_array(chr_value):
    ret = [0] * 26
    for chrg, value in chr_value.items():
        ret[ord(chrg) - 97] = value
    return ret

history = {'abcde': [0,1,2,1,0]}
state = [construct_state_array(cv) for cv in [{'a':-1, 'e':-1}, {'a':-1, 'b':-1}]]

NameError: name 'chr_values' is not defined

In [151]:
num_episodes = 6
e = Env(target_list)
print(e.target)
history = {}
for i in range(num_episodes):
    guess, is_guess_list = random_guess(guess_list, target_list)
    hints = e.submit_guess(guess)
    history[guess] = hints
    print(guess, is_guess_list, hints)

history_to_state(history)

wound
ethne True [0, 0, 0, 2, 0]
kohls True [0, 2, 0, 0, 0]
mudir True [0, 1, 1, 0, 0]
sagas True [0, 0, 0, 0, 0]
dowps True [1, 2, 1, 0, 0]
joram True [0, 2, 0, 0, 0]
{'n', 'e', 'd', 'g', 't', 'r', 'w', 'o', 'j', 'i', 'k', 'h', 'a', 'm', 's', 'u', 'p', 'l'}


NameError: name 'present' is not defined