In [1]:
from math import isclose
from itertools import product
import numpy as np
from numba import njit
from tqdm import tqdm
import multiprocessing as mp
import pickle

In [64]:
def load_words(path="wordlist.txt"):
    with open(path, "r") as f:
        s = f.read()
    return s.split("\n")

def match(w1, w2):
    temp = ""
    ret = ""
    yellows = ""
    for i in range(5):
        char = w1[i]
        if char == w2[i]:
            temp += "G"
            yellows += char
        else:
            temp += "B"
    for i in range(5):
        char = w1[i]
        if temp[i] == "B" and w2.count(char) > yellows.count(char):
            ret += "Y"
            yellows += char
        else:
            ret += temp[i]
    return ret

def new_match(w1, w2):
    temp = []
    ret = []
    yellows = []
    for i in range(5):
        char = w1[i]
        if char == w2[i]:
            temp.append(0)
            yellows.append(char)
        else:
            temp.append(1)
    for i in range(5):
        char = w1[i]
        if temp[i] == 1 and w2.count(char) > yellows.count(char):
            ret.append(2)
            yellows.append(char)
        else:
            ret.append(temp[i])
    return ret

In [31]:
class start(object):
    
    def __init__(self, word: str, entropy:bool = False):
        
        self.word = word
        self.entropy = entropy
        self.scores = {}
        for i in range(1, 7):
            self.scores[i] = []
        
    def add_score(self, score: int, soln: str = None):
        self.scores[score].append(soln)
        
    def average(self):
        numer = 0
        denom = 0
        for i in range(1, 7):
            numer += i * len(self.scores[i])
            denom += len(self.scores[i])    
        return numer / denom
    
    def __float__(self):
        return self.average()
    
    def __str__(self):
        return self.word

In [32]:
def basic_guess(guess, target):
    
    full_words = load_words("guesses.txt")
    words = load_words("possible_words.txt")

    acc = 0
    
    while True:
        
        acc += 1
        truth = match(guess, target).upper()

        if truth == "GGGGG":
            break

        new_words = []
        yellows = ""
        for i in range(5):
            if truth[i] != "B":
                yellows += guess[i]

        for w in words:
            flag = True
            for i in range(5):
                char = guess[i]
                if truth[i] == "G" and w[i] != char:
                    flag = False
                    break
                elif truth[i] == "B" and (
                    w[i] == char or w.count(char) > yellows.count(char)
                ):
                    flag = False
                    break
                elif truth[i] == "Y" and (
                    w[i] == char or yellows.count(char) > w.count(char)
                ):
                    flag = False
                    break
            if flag:
                new_words.append(w)

        words = new_words
        guess = None
        avg_score = float("inf")

        if len(words) == 1:
            guess = words[0]
            continue

        for chance in full_words:
            groups = {}
            for answer in words:
                matching = match(chance, answer)
                if matching not in groups:
                    groups[matching] = 0
                groups[matching] += 1
            a = 0
            for i in groups.values():
                a += i ** 2
            if len(groups) == 0:
                guess = chance
                break
            a /= len(groups)
            if a < avg_score:
                avg_score = a
                guess = chance
            elif isclose(a, avg_score) and "GGGGG" in groups:
                guess = chance

    return acc

In [33]:
def info_guess(guess, target):

    full_words = load_words("guesses.txt")
    words = load_words("possible_words.txt")
    
    base_dict = {}
    acc = 0
    for x in product("GBY", repeat=5):
        base_dict["".join(x)] = acc
        acc += 1
        
    acc = 0

    while True:

        print(guess)
        
        acc += 1
        truth = match(guess, target).upper()

        if truth == "GGGGG":
            break

        new_words = []
        yellows = ""
        for i in range(5):
            if truth[i] != "B":
                yellows += guess[i]

        for w in words:
            flag = True
            for i in range(5):
                char = guess[i]
                if truth[i] == "G" and w[i] != char:
                    flag = False
                    break
                elif truth[i] == "B" and (
                    w[i] == char or w.count(char) > yellows.count(char)
                ):
                    flag = False
                    break
                elif truth[i] == "Y" and (
                    w[i] == char or yellows.count(char) > w.count(char)
                ):
                    flag = False
                    break
            if flag:
                new_words.append(w)

        words = new_words
        guess = None
        avg_score = 0

        if len(words) == 1:
            guess = words[0]
            continue
        
        for chance in full_words:
            groups = np.zeros(243)
            for answer in words:
                matching = match(chance, answer)
                groups[base_dict[matching]] += 1
            
            
            groups = groups[groups != 0] / np.sum(groups)
            a = -np.dot(groups, np.log2(groups))
            
            if a > avg_score:
                avg_score = a
                guess = chance
            elif isclose(a, avg_score) and chance in words:
                guess = chance

    return acc

In [34]:
def add_word(word: start):
    
    if word.entropy:
        file = "info.db"
    else: file = "base.db"
    
    try:
        with open(file, "rb") as f:
            database = pickle.load(f)
    except FileNotFoundError:
        database = {}
        
    database[str(word)] = word
    with open(file, "wb") as f:
        pickle.dump(database, f)

In [35]:
def test_starting_guess(guess, entropy):
    
    word = start(guess)
    if entropy:
        alg = info_guess
    else: alg = basic_guess
        
    words = load_words("possible_words.txt")
    for target in words:
        score = alg(guess, target)
        word.add_score(score, target)
    
    return word

In [8]:
def test_all(entropy=False):
    
    full_words = load_words("guesses.txt")
    average = float("inf")
    word = None
    
    for guess in tqdm(full_words):
        w = test_starting_guess(guess, entropy)
        add_word(w)
        if w.average() < average:
            average = w.average()
            word = w
            print("new best start:", str(w), average)
    
    return word

In [9]:
test_all()

  0%|                                                                                        | 0/12972 [42:51<?, ?it/s]


KeyboardInterrupt: 

In [70]:
from time import time
from random import shuffle
def test_match(alg, trials=150):
    
    full_words = load_words("guesses.txt")
    for i in range(len(full_words)):
        full_words[i] = [k for k in map(ord, full_words[i])]
    
    alg(full_words[0], full_words[-1])
    
    start = time()
    for _ in range(trials):
        shuffle(full_words)
        for i in range(0, len(full_words), 2):
            pass
            #alg(full_words[i], full_words[i+1])
            
    return time() - start

In [71]:
test_match(new_match)

# basic match: 6.81s
# njit match: 24.84s :(
# new match: 4.59s
# njit new match: 9.79s :(

1.9953722953796387