# Setup and functions

In [518]:
## Import libraries
import random
import time
import os
import operator
from math import log, e
import timeit
import string
from itertools import permutations, product
import pandas as pd
import numpy as np
from scipy.stats import entropy

In [287]:
## Get list of all English 5-letter words
words = open(os.getcwd()+'/wordle/scrabble_words.txt').read().split()
wordslist = [x.upper() for x in words if len(x)==5]
solutions = open(os.getcwd()+'/wordle/solutions.txt').read().split()
solutionslist = [x.upper() for x in solutions if len(x)==5]

In [288]:
def getExactCharacters(guess, sol):
    out=''
    for x, y in zip(guess, sol):
        if x == y:
            out += 'G'
            sol = sol.replace(x, '', 1)
        else:
            out += '-'
    return out

In [289]:
def getMatchingCharacters(guess, sol):
    exact = getExactCharacters(guess, sol)
    out=''
    for x in range(len(guess)):
        if exact[x]=='G':
            out += 'G'
        elif guess[x] in sol:
            out += 'A'
            sol = sol.replace(guess[x], '', 1)
        else:
            out += '-'
    return out

# Automated - V1
- Uses letter frequency in all english words to define priority
- Uses all past info at all times (ie. Green and Amber letters are always kept in)

In [381]:
def sortAndList(count_dict):
    count_dict = dict(sorted(count_dict.items(), key=operator.itemgetter(1),reverse=True))
    sorted_list = list(count_dict.keys())
    return sorted_list

def getOrder(options,n):
    k = range(len(options))
    options_list = []

    for i in permutations(k,n):
        options_list.append(list(i))

    options_order = sorted(options_list,key=sum)
    
    return options_order

def containsLetter(letters, str):
    all(i in str for i in letters)


def tryAnagrams(next_guess, contains, word_poss, best_options, options_order):
    for attempt in options_order:
        guess = next_guess
        for j in attempt:
            guess = guess.replace('-', best_options[j],1)
        
        if (guess in wordslist) & all(guess[i] in word_poss[i] for i in range(5)):
            return guess
            break
        
                                        

def nextBestWord(prev_guess, result, does_not_contain): # Need to consider past guesses - 
                                                        # exclude all letters not taken forwards
    
    word_poss = [string.ascii_uppercase]*5 # possible chars for each position
    contains = []
    next_guess = list('-----')
    
    
    count_dict, multi_count_dict = count_dict_base, multi_count_dict_base
    
    for x in range(len(prev_guess)):
        l = prev_guess[x]
        # If exactly right set 'word_poss' accordingly
        if result[x]=='G':
            next_guess[x] = l
            word_poss[x] = l
            count_dict[l] = multi_count_dict[l]
            if l not in contains:
                contains.append(l)
            
        # If wrong place remove from 'word_poss' for that char and add to 'contains'
        elif result[x]=='A':
            if l not in contains:
                contains.append(l)
            word_poss[x] = word_poss[x].replace(l,'')
            count_dict[l] = multi_count_dict[l]
            
        else:
            if l not in contains:
                does_not_contain.append(l)
            word_poss[x] = word_poss[x].replace(l,'')

    # Find number of unknown letters and order by 
    remainder = next_guess.count('-')
    best_options = sortAndList(count_dict)
    best_options = [x for x in best_options if x not in does_not_contain]
    best_options = contains + best_options
    
    options_order = getOrder(best_options, remainder)
    
    next_guess = tryAnagrams(''.join(next_guess), contains, word_poss, best_options, options_order)
    
    return next_guess, does_not_contain
    

In [None]:
start_time = time.time()

# Work out frequency of letters to set priority order
count_dict_base = {'A':0, 'B':0, 'C':0, 'D':0, 'E':0, 'F':0, 'G':0, 'H':0, 'I':0, 'J':0, 'K':0, 'L':0, 'M':0, 
                                'N':0, 'O':0, 'P':0, 'Q':0, 'R':0, 'S':0, 'T':0, 'U':0, 'V':0, 'W':0, 'X':0, 'Y':0, 'Z':0}
multi_count_dict_base = {'A':0, 'B':0, 'C':0, 'D':0, 'E':0, 'F':0, 'G':0, 'H':0, 'I':0, 'J':0, 'K':0, 'L':0, 'M':0, 
                                'N':0, 'O':0, 'P':0, 'Q':0, 'R':0, 'S':0, 'T':0, 'U':0, 'V':0, 'W':0, 'X':0, 'Y':0, 'Z':0}

for word in wordslist:
    for i in word:
        count_dict_base[i] += 1
        if (word.count(i))>1:
            multi_count_dict_base[i] += (1/word.count(i))
            
multi_count_dict_base = {k: v/count_dict_base[k] for k,v in multi_count_dict_base.items()}
count_dict_base = {k: v/len(wordslist) for k,v in count_dict_base.items()}

# Repeatedly run game and store solution data
stats = pd.DataFrame(columns=['Number of Guesses'])

# Pick all possible solutions
for solution in random.sample(solutionslist, 100):
    # Set base variables
    guess, result, dnc = 'AROSE', '', []

    # Try AROSE (best starter by this method)
    result = getMatchingCharacters(guess, solution)
    if result=='GGGGG':
            i=1
            print('Completed in '+str(i)+'/6 attempts!')
            break
    
    # Loop through attempts
    for i in range(2,7):
        guess, dnc = nextBestWord(guess, result, dnc)
        if guess==None:
            print('Failed - the solution was: '+solution)
            i='7'
            break
        
        result = getMatchingCharacters(guess, solution)
        
        if result=='GGGGG':
            print('Completed in '+str(i)+'/6 attempts!')
            break

        elif i==6:
            print('Failed - the solution was: '+solution)
            i='F'
        
    stats.loc[solution] = i

fin_time = time.time()

print("--- %s seconds ---" % (fin_time - start_time))
print("--- %s minutes ---" % str((fin_time - start_time)/60))
print("Avg time per word: "+ str((fin_time - start_time)/stats.shape[0]))

In [None]:
pd.set_option("display.max_rows", None, "display.max_columns", None)
stats

# Automated - V2
- Uses a few words to cover all the most common letters
- Then works out the remaining anagram by passing through words by commonality

In [388]:
def returnKnown(hc_guesses, results): # Need to consider past guesses - exclude all letters not taken forwards
    
    contains = []
    word_poss = [string.ascii_uppercase]*5
    next_guess = list('-----')
    does_not_contain = []
    
    for g in range(len(hc_guesses)):
        guess = hc_guesses[g]
        result = results[g]
        for x in range(len(guess)):
            l = guess[x]
            # If exactly right set 'word_poss' accordingly
            if result[x]=='G':
                next_guess[x] = l
                word_poss[x] = l
                if l not in contains:
                    contains.append(l)

            # If wrong place remove from 'word_poss' for that char and add to 'contains'
            elif result[x]=='A':
                if l not in contains:
                    contains.append(l)
                word_poss[x] = word_poss[x].replace(l,'')

            else:
                if l not in contains:
                    does_not_contain.append(l)
                word_poss[x] = word_poss[x].replace(l,'')

    return next_guess, contains, does_not_contain, word_poss
   
def pickWord(next_guess, contains, does_not_contain, word_poss):
    #print(next_guess)
    #print(contains)
    #print(does_not_contain)
    #print(word_poss)
    
    for word in wordslist: #['ARECA','GRACE', 'OTTER']:#word in wordslist:
        #print(word)
        if all(c in word for c in contains):
            #print('con')
            if all(dnc not in word for dnc in does_not_contain):
                #print('dnc, con')
                if all(next_guess[x] in ['-', word[x]] for x in range(len(word))) & all(list(word)[x] in word_poss[x] for x in range(len(word))):
                    #print('pass')
                    return word
                    break


In [None]:
start_time = time.time()

# Repeatedly run game and store solution data
stats2 = pd.DataFrame(columns=['Number of Guesses'])

# Pick all possible solutions
for solution in random.sample(solutionslist, 1000): #['GRACE']:

    # Force several starting words
    hc_guesses = ['CHALK', 'BRING', 'STONE', 'DUMPY']
    results = ['-----', '-----', '-----','-----']
    
    for i in range(len(hc_guesses)):
        results[i] = getMatchingCharacters(hc_guesses[i], solution)
        
        if results[i]=='GGGGG':
            print('Completed in '+str(i+1)+'/6 attempts!')
            stats2.loc[solution] = i+1
            break
    
    if results[-1]!='GGGGG':
        for i in range(4,7):
            if i==6:
                print('Failed - the solution was: '+solution)
                stats2.loc[solution] = 'F'
                break
            
            next_guess, contains, does_not_contain, word_poss = returnKnown(hc_guesses, results)
            hc_guesses.append(pickWord(next_guess, contains, does_not_contain, word_poss))
            results.append(getMatchingCharacters(hc_guesses[i], solution))
            
            if results[i]=='GGGGG':
                print('Completed in '+str(i+1)+'/6 attempts!')
                stats2.loc[solution] = i+1
                break

fin_time = time.time()

print("--- %s seconds ---" % (fin_time - start_time))
print("--- %s minutes ---" % str((fin_time - start_time)/60))
print("Avg time per word: "+ str((fin_time - start_time)/stats.shape[0]))

In [387]:
solution

'BRING'

In [None]:
stats2

# Automated - V3
- For each word works out the number of answers related to every possible outcome
- Pick the word with the best average outcome

In [553]:
def checkMatch(testword, targetword, outcome):
    match=True
    for i in range(len(outcome)):
        if (outcome[i]=='G') & (targetword[i]==testword[i]):
            testword.replace(targetword[i], '-', 1)
        elif (outcome[i]=='A') & (targetword[i] in testword) & (outcome[testword.find(targetword[i])]!='G'):
            testword.replace(targetword[i], '-', 1)
        elif (targetword[i] in testword):
            match=False
            break
            
    if match==True:
        return 1
    else:
        return 0
    
def entropy2(labels, base=None):
  n_labels = len(labels)

  if n_labels <= 1:
    return 0

  value,counts = np.unique(labels, return_counts=True)
  probs = counts / n_labels
  n_classes = np.count_nonzero(probs)

  if n_classes <= 1:
    return 0

  ent = 0.

  # Compute entropy
  base = e if base is None else base
  for i in probs:
    ent -= i * log(i, base)

  return ent

def getOutcomeEntropy(outcomes, cutwordslist, targetword):
    output = []
    entropy = 0
    for i in outcomes:
        matchcount=0
        for j in cutwordslist:
            # Get the number of options for each outcome and add to DF
            matchcount += checkMatch(j, targetword, i)
        output.append(matchcount)
        
    # Get Entropy of this option
    entropy = entropy2([k/len(cutwordslist) for k in output])
    
    return entropy
            

def nextBestWord3(cutwordslist):
    outcomes = list(map(''.join,product('GA-', repeat=5)))
    all_outputs = []

    for targetword in cutwordslist:
        all_outputs.append([getOutcomeEntropy(outcomes, cutwordslist, targetword)]) # improve to just hold biggest entropy?

    best_word = cutwordslist[all_outputs.index(max(all_outputs))]
    return best_word

def filterWords(cutwordslist, word, result)
    

In [560]:
testoutput = []
for i in cutwordslist:
    testoutput.append(checkMatch(i, 'PARTY', 'GGA-G'))

In [554]:
start_time = time.time()

a = nextBestWord3(wordslist)
    
fin_time = time.time()

print("--- %s seconds ---" % (fin_time - start_time))
print("--- %s minutes ---" % str((fin_time - start_time)/60))
print("Avg time per word: "+ str((fin_time - start_time)/stats.shape[0]))

IndexError: list index out of range

In [555]:
a

'ORALS'

In [None]:
poss = product('GA-', repeat=5)

In [481]:
output

Unnamed: 0,GGGGG,GGGGA,GGGG-,GGGAG,GGGAA,GGGA-,GGG-G,GGG-A,GGG--,GGAGG,GGAGA,GGAG-,GGAAG,GGAAA,GGAA-,GGA-G,GGA-A,GGA--,GG-GG,GG-GA,GG-G-,GG-AG,GG-AA,GG-A-,GG--G,GG--A,GG---,GAGGG,GAGGA,GAGG-,GAGAG,GAGAA,GAGA-,GAG-G,GAG-A,GAG--,GAAGG,GAAGA,GAAG-,GAAAG,GAAAA,GAAA-,GAA-G,GAA-A,GAA--,GA-GG,GA-GA,GA-G-,GA-AG,GA-AA,GA-A-,GA--G,GA--A,GA---,G-GGG,G-GGA,G-GG-,G-GAG,G-GAA,G-GA-,G-G-G,G-G-A,G-G--,G-AGG,G-AGA,G-AG-,G-AAG,G-AAA,G-AA-,G-A-G,G-A-A,G-A--,G--GG,G--GA,G--G-,G--AG,G--AA,G--A-,G---G,G---A,G----,AGGGG,AGGGA,AGGG-,AGGAG,AGGAA,AGGA-,AGG-G,AGG-A,AGG--,AGAGG,AGAGA,AGAG-,AGAAG,AGAAA,AGAA-,AGA-G,AGA-A,AGA--,AG-GG,AG-GA,AG-G-,AG-AG,AG-AA,AG-A-,AG--G,AG--A,AG---,AAGGG,AAGGA,AAGG-,AAGAG,AAGAA,AAGA-,AAG-G,AAG-A,AAG--,AAAGG,AAAGA,AAAG-,AAAAG,AAAAA,AAAA-,AAA-G,AAA-A,AAA--,AA-GG,AA-GA,AA-G-,AA-AG,AA-AA,AA-A-,AA--G,AA--A,AA---,A-GGG,A-GGA,A-GG-,A-GAG,A-GAA,A-GA-,A-G-G,A-G-A,A-G--,A-AGG,A-AGA,A-AG-,A-AAG,A-AAA,A-AA-,A-A-G,A-A-A,A-A--,A--GG,A--GA,A--G-,A--AG,A--AA,A--A-,A---G,A---A,A----,-GGGG,-GGGA,-GGG-,-GGAG,-GGAA,-GGA-,-GG-G,-GG-A,-GG--,-GAGG,-GAGA,-GAG-,-GAAG,-GAAA,-GAA-,-GA-G,-GA-A,-GA--,-G-GG,-G-GA,-G-G-,-G-AG,-G-AA,-G-A-,-G--G,-G--A,-G---,-AGGG,-AGGA,-AGG-,-AGAG,-AGAA,-AGA-,-AG-G,-AG-A,-AG--,-AAGG,-AAGA,-AAG-,-AAAG,-AAAA,-AAA-,-AA-G,-AA-A,-AA--,-A-GG,-A-GA,-A-G-,-A-AG,-A-AA,-A-A-,-A--G,-A--A,-A---,--GGG,--GGA,--GG-,--GAG,--GAA,--GA-,--G-G,--G-A,--G--,--AGG,--AGA,--AG-,--AAG,--AAA,--AA-,--A-G,--A-A,--A--,---GG,---GA,---G-,---AG,---AA,---A-,----G,----A,-----
