# Beat Wordle

### https://www.powerlanguage.co.uk/wordle/  

In [1]:
#Test

import re
import pandas as pd
from IPython.display import clear_output
import time
pd.options.mode.chained_assignment = None  # default='warn'

In [None]:
def special_match(pattern, strg):
    """Return True if ``pattern`` matches at the beginning of ``strg``, else False."""
    # A match object is always truthy, so bool() distinguishes "match" from None.
    return bool(re.match(pattern, strg))


In [None]:
def beat_wordle(attempts:int, word_length:int, dictionary:pd.DataFrame, guess_percent:int = 0) -> None:
    """Interactively suggest Wordle guesses, narrowing a word list from the game's feedback.

    On every attempt the remaining words are filtered with regular expressions
    built from the feedback collected so far, a word is proposed (or typed by
    the user), and the colour response is read with ``input``.

    Args:
        attempts: Number of attempts allowed.
        word_length: Length of the word to guess.
        dictionary: DataFrame with a ``word`` column and a ``count`` column.
            It is sorted by ``count`` (descending) before the first attempt.
        guess_percent: If greater than 0, the suggestion is sampled at random
            from the top ``guess_percent`` percent of the remaining rows (at
            least one row). If 0, the first remaining row is suggested.

    Returns:
        None. Results are reported with ``print``.

    The response typed by the user is either ``word_length`` letters out of
    g (green), y (yellow), b (black), or one of the single-letter commands
    n (word not recognized), i (enter own word), q (quit).
    """
    # History kept only for the report printed after a win: one word and one
    # set of regexes per attempt, so both lists stay index-aligned.
    regex_list = []
    try_list = []
    not_recognized_word_list = []

    # Sources the regex patterns are rebuilt from on every attempt.
    # The two *_pos lists hold one character-class body per position.
    correct_pos_list = ["a-z"] * word_length
    incorrect_pos_list = ["a-z"] * word_length
    exist_in_list = []
    not_exist_list = ["."]  # "." is the placeholder for "no excluded letter yet"

    # Only the three colour letters are valid feedback; "|" inside a character
    # class is a literal, so it must not appear in the class.
    response_rex = f'''^[bgy]{{{word_length}}}$'''
    # Letters of the tried word are spliced into character classes below, so a
    # user-typed word must consist of plain a-z letters only.
    own_word_rex = f'''^[a-z]{{{word_length}}}$'''

    def exclude_at(pos, letter):
        """Forbid ``letter`` at position ``pos`` of the candidate words."""
        if incorrect_pos_list[pos] == "a-z":
            incorrect_pos_list[pos] = "^" + letter
        elif letter not in incorrect_pos_list[pos]:
            incorrect_pos_list[pos] += letter

    # order by most used words
    dictionary = dictionary.sort_values(["count"], ascending=False)

    # run through the attempts
    for i in range(attempts):

        # Rebuild the regex patterns used for this iteration
        correct_pos_rex = "^" + "".join("[" + str(x) + "]" for x in correct_pos_list) + "$"
        incorrect_pos_rex = "^" + "".join("[" + str(x) + "]" for x in incorrect_pos_list) + "$"
        exist_in_rex = "^.*" + "".join("(?=.*[" + str(x) + "])" for x in exist_in_list) + ".*"
        not_exist_rex = "^.*(?=.*[" + "".join(str(x) for x in not_exist_list) + "]).*"

        print("\n###############################################################################################")
        print(f'''Attempt {i+1}:''')

        print(f'''\nRegex Used:
        Correct Pos:          {correct_pos_rex}                    
        Incorrect Pos:        {incorrect_pos_rex}                    
        Contain:              {exist_in_rex}
        Does not Contain:     {not_exist_rex}
        Not Recognized words: {not_recognized_word_list}\n''')

        # Apply the filters on the dictionary. na=False drops rows whose word
        # is missing or not a string instead of propagating NaN into the mask.
        dictionary = dictionary[dictionary["word"].str.contains(correct_pos_rex, na=False)]
        dictionary = dictionary[dictionary["word"].str.contains(incorrect_pos_rex, na=False)]
        dictionary = dictionary[dictionary["word"].str.contains(exist_in_rex, na=False)]
        dictionary = dictionary[~dictionary["word"].str.contains(not_exist_rex, na=False)]
        dictionary = dictionary[~dictionary["word"].isin(not_recognized_word_list)]

        # history of regex used. Just for the report at the end
        regex_list.append((correct_pos_rex, incorrect_pos_rex, exist_in_rex, not_exist_rex))

        # Wait for the wordle response. The loop repeats until the response is
        # exactly word_length letters out of b/g/y.
        resp = ""
        str_try = ""
        pick_word = True        # a word must be chosen before the next prompt
        typed_by_user = False   # whether the current word was typed by the user
        while len(resp) != word_length or not special_match(response_rex, resp):
            # A new word is chosen only on the first pass and after 'n' / 'i'.
            # After a mistyped response the same word is kept, because the
            # user has already played it.
            if pick_word:
                # User wants to ignore the suggested word, or the dictionary is empty.
                typed_by_user = resp == "i" or len(dictionary) == 0
                if typed_by_user:
                    if len(dictionary) == 0: print("Dictionary is empty. Enter your own word:\n")
                    str_try = ''
                    while not special_match(own_word_rex, str_try):
                        str_try = input(f'''Enter your own {word_length} len char word:\n''').strip().lower()
                        clear_output()
                        time.sleep(0.3)
                elif guess_percent > 0:
                    # choose a random word from the top guess_percent% of the most used words
                    sort_range = int(len(dictionary) * (guess_percent / 100)) or 1
                    print(f'''Selecting a word from {guess_percent}% of the most used words''')
                    str_try = dictionary.head(sort_range).sample()["word"].item()
                else:
                    str_try = dictionary.head(1)["word"].item()  # get the first item
                pick_word = False

            if not typed_by_user:
                if len(dictionary) == 1:  # if dictionary has only one entry, you 'probably' won
                    print(f'''The word (probably) is: '{str_try}' (1 of {len(dictionary)} or {"{:.2f}".format(((1/len(dictionary))*100))}% of success)''')
                else:
                    print(f'''Try the word: '{str_try}' (1 of {len(dictionary)} or {"{:.2f}".format(((1/len(dictionary))*100))}% of success)''')

            resp = input(f'''Enter {word_length} char wordle response (no spaces) of the word '{str_try}'
                            "g" - green 
                            "y" - yellow 
                            "b" - black
                            'n' - not recognized word 
                            'i' - ignore suggestion and enter your own word
                            'q' - quit the game\n''').strip().lower()

            # quit the game
            if resp == 'q':
                print(f'''\nBye!''')
                return

            if resp == "n":
                # Wordle did not recognize the word: drop it and pick another
                # one without consuming the attempt.
                not_recognized_word_list.append(str_try)
                dictionary = dictionary[~dictionary["word"].isin(not_recognized_word_list)]
                print(f'''\nNew word on Attempt {i+1}''')
                pick_word = True
            elif resp == 'i':
                # user decided to ignore the suggestion and enter an own word
                print(f'''\nNew word on Attempt {i+1}''')
                pick_word = True

            clear_output()
            time.sleep(0.3)

        # Record the word only once its colour response has been accepted.
        try_list.append(str_try)

        # if the response is all 'g' we have won the game
        if resp == 'g' * word_length:
            print("\nCongrats, you won!")
            for word, (rex_correct, rex_incorrect, rex_contain, rex_absent) in zip(try_list, regex_list):
                print(f'''
                        Word: {word}
                        Regex Used:
                            Correct Pos:     {rex_correct}                    
                            InCorrect Pos:   {rex_incorrect}                    
                            Contain:         {rex_contain}
                            Does not Contain:{rex_absent}
                            List of not recognized words tried:
                                {not_recognized_word_list}\n''')
            return

        # Update the pattern sources from each g / b / y of the response.
        for k, (colour, letter) in enumerate(zip(resp, str_try)):
            if colour == 'g':
                correct_pos_list[k] = letter
            elif colour == 'b':
                # Whatever else is known about the letter, it is not at this position.
                exclude_at(k, letter)
                if not_exist_list[0] == ".":
                    not_exist_list[0] = letter
                elif letter not in not_exist_list:
                    not_exist_list.append(letter)
            else:  # 'y'
                exclude_at(k, letter)
                if letter not in exist_in_list:
                    exist_in_list.append(letter)

        # A letter marked black may still occur in the word when another copy
        # of it was green or yellow; such a letter must not stay in the
        # "does not contain" list.
        yellow_now = {letter for colour, letter in zip(resp, str_try) if colour == 'y'}
        still_absent = []
        for letter in not_exist_list:
            if letter in yellow_now:
                # Another copy sits at a position not found yet, so only the
                # black/yellow positions themselves (handled above) are ruled out.
                continue
            if letter not in correct_pos_list:
                still_absent.append(letter)
                continue
            # Green elsewhere and not yellow: the letter is ruled out at every
            # position where it is not green.
            for pos, fixed in enumerate(correct_pos_list):
                if fixed != letter:
                    exclude_at(pos, letter)

        not_exist_list = [x for x in still_absent if x not in exist_in_list]
        if not not_exist_list:
            not_exist_list = ["."]

    print("\nSorry, you lose")
        


#### Download dictionary and call function

In [None]:
# Word/frequency list: two space-separated columns, read as word and count.
words01 = pd.read_csv(
    'https://raw.githubusercontent.com/IlyaSemenov/wikipedia-word-frequency/master/results/enwiki-20190320-words-frequency.txt',
    header=None,
    sep=' ',
    names=['word', 'count'],
)

# Plain word list read as a single column. sep='\n' is rejected by current
# pandas (the line terminator cannot be the separator), so the default
# separator is used instead. keep_default_na=False keeps words such as
# "null" or "nan" as text instead of turning them into missing values.
words02 = pd.read_csv(
    'https://github.com/dwyl/english-words/blob/master/words_alpha.txt?raw=true',
    header=None,
    names=['word'],
    keep_default_na=False,
)
# These words carry no frequency information; give them the lowest weight.
words02['count'] = 1

# On duplicates keep the first occurrence, i.e. the row from words01 with its count.
words_final = pd.concat([words01, words02]).drop_duplicates(subset="word", keep='first')


In [None]:
# Start a game: six attempts at a five-letter word, using the merged word list.
beat_wordle(attempts=6, word_length=5, dictionary=words_final)