# WORDLE Breaker

### https://www.powerlanguage.co.uk/wordle/  

In [9]:
import re
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'

In [10]:
def special_match(pattern, strg):
    """Return True when `pattern` matches at the beginning of `strg`.

    Uses `re.match`, so the pattern is anchored at the start of the string
    only; add `$` to the pattern to require a match of the whole string.
    """
    return re.match(pattern, strg) is not None


In [15]:
def _letter_variety(word):
    """Return 1 minus the share of letters in `word` that occur more than once."""
    repeated = sum(1 for letter in word if word.count(letter) > 1)
    return 1 - (repeated / len(word))


def _letter_weight(word):
    """Score `word`: 1.5 per e/a/i/o occurrence plus 1 per t/n/s/h/r occurrence."""
    return (sum(1.5 for letter in word if letter in "eaio")
            + sum(1 for letter in word if letter in "tnshr"))


def _exclude_at(incorrect_pos_list, pos, letter):
    """Record that `letter` cannot be at position `pos` (negated character class)."""
    if incorrect_pos_list[pos] == "a-z":
        incorrect_pos_list[pos] = "^" + letter
    elif letter not in incorrect_pos_list[pos]:
        incorrect_pos_list[pos] += letter


def _build_patterns(correct_pos_list, incorrect_pos_list, exist_in_list, not_exist_list):
    """Return the regexes (correct pos, incorrect pos, contain, does not contain)."""
    correct_pos_rex = "^" + "".join("[" + str(x) + "]" for x in correct_pos_list) + "$"
    incorrect_pos_rex = "^" + "".join("[" + str(x) + "]" for x in incorrect_pos_list) + "$"
    exist_in_rex = "^.*" + "".join("(?=.*[" + str(x) + "])" for x in exist_in_list) + ".*"
    # "[]" is not a valid character class; "." (a literal dot inside a class)
    # stands in for "nothing excluded yet".
    absent = "".join(str(x) for x in not_exist_list) or "."
    not_exist_rex = "^.*(?=.*[" + absent + "]).*"
    return correct_pos_rex, incorrect_pos_rex, exist_in_rex, not_exist_rex


def _pick_word(candidates, random_word_mode):
    """Return a random candidate word, or the first (best ranked) one."""
    chosen = candidates.sample() if random_word_mode else candidates.head(1)
    return chosen[0].item()


def break_wordle(attempts, word_lengh, dictionary, random_word_mode = False):
    """Interactively suggest Wordle guesses, narrowing the dictionary by feedback.

    For each attempt the remaining words are filtered with four regexes built
    from the feedback so far, a word is suggested, and the user types the
    colours Wordle returned ("g" green, "y" yellow, "b" black) or "x" when
    Wordle did not recognise the word (the word is dropped and another one is
    suggested without consuming the attempt).

    Args:
        attempts: maximum number of guesses.
        word_lengh: number of letters in the word.
        dictionary: pandas DataFrame whose column ``0`` holds the words.
        random_word_mode: if True, suggest a random remaining word; if False,
            suggest the first word after ordering (once, on the first attempt)
            by "var" (share of non-repeated letters) and "weight" (score for
            frequently used letters), both descending.

    Returns:
        None. Progress, suggestions and the final report are printed.
    """
    # History kept for the report printed after a win.
    not_recognized_word_list = []
    regex_list = []
    try_list = []

    # Knowledge gathered so far; source for the regex patterns.
    correct_pos_list = ["a-z"] * word_lengh
    incorrect_pos_list = ["a-z"] * word_lengh
    exist_in_list = []
    not_exist_list = []

    prompt = f'''Enter {word_lengh} char sequence (no spaces)
                            "g" - green 
                            "y" - yellow 
                            "b" - black
                            'x' - not recognized word: '''
    # Character class without "|" so that only b/g/y are accepted.
    response_rex = re.compile(f"[bgy]{{{word_lengh}}}")

    candidates = dictionary
    for i in range(attempts):
        correct_pos_rex, incorrect_pos_rex, exist_in_rex, not_exist_rex = _build_patterns(
            correct_pos_list, incorrect_pos_list, exist_in_list, not_exist_list)

        print("\n###############################################################################################")
        print(f'''Attempt {i+1}:''')

        print(f'''\nRegex Used:
        Correct Pos:          {correct_pos_rex}                    
        InCorrect Pos:        {incorrect_pos_rex}                    
        Contain:              {exist_in_rex}
        Does not Contain:     {not_exist_rex}
        Not Recognized words: {not_recognized_word_list}
        ''')

        # Filter the remaining words; non-string cells (NaN) never match.
        words = candidates[0]
        keep = (
            words.str.contains(correct_pos_rex, na=False)
            & words.str.contains(incorrect_pos_rex, na=False)
            & words.str.contains(exist_in_rex, na=False)
            & ~words.str.contains(not_exist_rex, na=False)
            & ~words.isin(not_recognized_word_list)
        )
        candidates = candidates[keep]

        # History of regexes used, for the final report.
        regex_list.append([correct_pos_rex, incorrect_pos_rex, exist_in_rex, not_exist_rex])

        # Rank once; later filtering preserves this order. The copy keeps the
        # column assignment off the caller's DataFrame.
        if i == 0:
            candidates = candidates.copy()
            candidates["var"] = candidates[0].map(_letter_variety)
            candidates["weight"] = candidates[0].map(_letter_weight)
            candidates = candidates.sort_values(["var", "weight"], ascending=False)

        # Select the suggestion. If the dictionary is empty, the game is lost.
        if len(candidates) == 0:
            print("\nDictionary empty. You lose")
            return
        str_try = _pick_word(candidates, random_word_mode)
        odds = (1 / len(candidates)) * 100
        if len(candidates) == 1:  # a single entry left: 'probably' the answer
            print(f'''The word (probably) is: {str_try} (1 of {len(candidates)} or {odds:.2f}% of success)''')
        else:
            print(f'''Try the word: {str_try} (1 of {len(candidates)} or {odds:.2f}% of success)''')

        # Wait for a valid Wordle response; "x" swaps the word and asks again.
        while True:
            resp = input(prompt)
            if resp == "x":
                not_recognized_word_list.append(str_try)
                candidates = candidates[~candidates[0].isin(not_recognized_word_list)]
                if len(candidates) == 0:
                    print("\nDictionary empty. You lose")
                    return
                str_try = _pick_word(candidates, random_word_mode)
                odds = (1 / len(candidates)) * 100
                print(f'''\nNew word on Attempt {i+1}''')
                print(f'''Try the word: {str_try} (1 of {len(candidates)} or {odds:.2f}% of success)''')
            elif response_rex.fullmatch(resp):
                break

        # Recorded only once the word was actually played, so try_list stays
        # aligned with regex_list (one entry per attempt).
        try_list.append(str_try)

        # All green means the game is won.
        if resp == 'g' * word_lengh:
            print("\nCongrats, you won!")
            for word, used in zip(try_list, regex_list):
                print(f'''
                        Word: {word}
                        Regex Used:
                            Correct Pos:     {used[0]}                    
                            InCorrect Pos:   {used[1]}                    
                            Contain:         {used[2]}
                            Does not Contain:{used[3]}
                            List of not recognized words tried:
                                {not_recognized_word_list} ''')
            return

        # Greens and yellows first, so that a black copy of the same letter
        # (duplicate letters in the guess) can be recognised below.
        for k, (mark, letter) in enumerate(zip(resp, str_try)):
            if mark == 'g':
                correct_pos_list[k] = letter
            elif mark == 'y':
                _exclude_at(incorrect_pos_list, k, letter)
                if letter not in exist_in_list:
                    exist_in_list.append(letter)

        for k, (mark, letter) in enumerate(zip(resp, str_try)):
            if mark != 'b':
                continue
            if letter in exist_in_list:
                # Present elsewhere (yellow): only this position is ruled out.
                _exclude_at(incorrect_pos_list, k, letter)
            elif letter in correct_pos_list:
                # Only green copies exist: rule the letter out of every
                # position that is not already fixed to it.
                for pos, fixed in enumerate(correct_pos_list):
                    if fixed != letter:
                        _exclude_at(incorrect_pos_list, pos, letter)
            elif letter not in not_exist_list:
                not_exist_list.append(letter)

    print("\nSorry, you lose")
        


#### Download dictionary and call function

In [None]:
# Download the word list (one word per line, no header) and start a 6-attempt,
# 5-letter game in random-word mode.
# keep_default_na=False: entries such as "nan" and "null" are real words in this
# list and must stay strings instead of being parsed as missing values.
words = pd.read_csv(
    'https://github.com/dwyl/english-words/raw/master/words_alpha.txt',
    header=None,
    dtype=str,
    keep_default_na=False,
)
break_wordle(6, 5, words, random_word_mode=True)


###############################################################################################
Attempt 1:

Regex Used:
        Correct Pos:          ^[a-z][a-z][a-z][a-z][a-z]$                    
        InCorrect Pos:        ^[a-z][a-z][a-z][a-z][a-z]$                    
        Contain:              ^.*.*
        Does not Contain:     ^.*(?=.*[.]).*
        Not Recognized words: []
        
Try the word: erase (1 of 15918 or 0.01% of success)


Enter 5 char sequence (no spaces)
                            "g" - green 
                            "y" - yellow 
                            "b" - black
                            'x' - not recognized word:  bybbb



###############################################################################################
Attempt 2:

Regex Used:
        Correct Pos:          ^[a-z][a-z][a-z][a-z][a-z]$                    
        InCorrect Pos:        ^[a-z][^r][a-z][a-z][a-z]$                    
        Contain:              ^.*(?=.*[r]).*
        Does not Contain:     ^.*(?=.*[ease]).*
        Not Recognized words: []
        
Try the word: inorg (1 of 477 or 0.21% of success)


Enter 5 char sequence (no spaces)
                            "g" - green 
                            "y" - yellow 
                            "b" - black
                            'x' - not recognized word:  x



New word on Attempt 2
Try the word: tudor (1 of 476 or 0.21% of success)


Enter 5 char sequence (no spaces)
                            "g" - green 
                            "y" - yellow 
                            "b" - black
                            'x' - not recognized word:  x



New word on Attempt 2
Try the word: xyrid (1 of 475 or 0.21% of success)


Enter 5 char sequence (no spaces)
                            "g" - green 
                            "y" - yellow 
                            "b" - black
                            'x' - not recognized word:  x



New word on Attempt 2
Try the word: twirk (1 of 474 or 0.21% of success)


Enter 5 char sequence (no spaces)
                            "g" - green 
                            "y" - yellow 
                            "b" - black
                            'x' - not recognized word:  x



New word on Attempt 2
Try the word: bourd (1 of 473 or 0.21% of success)


Enter 5 char sequence (no spaces)
                            "g" - green 
                            "y" - yellow 
                            "b" - black
                            'x' - not recognized word:  ygbyb



###############################################################################################
Attempt 3:

Regex Used:
        Correct Pos:          ^[a-z][o][a-z][a-z][a-z]$                    
        InCorrect Pos:        ^[^b][^r][a-z][^r][a-z]$                    
        Contain:              ^.*(?=.*[r])(?=.*[b]).*
        Does not Contain:     ^.*(?=.*[easeud]).*
        Not Recognized words: ['inorg', 'tudor', 'xyrid', 'twirk']
        
Try the word: robot (1 of 5 or 20.00% of success)
