# Wordle Cheater Data Preprocessing

## Reading in the Data

In [1]:
fname='corncob_lowercase.txt'
file=open(fname,'r')
words=file.readlines()

In [2]:
words[0:10]

['aardvark\n',
 'aardwolf\n',
 'aaron\n',
 'aback\n',
 'abacus\n',
 'abaft\n',
 'abalone\n',
 'abandon\n',
 'abandoned\n',
 'abandonment\n']

## Data Preprocessing

In [3]:
wordle_words=[]
for i in range(len(words)):
    words[i]=words[i][0:-1] #getting rid of the /n 
    if len(words[i])==5:
        wordle_words.append(words[i])

In [4]:
print('number of initial words is:', len(words))
print('number of five letter words is:',len(wordle_words))
wordle_words[2230:2240]

number of initial words is: 58110
number of five letter words is: 4266


['mason',
 'masts',
 'match',
 'mated',
 'mater',
 'mates',
 'maths',
 'matte',
 'mauls',
 'mauve']

## Algorithm Building

In [17]:
def make_guess(wordle, guess):
    results=[]
    for i in range(len(wordle)):
        if guess[i]==wordle[i]:
            results.append((i, guess[i], ': ðŸŸ©'))
        elif guess[i] in wordle:
            results.append((i, guess[i], ': ðŸŸ¨'))
        else:
            results.append((i, guess[i], ': â¬œ'))
    if guess==wordle:
        win= 1
    else:
        win=0
    return(results, win)


In [18]:
make_guess('hello','adieu')

([(0, 'a', ': â¬œ'),
  (1, 'd', ': â¬œ'),
  (2, 'i', ': â¬œ'),
  (3, 'e', ': ðŸŸ¨'),
  (4, 'u', ': â¬œ')],
 0)

In [19]:
def validate(word):
    if word.lower() in wordle_words:
        return word.lower()
    else:
        word=input('Word not in wordle words. Please enter a new word: ')
        return validate(word)

In [21]:
validate('adi-eu')

Word not in wordle words. Please enter a new word: adieu


'adieu'

In [118]:
def update_history(history, results):
    if history=={}:
        #create dictionary
        for i in range(5):
            history[i]=[]
        
    #update dictionary based on results 
    for result in results:
        index, letter, status = result
        history[index].append((letter, status))
    return history
        


In [49]:
def display_history(history):
    #putting it into printable format
    printed_history=''
    for key in history:
        printed_history=printed_history+ str(key)+ str(history[key])+ "  "
    print(printed_history)

In [64]:
def display_keyboard(history):
     #alphabetizing history
    alpha={}
    alphabet='abcdefghijklmnopqrstuvwxyz'
    for letter in alphabet:
        alpha[letter]=''
    for key in history:
        for letter, status in history[key]:
        
            if alpha[letter]=='':
                alpha[letter]=status
            else:
                if alpha[letter]==': ðŸŸ©':
                    pass
                elif alpha[letter]==': ðŸŸ¨':
                    if status ==': ðŸŸ©':
                        alpha[letter]=status
                    else:
                        pass
                
    
    printed_history=''
    for key in alpha:
        printed_history=printed_history+ key+ alpha[key]+ "  "
    print(printed_history)

In [32]:
def display_results(results):
    for letter in results:
        print(letter[1],letter[2])

In [97]:
sample_wordle='hello'
first_guess='adieu'
second_guess='sport'
results, win=make_guess(sample_wordle, first_guess)
results2, win2=make_guess(sample_wordle, second_guess)
#display_results(results)
display_results(results2)

s : â¬œ
p : â¬œ
o : ðŸŸ¨
r : â¬œ
t : â¬œ


In [113]:
def play_wordle(wordle, show_keyboard='on', show_history='off'):
    history={}
    
    for Round in ['First','Second','Third', 'Fourth','Fifth','Sixth']:
        guess=input('{} guess: '.format(Round))
        guess= validate(guess)
        results, win=make_guess(wordle, guess)
        display_results(results)
        history=update_history(history, results)
        if show_history=='on':
            display_history(history)
        if show_keyboard=='on':
            display_keyboard(history)
        if win==1:
            print('Congrats! You guessed the wordle!')
            return 1
        elif Round=='Sixth':
            print( 'Ah, darn. The wordle was {}'.format(wordle))
            return 0
    

In [66]:
def play_random_wordle(show_keyboard='on', show_history='off'):
    import random
    wordle=wordle_words[random.randint(0, len(wordle_words))]
    play_wordle(wordle, show_keyboard, show_history)

In [101]:
sample_results=results
history1=update_history({},results)
history2=update_history(history1,results2)
history2

{0: [('a', ': â¬œ'), ('s', ': â¬œ')],
 1: [('d', ': â¬œ'), ('p', ': â¬œ')],
 2: [('i', ': â¬œ'), ('o', ': ðŸŸ¨')],
 3: [('e', ': ðŸŸ¨'), ('r', ': â¬œ')],
 4: [('u', ': â¬œ'), ('t', ': â¬œ')]}

In [67]:
play_random_wordle()

First guess: adieu
a : â¬œ
d : â¬œ
i : ðŸŸ©
e : â¬œ
u : â¬œ
a: â¬œ  b  c  d: â¬œ  e: â¬œ  f  g  h  i: ðŸŸ©  j  k  l  m  n  o  p  q  r  s  t  u: â¬œ  v  w  x  y  z  
Second guess: sport
s : ðŸŸ©
p : â¬œ
o : â¬œ
r : ðŸŸ©
t : â¬œ
a: â¬œ  b  c  d: â¬œ  e: â¬œ  f  g  h  i: ðŸŸ©  j  k  l  m  n  o: â¬œ  p: â¬œ  q  r: ðŸŸ©  s: ðŸŸ©  t: â¬œ  u: â¬œ  v  w  x  y  z  
Third guess: shirt
s : ðŸŸ©
h : â¬œ
i : ðŸŸ©
r : ðŸŸ©
t : â¬œ
a: â¬œ  b  c  d: â¬œ  e: â¬œ  f  g  h: â¬œ  i: ðŸŸ©  j  k  l  m  n  o: â¬œ  p: â¬œ  q  r: ðŸŸ©  s: ðŸŸ©  t: â¬œ  u: â¬œ  v  w  x  y  z  
Fourth guess: skirm
Word not in wordle words. Please enter a new word: stirs
s : ðŸŸ©
t : â¬œ
i : ðŸŸ©
r : ðŸŸ©
s : ðŸŸ¨
a: â¬œ  b  c  d: â¬œ  e: â¬œ  f  g  h: â¬œ  i: ðŸŸ©  j  k  l  m  n  o: â¬œ  p: â¬œ  q  r: ðŸŸ©  s: ðŸŸ©  t: â¬œ  u: â¬œ  v  w  x  y  z  
Fifth guess: stiry
Word not in wordle words. Please enter a new word: smirk
s : ðŸŸ©
m : ðŸŸ©
i : ðŸŸ©
r : ðŸŸ©
k : ðŸŸ©
a: â¬œ  b  c  d: â¬œ  e: â¬œ  f  g  h: â¬œ  i: ðŸŸ©  j  k: ðŸŸ©

## Cheating Algorithm

In [68]:
history1

{0: [('a', ': â¬œ'), ('s', ': â¬œ')],
 1: [('d', ': â¬œ'), ('p', ': â¬œ')],
 2: [('i', ': â¬œ'), ('o', ': ðŸŸ¨')],
 3: [('e', ': ðŸŸ¨'), ('r', ': â¬œ')],
 4: [('u', ': â¬œ'), ('t', ': â¬œ')]}

In [230]:
sample_gray=['a', 'd', 'i', 'p', 'r', 's', 't', 'u']
sample_yellow=['e', 'o']
sample_word='hello'
if len(set(sample_gray).intersection(set(sample_word)))>=1:
    print('cant be the word, theres a gray letter in this word')
elif len(set(sample_yellow).intersection(set(sample_word)))>=len(set(sample_yellow)):
    print('this could be a word')

this could be a word


### Meat

In [207]:
def give_suggestions(history):
    gray={}
    yellow={}
    green={}
    for index in history:
        for letter, status in history[index]:
            if status==': â¬œ':
                if letter in gray:
                    gray[letter].add(index)
                else:
                    gray[letter]={index}
            elif status==': ðŸŸ¨':
                if letter in yellow:
                    yellow[letter].add(index)
                else:
                    yellow[letter]={index}
            elif status==': ðŸŸ©':
                if letter in green:
                    green[letter].add(index)
                else:
                    green[letter]={index}

    filtered=[]
    included_letters=set(yellow).union(set(green))
    #if you have mulitple letters in a word (seats) but only one of that letter exists
    #in the wordle (phase) it will give one gray and one yellow so need to accomodate
    #for that
    excluded_letters=set(gray).difference(set(green))
    excluded_letters=excluded_letters.difference(set(yellow))

    for word in wordle_words:
        ##weeds out any words that have gray letters in them
        if len(set(excluded_letters).intersection(set(word)))>=1:
            pass
        ##finds words that have yellow letters in them
        elif len(included_letters.intersection(set(word)))>=len(included_letters):
            filtered.append(word)
            
    for word in filtered:
        for i in range(5):
            if word[i] in yellow:
                if yellow[word[i]]==i: #if the letter 
                    filtered.remove(word)
    return filtered, gray, yellow, green
            

In [214]:
filtered=[]

for word in wordle_words:
    if len(set(excluded_letters).intersection(set(word)))>=1:
        pass
    elif len(included_letters.intersection(set(word)))>=len(included_letters):
            filtered.append(word)
filtered

['abase',
 'abies',
 'abuse',
 'aches',
 'aegis',
 'aisle',
 'amens',
 'amuse',
 'anise',
 'apses',
 'ashen',
 'ashes',
 'asses',
 'avens',
 'axles',
 'babes',
 'bakes',
 'bales',
 'bases',
 'beaks',
 'beams',
 'beans',
 'beaus',
 'blase',
 'cafes',
 'cages',
 'cakes',
 'canes',
 'capes',
 'cases',
 'cause',
 'caves',
 'cease',
 'chase',
 'easel',
 'eases',
 'eaves',
 'essay',
 'exams',
 'faces',
 'fakes',
 'false',
 'faxes',
 'fleas',
 'gales',
 'games',
 'gapes',
 'gases',
 'gazes',
 'haves',
 'heals',
 'heaps',
 'james',
 'japes',
 'jeans',
 'laces',
 'lakes',
 'lanes',
 'lapse',
 'leaks',
 'leans',
 'leaps',
 'lease',
 'leash',
 'maces',
 'makes',
 'males',
 'manes',
 'manse',
 'mazes',
 'meals',
 'means',
 'names',
 'paces',
 'pages',
 'pales',
 'panes',
 'passe',
 'pause',
 'paves',
 'peaks',
 'peals',
 'phase',
 'pleas',
 'sable',
 'safes',
 'sages',
 'sakes',
 'salem',
 'sales',
 'salve',
 'sauce',
 'saves',
 'scale',
 'scape',
 'seals',
 'seams',
 'seamy',
 'sepia',
 'shake',


In [202]:
included_letters=set(yellow).union(set(green))
included_letters

excluded_letters=set(gray).difference(set(green))
excluded_letters=excluded_letters.difference(set(yellow))
excluded_letters

{'d', 'o', 'r', 't', 'w'}

In [192]:
def filter_suggestions(filtered, gray, yellow, green, history2):
    best_words=[]
    for word in filtered:
        for i in range(5):
            if word[i] in yellow:
                if yellow[word[i]]==i: #if the letter 
                    filtered.remove(word)
                
                
        

In [150]:
history2

{0: [('a', ': â¬œ'), ('h', ': ðŸŸ©')],
 1: [('d', ': â¬œ'), ('e', ': ðŸŸ©')],
 2: [('i', ': â¬œ'), ('l', ': ðŸŸ©')],
 3: [('e', ': ðŸŸ¨'), ('m', ': â¬œ')],
 4: [('u', ': â¬œ'), ('s', ': â¬œ')]}

In [158]:
filtered, gray, yellow, green= give_suggestions(history2)
filtered

['belch',
 'ethyl',
 'helen',
 'hello',
 'hotel',
 'hovel',
 'leech',
 'wheel',
 'whelk',
 'whelp',
 'whole']

In [160]:
results

[(0, 'a', ': â¬œ'),
 (1, 'd', ': â¬œ'),
 (2, 'i', ': â¬œ'),
 (3, 'e', ': ðŸŸ¨'),
 (4, 'u', ': â¬œ')]

In [173]:
def get_user_guess():
    guess=[]
    rounds=['first','second','third','fourth','fifth']
    for i in range(5):
        letter=input('Please put in {} letter of your guess: '.format(rounds[i]))
        color=input("""What was the color of the {} letter: {}?  
        1 for gray â¬œ, 2 for yellow ðŸŸ¨, 3 for green ðŸŸ©
        """.format(rounds[i],letter))
        if color=='1':
            status=': â¬œ'
        elif color=='2':
            status=': ðŸŸ¨'
        elif color=='3':
            status=': ðŸŸ©'
        guess.append((i, letter, status))
    display_results(guess)
    confirmation=input('Does this look correct? (yes, no)')
    if confirmation.lower()=='yes':
        return guess
    else:
        get_user_guess()
        

In [182]:
def validate_rounds():
    r=input('How many words have you guessed so far? ')
    try:
        if int(r) in [1,2,3,4,5,6]:
            return int(r)
        else:
            print('Please type a single digit between 1 and 6.')
            validate_rounds()
    except:
        print('Please type a single digit between 1 and 6.')
        validate_rounds()


In [187]:
def compile_user_guesses():
    r=validate_rounds()
    history={}
    for i in range(r):
        results=get_user_guess()
        update_history(history, results)
    return give_suggestions(history)

In [200]:
gray.keys()

dict_keys(['w', 'o', 'r', 't', 'd', 's'])

In [215]:
compile_user_guesses()

How many words have you guessed so far? 2
Please put in first letter of your guess: w
What was the color of the first letter: w?  
        1 for gray â¬œ, 2 for yellow ðŸŸ¨, 3 for green ðŸŸ©
        1
Please put in second letter of your guess: o
What was the color of the second letter: o?  
        1 for gray â¬œ, 2 for yellow ðŸŸ¨, 3 for green ðŸŸ©
        1
Please put in third letter of your guess: r
What was the color of the third letter: r?  
        1 for gray â¬œ, 2 for yellow ðŸŸ¨, 3 for green ðŸŸ©
        1
Please put in fourth letter of your guess: d
What was the color of the fourth letter: d?  
        1 for gray â¬œ, 2 for yellow ðŸŸ¨, 3 for green ðŸŸ©
        1
Please put in fifth letter of your guess: s
What was the color of the fifth letter: s?  
        1 for gray â¬œ, 2 for yellow ðŸŸ¨, 3 for green ðŸŸ©
        2
w : â¬œ
o : â¬œ
r : â¬œ
d : â¬œ
s : ðŸŸ¨
Does this look correct? (yes, no)yes
Please put in first letter of your guess: s
What was the color of the first lette

(['abase',
  'abies',
  'abuse',
  'aches',
  'aegis',
  'aisle',
  'amens',
  'amuse',
  'anise',
  'apses',
  'ashen',
  'ashes',
  'asses',
  'avens',
  'axles',
  'babes',
  'bakes',
  'bales',
  'bases',
  'beaks',
  'beams',
  'beans',
  'beaus',
  'blase',
  'cafes',
  'cages',
  'cakes',
  'canes',
  'capes',
  'cases',
  'cause',
  'caves',
  'cease',
  'chase',
  'easel',
  'eases',
  'eaves',
  'essay',
  'exams',
  'faces',
  'fakes',
  'false',
  'faxes',
  'fleas',
  'gales',
  'games',
  'gapes',
  'gases',
  'gazes',
  'haves',
  'heals',
  'heaps',
  'james',
  'japes',
  'jeans',
  'laces',
  'lakes',
  'lanes',
  'lapse',
  'leaks',
  'leans',
  'leaps',
  'lease',
  'leash',
  'maces',
  'makes',
  'males',
  'manes',
  'manse',
  'mazes',
  'meals',
  'means',
  'names',
  'paces',
  'pages',
  'pales',
  'panes',
  'passe',
  'pause',
  'paves',
  'peaks',
  'peals',
  'phase',
  'pleas',
  'sable',
  'safes',
  'sages',
  'sakes',
  'salem',
  'sales',
  'salve',

### More Testing

In [147]:
sample_wordle='hello'
first_guess='adieu'
second_guess='sport'
third_guess='helms'
results, win=make_guess(sample_wordle, first_guess)
results2, win=make_guess(sample_wordle, third_guess)
#results3, win=make_guess(sample_wordle, third_guess)

#display_results(results)
display_results(results2)
print(results3)

h : ðŸŸ©
e : ðŸŸ©
l : ðŸŸ©
m : â¬œ
s : â¬œ
[(0, 'h', ': ðŸŸ©'), (1, 'e', ': ðŸŸ©'), (2, 'l', ': ðŸŸ©'), (3, 'm', ': â¬œ'), (4, 's', ': â¬œ')]


In [145]:
history1=update_history({},results)


In [148]:
history1=update_history({},results)
history2=update_history(history1,results2)
#history3=update_history(history2, results3)
#history3

In [138]:
test=give_suggestions(history3)
test

(['hello', 'hovel', 'whole'],
 {'a': {0},
  's': {0, 4},
  'd': {1},
  'p': {1},
  'i': {2},
  'r': {3},
  'm': {3},
  'u': {4},
  't': {4}},
 {'o': {2}, 'e': {3}},
 {'h': {0}, 'e': {1}, 'l': {2}})

## Code Graveyard

In [30]:
def update_history(history, results):
    if history=={}:
        #create dictionary of letters
        alph='abcdefghijklmnopqrstuvwxyz'
        for letter in alph:
            history[letter]=''
        #update dictionary based on results
        for result in results:
            letter=result[1]
            status=result[2]
            history[letter]=[[result[1]],status]
    else:
        #update dictionary based on results
        for result in results:
            letter=result[1]
            status=result[2]
            if history[letter]=='ðŸŸ©' and status =='ðŸŸ¨':
                pass #won't update a green to a yellow
            else:
                history[letter][0].append( 
    return history

In [132]:
def give_suggestions(history):
    gray=[]
    yellow=[]
    green=[]
    for key in history:
        if history[key]==': â¬œ':
            gray.append(key)
        elif history[key]==': ðŸŸ¨':
            yellow.append(key)
        elif history[key]==': ðŸŸ©':
            green.append(key)

    suggestions=[]
    for word in wordle_words:
        ##weeds out any words that have gray letters in them
        if len(set(gray).intersection(set(word)))>=1:
            pass
        ##finds words that have yellow letters in them
        elif len(set(yellow).intersection(set(word)))>=len(set(yellow)):
            suggestions.append(word)
        
    return suggestions
            
        