In [1]:
import pandas as pd
import numpy as np
import string
import collections

In [27]:
def create_word_list_new(filename='wordle_words.csv'):
    """Load the word list and split every word into per-position letter columns.

    Parameters
    ----------
    filename : str
        Path of a header-less CSV file with one word per row.

    Returns
    -------
    pandas.DataFrame
        A frame with a ``words`` column plus ``pos_0`` .. ``pos_4`` columns
        holding the character found at each of the first five positions.

    Notes
    -----
    Prints the number of words that were read.
    """
    words_table = pd.read_csv(filename, names=['words'])
    for position in range(5):
        column_name = 'pos_{}'.format(position)
        # Index each word at this position; a word shorter than five
        # characters raises IndexError here.
        words_table[column_name] = words_table['words'].map(
            lambda word: word[position]
        )
    print('Number of words: ', words_table.shape[0])
    return words_table

In [3]:
def count_prob(letter, word_df, num):
    """Return the share of words that have ``letter`` at position ``num``.

    Parameters
    ----------
    letter : str
        The single character to look for.
    word_df : pandas.DataFrame
        Candidate words; must contain a ``pos_<num>`` column holding one
        character per row.
    num : int
        Position index selecting the ``pos_<num>`` column.

    Returns
    -------
    float
        Matching rows divided by total rows, or ``0.0`` when ``word_df`` has
        no rows.

    Raises
    ------
    KeyError
        If ``word_df`` has no ``pos_<num>`` column.
    """
    position_letters = word_df['pos_' + str(num)]
    total_words = len(word_df)
    if total_words == 0:
        # No candidates: report zero instead of dividing by zero.
        return 0.0
    # Vectorised equality replaces a per-row Python call.
    return (position_letters == letter).sum() / total_words

In [4]:
def generate_alphabet_prob(word_df):
    """Build a table of per-position letter frequencies for the candidates.

    Parameters
    ----------
    word_df : pandas.DataFrame
        Candidate words with ``pos_0`` .. ``pos_4`` single-character columns.

    Returns
    -------
    pandas.DataFrame
        One row per lowercase ASCII letter (column ``letters``) and columns
        ``prob_0`` .. ``prob_4`` giving the share of candidate words that have
        that letter at that position. All probabilities are ``0.0`` when
        ``word_df`` has no rows.

    Raises
    ------
    KeyError
        If any of the ``pos_0`` .. ``pos_4`` columns is missing.
    """
    alphabet_df = pd.DataFrame(list(string.ascii_lowercase), columns=['letters'])
    total_words = len(word_df)

    for position in range(5):
        position_letters = word_df['pos_' + str(position)]
        column_name = 'prob_' + str(position)
        if total_words == 0:
            # Nothing to count; avoid a 0/0 division.
            alphabet_df[column_name] = 0.0
            continue
        # One pass per position: count every letter at once, then look the
        # 26 letters up in those counts (letters never seen map to NaN -> 0).
        letter_counts = position_letters.value_counts()
        alphabet_df[column_name] = (
            alphabet_df['letters'].map(letter_counts).fillna(0) / total_words
        )

    return alphabet_df

In [12]:
def word_score(word, alphabet_df):
    """Score a word by summing its letters' positional probabilities.

    Parameters
    ----------
    word : str
        The word to score.
    alphabet_df : pandas.DataFrame
        Table with a ``letters`` column and one ``prob_<i>`` column per
        position that occurs in ``word``.

    Returns
    -------
    float
        Sum of ``prob_<position>`` over the distinct letters of ``word``.

    Notes
    -----
    Positions are stored in a dict keyed by letter, so a letter that occurs
    more than once contributes only once, at its last position. Letters that
    are absent from ``alphabet_df`` come out of the left merge as NaN and are
    skipped by the sum.
    """
    last_position = {}
    for position, letter in enumerate(word):
        # A later occurrence overwrites the position of an earlier one.
        last_position[letter] = position

    letter_table = pd.DataFrame({
        'letters': list(last_position.keys()),
        'position': list(last_position.values()),
    })
    scored = letter_table.merge(alphabet_df, how='left', on=['letters'])
    scored['prob'] = scored.apply(find_prob, axis=1)
    return scored['prob'].sum()


def find_prob(row):
    """Return the value of the ``prob_<position>`` entry named by ``row['position']``."""
    return row['prob_{}'.format(row['position'])]

In [19]:
_WORD_LENGTH = 5  # number of pos_<i> columns each candidate row carries


def _filter_by_feedback(word_df, input_word, round_result):
    """Return a copy of the rows of word_df consistent with one round of feedback."""
    # (position, (guessed letter, mark)) for every position that has both.
    marked = list(enumerate(zip(input_word, round_result)))
    green_letters = {letter for _, (letter, mark) in marked if mark == 'g'}
    yellow_letters = {letter for _, (letter, mark) in marked if mark == 'y'}

    def letter_at(position, letter):
        return (word_df['pos_' + str(position)] == letter).to_numpy(dtype=bool)

    def contains(letter):
        return word_df['words'].str.contains(letter, regex=False).to_numpy(dtype=bool)

    keep = np.ones(len(word_df), dtype=bool)
    for position, (letter, mark) in marked:
        if mark == 'g':
            keep &= letter_at(position, letter)
        elif mark == 'y':
            keep &= ~letter_at(position, letter) & contains(letter)
        elif mark == 'b':
            if letter in yellow_letters:
                # The letter is in the word (yellow elsewhere), so a black
                # mark only rules it out at this exact position.
                keep &= ~letter_at(position, letter)
            elif letter in green_letters:
                # The letter is confirmed at its green position(s); rule it
                # out everywhere else.
                green_positions = {
                    other_position
                    for other_position, (other_letter, other_mark) in marked
                    if other_letter == letter and other_mark == 'g'
                }
                for other_position in set(range(_WORD_LENGTH)) - green_positions:
                    keep &= ~letter_at(other_position, letter)
            else:
                keep &= ~contains(letter)

    # Copy so that adding columns later does not write into a slice of word_df.
    return word_df[keep].copy()


def suggest_word(input_word='', round_result='', word_df='', iteration=1, all_suggested=None):
    """Rank the remaining candidates, print the best ones and ask for the next guess.

    Parameters
    ----------
    input_word : str
        The word guessed in the previous round (unused when ``iteration`` is 1).
    round_result : str
        Feedback for ``input_word``, one character per letter: ``g`` (green),
        ``y`` (yellow) or ``b`` (black). Other characters are ignored.
    word_df : pandas.DataFrame or ''
        Current candidates with ``words``, ``pos_0`` .. ``pos_4`` and, on
        iteration 1, a ``word_prob`` column. When it is empty on iteration 1
        the word list is loaded and scored from scratch.
    iteration : int
        Attempt number; 1 skips filtering.
    all_suggested : list of str, optional
        Words already played; they are skipped when picking the suggestion
        as long as another candidate is available.

    Returns
    -------
    tuple
        ``(word, candidates)`` where ``word`` is the suggestion, or whatever
        the user typed instead, and ``candidates`` is the filtered frame
        sorted by ``word_prob`` in descending order with a fresh index.

    Raises
    ------
    ValueError
        If no candidate word is left.
    """
    if all_suggested is None:
        all_suggested = []

    print('Attempt Number: ', iteration)

    already_scored = False
    if iteration == 1 and len(word_df) == 0:
        candidates = create_word_list_new()
    elif iteration == 1:
        # Caller supplied a frame that already carries word_prob.
        candidates = word_df.copy()
        already_scored = True
    else:
        print('Number of words before iteration: ', word_df.shape[0])
        candidates = _filter_by_feedback(word_df, input_word, round_result)
        print('Number of words after filters: ', candidates.shape[0])

    if len(candidates) == 0:
        raise ValueError(
            'No candidate words are left; check the guess and result that were entered.'
        )

    if not already_scored:
        # Re-estimate letter frequencies on the surviving words only.
        alphabet_df = generate_alphabet_prob(candidates)
        candidates['word_prob'] = candidates['words'].apply(lambda x: word_score(x, alphabet_df))

    ranked = candidates.sort_values(by=['word_prob'], ascending=False).reset_index(drop=True)

    # Prefer words that have not been played yet; if every remaining candidate
    # was already played (e.g. the puzzle is solved) fall back to the full ranking.
    not_played = ranked[~ranked['words'].isin(all_suggested)]
    shortlist = ranked if not_played.empty else not_played

    suggested_word = shortlist['words'].iloc[0]
    print('Top 5 Choices')
    print(shortlist[['words', 'word_prob']].iloc[:5])
    print('Suggested word: ', suggested_word)

    reply = input('Press y to continue with suggested word else type other word: ').strip()
    # A blank reply keeps the suggestion rather than becoming an empty guess.
    if reply and reply != 'y':
        suggested_word = reply

    return suggested_word, ranked
   

In [14]:
def start(suggested_word, word_df, iteration, all_suggested):
    """Run one round: collect feedback for the last guess and get the next suggestion.

    Parameters
    ----------
    suggested_word : str
        The word played in the previous round (ignored on iteration 1).
    word_df : pandas.DataFrame
        Current candidate words, passed through to ``suggest_word``.
    iteration : int
        Attempt number; on 1 no feedback is requested.
    all_suggested : list of str
        Words already played, passed through to ``suggest_word``.

    Returns
    -------
    tuple
        ``(suggested_word, word_df)`` as returned by ``suggest_word``.
    """
    if iteration == 1:
        return suggest_word(word_df=word_df)

    # Keep asking until the feedback has one mark per guessed letter and uses
    # only the marks the filters understand; anything else would be silently
    # ignored or index past the end of the guess.
    while True:
        round_result = input("Enter result: ").strip().lower()
        if len(round_result) == len(suggested_word) and set(round_result) <= set('bgy'):
            break
        print('Result must be', len(suggested_word), 'characters using only b, g and y')

    return suggest_word(suggested_word, round_result, word_df, iteration, all_suggested)

def intersection(lst1, lst2):
    """Return the items of ``lst1`` that also occur in ``lst2``.

    The order of ``lst1`` is kept and duplicates in ``lst1`` are preserved.
    """
    shared = []
    for item in lst1:
        if item in lst2:
            shared.append(item)
    return shared

In [39]:
# Load the full word list once, derive per-position letter frequencies from
# it, and attach each word's score so the first suggestion needs no reload.
word_df_initial = create_word_list_new('wordle_words.csv')
alphabet_df_initial = generate_alphabet_prob(word_df_initial)
word_df_initial['word_prob'] = word_df_initial['words'].map(
    lambda word: word_score(word, alphabet_df_initial)
)

Number of words:  12972


In [40]:
# Wordle allows six guesses, so the session stops after the sixth suggestion
# instead of prompting forever.
MAX_ATTEMPTS = 6

play = "y"
suggested_word = ''
word_df = word_df_initial.copy()  # work on a copy so the scored list can be reused
iteration = 1
all_suggested = []

while play == 'y' and iteration <= MAX_ATTEMPTS:
    suggested_word, word_df = start(suggested_word, word_df, iteration, all_suggested)
    all_suggested.append(suggested_word)
    iteration += 1

Attempt Number:  1
Top 5 Choices
   words  word_prob
0  cares   0.822310
1  bares   0.821307
2  pares   0.817453
3  tares   0.814061
4  cores   0.809513
Suggested word:  cares
Press y to continue with suggested word else type other word: dears
Enter result: bbbbb
Attempt Number:  2
Number of words before iteration:  12972
Number of words after filters:  989
Top 5 Choices
   words  word_prob
0  ponty   0.959555
1  poncy   0.957533
2  conky   0.937310
3  colby   0.918099
4  pongy   0.917088
Suggested word:  ponty
Press y to continue with suggested word else type other word: point
Enter result: bybyb
Attempt Number:  3
Number of words before iteration:  989
Number of words after filters:  18
Top 5 Choices
   words  word_prob
0  blown   1.500000
1  flown   1.444444
2  clown   1.444444
3  known   1.277778
4  gluon   1.222222
Suggested word:  blown
Press y to continue with suggested word else type other word: clown
Enter result: bygby
Attempt Number:  4
Number of words before iteration:  18


KeyboardInterrupt: Interrupted by user