### Wordle Helper

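Let's create a Wordle Helper: a notebook that scores every word in a five-letter word list by how common its letters are, and then narrows the list of possible answers as clues from the game come in.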

by Karthik Naga

In [1]:
import os
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Location of the word list (a CSV with a 'Word' column).
# Set the WORDLE_WORDS_CSV environment variable to point at your own copy;
# when it is unset, the author's original local path is used so existing
# setups keep working unchanged.
WORDS_CSV_PATH = os.environ.get(
    'WORDLE_WORDS_CSV',
    'C:\\Users\\knaga\\Documents\\Karthik_Files\\Other\\Code\\Python\\Wordle\\words.csv',
)
df = pd.read_csv(WORDS_CSV_PATH)
df.head()

Unnamed: 0,Word
0,aahed
1,aalii
2,aargh
3,aarti
4,abaca


In [3]:
# Split every word into one column per letter position (letter1 .. letter5)
# so that letter frequencies can be measured.
for position in range(5):
    df["letter{}".format(position + 1)] = df.Word.str[position]

df.describe()

Unnamed: 0,Word,letter1,letter2,letter3,letter4,letter5
count,12971,12971,12971,12971,12971,12971
unique,12971,26,26,26,26,26
top,aahed,s,a,a,e,s
freq,1,1565,2262,1236,2327,3958


In [4]:
# Peek at the last rows to confirm the per-position letter columns were filled in.
df.tail()

Unnamed: 0,Word,letter1,letter2,letter3,letter4,letter5
12966,zuzim,z,u,z,i,m
12967,zygal,z,y,g,a,l
12968,zygon,z,y,g,o,n
12969,zymes,z,y,m,e,s
12970,zymic,z,y,m,i,c


In [5]:
# Reshape to long form: one row per (word, letter position), with the letter
# itself in the 'Letters' column, so every letter can be counted in one place.
wordle_melted = df.melt(
    id_vars=['Word'],
    value_vars=['letter1', 'letter2', 'letter3', 'letter4', 'letter5'],
    value_name='Letters',
)

In [6]:
# Check the long-form frame: each row pairs a word with one of its letters
# ('variable' records which letterN column the letter came from).
wordle_melted.tail()

Unnamed: 0,Word,variable,Letters
64850,zuzim,letter5,m
64851,zygal,letter5,l
64852,zygon,letter5,n
64853,zymes,letter5,s
64854,zymic,letter5,c


In [7]:
# Build the scoring table: index = letter, 'Points' = number of times that
# letter occurs across all words and positions, most frequent letter first.
letter_freq = (
    wordle_melted
    .groupby('Letters')[['Word']]
    .count()
    .rename(columns={"Word": "Points"})
    .sort_values('Points', ascending=False)
)

In [8]:
# Sanity check on the scoring table: with this word list, 'x' must be worth 288 points.
assert letter_freq.loc['x','Points'] == 288

In [9]:
# these helper functions are wrapped in np.vectorize, so they accept a single word or an array-like of words (e.g. a pandas series)

#   and return each of the string's points 
@np.vectorize
def get_points(str):
    """Score a word by adding up the points of each of its letters.

    Every character is looked up in the 'Points' column of the module-level
    ``letter_freq`` table; a letter that occurs twice in the word is counted
    twice.

    Parameters
    ----------
    str : the word to score. (The name shadows the builtin ``str`` inside
        this function; it is kept so existing keyword callers still work.)

    Returns
    -------
    The sum of the letters' points (0 for an empty string).

    Raises
    ------
    KeyError
        If a character is not present in the index of ``letter_freq``.
    """
    return sum(letter_freq.loc[ch, 'Points'] for ch in str)

#   and whether its letters are all unique (True) or some letter repeats (False)
@np.vectorize
def unique_letters(str):
    """Tell whether a word is made of exactly five distinct characters.

    Parameters
    ----------
    str : the word to inspect. (The name shadows the builtin ``str`` inside
        this function; it is kept so existing keyword callers still work.)

    Returns
    -------
    True when the word contains exactly 5 different characters, else False.
    """
    distinct = set(str)
    return len(distinct) == 5


In [10]:
# Sanity check on the scorer: with this word list, 'aahed' must total 22852 points
# (the repeated 'a' is counted twice).
assert get_points('aahed') == 22852

In [11]:
# create new columns in the original data frame based on the calculations, and set Alive to True for each word
# (Alive marks a word as still being a possible answer)

df["Points"] = df["Word"].apply(get_points)
df["UniqueLetters"] = df["Word"].apply(unique_letters)
df["Alive"] = True

# drop the columns that contain each letter as they are no longer needed.
# errors="ignore" keeps this cell re-runnable: on a second run the letter
# columns are already gone and a plain drop would raise KeyError.
df = df.drop(columns=["letter1","letter2","letter3","letter4","letter5"], errors="ignore")

In [12]:
# Confirm the final layout: Word, Points, UniqueLetters and Alive, with the letter columns gone.
df.tail()

Unnamed: 0,Word,Points,UniqueLetters,Alive
12966,zuzim,9114,False,True
12967,zygal,13511,True,True
12968,zygon,11542,True,True
12969,zymes,17809,True,True
12970,zymic,10271,True,True


In [13]:
def reset_current_game():
    """Return a fresh game board.

    The board is a copy of the module-level ``df``, so changes made to the
    returned frame (such as switching 'Alive' to False) do not touch ``df``
    and a later reset starts clean again.
    """
    fresh_board = df.copy(deep=True)
    return fresh_board

def show_best_options(df, unique=False, n=15):
    """Print the highest-scoring words that are still possible answers.

    Parameters
    ----------
    df : DataFrame with at least the columns 'Points', 'Alive' and
        'UniqueLetters' (the last two boolean).
    unique : when True, only words whose 'UniqueLetters' flag is set are
        considered; when False, every word that is still 'Alive' is.
    n : how many words to print, ranked by 'Points' (default 15, the value
        that was previously fixed).

    Returns
    -------
    None. The selected rows are written to standard output with print().
    """
    still_possible = df.Alive
    if unique:
        still_possible = still_possible & df.UniqueLetters
    print(df[still_possible].nlargest(n, 'Points'))


In [14]:
# these functions set Alive to false (remove them from possible words) depending on the results of the game

@np.vectorize
def letter_remove(letter):
    """Rule out every word that contains ``letter``.

    Marks the matching rows of the module-level ``current_options`` frame as
    not 'Alive'. The frame is modified in place; nothing useful is returned
    (np.vectorize wraps the None result in an object array).

    ``letter`` is handed to ``str.contains`` as a regular expression.
    """
    has_letter = current_options['Word'].str.contains(letter, regex=True)
    current_options.loc[has_letter, 'Alive'] = False

@np.vectorize
def letter_confirmed(letter, pos):
    """Keep only the words that have ``letter`` at position ``pos``.

    Marks every row of the module-level ``current_options`` frame whose word
    does not match as not 'Alive'. The frame is modified in place; nothing
    useful is returned (np.vectorize wraps the None result in an object array).

    Parameters
    ----------
    letter : the confirmed letter. It is spliced into a regular expression,
        so it is interpreted as regex syntax.
    pos : 1-based position of the letter in the word (1 to 5).

    Raises
    ------
    ValueError
        If ``pos`` is outside 1..5. Without this check such a value produces
        a pattern that is not five characters long and silently marks words
        dead that should stay alive.
    """
    if not 1 <= pos <= 5:
        raise ValueError("pos must be between 1 and 5, got {!r}".format(pos))
    # Start from five wildcards and replace the one at `pos` with the letter,
    # e.g. letter='r', pos=2 -> '.r...'
    rgexp = '.....'
    rgexp = rgexp[:pos-1] + letter + rgexp[pos:]
    matches = current_options['Word'].str.contains(pat=rgexp, regex=True)
    current_options.loc[~matches, 'Alive'] = False

@np.vectorize
def letter_exists(letter):
    """Rule out every word that does not contain ``letter``.

    Marks the non-matching rows of the module-level ``current_options`` frame
    as not 'Alive'. The frame is modified in place; nothing useful is returned
    (np.vectorize wraps the None result in an object array).

    ``letter`` is handed to ``str.contains`` as a regular expression.
    """
    lacks_letter = ~current_options['Word'].str.contains(letter, regex=True)
    current_options.loc[lacks_letter, 'Alive'] = False


In [16]:
#reset the game board
# current_options is the global frame that letter_remove / letter_confirmed /
# letter_exists modify; re-run this cell to start a new game.
current_options = reset_current_game()

# Opening suggestions: the top-scoring words with five distinct letters.
show_best_options(current_options, unique=True)

        Word  Points  UniqueLetters  Alive
140    aeros   27910           True   True
549    arose   27910           True   True
10363  soare   27910           True   True
141    aesir   27231           True   True
533    arise   27231           True   True
8858   raise   27231           True   True
8966   reais   27231           True   True
9799   serai   27231           True   True
317    aloes   27122           True   True
10740  stoae   27047           True   True
11447  toeas   27047           True   True
537    arles   26842           True   True
3261   earls   26842           True   True
6042   laers   26842           True   True
6095   lares   26842           True   True


In [25]:
# LET'S PLAY!
# Use the three functions to help the program remove the right words from the possible answer list
# Each call below modifies the global current_options frame in place.

# Clues left commented out (not applied in this run):
#letter_exists('l')
#letter_exists('a')
#letter_remove('i')
# Letters ruled out. 'd' is passed a second time further down; the repeat has
# no further effect because those words are already marked not Alive.
letter_remove('d')
letter_remove('p')
letter_remove('g')
letter_remove('d')
# Letters confirmed at a 1-based position: e is 5th, r is 2nd, a is 3rd.
# The last call is the cell's final expression, so its return value
# (np.vectorize wrapping None) is what gets displayed as the cell output.
letter_confirmed('e', 5)
letter_confirmed('r', 2)
letter_confirmed('a', 3)


array(None, dtype=object)

In [26]:
# Show the best remaining candidates after the clues above; unique=False so
# words with repeated letters are included as well.
show_best_options(current_options, False)

        Word  Points  UniqueLetters  Alive
490    arame   24773          False   True
11969  urare   23477          False   True
1415   brame   20411           True   True
1412   brake   19940           True   True
4023   frame   19898           True   True
1426   brave   19129           True   True
1435   braze   18869           True   True
