## Retrieve Wordle Words

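Simple web-scraping code to retrieve lists of five-letter words for Wordle: the word list curated by The New York Times (via a GitHub gist) and the five-letter words listed by Merriam-Webster's word finder.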

In [37]:
import requests

### Scraping from GitHub

Source: https://gist.github.com/cfreshman

In [38]:
# Raw-content URL of the gist file holding the word list (the path includes a
# revision hash and the file name).
github_url = (
    "https://gist.githubusercontent.com/cfreshman/"
    "d97dbe7004522f7bc52ed2a6e22e2c04/raw/"
    "633058e11743065ad2822e1d2e6505682a01a9e6/"
    "wordle-nyt-words-14855.txt"
)

#### Loading Words

In [39]:
# Download the word list. requests never times out by default, so an explicit
# timeout (in seconds) keeps this cell from hanging on a stalled connection.
response = requests.get(github_url, timeout=30)

if response.status_code == 200:
    # Keep the raw text for saving, and a per-line list for inspection.
    github_words = response.text
    github_word_list = github_words.splitlines()
    print(f"Fetched {len(github_word_list)} words.")
    print(github_word_list[:10])
else:
    print(f"Failed to fetch the file: {response.status_code}")

Fetched 14855 words.
['aahed', 'aalii', 'aapas', 'aargh', 'aarti', 'abaca', 'abaci', 'aback', 'abacs', 'abaft']


#### Saving Words

In [40]:
# Persist the downloaded text exactly as fetched.
# NOTE: `response` is reassigned by the Merriam-Webster cells further down, so
# this guard only refers to the GitHub download when the notebook is run top to
# bottom.
if response.status_code == 200:
    # Explicit encoding so the saved file does not depend on the platform locale.
    with open("github_words.txt", "w", encoding="utf-8") as f:
        f.write(github_words)

### Scraping from Merriam-Webster

Source: https://www.merriam-webster.com/wordfinder

In [41]:
# URL prefix for the word-finder result pages; the cells below append
# "/<letter>/<page>" to it for each request.
mw_base = "https://www.merriam-webster.com/wordfinder/classic/begins/all/5"
# The starting letters to iterate over, one single-character string each.
mw_letters = list("abcdefghijklmnopqrstuvwxyz")

#### Checking Index of 5-Letter Words

In [42]:
# Fetch a single result page (first letter, first page) and dump its HTML to
# disk so the page structure can be inspected by hand.
mw_letter_index = 0
mw_page = 1

mw_url = f"{mw_base}/{mw_letters[mw_letter_index]}/{mw_page}"
# Explicit timeout (seconds): requests would otherwise wait indefinitely.
response = requests.get(mw_url, timeout=30)

if response.status_code == 200:
    mw_website = response.text
    # The page is arbitrary HTML, so write it as UTF-8 rather than the
    # platform default encoding, which may be unable to represent every
    # character in the document.
    with open("mw_test.txt", "w", encoding="utf-8") as f:
        f.write(mw_website)
else:
    print(f"Failed to fetch the file: {response.status_code}")

#### Loading Words

In [43]:
# Crawl the word-finder result pages letter by letter, page by page, and
# collect the text that follows every "/dictionary/" marker in the HTML.
# For each letter, pages are requested in increasing order until a page yields
# no marker at all; a non-200 response aborts the whole crawl.
mw_marker = "/dictionary/"
mw_word_length = 5  # number of characters taken after each marker
mw_timeout = 30     # seconds; requests has no timeout unless one is given

mw_word_list = []
mw_failed = False

for mw_letter in mw_letters:
    mw_page = 1

    while True:
        mw_url = f"{mw_base}/{mw_letter}/{mw_page}"
        response = requests.get(mw_url, timeout=mw_timeout)

        if response.status_code != 200:
            print(f"Failed to fetch the file: {response.status_code}")
            mw_failed = True
            break

        mw_website = response.text
        has_words = False

        # str.find jumps straight to each marker instead of slicing the page
        # at every character offset. Resuming the search at marker_pos + 1
        # (rather than after the marker) keeps overlapping matches, so the
        # collected items are the same as with a position-by-position scan.
        marker_pos = mw_website.find(mw_marker)
        while marker_pos != -1:
            word_start = marker_pos + len(mw_marker)
            mw_word_list.append(mw_website[word_start:word_start + mw_word_length])
            has_words = True
            marker_pos = mw_website.find(mw_marker, marker_pos + 1)

        if not has_words:
            # A page without any marker means this letter is exhausted.
            break
        mw_page += 1

    if mw_failed:
        break

if not mw_failed:
    print(f"Fetched {len(mw_word_list)} words.")
    print(mw_word_list[:10])

Fetched 14132 words.
['aahed', 'aalii', 'aargh', 'abaca', 'abaci', 'aback', 'abaft', 'abase', 'abash', 'abasi']


#### Saving Words

In [44]:
# Write the collected words one per line (no trailing newline after the last).
mw_words = "\n".join(mw_word_list)

# The entries are slices of scraped HTML, so use an explicit UTF-8 encoding
# instead of relying on the platform default.
with open("mw_words.txt", "w", encoding="utf-8") as f:
    f.write(mw_words)