# Wordle Notebook

This notebook is primarily intended as a coding demonstration of Python and Pandas for filtering and processing a set of string data.  Secondarily, it serves to simplify the daily filtering and sorting of candidate words for wordle-style puzzles, such as those found at https://www.nytimes.com/games/wordle/index.html

For the Python and Pandas coder, the example code includes:
1. Pulling text data out of a URL into a dataframe
2. Dynamic .assign statements using dictionaries
3. Method chaining filtering with Pandas
4. Logging statements within a method chain
5. Dynamically creating a variety of regex statements using list comprehensions, lambdas, and reduce

In [None]:
import pandas as pd
import re
import logging
from functools import reduce

In [None]:
# Root logger: "<timestamp> <message>" lines, INFO level and above
logging.basicConfig(
    format='%(asctime)s %(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p',
    level=logging.INFO,
)

In [None]:
# Read the remote word list as a header-less CSV into a single column named 'words'
WORD_LIST_URL = 'https://raw.githubusercontent.com/tabatkins/wordle-list/main/words'
df = pd.read_csv(WORD_LIST_URL, header=None, names=['words'])

In [None]:
# Add one column per letter position (l1..l5), built dynamically from a dictionary.
# The `pos=pos` default freezes the loop value inside each lambda; without it every
# lambda would look up `pos` when called and all five columns would hold the last
# letter. This replaces the previous eval()-generated lambdas.
df = df.assign(
    **{
        f'l{pos + 1}': (lambda frame, pos=pos: frame.words.str[pos])
        for pos in range(5)
    }
)

In [None]:
def pipe_logger(val, label):
    """Log ``label : val`` at INFO level and return ``val`` unchanged.

    Because the value is handed straight back, the call can be wrapped around
    any sub-expression (e.g. inside a method chain) to log it in passing.

    Args:
        val: Any value; it is passed through untouched.
        label: Text placed in front of the value in the log message.

    Returns:
        The same ``val`` object that was passed in.
    """
    # %-style arguments let logging defer the string formatting until the
    # record is actually emitted; the resulting text is the same as before
    logging.info('%s : %s', label, val)
    return val

## Find today's word

In [None]:
# `tries` holds one (guess, feedback) pair per submitted word.
# Both elements are 5-character strings aligned position by position:
#   guess    - the word that was submitted
#   feedback - the result for each letter of the guess:
#                - Lower case letter = Yellow match
#                - Upper case letter = Green match
#                - Space             = no match
tries = [
    # (guess, feedback)
    ('takes', '     '),
    # ('chino', '   n '),
]

In [None]:
# Generate remaining candidates out of the tries list of tuples
WORD_LENGTH = 5


def _normalise_tries(tries):
    """Validate the (guess, feedback) pairs and return them with lower-cased guesses.

    Raises:
        ValueError: if a guess is not exactly WORD_LENGTH letters, or a feedback
            string is not exactly WORD_LENGTH letters/spaces.
    """
    guess_shape = re.compile('[A-Za-z]{%d}' % WORD_LENGTH)
    feedback_shape = re.compile('[A-Za-z ]{%d}' % WORD_LENGTH)
    normalised = []
    for guess, feedback in tries:
        if not guess_shape.fullmatch(guess):
            raise ValueError(f'guess must be {WORD_LENGTH} letters: {guess!r}')
        if not feedback_shape.fullmatch(feedback):
            raise ValueError(
                f'feedback must be {WORD_LENGTH} letters or spaces: {feedback!r}'
            )
        normalised.append((guess.lower(), feedback))
    return normalised


def _present_letters(tries):
    """Return the lower-cased letters marked green or yellow in any try."""
    return {
        mark.lower()
        for _, feedback in tries
        for mark in feedback
        if mark != ' '
    }


def _unmatched_regex(tries):
    """Build a character class of letters that cannot appear anywhere in the word."""
    present = _present_letters(tries)
    # dict.fromkeys de-duplicates while keeping the order the letters were tried
    absent = dict.fromkeys(
        letter
        for guess, feedback in tries
        for letter, mark in zip(guess, feedback)
        # a grey copy of a letter that is green/yellow elsewhere only rules out
        # this position (handled in _incorrect_placement_regex), not the letter
        if mark == ' ' and letter not in present
    )
    if not absent:
        # '[]' is not a valid regex; this class can never match, so no word is dropped
        return r'[^\s\S]'
    return '[' + ''.join(absent) + ']'


def _successful_placement_regex(tries):
    """Build a pattern with each green letter in place and '.' everywhere else."""
    # keep only the capital (green) letters of each feedback string
    green_masks = [re.sub('[a-z]', ' ', feedback) for _, feedback in tries]
    merged = reduce(
        # fill the blanks of the accumulated mask with letters from the next mask
        lambda known, mask: ''.join(
            k if k != ' ' else m for k, m in zip(known, mask)
        ),
        green_masks,
        # the all-blank initial value also covers an empty list of tries
        ' ' * WORD_LENGTH,
    )
    return merged.replace(' ', '.')


def _unknown_placement_regex(tries):
    """Build one lookahead per yellow letter so every one of them must be present."""
    yellow = sorted(
        {mark for _, feedback in tries for mark in feedback if mark.islower()}
    )
    return '^' + ''.join(f'(?=.*{letter}.*)' for letter in yellow) + '.*$'


def _incorrect_placement_regex(tries):
    """Build a per-position pattern excluding letters known to be wrong in that spot."""
    present = _present_letters(tries)
    pattern = []
    for position in range(WORD_LENGTH):
        banned = {}
        for guess, feedback in tries:
            mark = feedback[position]
            if mark.islower():
                # yellow: the letter is in the word, but not here
                banned[mark] = None
            elif mark == ' ' and guess[position] in present:
                # grey copy of a letter that is present elsewhere: not here either
                banned[guess[position]] = None
        # an empty negated set would be invalid, so fall back to '.'
        pattern.append('[^' + ''.join(banned) + ']' if banned else '.')
    return ''.join(pattern)


attempts = _normalise_tries(tries)
candidates = (
    df
    [
        # match words not containing letters that failed matches
        ~df.words.str.contains(
            pipe_logger(_unmatched_regex(attempts), 'Unmatched Regex'),
            regex=True,
        )

        # match words containing successful letter placement
        & df.words.str.contains(
            pipe_logger(
                _successful_placement_regex(attempts),
                'Successful Placement Regex',
            ),
            case=False,
            regex=True,
        )

        # match words that must contain characters but placement is unknown
        & df.words.str.contains(
            pipe_logger(_unknown_placement_regex(attempts), 'Unknown Placement'),
            regex=True,
        )

        # match words that do not have incorrect placement of characters
        & df.words.str.contains(
            pipe_logger(_incorrect_placement_regex(attempts), 'Incorrect Placement'),
            regex=True,
        )
    ]
)
logging.info(f'Possible Candidates : {candidates.shape[0]}')
display(candidates)

In [None]:
# Calculate letter frequencies in remaining candidate words
letter_columns = [f'l{position}' for position in range(1, 6)]
freq = (
    pd.concat(
        # key each count Series by its source column: since pandas 2.0
        # value_counts() names every result 'count', which would otherwise
        # produce five identically-labelled columns
        {column: candidates[column].value_counts() for column in letter_columns},
        axis=1,
    )
    # a letter never seen at a position has no count there
    .fillna(0)
    .astype('int')
    # occurrences of each letter summed over all five positions
    .assign(total=lambda counts: counts.sum(axis=1))
)
display(freq.sort_values('total', ascending=False))