<a href="https://colab.research.google.com/github/psb-david-petty/google-colaboratory/blob/master/spellingbee.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# `spellingbee.py`

In [7]:
#!/usr/bin/env python3
#
# spellingbee.py
#
"""
Solution to the NYTimes Spelling Bee puzzle.
https://www.nytimes.com/puzzles/spelling-bee
"""

# https://stackoverflow.com/a/5711095
import io, gzip, tarfile, zipfile
from urllib.request import urlopen

# https://docs.python-requests.org/en/master/
# or: requests.get(url).content

# https://docs.python.org/3/library/zipfile.html
# zipfile = ZipFile(io.BytesIO(resp.read()))
# names = zipfile.namelist()
# for name in names:
#     for line in zipfile.open(name).readlines():
#         print(line.decode('utf-8'))

# https://stackoverflow.com/a/49174340
# NOTE(review): the assignment below swaps the ssl module's private default
# HTTPS context factory for the unverified one, process-wide. Presumably this
# is so the urlopen() downloads further down work on hosts whose certificate
# check fails (see the linked answer) -- it also means server certificates
# are not validated; confirm that is acceptable before reusing this code.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

################################ defs ################################

import itertools, os.path, string
from urllib.parse import urlparse

def hasonly(word, letters):
    """Return True if every element of word also occurs in letters.

    An empty word is accepted for any letters; otherwise the result is False
    as soon as word contains an element that letters lacks.
    """
    # "word uses nothing outside letters" is exactly a subset test.
    return set(word) <= set(letters)

def musthave(word, letters):
    """Return True if every element of letters occurs in word.

    Empty letters impose no requirement, so the result is then True for any
    word.
    """
    # "all required letters are present" is a subset test in the other
    # direction from hasonly.
    return set(letters) <= set(word)

def is_valid(w):
    """Tell whether w is an acceptable word-list entry.

    A falsy w (e.g. the empty string) is returned unchanged. Otherwise the
    result is True only when w consists solely of ASCII letters and '-' and
    is written entirely in lower case or entirely in upper case.
    """
    if not w:
        # Mirror short-circuit 'and': hand back the falsy value itself.
        return w
    if not hasonly(w, string.ascii_letters + '-'):
        return False
    # Mixed-case entries (e.g. capitalized proper nouns) are rejected.
    return w == w.lower() or w == w.upper()

def txtwordset(uri, key, v=False, n=100):
    """Download the plain-text word list at uri and return it as a set.

    Every line of the response is decoded as UTF-8 and stripped of
    surrounding whitespace; lines accepted by is_valid are lower-cased and
    collected. When v is true, key, the number of words, and the first n
    words in sorted order are printed.
    """
    with urlopen(uri) as resp:
        # Decode everything up front so a bad byte fails before filtering.
        candidates = [raw.decode('utf-8').strip() for raw in resp.readlines()]
        wordset = set()
        for candidate in candidates:
            if is_valid(candidate):
                wordset.add(candidate.lower())
        if v:
            print(f"{key}({len(wordset)}): {sorted(wordset)[: n]}")
        return wordset

def zipwordsets(uri, names, wordssets, v=False, n=100):
    """Download the gzip-compressed tar archive at uri and load word sets.

    For each member path in names, the member's lines are decoded as UTF-8
    and stripped; lines accepted by is_valid are lower-cased and stored as a
    set in wordssets under the member's base name. wordssets is modified in
    place and nothing is returned. When v is true, the archive's member names
    and, per word set, its size and first n sorted words are printed.
    """
    with urlopen(uri) as resp:
        # The whole archive is buffered in memory so tarfile can seek in it.
        archive = io.BytesIO(resp.read())
        with tarfile.open(fileobj=archive, mode='r:gz') as tar:
            zipname = os.path.basename(urlparse(uri).path)
            if v:
                print(f"{zipname}: {tar.getnames()}")
            for name in names:
                key = os.path.basename(name)
                member = tar.extractfile(name)
                candidates = [raw.decode('utf-8').strip()
                    for raw in member.readlines()]
                wordset = set()
                for candidate in candidates:
                    if is_valid(candidate):
                        wordset.add(candidate.lower())
                if v:
                    print(f"{key}({len(wordset)}): {sorted(wordset)[: n]}")
                wordssets[key] = wordset

def wordssites(verbose=False):
    """Return dict of sets of words, keyed by word-list name, downloaded from:
    URI: https://raw.githubusercontent.com/dolph/dictionary/master/enable1.txt
    URI: https://raw.githubusercontent.com/dolph/dictionary/master/ospd.txt
    URI: https://raw.githubusercontent.com/dolph/dictionary/master/popular.txt
    URI: https://raw.githubusercontent.com/dolph/dictionary/master/unix-words
    URI: https://www.wordgamedictionary.com/english-word-list/download/english.txt
    URI: https://www.wordgamedictionary.com/sowpods/download/sowpods.txt
    URI: https://www.wordgamedictionary.com/twl06/download/twl06.txt
    URI: https://raw.githubusercontent.com/elasticdog/yawl/master/yawl-0.3.2.03.tar.gz yawl-0.3.2.03/sigword.list
    URI: https://raw.githubusercontent.com/elasticdog/yawl/master/yawl-0.3.2.03.tar.gz yawl-0.3.2.03/word.list
    URI: https://sdsawtelle.github.io/blog/output/scrabble-cheatsheet-with-python.html # cannot directly extract OWL3_Dictionary.7z

    Plain-text lists are keyed by the base name of their URL path (e.g.
    'enable1.txt'); lists from the yawl archive are keyed by the base name of
    the archive member (e.g. 'word.list').

    When verbose is true, a banner is printed before each group of sources
    and the download helpers print a summary of every word set.
    """
    wordssets = dict()

    # Plain-text sources, grouped under the banner printed in verbose mode.
    # The first group is hosted in the dolph/dictionary repository, so its
    # banner is 'dolph' (it was previously mislabelled 'elasticdog', which is
    # the owner of the yawl archive handled below).
    textsources = [
        ('dolph', [
            'https://raw.githubusercontent.com/dolph/dictionary/master/enable1.txt',
            'https://raw.githubusercontent.com/dolph/dictionary/master/ospd.txt',
            'https://raw.githubusercontent.com/dolph/dictionary/master/popular.txt',
            'https://raw.githubusercontent.com/dolph/dictionary/master/unix-words',
        ]),
        ('wordgamedictionary', [
            'https://www.wordgamedictionary.com/english-word-list/download/english.txt',
            'https://www.wordgamedictionary.com/sowpods/download/sowpods.txt',
            'https://www.wordgamedictionary.com/twl06/download/twl06.txt',
        ]),
    ]
    for banner, uris in textsources:
        if verbose: print('#' * 10, banner)
        for uri in uris:
            key = os.path.basename(urlparse(uri).path)
            wordssets[key] = txtwordset(uri, key, verbose)

    # The yawl lists live inside one tar.gz; both members are read from a
    # single download.
    if verbose: print('#' * 10, 'yawl')
    uri = 'https://raw.githubusercontent.com/elasticdog/yawl/master/yawl-0.3.2.03.tar.gz'
    keys = ['yawl-0.3.2.03/word.list', 'yawl-0.3.2.03/sigword.list', ]
    zipwordsets(uri, keys, wordssets, verbose)

    return wordssets

def wordsfiles(wordsdir=os.path.dirname(os.path.abspath('')),
      wordsfiles=[
          'enable1.txt', 'ospd.txt', 'popular.txt', 'unix-words',
          'english.txt', 'sowpods.txt', 'twl06.txt',
          'sigword.list', 'word.list',
          'OWL3_Dictionary.txt',
      ], verbose=False):
    """Read local word-list files into a dict of word sets.

    Each name in wordsfiles is opened inside wordsdir and split on newlines;
    entries accepted by is_valid are lower-cased and stored as a set under
    the file name. The default wordsdir is the parent of the working
    directory as it was when this function was defined. When verbose is
    true, every file name is echoed on one line as it is read.
    """
    wordssets = {}
    for wordsname in wordsfiles:
        if verbose:
            print(f"{wordsname} ", end='')
        path = os.path.join(wordsdir, wordsname)
        with open(path, 'r') as handle:
            entries = handle.read().split('\n')
        accepted = filter(is_valid, entries)
        wordssets[wordsname] = set(map(str.lower, accepted))
    if verbose:
        # Terminate the line of echoed file names.
        print()
    return wordssets

def spellingbee(must, only, length, verbose=False):
    """Return sorted list of spelling-bee words.

    A word qualifies when it is at least length characters long, contains
    every letter of must, and contains no letter outside only. must and only
    are compared case-insensitively (both are lower-cased; the word sets are
    already lower case).

    The word sets are downloaded with wordssites on every call. Only the
    'word.list' set is searched. When verbose is true, the pairwise
    intersection sizes of all downloaded sets and the size of their union
    are printed first.
    """
    # Word-list files linked from:
    # https://github.com/dolph/dictionary
    # https://www.wordgamedictionary.com/sowpods/download/sowpods.txt
    # https://github.com/elasticdog/yawl
    # https://sdsawtelle.github.io/blog/output/scrabble-cheatsheet-with-python.html

    # Alternative source (local files): wordsfiles(verbose=verbose)
    wordsdict = wordssites(verbose=verbose)

    if verbose:
        print('\n'.join([f"Intersection from files: "
            f"{x[0]}: {len(wordsdict[x[0]])}; "
            f"{x[1]}: {len(wordsdict[x[1]])}; "
            f"\u2229 {len(wordsdict[x[0]].intersection(wordsdict[x[1]]))}"
                for x in itertools.combinations(wordsdict, 2)]))
        print(f"{len(set.union(*wordsdict.values()))} unique words.")

    # Search the yawl word list only. The union of every downloaded set used
    # to be built here and then immediately discarded; to search the union
    # instead, use set.union(*wordsdict.values()).
    words = wordsdict['word.list']

    # Look for words that must have must and have only only. Filtering
    # before sorting gives the same list while sorting far fewer words.
    m, o = must.lower(), only.lower()
    return sorted(w for w in words
        if len(w) >= length and musthave(w, m) and hasonly(w, o))

################################ test ################################

import optparse, os, sys

# TODO: fix spacing

class SpellingbeeOptionParser(optparse.OptionParser):
    """OptionParser whose help flag is -?/--help and whose errors show help.

    The stock -h/--help option is removed and re-registered as -?/--help.
    """

    def __init__(self, **kwargs):
        """Build the parser from kwargs, then swap the help option."""
        super().__init__(**kwargs)
        self.remove_option("-h")
        self.add_option(
            "-?", "--help",
            action="help",
            help="show this help message and exit",
        )

    def error(self, msg):
        """Write msg to stderr, print the full help text, and exit with 2."""
        name = self.get_prog_name()
        sys.stderr.write(f"{name}: error: {msg}\n\n")
        self.print_help()
        sys.exit(2)

def test(argv):
    """Solve one Spelling Bee puzzle from command-line style arguments.

    argv is a full argument vector (argv[0] is the program name). The puzzle
    letters come either from the single positional argument or, with -i,
    from a keyboard prompt; the first letter is the one every word must
    contain. The letters are stripped and lower-cased before use so that
    upper-case input matches the lower-case word lists. The words found,
    the pangrams among them, and a score are printed.

    Exits through the parser's error() (status 2) when the number of
    positional arguments is wrong or no letters were supplied.
    """
    # Parse command-line options.
    usage = "usage: %prog {LETTERS | -i} [-l L] [-? -v]"
    description = "Find spelling-bee words using LETTERS and including LETTERS[0]."
    parser = SpellingbeeOptionParser(usage=usage, description=description, version="1.0")
    parser.add_option("-i", "--input",
        action="store_true", dest="i", default=False,
        help="input LETTERS from keyboard? [%default]")
    parser.add_option("-l", "--length",
        action="store", type='int', dest="l", default=5,
        help="words of length >= L [%default]")
    parser.add_option("-v", "--verbose",
        action="store_true", dest="verbose", default=False,
        help="echo status information while processing [%default]")
    opts, args = parser.parse_args(args=argv[1:])
    # Process command-line options.
    len_args = 0 if opts.i else 1
    if len(args) != len_args:
        error = f"too {'few' if len(args) < len_args else 'many'} arguments"
        parser.error(error)
    letters = input('SpellingBee letters: ') if opts.i else args[0]
    # Normalize once: the solver lower-cases its inputs, and the pangram test
    # below compares against lower-case words, so mixed-case input must not
    # leak through.
    letters = letters.strip().lower()
    if not letters:
        # letters[0] below would otherwise raise IndexError.
        parser.error("no letters given")
    solution = spellingbee(letters[0], letters, opts.l, opts.verbose)
    # Score and print solutions: one point per word, two extra per pangram.
    letterset = set(letters)
    threePointers = [w for w in solution if set(w) == letterset]   # pangram
    score = len(solution) + 2 * len(threePointers)
    print(f"Words: {solution}\nPangrams: {threePointers}")
    print(f"{len(solution)} words score {score}")

if __name__ == '__main__':
    # Work out whether we are running inside an interactive host (IDLE,
    # PyCharm with the PYCHARM environment variable set, or a notebook where
    # __file__ is undefined) rather than from a command line.
    is_idle = 'idlelib' in sys.modules
    is_pycharm = int(os.getenv('PYCHARM', 0))
    is_jupyter = '__file__' not in globals()
    if is_idle or is_pycharm or is_jupyter:
        # Tests for hasonly and musthave; expected: False True False True
        print(
            hasonly('victor', 'vteimpr'),
            hasonly('viper', 'vteimpr'),
            musthave('viper', 'vteimpr'),
            musthave('primitive', 'vteimpr'),
        )
        # Past puzzles; the last entry is the one left in letters.
        puzzles = (
            'vteimpr',
            'mailpry',
            'uatonmi',  # 2016/01/31
            'maiortu',  # 2016/05/15
            'ncehikt',  # 2016/07/03
            'oglntuy',  # 2019/03/02
            'cehilnp',  # 2019/11/10
            'lcnauif',  # 2021/01/11
            'pemntil',  # 2021/11/08
            'yrmaloj',  # 2021/11/12
        )
        letters = puzzles[-1]
        # test([sys.argv[0], letters, '-v', ])
        # Collab Jupyter Notebook
        test([sys.argv[0], '-i', '-v', ])
    else:
        test(sys.argv)


False True False True
SpellingBee letters: asdwert
Words: ['adawed', 'adaws', 'added', 'adder', 'adders', 'address', 'addressed', 'addressee', 'addressees', 'addresser', 'addressers', 'addresses', 'addrest', 'adrad', 'adread', 'adreaded', 'adreads', 'adred', 'adward', 'adwarded', 'adwards', 'aedes', 'aerate', 'aerated', 'aerates', 'arars', 'aread', 'areads', 'areae', 'arear', 'areas', 'aredd', 'arede', 'aredes', 'arere', 'arete', 'aretes', 'arets', 'arett', 'aretted', 'aretts', 'arras', 'arrased', 'arrases', 'arrear', 'arrears', 'arreede', 'arreedes', 'arrest', 'arrested', 'arrestee', 'arrestees', 'arrester', 'arresters', 'arrests', 'arret', 'arrets', 'arsed', 'arses', 'artwear', 'assart', 'assarted', 'assarts', 'assed', 'assert', 'asserted', 'asserter', 'asserters', 'asserts', 'asses', 'assess', 'assessed', 'assesses', 'asset', 'assets', 'astare', 'astart', 'astarted', 'astarts', 'aster', 'asters', 'astert', 'asterted', 'asterts', 'atars', 'attar', 'attars', 'attest', 'attested', 'att