# I've thought frequently about how many unique real words could be made using only the letters of the Eye Chart. It sounds like a perfect task for computer science and Python. So here we go

### The Eye Chart letters as a set

In [108]:
snellen_set = set(['e', 'f', 'p', 't', 'o', 'z', 'l', 'p',
                     'e', 'd', 'p', 'e', 'c', 'f', 'd', 'e',
                     'd', 'f', 'c', 'z', 'p', 'f', 'e', 'l',
                     'o', 'p', 'z', 'd', 'd', 'e', 'f', 'p', 
                     'o', 't', 'e', 'c', 'l', 'e', 'f', 'o', 
                     'd', 'p', 'c', 't', 'f', 'd', 'p', 'l', 
                     't', 'c', 'e', 'o', 'p', 'e', 'z', 'o',
                     'l', 'c', 'f', 't', 'd'])
snellen_set = list(snellen_set)
len(snellen_set)

9

### As a list

In [109]:
snellen_list = ['e', 'f', 'p', 't', 'o', 'z', 'l', 'p',
                     'e', 'd', 'p', 'e', 'c', 'f', 'd', 'e',
                     'd', 'f', 'c', 'z', 'p', 'f', 'e', 'l',
                     'o', 'p', 'z', 'd', 'd', 'e', 'f', 'p', 
                     'o', 't', 'e', 'c', 'l', 'e', 'f', 'o', 
                     'd', 'p', 'c', 't', 'f', 'd', 'p', 'l', 
                     't', 'c', 'e', 'o', 'p', 'e', 'z', 'o',
                     'l', 'c', 'f', 't', 'd']
len(snellen_list)

61

In [110]:
for i in range(len(snellen_set)):
    snellen_set[i] = snellen_set[i].upper()

#### Making a class to get rid of curly brackets when printing out our set of Test Card letters

In [111]:
class sset(set):
    def __str__(self):
        return ', '.join([str(i) for i in self])

In [112]:
print(f'The letters that appear on the Snellen Test Card are: {sset(snellen_set)}')

The letters that appear on the Snellen Test Card are: L, P, O, C, E, F, T, Z, D


In [116]:
amount = len(sset(snellen_set))
chart_lenght = len(snellen_list)
print(f'There are only {amount} unique letters on the Eye Chart')
print(f'And there are {chart_lenght} total letters  on the Chart')

There are only 9 unique letters on the Eye Chart
And there are 61 total letters  on the Chart


## Let's see how many times each letter shows up on the card

#### Since the letters are printed in upper case block-style, let's convert them to upper case in our print out

In [120]:

for i in range(len(snellen_list)):
    snellen_list[i] = snellen_list[i].upper()

In [123]:
from collections import Counter

amount = Counter(snellen_list)

print("Here's how frequently each letter appears on the test card")
amount.most_common()

Here's how frequently each letter appears on the test card


[('E', 10),
 ('P', 9),
 ('F', 8),
 ('D', 8),
 ('O', 6),
 ('C', 6),
 ('T', 5),
 ('L', 5),
 ('Z', 4)]

## Bringing in the NLTK Corpus

In [16]:
import nltk
nltk.download('words')

[nltk_data] Downloading package words to
[nltk_data]     /Users/nathanwalter/nltk_data...
[nltk_data]   Package words is already up-to-date!


True

In [125]:
from nltk.corpus import words
word_list = words.words()
length_nltk = (len(word_list))

print(f"There are {length_nltk:,} words for us to reference in nltk")

There are 236,736 words for us to reference in nltk


# Using Trie to create a list of eye chart words from NLTK 

In [19]:
trie = {}

for word in word_list:
    cur = trie
    for l in word:
        cur  = cur.setdefault(l, {})
        cur['word'] = True # defined if this node indicates a complete word
        
def findWords(word, trie = trie, cur = '', word_list = []):
    for i, letter in enumerate(word):
        if letter in trie:
            if 'word' in trie[letter]:
                word_list.append(cur + letter)
            findWords(word, trie[letter], cur+letter, word_list )    
            # first example: findWords(word[:i] + word[i+1:], trie[letter], cur+letter, word_list )

    return word_list

words_longer = findWords("epfdoctlz")

In [20]:
longer_list = []

for word in words_longer:
    if len(word) > 1:
        longer_list.append(word)
print(longer_list) 

['ee', 'eel', 'eele', 'eelp', 'eelpo', 'eelpot', 'eelf', 'eelc', 'eell', 'ep', 'epe', 'epee', 'epep', 'epepo', 'epepop', 'eped', 'epo', 'epop', 'epope', 'epopee', 'epopo', 'epopoe', 'epopt', 'epopte', 'epod', 'epode', 'epoo', 'epoop', 'epoc', 'epol', 'epoll', 'ef', 'eff', 'effe', 'effec', 'effect', 'effecte', 'effectf', 'effecto', 'effectl', 'effectle', 'effet', 'effete', 'effo', 'effod', 'effl', 'efflo', 'efo', 'efol', 'eft', 'efte', 'efl', 'ed', 'ede', 'edeo', 'edeod', 'edeot', 'edeoto', 'edeol', 'edeolo', 'edel', 'edd', 'edde', 'eddo', 'eo', 'eop', 'eol', 'eoz', 'eozo', 'eozoo', 'ec', 'ece', 'ecp', 'ecd', 'ecde', 'eco', 'ecop', 'ecot', 'ecoto', 'ecol', 'ecole', 'ecolo', 'ecc', 'ecce', 'ecco', 'eccop', 'eccl', 'eccle', 'ect', 'ecte', 'ectep', 'ectet', 'ecto', 'ectoe', 'ectoet', 'ectop', 'ectopt', 'ectopte', 'ectopl', 'ectod', 'ectode', 'ectoc', 'ectoco', 'ectocoe', 'ectocoel', 'ectot', 'ectoto', 'ectol', 'ectole', 'ectolec', 'ectolo', 'ectolop', 'ectoz', 'ectozo', 'ectozoo', 'ecl', '

In [133]:
word_list_length = len(longer_list)
print(f""" The trie approach to finding words formed from letters of the eye chart 
using nltk, returns{word_list_length: ,} words. But, if you look closely, you'll see that it
is comprised of mostly fragments of full English words. Let's try to improve upon this.""")

 The trie approach to finding words formed from letters of the eye chart 
using nltk, returns 1,722 words. But, if you look closely, you'll see that it
is comprised of mostly fragments of full English words. Let's try to improve upon this.


# Let's use the trie with another word list and call it words3

In [22]:
trie = {}

with open('/Users/nathanwalter/jobsearch/Snellen_Letters_and_Words/web2.txt') as words3:


    for word in words3:
        cur = trie
        for l in word:
            cur  = cur.setdefault(l, {})
            cur['word'] = True # defined if this node indicates a complete word
        
def findWords(word, trie = trie, cur = '', words3 = []):
    for i, letter in enumerate(word):
        if letter in trie:
            if 'word' in trie[letter]:
                words3.append(cur)
            findWords(word, trie[letter], cur+letter, words3 )    
            # first example: findWords(word[:i] + word[i+1:], trie[letter], cur+letter, word_list )

    return [word for word in words3 if word in words3]

words3 = findWords("epfdoctlz")

In [23]:
third_list = []

for word in words3:
    if len(word) > 1:
        third_list.append(word)
third_list = set(third_list)   
third_list = list(third_list)
print(sorted(third_list))

['ce', 'cec', 'ced', 'cel', 'cele', 'cell', 'celle', 'cellep', 'cello', 'celo', 'celot', 'cep', 'cept', 'cet', 'ceto', 'cetol', 'cetot', 'cetoto', 'cl', 'cle', 'cled', 'clef', 'cleft', 'clefte', 'clep', 'clept', 'clet', 'clo', 'clod', 'clodd', 'clodl', 'clodle', 'clodp', 'clodpo', 'clodpol', 'clof', 'cloo', 'clot', 'clott', 'clotte', 'co', 'coc', 'cocc', 'cocco', 'coco', 'cocot', 'cocott', 'cocoz', 'cocoze', 'cocozel', 'cocozell', 'coct', 'cocto', 'cod', 'codd', 'coddl', 'code', 'codec', 'codef', 'codel', 'codep', 'codo', 'coe', 'coef', 'coeff', 'coeffe', 'coeffec', 'coel', 'coeld', 'coele', 'coelec', 'coelect', 'coelo', 'coelod', 'coelop', 'coeloz', 'coet', 'cof', 'cofe', 'cofeo', 'cofeof', 'cofeoff', 'cofeoffe', 'coff', 'coffe', 'coffee', 'coffeel', 'coffeep', 'coffeepo', 'coffl', 'col', 'colc', 'colco', 'cold', 'cole', 'colec', 'colect', 'coleo', 'coleop', 'coleopt', 'colep', 'coll', 'colle', 'collec', 'collect', 'collecte', 'collected', 'collet', 'collo', 'collop', 'collope', 'colo

In [134]:
words3_length = len(third_list)
print(f""" Words3 which uses a different list of words than NLTK,
returns {words3_length} words from the eye chart. 
However, upon closer inspection, most of these are the repeating beginnings of words. 
This is not the result we are looking for.""")

 Words3 which uses a different list of words than NLTK,
returns 931 words from the eye chart. 
However, upon closer inspection, most of these are the repeating beginnings of words. 
This is not the result we are looking for.


# Shifting away from trie for now

## Switching to Enchant

In [136]:
# uncomment below if you don't have enchant installed already
#!pip install pyenchant

In [137]:
from itertools import permutations
import enchant

In [82]:
# https://www.youtube.com/watch?v=nLWi-2b_OjE

d = enchant.Dict("en_US")
op = set()

inp = "epfdoctlz"
letter = [x.lower() for x in inp]

def enchanted_snellen(inp):
    for n in range(len(inp)):
        for y in list(permutations(letter, n)):
            z = ''.join(y)
            if len(z) > 2:
                if d.check(z):
                    op.add(z)
    return list(op)
enchant_snellen = enchanted_snellen(inp)

In [84]:
print(sorted(enchant_snellen))

['clef', 'cleft', 'clod', 'clop', 'clot', 'cod', 'code', 'coed', 'col', 'cold', 'colt', 'cop', 'cope', 'coped', 'cot', 'cote', 'cpd', 'cpl', 'def', 'deft', 'delft', 'depot', 'dept', 'doc', 'doe', 'dole', 'dolt', 'dope', 'dot', 'dote', 'doz', 'doze', 'dpt', 'ecol', 'elf', 'etc', 'fed', 'felt', 'fez', 'fled', 'floe', 'flop', 'flt', 'foe', 'fol', 'fold', 'fop', 'ftp', 'led', 'left', 'let', 'lode', 'loft', 'lofted', 'lop', 'lope', 'loped', 'lot', 'ltd', 'ode', 'oft', 'old', 'ole', 'ope', 'oped', 'opt', 'opted', 'pct', 'pelf', 'pelt', 'pet', 'plod', 'plot', 'pod', 'poet', 'pol', 'pole', 'poled', 'pot', 'ted', 'tel', 'toe', 'toed', 'told', 'tole', 'top', 'zed']


In [140]:
enchant_length = len(enchant_snellen)
print(f""" Enchant prints out a beautiful list of full English words. 
It is only {enchant_length} words long, though. I think it can be
improve upon.""")

 Enchant prints out a beautiful list of full English words. 
It is only 87 words long, though. I think it can be
improve upon.


# For loop with Break using NLTK word list

In [80]:
word_list = word_list
pattern = 'epfdoctlz'

def snellen_words(pattern):
    ans = []
    for word in word_list:
        find = True
        for letter in word:
            if letter not in pattern:
                find = False
                break
        if find == True and len(word) > 2:
            ans.append(word)
    return ans


chart_words = snellen_words(pattern)
chart_words

['cede',
 'cee',
 'cell',
 'celled',
 'cello',
 'celt',
 'cep',
 'cepe',
 'cled',
 'clee',
 'clef',
 'cleft',
 'clefted',
 'clep',
 'clod',
 'clodlet',
 'clodpoll',
 'cloff',
 'cloof',
 'cloop',
 'cloot',
 'clop',
 'clot',
 'clote',
 'cocco',
 'coco',
 'cocotte',
 'cocozelle',
 'cod',
 'coddle',
 'code',
 'codo',
 'codol',
 'coe',
 'coed',
 'coeffect',
 'coelect',
 'cofeoffee',
 'coff',
 'coffee',
 'coffeepot',
 'coffle',
 'coft',
 'col',
 'cold',
 'cole',
 'coll',
 'collect',
 'collected',
 'collet',
 'collop',
 'colloped',
 'colp',
 'colpeo',
 'colpocele',
 'colt',
 'coo',
 'coodle',
 'cooee',
 'coof',
 'cool',
 'coop',
 'coot',
 'cootfoot',
 'cop',
 'cope',
 'copepod',
 'copped',
 'coppet',
 'copple',
 'coppled',
 'cot',
 'cote',
 'coto',
 'cotte',
 'cotted',
 'coz',
 'coze',
 'decoct',
 'decode',
 'decollete',
 'dedo',
 'dee',
 'deed',
 'deedeed',
 'deep',
 'defect',
 'deflect',
 'deflected',
 'deft',
 'dele',
 'delete',
 'delf',
 'delft',
 'dell',
 'depeople',
 'deplete',
 'depot'

In [145]:
total = len(chart_words)
print(f""" When filtering for words containing 3 or more letters,
we are presented with {total} English words the Eye Chart letters. 
Although, not as large a number as some of the above examples,
the break method ensures that we don't return anything unless it is
a complete word in the nltk word list.""")

 When filtering for words containing 3 or more letters,
we are presented with 388 English words the Eye Chart letters. 
Although, not as large a number as some of the above examples,
the break method ensures that we don't return anything unless it is
a complete word in the nltk word list.
