In [2]:
from word_finder import WordFinder
# Shared WordFinder instance used by all the examples below
d = WordFinder()

In [3]:
# Add 1 letter (at any position) to make a new word
d.add_1_letter("race")

['races',
 'grace',
 'raced',
 'racer',
 'brace',
 'trace',
 'rance',
 'rache',
 'arace']

In [4]:
# Remove 1 letter (from any position) to make a new word
d.remove_1_letter("trace")

['race', 'trac', 'tace']

In [5]:
# Replace 1 letter to make a new word
# NOTE: the input word itself also shows up in the result (see 'play' in the output)
d.replace_1_letter("play")

['ploy',
 'blay',
 'slay',
 'plak',
 'clay',
 'plan',
 'plap',
 'play',
 'alay',
 'plat',
 'pray',
 'flay']

In [6]:
# Return every combination obtained by substituting the given letters
# Keys are 0-based positions in the word; values are the candidate letters for that position
d.replace_letters("computer", {2: "nl", 3: "qrs"})

['conruter', 'consuter', 'conquter', 'colruter', 'colsuter', 'colquter']

In [7]:
# Return the product of the frequencies of the given words
# If a word is not found in the word frequency dictionary, the lowest frequency is used instead
d.compute_freq(["hello", "world"]), d.compute_freq(["aaabbbccc"])

(np.float64(0.014236717413988867), np.float64(1.2711e-05))

In [8]:
# Rank candidate phrases by the product of their word frequencies and keep the top k
# NOTE: This naive approach ignores linguistic correlation between words
# E.g.: in the 2nd example "fake identity" should be the correct answer, but the product of the
#       frequencies of "fake" and "identity" is not the highest
#       For a better approach, consider using GPT or a language model that studies word collocations
income_candidates = ["annual incest", "annual income"]
identity_candidates = ["fake identity", "make identity", "fake identify", "lake identify"]
print(d.find_top_k(income_candidates, 1))
print(d.find_top_k(identity_candidates, 2))

['annual income']
['make identity', 'lake identify']


In [9]:
# However, when the solution is made of 2 nouns, there is a chance that it exists in the
# 10k noun-noun dictionary: expand the pattern into every candidate and keep the ones found there
grid = d.replace_letters(
    "co**t** **s**",
    {
        2: "nu",
        3: "ailn",
        5: "ehr",
        6: "aeiouy",
        8: "mn",
        9: "iou",
        11: "aei",
        12: "cl",
    },
)
matches = [phrase for phrase in grid if phrase in d.noun_noun_list]
for phrase in matches:
    print(phrase)

country music


In [10]:
# Solve a crossword: each * stands for one unknown letter
d.crossword("**itar**t")

['unitarist', 'guitarist']

In [11]:
# Whenever get_top_k is used, the results are ranked with a naive approach based on word frequency
# See find_top_k above for its limitations
d.crossword("**tho*", get_top_k=10)

['author',
 'method',
 'python',
 'pathol',
 'fathom',
 'mythos',
 'pathos',
 'hathor',
 'lithos',
 'pothos']

In [12]:
# Debug examples: with debug=True, a message explaining why nothing matched is printed
# before the (empty) result
print(d.crossword("x***z***", debug=True))
print(d.crossword("**********j", debug=True))

No match found for the given letter_info
[]
No match found for word of length 11 with letter j in position 10 (0-based)
[]


In [13]:
# Solve a crossword whose answer is made of 2 words
# The pattern carries no space: the results below split the letters at different positions
d.crossword2("g*o*d*y", get_top_k=10)

['good day',
 'g monday',
 'grow day',
 'good dry',
 'glow day',
 'grow dry',
 'go oddly',
 'geoid by',
 'g loudly',
 'grond by']

In [14]:
# Solve a crossword whose answer is made of any number of words (nbr_words)
# NOTE: This can be very slow due to the large search space
d.crossword_plus(cipher="s*e**u*at*r", nbr_words=3, get_top_k=10)

['see you water',
 'she you water',
 'see you later',
 'she you later',
 'see you qatar',
 'see a curator',
 'sue you water',
 'she you qatar',
 's be educator',
 'she a curator']

In [15]:
# Solve a multi-word crossword when the position of the space is known (written as a space in the pattern)
d.crossword_known_space("g*o* d*y", get_top_k=10)

['good day',
 'grow day',
 'good dry',
 'glow day',
 'grow dry',
 'geog day',
 'geol day',
 'gros day',
 'goog day',
 'good dey']

In [16]:
# Suppose we have the following information. We can combine replace_letters and crossword to search:
# replace_letters expands the partially known positions, and crossword fills in the remaining *
# 1st letter: k or l or m
# 2nd letter: unknown
# 3rd letter: a or e or i or o or u or y
# 4th letter: unknown
# 5th letter: o
# 6th letter: unknown
# 7th letter: r
# 8th letter: unknown
grid = d.replace_letters("****o*r*", {0: "klm", 2: "aeiouy"})
d.crossword(grid)

['leeboard', 'keyboard']

In [17]:
# Combining replace_letters with a solver works for the other crossword methods too (here: crossword2)
grid = d.replace_letters("*n*ual**c***", {6: "il", 7: "an", 11: "et"})
d.crossword2(grid)

['annual incube',
 'annual incide',
 'annual incase',
 'annual incise',
 'annual incave',
 'annual incage',
 'annual incute',
 'annual incuse',
 'annual incede',
 'annual incite',
 'annual income',
 'ungual incube',
 'ungual incide',
 'ungual incase',
 'ungual incise',
 'ungual incave',
 'ungual incage',
 'ungual incute',
 'ungual incuse',
 'ungual incede',
 'ungual incite',
 'ungual income',
 'annual incult',
 'annual incept',
 'annual incest',
 'annual incast',
 'annual incant',
 'ungual incult',
 'ungual incept',
 'ungual incest',
 'ungual incast',
 'ungual incant',
 'annual lacune',
 'ungual lacune',
 'annual lacert',
 'ungual lacert']

In [18]:
# Same combination with crossword_known_space: the space in the pattern marks the word boundary
# NOTE: no get_top_k is passed here, so the full (long) list of candidates is displayed
grid = d.replace_letters("*e*r* ****ay", replacement={0: "lm", 6: "st", 7:"iou"})
d.crossword_known_space(grid)

['metre towkay',
 'metra towkay',
 'merry towkay',
 'metro towkay',
 'metre subway',
 'metre sunday',
 'metre surnay',
 'metre sunway',
 'metre sunray',
 'metra subway',
 'metra sunday',
 'metra surnay',
 'metra sunway',
 'metra sunray',
 'merry subway',
 'merry sunday',
 'merry surnay',
 'merry sunway',
 'merry sunray',
 'metro subway',
 'metro sunday',
 'metro surnay',
 'metro sunway',
 'metro sunray',
 'metre sompay',
 'metra sompay',
 'merry sompay',
 'metro sompay',
 'lehrs towkay',
 'lepra towkay',
 'leary towkay',
 'leora towkay',
 'leery towkay',
 'lepre towkay',
 'leers towkay',
 'learn towkay',
 'lepry towkay',
 'lears towkay',
 'lehrs subway',
 'lehrs sunday',
 'lehrs surnay',
 'lehrs sunway',
 'lehrs sunray',
 'lepra subway',
 'lepra sunday',
 'lepra surnay',
 'lepra sunway',
 'lepra sunray',
 'leary subway',
 'leary sunday',
 'leary surnay',
 'leary sunway',
 'leary sunray',
 'leora subway',
 'leora sunday',
 'leora surnay',
 'leora sunway',
 'leora sunray',
 'leery subway

In [19]:
# cipher can also be a list of patterns: the output mixes matches for either pattern
d.crossword_plus(cipher=["s*e**u*atar", "s*e**u*ater"], nbr_words=3, get_top_k=10)

['see you water',
 'she you water',
 'see you later',
 'she you later',
 'see you qatar',
 'sue you water',
 'she you qatar',
 'sie you water',
 'see you cater',
 'see gnu water']

In [20]:
# Search words of a given length that start with a substring
# The 2nd call keeps only the top 5 matches via get_top_k
d.start_with_substring(substring="cons", word_len=7), d.start_with_substring(substring="cons", word_len=7, get_top_k=5)

(['consarn',
  'consent',
  'consign',
  'consist',
  'console',
  'consols',
  'consomm',
  'consort',
  'conspue',
  'constat',
  'conster',
  'consuls',
  'consult',
  'consume',
  'consumo',
  'consute'],
 ['consent', 'console', 'consult', 'consist', 'consume'])

In [21]:
# Search words of a given length that end with a substring
# The 2nd call keeps only the top 5 matches via get_top_k
d.end_with_substring(substring="ply", word_len=8), d.end_with_substring(substring="ply", word_len=8, get_top_k=5)

(['canreply',
  'centuply',
  'madcaply',
  'misapply',
  'multiply',
  'nonapply',
  'preapply',
  'recomply',
  'resupply',
  'scrimply',
  'sextiply',
  'sextuply',
  'undeeply',
  'underply',
  'unsimply',
  'unsupply'],
 ['multiply', 'resupply', 'unsupply', 'unsimply', 'underply'])

In [22]:
# Search words of a given length that contain a substring (at any position)
# The 2nd call keeps only the top 5 matches via get_top_k
d.contain_substring("ell", 5), d.contain_substring("ell", 5, get_top_k=5)

(['avell',
  'bella',
  'belle',
  'belli',
  'belly',
  'bello',
  'bells',
  'cella',
  'celli',
  'cello',
  'cells',
  'della',
  'delly',
  'dells',
  'dwell',
  'ellan',
  'ellen',
  'fella',
  'felly',
  'fells',
  'gelly',
  'helly',
  'hello',
  'hells',
  'yells',
  'jelly',
  'jello',
  'jells',
  'kella',
  'kelly',
  'knell',
  'mells',
  'nelly',
  'odell',
  'quell',
  'sella',
  'selle',
  'selli',
  'selly',
  'sells',
  'shell',
  'skell',
  'smell',
  'snell',
  'spell',
  'stell',
  'swell',
  'telly',
  'tells',
  'tellt',
  'udell',
  'welly',
  'wells'],
 ['cells', 'hello', 'kelly', 'shell', 'tells'])

In [23]:
# Solve an anagram: find the words that use exactly the given letters
d.anagram('mpoagrr')

['program']

In [24]:
# Solve an anagram whose answer is made of 2 words (both word orders are listed in the output)
d.anagram2("uicpmops", get_top_k=20)

['pop music',
 'music pop',
 'opp music',
 'music opp',
 'pius comp',
 'comp pius',
 'mips coup',
 'coup mips',
 'pop musci',
 'musci pop',
 'opium cps',
 'cps opium',
 'pc opiums',
 'opiums pc',
 'opium csp',
 'csp opium',
 'pious pcm',
 'pcm pious',
 'pious cpm',
 'cpm pious']

In [25]:
# Worked example:
# The solution has 8 letters. 7 of them are known (i, n, o, o, p, s, t) and 1 is unknown;
# the order of the letters is unknown as well.
# replace_letters tries every letter of the alphabet for the unknown one,
# and anagram then looks for words made of each resulting set of letters
alphabet = "abcdefghijklmnopqrstuvwxyz"
grid = d.replace_letters("*inoopst", replacement={0: alphabet})
for candidate in grid:
    solutions = d.anagram(candidate)
    print(candidate, solutions)

tinoopst ['spittoon']
cinoopst []
einoopst []
minoopst ['tompions']
sinoopst ['opsonist']
finoopst []
winoopst []
dinoopst []
pinoopst []
qinoopst []
jinoopst []
linoopst []
yinoopst []
iinoopst ['position', 'sopition']
ginoopst ['stooping']
zinoopst []
kinoopst []
ninoopst []
hinoopst []
binoopst []
uinoopst []
vinoopst []
xinoopst []
rinoopst ['notropis', 'sorption', 'portions', 'positron']
ainoopst []
oinoopst []


In [26]:
# Take 1 letter from each word of the list, in order, to spell a new word
d.extract_letter(["bluebird", "lunatic", "anticipate", "notice", "kindergarten"], get_top_k=10)

['black',
 'enter',
 'later',
 'union',
 'rated',
 'dance',
 'latin',
 'rapid',
 'blank',
 'inner']

In [27]:
# When one of the words is unknown, pass abcdefghijklmnopqrstuvwxyz in its place
# so that any letter can be taken from it
d.extract_letter(["bluebird", "lunatic", "anticipate", "notice", "abcdefghijklmnopqrstuvwxyz"], get_top_k=20)

['black',
 'rates',
 'enter',
 'until',
 'later',
 'lines',
 'union',
 'rated',
 'units',
 'dance',
 'dates',
 'latin',
 'intel',
 'ratio',
 'lunch',
 'rapid',
 'dutch',
 'blank',
 'inner',
 'blind']

In [28]:
# Extract the letter at a fixed position from each word and join them
# As the outputs show: no position -> first letters, 1 -> second letters, -1 -> last letters
print(d.extract_letter_fix_position(["desks", "orange", "tame"]))
print(d.extract_letter_fix_position(["desks", "orange", "tame"], 1))
print(d.extract_letter_fix_position(["desks", "orange", "tame"], -1))

dot
era
see
