In [5]:
from word_finder import WordFinder
# Shared WordFinder instance: every example cell below calls its methods on `d`
d = WordFinder()

In [6]:
# Return all possible replaced words
# The dict maps a 0-based position in the word to the candidate letters for that position;
# one word is returned for every combination of substitutions (2 x 3 = 6 here)
d.replace_letter("computer", {2: "nl", 3: "qrs"})

['conquter', 'consuter', 'conruter', 'colquter', 'colsuter', 'colruter']

In [7]:
# Return the product of the word frequencies of the given words
# If a word is not found in the word frequency dictionary, use the lowest frequency
# The cell displays the results of both calls as a tuple
d.compute_freq(["hello", "world"]), d.compute_freq(["aaabbbccc"])

(np.float64(0.014236717413988867), np.float64(1.2711e-05))

In [8]:
# Return the top k candidates ranked by the product of their word frequencies
# NOTE: This naive approach ignores linguistic correlation between words
# E.g.: In the 2nd example, "fake identity" should be the correct answer, but the
#       frequency product of "fake" and "identity" is not the highest
#       For a better approach, please consider using GPT or a language model that studies word collocations
print(d.find_top_k(["annual incest", "annual income"], 1))
print(
    d.find_top_k(
        ["fake identity", "make identity", "fake dentify", "lake identify"],
        2,
    )
)

['annual income']
['make identity', 'lake identify']


In [9]:
# Solve crossword: each `*` stands for one unknown letter, known letters stay in place
d.crossword("**itar**t")

['guitarist', 'unitarist']

In [10]:
# Whenever get_top_k is used, it relies on a naive approach that computes word frequency. See find_top_k above for its limitations
# With get_top_k=10, 10 words are returned here
d.crossword("**tho*", get_top_k=10)

['author',
 'method',
 'python',
 'pathol',
 'fathom',
 'mythos',
 'pathos',
 'hathor',
 'lithos',
 'pothos']

In [11]:
# Debug examples: with debug=True, a message explaining why nothing matched
# is printed before the empty result list
print(d.crossword("x***z***", debug=True))
print(d.crossword("**********j", debug=True))

No match found for the given letter_info
[]
No match found for word of length 11 with letter j in position 10 (0-based)
[]


In [12]:
# Solve crossword made of 2 words
# The pattern contains no space: the split point is not given, and the results
# include different splits (e.g. 4+3, 1+6, 2+5 letters)
d.crossword2("g*o*d*y", get_top_k=10)

['good day',
 'g monday',
 'grow day',
 'good dry',
 'glow day',
 'grow dry',
 'go oddly',
 'geoid by',
 'g loudly',
 'grond by']

In [13]:
# Solve crossword made of any number of words (nbr_words=3 yields three-word answers here)
# NOTE: This can be very slow due to the large search space
d.crossword_plus(cipher="s*e**u*at*r", nbr_words=3, get_top_k=10)

['see you water',
 'she you water',
 'see you later',
 'she you later',
 'see you qatar',
 'see a curator',
 'sue you water',
 'she you qatar',
 's be educator',
 'she a curator']

In [14]:
# Solve crossword with known space: the space in the pattern fixes where the two words split
d.crossword_known_space("g*o* d*y", get_top_k=10)

['good day',
 'grow day',
 'good dry',
 'glow day',
 'grow dry',
 'geog day',
 'geol day',
 'gros day',
 'goog day',
 'good dey']

In [15]:
# Suppose we have the following information. We can combine replace_letter and crossword to search
# (letters are numbered from 1 in this list, while the replace_letter dict below uses 0-based positions)
# 1st letter: k or l or m
# 2nd letter: unknown
# 3rd letter: a or e or i or o or u or y
# 4th letter: unknown
# 5th letter: o
# 6th letter: unknown
# 7th letter: r
# 8th letter: unknown
# replace_letter expands the pattern into a list of candidate patterns, which is passed straight to crossword
grid = d.replace_letter("****o*r*", {0: "klm", 2: "aeiouy"})
d.crossword(grid)

['keyboard', 'leeboard']

In [16]:
# It works for other crossword methods too (here the two-word solver crossword2)
# Positions 6, 7 and 11 (0-based) are each restricted to two candidate letters
grid = d.replace_letter("*n*ual**c***", {6: "il", 7: "an", 11: "et"})
d.crossword2(grid)

['ungual incast',
 'ungual incept',
 'ungual incult',
 'ungual incest',
 'ungual incant',
 'annual incast',
 'annual incept',
 'annual incult',
 'annual incest',
 'annual incant',
 'ungual incuse',
 'ungual incube',
 'ungual incise',
 'ungual incide',
 'ungual incute',
 'ungual incage',
 'ungual incave',
 'ungual incite',
 'ungual income',
 'ungual incase',
 'ungual incede',
 'annual incuse',
 'annual incube',
 'annual incise',
 'annual incide',
 'annual incute',
 'annual incage',
 'annual incave',
 'annual incite',
 'annual income',
 'annual incase',
 'annual incede',
 'ungual lacert',
 'annual lacert',
 'ungual lacune',
 'annual lacune']

In [17]:
# Combine replace_letter with crossword_known_space
# Positions are 0-based and count the space, so 6 and 7 are the first two letters of the second word
grid = d.replace_letter(
    "*e*r* ****ay",
    replacement={0: "lm", 6: "st", 7: "iou"},
)
d.crossword_known_space(grid)

['metro subway',
 'metro sunway',
 'metro sunray',
 'metro sunday',
 'metro surnay',
 'metra subway',
 'metra sunway',
 'metra sunray',
 'metra sunday',
 'metra surnay',
 'metre subway',
 'metre sunway',
 'metre sunray',
 'metre sunday',
 'metre surnay',
 'merry subway',
 'merry sunway',
 'merry sunray',
 'merry sunday',
 'merry surnay',
 'metro sompay',
 'metra sompay',
 'metre sompay',
 'merry sompay',
 'metro towkay',
 'metra towkay',
 'metre towkay',
 'merry towkay',
 'lepry subway',
 'lepry sunway',
 'lepry sunray',
 'lepry sunday',
 'lepry surnay',
 'leora subway',
 'leora sunway',
 'leora sunray',
 'leora sunday',
 'leora surnay',
 'leary subway',
 'leary sunway',
 'leary sunray',
 'leary sunday',
 'leary surnay',
 'leers subway',
 'leers sunway',
 'leers sunray',
 'leers sunday',
 'leers surnay',
 'leery subway',
 'leery sunway',
 'leery sunray',
 'leery sunday',
 'leery surnay',
 'lehrs subway',
 'lehrs sunway',
 'lehrs sunray',
 'lehrs sunday',
 'lehrs surnay',
 'lepre subway

In [18]:
# cipher can also be a list of candidate patterns instead of a single string;
# the results mix matches for both patterns (e.g. "... qatar" and "... water")
d.crossword_plus(cipher=["s*e**u*atar", "s*e**u*ater"], nbr_words=3, get_top_k=10)

['see you water',
 'she you water',
 'see you later',
 'she you later',
 'see you qatar',
 'sue you water',
 'she you qatar',
 'sie you water',
 'see you cater',
 'see gnu water']

In [19]:
# Search words of length word_len that start with a substring
# The cell displays two results: all matches, then only the top 5 (get_top_k=5)
d.start_with_substring(substring="cons", word_len=7), d.start_with_substring(substring="cons", word_len=7, get_top_k=5)

(['consarn',
  'consent',
  'consign',
  'consist',
  'console',
  'consols',
  'consomm',
  'consort',
  'conspue',
  'constat',
  'conster',
  'consuls',
  'consult',
  'consume',
  'consumo',
  'consute'],
 ['consent', 'console', 'consult', 'consist', 'consume'])

In [20]:
# Search words of length word_len that end with a substring
# The cell displays two results: all matches, then only the top 5 (get_top_k=5)
d.end_with_substring(substring="ply", word_len=8), d.end_with_substring(substring="ply", word_len=8, get_top_k=5)

(['canreply',
  'centuply',
  'madcaply',
  'misapply',
  'multiply',
  'nonapply',
  'preapply',
  'recomply',
  'resupply',
  'scrimply',
  'sextiply',
  'sextuply',
  'undeeply',
  'underply',
  'unsimply',
  'unsupply'],
 ['multiply', 'resupply', 'unsupply', 'unsimply', 'underply'])

In [21]:
# Search words that contain a substring (the second argument, 5, matches the length of every word returned)
# The cell displays two results: all matches, then only the top 5 (get_top_k=5)
d.contain_substring("ell", 5), d.contain_substring("ell", 5, get_top_k=5)

(['avell',
  'bella',
  'belle',
  'belli',
  'belly',
  'bello',
  'bells',
  'cella',
  'celli',
  'cello',
  'cells',
  'della',
  'delly',
  'dells',
  'dwell',
  'ellan',
  'ellen',
  'fella',
  'felly',
  'fells',
  'gelly',
  'helly',
  'hello',
  'hells',
  'yells',
  'jelly',
  'jello',
  'jells',
  'kella',
  'kelly',
  'knell',
  'mells',
  'nelly',
  'odell',
  'quell',
  'sella',
  'selle',
  'selli',
  'selly',
  'sells',
  'shell',
  'skell',
  'smell',
  'snell',
  'spell',
  'stell',
  'swell',
  'telly',
  'tells',
  'tellt',
  'udell',
  'welly',
  'wells'],
 ['cells', 'hello', 'kelly', 'shell', 'tells'])

In [23]:
# An improved version of anagram: rearrange the given letters into a word
d.anagram('mpoagrr')

['program']

In [24]:
# Same input with prefix_len=2; the result is unchanged for this word
# NOTE(review): what prefix_len controls is not visible from this example - confirm against WordFinder.anagram
d.anagram('mpoagrr', prefix_len=2)

['program']

In [25]:
# Same input with prefix_len=3; the result is unchanged for this word
# NOTE(review): what prefix_len controls is not visible from this example - confirm against WordFinder.anagram
d.anagram('mpoagrr', prefix_len=3)

['program']

In [26]:
# An anagram with 2 words: the letters are split across two words,
# and both orderings of each pair are listed (e.g. "pop music" and "music pop")
d.anagram2("uicpmops", get_top_k=20)

['pop music',
 'music pop',
 'opp music',
 'music opp',
 'pius comp',
 'comp pius',
 'mips coup',
 'coup mips',
 'pop musci',
 'musci pop',
 'opium cps',
 'cps opium',
 'pc opiums',
 'opiums pc',
 'opium csp',
 'csp opium',
 'pious pcm',
 'pcm pious',
 'pious cpm',
 'cpm pious']

In [27]:
# Extract 1 letter from each word in the list, in order, to spell a new word
# (e.g. "black": b from bluebird, l from lunatic, a from anticipate, c from notice, k from kindergarten)
d.extract_letter(["bluebird", "lunatic", "anticipate", "notice", "kindergarten"], get_top_k=10)

['black',
 'enter',
 'later',
 'union',
 'rated',
 'dance',
 'latin',
 'rapid',
 'blank',
 'inner']

In [28]:
# Use abcdefghijklmnopqrstuvwxyz for any word that is unknown, so that any letter can be taken for that position
d.extract_letter(["bluebird", "lunatic", "anticipate", "notice", "abcdefghijklmnopqrstuvwxyz"], get_top_k=20)

['black',
 'rates',
 'enter',
 'until',
 'later',
 'lines',
 'union',
 'rated',
 'units',
 'dance',
 'dates',
 'latin',
 'intel',
 'ratio',
 'lunch',
 'rapid',
 'dutch',
 'blank',
 'inner',
 'blind']