In [1]:
# Load the project-local WordFinder helper and create the single instance (`d`)
# that every later cell in this notebook calls.
from word_finder import WordFinder
d = WordFinder()

In [None]:
# Return all possible replaced words for "computer".
# The dict maps a character position to the candidate letters for that position;
# positions appear to be 0-based (a later cell uses key 0 for the 1st character).
d.replace_letter("computer", {2: "nl", 3: "qrs"})

In [None]:
# compute_freq returns the product of the frequencies of the given words.
# A word that is not found in the word frequency dictionary falls back to the
# lowest frequency (presumably what the second call, with a non-word, exercises).
(
    d.compute_freq(["hello", "world"]),
    d.compute_freq(["aaabbbccc"]),
)

In [None]:
# Return the top k candidates ranked by word-frequency product.
# NOTE: This naive approach ignores linguistic correlation between words.
# E.g.: in the 2nd example, "fake identity" should be the correct answer, but the
#       product of the frequencies of "fake" and "identity" is not the highest.
#       For a better approach, consider GPT or a language model that studies word collocations.
# Each candidate is a plain string (no tuple wrapping is needed).
print(d.find_top_k(["annual incest", "annual income"], 1))
print(
    d.find_top_k(
        ["fake identity", "make identity", "fake dentify", "lake identify"],
        2,
    )
)

In [None]:
# Solve a crossword pattern: "*" marks an unknown letter, every other character is a
# known letter at that position (see the annotated example further down in this notebook).
d.crossword("**itar**t")

In [None]:
# get_top_k=10 limits the result to the top 10 matches.
# Whenever get_top_k is used, the ranking relies on a naive approach that computes word
# frequency. See find_top_k above for its limitations.
d.crossword("**tho*", get_top_k=10)

In [None]:
# Debug examples: crossword is called with debug=True and its return value is printed.
# NOTE(review): what debug mode reports is defined in word_finder and is not visible here.
print(d.crossword("x***z***", debug=True))
print(d.crossword("**********j", debug=True))

In [None]:
# Solve a crossword made of 2 words. The pattern contains no space, so the position of
# the word boundary is not given. get_top_k=10 keeps the top 10 results.
d.crossword2("g*o*d*y", get_top_k=10)

In [None]:
# Solve a crossword made of any number of words; nbr_words (3 here) presumably sets
# how many words the pattern is split into.
# NOTE: This can be very slow due to the large search space
d.crossword_plus(cipher="s*e**u*at*r", nbr_words=3, get_top_k=10)

In [None]:
# Solve a crossword whose word boundary is known: the space inside the pattern
# marks where the words are separated.
d.crossword_known_space("g*o* d*y", get_top_k=10)

In [None]:
# Suppose we only have partial information about each letter of an 8-letter word.
# replace_letter and crossword can be combined to search for it:
#   1st letter: k, l or m
#   2nd letter: unknown
#   3rd letter: a, e, i, o, u or y
#   4th letter: unknown
#   5th letter: o
#   6th letter: unknown
#   7th letter: r
#   8th letter: unknown
# Keys of the replacement dict are 0-based positions (0 -> 1st letter, 2 -> 3rd letter).
grid = d.replace_letter(
    "****o*r*",
    {0: "klm", 2: "aeiouy"},
)
d.crossword(grid)

In [None]:
# The replace_letter + crossword combination works for the other crossword methods too;
# here the expanded patterns are handed to crossword2.
grid = d.replace_letter(
    "*n*ual**c***",
    {6: "il", 7: "an", 11: "et"},
)
d.crossword2(grid)

In [None]:
# Same combination with crossword_known_space: the pattern keeps its space, and the
# candidate letters are passed through the `replacement` keyword.
# NOTE(review): keys 6 and 7 presumably count the space as a position - confirm in word_finder.
grid = d.replace_letter(
    "*e*r* ****ay",
    replacement={0: "lm", 6: "st", 7: "iou"},
)
d.crossword_known_space(grid)

In [None]:
# crossword_plus is given a list of candidate patterns here (differing only in the
# second-to-last letter) instead of a single pattern string.
d.crossword_plus(cipher=["s*e**u*atar", "s*e**u*ater"], nbr_words=3, get_top_k=10)

In [None]:
# Search words that start with a substring (word_len=7 here):
# first without get_top_k, then with get_top_k=5.
(
    d.start_with_substring(substring="cons", word_len=7),
    d.start_with_substring(substring="cons", word_len=7, get_top_k=5),
)

In [None]:
# Search words that end with a substring (word_len=8 here):
# first without get_top_k, then with get_top_k=5.
(
    d.end_with_substring(substring="ply", word_len=8),
    d.end_with_substring(substring="ply", word_len=8, get_top_k=5),
)

In [None]:
# Search words that contain a substring:
# first without get_top_k, then with get_top_k=5.
(
    d.contain_substring("ell", 5),
    d.contain_substring("ell", 5, get_top_k=5),
)

In [None]:
# A brute-force version of anagram solving; it can be really slow.
# The same scrambled letters are reused in the following cells with d.anagram.
d.anagram_brute_force('mpoagrr')

In [None]:
# An improved version of anagram solving, called here with its default settings
# on the same letters as the brute-force example.
d.anagram('mpoagrr')

In [None]:
# Same anagram search with prefix_len=2.
# NOTE(review): the meaning of prefix_len is defined in word_finder and is not visible here.
d.anagram('mpoagrr', prefix_len=2)

In [None]:
# Same anagram search with prefix_len=3.
# NOTE(review): the meaning of prefix_len is defined in word_finder and is not visible here.
d.anagram('mpoagrr', prefix_len=3)

In [None]:
# Solve an anagram made of 2 words, keeping the top 20 results (get_top_k=20).
d.anagram2("uicpmops", get_top_k=20)

In [2]:
# Extract 1 letter from each word in the list and return the words those letters can form.
# With 5 input words the recorded results are 5-letter words (e.g. 'black': b-l-a-c-k,
# one letter per input word, in order). get_top_k=10 keeps the top 10 results.
d.extract_letter(["bluebird", "lunatic", "anticipate", "notice", "kindergarten"], get_top_k=10)

['black',
 'enter',
 'later',
 'union',
 'rated',
 'dance',
 'latin',
 'rapid',
 'blank',
 'inner']

In [4]:
# Use abcdefghijklmnopqrstuvwxyz as a placeholder for any word that is unknown:
# since it contains every letter, that position can contribute any letter.
d.extract_letter(["bluebird", "lunatic", "anticipate", "notice", "abcdefghijklmnopqrstuvwxyz"], get_top_k=20)

['black',
 'rates',
 'enter',
 'until',
 'later',
 'lines',
 'union',
 'rated',
 'units',
 'dance',
 'dates',
 'latin',
 'intel',
 'ratio',
 'lunch',
 'rapid',
 'dutch',
 'blank',
 'inner',
 'blind']