In [None]:
# Noisy-channel spelling correction on a toy vocabulary.
# Each candidate w is scored by P(w) * P(o | w); the scores are NOT
# divided by their sum, so they are proportional to the posterior rather
# than true probabilities. That does not affect which word scores highest.

# Prior P(w) for every candidate word
language_model = {"cat": 0.5, "car": 0.3, "bat": 0.2}

# Channel model P(o | w), keyed by (intended word, observed word)
error_model = {
    ("cat", "caat"): 0.4,
    ("car", "caat"): 0.1,
    ("bat", "caat"): 0.05,
}

# The (possibly misspelled) token we are trying to correct
observed_word = "caat"

# Score every candidate; a (word, observation) pair missing from the
# error model contributes a likelihood of 0
posterior_probabilities = {}
for word, prior in language_model.items():
    likelihood = error_model.get((word, observed_word), 0)
    posterior_probabilities[word] = prior * likelihood

# Pick the best-scoring candidate (the first one wins on ties)
corrected_word, _ = max(posterior_probabilities.items(), key=lambda item: item[1])

# Report the scores and the chosen correction
print("Posterior Probabilities:", posterior_probabilities)
print("Corrected Word:", corrected_word)


Posterior Probabilities: {'cat': 0.2, 'car': 0.03, 'bat': 0.010000000000000002}
Corrected Word: cat


In [None]:
import string

def generate_candidates(word, alphabet=string.ascii_lowercase):
    """
    Generate candidate words with edit distance 0 or 1 from the given word.

    Edit operations include: deletion, insertion, substitution, and
    transposition of two adjacent characters.

    Args:
        word: The string to generate edits for. May be empty, in which case
            only the empty string and the single-symbol insertions are returned.
        alphabet: Symbols used for insertions and substitutions. Defaults to
            the lowercase ASCII letters; pass a different string (or any
            iterable of strings) to support other character sets.

    Returns:
        A set containing ``word`` itself plus every string reachable from it
        with exactly one edit. Deleting from a one-character word yields the
        empty string, which is included.
    """
    # Materialise once so a one-shot iterable can be reused by both the
    # insertion and the substitution passes.
    letters = tuple(alphabet)

    # Every way of cutting the word into (head, tail), including the cuts
    # with an empty head or an empty tail.
    splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]

    # Edit distance 0 (the original word)
    candidates = {word}

    # 1. Deletions: drop the first character of each non-empty tail
    candidates.update(head + tail[1:] for head, tail in splits if tail)

    # 2. Insertions: put one symbol at every cut point
    candidates.update(
        head + letter + tail for head, tail in splits for letter in letters
    )

    # 3. Substitutions: replace the first character of each non-empty tail,
    #    skipping the no-op replacement with the same symbol
    candidates.update(
        head + letter + tail[1:]
        for head, tail in splits
        if tail
        for letter in letters
        if letter != tail[0]
    )

    # 4. Transpositions: swap the first two characters of each tail
    candidates.update(
        head + tail[1] + tail[0] + tail[2:]
        for head, tail in splits
        if len(tail) > 1
    )

    return candidates


# Demo: enumerate every string within one edit of a misspelled token.
# The result is a set, so the order in which it prints is not defined.
word = "caat"
candidates = generate_candidates(word)

print("Original word: " + word)
print("Generated candidates (edit distance 0 or 1):")
print(candidates)


Original word: caat
Generated candidates (edit distance 0 or 1):
{'cabat', 'cavat', 'cwaat', 'caoat', 'caast', 'kaat', 'caait', 'taat', 'cagt', 'caxat', 'czaat', 'capat', 'caagt', 'caant', 'cacat', 'cuaat', 'caabt', 'jaat', 'xaat', 'czat', 'cabt', 'caam', 'paat', 'ceaat', 'calat', 'caato', 'craat', 'cait', 'cayt', 'cyaat', 'caaat', 'cdaat', 'zaat', 'caats', 'caft', 'caao', 'coaat', 'calt', 'caaet', 'fcaat', 'caay', 'caawt', 'ctat', 'ecaat', 'caht', 'waat', 'naat', 'cakat', 'scaat', 'dcaat', 'caatx', 'vaat', 'caat', 'cfaat', 'gaat', 'acat', 'caatu', 'cqaat', 'cayat', 'cyat', 'caaut', 'laat', 'cvat', 'caab', 'caag', 'camat', 'maat', 'caact', 'cahat', 'oaat', 'caaa', 'caaj', 'icaat', 'caatf', 'cawt', 'cnat', 'vcaat', 'caa', 'caart', 'cvaat', 'caatg', 'acaat', 'caatm', 'caamt', 'caatj', 'lcaat', 'raat', 'caqat', 'rcaat', 'cnaat', 'catat', 'caatv', 'cajat', 'gcaat', 'cuat', 'cazat', 'cafat', 'aaat', 'cgat', 'cmat', 'cast', 'ncaat', 'ccat', 'caeat', 'ycaat', 'caapt', 'caatn', 'caas', 'ckat',