In [37]:
#Importing the required libraries
import numpy as np
import string
from collections import Counter, defaultdict
import joblib

In [38]:
class SimpleHMM:
    def __init__(self):
        self.bigram_counts = defaultdict(Counter)
        self.unigram_counts = Counter()
        self.vocab = list(string.ascii_lowercase)

    def train(self, corpus_path):
        with open(corpus_path, 'r') as f:
            words = [w.strip().lower() for w in f.readlines() if w.strip()]
        for word in words:
            prev = "<s>"
            for ch in word:
                if ch in self.vocab:
                    self.bigram_counts[prev][ch] += 1
                    self.unigram_counts[prev] += 1
                    prev = ch
            self.bigram_counts[prev]["</s>"] += 1

    def get_letter_probs(self, masked_word, guessed):
        masked_word = masked_word.lower()
        guessed = set(guessed)
        probs = Counter({ch: 1e-6 for ch in self.vocab})

        for i, ch in enumerate(masked_word):
            if ch == "_":
                left = masked_word[i-1] if i>0 else "<s>"
                right = masked_word[i+1] if i < len(masked_word)-1 else "</s>"
                for l in self.vocab:
                    p_left = self.bigram_counts[left][l] / (self.unigram_counts[left] + 1e-6)
                    p_right = self.bigram_counts[l][right] / (self.unigram_counts[l] + 1e-6)
                    probs[l] += (p_left + p_right)

        for g in guessed:
            probs[g] = 0

        total = sum(probs.values())
        return np.array([probs[ch] / total for ch in self.vocab])

    def save(self, path):
        joblib.dump(self, path)
        print(f"✅ Model saved to {path}")

    @staticmethod
    def load(path):
        model = joblib.load(path)
        print(f"✅ Model loaded from {path}")
        return model

In [39]:
# --- Train ---
hmm = SimpleHMM()
hmm.train("./Data/corpus.txt")

In [40]:
def show_top_letters(masked_word, guessed):
    test_probs = hmm.get_letter_probs(masked_word, guessed)
    letters = list(string.ascii_lowercase)
    pairs = sorted(list(zip(letters, test_probs)), key=lambda x: -x[1])[:5]
    print(f"\nMasked Word: {masked_word} | Guessed: {guessed}")
    print(tabulate(pairs, headers=["Letter", "Probability"], floatfmt=".4f"))

In [41]:
# Test 1
show_top_letters("_a__", guessed=['a'])

# Test 2
show_top_letters("a_p_e", guessed=['a', 'p', 'e'])

# Test 3
show_top_letters("__oo_", guessed=['o'])


Masked Word: _a__ | Guessed: ['a']
Letter      Probability
--------  -------------
y                0.1454
d                0.0598
m                0.0550
l                0.0539
s                0.0525

Masked Word: a_p_e | Guessed: ['a', 'p', 'e']
Letter      Probability
--------  -------------
v                0.0882
z                0.0710
l                0.0706
r                0.0692
t                0.0667

Masked Word: __oo_ | Guessed: ['o']
Letter      Probability
--------  -------------
y                0.1519
d                0.0617
s                0.0570
n                0.0555
c                0.0537


In [42]:
hmm 
hmm.save("hmm_model.joblib")

✅ Model saved to hmm_model.joblib


In [43]:
from joblib import load
hmm = load("hmm_model.joblib")
print(dir(hmm))


['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__firstlineno__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slotnames__', '__static_attributes__', '__str__', '__subclasshook__', '__weakref__', 'bigram_counts', 'get_letter_probs', 'load', 'save', 'train', 'unigram_counts', 'vocab']
