In [None]:
import math

import numpy as np

class HMM_POS_Tagger:
    def __init__(self, train_corpus):
        self.transition_matrix = self.calculate_transition_matrix(train_corpus)
        self.emission_matrix = self.calculate_emission_matrix(train_corpus)
        self.pos_tags = list(self.emission_matrix.keys())

    def calculate_transition_matrix(self, train_corpus):
        transition_matrix = {}
        for sentence in train_corpus:
            for i in range(len(sentence) - 1):
                current_tag = sentence[i][1]
                next_tag = sentence[i + 1][1]
                if current_tag not in transition_matrix:
                    transition_matrix[current_tag] = {}
                if next_tag not in transition_matrix[current_tag]:
                    transition_matrix[current_tag][next_tag] = 0
                transition_matrix[current_tag][next_tag] += 1

        # Normalize transition probabilities
        for tag, transitions in transition_matrix.items():
            total_count = sum(transitions.values())
            for next_tag in transitions:
                transition_matrix[tag][next_tag] /= total_count

        return transition_matrix

    def calculate_emission_matrix(self, train_corpus):
        emission_matrix = {}
        for sentence in train_corpus:
            for word, tag in sentence:
                if tag not in emission_matrix:
                    emission_matrix[tag] = {}
                if word not in emission_matrix[tag]:
                    emission_matrix[tag][word] = 0
                emission_matrix[tag][word] += 1

        # Normalize emission probabilities
        for tag, emissions in emission_matrix.items():
            total_count = sum(emissions.values())
            for word in emissions:
                emission_matrix[tag][word] /= total_count

        return emission_matrix

    def predict(self, sentence):
        # Perform POS tagging using Viterbi algorithm
        # (not implemented here for brevity)
        pass

# Example usage: train on a two-sentence toy corpus, then print both
# probability tables as dense arrays (rows follow the tagger's tag order).
train_corpus = [
    [("The", "DT"), ("dog", "NN"), ("barks", "VBZ")],
    [("A", "DT"), ("cat", "NN"), ("meows", "VBZ")]
]

hmm_tagger = HMM_POS_Tagger(train_corpus)
tag_order = hmm_tagger.pos_tags

# Rows are the current tag, columns the following tag; missing pairs show 0.
print("Transition matrix:")
transition_rows = []
for source_tag in tag_order:
    outgoing = hmm_tagger.transition_matrix.get(source_tag, {})
    transition_rows.append([outgoing.get(target_tag, 0) for target_tag in tag_order])
print(np.array(transition_rows))

# Rows are tags, columns the words below; words a tag never emitted show 0.
print("\nEmission matrix:")
vocabulary = ["The", "dog", "barks", "A", "cat", "meows"]
emission_rows = []
for tag in tag_order:
    emitted = hmm_tagger.emission_matrix.get(tag, {})
    emission_rows.append([emitted.get(word, 0) for word in vocabulary])
print(np.array(emission_rows))


Transition matrix:
[[0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 0.]]

Emission matrix:
[[0.5 0.  0.  0.5 0.  0. ]
 [0.  0.5 0.  0.  0.5 0. ]
 [0.  0.  0.5 0.  0.  0.5]]
