# Hidden Markov Models (HMM) for POS tagging
Here's a basic example to illustrate the use of an HMM for POS tagging. For demonstration, we use a tiny four-word vocabulary with hand-specified (not learned) probabilities instead of a real dataset.

In [55]:
# Install required Libraries
#!pip install hmmlearn


In [60]:
import numpy as np
from hmmlearn import hmm

# Hidden states (POS tags) and the vocabulary of observable words.
# The position of each entry is the integer code used in every array below.
states = ['Noun', 'Verb']
observations = ['eat', 'banana', 'flies', 'like']

# Start probability vector: P(first tag), ordered like `states`
start_probability = np.array([0.5, 0.5])

# Transition probability matrix between states (row = from, column = to)
transition_probability = np.array([
  [0.7, 0.3],  # Noun -> Noun, Noun -> Verb
  [0.4, 0.6],  # Verb -> Noun, Verb -> Verb
])

# Emission probability matrix: row = state, column = word (ordered like `observations`)
emission_probability = np.array([
  [0.1, 0.4, 0.1, 0.4],  # P(eat|Noun), P(banana|Noun), P(flies|Noun), P(like|Noun)
  [0.4, 0.1, 0.4, 0.1],  # P(eat|Verb), P(banana|Verb), P(flies|Verb), P(like|Verb)
])

# Guard against the matrix and the state/word lists drifting apart.
assert emission_probability.shape == (len(states), len(observations))

# Create an HMM with categorical (one discrete symbol per time step) emissions.
# The parameters are assigned directly instead of being learned with fit().
# (No `n_trials` here: that setting belongs to MultinomialHMM, not CategoricalHMM.)
model = hmm.CategoricalHMM(n_components=len(states))
model.startprob_ = start_probability
model.transmat_ = transition_probability
model.emissionprob_ = emission_probability

# Encode the sentence as word indices. hmmlearn treats X as
# (n_samples, n_features), so a sequence of T symbols must be a column vector
# of shape (T, 1) -- one row per word -- not a single row of shape (1, T).
sentence = ['eat', 'banana', 'flies', 'like']
word_to_index = {word: idx for idx, word in enumerate(observations)}
observation_sequence = np.array([[word_to_index[word]] for word in sentence])

# Find the most likely tag sequence with the Viterbi algorithm.
# NOTE: despite its name, `states_prob` holds the decoded state *indices*
# (one per word), not probabilities; `logprob` is the log-probability of that path.
logprob, states_prob = model.decode(observation_sequence, algorithm="viterbi")
print("Observations:", ', '.join(observations[idx] for idx in observation_sequence.flatten()))
print("Predicted States:", ', '.join(states[i] for i in states_prob))


Observations: eat, banana, flies, like
Predicted States: Verb, Noun, Noun, Noun


## Explanation:

- **States and Observations**: We define the possible states (POS tags) and observations (words) for our model.
- **Start Probability**: This is the probability of starting in each state.
- **Transition Probability**: This matrix defines the probability of transitioning from one state to another.
- **Emission Probability**: This matrix gives the probability of observing each word given a particular state.
- **Model Setup**: We create a `CategoricalHMM` with one hidden state per POS tag (`n_components`) and assign the start, transition, and emission probabilities directly instead of learning them with `fit`.
- **Prediction**: We predict the sequence of states for a given sequence of words using the Viterbi algorithm, which is a dynamic programming approach to finding the most likely sequence of hidden states.

# Conditional Random Fields (CRFs) 

**Conditional Random Fields (CRFs)** are a class of statistical modeling methods often used in pattern recognition and machine learning, where they are commonly applied to structured prediction tasks such as sequence labeling, part-of-speech tagging, and named entity recognition (NER) in Natural Language Processing (NLP). CRFs are particularly useful for tasks that require taking context into account: instead of labeling each token independently, a CRF scores the entire label sequence jointly, so the label chosen for one token can depend on its neighbors.


## Python Code for NER Using CRF



In [11]:
## INSTALL THE REQUIRED LIBRARY
#!pip install sklearn-crfsuite scikit-learn

import sklearn_crfsuite
from sklearn_crfsuite import metrics
from sklearn_crfsuite import scorers
from sklearn_crfsuite.metrics import flat_classification_report

def word2features(sent, i):
    """Build the feature dict for the token at position ``i`` of ``sent``.

    Each element of ``sent`` is indexed as ``(word, postag, ...)``; only the
    first two fields are read here.

    The result holds features of the token itself (lower-cased form, two
    suffixes, casing/digit flags, POS tag and its first two characters), the
    same kind of features for the previous and next token under ``-1:`` and
    ``+1:`` prefixed keys, and a ``BOS`` / ``EOS`` flag when there is no
    previous / next token.
    """
    token, tag = sent[i][0], sent[i][1]

    feats = {
        'bias': 1.0,
        'word.lower()': token.lower(),
        'word[-3:]': token[-3:],
        'word[-2:]': token[-2:],
        'word.isupper()': token.isupper(),
        'word.istitle()': token.istitle(),
        'word.isdigit()': token.isdigit(),
        'postag': tag,
        'postag[:2]': tag[:2],
    }

    # Left context: either the previous token's features or the BOS marker.
    if i <= 0:
        feats['BOS'] = True  # Beginning of Sentence
    else:
        prev_token = sent[i - 1][0]
        prev_tag = sent[i - 1][1]
        feats['-1:word.lower()'] = prev_token.lower()
        feats['-1:word.istitle()'] = prev_token.istitle()
        feats['-1:word.isupper()'] = prev_token.isupper()
        feats['-1:postag'] = prev_tag
        feats['-1:postag[:2]'] = prev_tag[:2]

    # Right context: either the next token's features or the EOS marker.
    if i >= len(sent) - 1:
        feats['EOS'] = True  # End of Sentence
    else:
        next_token = sent[i + 1][0]
        next_tag = sent[i + 1][1]
        feats['+1:word.lower()'] = next_token.lower()
        feats['+1:word.istitle()'] = next_token.istitle()
        feats['+1:word.isupper()'] = next_token.isupper()
        feats['+1:postag'] = next_tag
        feats['+1:postag[:2]'] = next_tag[:2]

    return feats


def sent2features(sent):
    """ Return one feature dict per token of the sentence, in order """
    per_token = []
    for position in range(len(sent)):
        per_token.append(word2features(sent, position))
    return per_token

def sent2labels(sent):
    """ Return the label (third field) of every (token, postag, label) triple """
    labels = []
    for _token, _postag, label in sent:
        labels.append(label)
    return labels

def sent2tokens(sent):
    """ Return the token (first field) of every (token, postag, label) triple """
    tokens = []
    for token, _postag, _label in sent:
        tokens.append(token)
    return tokens

# Toy training corpus: one list per sentence, one (token, POS tag, NER label)
# triple per word.
sentences = [
    [
        ("John", "NNP", "B-PER"), ("Smith", "NNP", "I-PER"), ("works", "VBZ", "O"),
        ("at", "IN", "O"), ("Google", "NNP", "B-ORG"),
    ],
    [
        ("The", "DT", "O"), ("office", "NN", "O"), ("is", "VBZ", "O"),
        ("in", "IN", "O"), ("New", "NNP", "B-LOC"), ("York", "NNP", "I-LOC"),
    ],
    [
        ("She", "PRP", "O"), ("traveled", "VBD", "O"), ("to", "TO", "O"),
        ("Paris", "NNP", "B-LOC"),
    ],
    [
        ("IBM", "NNP", "B-ORG"), ("is", "VBZ", "O"), ("a", "DT", "O"),
        ("large", "JJ", "O"), ("company", "NN", "O"),
    ],
    [
        ("Mark", "NNP", "B-PER"), ("and", "CC", "O"), ("Sarah", "NNP", "B-PER"),
        ("visited", "VBD", "O"), ("Microsoft", "NNP", "B-ORG"),
    ],
    [
        ("Hello", "NNP", "O"), ("World", "NNP", "O"),
    ],
]

# Turn every sentence into a list of per-token feature dicts and a list of labels
X_train = list(map(sent2features, sentences))
y_train = list(map(sent2labels, sentences))

# Configure and train the CRF model
crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    c1=0,
    c2=0.11,
    max_iterations=1000,
    all_possible_transitions=True,
)
crf.fit(X_train, y_train)

# Predict on the training sentences themselves
y_pred = crf.predict(X_train)

# Show each token next to its predicted label, one sentence per line
for train_sent, pred_labels in zip(sentences, y_pred):
    print([(entry[0], tag) for entry, tag in zip(train_sent, pred_labels)])

[('John', 'B-PER'), ('Smith', 'I-PER'), ('works', 'O'), ('at', 'O'), ('Google', 'B-ORG')]
[('The', 'O'), ('office', 'O'), ('is', 'O'), ('in', 'O'), ('New', 'B-LOC'), ('York', 'I-LOC')]
[('She', 'O'), ('traveled', 'O'), ('to', 'O'), ('Paris', 'B-LOC')]
[('IBM', 'B-ORG'), ('is', 'O'), ('a', 'O'), ('large', 'O'), ('company', 'O')]
[('Mark', 'B-PER'), ('and', 'O'), ('Sarah', 'B-PER'), ('visited', 'O'), ('Microsoft', 'B-ORG')]
[('Hello', 'O'), ('World', 'O')]


In [12]:
# Unseen sentences in the same (token, POS tag, NER label) format as the
# training data; the labels here are the expected (gold) annotations.
test_sentences = [
    [
        ("Larry", "NNP", "B-PER"), ("Page", "NNP", "I-PER"),
        ("founded", "VBD", "O"), ("Google", "NNP", "B-ORG"),
    ],
    [
        ("The", "DT", "O"), ("beautiful", "JJ", "O"), ("city", "NN", "O"),
        ("of", "IN", "O"), ("Rome", "NNP", "B-LOC"),
    ],
]

# Features and gold labels for the test sentences, then the model's predictions
X_test = list(map(sent2features, test_sentences))
y_test = list(map(sent2labels, test_sentences))
y_pred = crf.predict(X_test)

# Show each token next to its predicted label, one sentence per line
for test_sent, pred_labels in zip(test_sentences, y_pred):
    print([(entry[0], tag) for entry, tag in zip(test_sent, pred_labels)])


[('Larry', 'O'), ('Page', 'O'), ('founded', 'O'), ('Google', 'B-ORG')]
[('The', 'O'), ('beautiful', 'O'), ('city', 'O'), ('of', 'O'), ('Rome', 'B-ORG')]
