<a href="https://colab.research.google.com/github/mr-cri-spy/NLP-Projects/blob/main/nlp_practice_1_7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import nltk
from nltk import CFG

# Toy context-free grammar: a sentence (S) is a noun phrase followed by a
# verb phrase, over a small fixed vocabulary of determiners, nouns and verbs.
grammar = CFG.fromstring("""
  S -> NP VP
  NP -> Det Noun
  VP -> Verb NP
  Det -> 'the' | 'a'
  Noun -> 'dog' | 'cat'
  Verb -> 'chases' | 'sees'
""")

# Chart parser driven by the grammar above.
parser = nltk.ChartParser(grammar)

# Input sentence as a list of word tokens.
sentence = "the dog chases a cat".split()

# Print every parse tree the parser yields for the sentence.
for tree in parser.parse(sentence):
    print(tree)


(S
  (NP (Det the) (Noun dog))
  (VP (Verb chases) (NP (Det a) (Noun cat))))


POS Tagging & Viterbi Algorithm (HMM)

In [3]:
from nltk.tag import hmm

# Two hand-tagged sentences; each sentence is a list of (word, tag) pairs.
train_data = [
    [('Akki', 'NOUN'), ('runs', 'VERB')],
    [('Gopala', 'NOUN'), ('eats', 'VERB')],
]

# Fit an HMM tagger on the tagged sentences (supervised training).
trainer = hmm.HiddenMarkovModelTrainer()
tagger = trainer.train_supervised(train_data)

# Tag a word sequence that does not appear as a sentence in train_data.
test_words = ['Akki', 'eats']
print(tagger.tag(test_words))


[('Akki', 'NOUN'), ('eats', 'VERB')]


Dependency Parsing (spaCy)

In [4]:
import spacy

# Load the English spaCy model `en_core_web_sm`.
nlp = spacy.load("en_core_web_sm")

# Run the pipeline over a short sample sentence.
sample_text = "Akki saw Gopala"
doc = nlp(sample_text)

# One line per token: token text, the text of its head, and the dependency label.
for token in doc:
    print(f"{token.text} --> {token.head.text} ({token.dep_})")


Akki --> saw (nsubj)
saw --> saw (ROOT)
Gopala --> saw (dobj)


EM Algorithm (Unsupervised HMM Training) — demonstrated here with a supervised HMM as a stand-in

In [6]:
# Workaround: the HMM is fitted on labelled sentences (supervised training)
# as a stand-in for EM; no unsupervised EM iterations are run in this cell.
from nltk.tag import hmm

# Three hand-tagged sentences; each sentence is a list of (word, tag) pairs.
train_data = [
    [('Akki', 'NOUN'), ('writes', 'VERB')],
    [('Gopala', 'NOUN'), ('runs', 'VERB')],
    [('Akki', 'NOUN'), ('eats', 'VERB')],
]

# Fit the tagger on the labelled data.
trainer = hmm.HiddenMarkovModelTrainer()
tagger = trainer.train_supervised(train_data)

# Tag a noun/verb combination that never occurs together in train_data.
unseen_pair = ['Gopala', 'writes']
print(tagger.tag(unseen_pair))


[('Gopala', 'NOUN'), ('writes', 'VERB')]


Forward-Backward Algorithm (NumPy)

In [8]:
import numpy as np
# Forward-backward smoothing for a two-state HMM over a two-word sentence.
states = ['NOUN', 'VERB']
observations = ['Akki', 'runs']
obs_map = {'Akki': 0, 'runs': 1}   # word -> column index into B

# Model parameters; rows follow the order of `states`.
A = np.array([[0.6, 0.4], [0.3, 0.7]])   # A[i, j]: transition weight from state i to state j
B = np.array([[0.9, 0.1], [0.2, 0.8]])   # B[i, k]: emission weight of observation k in state i
pi = np.array([0.5, 0.5])                # initial state distribution
T = len(observations)
N = len(states)

# Sanity-check the parameters: shapes agree with `states`, and every
# distribution sums to 1.
assert A.shape == (N, N) and B.shape[0] == N and pi.shape == (N,)
np.testing.assert_allclose(A.sum(axis=1), 1.0)
np.testing.assert_allclose(B.sum(axis=1), 1.0)
np.testing.assert_allclose(pi.sum(), 1.0)

# Translate the words to emission-column indices once, outside the loops.
obs_idx = [obs_map[word] for word in observations]

#Forward
# alpha[t, j] = P(o_0..o_t, state_t = j)
alpha = np.zeros((T, N))
alpha[0] = pi * B[:, obs_idx[0]]
for t in range(1, T):
    # (alpha[t-1] @ A)[j] sums alpha[t-1, i] * A[i, j] over the previous state i.
    alpha[t] = (alpha[t - 1] @ A) * B[:, obs_idx[t]]

#Backward
# beta[t, i] = P(o_{t+1}..o_{T-1} | state_t = i); the last row is all ones.
beta = np.zeros((T, N))
beta[T - 1] = 1.0
for t in range(T - 2, -1, -1):
    # (A @ v)[i] sums A[i, j] * v[j] over the next state j.
    beta[t] = A @ (B[:, obs_idx[t + 1]] * beta[t + 1])

# Posterior probabilities
# Normalise by the sequence likelihood, i.e. the sum of the final forward row.
gamma = (alpha * beta) / np.sum(alpha[-1])

# Each time step's posterior over states must be a proper distribution.
np.testing.assert_allclose(gamma.sum(axis=1), 1.0)
print("Gamma (state probabilities):\n", gamma)


Gamma (state probabilities):
 [[0.74347826 0.25652174]
 [0.13043478 0.86956522]]


MaxEnt Classifier


In [11]:
from nltk.classify import MaxentClassifier

# Labelled examples as (word, suffix, tag) rows, expanded below into the
# (feature dict, label) pairs passed to the classifier.
labelled_words = [
    ("Akki", "i", "NOUN"),
    ("eats", "s", "VERB"),
    ("apple", "e", "NOUN"),
]
train_data = [
    ({"word": word, "suffix": suffix}, tag)
    for word, suffix, tag in labelled_words
]

# Train a MaxEnt classifier with the 'iis' algorithm, capped at 10 iterations.
classifier = MaxentClassifier.train(
    train_data,
    algorithm='iis',
    trace=0,
    max_iter=10,
)

# Classify a featureset that also appears in the training data, then show
# the most informative features.
query_features = {"word": "eats", "suffix": "s"}
print(classifier.classify(query_features))
classifier.show_most_informative_features()


VERB
   1.661 word=='Akki' and label is 'NOUN'
   1.661 suffix=='i' and label is 'NOUN'
   1.661 word=='eats' and label is 'VERB'
   1.661 suffix=='s' and label is 'VERB'
   1.661 word=='apple' and label is 'NOUN'
   1.661 suffix=='e' and label is 'NOUN'
