In [321]:
# Context Free Grammar to parse 4 given sentences

import nltk
from nltk import FreqDist

# Production rules of the context-free grammar. Phrase-level rules come
# first (S, NP, VP, ADVP, ADJP), followed by the lexical rules that map each
# part-of-speech tag to the words of the four target sentences.
cfg_rules = """
S -> NP VP | VP
NP -> PRP | DT ADJP NN | NN | PRP NNS | CD NNS
VP -> VBD NP NP | MD VP | VB ADVP | VBP RB ADVP ADJP | VBD S | TO VP | VB NP ADVP
ADVP -> RB | NP RB
NNS -> "kids" | "days"
RB -> "now" | "always" | "not" | "ago"
VB -> "go" | "visit"
MD -> "may"
ADJP -> JJ 
PRP -> "We" |"You" | "Their" | "She" | "me"
VBD -> "had" | "came"
CD -> "two"
VBP -> "are"
DT -> "a"
TO -> "to"
JJ -> "nice" | "naive"
NN -> "party" | "yesterday"
"""

# Compile the rule text into an NLTK grammar object used by the parsers.
grammar = nltk.CFG.fromstring(cfg_rules)

In [291]:
# Parse the first sentence - "We had a nice party yesterday"
rd_parser = nltk.RecursiveDescentParser(grammar)
senttext = "We had a nice party yesterday"
sentlist = senttext.split()
print(sentlist)

# Materialise the parser's result so it can be checked for emptiness
# before printing.
trees = rd_parser.parse(sentlist)
treelist = list(trees)

# Report a missing parse explicitly rather than failing on treelist[0].
if not treelist:
    print("No parse found for:", senttext)
for tree in treelist:
    print(tree)


['We', 'had', 'a', 'nice', 'party', 'yesterday']
(S
  (NP (PRP We))
  (VP
    (VBD had)
    (NP (DT a) (ADJP (JJ nice)) (NN party))
    (NP (NN yesterday))))


In [292]:
# Parse the second sentence - "She came to visit me two days ago"
rd_parser = nltk.RecursiveDescentParser(grammar)
senttext = "She came to visit me two days ago"
sentlist = senttext.split()
print(sentlist)

# Materialise the parser's result so it can be checked for emptiness
# before printing.
trees = rd_parser.parse(sentlist)
treelist = list(trees)

# Report a missing parse explicitly rather than failing on treelist[0].
if not treelist:
    print("No parse found for:", senttext)
for tree in treelist:
    print(tree)

['She', 'came', 'to', 'visit', 'me', 'two', 'days', 'ago']
(S
  (NP (PRP She))
  (VP
    (VBD came)
    (S
      (VP
        (TO to)
        (VP
          (VB visit)
          (NP (PRP me))
          (ADVP (NP (CD two) (NNS days)) (RB ago)))))))


In [293]:
# Parse the third sentence - "You may go now"
rd_parser = nltk.RecursiveDescentParser(grammar)
senttext = "You may go now"
sentlist = senttext.split()
print(sentlist)

# Materialise the parser's result so it can be checked for emptiness
# before printing.
trees = rd_parser.parse(sentlist)
treelist = list(trees)

# Report a missing parse explicitly rather than failing on treelist[0].
if not treelist:
    print("No parse found for:", senttext)
for tree in treelist:
    print(tree)

['You', 'may', 'go', 'now']
(S (NP (PRP You)) (VP (MD may) (VP (VB go) (ADVP (RB now)))))


In [340]:
# Parse the fourth sentence - "Their kids are not always naive"
rd_parser = nltk.RecursiveDescentParser(grammar)
senttext = "Their kids are not always naive"
sentlist = senttext.split()
print(sentlist)

# Materialise the parser's result so it can be checked for emptiness
# before printing.
trees = rd_parser.parse(sentlist)
treelist = list(trees)

# Report a missing parse explicitly rather than failing on treelist[0].
if not treelist:
    print("No parse found for:", senttext)
for tree in treelist:
    print(tree)

['Their', 'kids', 'are', 'not', 'always', 'naive']
(S
  (NP (PRP Their) (NNS kids))
  (VP (VBP are) (RB not) (ADVP (RB always)) (ADJP (JJ naive))))


In [341]:
# Extra check: parse a new sentence built only from words the grammar
# already knows - "She may visit now"
rd_parser = nltk.RecursiveDescentParser(grammar)
senttext = "She may visit now"
sentlist = senttext.split()
print(sentlist)

# Materialise the parser's result so it can be checked for emptiness
# before printing.
trees = rd_parser.parse(sentlist)
treelist = list(trees)

# Report a missing parse explicitly rather than failing on treelist[0].
if not treelist:
    print("No parse found for:", senttext)
for tree in treelist:
    print(tree)

['She', 'may', 'visit', 'now']
(S (NP (PRP She)) (VP (MD may) (VP (VB visit) (ADVP (RB now)))))


In [342]:
# Extra check: parse a new sentence built only from words the grammar
# already knows - "You are not always nice"
rd_parser = nltk.RecursiveDescentParser(grammar)
senttext = "You are not always nice"
sentlist = senttext.split()
print(sentlist)

# Materialise the parser's result so it can be checked for emptiness
# before printing.
trees = rd_parser.parse(sentlist)
treelist = list(trees)

# Report a missing parse explicitly rather than failing on treelist[0].
if not treelist:
    print("No parse found for:", senttext)
for tree in treelist:
    print(tree)

['You', 'are', 'not', 'always', 'nice']
(S
  (NP (PRP You))
  (VP (VBP are) (RB not) (ADVP (RB always)) (ADJP (JJ nice))))


In [343]:
# Extra check: parse a new sentence built only from words the grammar
# already knows - "She came two kids yesterday"
# NOTE(review): the grammar accepts this via VP -> VBD NP NP; presumably it
# is included to show that the grammar over-generates - confirm intent.
rd_parser = nltk.RecursiveDescentParser(grammar)
senttext = "She came two kids yesterday"
sentlist = senttext.split()
print(sentlist)

# Materialise the parser's result so it can be checked for emptiness
# before printing.
trees = rd_parser.parse(sentlist)
treelist = list(trees)

# Report a missing parse explicitly rather than failing on treelist[0].
if not treelist:
    print("No parse found for:", senttext)
for tree in treelist:
    print(tree)

['She', 'came', 'two', 'kids', 'yesterday']
(S
  (NP (PRP She))
  (VP (VBD came) (NP (CD two) (NNS kids)) (NP (NN yesterday))))


In [331]:
# Build a mini corpus from the 4 given sentences and count how often each
# word occurs.
corpus = "We had a nice party yesterday She came to visit me two days ago You may go now Their kids are not always naive"
corpus_words = corpus.split()
fdist = FreqDist(corpus_words)

# Every word occurs exactly once, so all words are equally frequent in this
# corpus. The assertion keeps that claim honest if the corpus is ever edited.
assert set(fdist.values()) == {1}, "expected every corpus word to occur exactly once"
fdist

FreqDist({'She': 1,
          'Their': 1,
          'We': 1,
          'You': 1,
          'a': 1,
          'ago': 1,
          'always': 1,
          'are': 1,
          'came': 1,
          'days': 1,
          'go': 1,
          'had': 1,
          'kids': 1,
          'may': 1,
          'me': 1,
          'naive': 1,
          'nice': 1,
          'not': 1,
          'now': 1,
          'party': 1,
          'to': 1,
          'two': 1,
          'visit': 1,
          'yesterday': 1})

In [334]:
# Probabilistic grammar (PCFG)
# Each alternative carries its probability in square brackets; for every
# left-hand-side symbol the probabilities of its alternatives must sum to 1.
pcfg_rules = """
S -> NP VP[0.9] | VP [0.1]
NP -> PRP [0.5]| DT ADJP NN [0.2]| NN [0.1]| PRP NNS [0.1]| CD NNS[0.1]
VP -> VBD NP NP [0.3]| MD VP [0.2]| VB ADVP[0.1] | VBP RB ADVP ADJP[0.1] | VBD S [0.1]| TO VP[0.1] | VB NP ADVP[0.1]
ADVP -> RB [0.5]| NP RB[0.5]
NNS -> "kids"[0.5] | "days"[0.5]
RB -> "now"[0.25] | "always"[0.25] | "not"[0.25] | "ago"[0.25]
VB -> "go"[0.5] | "visit"[0.5]
MD -> "may"[1.0]
ADJP -> JJ [1.0]
PRP -> "We" [0.2]|"You"[0.2] | "Their"[0.2] | "She"[0.2] | "me"[0.2]
VBD -> "had"[0.5] | "came"[0.5]
CD -> "two"[1.0]
VBP -> "are"[1.0]
DT -> "a"[1.0]
TO -> "to"[1.0]
JJ -> "nice" [0.5]| "naive"[0.5]
NN -> "party" [0.5]| "yesterday"[0.5]
"""

# Compile the weighted rule text into an NLTK probabilistic grammar.
prob_grammar = nltk.PCFG.fromstring(pcfg_rules)

In [335]:
# Build a Viterbi parser over the probabilistic grammar; the following cells
# use it to print each returned tree together with its probability.
viterbi_parser = nltk.ViterbiParser(prob_grammar)

In [336]:
# Parse sentence 1 with the PCFG; each printed tree carries its probability.
viterbi_tokens = ['We', 'had', 'a', 'nice', 'party', 'yesterday']
for tree in viterbi_parser.parse(viterbi_tokens):
    print(tree)

(S
  (NP (PRP We))
  (VP
    (VBD had)
    (NP (DT a) (ADJP (JJ nice)) (NN party))
    (NP (NN yesterday)))) (p=3.375e-05)


In [337]:
# Parse sentence 2 with the PCFG; each printed tree carries its probability.
viterbi_tokens = ['She', 'came', 'to', 'visit', 'me', 'two', 'days', 'ago']
for tree in viterbi_parser.parse(viterbi_tokens):
    print(tree)

(S
  (NP (PRP She))
  (VP
    (VBD came)
    (S
      (VP
        (TO to)
        (VP
          (VB visit)
          (NP (PRP me))
          (ADVP (NP (CD two) (NNS days)) (RB ago))))))) (p=1.40625e-09)


In [338]:
# Parse sentence 3 with the PCFG; each printed tree carries its probability.
viterbi_tokens = ['You', 'may', 'go', 'now']
for tree in viterbi_parser.parse(viterbi_tokens):
    print(tree)

(S
  (NP (PRP You))
  (VP (MD may) (VP (VB go) (ADVP (RB now))))) (p=0.0001125)


In [339]:
# Parse sentence 4 with the PCFG; each printed tree carries its probability.
viterbi_tokens = ['Their', 'kids', 'are', 'not', 'always', 'naive']
for tree in viterbi_parser.parse(viterbi_tokens):
    print(tree)

(S
  (NP (PRP Their) (NNS kids))
  (VP
    (VBP are)
    (RB not)
    (ADVP (RB always))
    (ADJP (JJ naive)))) (p=1.40625e-05)
