# IMPORTS

In [1]:
import nltk
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
%matplotlib inline

# Exercise 1

What constraints are required to correctly parse word sequences like I am happy and she is happy but not *you is happy or *they am happy? Implement two solutions for the present tense paradigm of the verb be in English, first taking Grammar (6) as your starting point, and then taking Grammar (18) as the starting point.

In [3]:
grammar1 = nltk.CFG.fromstring("""
    S -> P_SG1 VP_SG1
    S -> P_SG3 VP_SG3
    S -> P_PL VP_PL
    
    VP_SG1 -> V_SG1 ADJ
    VP_SG3 -> V_SG3 ADJ
    VP_PL -> V_PL ADJ
    
    P_SG1 -> 'I'
    P_SG3 -> 'he' | 'she' | 'it'
    P_PL -> 'they' | 'we' | 'you'
    V_SG1 -> 'am'
    V_SG3 -> 'is'
    V_PL -> 'are'
    ADJ -> 'happy'
""")

In [4]:
sent1 = "I am happy".split()
sent2 = "she is happy".split()
sent3 = "you is happy".split()
sent4 = "they am happy".split()

In [5]:
parser1 = nltk.ChartParser(grammar1)

In [6]:
for sent in [sent1, sent2, sent3, sent4]:
    for tree in parser1.parse(sent):
        print(tree)

(S (P_SG1 I) (VP_SG1 (V_SG1 am) (ADJ happy)))
(S (P_SG3 she) (VP_SG3 (V_SG3 is) (ADJ happy)))


In [7]:
grammar2 = nltk.grammar.FeatureGrammar.fromstring("""   
    S -> P[AGR=?a] VP[AGR=?a]
    
    VP[AGR=?a] -> V[AGR=?a] ADJ 
    
    P[AGR=[NUM=sg, PER=1]] -> 'I'
    P[AGR=[NUM=sg, PER=3]] -> 'he' | 'she' | 'it'
    P[AGR=[NUM=pl]] -> 'they' | 'we' | 'you'
    V[AGR=[NUM=sg, PER=1]] -> 'am'
    V[AGR=[NUM=sg, PER=3]] -> 'is'
    V[AGR=[NUM=pl]] -> 'are'
    ADJ -> 'happy'
""")

In [8]:
parser2 = nltk.FeatureEarleyChartParser(grammar2)

In [9]:
for sent in [sent1, sent2, sent3, sent4]:
    for tree in parser2.parse(sent):
        print(tree)

(S[]
  (P[AGR=[NUM='sg', PER=1]] I)
  (VP[AGR=[NUM='sg', PER=1]]
    (V[AGR=[NUM='sg', PER=1]] am)
    (ADJ[] happy)))
(S[]
  (P[AGR=[NUM='sg', PER=3]] she)
  (VP[AGR=[NUM='sg', PER=3]]
    (V[AGR=[NUM='sg', PER=3]] is)
    (ADJ[] happy)))


# Exercise 2

Develop a variant of grammar in 1.1 that uses a feature count to make the distinctions shown below:

(54)	
a. The boy sings.	
b. *Boy sings.

(55)	
a. The boys sing.	
b. Boys sing.

(56)	
a. The boys sing.	
b. Boys sing.

(57)	
a. The water is precious.	
b.Water is precious.

In [18]:
grammar = nltk.grammar.FeatureGrammar.fromstring("""   
    S -> NP[NUM=?n] VP[NUM=?n]
    
    VP[NUM=?n] -> V[NUM=?n] | V[NUM=?n] ADJ
    NP[NUM=?n] -> N[NUM=?n] | DET N[NUM=?n]
    
    V[NUM=sg] -> 'sings' | 'is'
    V[NUM=pl] -> 'sing'
    N[NUM=sg] -> 'boy' | 'Boy' | 'water' | 'Water'
    N[NUM=pl] -> 'boys' | 'Boys'
    DET -> 'The'
    ADJ -> 'precious'
""")

In [19]:
parser = nltk.FeatureEarleyChartParser(grammar)

In [20]:
sent1 = "The boy sings".split()
sent2 = "Boy sings".split()
sent3 = "The boys sing".split()
sent4 = "Boys sing".split()
sent5 = "The water is precious".split()
sent6 = "Water is precious".split()

In [21]:
for sent in [sent1, sent2, sent3, sent4, sent5, sent6]:
    for tree in parser.parse(sent):
        print(tree)

(S[]
  (NP[NUM='sg'] (DET[] The) (N[NUM='sg'] boy))
  (VP[NUM='sg'] (V[NUM='sg'] sings)))
(S[]
  (NP[NUM='sg'] (N[NUM='sg'] Boy))
  (VP[NUM='sg'] (V[NUM='sg'] sings)))
(S[]
  (NP[NUM='pl'] (DET[] The) (N[NUM='pl'] boys))
  (VP[NUM='pl'] (V[NUM='pl'] sing)))
(S[]
  (NP[NUM='pl'] (N[NUM='pl'] Boys))
  (VP[NUM='pl'] (V[NUM='pl'] sing)))
(S[]
  (NP[NUM='sg'] (DET[] The) (N[NUM='sg'] water))
  (VP[NUM='sg'] (V[NUM='sg'] is) (ADJ[] precious)))
(S[]
  (NP[NUM='sg'] (N[NUM='sg'] Water))
  (VP[NUM='sg'] (V[NUM='sg'] is) (ADJ[] precious)))


# Exercise 3

Write a function subsumes() which holds of two feature structures fs1 and fs2 just in case fs1 subsumes fs2.

In [60]:
fs1 = nltk.FeatStruct(NUMBER=74, STREET='rue Pascal')
fs2 = nltk.FeatStruct(CITY='Paris')
fs3 = nltk.FeatStruct(NUMBER=74, STREET='rue Pascal', CITY='Paris')

In [61]:
def subsumes(fs1, fs2):
    return fs1.unify(fs2) == fs2

In [62]:
subsumes(fs1, fs2)

False

In [63]:
subsumes(fs1, fs3)

True

# Exercise 4

Modify the grammar illustrated in (28) to incorporate a bar feature for dealing with phrasal projections.

In [64]:
grammar = nltk.grammar.FeatureGrammar.fromstring("""
    VP[TENSE=?t, NUM=?n, BAR=1] -> V[SUBCAT=intrans, TENSE=?t, NUM=?n, BAR=0]
    VP[TENSE=?t, NUM=?n, BAR=1] -> V[SUBCAT=trans, TENSE=?t, NUM=?n, BAR=0] NP
    VP[TENSE=?t, NUM=?n, BAR=1] -> V[SUBCAT=clause, TENSE=?t, NUM=?n, BAR=0] SBar

    V[SUBCAT=intrans, TENSE=pres, NUM=sg, BAR=0] -> 'disappears' | 'walks'
    V[SUBCAT=trans, TENSE=pres, NUM=sg, BAR=0] -> 'sees' | 'likes'
    V[SUBCAT=clause, TENSE=pres, NUM=sg, BAR=0] -> 'says' | 'claims'

    V[SUBCAT=intrans, TENSE=pres, NUM=pl, BAR=0] -> 'disappear' | 'walk'
    V[SUBCAT=trans, TENSE=pres, NUM=pl, BAR=0] -> 'see' | 'like'
    V[SUBCAT=clause, TENSE=pres, NUM=pl, BAR=0] -> 'say' | 'claim'

    V[SUBCAT=intrans, TENSE=past, NUM=?n, BAR=0] -> 'disappeared' | 'walked'
    V[SUBCAT=trans, TENSE=past, NUM=?n, BAR=0] -> 'saw' | 'liked'
""")

# Exercise 5

Modify the German grammar in 3.2 to incorporate the treatment of subcategorization presented in 3.

In [65]:
grammar = nltk.grammar.FeatureGrammar.fromstring("""
    # Grammar Productions
    S -> NP[CASE=nom, AGR=?a] VP[AGR=?a]
    NP[CASE=?c, AGR=?a] -> PRO[CASE=?c, AGR=?a]
    NP[CASE=?c, AGR=?a] -> Det[CASE=?c, AGR=?a] N[CASE=?c, AGR=?a]
    VP[AGR=?a] -> V[SUBCAT=intrans, AGR=?a]
    VP[AGR=?a] -> V[SUBCAT=trans, OBJCASE=?c, AGR=?a] NP[CASE=?c]
    # Lexical Productions
    # Singular determiners
    # masc
    Det[CASE=nom, AGR=[GND=masc,PER=3,NUM=sg]] -> 'der'
    Det[CASE=dat, AGR=[GND=masc,PER=3,NUM=sg]] -> 'dem'
    Det[CASE=acc, AGR=[GND=masc,PER=3,NUM=sg]] -> 'den'
    # fem
    Det[CASE=nom, AGR=[GND=fem,PER=3,NUM=sg]] -> 'die'
    Det[CASE=dat, AGR=[GND=fem,PER=3,NUM=sg]] -> 'der'
    Det[CASE=acc, AGR=[GND=fem,PER=3,NUM=sg]] -> 'die'
    # Plural determiners
    Det[CASE=nom, AGR=[PER=3,NUM=pl]] -> 'die'
    Det[CASE=dat, AGR=[PER=3,NUM=pl]] -> 'den'
    Det[CASE=acc, AGR=[PER=3,NUM=pl]] -> 'die'
    # Nouns
    N[AGR=[GND=masc,PER=3,NUM=sg]] -> 'Hund'
    N[CASE=nom, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunde'
    N[CASE=dat, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunden'
    N[CASE=acc, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunde'
    N[AGR=[GND=fem,PER=3,NUM=sg]] -> 'Katze'
    N[AGR=[GND=fem,PER=3,NUM=pl]] -> 'Katzen'
    # Pronouns
    PRO[CASE=nom, AGR=[PER=1,NUM=sg]] -> 'ich'
    PRO[CASE=acc, AGR=[PER=1,NUM=sg]] -> 'mich'
    PRO[CASE=dat, AGR=[PER=1,NUM=sg]] -> 'mir'
    PRO[CASE=nom, AGR=[PER=2,NUM=sg]] -> 'du'
    PRO[CASE=nom, AGR=[PER=3,NUM=sg]] -> 'er' | 'sie' | 'es'
    PRO[CASE=nom, AGR=[PER=1,NUM=pl]] -> 'wir'
    PRO[CASE=acc, AGR=[PER=1,NUM=pl]] -> 'uns'
    PRO[CASE=dat, AGR=[PER=1,NUM=pl]] -> 'uns'
    PRO[CASE=nom, AGR=[PER=2,NUM=pl]] -> 'ihr'
    PRO[CASE=nom, AGR=[PER=3,NUM=pl]] -> 'sie'
    # Verbs
    V[SUBCAT=intrans, AGR=[NUM=sg,PER=1]] -> 'komme'
    V[SUBCAT=intrans, AGR=[NUM=sg,PER=2]] -> 'kommst'
    V[SUBCAT=intrans, AGR=[NUM=sg,PER=3]] -> 'kommt'
    V[SUBCAT=intrans, AGR=[NUM=pl, PER=1]] -> 'kommen'
    V[SUBCAT=intrans, AGR=[NUM=pl, PER=2]] -> 'kommt'
    V[SUBCAT=intrans, AGR=[NUM=pl, PER=3]] -> 'kommen'
    V[SUBCAT=trans, OBJCASE=acc, AGR=[NUM=sg,PER=1]] -> 'sehe' | 'mag'
    V[SUBCAT=trans, OBJCASE=acc, AGR=[NUM=sg,PER=2]] -> 'siehst' | 'magst'
    V[SUBCAT=trans, OBJCASE=acc, AGR=[NUM=sg,PER=3]] -> 'sieht' | 'mag'
    V[SUBCAT=trans, OBJCASE=dat, AGR=[NUM=sg,PER=1]] -> 'folge' | 'helfe'
    V[SUBCAT=trans, OBJCASE=dat, AGR=[NUM=sg,PER=2]] -> 'folgst' | 'hilfst'
    V[SUBCAT=trans, OBJCASE=dat, AGR=[NUM=sg,PER=3]] -> 'folgt' | 'hilft'
    V[SUBCAT=trans, OBJCASE=acc, AGR=[NUM=pl,PER=1]] -> 'sehen' | 'moegen'
    V[SUBCAT=trans, OBJCASE=acc, AGR=[NUM=pl,PER=2]] -> 'sieht' | 'moegt'
    V[SUBCAT=trans, OBJCASE=acc, AGR=[NUM=pl,PER=3]] -> 'sehen' | 'moegen'
    V[SUBCAT=trans, OBJCASE=dat, AGR=[NUM=pl,PER=1]] -> 'folgen' | 'helfen'
    V[SUBCAT=trans, OBJCASE=dat, AGR=[NUM=pl,PER=2]] -> 'folgt' | 'helft'
    V[SUBCAT=trans, OBJCASE=dat, AGR=[NUM=pl,PER=3]] -> 'folgen' | 'helfen'
""")

In [66]:
sent = 'ich folge den Katzen'.split()
parser = nltk.FeatureEarleyChartParser(grammar)
for tree in parser.parse(sent):
    print(tree)

(S[]
  (NP[AGR=[NUM='sg', PER=1], CASE='nom']
    (PRO[AGR=[NUM='sg', PER=1], CASE='nom'] ich))
  (VP[AGR=[NUM='sg', PER=1]]
    (V[AGR=[NUM='sg', PER=1], OBJCASE='dat', SUBCAT='trans'] folge)
    (NP[AGR=[GND='fem', NUM='pl', PER=3], CASE='dat']
      (Det[AGR=[NUM='pl', PER=3], CASE='dat'] den)
      (N[AGR=[GND='fem', NUM='pl', PER=3]] Katzen))))


# Exercise 6

Develop a feature based grammar that will correctly describe the following Spanish noun phrases:

In [67]:
grammar = nltk.grammar.FeatureGrammar.fromstring("""
    NP -> DET[NUM=?n, GND=?g] N[NUM=?n] ADJ[NUM=?n, GND=?g]
    
    N[NUM=sg] -> 'cuadro' | 'cortina'
    N[NUM=pl] -> 'cuadros' | 'cortinas'
    
    ADJ[NUM=sg, GND=masc] -> 'hermoso'
    ADJ[NUM=pl, GND=masc] -> 'hermosos'
    ADJ[NUM=sg, GND=fem] -> 'hermosa'
    ADJ[NUM=pl, GND=fem] -> 'hermosas'
    
    DET[NUM=sg, GND=masc] -> 'un'
    DET[NUM=pl, GND=masc] ->  'unos'
    DET[NUM=sg, GND=fem] -> 'una'
    DET[NUM=pl, GND=fem] -> 'unas'
""")

In [68]:
sent1 = 'un cuadro hermoso'.split()
sent2 = 'unos cuadros hermosos'.split()
sent3 = 'una cortina hermosa'.split()
sent4 = 'unas cortinas hermosas'.split()

In [69]:
parser = nltk.FeatureEarleyChartParser(grammar)

In [70]:
for sent in [sent1, sent2, sent3, sent4]:
    for tree in parser.parse(sent):
        print(tree)

(NP[]
  (DET[GND='masc', NUM='sg'] un)
  (N[NUM='sg'] cuadro)
  (ADJ[GND='masc', NUM='sg'] hermoso))
(NP[]
  (DET[GND='masc', NUM='pl'] unos)
  (N[NUM='pl'] cuadros)
  (ADJ[GND='masc', NUM='pl'] hermosos))
(NP[]
  (DET[GND='fem', NUM='sg'] una)
  (N[NUM='sg'] cortina)
  (ADJ[GND='fem', NUM='sg'] hermosa))
(NP[]
  (DET[GND='fem', NUM='pl'] unas)
  (N[NUM='pl'] cortinas)
  (ADJ[GND='fem', NUM='pl'] hermosas))


# Exercise 7

Develop your own version of the EarleyChartParser which only prints a trace if the input sequence fails to parse.

# Exercise 8

Consider the feature structures shown in 6.1.

In [71]:
fs1 = nltk.FeatStruct("[A = ?x, B= [C = ?x]]")
fs2 = nltk.FeatStruct("[B = [D = d]]")
fs3 = nltk.FeatStruct("[B = [C = d]]")
fs4 = nltk.FeatStruct("[A = (1)[B = b], C->(1)]")
fs5 = nltk.FeatStruct("[A = (1)[D = ?x], C = [E -> (1), F = ?x] ]")
fs6 = nltk.FeatStruct("[A = [D = d]]")
fs7 = nltk.FeatStruct("[A = [D = d], C = [F = [D = d]]]")
fs8 = nltk.FeatStruct("[A = (1)[D = ?x, G = ?x], C = [B = ?x, E -> (1)] ]")
fs9 = nltk.FeatStruct("[A = [B = b], C = [E = [G = e]]]")
fs10 = nltk.FeatStruct("[A = (1)[B = b], C -> (1)]")

Work out on paper what the result is of the following unifications. (Hint: you might find it useful to draw the graph structures.)

1. fs1 and fs2
2. fs1 and fs3
3. fs4 and fs5
4. fs5 and fs6
5. fs5 and fs7
6. fs8 and fs9
7. fs8 and fs10

Check your answers using Python.

In [72]:
fs1.unify(fs2)

[A=?x, B=[C=?x, D='d']]

In [73]:
fs1.unify(fs3)

[A='d', B=[C='d']]

In [74]:
fs4.unify(fs5)

[A=(1)[B='b', D=?x, E->(1), F=?x], C->(1)]

In [75]:
fs5.unify(fs6)

[A=(1)[D='d'], C=[E->(1), F='d']]

In [76]:
fs5.unify(fs7)

In [77]:
fs8.unify(fs9)

[A=(1)[B='b', D='e', G='e'], C=[B='e', E->(1)]]

In [78]:
fs8.unify(fs10)

[A=(1)[B='b', D='b', E->(1), G='b'], C->(1)]

# Exercise 9

List two feature structures that subsume [A=?x, B=?x].

In [79]:
fs1 = nltk.FeatStruct('[A=?x, B=?x]')

In [80]:
fs2 = nltk.FeatStruct('[A=?x]')
fs3 = nltk.FeatStruct('[B=?x]')
fs4 = nltk.FeatStruct('[A=?x, B=?x]')

In [81]:
subsumes(fs2, fs1), subsumes(fs3, fs1), subsumes(fs4, fs1)

(True, True, True)

# Exercise 10

Ignoring structure sharing, give an informal algorithm for unifying two feature structures.

# Exercise 11

Extend the German grammar in 3.2 so that it can handle so-called verb-second structures like the following:
Heute sieht der Hund die Katze.

In [11]:
grammar = nltk.grammar.FeatureGrammar.fromstring("""
    # Grammar Productions
    S -> NP[CASE=nom, AGR=?a] VP[AGR=?a]
    S -> ADV V[SUBCAT=intrans, AGR=?a] NP[CASE=nom, AGR=?a]
    S -> ADV V[SUBCAT=trans, OBJCASE=?c, AGR=?a] NP[CASE=nom, AGR=?a] NP[CASE=?c]
    NP[CASE=?c, AGR=?a] -> PRO[CASE=?c, AGR=?a]
    NP[CASE=?c, AGR=?a] -> Det[CASE=?c, AGR=?a] N[CASE=?c, AGR=?a]
    VP[AGR=?a] -> V[SUBCAT=intrans, AGR=?a]
    VP[AGR=?a] -> V[SUBCAT=trans, OBJCASE=?c, AGR=?a] NP[CASE=?c]
    VP[AGR=?a] -> V[SUBCAT=intrans, AGR=?a] ADV
    VP[AGR=?a] -> V[SUBCAT=trans, OBJCASE=?c, AGR=?a] ADV NP[CASE=?c]
    # Lexical Productions
    # Singular determiners
    # masc
    Det[CASE=nom, AGR=[GND=masc,PER=3,NUM=sg]] -> 'der'
    Det[CASE=dat, AGR=[GND=masc,PER=3,NUM=sg]] -> 'dem'
    Det[CASE=acc, AGR=[GND=masc,PER=3,NUM=sg]] -> 'den'
    # fem
    Det[CASE=nom, AGR=[GND=fem,PER=3,NUM=sg]] -> 'die'
    Det[CASE=dat, AGR=[GND=fem,PER=3,NUM=sg]] -> 'der'
    Det[CASE=acc, AGR=[GND=fem,PER=3,NUM=sg]] -> 'die'
    # Plural determiners
    Det[CASE=nom, AGR=[PER=3,NUM=pl]] -> 'die'
    Det[CASE=dat, AGR=[PER=3,NUM=pl]] -> 'den'
    Det[CASE=acc, AGR=[PER=3,NUM=pl]] -> 'die'
    # Nouns
    N[AGR=[GND=masc,PER=3,NUM=sg]] -> 'Hund'
    N[CASE=nom, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunde'
    N[CASE=dat, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunden'
    N[CASE=acc, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunde'
    N[AGR=[GND=fem,PER=3,NUM=sg]] -> 'Katze'
    N[AGR=[GND=fem,PER=3,NUM=pl]] -> 'Katzen'
    # Pronouns
    PRO[CASE=nom, AGR=[PER=1,NUM=sg]] -> 'ich'
    PRO[CASE=acc, AGR=[PER=1,NUM=sg]] -> 'mich'
    PRO[CASE=dat, AGR=[PER=1,NUM=sg]] -> 'mir'
    PRO[CASE=nom, AGR=[PER=2,NUM=sg]] -> 'du'
    PRO[CASE=nom, AGR=[PER=3,NUM=sg]] -> 'er' | 'sie' | 'es'
    PRO[CASE=nom, AGR=[PER=1,NUM=pl]] -> 'wir'
    PRO[CASE=acc, AGR=[PER=1,NUM=pl]] -> 'uns'
    PRO[CASE=dat, AGR=[PER=1,NUM=pl]] -> 'uns'
    PRO[CASE=nom, AGR=[PER=2,NUM=pl]] -> 'ihr'
    PRO[CASE=nom, AGR=[PER=3,NUM=pl]] -> 'sie'
    # Verbs
    V[SUBCAT=intrans, AGR=[NUM=sg,PER=1]] -> 'komme'
    V[SUBCAT=intrans, AGR=[NUM=sg,PER=2]] -> 'kommst'
    V[SUBCAT=intrans, AGR=[NUM=sg,PER=3]] -> 'kommt'
    V[SUBCAT=intrans, AGR=[NUM=pl, PER=1]] -> 'kommen'
    V[SUBCAT=intrans, AGR=[NUM=pl, PER=2]] -> 'kommt'
    V[SUBCAT=intrans, AGR=[NUM=pl, PER=3]] -> 'kommen'
    V[SUBCAT=trans, OBJCASE=acc, AGR=[NUM=sg,PER=1]] -> 'sehe' | 'mag'
    V[SUBCAT=trans, OBJCASE=acc, AGR=[NUM=sg,PER=2]] -> 'siehst' | 'magst'
    V[SUBCAT=trans, OBJCASE=acc, AGR=[NUM=sg,PER=3]] -> 'sieht' | 'mag'
    V[SUBCAT=trans, OBJCASE=dat, AGR=[NUM=sg,PER=1]] -> 'folge' | 'helfe'
    V[SUBCAT=trans, OBJCASE=dat, AGR=[NUM=sg,PER=2]] -> 'folgst' | 'hilfst'
    V[SUBCAT=trans, OBJCASE=dat, AGR=[NUM=sg,PER=3]] -> 'folgt' | 'hilft'
    V[SUBCAT=trans, OBJCASE=acc, AGR=[NUM=pl,PER=1]] -> 'sehen' | 'moegen'
    V[SUBCAT=trans, OBJCASE=acc, AGR=[NUM=pl,PER=2]] -> 'sieht' | 'moegt'
    V[SUBCAT=trans, OBJCASE=acc, AGR=[NUM=pl,PER=3]] -> 'sehen' | 'moegen'
    V[SUBCAT=trans, OBJCASE=dat, AGR=[NUM=pl,PER=1]] -> 'folgen' | 'helfen'
    V[SUBCAT=trans, OBJCASE=dat, AGR=[NUM=pl,PER=2]] -> 'folgt' | 'helft'
    V[SUBCAT=trans, OBJCASE=dat, AGR=[NUM=pl,PER=3]] -> 'folgen' | 'helfen'
    # Adverbs
    ADV -> 'Heute' | 'heute'
""")

In [12]:
sent = 'Heute sieht der Hund die Katze'.split()

In [13]:
parser = nltk.FeatureEarleyChartParser(grammar)

In [14]:
for tree in parser.parse(sent):
    print(tree)

(S[]
  (ADV[] Heute)
  (V[AGR=[NUM='sg', PER=3], OBJCASE='acc', SUBCAT='trans'] sieht)
  (NP[AGR=[GND='masc', NUM='sg', PER=3], CASE='nom']
    (Det[AGR=[GND='masc', NUM='sg', PER=3], CASE='nom'] der)
    (N[AGR=[GND='masc', NUM='sg', PER=3]] Hund))
  (NP[AGR=[GND='fem', NUM='sg', PER=3], CASE='acc']
    (Det[AGR=[GND='fem', NUM='sg', PER=3], CASE='acc'] die)
    (N[AGR=[GND='fem', NUM='sg', PER=3]] Katze)))


# Exercise 12

Seemingly synonymous verbs have slightly different syntactic properties (Levin, 1993). Consider the patterns of grammaticality for the verbs loaded, filled, and dumped below. Can you write grammar productions to handle such data?

(59)	
a. The farmer loaded the cart with sand	
b. The farmer loaded sand into the cart	
c. The farmer filled the cart with sand	
d. The farmer filled sand into the cart	
e. The farmer dumped the cart with sand	
f. The farmer dumped sand into the cart

In [15]:
grammar = nltk.grammar.FeatureGrammar.fromstring("""
    S -> NP VP PP
    
    VP -> V NP
    PP -> Prep[COUNT=?c] NP[COUNT=?c]
    NP[COUNT=?c] -> Det N[COUNT=?c] | N[COUNT=?c]
    
    V -> 'filled' | 'loaded' | 'dumped'
    N[+COUNT] -> 'farmer' | 'cart'
    N[-COUNT] -> 'sand'
    Prep[+COUNT] -> 'into'
    Prep[-COUNT] -> 'with'
    Det -> 'The' | 'the'
""")

In [16]:
sent1 = 'The farmer loaded the cart with sand'.split()
sent2 = 'The farmer loaded sand into the cart'.split()
sent3 = 'The farmer filled the cart with sand'.split()
sent4 = 'The farmer filled sand into the cart'.split()
sent5 = 'The farmer dumped the cart with sand'.split()
sent6 = 'The farmer dumped sand into the cart'.split()

In [17]:
parser = nltk.FeatureEarleyChartParser(grammar)

In [18]:
for sent in [sent1, sent2, sent3, sent4, sent5, sent6]:
    for tree in parser.parse(sent):
        print(tree)

(S[]
  (NP[+COUNT] (Det[] The) (N[+COUNT] farmer))
  (VP[] (V[] loaded) (NP[+COUNT] (Det[] the) (N[+COUNT] cart)))
  (PP[] (Prep[-COUNT] with) (NP[-COUNT] (N[-COUNT] sand))))
(S[]
  (NP[+COUNT] (Det[] The) (N[+COUNT] farmer))
  (VP[] (V[] loaded) (NP[-COUNT] (N[-COUNT] sand)))
  (PP[]
    (Prep[+COUNT] into)
    (NP[+COUNT] (Det[] the) (N[+COUNT] cart))))
(S[]
  (NP[+COUNT] (Det[] The) (N[+COUNT] farmer))
  (VP[] (V[] filled) (NP[+COUNT] (Det[] the) (N[+COUNT] cart)))
  (PP[] (Prep[-COUNT] with) (NP[-COUNT] (N[-COUNT] sand))))
(S[]
  (NP[+COUNT] (Det[] The) (N[+COUNT] farmer))
  (VP[] (V[] filled) (NP[-COUNT] (N[-COUNT] sand)))
  (PP[]
    (Prep[+COUNT] into)
    (NP[+COUNT] (Det[] the) (N[+COUNT] cart))))
(S[]
  (NP[+COUNT] (Det[] The) (N[+COUNT] farmer))
  (VP[] (V[] dumped) (NP[+COUNT] (Det[] the) (N[+COUNT] cart)))
  (PP[] (Prep[-COUNT] with) (NP[-COUNT] (N[-COUNT] sand))))
(S[]
  (NP[+COUNT] (Det[] The) (N[+COUNT] farmer))
  (VP[] (V[] dumped) (NP[-COUNT] (N[-COUNT] sand)))
  (PP[

# Exercise 13

Morphological paradigms are rarely completely regular, in the sense of every cell in the matrix having a different realization. For example, the present tense conjugation of the lexeme walk only has two distinct forms: walks for the 3rd person singular, and walk for all other combinations of person and number. A successful analysis should not require redundantly specifying that 5 out of the 6 possible morphological combinations have the same realization. Propose and implement a method for dealing with this.

# Exercise 14

So-called head features are shared between the parent node and head child. For example, TENSE is a head feature that is shared between a VP and its head V child. See (Gazdar, Klein, & and, 1985) for more details. Most of the features we have looked at are head features — exceptions are SUBCAT and SLASH. Since the sharing of head features is predictable, it should not need to be stated explicitly in the grammar productions. Develop an approach that automatically accounts for this regular behavior of head features.

# Exercise 15

Extend NLTK's treatment of feature structures to allow unification into list-valued features, and use this to implement an HPSG-style analysis of subcategorization, whereby the SUBCAT of a head category is the concatenation its complements' categories with the SUBCAT value of its immediate parent.

# Exercise 16

Extend NLTK's treatment of feature structures to allow productions with underspecified categories, such as S[-INV] --> ?x S/?x.

# Exercise 17

Extend NLTK's treatment of feature structures to allow typed feature structures.

# Exercise 18

Pick some grammatical constructions described in (Huddleston & Pullum, 2002), and develop a feature based grammar to account for them.