# IMPORTS

In [1]:
import nltk
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
%matplotlib inline

# Exercise 1

The IOB format categorizes tagged tokens as I, O and B. Why are three tags necessary? What problem would be caused if we used I and O tags exclusively?

We cannot determine where chunks actually start because there will be no borders between adjacent chunks

# Exercise 2

Write a tag pattern to match noun phrases containing plural head nouns, e.g. "many/JJ researchers/NNS", "two/CD weeks/NNS", "both/DT new/JJ positions/NNS". Try to do this by generalizing the tag pattern that handled singular noun phrases.

In [5]:
from nltk.corpus import brown

In [9]:
chunk_string = "NP: {<(JJ|CD|DT).*>+<NNS?>}"

In [12]:
def search_chunks(chunk_string, tagged_sents):
    chunk_label = chunk_string[:chunk_string.find(':')]
    cp = nltk.RegexpParser(chunk_string)
    for sent in tagged_sents:
        tree = cp.parse(sent)
        for subtree in tree.subtrees():
            if subtree.label() == chunk_label: print(subtree)

In [13]:
search_chunks(chunk_string, brown.tagged_sents()[:50])

(NP recent/JJ primary/NN)
(NP any/DTI irregularities/NNS)
(NP over-all/JJ charge/NN)
(NP hard-fought/JJ primary/NN)
(NP relative/JJ handful/NN)
(NP such/JJ reports/NNS)
(NP widespread/JJ interest/NN)
(NP this/DT city/NN)
(NP these/DTS laws/NNS)
(NP grand/JJ jury/NN)
(NP best/JJT interest/NN)
(NP these/DTS two/CD offices/NNS)
(NP greater/JJR efficiency/NN)
(NP clerical/JJ personnel/NNS)
(NP this/DT problem/NN)
(NP outgoing/JJ jury/NN)
(NP effective/JJ date/NN)
(NP orderly/JJ implementation/NN)
(NP grand/JJ jury/NN)
(NP federal/JJ funds/NNS)
(NP foster/JJ homes/NNS)
(NP major/JJ items/NNS)
(NP general/JJ assistance/NN)
(NP these/DTS funds/NNS)
(NP this/DT money/NN)
(NP proportionate/JJ distribution/NN)
(NP these/DTS funds/NNS)
(NP this/DT program/NN)
(NP populous/JJ counties/NNS)
(NP some/DTI portion/NN)
(NP these/DTS available/JJ funds/NNS)
(NP disproportionate/JJ burden/NN)
(NP two/CD previous/JJ grand/JJ juries/NNS)
(NP These/DTS actions/NNS)
(NP undue/JJ costs/NNS)
(NP unmeritorious/

# Exercise 3

Pick one of the three chunk types in the CoNLL corpus. Inspect the CoNLL corpus and try to observe any patterns in the POS tag sequences that make up this kind of chunk. Develop a simple chunker using the regular expression chunker nltk.RegexpParser. Discuss any tag sequences that are difficult to chunk reliably.

In [17]:
from nltk.corpus import conll2000

In [27]:
def search_chunk_type(chunked_sents, chunk_type):
    for sent in chunked_sents:
        for subtree in sent.subtrees():
            if subtree.label() == chunk_type: print(subtree)

In [31]:
search_chunk_type(conll2000.chunked_sents('train.txt')[:50], 'VP')

(VP is/VBZ widely/RB expected/VBN to/TO take/VB)
(VP fail/VB to/TO show/VB)
(VP has/VBZ helped/VBN to/TO prevent/VB)
(VP reckon/VBP)
(VP has/VBZ been/VBN eroded/VBN)
(VP to/TO announce/VB)
(VP has/VBZ increased/VBN)
(VP being/VBG forced/VBN to/TO increase/VB)
(VP to/TO defend/VB)
(VP say/VBP)
(VP are/VBP)
(VP said/VBD)
(VP is/VBZ)
(VP could/MD be/VB)
(VP noted/VBD)
(VP range/VBP)
(VP expect/VBP)
(VP to/TO show/VB)
(VP reported/VBD)
(VP registered/VBN)
(VP are/VBP topped/VBN)
(VP said/VBD)
(VP is/VBZ)
(VP is/VBZ transforming/VBG)
(VP to/TO boost/VB)
(VP remains/VBZ)
(VP reckons/VBZ)
(VP will/MD narrow/VB)
(VP said/VBD)
(VP believes/VBZ)
(VP could/MD lead/VB)
(VP could/MD narrow/VB)
(VP forecasts/VBZ)
(VP warns/VBZ)
(VP are/VBP)
(VP wo/MD n't/RB advance/VB)
(VP will/MD want/VB to/TO see/VB)
(VP adjusting/VBG)
(VP noted/VBD)
(VP will/MD want/VB to/TO go/VB)
(VP remains/VBZ)
(VP warned/VBD)
(VP can/MD be/VB expected/VBN)
(VP takes/VBZ)
(VP are/VBP)
(VP released/VBD)
(VP do/VBP n't/RB sugge

In [56]:
cp = nltk.RegexpParser('VP: {<MD>?<V.*>*<RB>?<TO>?<V.*>+}')

In [57]:
print(cp.evaluate(conll2000.chunked_sents('test.txt', chunk_types=['VP'])))

ChunkParse score:
    IOB Accuracy:  97.3%%
    Precision:     82.6%%
    Recall:        88.7%%
    F-Measure:     85.6%%


# Exercise 4

An early definition of chunk was the material that occurs between chinks. Develop a chunker that starts by putting the whole sentence in a single chunk, and then does the rest of its work solely by chinking. Determine which tags (or tag sequences) are most likely to make up chinks with the help of your own utility program. Compare the performance and simplicity of this approach relative to a chunker based entirely on chunk rules.

In [6]:
from nltk.corpus import conll2000

In [41]:
chunk_string = """
    VP:
      {<.*>+}
      }<(JJ|NN|IN|CD|DT|\W|CC|PRP|W).*>+{
"""

In [42]:
cp = nltk.RegexpParser(chunk_string)

In [43]:
print(cp.evaluate(conll2000.chunked_sents('test.txt', chunk_types=['VP'])))

ChunkParse score:
    IOB Accuracy:  93.2%%
    Precision:     59.2%%
    Recall:        79.5%%
    F-Measure:     67.9%%


In the case of VP chunking, building chunk parser using chinking is not a good idea according to performance and simplicity perspective

# Exercise 5

Write a tag pattern to cover noun phrases that contain gerunds, e.g. "the/DT receiving/VBG end/NN", "assistant/NN managing/VBG editor/NN". Add these patterns to the grammar, one per line. Test your work using some tagged sentences of your own devising.

In [69]:
from nltk.corpus import brown

In [70]:
def test_pattern(pattern, n):
    label = pattern.split()[0][:-1]
    cp = nltk.RegexpParser(pattern)
    for sent in brown.tagged_sents()[:n]:
        tree = cp.parse(sent)
        for subtree in tree.subtrees():
            if subtree.label() == label: 
                print(subtree)

In [71]:
np_vbg_pattern = """
    NP_VBG:
      {<DT><VBG><N.*>}
      {<N.*><VBG><N.*>}
"""

In [72]:
test_pattern(np_vbg_pattern, 1000)

(NP_VBG County/NN-TL purchasing/VBG departments/NNS)
(NP_VBG Dallas/NP authorizing/VBG establishment/NN)
(NP_VBG Galveston/NP authorizing/VBG establishment/NN)
(NP_VBG school/NN teaching/VBG certificate/NN)
(NP_VBG welfare/NN consulting/VBG firm/NN)
(NP_VBG cent/NN starting/VBG Jan./NP)
(NP_VBG days/NNS following/VBG discharge/NN)
(NP_VBG Community/NN visiting/VBG nurse/NN)
(NP_VBG law/NN providing/VBG grants/NNS)
(NP_VBG laws/NNS regulating/VBG Sunday/NR)
(NP_VBG ordinance/NN permitting/VBG motorists/NNS)
(NP_VBG state/NN financing/VBG aid/NN)
(NP_VBG 1920s/NNS following/VBG adoption/NN)
(NP_VBG Administration's/NN$-TL housing/VBG bill/NN)
(NP_VBG each/DT passing/VBG week/NN)
(NP_VBG points/NNS bordering/VBG Lafayette/NP-TL)
(NP_VBG another/DT vexing/VBG issue/NN)
(NP_VBG problem/NN confronting/VBG Davis/NP)
(NP_VBG vouchers/NNS certifying/VBG work/NN)
(NP_VBG bill/NN raising/VBG fees/NNS)
(NP_VBG dinner/NN honoring/VBG Sen./NN-TL)


# Exercise 6

Write one or more tag patterns to handle coordinated noun phrases, e.g. "July/NNP and/CC August/NNP", "all/DT your/PRP$ managers/NNS and/CC supervisors/NNS", "company/NN courts/NNS and/CC adjudicators/NNS".

In [73]:
np_cc_pattern = """
    NP_CC:
      {<N.*><CC><N.*>}
      {<DT><P.*><N.*><CC><N.*>}
      {<N.*><N.*><CC><N.*>}
"""

In [74]:
test_pattern(np_cc_pattern, 1000)

(NP_CC praise/NN and/CC thanks/NNS)
(NP_CC registration/NN and/CC election/NN)
(NP_CC Atlanta/NP and/CC Fulton/NP-TL)
(NP_CC guardians/NNS and/CC administrators/NNS)
(NP_CC fees/NNS and/CC compensation/NN)
(NP_CC intern/NN or/CC extern/NN)
(NP_CC night/NN and/CC weekend/NN)
(NP_CC administration/NN and/CC operation/NN)
(NP_CC Bellwood/NP and/CC Alpharetta/NP)
(NP_CC man/NN and/CC wife/NN)
(NP_CC principal/NN and/CC chairman/NN)
(NP_CC Davis/NP and/CC Bush/NP)
(NP_CC insurance/NN and/CC pipeline/NN)
(NP_CC Harlingen/NP and/CC Howard/NP)
(NP_CC Tarrant/NP and/CC El/NP)
(NP_CC Berry/NP and/CC Joe/NP)
(NP_CC gifts/NNS and/CC donations/NNS)
(NP_CC stocks/NNS and/CC bonds/NNS)
(NP_CC Legislature/NN-TL and/CC Congress/NP)
(NP_CC Dallas/NP and/CC Fort/NN-TL)
(NP_CC Dallas/NP and/CC Sen./NN-TL)
(NP_CC Newton/NP and/CC Joe/NP)
(NP_CC Dallas/NP and/CC Fort/NN-TL)
(NP_CC math/NN or/CC English/NP)
(NP_CC A/NN &/CC I/NN)
(NP_CC College/NN-TL and/CC Massachusetts/NP-TL)
(NP_CC teacher/NN and/CC princ

# Exercise 7

Carry out the following evaluation tasks for any of the chunkers you have developed earlier. (Note that most chunking corpora contain some internal inconsistencies, such that any reasonable rule-based approach will produce errors.)<div>
    a. Evaluate your chunker on 100 sentences from a chunked corpus, and report the precision, recall and F-measure.<div>

In [4]:
from nltk.corpus import conll2000

In [5]:
cp = nltk.RegexpParser('VP: {<MD>?<V.*>*<RB>?<TO>?<V.*>+}')

In [6]:
test_sents = conll2000.chunked_sents('test.txt', chunk_types=['VP'])[:100]

In [8]:
chunkscore = cp.evaluate(test_sents)

In [10]:
print(chunkscore)

ChunkParse score:
    IOB Accuracy:  98.1%%
    Precision:     84.2%%
    Recall:        93.8%%
    F-Measure:     88.7%%


b. Use the chunkscore.missed() and chunkscore.incorrect() methods to identify the errors made by your chunker. Discuss.<div>

In [15]:
chunkscore.missed()

[ImmutableTree('VP', [('said', 'VBD')]),
 ImmutableTree('VP', [('would', 'MD'), ('have', 'VB'), ('realized', 'VBN')]),
 ImmutableTree('VP', [('resigned', 'VBD')]),
 ImmutableTree('VP', [('had', 'VBD')]),
 ImmutableTree('VP', [('had', 'VBD')]),
 ImmutableTree('VP', [('asked', 'VBD')]),
 ImmutableTree('VP', [('more', 'JJR'), ('than', 'IN'), ('offset', 'VB')]),
 ImmutableTree('VP', [('took', 'VBD')]),
 ImmutableTree('VP', [('to', 'TO'), ('make', 'VB')]),
 ImmutableTree('VP', [('estimated', 'VBD')]),
 ImmutableTree('VP', [('to', 'TO'), ('pursue', 'VB')]),
 ImmutableTree('VP', [('had', 'VBD')])]

In [16]:
chunkscore.incorrect()

[ImmutableTree('VP', [('operating', 'VBG')]),
 ImmutableTree('VP', [('offset', 'VB')]),
 ImmutableTree('VP', [('continuing', 'VBG')]),
 ImmutableTree('VP', [('Including', 'VBG')]),
 ImmutableTree('VP', [('combined', 'VBN')]),
 ImmutableTree('VP', [('combined', 'VBN')]),
 ImmutableTree('VP', [('would', 'MD'), ('have', 'VB'), ('realized', 'VBN'), ('had', 'VBN')]),
 ImmutableTree('VP', [('also', 'RB'), ('said', 'VBD')]),
 ImmutableTree('VP', [('increased', 'VBN')]),
 ImmutableTree('VP', [('resigned', 'VBD'), ('to', 'TO'), ('pursue', 'VB')]),
 ImmutableTree('VP', [('segment', 'VBP')]),
 ImmutableTree('VP', [('following', 'VBG')]),
 ImmutableTree('VP', [('combined', 'VBN')]),
 ImmutableTree('VP', [('leading', 'VBG')]),
 ImmutableTree('VP', [('including', 'VBG'), ('discontinued', 'VBN')]),
 ImmutableTree('VP', [('concern', 'VBP')]),
 ImmutableTree('VP', [('had', 'VBD'), ('operating', 'VBG')]),
 ImmutableTree('VP', [('operating', 'VBG')]),
 ImmutableTree('VP', [('also', 'RB'), ('had', 'VBD')]

Very often chunker marks single VBN or VBG as VP chunk. Seems like this is the first direction for improvement.

c. Compare the performance of your chunker to the baseline chunker discussed in the evaluation section of this chapter.<div>

In [17]:
cp_baseline = nltk.RegexpParser("")

In [19]:
print(cp_baseline.evaluate(test_sents))

ChunkParse score:
    IOB Accuracy:  88.1%%
    Precision:      0.0%%
    Recall:         0.0%%
    F-Measure:      0.0%%


# Exercise 8

Develop a chunker for one of the chunk types in the CoNLL corpus using a regular-expression based chunk grammar RegexpChunk. Use any combination of rules for chunking, chinking, merging or splitting.

In [176]:
from nltk.corpus import conll2000

In [177]:
grammar = """
    PP:
      {<IN>}
      {<TO>(?=<DT>?<J.*>*<N.*>)}
      {<TO>(?=<$>?<CD>)}      
"""

In [178]:
cp = nltk.RegexpParser(grammar)

In [179]:
chunkscore = cp.evaluate(conll2000.chunked_sents('train.txt', chunk_types=['PP']))

In [180]:
print(chunkscore)

ChunkParse score:
    IOB Accuracy:  97.4%%
    Precision:     82.3%%
    Recall:        93.7%%
    F-Measure:     87.6%%


# Exercise 9

Sometimes a word is incorrectly tagged, e.g. the head noun in "12/CD or/CC so/RB cases/VBZ". Instead of requiring manual correction of tagger output, good chunkers are able to work with the erroneous output of taggers. Look for other examples of correctly chunked noun phrases with incorrect tags.

In [227]:
from nltk.corpus import conll2000

In [228]:
incorrect_tags_sents = [
    subtree
    for sent in conll2000.chunked_sents(chunk_types=['NP'])
    for subtree in sent.subtrees()
    if subtree.label() == 'NP'
    and not any(t.startswith('N') | t.startswith('P') 
                for w, t in subtree.leaves())
]

In [229]:
incorrect_tags_sents

[Tree('NP', [('This', 'DT')]),
 Tree('NP', [('there', 'EX')]),
 Tree('NP', [('there', 'EX')]),
 Tree('NP', [('there', 'EX')]),
 Tree('NP', [('only', 'RB'), ('#', '#'), ('1.8', 'CD'), ('billion', 'CD')]),
 Tree('NP', [('as', 'RB'), ('little', 'JJ'), ('as', 'IN'), ('#', '#'), ('1.3', 'CD'), ('billion', 'CD')]),
 Tree('NP', [('who', 'WP')]),
 Tree('NP', [('much', 'JJ')]),
 Tree('NP', [('This', 'DT')]),
 Tree('NP', [('1988', 'CD')]),
 Tree('NP', [('there', 'EX')]),
 Tree('NP', [('little', 'JJ')]),
 Tree('NP', [('$', '$'), ('1.5890', 'CD')]),
 Tree('NP', [('$', '$'), ('1.5940', 'CD')]),
 Tree('NP', [('much', 'JJ')]),
 Tree('NP', [('which', 'WDT')]),
 Tree('NP', [('$', '$'), ('367.30', 'CD')]),
 Tree('NP', [('$', '$'), ('366.50', 'CD')]),
 Tree('NP', [('$', '$'), ('40', 'CD')]),
 Tree('NP', [('about', 'RB'), ('$', '$'), ('106.6', 'CD'), ('million', 'CD')]),
 Tree('NP', [('$', '$'), ('38', 'CD')]),
 Tree('NP', [('$', '$'), ('1.875', 'CD')]),
 Tree('NP', [('$', '$'), ('35.50', 'CD'), ('to', 'T

# Exercise 10

The bigram chunker scores about 90% accuracy. Study its errors and try to work out why it doesn't get 100% accuracy. Experiment with trigram chunking. Are you able to improve the performance any more?

In [273]:
from nltk.corpus import conll2000

In [274]:
class BigramChunker(nltk.ChunkParserI):
    def __init__(self, train_sents):
        train_data = [[(t,c) for w,t,c in nltk.chunk.tree2conlltags(sent)]
                      for sent in train_sents]
        self.tagger = nltk.BigramTagger(train_data)

    def parse(self, sentence):
        pos_tags = [pos for (word,pos) in sentence]
        tagged_pos_tags = self.tagger.tag(pos_tags)
        chunktags = [chunktag for (pos, chunktag) in tagged_pos_tags]
        conlltags = [(word, pos, chunktag) for ((word,pos),chunktag)
                     in zip(sentence, chunktags)]
        return nltk.chunk.conlltags2tree(conlltags)

In [275]:
train_sents = conll2000.chunked_sents('train.txt', chunk_types=['NP'])
test_sents = conll2000.chunked_sents('test.txt', chunk_types=['NP'])

In [276]:
bigram_chunker_v0 = BigramChunker(train_sents)
bigram_score_v0 = bigram_chunker_v0.evaluate(test_sents)

In [277]:
print(bigram_score_v0)

ChunkParse score:
    IOB Accuracy:  93.3%%
    Precision:     82.3%%
    Recall:        86.8%%
    F-Measure:     84.5%%


In [278]:
bigram_score.missed()

[ImmutableTree('NP', [('$', '$'), ('12', 'CD'), ('billion', 'CD')]),
 ImmutableTree('NP', [('more', 'JJR'), ('than', 'IN'), ('5,000', 'CD'), ('formal', 'JJ'), ('requests', 'NNS')]),
 ImmutableTree('NP', [('yesterday', 'NN')]),
 ImmutableTree('NP', [('Ordinary', 'JJ'), ('shares', 'NNS')]),
 ImmutableTree('NP', [('Aug.', 'NNP'), ('6', 'CD'), (',', ','), ('1987', 'CD')]),
 ImmutableTree('NP', [('beef', 'NN'), ('and', 'CC'), ('pork', 'NN')]),
 ImmutableTree('NP', [('a', 'DT'), ('far', 'RB'), ('rosier', 'JJR'), ('assessment', 'NN')]),
 ImmutableTree('NP', [('The', 'DT'), ('ratings', 'NNS'), ('concern', 'VBP')]),
 ImmutableTree('NP', [('the', 'DT'), ('`', '``'), ('paperboy', 'NN'), ("'", "''"), ('bike', 'NN')]),
 ImmutableTree('NP', [('bonds', 'NNS'), ('and', 'CC'), ('stocks', 'NNS')]),
 ImmutableTree('NP', [('The', 'DT'), ('lists', 'NNS')]),
 ImmutableTree('NP', [('--', ':'), ('George', 'NNP'), ('O.', 'NNP'), ('Ludcke', 'NNP')]),
 ImmutableTree('NP', [('morally', 'RB'), ('dubious', 'JJ'), (

At first, let's try to use backoff taggers, e.g. UnigramTagger and DefaultTagger.

In [281]:
class NewBigramChunker(nltk.ChunkParserI):
    def __init__(self, train_sents):
        train_data = [[(t,c) for w,t,c in nltk.chunk.tree2conlltags(sent)]
                      for sent in train_sents]
        t0 = nltk.DefaultTagger('O')
        t1 = nltk.UnigramTagger(train_data, backoff=t0)
        self.tagger = nltk.BigramTagger(train_data, backoff=t1)

    def parse(self, sentence):
        pos_tags = [pos for (word,pos) in sentence]
        tagged_pos_tags = self.tagger.tag(pos_tags)
        chunktags = [chunktag for (pos, chunktag) in tagged_pos_tags]
        conlltags = [(word, pos, chunktag) for ((word,pos),chunktag)
                     in zip(sentence, chunktags)]
        return nltk.chunk.conlltags2tree(conlltags)

In [282]:
bigram_chunker_v1 = NewBigramChunker(train_sents)
bigram_score_v1 = bigram_chunker_v1.evaluate(test_sents)

In [283]:
print(bigram_score_v1)

ChunkParse score:
    IOB Accuracy:  93.4%%
    Precision:     82.3%%
    Recall:        87.0%%
    F-Measure:     84.6%%


At second, let's try to extend n-gram range and use backoff taggers as well

In [284]:
class TrigramChunker(nltk.ChunkParserI):
    def __init__(self, train_sents):
        train_data = [[(t,c) for w,t,c in nltk.chunk.tree2conlltags(sent)]
                      for sent in train_sents]
        t0 = nltk.DefaultTagger('O')
        t1 = nltk.UnigramTagger(train_data, backoff=t0)
        t2 = nltk.BigramTagger(train_data, backoff=t1)
        self.tagger = nltk.TrigramTagger(train_data, backoff=t2)

    def parse(self, sentence):
        pos_tags = [pos for (word,pos) in sentence]
        tagged_pos_tags = self.tagger.tag(pos_tags)
        chunktags = [chunktag for (pos, chunktag) in tagged_pos_tags]
        conlltags = [(word, pos, chunktag) for ((word,pos),chunktag)
                     in zip(sentence, chunktags)]
        return nltk.chunk.conlltags2tree(conlltags)

In [285]:
trigram_chunker = TrigramChunker(train_sents)
trigram_score = trigram_chunker.evaluate(test_sents)

In [286]:
print(trigram_score)

ChunkParse score:
    IOB Accuracy:  93.5%%
    Precision:     82.5%%
    Recall:        87.1%%
    F-Measure:     84.7%%


Using TrigramChunker with backoff approach helped us to increase all performance metrics by ~0.2% on average.

# Exercise 11

Apply the n-gram and Brill tagging methods to IOB chunk tagging. Instead of assigning POS tags to words, here we will assign IOB tags to the POS tags. E.g. if the tag DT (determiner) often occurs at the start of a chunk, it will be tagged B (begin). Evaluate the performance of these chunking methods relative to the regular expression chunking methods covered in this chapter.

In [296]:
from nltk.corpus import conll2000

In [297]:
train_sents = conll2000.chunked_sents('train.txt', chunk_types=['NP'])
test_sents = conll2000.chunked_sents('test.txt', chunk_types=['NP'])

In [298]:
grammar = r"NP: {<[CDJNP].*>+}"
regex_chunker = nltk.RegexpParser(grammar)
regex_score = regex_chunker.evaluate(test_sents)

In [299]:
print(regex_score)

ChunkParse score:
    IOB Accuracy:  87.7%%
    Precision:     70.6%%
    Recall:        67.8%%
    F-Measure:     69.2%%


In [300]:
class TrigramChunker(nltk.ChunkParserI):
    def __init__(self, train_sents):
        train_data = [[(t,c) for w,t,c in nltk.chunk.tree2conlltags(sent)]
                      for sent in train_sents]
        t0 = nltk.DefaultTagger('O')
        t1 = nltk.UnigramTagger(train_data, backoff=t0)
        t2 = nltk.BigramTagger(train_data, backoff=t1)
        self.tagger = nltk.TrigramTagger(train_data, backoff=t2)

    def parse(self, sentence):
        pos_tags = [pos for (word,pos) in sentence]
        tagged_pos_tags = self.tagger.tag(pos_tags)
        chunktags = [chunktag for (pos, chunktag) in tagged_pos_tags]
        conlltags = [(word, pos, chunktag) for ((word,pos),chunktag)
                     in zip(sentence, chunktags)]
        return nltk.chunk.conlltags2tree(conlltags)

In [301]:
trigram_chunker = TrigramChunker(train_sents)
trigram_score = trigram_chunker.evaluate(test_sents)

In [302]:
print(trigram_score)

ChunkParse score:
    IOB Accuracy:  93.5%%
    Precision:     82.5%%
    Recall:        87.1%%
    F-Measure:     84.7%%


In [323]:
from nltk.tag import brill, brill_trainer

In [324]:
class BrillChunker(nltk.ChunkParserI):
    def __init__(self, train_sents):
        train_data = [[(t,c) for w,t,c in nltk.chunk.tree2conlltags(sent)]
                      for sent in train_sents]
        t0 = nltk.DefaultTagger('O')
        t1 = nltk.UnigramTagger(train_data, backoff=t0)
        t2 = nltk.BigramTagger(train_data, backoff=t1)
        t3 = nltk.TrigramTagger(train_data, backoff=t2)
        
        trainer = brill_trainer.BrillTaggerTrainer(
            initial_tagger=t3, 
            templates=brill.fntbl37()
        )
        self.tagger = trainer.train(train_data)

    def parse(self, sentence):
        pos_tags = [pos for (word,pos) in sentence]
        tagged_pos_tags = self.tagger.tag(pos_tags)
        chunktags = [chunktag for (pos, chunktag) in tagged_pos_tags]
        conlltags = [(word, pos, chunktag) for ((word,pos),chunktag)
                     in zip(sentence, chunktags)]
        return nltk.chunk.conlltags2tree(conlltags)

In [325]:
brill_chunker = BrillChunker(train_sents)
brill_score = brill_chunker.evaluate(test_sents)

In [326]:
print(brill_score)

ChunkParse score:
    IOB Accuracy:  96.1%%
    Precision:     90.6%%
    Recall:        90.8%%
    F-Measure:     90.7%%


nltkdemo18:
ChunkParse score:
    IOB Accuracy:  94.5%%
    Precision:     85.8%%
    Recall:        87.9%%
    F-Measure:     86.9%%
        
nltkdemo18plus:
ChunkParse score:
    IOB Accuracy:  95.9%%
    Precision:     90.2%%
    Recall:        90.6%%
    F-Measure:     90.4%%
        
fntbl37:
ChunkParse score:
    IOB Accuracy:  96.1%%
    Precision:     90.7%%
    Recall:        91.0%%
    F-Measure:     90.8%%
        
brill24:
ChunkParse score:
    IOB Accuracy:  95.4%%
    Precision:     88.8%%
    Recall:        90.0%%
    F-Measure:     89.4%%

# Exercise 12 (Review)

We saw in 5. that it is possible to establish an upper limit to tagging performance by looking for ambiguous n-grams, n-grams that are tagged in more than one possible way in the training data. Apply the same method to determine an upper bound on the performance of an n-gram chunker.

Assuming we always pick the most likely tag in such ambiguous contexts, we can derive a lower bound on the performance of a trigram tagger.

In [390]:
from nltk.corpus import conll2000

In [391]:
def get_lower_bound(n, chunk_types=None):
    cfd = nltk.ConditionalFreqDist(
        ((*[t[2] for t in ngram[:-1]], ngram[-1][1]), ngram[-1][2])
        for sent in conll2000.chunked_sents(chunk_types=chunk_types)
        for ngram in nltk.ngrams(nltk.chunk.tree2conlltags(sent), n)
    )
    ambiguous_contexts = [c for c in cfd.conditions() if len(cfd[c]) > 1]
    return 1 - sum(cfd[c].N() for c in ambiguous_contexts) / cfd.N()

In [392]:
get_lower_bound(3)

0.08669974412046155

In [393]:
get_lower_bound(3, ['NP']), get_lower_bound(3, ['VP']), get_lower_bound(3, ['PP'])

(0.0671399243743176, 0.1500077986350281, 0.3488814228082674)

In [394]:
get_lower_bound(4)

0.1555582062122558

In [395]:
get_lower_bound(4, ['NP']), get_lower_bound(4, ['VP']), get_lower_bound(4, ['PP'])

(0.09260952734373273, 0.20812956409950567, 0.3757217517306579)