In [2]:
import nltk
from sklearn.model_selection import train_test_split as split
from nltk.tag import DefaultTagger, UnigramTagger, BigramTagger, TrigramTagger

In [3]:
# Load every POS-tagged sentence of the Brown corpus.
tagged = nltk.corpus.brown.tagged_sents()

# Hold out half of the sentences for evaluation. The seed is fixed so that a
# fresh "Restart & Run All" yields the same partition (and thus the same
# scores); without it every run shuffles differently.
train_50, test_50 = split(tagged, test_size=0.5, random_state=42)

In [7]:
# Baseline: label every token as a noun ("NN").
default = DefaultTagger("NN")

# N-gram taggers trained on the same half of the corpus; each one is handed
# the next-simpler tagger as its backoff.
uni = UnigramTagger(train_50, backoff=default)
bi = BigramTagger(train_50, backoff=uni)
tri = TrigramTagger(train_50, backoff=bi)

# Display label -> tagger, kept in order from simplest to most complex.
taggers = dict([
    ("Default (no backoff)", default),
    ("Unigram (backoff: def)", uni),
    ("Bigram (backoff: uni)", bi),
    ("Trigram (backoff: bi)", tri),
])

In [8]:
# Print one line per tagger with its accuracy on the held-out sentences.
for label in taggers:
    score = taggers[label].accuracy(test_50)
    print(f"{label} acc:\t\t{score}")

Default (no backoff) acc:		0.13081291573180007
Unigram (backoff: def) acc:		0.8940641647595675
Bigram (backoff: uni) acc:		0.913110436991572
Trigram (backoff: bi) acc:		0.9132363314023775


In [15]:
# Per-tag precision / recall / F-measure for each tagger, limited to five
# rows (truncate=5) and ordered by tag count (sort_by_count=True).
# The value is a multi-line table rather than a single accuracy figure, so
# the label goes on its own line; printing it inline pushed the header row
# out of alignment with its columns.
for name, tagger in taggers.items():
    print(f"{name} per-tag metrics:")
    print(tagger.evaluate_per_tag(test_50, truncate=5, sort_by_count=True))

Default (no backoff) acc:		Tag | Prec.  | Recall | F-measure
----+--------+--------+-----------
 NN | 0.1308 | 1.0000 | 0.2314
 IN | 0.0000 | 0.0000 | 0.0000
 AT | 0.0000 | 0.0000 | 0.0000
 JJ | 0.0000 | 0.0000 | 0.0000
  . | 0.0000 | 0.0000 | 0.0000

Unigram (backoff: def) acc:		Tag | Prec.  | Recall | F-measure
----+--------+--------+-----------
 NN | 0.7740 | 0.9473 | 0.8519
 IN | 0.9481 | 0.8844 | 0.9152
 AT | 0.9848 | 1.0000 | 0.9923
 JJ | 0.9075 | 0.8600 | 0.8831
  . | 0.9904 | 0.9992 | 0.9948

Bigram (backoff: uni) acc:		Tag | Prec.  | Recall | F-measure
----+--------+--------+-----------
 NN | 0.7880 | 0.9658 | 0.8679
 IN | 0.9288 | 0.9272 | 0.9280
 AT | 0.9920 | 0.9976 | 0.9948
 JJ | 0.9173 | 0.8639 | 0.8898
  . | 0.9934 | 0.9977 | 0.9955

Trigram (backoff: bi) acc:		Tag | Prec.  | Recall | F-measure
----+--------+--------+-----------
 NN | 0.7880 | 0.9659 | 0.8680
 IN | 0.9351 | 0.9244 | 0.9297
 AT | 0.9928 | 0.9971 | 0.9950
 JJ | 0.9216 | 0.8572 | 0.8882
  . | 0.9932 | 0.998

In [10]:
# Exploratory introspection: list the attribute and method names available on
# the trained unigram tagger.
# NOTE(review): scratch cell — nothing else in the visible notebook uses its
# result; consider removing it from the final version.
dir(uni)

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_check_params',
 '_confusion_cached',
 '_context_to_tag',
 '_n',
 '_taggers',
 '_train',
 'accuracy',
 'backoff',
 'choose_tag',
 'confusion',
 'context',
 'decode_json_obj',
 'encode_json_obj',
 'evaluate',
 'evaluate_per_tag',
 'f_measure',
 'json_tag',
 'precision',
 'recall',
 'size',
 'tag',
 'tag_one',
 'tag_sents']