# LSTM NER tagger: flair (bidirectional LSTM CRF)

In [None]:
%load_ext autoreload
%autoreload 2

## load data

In [None]:
from flair.data_fetcher import NLPTask, NLPTaskDataFetcher

# Load the corpus registered under NLPTask.CONLL_03 and print it for a quick
# sanity check. NOTE(review): assumes the CoNLL-03 files are already available
# where flair's data fetcher looks for them -- confirm before running.
corpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_03)
print(corpus)

In [None]:
# Inspect a single training example (index 5), rendered through
# to_tagged_string for the 'ner' tag type.
sample_sentence = corpus.train[5]
print(sample_sentence.to_tagged_string('ner'))

----

## set up tag dictionary and embeddings

In [None]:
from flair.data import TaggedCorpus
from flair.embeddings import TokenEmbeddings, WordEmbeddings, CharacterEmbeddings, StackedEmbeddings
from typing import List

In [None]:
# EMBEDDING_DIM = 100  # disabled; no cell shown in this notebook reads it
# Tag layer to learn; reused when building the tag dictionary and the
# SequenceTagger.
tag_type: str = 'ner'

In [None]:
# Build the tag dictionary for `tag_type` from the corpus, then print its
# idx2item attribute to inspect the label set the tagger will predict over.
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

In [None]:
# Two token-level embeddings are combined: the 'glove' word embeddings and
# flair's CharacterEmbeddings with its default settings.
glove_embedding = WordEmbeddings('glove')
character_embedding = CharacterEmbeddings()
embedding_types: List[TokenEmbeddings] = [glove_embedding, character_embedding]

# Wrap the individual embeddings in one StackedEmbeddings object, which is
# what the SequenceTagger receives.
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

----

# Train

In [None]:
# Passed to SequenceTagger as hidden_size.
HIDDEN_DIM: int = 256
# Passed to trainer.train as max_epochs.
EPOCH_NUM: int = 10

In [None]:
from flair.models import SequenceTagger

# Assemble the sequence tagger from the stacked embeddings and the tag
# dictionary built earlier; use_crf=True enables the CRF option.
tagger: SequenceTagger = SequenceTagger(
    hidden_size=HIDDEN_DIM,
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type=tag_type,
    use_crf=True,
)

In [None]:
from flair.trainers import ModelTrainer

# Bind the tagger to the corpus it will be trained on.
trainer: ModelTrainer = ModelTrainer(tagger, corpus)

In [11]:
%%time
# Run training with 'models/lstm_ner_flair' as the first (path) argument.
# The call is kept as the cell's final expression so that the dict it
# returns is displayed.
trainer.train(
    'models/lstm_ner_flair',
    learning_rate=0.1,
    mini_batch_size=32,
    max_epochs=EPOCH_NUM,
)

2019-04-10 03:30:49,898 MICRO_AVG: acc 0.6394 - f1-score 0.78
2019-04-10 03:30:49,899 MACRO_AVG: acc 0.6227 - f1-score 0.7619
2019-04-10 03:30:49,900 LOC        tp: 1378 - fp: 257 - fn: 290 - tn: 1378 - precision: 0.8428 - recall: 0.8261 - accuracy: 0.7158 - f1-score: 0.8344
2019-04-10 03:30:49,902 MISC       tp: 466 - fp: 232 - fn: 236 - tn: 466 - precision: 0.6676 - recall: 0.6638 - accuracy: 0.4989 - f1-score: 0.6657
2019-04-10 03:30:49,903 ORG        tp: 1066 - fp: 349 - fn: 595 - tn: 1066 - precision: 0.7534 - recall: 0.6418 - accuracy: 0.5303 - f1-score: 0.6931
2019-04-10 03:30:49,904 PER        tp: 1423 - fp: 291 - fn: 194 - tn: 1423 - precision: 0.8302 - recall: 0.8800 - accuracy: 0.7458 - f1-score: 0.8544
2019-04-10 03:30:49,905 ----------------------------------------------------------------------------------------------------
CPU times: user 8min 31s, sys: 46.5 s, total: 9min 17s
Wall time: 7min 24s


{'dev_loss_history': [1.9760111570358276],
 'dev_score_history': [0.8033],
 'test_score': 0.78,
 'train_loss_history': [5.421660023851726]}

---

# Plot

In [None]:
from flair.visual.training_curves import Plotter

# Plot the logs of the run trained in this notebook. The directory must match
# the path passed to trainer.train(); the previous value
# ('resources/taggers/example-ner') pointed at a different run.
model_dir = 'models/lstm_ner_flair'

plotter = Plotter()
# loss.tsv / weights.txt are the log files flair's trainer is expected to
# write into the training directory -- confirm for the installed version.
plotter.plot_training_curves(model_dir + '/loss.tsv')
plotter.plot_weights(model_dir + '/weights.txt')