# LSTM NER tagger: flair (bidirectional LSTM CRF)

In [None]:
# Enable IPython's autoreload extension. Per the IPython docs, mode 2 re-imports
# all modules before each cell executes, so edits to local code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Load data

In [None]:
from flair.data_fetcher import NLPTask, NLPTaskDataFetcher

# Load the CoNLL-03 task corpus through flair's data fetcher and print the
# resulting corpus object.
# NOTE(review): CoNLL-03 is presumably expected to already exist on disk in the
# location flair looks for it — confirm against the flair docs.
corpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_03)
print(corpus)

In [None]:
# Sanity check: print the training sentence at index 5 rendered as a tagged
# string for the 'ner' tag type.
print(corpus.train[5].to_tagged_string('ner'))

----

## Prepare tag dictionary and embeddings

In [None]:
from flair.data import TaggedCorpus
from flair.embeddings import TokenEmbeddings, WordEmbeddings, CharacterEmbeddings, StackedEmbeddings
from typing import List

In [None]:
# EMBEDDING_DIM = 100  (disabled; not referenced by any cell shown in this notebook)
# Tag type to learn; reused below when building the tag dictionary and when
# constructing the SequenceTagger.
tag_type = 'ner'

In [None]:
# Build the tag dictionary for `tag_type` from the corpus and print its
# idx2item attribute to inspect the tags it contains.
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

In [None]:
# Token representations fed to the tagger: GloVe word embeddings combined with
# flair's CharacterEmbeddings, wrapped in a single StackedEmbeddings object.
embedding_types: List[TokenEmbeddings] = [WordEmbeddings('glove'), CharacterEmbeddings()]
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

----

# Train

In [None]:
# Training hyperparameters.
# HIDDEN_DIM is passed as `hidden_size` to the SequenceTagger below.
HIDDEN_DIM = 256
# EPOCH_NUM is passed as `max_epochs` to trainer.train() below.
EPOCH_NUM = 10

In [None]:
from flair.models import SequenceTagger

# Sequence tagger over the stacked embeddings, predicting `tag_type` labels
# from `tag_dictionary`, with the CRF layer switched on (use_crf=True).
tagger: SequenceTagger = SequenceTagger(
    hidden_size=HIDDEN_DIM,
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type=tag_type,
    use_crf=True,
)

In [None]:
from flair.trainers import ModelTrainer

# Pair the tagger with the corpus; this trainer object drives the training run
# in the next cell.
trainer: ModelTrainer = ModelTrainer(tagger, corpus)

In [None]:
%%time
trainer.train('models/lstm_ner_flair',
              learning_rate=0.1,
              mini_batch_size=32,
              max_epochs=EPOCH_NUM)

---

# Plot

In [None]:
%matplotlib inline
from flair.visual.training_curves import Plotter
# Plot training curves from the loss.tsv file located under the same
# 'models/lstm_ner_flair' directory that was passed to trainer.train().
plotter = Plotter()
plotter.plot_training_curves('models/lstm_ner_flair/loss.tsv')