In [None]:
from flair.data import Sentence
from flair.models import SequenceTagger
from ipywidgets import IntProgress

# Pretrained Flair NER model to load, identified by its model-hub name.
# NOTE(review): this is the German model, but the sample passage tagged in
# the following cells is English — confirm this pairing is intentional.
PRETRAINED_NER_MODEL = "flair/ner-german"

tagger = SequenceTagger.load(PRETRAINED_NER_MODEL)

In [44]:
# Sample input for the tagger: an English-language passage about The Smurfs.
# The triple-quoted literal keeps its embedded newlines and the leading
# indentation of every continuation line exactly as written.
text= '''The Smurfs (French: Les Schtroumpfs; Dutch: De Smurfen) is a 
        Belgian comic franchise centered on a fictional colony of small, 
        blue, humanoid creatures who live in mushroom-shaped houses in 
        the forest. The Smurfs was first created and introduced as a 
        series of comic characters by the Belgian comics artist Peyo 
        (the pen name of Pierre Culliford) in 1958, wherein they were 
        known as Les Schtroumpfs. There are more than 100 Smurf 
        characters, and their names are based on adjectives that 
        emphasise their characteristics, such as "Jokey Smurf", who 
        likes to play practical jokes on his fellow smurfs. "Smurfette" 
        was the first female Smurf to be introduced in the series. 
        The Smurfs wear Phrygian caps, which came to represent freedom 
        during the modern era. The word “smurf” is the original Dutch 
        translation of the French "schtroumpf", which, according to Peyo, 
        is a word he invented during a meal with fellow cartoonist 
        André Franquin when he could not remember the word salt.'''

In [45]:
# Wrap the raw passage in a Flair Sentence and run the tagger over it;
# the predictions are read back from the same Sentence object below.
sentence = Sentence(text)
tagger.predict(sentence)

# Gather every span labelled under 'ner' into a plain list so the next
# cell can display it (nothing is printed here).
entities = list(sentence.get_spans('ner'))

In [46]:
# Bare last expression: the notebook renders the list of predicted spans
# as this cell's output.
entities

[<ORG-span (1,2): "The Smurfs">,
 <ORG-span (4): "French">,
 <ORG-span (6,7): "Les Schtroumpfs">,
 <PER-span (9): "Dutch">,
 <ORG-span (11,12): "De Smurfen">,
 <ORG-span (55,56,57,58): "Belgian comics artist Peyo">,
 <PER-span (64,65): "Pierre Culliford">,
 <MISC-span (75,76): "Les Schtroumpfs">,
 <ORG-span (101,102): "Jokey Smurf">,
 <MISC-span (117): "Smurfette">,
 <MISC-span (131,132,133,134,135): "The Smurfs wear Phrygian caps">,
 <MISC-span (161): "schtroumpf">,
 <MISC-span (168): "Peyo">,
 <PER-span (181,182): "André Franquin">]

In [None]:
# All imports needed by this training cell, gathered in one place.
from flair.data import Corpus
from flair.datasets import CONLL_03_GERMAN
from flair.embeddings import FlairEmbeddings, StackedEmbeddings, WordEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

# --- Step 1: corpus -------------------------------------------------------
# NOTE(review): Flair's dataset docs indicate CoNLL-03 German is not
# downloaded automatically and must already be present locally — confirm.
corpus: Corpus = CONLL_03_GERMAN()

# --- Steps 2-3: label type and its dictionary ----------------------------
# The tag dictionary is derived from the corpus for the chosen tag type.
tag_type = 'ner'
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

# --- Step 4: embeddings ---------------------------------------------------
# One classic word embedding plus forward and backward contextual string
# embeddings, combined into a single stacked embedding.
# NOTE(review): per Flair's embedding docs the 'de' identifier selects the
# German FastText vectors (not GloVe) — confirm against the installed version.
embedding_types = [
    WordEmbeddings('de'),
    FlairEmbeddings('de-forward'),
    FlairEmbeddings('de-backward'),
]
embeddings = StackedEmbeddings(embeddings=embedding_types)

# --- Step 5: sequence tagger ----------------------------------------------
# Rebinds `tagger`: from here on the name refers to this freshly built
# model, no longer to the pretrained one loaded in the first cell.
tagger = SequenceTagger(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type=tag_type,
)

# --- Steps 6-7: trainer and training run ----------------------------------
# Output is written under resources/taggers/ner-german; the run is capped
# at 150 epochs and is started with train_with_dev enabled.
trainer = ModelTrainer(tagger, corpus)
trainer.train(
    'resources/taggers/ner-german',
    train_with_dev=True,
    max_epochs=150,
)

In [None]:
#@inproceedings{akbik2018coling,
#  title={Contextual String Embeddings for Sequence Labeling},
#  author={Akbik, Alan and Blythe, Duncan and Vollgraf, Roland},
#  booktitle = {{COLING} 2018, 27th International Conference on Computational Linguistics},
#  pages     = {1638--1649},
#  year      = {2018}
#}