# Tutorial 1: NLP Base Types

In [2]:
# import des Flair Packages
from flair.data import Sentence 
from flair.models import SequenceTagger

## Creating a Sentence

In [3]:
# The sentence objects holds a sentence that we may want to embed or tag
from flair.data import Sentence

# Make a sentence object by passing a whitespace tokenized string
sentence = Sentence('The grass is green .')

# Print the object to see what's in there
print(sentence)

Sentence: "The grass is green ." - 5 Tokens


In [6]:
# using the token id
print(sentence.get_token(3))
# using the index itself 
print(sentence[3])

Token: 4 green
Token: 5 .


In [7]:
for token in sentence:
    print(token)

Token: 1 The
Token: 2 grass
Token: 3 is
Token: 4 green
Token: 5 .


## Tokenization

In [9]:
# Make a sentence object by passing an untokenized string and the 'use_tokenizer' flag
sentence = Sentence('The grass is green.', use_tokenizer=True)

# Print the object to see what's in there
print(sentence)

Sentence: "The grass is green ." - 5 Tokens


## Adding Tags to Tokens

In [10]:
# add a tag to a word in the sentence
sentence[3].add_tag('ner', 'color')

# print the sentence with all tags of this type
print(sentence.to_tagged_string())

The grass is green <color> .


In [11]:
from flair.data import Label

tag: Label = sentence[3].get_tag('ner')

print(f'"{sentence[3]}" is tagged as "{tag.value}" with confidence score "{tag.score}"')

"Token: 4 green" is tagged as "color" with confidence score "1.0"


## Adding Labels to Sentences

In [14]:
sentence = Sentence('France is the current world cup winner.')

# add a label to a sentence
sentence.add_label('sports')

# a sentence can also belong to multiple classes
sentence.add_labels(['sports', 'world cup'])

#you can also set the labels while initalizing the sentence
sentence = Sentence('France is the current world cup winner.', labels=['sports', 'world cup'])

In [15]:
sentence = Sentence('France is the current world cup winner.', labels=['sports', 'world cup'])

print(sentence)
for label in sentence.labels:
    print(label)

Sentence: "France is the current world cup winner." - 7 Tokens
sports (1.0)
world cup (1.0)


# Tutorial 2: Tagging your text

In [16]:
from flair.models import SequenceTagger

tagger = SequenceTagger.load('ner')

In [26]:
sentence = Sentence('Wenn Christian Wulff im Wahlkampf auf seine Leistungen verweist, dann fängt er hin und wieder mit dem Satz an: "Ich sage es in aller Bescheidenheit" .')

#predict NER tags
tagger.predict(sentence)

#print sentence with predicted tags
print(sentence.to_tagged_string())

Wenn <B-PER> Christian <I-PER> Wulff <E-PER> im Wahlkampf <S-PER> auf seine Leistungen verweist, dann fängt er hin und wieder mit dem Satz an: "Ich sage es in aller Bescheidenheit" .


In [30]:
from flair.data import Sentence
from flair.models import SequenceTagger

# make a sentence
sentence = Sentence('Fritz Berlin verbringt seine Sonntage gerne in einem Cafe in Berlin mit der Berliner Tageszeitung .')

# load the NER tagger
tagger = SequenceTagger.load('de-ner')

# run NER over sentence
tagger.predict(sentence)

# print sentence
print(sentence)
print('The following NER tags are found:')

# iterate over entities and print
for entity in sentence.get_spans('de-ner'):
    print(entity)

Sentence: "Fritz Berlin verbringt seine Sonntage gerne in einem Cafe in Berlin mit der Berliner Tageszeitung ." - 16 Tokens
The following NER tags are found:
